diff --git a/lib/html5lib/__init__.py b/lib/html5lib/__init__.py
index 19a4b7d6..320e0c3b 100644
--- a/lib/html5lib/__init__.py
+++ b/lib/html5lib/__init__.py
@@ -1,14 +1,23 @@
 """
-HTML parsing library based on the WHATWG "HTML5"
-specification. The parser is designed to be compatible with existing
-HTML found in the wild and implements well-defined error recovery that
+HTML parsing library based on the `WHATWG HTML specification
+<https://whatwg.org/html>`_. The parser is designed to be compatible with
+existing HTML found in the wild and implements well-defined error recovery that
 is largely compatible with modern desktop web browsers.
 
-Example usage:
+Example usage::
 
-import html5lib
-f = open("my_document.html")
-tree = html5lib.parse(f)
+    import html5lib
+    with open("my_document.html", "rb") as f:
+        tree = html5lib.parse(f)
+
+For convenience, this module re-exports the following names:
+
+* :func:`~.html5parser.parse`
+* :func:`~.html5parser.parseFragment`
+* :class:`~.html5parser.HTMLParser`
+* :func:`~.treebuilders.getTreeBuilder`
+* :func:`~.treewalkers.getTreeWalker`
+* :func:`~.serializer.serialize`
 """
 
 from __future__ import absolute_import, division, unicode_literals
@@ -20,4 +29,7 @@ from .serializer import serialize
 
 __all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
            "getTreeWalker", "serialize"]
-__version__ = "0.999"
+
+# this has to be at the top level, see how setup.py parses this
+#: Distribution version number.
+__version__ = "1.1"
diff --git a/lib/html5lib/ihatexml.py b/lib/html5lib/_ihatexml.py
similarity index 96%
rename from lib/html5lib/ihatexml.py
rename to lib/html5lib/_ihatexml.py
index 0fc79308..3ff803c1 100644
--- a/lib/html5lib/ihatexml.py
+++ b/lib/html5lib/_ihatexml.py
@@ -136,6 +136,7 @@ def normaliseCharList(charList):
         i += j
     return rv
 
+
 # We don't really support characters above the BMP :(
 max_unicode = int("FFFF", 16)
 
@@ -175,18 +176,18 @@ def escapeRegexp(string):
     return string
 
 # output from the above
-nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u
0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
+nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u
0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')  # noqa
 
-nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u
3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
+nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u
3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')  # noqa
 
 # Simpler things
-nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\-\'()+,./:=?;!*#@$_%]")
+nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]")
 
 
 class InfosetFilter(object):
     replacementRegexp = re.compile(r"U[\dA-F]{5,5}")
 
-    def __init__(self, replaceChars=None,
+    def __init__(self,
                  dropXmlnsLocalName=False,
                  dropXmlnsAttrNs=False,
                  preventDoubleDashComments=False,
@@ -217,7 +218,7 @@ class InfosetFilter(object):
         else:
             return self.toXmlName(name)
 
-    def coerceElement(self, name, namespace=None):
+    def coerceElement(self, name):
         return self.toXmlName(name)
 
     def coerceComment(self, data):
@@ -225,11 +226,14 @@ class InfosetFilter(object):
             while "--" in data:
                 warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
                 data = data.replace("--", "- -")
+            if data.endswith("-"):
+                warnings.warn("Comments cannot end in a dash", DataLossWarning)
+                data += " "
         return data
 
     def coerceCharacters(self, data):
         if self.replaceFormFeedCharacters:
-            for i in range(data.count("\x0C")):
+            for _ in range(data.count("\x0C")):
                 warnings.warn("Text cannot contain U+000C", DataLossWarning)
             data = data.replace("\x0C", " ")
         # Other non-xml characters
@@ -251,7 +255,7 @@ class InfosetFilter(object):
         nameRest = name[1:]
         m = nonXmlNameFirstBMPRegexp.match(nameFirst)
         if m:
-            warnings.warn("Coercing non-XML name", DataLossWarning)
+            warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning)
             nameFirstOutput = self.getReplacementCharacter(nameFirst)
         else:
             nameFirstOutput = nameFirst
@@ -259,7 +263,7 @@ class InfosetFilter(object):
         nameRestOutput = nameRest
         replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest))
         for char in replaceChars:
-            warnings.warn("Coercing non-XML name", DataLossWarning)
+            warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning)
             replacement = self.getReplacementCharacter(char)
             nameRestOutput = nameRestOutput.replace(char, replacement)
         return nameFirstOutput + nameRestOutput
diff --git a/lib/html5lib/inputstream.py b/lib/html5lib/_inputstream.py
similarity index 77%
rename from lib/html5lib/inputstream.py
rename to lib/html5lib/_inputstream.py
index 9e03b931..0207dd21 100644
--- a/lib/html5lib/inputstream.py
+++ b/lib/html5lib/_inputstream.py
@@ -1,26 +1,17 @@
 from __future__ import absolute_import, division, unicode_literals
+
 from six import text_type
-from six.moves import http_client
+from six.moves import http_client, urllib
 
 import codecs
 import re
+from io import BytesIO, StringIO
+
+import webencodings
 
 from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
-from .constants import encodings, ReparseException
-from . import utils
-
-from io import StringIO
-
-try:
-    from io import BytesIO
-except ImportError:
-    BytesIO = StringIO
-
-try:
-    from io import BufferedIOBase
-except ImportError:
-    class BufferedIOBase(object):
-        pass
+from .constants import _ReparseException
+from . import _utils
 
 # Non-unicode versions of constants for use in the pre-parser
 spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
@@ -28,17 +19,30 @@ asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
 asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase])
 spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"])
 
-invalid_unicode_re = re.compile("[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uD800-\uDFFF\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]")
 
-non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
-                                  0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
-                                  0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
-                                  0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
-                                  0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
-                                  0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
-                                  0x10FFFE, 0x10FFFF])
+invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]"  # noqa
 
-ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]")
+if _utils.supports_lone_surrogates:
+    # Use one extra step of indirection and create surrogates with
+    # eval. Not using this indirection would introduce an illegal
+    # unicode literal on platforms not supporting such lone
+    # surrogates.
+    assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1
+    invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] +
+                                    eval('"\\uD800-\\uDFFF"') +  # pylint:disable=eval-used
+                                    "]")
+else:
+    invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)
+
+non_bmp_invalid_codepoints = {0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
+                              0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
+                              0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
+                              0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
+                              0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
+                              0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
+                              0x10FFFE, 0x10FFFF}
+
+ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]")
 
 # Cache for charsUntil()
 charsUntilRegEx = {}
@@ -118,10 +122,13 @@ class BufferedStream(object):
         return b"".join(rv)
 
 
-def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True):
-    if isinstance(source, http_client.HTTPResponse):
-        # Work around Python bug #20007: read(0) closes the connection.
-        # http://bugs.python.org/issue20007
+def HTMLInputStream(source, **kwargs):
+    # Work around Python bug #20007: read(0) closes the connection.
+    # http://bugs.python.org/issue20007
+    if (isinstance(source, http_client.HTTPResponse) or
+        # Also check for addinfourl wrapping HTTPResponse
+        (isinstance(source, urllib.response.addbase) and
+         isinstance(source.fp, http_client.HTTPResponse))):
         isUnicode = False
     elif hasattr(source, "read"):
         isUnicode = isinstance(source.read(0), text_type)
@@ -129,12 +136,13 @@ def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True):
         isUnicode = isinstance(source, text_type)
 
     if isUnicode:
-        if encoding is not None:
-            raise TypeError("Cannot explicitly set an encoding with a unicode string")
+        encodings = [x for x in kwargs if x.endswith("_encoding")]
+        if encodings:
+            raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings)
 
-        return HTMLUnicodeInputStream(source)
+        return HTMLUnicodeInputStream(source, **kwargs)
     else:
-        return HTMLBinaryInputStream(source, encoding, parseMeta, chardet)
+        return HTMLBinaryInputStream(source, **kwargs)
 
 
 class HTMLUnicodeInputStream(object):
@@ -160,22 +168,21 @@ class HTMLUnicodeInputStream(object):
         regardless of any BOM or later declaration (such as in a meta
         element)
 
-        parseMeta - Look for a <meta> element containing encoding information
-
         """
 
-        # Craziness
-        if len("\U0010FFFF") == 1:
+        if not _utils.supports_lone_surrogates:
+            # Such platforms will have already checked for such
+            # surrogate errors, so no need to do this checking.
+            self.reportCharacterErrors = None
+        elif len("\U0010FFFF") == 1:
             self.reportCharacterErrors = self.characterErrorsUCS4
-            self.replaceCharactersRegexp = re.compile("[\uD800-\uDFFF]")
         else:
             self.reportCharacterErrors = self.characterErrorsUCS2
-            self.replaceCharactersRegexp = re.compile("([\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF])")
 
         # List of where new lines occur
         self.newLines = [0]
 
-        self.charEncoding = ("utf-8", "certain")
+        self.charEncoding = (lookupEncoding("utf-8"), "certain")
         self.dataStream = self.openStream(source)
 
         self.reset()
@@ -265,12 +272,10 @@ class HTMLUnicodeInputStream(object):
                 self._bufferedCharacter = data[-1]
                 data = data[:-1]
 
-        self.reportCharacterErrors(data)
+        if self.reportCharacterErrors:
+            self.reportCharacterErrors(data)
 
         # Replace invalid characters
-        # Note U+0000 is dealt with in the tokenizer
-        data = self.replaceCharactersRegexp.sub("\ufffd", data)
-
         data = data.replace("\r\n", "\n")
         data = data.replace("\r", "\n")
 
@@ -280,7 +285,7 @@ class HTMLUnicodeInputStream(object):
         return True
 
     def characterErrorsUCS4(self, data):
-        for i in range(len(invalid_unicode_re.findall(data))):
+        for _ in range(len(invalid_unicode_re.findall(data))):
             self.errors.append("invalid-codepoint")
 
     def characterErrorsUCS2(self, data):
@@ -293,9 +298,9 @@ class HTMLUnicodeInputStream(object):
             codepoint = ord(match.group())
             pos = match.start()
             # Pretty sure there should be endianness issues here
-            if utils.isSurrogatePair(data[pos:pos + 2]):
+            if _utils.isSurrogatePair(data[pos:pos + 2]):
                 # We have a surrogate pair!
-                char_val = utils.surrogatePairToCodepoint(data[pos:pos + 2])
+                char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2])
                 if char_val in non_bmp_invalid_codepoints:
                     self.errors.append("invalid-codepoint")
                 skip = True
@@ -356,7 +361,7 @@ class HTMLUnicodeInputStream(object):
     def unget(self, char):
         # Only one character is allowed to be ungotten at once - it must
         # be consumed again before any further call to unget
-        if char is not None:
+        if char is not EOF:
             if self.chunkOffset == 0:
                 # unget is called quite rarely, so it's a good idea to do
                 # more work here if it saves a bit of work in the frequently
@@ -378,7 +383,9 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
 
     """
 
-    def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
+    def __init__(self, source, override_encoding=None, transport_encoding=None,
+                 same_origin_parent_encoding=None, likely_encoding=None,
+                 default_encoding="windows-1252", useChardet=True):
         """Initialises the HTMLInputStream.
 
         HTMLInputStream(source, [encoding]) -> Normalized stream from source
@@ -391,8 +398,6 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
         regardless of any BOM or later declaration (such as in a meta
         element)
 
-        parseMeta - Look for a <meta> element containing encoding information
-
         """
         # Raw Stream - for unicode objects this will encode to utf-8 and set
         #              self.charEncoding as appropriate
@@ -400,27 +405,28 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
 
         HTMLUnicodeInputStream.__init__(self, self.rawStream)
 
-        self.charEncoding = (codecName(encoding), "certain")
-
         # Encoding Information
         # Number of bytes to use when looking for a meta element with
         # encoding information
-        self.numBytesMeta = 512
+        self.numBytesMeta = 1024
         # Number of bytes to use when using detecting encoding using chardet
         self.numBytesChardet = 100
-        # Encoding to use if no other information can be found
-        self.defaultEncoding = "windows-1252"
+        # Things from args
+        self.override_encoding = override_encoding
+        self.transport_encoding = transport_encoding
+        self.same_origin_parent_encoding = same_origin_parent_encoding
+        self.likely_encoding = likely_encoding
+        self.default_encoding = default_encoding
 
-        # Detect encoding iff no explicit "transport level" encoding is supplied
-        if (self.charEncoding[0] is None):
-            self.charEncoding = self.detectEncoding(parseMeta, chardet)
+        # Determine encoding
+        self.charEncoding = self.determineEncoding(useChardet)
+        assert self.charEncoding[0] is not None
 
         # Call superclass
         self.reset()
 
     def reset(self):
-        self.dataStream = codecs.getreader(self.charEncoding[0])(self.rawStream,
-                                                                 'replace')
+        self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace')
         HTMLUnicodeInputStream.reset(self)
 
     def openStream(self, source):
@@ -437,29 +443,50 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
 
         try:
             stream.seek(stream.tell())
-        except:
+        except Exception:
             stream = BufferedStream(stream)
 
         return stream
 
-    def detectEncoding(self, parseMeta=True, chardet=True):
-        # First look for a BOM
+    def determineEncoding(self, chardet=True):
+        # BOMs take precedence over everything
         # This will also read past the BOM if present
-        encoding = self.detectBOM()
-        confidence = "certain"
-        # If there is no BOM need to look for meta elements with encoding
-        # information
-        if encoding is None and parseMeta:
-            encoding = self.detectEncodingMeta()
-            confidence = "tentative"
-        # Guess with chardet, if avaliable
-        if encoding is None and chardet:
-            confidence = "tentative"
+        charEncoding = self.detectBOM(), "certain"
+        if charEncoding[0] is not None:
+            return charEncoding
+
+        # If we've been overridden, we've been overridden
+        charEncoding = lookupEncoding(self.override_encoding), "certain"
+        if charEncoding[0] is not None:
+            return charEncoding
+
+        # Now check the transport layer
+        charEncoding = lookupEncoding(self.transport_encoding), "certain"
+        if charEncoding[0] is not None:
+            return charEncoding
+
+        # Look for meta elements with encoding information
+        charEncoding = self.detectEncodingMeta(), "tentative"
+        if charEncoding[0] is not None:
+            return charEncoding
+
+        # Parent document encoding
+        charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative"
+        if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"):
+            return charEncoding
+
+        # "likely" encoding
+        charEncoding = lookupEncoding(self.likely_encoding), "tentative"
+        if charEncoding[0] is not None:
+            return charEncoding
+
+        # Guess with chardet, if available
+        if chardet:
             try:
-                try:
-                    from charade.universaldetector import UniversalDetector
-                except ImportError:
-                    from chardet.universaldetector import UniversalDetector
+                from chardet.universaldetector import UniversalDetector
+            except ImportError:
+                pass
+            else:
                 buffers = []
                 detector = UniversalDetector()
                 while not detector.done:
@@ -470,37 +497,34 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
                     buffers.append(buffer)
                     detector.feed(buffer)
                 detector.close()
-                encoding = detector.result['encoding']
+                encoding = lookupEncoding(detector.result['encoding'])
                 self.rawStream.seek(0)
-            except ImportError:
-                pass
-        # If all else fails use the default encoding
-        if encoding is None:
-            confidence = "tentative"
-            encoding = self.defaultEncoding
+                if encoding is not None:
+                    return encoding, "tentative"
 
-        # Substitute for equivalent encodings:
-        encodingSub = {"iso-8859-1": "windows-1252"}
+        # Try the default encoding
+        charEncoding = lookupEncoding(self.default_encoding), "tentative"
+        if charEncoding[0] is not None:
+            return charEncoding
 
-        if encoding.lower() in encodingSub:
-            encoding = encodingSub[encoding.lower()]
-
-        return encoding, confidence
+        # Fall back to html5lib's default if even that hasn't worked
+        return lookupEncoding("windows-1252"), "tentative"
 
     def changeEncoding(self, newEncoding):
         assert self.charEncoding[1] != "certain"
-        newEncoding = codecName(newEncoding)
-        if newEncoding in ("utf-16", "utf-16-be", "utf-16-le"):
-            newEncoding = "utf-8"
+        newEncoding = lookupEncoding(newEncoding)
         if newEncoding is None:
             return
+        if newEncoding.name in ("utf-16be", "utf-16le"):
+            newEncoding = lookupEncoding("utf-8")
+            assert newEncoding is not None
-        elif newEncoding == self.charEncoding[0]:
+        if newEncoding == self.charEncoding[0]:  # plain "if": must also run after the utf-16 -> utf-8 substitution
             self.charEncoding = (self.charEncoding[0], "certain")
         else:
             self.rawStream.seek(0)
-            self.reset()
-            self.charEncoding = (newEncoding, "certain")
-            raise ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
+            oldEncoding, self.charEncoding = self.charEncoding[0], (newEncoding, "certain")  # keep old value for the message
+            self.reset()
+            raise _ReparseException("Encoding changed from %s to %s" % (oldEncoding, newEncoding))
 
     def detectBOM(self):
         """Attempts to detect at BOM at the start of the stream. If
@@ -508,8 +532,8 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
         encoding otherwise return None"""
         bomDict = {
             codecs.BOM_UTF8: 'utf-8',
-            codecs.BOM_UTF16_LE: 'utf-16-le', codecs.BOM_UTF16_BE: 'utf-16-be',
-            codecs.BOM_UTF32_LE: 'utf-32-le', codecs.BOM_UTF32_BE: 'utf-32-be'
+            codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be',
+            codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be'
         }
 
         # Go to beginning of file and read in 4 bytes
@@ -529,9 +553,12 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
 
         # Set the read position past the BOM if one was found, otherwise
         # set it to the start of the stream
-        self.rawStream.seek(encoding and seek or 0)
-
-        return encoding
+        if encoding:
+            self.rawStream.seek(seek)
+            return lookupEncoding(encoding)
+        else:
+            self.rawStream.seek(0)
+            return None
 
     def detectEncodingMeta(self):
         """Report the encoding declared by the meta element
@@ -542,8 +569,8 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
         self.rawStream.seek(0)
         encoding = parser.getEncoding()
 
-        if encoding in ("utf-16", "utf-16-be", "utf-16-le"):
-            encoding = "utf-8"
+        if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
+            encoding = lookupEncoding("utf-8")
 
         return encoding
 
@@ -557,6 +584,7 @@ class EncodingBytes(bytes):
         return bytes.__new__(self, value.lower())
 
     def __init__(self, value):
+        # pylint:disable=unused-argument
         self._position = -1
 
     def __iter__(self):
@@ -630,9 +658,7 @@ class EncodingBytes(bytes):
         """Look for a sequence of bytes at the start of a string. If the bytes
         are found return True and advance the position to the byte after the
         match. Otherwise return False and leave the position alone"""
-        p = self.position
-        data = self[p:p + len(bytes)]
-        rv = data.startswith(bytes)
+        rv = self.startswith(bytes, self.position)
         if rv:
             self.position += len(bytes)
         return rv
@@ -640,15 +666,11 @@ class EncodingBytes(bytes):
     def jumpTo(self, bytes):
         """Look for the next sequence of bytes matching a given sequence. If
         a match is found advance the position to the last byte of the match"""
-        newPosition = self[self.position:].find(bytes)
-        if newPosition > -1:
-            # XXX: This is ugly, but I can't see a nicer way to fix this.
-            if self._position == -1:
-                self._position = 0
-            self._position += (newPosition + len(bytes) - 1)
-            return True
-        else:
+        try:
+            self._position = self.index(bytes, self.position) + len(bytes) - 1
+        except ValueError:
             raise StopIteration
+        return True
 
 
 class EncodingParser(object):
@@ -660,6 +682,9 @@ class EncodingParser(object):
         self.encoding = None
 
     def getEncoding(self):
+        if b"<meta" not in self.data:
+            return None
+
         methodDispatch = (
             (b"<!--", self.handleComment),
             (b"<meta", self.handleMeta),
@@ -667,8 +692,12 @@ class EncodingParser(object):
             (b"<!", self.handleOther),
             (b"<?", self.handleOther),
             (b"<", self.handlePossibleStartTag))
-        for byte in self.data:
+        for _ in self.data:
             keepParsing = True
+            try:
+                self.data.jumpTo(b"<")
+            except StopIteration:
+                break
             for key, method in methodDispatch:
                 if self.data.matchBytes(key):
                     try:
@@ -706,7 +735,7 @@ class EncodingParser(object):
                         return False
                 elif attr[0] == b"charset":
                     tentativeEncoding = attr[1]
-                    codec = codecName(tentativeEncoding)
+                    codec = lookupEncoding(tentativeEncoding)
                     if codec is not None:
                         self.encoding = codec
                         return False
@@ -714,7 +743,7 @@ class EncodingParser(object):
                     contentParser = ContentAttrParser(EncodingBytes(attr[1]))
                     tentativeEncoding = contentParser.parse()
                     if tentativeEncoding is not None:
-                        codec = codecName(tentativeEncoding)
+                        codec = lookupEncoding(tentativeEncoding)
                         if codec is not None:
                             if hasPragma:
                                 self.encoding = codec
@@ -871,7 +900,7 @@ class ContentAttrParser(object):
             return None
 
 
-def codecName(encoding):
+def lookupEncoding(encoding):
     """Return the python codec name corresponding to an encoding or None if the
     string doesn't correspond to a valid encoding."""
     if isinstance(encoding, bytes):
@@ -879,8 +908,11 @@ def codecName(encoding):
             encoding = encoding.decode("ascii")
         except UnicodeDecodeError:
             return None
-    if encoding:
-        canonicalName = ascii_punctuation_re.sub("", encoding).lower()
-        return encodings.get(canonicalName, None)
+
+    if encoding is not None:
+        try:
+            return webencodings.lookup(encoding)
+        except AttributeError:
+            return None
     else:
         return None
diff --git a/lib/html5lib/tokenizer.py b/lib/html5lib/_tokenizer.py
similarity index 98%
rename from lib/html5lib/tokenizer.py
rename to lib/html5lib/_tokenizer.py
index 79774578..4748a197 100644
--- a/lib/html5lib/tokenizer.py
+++ b/lib/html5lib/_tokenizer.py
@@ -1,11 +1,9 @@
 from __future__ import absolute_import, division, unicode_literals
 
-try:
-    chr = unichr # flake8: noqa
-except NameError:
-    pass
+from six import unichr as chr
 
-from collections import deque
+from collections import deque, OrderedDict
+from sys import version_info
 
 from .constants import spaceCharacters
 from .constants import entities
@@ -14,12 +12,17 @@ from .constants import digits, hexDigits, EOF
 from .constants import tokenTypes, tagTokenTypes
 from .constants import replacementCharacters
 
-from .inputstream import HTMLInputStream
+from ._inputstream import HTMLInputStream
 
-from .trie import Trie
+from ._trie import Trie
 
 entitiesTrie = Trie(entities)
 
+if version_info >= (3, 7):
+    attributeMap = dict
+else:
+    attributeMap = OrderedDict
+
 
 class HTMLTokenizer(object):
     """ This class takes care of tokenizing HTML.
@@ -34,16 +37,11 @@ class HTMLTokenizer(object):
       Points to HTMLInputStream object.
     """
 
-    def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
-                 lowercaseElementName=True, lowercaseAttrName=True, parser=None):
+    def __init__(self, stream, parser=None, **kwargs):
 
-        self.stream = HTMLInputStream(stream, encoding, parseMeta, useChardet)
+        self.stream = HTMLInputStream(stream, **kwargs)
         self.parser = parser
 
-        # Perform case conversions?
-        self.lowercaseElementName = lowercaseElementName
-        self.lowercaseAttrName = lowercaseAttrName
-
         # Setup the initial tokenizer state
         self.escapeFlag = False
         self.lastFourChars = []
@@ -147,8 +145,8 @@ class HTMLTokenizer(object):
         output = "&"
 
         charStack = [self.stream.char()]
-        if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&")
-                or (allowedChar is not None and allowedChar == charStack[0])):
+        if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&") or
+                (allowedChar is not None and allowedChar == charStack[0])):
             self.stream.unget(charStack[0])
 
         elif charStack[0] == "#":
@@ -235,8 +233,15 @@ class HTMLTokenizer(object):
         token = self.currentToken
         # Add token to the queue to be yielded
         if (token["type"] in tagTokenTypes):
-            if self.lowercaseElementName:
-                token["name"] = token["name"].translate(asciiUpper2Lower)
+            token["name"] = token["name"].translate(asciiUpper2Lower)
+            if token["type"] == tokenTypes["StartTag"]:
+                raw = token["data"]
+                data = attributeMap(raw)
+                if len(raw) > len(data):
+                    # we had some duplicated attribute, fix so first wins
+                    data.update(raw[::-1])
+                token["data"] = data
+
             if token["type"] == tokenTypes["EndTag"]:
                 if token["data"]:
                     self.tokenQueue.append({"type": tokenTypes["ParseError"],
@@ -921,10 +926,9 @@ class HTMLTokenizer(object):
             # Attributes are not dropped at this stage. That happens when the
             # start tag token is emitted so values can still be safely appended
             # to attributes, but we do want to report the parse error in time.
-            if self.lowercaseAttrName:
-                self.currentToken["data"][-1][0] = (
-                    self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
-            for name, value in self.currentToken["data"][:-1]:
+            self.currentToken["data"][-1][0] = (
+                self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
+            for name, _ in self.currentToken["data"][:-1]:
                 if self.currentToken["data"][-1][0] == name:
                     self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                             "duplicate-attribute"})
@@ -1716,11 +1720,11 @@ class HTMLTokenizer(object):
                 else:
                     data.append(char)
 
-        data = "".join(data)
+        data = "".join(data)  # pylint:disable=redefined-variable-type
         # Deal with null here rather than in the parser
         nullCount = data.count("\u0000")
         if nullCount > 0:
-            for i in range(nullCount):
+            for _ in range(nullCount):
                 self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                         "data": "invalid-codepoint"})
             data = data.replace("\u0000", "\uFFFD")
diff --git a/lib/html5lib/_trie/__init__.py b/lib/html5lib/_trie/__init__.py
new file mode 100644
index 00000000..07bad5d3
--- /dev/null
+++ b/lib/html5lib/_trie/__init__.py
@@ -0,0 +1,5 @@
+from __future__ import absolute_import, division, unicode_literals
+
+from .py import Trie
+
+__all__ = ["Trie"]
diff --git a/lib/html5lib/trie/_base.py b/lib/html5lib/_trie/_base.py
similarity index 74%
rename from lib/html5lib/trie/_base.py
rename to lib/html5lib/_trie/_base.py
index 724486b1..6b71975f 100644
--- a/lib/html5lib/trie/_base.py
+++ b/lib/html5lib/_trie/_base.py
@@ -1,19 +1,22 @@
 from __future__ import absolute_import, division, unicode_literals
 
-from collections import Mapping
+try:
+    from collections.abc import Mapping
+except ImportError:  # Python 2.7
+    from collections import Mapping
 
 
 class Trie(Mapping):
     """Abstract base class for tries"""
 
     def keys(self, prefix=None):
-        keys = super().keys()
+        # pylint:disable=arguments-differ
+        keys = super(Trie, self).keys()
 
         if prefix is None:
             return set(keys)
 
-        # Python 2.6: no set comprehensions
-        return set([x for x in keys if x.startswith(prefix)])
+        return {x for x in keys if x.startswith(prefix)}
 
     def has_keys_with_prefix(self, prefix):
         for key in self.keys():
diff --git a/lib/html5lib/trie/py.py b/lib/html5lib/_trie/py.py
similarity index 100%
rename from lib/html5lib/trie/py.py
rename to lib/html5lib/_trie/py.py
diff --git a/lib/html5lib/_utils.py b/lib/html5lib/_utils.py
new file mode 100644
index 00000000..9ea57942
--- /dev/null
+++ b/lib/html5lib/_utils.py
@@ -0,0 +1,159 @@
+from __future__ import absolute_import, division, unicode_literals
+
+from types import ModuleType
+
+try:
+    from collections.abc import Mapping
+except ImportError:
+    from collections import Mapping
+
+from six import text_type, PY3
+
+if PY3:
+    import xml.etree.ElementTree as default_etree
+else:
+    try:
+        import xml.etree.cElementTree as default_etree
+    except ImportError:
+        import xml.etree.ElementTree as default_etree
+
+
+__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
+           "surrogatePairToCodepoint", "moduleFactoryFactory",
+           "supports_lone_surrogates"]
+
+
+# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be
+# caught by the below test. In general this would be any platform
+# using UTF-16 as its encoding of unicode strings, such as Jython:
+# UTF-16 is itself built on surrogates and has no mechanism to escape
+# them further. The literals are built with eval() inside the try so
+# that any failure to create them lands in the except branch below.
+try:
+    _x = eval('"\\uD800"')  # pylint:disable=eval-used
+    if not isinstance(_x, text_type):
+        # We need this with u"" because of http://bugs.jython.org/issue2039
+        _x = eval('u"\\uD800"')  # pylint:disable=eval-used
+        assert isinstance(_x, text_type)
+except Exception:
+    supports_lone_surrogates = False
+else:
+    supports_lone_surrogates = True
+
+
+class MethodDispatcher(dict):
+    """Dict with 2 special properties:
+
+    On initialisation, keys that are lists, tuples, sets or frozensets are
+    expanded into one key per member, so looking up any member of the
+    original collection returns the matching value
+
+    md = MethodDispatcher([(("foo", "bar"), "baz")])
+    md["foo"] == "baz"
+
+    Missing keys yield the ``default`` attribute (initially None) rather
+    than raising KeyError.
+    """
+
+    def __init__(self, items=()):
+        expanded = []
+        for names, value in items:
+            if not isinstance(names, (list, tuple, frozenset, set)):
+                names = (names,)
+            expanded.extend((name, value) for name in names)
+        dict.__init__(self, expanded)
+        # Every expanded key must be unique; a duplicate would shadow an entry.
+        assert len(self) == len(expanded)
+        self.default = None
+
+    def __getitem__(self, key):
+        return dict.get(self, key, self.default)
+
+    def __get__(self, instance, owner=None):
+        return BoundMethodDispatcher(instance, self)
+
+
+class BoundMethodDispatcher(Mapping):
+    """Wraps a MethodDispatcher, binding its return values to `instance`"""
+
+    def __init__(self, instance, dispatcher):
+        self.instance = instance
+        self.dispatcher = dispatcher
+
+    def __getitem__(self, key):
+        # see https://docs.python.org/3/reference/datamodel.html#object.__get__
+        # on a function, __get__ is used to bind a function to an instance as a bound method
+        return self.dispatcher[key].__get__(self.instance)
+
+    def get(self, key, default=None):
+        # Unlike self[key], an unknown key yields the caller's default instead
+        # of the dispatcher's; the default is optional, as in Mapping.get.
+        return self[key] if key in self.dispatcher else default
+
+    def __iter__(self):
+        return iter(self.dispatcher)
+
+    def __len__(self):
+        return len(self.dispatcher)
+
+    def __contains__(self, key):
+        return key in self.dispatcher
+
+
+# Some utility functions to deal with weirdness around UCS2 vs UCS4
+# python builds
+
+def isSurrogatePair(data):
+    """True if data is a high surrogate followed by a low surrogate."""
+    return (len(data) == 2 and
+            0xD800 <= ord(data[0]) <= 0xDBFF and 0xDC00 <= ord(data[1]) <= 0xDFFF)
+
+
+def surrogatePairToCodepoint(data):
+    """Combine a (high, low) surrogate pair into the code point it encodes."""
+    high, low = ord(data[0]) - 0xD800, ord(data[1]) - 0xDC00
+    return 0x10000 + high * 0x400 + low
+
+# Module Factory Factory (no, this isn't Java, I know)
+# Here to stop this being duplicated all over the place.
+
+
+def moduleFactoryFactory(factory):
+    """Wrap ``factory`` so that each (baseModule, args, kwargs) combination
+    is built only once: the names it returns are loaded into a synthetic
+    module, which is cached and handed back on every later identical call."""
+    moduleCache = {}
+
+    def moduleFactory(baseModule, *args, **kwargs):
+        # Use the same string type as ModuleType.__name__ for the module name.
+        if isinstance(ModuleType.__name__, type("")):
+            name = "_%s_factory" % baseModule.__name__
+        else:
+            name = b"_%s_factory" % baseModule.__name__
+
+        kwargs_tuple = tuple(kwargs.items())
+
+        try:
+            return moduleCache[name][args][kwargs_tuple]
+        except KeyError:
+            mod = ModuleType(name)
+            mod.__dict__.update(factory(baseModule, *args, **kwargs))
+            # Key each level on the actual values so that entries cached for
+            # other argument combinations survive a cache miss.
+            cachedByArgs = moduleCache.setdefault(name, {}).setdefault(args, {})
+            cachedByArgs[kwargs_tuple] = mod
+            return mod
+
+    return moduleFactory
+
+
+def memoize(func):
+    """Return a wrapper caching ``func`` results per (args, kwargs) call."""
+    results = {}
+
+    def wrapped(*args, **kwargs):
+        signature = (tuple(args), tuple(kwargs.items()))
+        if signature not in results:
+            results[signature] = func(*args, **kwargs)
+        return results[signature]
+    return wrapped
diff --git a/lib/html5lib/constants.py b/lib/html5lib/constants.py
index e7089846..fe3e237c 100644
--- a/lib/html5lib/constants.py
+++ b/lib/html5lib/constants.py
@@ -1,292 +1,296 @@
 from __future__ import absolute_import, division, unicode_literals
 
 import string
-import gettext
-_ = gettext.gettext
 
 EOF = None
 
 E = {
     "null-character":
-        _("Null character in input stream, replaced with U+FFFD."),
+        "Null character in input stream, replaced with U+FFFD.",
     "invalid-codepoint":
-        _("Invalid codepoint in stream."),
+        "Invalid codepoint in stream.",
     "incorrectly-placed-solidus":
-        _("Solidus (/) incorrectly placed in tag."),
+        "Solidus (/) incorrectly placed in tag.",
     "incorrect-cr-newline-entity":
-        _("Incorrect CR newline entity, replaced with LF."),
+        "Incorrect CR newline entity, replaced with LF.",
     "illegal-windows-1252-entity":
-        _("Entity used with illegal number (windows-1252 reference)."),
+        "Entity used with illegal number (windows-1252 reference).",
     "cant-convert-numeric-entity":
-        _("Numeric entity couldn't be converted to character "
-          "(codepoint U+%(charAsInt)08x)."),
+        "Numeric entity couldn't be converted to character "
+        "(codepoint U+%(charAsInt)08x).",
     "illegal-codepoint-for-numeric-entity":
-        _("Numeric entity represents an illegal codepoint: "
-          "U+%(charAsInt)08x."),
+        "Numeric entity represents an illegal codepoint: "
+        "U+%(charAsInt)08x.",
     "numeric-entity-without-semicolon":
-        _("Numeric entity didn't end with ';'."),
+        "Numeric entity didn't end with ';'.",
     "expected-numeric-entity-but-got-eof":
-        _("Numeric entity expected. Got end of file instead."),
+        "Numeric entity expected. Got end of file instead.",
     "expected-numeric-entity":
-        _("Numeric entity expected but none found."),
+        "Numeric entity expected but none found.",
     "named-entity-without-semicolon":
-        _("Named entity didn't end with ';'."),
+        "Named entity didn't end with ';'.",
     "expected-named-entity":
-        _("Named entity expected. Got none."),
+        "Named entity expected. Got none.",
     "attributes-in-end-tag":
-        _("End tag contains unexpected attributes."),
+        "End tag contains unexpected attributes.",
     'self-closing-flag-on-end-tag':
-        _("End tag contains unexpected self-closing flag."),
+        "End tag contains unexpected self-closing flag.",
     "expected-tag-name-but-got-right-bracket":
-        _("Expected tag name. Got '>' instead."),
+        "Expected tag name. Got '>' instead.",
     "expected-tag-name-but-got-question-mark":
-        _("Expected tag name. Got '?' instead. (HTML doesn't "
-          "support processing instructions.)"),
+        "Expected tag name. Got '?' instead. (HTML doesn't "
+        "support processing instructions.)",
     "expected-tag-name":
-        _("Expected tag name. Got something else instead"),
+        "Expected tag name. Got something else instead",
     "expected-closing-tag-but-got-right-bracket":
-        _("Expected closing tag. Got '>' instead. Ignoring '</>'."),
+        "Expected closing tag. Got '>' instead. Ignoring '</>'.",
     "expected-closing-tag-but-got-eof":
-        _("Expected closing tag. Unexpected end of file."),
+        "Expected closing tag. Unexpected end of file.",
     "expected-closing-tag-but-got-char":
-        _("Expected closing tag. Unexpected character '%(data)s' found."),
+        "Expected closing tag. Unexpected character '%(data)s' found.",
     "eof-in-tag-name":
-        _("Unexpected end of file in the tag name."),
+        "Unexpected end of file in the tag name.",
     "expected-attribute-name-but-got-eof":
-        _("Unexpected end of file. Expected attribute name instead."),
+        "Unexpected end of file. Expected attribute name instead.",
     "eof-in-attribute-name":
-        _("Unexpected end of file in attribute name."),
+        "Unexpected end of file in attribute name.",
     "invalid-character-in-attribute-name":
-        _("Invalid character in attribute name"),
+        "Invalid character in attribute name",
     "duplicate-attribute":
-        _("Dropped duplicate attribute on tag."),
+        "Dropped duplicate attribute on tag.",
     "expected-end-of-tag-name-but-got-eof":
-        _("Unexpected end of file. Expected = or end of tag."),
+        "Unexpected end of file. Expected = or end of tag.",
     "expected-attribute-value-but-got-eof":
-        _("Unexpected end of file. Expected attribute value."),
+        "Unexpected end of file. Expected attribute value.",
     "expected-attribute-value-but-got-right-bracket":
-        _("Expected attribute value. Got '>' instead."),
+        "Expected attribute value. Got '>' instead.",
     'equals-in-unquoted-attribute-value':
-        _("Unexpected = in unquoted attribute"),
+        "Unexpected = in unquoted attribute",
     'unexpected-character-in-unquoted-attribute-value':
-        _("Unexpected character in unquoted attribute"),
+        "Unexpected character in unquoted attribute",
     "invalid-character-after-attribute-name":
-        _("Unexpected character after attribute name."),
+        "Unexpected character after attribute name.",
     "unexpected-character-after-attribute-value":
-        _("Unexpected character after attribute value."),
+        "Unexpected character after attribute value.",
     "eof-in-attribute-value-double-quote":
-        _("Unexpected end of file in attribute value (\")."),
+        "Unexpected end of file in attribute value (\").",
     "eof-in-attribute-value-single-quote":
-        _("Unexpected end of file in attribute value (')."),
+        "Unexpected end of file in attribute value (').",
     "eof-in-attribute-value-no-quotes":
-        _("Unexpected end of file in attribute value."),
+        "Unexpected end of file in attribute value.",
     "unexpected-EOF-after-solidus-in-tag":
-        _("Unexpected end of file in tag. Expected >"),
+        "Unexpected end of file in tag. Expected >",
     "unexpected-character-after-solidus-in-tag":
-        _("Unexpected character after / in tag. Expected >"),
+        "Unexpected character after / in tag. Expected >",
     "expected-dashes-or-doctype":
-        _("Expected '--' or 'DOCTYPE'. Not found."),
+        "Expected '--' or 'DOCTYPE'. Not found.",
     "unexpected-bang-after-double-dash-in-comment":
-        _("Unexpected ! after -- in comment"),
+        "Unexpected ! after -- in comment",
     "unexpected-space-after-double-dash-in-comment":
-        _("Unexpected space after -- in comment"),
+        "Unexpected space after -- in comment",
     "incorrect-comment":
-        _("Incorrect comment."),
+        "Incorrect comment.",
     "eof-in-comment":
-        _("Unexpected end of file in comment."),
+        "Unexpected end of file in comment.",
     "eof-in-comment-end-dash":
-        _("Unexpected end of file in comment (-)"),
+        "Unexpected end of file in comment (-)",
     "unexpected-dash-after-double-dash-in-comment":
-        _("Unexpected '-' after '--' found in comment."),
+        "Unexpected '-' after '--' found in comment.",
     "eof-in-comment-double-dash":
-        _("Unexpected end of file in comment (--)."),
+        "Unexpected end of file in comment (--).",
     "eof-in-comment-end-space-state":
-        _("Unexpected end of file in comment."),
+        "Unexpected end of file in comment.",
     "eof-in-comment-end-bang-state":
-        _("Unexpected end of file in comment."),
+        "Unexpected end of file in comment.",
     "unexpected-char-in-comment":
-        _("Unexpected character in comment found."),
+        "Unexpected character in comment found.",
     "need-space-after-doctype":
-        _("No space after literal string 'DOCTYPE'."),
+        "No space after literal string 'DOCTYPE'.",
     "expected-doctype-name-but-got-right-bracket":
-        _("Unexpected > character. Expected DOCTYPE name."),
+        "Unexpected > character. Expected DOCTYPE name.",
     "expected-doctype-name-but-got-eof":
-        _("Unexpected end of file. Expected DOCTYPE name."),
+        "Unexpected end of file. Expected DOCTYPE name.",
     "eof-in-doctype-name":
-        _("Unexpected end of file in DOCTYPE name."),
+        "Unexpected end of file in DOCTYPE name.",
     "eof-in-doctype":
-        _("Unexpected end of file in DOCTYPE."),
+        "Unexpected end of file in DOCTYPE.",
     "expected-space-or-right-bracket-in-doctype":
-        _("Expected space or '>'. Got '%(data)s'"),
+        "Expected space or '>'. Got '%(data)s'",
     "unexpected-end-of-doctype":
-        _("Unexpected end of DOCTYPE."),
+        "Unexpected end of DOCTYPE.",
     "unexpected-char-in-doctype":
-        _("Unexpected character in DOCTYPE."),
+        "Unexpected character in DOCTYPE.",
     "eof-in-innerhtml":
-        _("XXX innerHTML EOF"),
+        "XXX innerHTML EOF",
     "unexpected-doctype":
-        _("Unexpected DOCTYPE. Ignored."),
+        "Unexpected DOCTYPE. Ignored.",
     "non-html-root":
-        _("html needs to be the first start tag."),
+        "html needs to be the first start tag.",
     "expected-doctype-but-got-eof":
-        _("Unexpected End of file. Expected DOCTYPE."),
+        "Unexpected End of file. Expected DOCTYPE.",
     "unknown-doctype":
-        _("Erroneous DOCTYPE."),
+        "Erroneous DOCTYPE.",
     "expected-doctype-but-got-chars":
-        _("Unexpected non-space characters. Expected DOCTYPE."),
+        "Unexpected non-space characters. Expected DOCTYPE.",
     "expected-doctype-but-got-start-tag":
-        _("Unexpected start tag (%(name)s). Expected DOCTYPE."),
+        "Unexpected start tag (%(name)s). Expected DOCTYPE.",
     "expected-doctype-but-got-end-tag":
-        _("Unexpected end tag (%(name)s). Expected DOCTYPE."),
+        "Unexpected end tag (%(name)s). Expected DOCTYPE.",
     "end-tag-after-implied-root":
-        _("Unexpected end tag (%(name)s) after the (implied) root element."),
+        "Unexpected end tag (%(name)s) after the (implied) root element.",
     "expected-named-closing-tag-but-got-eof":
-        _("Unexpected end of file. Expected end tag (%(name)s)."),
+        "Unexpected end of file. Expected end tag (%(name)s).",
     "two-heads-are-not-better-than-one":
-        _("Unexpected start tag head in existing head. Ignored."),
+        "Unexpected start tag head in existing head. Ignored.",
     "unexpected-end-tag":
-        _("Unexpected end tag (%(name)s). Ignored."),
+        "Unexpected end tag (%(name)s). Ignored.",
     "unexpected-start-tag-out-of-my-head":
-        _("Unexpected start tag (%(name)s) that can be in head. Moved."),
+        "Unexpected start tag (%(name)s) that can be in head. Moved.",
     "unexpected-start-tag":
-        _("Unexpected start tag (%(name)s)."),
+        "Unexpected start tag (%(name)s).",
     "missing-end-tag":
-        _("Missing end tag (%(name)s)."),
+        "Missing end tag (%(name)s).",
     "missing-end-tags":
-        _("Missing end tags (%(name)s)."),
+        "Missing end tags (%(name)s).",
     "unexpected-start-tag-implies-end-tag":
-        _("Unexpected start tag (%(startName)s) "
-          "implies end tag (%(endName)s)."),
+        "Unexpected start tag (%(startName)s) "
+        "implies end tag (%(endName)s).",
     "unexpected-start-tag-treated-as":
-        _("Unexpected start tag (%(originalName)s). Treated as %(newName)s."),
+        "Unexpected start tag (%(originalName)s). Treated as %(newName)s.",
     "deprecated-tag":
-        _("Unexpected start tag %(name)s. Don't use it!"),
+        "Unexpected start tag %(name)s. Don't use it!",
     "unexpected-start-tag-ignored":
-        _("Unexpected start tag %(name)s. Ignored."),
+        "Unexpected start tag %(name)s. Ignored.",
     "expected-one-end-tag-but-got-another":
-        _("Unexpected end tag (%(gotName)s). "
-          "Missing end tag (%(expectedName)s)."),
+        "Unexpected end tag (%(gotName)s). "
+        "Missing end tag (%(expectedName)s).",
     "end-tag-too-early":
-        _("End tag (%(name)s) seen too early. Expected other end tag."),
+        "End tag (%(name)s) seen too early. Expected other end tag.",
     "end-tag-too-early-named":
-        _("Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s)."),
+        "Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).",
     "end-tag-too-early-ignored":
-        _("End tag (%(name)s) seen too early. Ignored."),
+        "End tag (%(name)s) seen too early. Ignored.",
     "adoption-agency-1.1":
-        _("End tag (%(name)s) violates step 1, "
-          "paragraph 1 of the adoption agency algorithm."),
+        "End tag (%(name)s) violates step 1, "
+        "paragraph 1 of the adoption agency algorithm.",
     "adoption-agency-1.2":
-        _("End tag (%(name)s) violates step 1, "
-          "paragraph 2 of the adoption agency algorithm."),
+        "End tag (%(name)s) violates step 1, "
+        "paragraph 2 of the adoption agency algorithm.",
     "adoption-agency-1.3":
-        _("End tag (%(name)s) violates step 1, "
-          "paragraph 3 of the adoption agency algorithm."),
+        "End tag (%(name)s) violates step 1, "
+        "paragraph 3 of the adoption agency algorithm.",
     "adoption-agency-4.4":
-        _("End tag (%(name)s) violates step 4, "
-          "paragraph 4 of the adoption agency algorithm."),
+        "End tag (%(name)s) violates step 4, "
+        "paragraph 4 of the adoption agency algorithm.",
     "unexpected-end-tag-treated-as":
-        _("Unexpected end tag (%(originalName)s). Treated as %(newName)s."),
+        "Unexpected end tag (%(originalName)s). Treated as %(newName)s.",
     "no-end-tag":
-        _("This element (%(name)s) has no end tag."),
+        "This element (%(name)s) has no end tag.",
     "unexpected-implied-end-tag-in-table":
-        _("Unexpected implied end tag (%(name)s) in the table phase."),
+        "Unexpected implied end tag (%(name)s) in the table phase.",
     "unexpected-implied-end-tag-in-table-body":
-        _("Unexpected implied end tag (%(name)s) in the table body phase."),
+        "Unexpected implied end tag (%(name)s) in the table body phase.",
     "unexpected-char-implies-table-voodoo":
-        _("Unexpected non-space characters in "
-          "table context caused voodoo mode."),
+        "Unexpected non-space characters in "
+        "table context caused voodoo mode.",
     "unexpected-hidden-input-in-table":
-        _("Unexpected input with type hidden in table context."),
+        "Unexpected input with type hidden in table context.",
     "unexpected-form-in-table":
-        _("Unexpected form in table context."),
+        "Unexpected form in table context.",
     "unexpected-start-tag-implies-table-voodoo":
-        _("Unexpected start tag (%(name)s) in "
-          "table context caused voodoo mode."),
+        "Unexpected start tag (%(name)s) in "
+        "table context caused voodoo mode.",
     "unexpected-end-tag-implies-table-voodoo":
-        _("Unexpected end tag (%(name)s) in "
-          "table context caused voodoo mode."),
+        "Unexpected end tag (%(name)s) in "
+        "table context caused voodoo mode.",
     "unexpected-cell-in-table-body":
-        _("Unexpected table cell start tag (%(name)s) "
-          "in the table body phase."),
+        "Unexpected table cell start tag (%(name)s) "
+        "in the table body phase.",
     "unexpected-cell-end-tag":
-        _("Got table cell end tag (%(name)s) "
-          "while required end tags are missing."),
+        "Got table cell end tag (%(name)s) "
+        "while required end tags are missing.",
     "unexpected-end-tag-in-table-body":
-        _("Unexpected end tag (%(name)s) in the table body phase. Ignored."),
+        "Unexpected end tag (%(name)s) in the table body phase. Ignored.",
     "unexpected-implied-end-tag-in-table-row":
-        _("Unexpected implied end tag (%(name)s) in the table row phase."),
+        "Unexpected implied end tag (%(name)s) in the table row phase.",
     "unexpected-end-tag-in-table-row":
-        _("Unexpected end tag (%(name)s) in the table row phase. Ignored."),
+        "Unexpected end tag (%(name)s) in the table row phase. Ignored.",
     "unexpected-select-in-select":
-        _("Unexpected select start tag in the select phase "
-          "treated as select end tag."),
+        "Unexpected select start tag in the select phase "
+        "treated as select end tag.",
     "unexpected-input-in-select":
-        _("Unexpected input start tag in the select phase."),
+        "Unexpected input start tag in the select phase.",
     "unexpected-start-tag-in-select":
-        _("Unexpected start tag token (%(name)s in the select phase. "
-          "Ignored."),
+        "Unexpected start tag token (%(name)s in the select phase. "
+        "Ignored.",
     "unexpected-end-tag-in-select":
-        _("Unexpected end tag (%(name)s) in the select phase. Ignored."),
+        "Unexpected end tag (%(name)s) in the select phase. Ignored.",
     "unexpected-table-element-start-tag-in-select-in-table":
-        _("Unexpected table element start tag (%(name)s) in the select in table phase."),
+        "Unexpected table element start tag (%(name)s) in the select in table phase.",
     "unexpected-table-element-end-tag-in-select-in-table":
-        _("Unexpected table element end tag (%(name)s) in the select in table phase."),
+        "Unexpected table element end tag (%(name)s) in the select in table phase.",
     "unexpected-char-after-body":
-        _("Unexpected non-space characters in the after body phase."),
+        "Unexpected non-space characters in the after body phase.",
     "unexpected-start-tag-after-body":
-        _("Unexpected start tag token (%(name)s)"
-          " in the after body phase."),
+        "Unexpected start tag token (%(name)s)"
+        " in the after body phase.",
     "unexpected-end-tag-after-body":
-        _("Unexpected end tag token (%(name)s)"
-          " in the after body phase."),
+        "Unexpected end tag token (%(name)s)"
+        " in the after body phase.",
     "unexpected-char-in-frameset":
-        _("Unexpected characters in the frameset phase. Characters ignored."),
+        "Unexpected characters in the frameset phase. Characters ignored.",
     "unexpected-start-tag-in-frameset":
-        _("Unexpected start tag token (%(name)s)"
-          " in the frameset phase. Ignored."),
+        "Unexpected start tag token (%(name)s)"
+        " in the frameset phase. Ignored.",
     "unexpected-frameset-in-frameset-innerhtml":
-        _("Unexpected end tag token (frameset) "
-          "in the frameset phase (innerHTML)."),
+        "Unexpected end tag token (frameset) "
+        "in the frameset phase (innerHTML).",
     "unexpected-end-tag-in-frameset":
-        _("Unexpected end tag token (%(name)s)"
-          " in the frameset phase. Ignored."),
+        "Unexpected end tag token (%(name)s)"
+        " in the frameset phase. Ignored.",
     "unexpected-char-after-frameset":
-        _("Unexpected non-space characters in the "
-          "after frameset phase. Ignored."),
+        "Unexpected non-space characters in the "
+        "after frameset phase. Ignored.",
     "unexpected-start-tag-after-frameset":
-        _("Unexpected start tag (%(name)s)"
-          " in the after frameset phase. Ignored."),
+        "Unexpected start tag (%(name)s)"
+        " in the after frameset phase. Ignored.",
     "unexpected-end-tag-after-frameset":
-        _("Unexpected end tag (%(name)s)"
-          " in the after frameset phase. Ignored."),
+        "Unexpected end tag (%(name)s)"
+        " in the after frameset phase. Ignored.",
     "unexpected-end-tag-after-body-innerhtml":
-        _("Unexpected end tag after body(innerHtml)"),
+        "Unexpected end tag after body(innerHtml)",
     "expected-eof-but-got-char":
-        _("Unexpected non-space characters. Expected end of file."),
+        "Unexpected non-space characters. Expected end of file.",
     "expected-eof-but-got-start-tag":
-        _("Unexpected start tag (%(name)s)"
-          ". Expected end of file."),
+        "Unexpected start tag (%(name)s)"
+        ". Expected end of file.",
     "expected-eof-but-got-end-tag":
-        _("Unexpected end tag (%(name)s)"
-          ". Expected end of file."),
+        "Unexpected end tag (%(name)s)"
+        ". Expected end of file.",
     "eof-in-table":
-        _("Unexpected end of file. Expected table content."),
+        "Unexpected end of file. Expected table content.",
     "eof-in-select":
-        _("Unexpected end of file. Expected select content."),
+        "Unexpected end of file. Expected select content.",
     "eof-in-frameset":
-        _("Unexpected end of file. Expected frameset content."),
+        "Unexpected end of file. Expected frameset content.",
     "eof-in-script-in-script":
-        _("Unexpected end of file. Expected script content."),
+        "Unexpected end of file. Expected script content.",
     "eof-in-foreign-lands":
-        _("Unexpected end of file. Expected foreign content"),
+        "Unexpected end of file. Expected foreign content",
     "non-void-element-with-trailing-solidus":
-        _("Trailing solidus not allowed on element %(name)s"),
+        "Trailing solidus not allowed on element %(name)s",
     "unexpected-html-element-in-foreign-content":
-        _("Element %(name)s not allowed in a non-html context"),
+        "Element %(name)s not allowed in a non-html context",
     "unexpected-end-tag-before-html":
-        _("Unexpected end tag (%(name)s) before html."),
+        "Unexpected end tag (%(name)s) before html.",
+    "unexpected-inhead-noscript-tag":
+        "Element %(name)s not allowed in a inhead-noscript context",
+    "eof-in-head-noscript":
+        "Unexpected end of file. Expected inhead-noscript content",
+    "char-in-head-noscript":
+        "Unexpected non-space character. Expected inhead-noscript content",
     "XXX-undefined-error":
-        _("Undefined error (this sucks and should be fixed)"),
+        "Undefined error (this sucks and should be fixed)",
 }
 
 namespaces = {
@@ -298,7 +302,7 @@ namespaces = {
     "xmlns": "http://www.w3.org/2000/xmlns/"
 }
 
-scopingElements = frozenset((
+scopingElements = frozenset([
     (namespaces["html"], "applet"),
     (namespaces["html"], "caption"),
     (namespaces["html"], "html"),
@@ -316,9 +320,9 @@ scopingElements = frozenset((
     (namespaces["svg"], "foreignObject"),
     (namespaces["svg"], "desc"),
     (namespaces["svg"], "title"),
-))
+])
 
-formattingElements = frozenset((
+formattingElements = frozenset([
     (namespaces["html"], "a"),
     (namespaces["html"], "b"),
     (namespaces["html"], "big"),
@@ -333,9 +337,9 @@ formattingElements = frozenset((
     (namespaces["html"], "strong"),
     (namespaces["html"], "tt"),
     (namespaces["html"], "u")
-))
+])
 
-specialElements = frozenset((
+specialElements = frozenset([
     (namespaces["html"], "address"),
     (namespaces["html"], "applet"),
     (namespaces["html"], "area"),
@@ -416,22 +420,89 @@ specialElements = frozenset((
     (namespaces["html"], "wbr"),
     (namespaces["html"], "xmp"),
     (namespaces["svg"], "foreignObject")
-))
+])
 
-htmlIntegrationPointElements = frozenset((
-    (namespaces["mathml"], "annotaion-xml"),
+htmlIntegrationPointElements = frozenset([
+    (namespaces["mathml"], "annotation-xml"),
     (namespaces["svg"], "foreignObject"),
     (namespaces["svg"], "desc"),
     (namespaces["svg"], "title")
-))
+])
 
-mathmlTextIntegrationPointElements = frozenset((
+mathmlTextIntegrationPointElements = frozenset([
     (namespaces["mathml"], "mi"),
     (namespaces["mathml"], "mo"),
     (namespaces["mathml"], "mn"),
     (namespaces["mathml"], "ms"),
     (namespaces["mathml"], "mtext")
-))
+])
+
+adjustSVGAttributes = {
+    "attributename": "attributeName",
+    "attributetype": "attributeType",
+    "basefrequency": "baseFrequency",
+    "baseprofile": "baseProfile",
+    "calcmode": "calcMode",
+    "clippathunits": "clipPathUnits",
+    "contentscripttype": "contentScriptType",
+    "contentstyletype": "contentStyleType",
+    "diffuseconstant": "diffuseConstant",
+    "edgemode": "edgeMode",
+    "externalresourcesrequired": "externalResourcesRequired",
+    "filterres": "filterRes",
+    "filterunits": "filterUnits",
+    "glyphref": "glyphRef",
+    "gradienttransform": "gradientTransform",
+    "gradientunits": "gradientUnits",
+    "kernelmatrix": "kernelMatrix",
+    "kernelunitlength": "kernelUnitLength",
+    "keypoints": "keyPoints",
+    "keysplines": "keySplines",
+    "keytimes": "keyTimes",
+    "lengthadjust": "lengthAdjust",
+    "limitingconeangle": "limitingConeAngle",
+    "markerheight": "markerHeight",
+    "markerunits": "markerUnits",
+    "markerwidth": "markerWidth",
+    "maskcontentunits": "maskContentUnits",
+    "maskunits": "maskUnits",
+    "numoctaves": "numOctaves",
+    "pathlength": "pathLength",
+    "patterncontentunits": "patternContentUnits",
+    "patterntransform": "patternTransform",
+    "patternunits": "patternUnits",
+    "pointsatx": "pointsAtX",
+    "pointsaty": "pointsAtY",
+    "pointsatz": "pointsAtZ",
+    "preservealpha": "preserveAlpha",
+    "preserveaspectratio": "preserveAspectRatio",
+    "primitiveunits": "primitiveUnits",
+    "refx": "refX",
+    "refy": "refY",
+    "repeatcount": "repeatCount",
+    "repeatdur": "repeatDur",
+    "requiredextensions": "requiredExtensions",
+    "requiredfeatures": "requiredFeatures",
+    "specularconstant": "specularConstant",
+    "specularexponent": "specularExponent",
+    "spreadmethod": "spreadMethod",
+    "startoffset": "startOffset",
+    "stddeviation": "stdDeviation",
+    "stitchtiles": "stitchTiles",
+    "surfacescale": "surfaceScale",
+    "systemlanguage": "systemLanguage",
+    "tablevalues": "tableValues",
+    "targetx": "targetX",
+    "targety": "targetY",
+    "textlength": "textLength",
+    "viewbox": "viewBox",
+    "viewtarget": "viewTarget",
+    "xchannelselector": "xChannelSelector",
+    "ychannelselector": "yChannelSelector",
+    "zoomandpan": "zoomAndPan"
+}
+
+adjustMathMLAttributes = {"definitionurl": "definitionURL"}
 
 adjustForeignAttributes = {
     "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]),
@@ -448,24 +519,24 @@ adjustForeignAttributes = {
     "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"])
 }
 
-unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in
-                                  adjustForeignAttributes.items()])
+unadjustForeignAttributes = {(ns, local): qname for qname, (prefix, local, ns) in
+                             adjustForeignAttributes.items()}
 
-spaceCharacters = frozenset((
+spaceCharacters = frozenset([
     "\t",
     "\n",
     "\u000C",
     " ",
     "\r"
-))
+])
 
-tableInsertModeElements = frozenset((
+tableInsertModeElements = frozenset([
     "table",
     "tbody",
     "tfoot",
     "thead",
     "tr"
-))
+])
 
 asciiLowercase = frozenset(string.ascii_lowercase)
 asciiUppercase = frozenset(string.ascii_uppercase)
@@ -473,8 +544,7 @@ asciiLetters = frozenset(string.ascii_letters)
 digits = frozenset(string.digits)
 hexDigits = frozenset(string.hexdigits)
 
-asciiUpper2Lower = dict([(ord(c), ord(c.lower()))
-                         for c in string.ascii_uppercase])
+asciiUpper2Lower = {ord(c): ord(c.lower()) for c in string.ascii_uppercase}
 
 # Heading elements need to be ordered
 headingElements = (
@@ -486,7 +556,7 @@ headingElements = (
     "h6"
 )
 
-voidElements = frozenset((
+voidElements = frozenset([
     "base",
     "command",
     "event-source",
@@ -502,11 +572,11 @@ voidElements = frozenset((
     "input",
     "source",
     "track"
-))
+])
 
-cdataElements = frozenset(('title', 'textarea'))
+cdataElements = frozenset(['title', 'textarea'])
 
-rcdataElements = frozenset((
+rcdataElements = frozenset([
     'style',
     'script',
     'xmp',
@@ -514,27 +584,28 @@ rcdataElements = frozenset((
     'noembed',
     'noframes',
     'noscript'
-))
+])
 
 booleanAttributes = {
-    "": frozenset(("irrelevant",)),
-    "style": frozenset(("scoped",)),
-    "img": frozenset(("ismap",)),
-    "audio": frozenset(("autoplay", "controls")),
-    "video": frozenset(("autoplay", "controls")),
-    "script": frozenset(("defer", "async")),
-    "details": frozenset(("open",)),
-    "datagrid": frozenset(("multiple", "disabled")),
-    "command": frozenset(("hidden", "disabled", "checked", "default")),
-    "hr": frozenset(("noshade")),
-    "menu": frozenset(("autosubmit",)),
-    "fieldset": frozenset(("disabled", "readonly")),
-    "option": frozenset(("disabled", "readonly", "selected")),
-    "optgroup": frozenset(("disabled", "readonly")),
-    "button": frozenset(("disabled", "autofocus")),
-    "input": frozenset(("disabled", "readonly", "required", "autofocus", "checked", "ismap")),
-    "select": frozenset(("disabled", "readonly", "autofocus", "multiple")),
-    "output": frozenset(("disabled", "readonly")),
+    "": frozenset(["irrelevant", "itemscope"]),
+    "style": frozenset(["scoped"]),
+    "img": frozenset(["ismap"]),
+    "audio": frozenset(["autoplay", "controls"]),
+    "video": frozenset(["autoplay", "controls"]),
+    "script": frozenset(["defer", "async"]),
+    "details": frozenset(["open"]),
+    "datagrid": frozenset(["multiple", "disabled"]),
+    "command": frozenset(["hidden", "disabled", "checked", "default"]),
+    "hr": frozenset(["noshade"]),
+    "menu": frozenset(["autosubmit"]),
+    "fieldset": frozenset(["disabled", "readonly"]),
+    "option": frozenset(["disabled", "readonly", "selected"]),
+    "optgroup": frozenset(["disabled", "readonly"]),
+    "button": frozenset(["disabled", "autofocus"]),
+    "input": frozenset(["disabled", "readonly", "required", "autofocus", "checked", "ismap"]),
+    "select": frozenset(["disabled", "readonly", "autofocus", "multiple"]),
+    "output": frozenset(["disabled", "readonly"]),
+    "iframe": frozenset(["seamless"]),
 }
 
 # entitiesWindows1252 has to be _ordered_ and needs to have an index. It
@@ -574,7 +645,7 @@ entitiesWindows1252 = (
     376     # 0x9F  0x0178  LATIN CAPITAL LETTER Y WITH DIAERESIS
 )
 
-xmlEntities = frozenset(('lt;', 'gt;', 'amp;', 'apos;', 'quot;'))
+xmlEntities = frozenset(['lt;', 'gt;', 'amp;', 'apos;', 'quot;'])
 
 entities = {
     "AElig": "\xc6",
@@ -2815,7 +2886,6 @@ replacementCharacters = {
     0x0d: "\u000D",
     0x80: "\u20AC",
     0x81: "\u0081",
-    0x81: "\u0081",
     0x82: "\u201A",
     0x83: "\u0192",
     0x84: "\u201E",
@@ -2848,235 +2918,6 @@ replacementCharacters = {
     0x9F: "\u0178",
 }
 
-encodings = {
-    '437': 'cp437',
-    '850': 'cp850',
-    '852': 'cp852',
-    '855': 'cp855',
-    '857': 'cp857',
-    '860': 'cp860',
-    '861': 'cp861',
-    '862': 'cp862',
-    '863': 'cp863',
-    '865': 'cp865',
-    '866': 'cp866',
-    '869': 'cp869',
-    'ansix341968': 'ascii',
-    'ansix341986': 'ascii',
-    'arabic': 'iso8859-6',
-    'ascii': 'ascii',
-    'asmo708': 'iso8859-6',
-    'big5': 'big5',
-    'big5hkscs': 'big5hkscs',
-    'chinese': 'gbk',
-    'cp037': 'cp037',
-    'cp1026': 'cp1026',
-    'cp154': 'ptcp154',
-    'cp367': 'ascii',
-    'cp424': 'cp424',
-    'cp437': 'cp437',
-    'cp500': 'cp500',
-    'cp775': 'cp775',
-    'cp819': 'windows-1252',
-    'cp850': 'cp850',
-    'cp852': 'cp852',
-    'cp855': 'cp855',
-    'cp857': 'cp857',
-    'cp860': 'cp860',
-    'cp861': 'cp861',
-    'cp862': 'cp862',
-    'cp863': 'cp863',
-    'cp864': 'cp864',
-    'cp865': 'cp865',
-    'cp866': 'cp866',
-    'cp869': 'cp869',
-    'cp936': 'gbk',
-    'cpgr': 'cp869',
-    'cpis': 'cp861',
-    'csascii': 'ascii',
-    'csbig5': 'big5',
-    'cseuckr': 'cp949',
-    'cseucpkdfmtjapanese': 'euc_jp',
-    'csgb2312': 'gbk',
-    'cshproman8': 'hp-roman8',
-    'csibm037': 'cp037',
-    'csibm1026': 'cp1026',
-    'csibm424': 'cp424',
-    'csibm500': 'cp500',
-    'csibm855': 'cp855',
-    'csibm857': 'cp857',
-    'csibm860': 'cp860',
-    'csibm861': 'cp861',
-    'csibm863': 'cp863',
-    'csibm864': 'cp864',
-    'csibm865': 'cp865',
-    'csibm866': 'cp866',
-    'csibm869': 'cp869',
-    'csiso2022jp': 'iso2022_jp',
-    'csiso2022jp2': 'iso2022_jp_2',
-    'csiso2022kr': 'iso2022_kr',
-    'csiso58gb231280': 'gbk',
-    'csisolatin1': 'windows-1252',
-    'csisolatin2': 'iso8859-2',
-    'csisolatin3': 'iso8859-3',
-    'csisolatin4': 'iso8859-4',
-    'csisolatin5': 'windows-1254',
-    'csisolatin6': 'iso8859-10',
-    'csisolatinarabic': 'iso8859-6',
-    'csisolatincyrillic': 'iso8859-5',
-    'csisolatingreek': 'iso8859-7',
-    'csisolatinhebrew': 'iso8859-8',
-    'cskoi8r': 'koi8-r',
-    'csksc56011987': 'cp949',
-    'cspc775baltic': 'cp775',
-    'cspc850multilingual': 'cp850',
-    'cspc862latinhebrew': 'cp862',
-    'cspc8codepage437': 'cp437',
-    'cspcp852': 'cp852',
-    'csptcp154': 'ptcp154',
-    'csshiftjis': 'shift_jis',
-    'csunicode11utf7': 'utf-7',
-    'cyrillic': 'iso8859-5',
-    'cyrillicasian': 'ptcp154',
-    'ebcdiccpbe': 'cp500',
-    'ebcdiccpca': 'cp037',
-    'ebcdiccpch': 'cp500',
-    'ebcdiccphe': 'cp424',
-    'ebcdiccpnl': 'cp037',
-    'ebcdiccpus': 'cp037',
-    'ebcdiccpwt': 'cp037',
-    'ecma114': 'iso8859-6',
-    'ecma118': 'iso8859-7',
-    'elot928': 'iso8859-7',
-    'eucjp': 'euc_jp',
-    'euckr': 'cp949',
-    'extendedunixcodepackedformatforjapanese': 'euc_jp',
-    'gb18030': 'gb18030',
-    'gb2312': 'gbk',
-    'gb231280': 'gbk',
-    'gbk': 'gbk',
-    'greek': 'iso8859-7',
-    'greek8': 'iso8859-7',
-    'hebrew': 'iso8859-8',
-    'hproman8': 'hp-roman8',
-    'hzgb2312': 'hz',
-    'ibm037': 'cp037',
-    'ibm1026': 'cp1026',
-    'ibm367': 'ascii',
-    'ibm424': 'cp424',
-    'ibm437': 'cp437',
-    'ibm500': 'cp500',
-    'ibm775': 'cp775',
-    'ibm819': 'windows-1252',
-    'ibm850': 'cp850',
-    'ibm852': 'cp852',
-    'ibm855': 'cp855',
-    'ibm857': 'cp857',
-    'ibm860': 'cp860',
-    'ibm861': 'cp861',
-    'ibm862': 'cp862',
-    'ibm863': 'cp863',
-    'ibm864': 'cp864',
-    'ibm865': 'cp865',
-    'ibm866': 'cp866',
-    'ibm869': 'cp869',
-    'iso2022jp': 'iso2022_jp',
-    'iso2022jp2': 'iso2022_jp_2',
-    'iso2022kr': 'iso2022_kr',
-    'iso646irv1991': 'ascii',
-    'iso646us': 'ascii',
-    'iso88591': 'windows-1252',
-    'iso885910': 'iso8859-10',
-    'iso8859101992': 'iso8859-10',
-    'iso885911987': 'windows-1252',
-    'iso885913': 'iso8859-13',
-    'iso885914': 'iso8859-14',
-    'iso8859141998': 'iso8859-14',
-    'iso885915': 'iso8859-15',
-    'iso885916': 'iso8859-16',
-    'iso8859162001': 'iso8859-16',
-    'iso88592': 'iso8859-2',
-    'iso885921987': 'iso8859-2',
-    'iso88593': 'iso8859-3',
-    'iso885931988': 'iso8859-3',
-    'iso88594': 'iso8859-4',
-    'iso885941988': 'iso8859-4',
-    'iso88595': 'iso8859-5',
-    'iso885951988': 'iso8859-5',
-    'iso88596': 'iso8859-6',
-    'iso885961987': 'iso8859-6',
-    'iso88597': 'iso8859-7',
-    'iso885971987': 'iso8859-7',
-    'iso88598': 'iso8859-8',
-    'iso885981988': 'iso8859-8',
-    'iso88599': 'windows-1254',
-    'iso885991989': 'windows-1254',
-    'isoceltic': 'iso8859-14',
-    'isoir100': 'windows-1252',
-    'isoir101': 'iso8859-2',
-    'isoir109': 'iso8859-3',
-    'isoir110': 'iso8859-4',
-    'isoir126': 'iso8859-7',
-    'isoir127': 'iso8859-6',
-    'isoir138': 'iso8859-8',
-    'isoir144': 'iso8859-5',
-    'isoir148': 'windows-1254',
-    'isoir149': 'cp949',
-    'isoir157': 'iso8859-10',
-    'isoir199': 'iso8859-14',
-    'isoir226': 'iso8859-16',
-    'isoir58': 'gbk',
-    'isoir6': 'ascii',
-    'koi8r': 'koi8-r',
-    'koi8u': 'koi8-u',
-    'korean': 'cp949',
-    'ksc5601': 'cp949',
-    'ksc56011987': 'cp949',
-    'ksc56011989': 'cp949',
-    'l1': 'windows-1252',
-    'l10': 'iso8859-16',
-    'l2': 'iso8859-2',
-    'l3': 'iso8859-3',
-    'l4': 'iso8859-4',
-    'l5': 'windows-1254',
-    'l6': 'iso8859-10',
-    'l8': 'iso8859-14',
-    'latin1': 'windows-1252',
-    'latin10': 'iso8859-16',
-    'latin2': 'iso8859-2',
-    'latin3': 'iso8859-3',
-    'latin4': 'iso8859-4',
-    'latin5': 'windows-1254',
-    'latin6': 'iso8859-10',
-    'latin8': 'iso8859-14',
-    'latin9': 'iso8859-15',
-    'ms936': 'gbk',
-    'mskanji': 'shift_jis',
-    'pt154': 'ptcp154',
-    'ptcp154': 'ptcp154',
-    'r8': 'hp-roman8',
-    'roman8': 'hp-roman8',
-    'shiftjis': 'shift_jis',
-    'tis620': 'cp874',
-    'unicode11utf7': 'utf-7',
-    'us': 'ascii',
-    'usascii': 'ascii',
-    'utf16': 'utf-16',
-    'utf16be': 'utf-16-be',
-    'utf16le': 'utf-16-le',
-    'utf8': 'utf-8',
-    'windows1250': 'cp1250',
-    'windows1251': 'cp1251',
-    'windows1252': 'cp1252',
-    'windows1253': 'cp1253',
-    'windows1254': 'cp1254',
-    'windows1255': 'cp1255',
-    'windows1256': 'cp1256',
-    'windows1257': 'cp1257',
-    'windows1258': 'cp1258',
-    'windows936': 'gbk',
-    'x-x-big5': 'big5'}
-
 tokenTypes = {
     "Doctype": 0,
     "Characters": 1,
@@ -3088,17 +2929,18 @@ tokenTypes = {
     "ParseError": 7
 }
 
-tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"],
-                           tokenTypes["EmptyTag"]))
+tagTokenTypes = frozenset([tokenTypes["StartTag"], tokenTypes["EndTag"],
+                           tokenTypes["EmptyTag"]])
 
 
-prefixes = dict([(v, k) for k, v in namespaces.items()])
+prefixes = {v: k for k, v in namespaces.items()}
 prefixes["http://www.w3.org/1998/Math/MathML"] = "math"
 
 
 class DataLossWarning(UserWarning):
+    """Raised when the current tree is unable to represent the input data"""
     pass
 
 
-class ReparseException(Exception):
+class _ReparseException(Exception):
     pass
diff --git a/lib/html5lib/filters/alphabeticalattributes.py b/lib/html5lib/filters/alphabeticalattributes.py
index fed6996c..5ba926e3 100644
--- a/lib/html5lib/filters/alphabeticalattributes.py
+++ b/lib/html5lib/filters/alphabeticalattributes.py
@@ -1,20 +1,29 @@
 from __future__ import absolute_import, division, unicode_literals
 
-from . import _base
+from . import base
 
-try:
-    from collections import OrderedDict
-except ImportError:
-    from ordereddict import OrderedDict
+from collections import OrderedDict
 
 
-class Filter(_base.Filter):
+def _attr_key(attr):
+    """Return an appropriate key for an attribute for sorting
+
+    Attributes have a namespace that can be either ``None`` or a string. We
+    can't compare the two because they're different types, so we convert
+    ``None`` to an empty string first.
+
+    """
+    return (attr[0][0] or ''), attr[0][1]
+
+
+class Filter(base.Filter):
+    """Alphabetizes attributes for elements"""
     def __iter__(self):
-        for token in _base.Filter.__iter__(self):
+        for token in base.Filter.__iter__(self):
             if token["type"] in ("StartTag", "EmptyTag"):
                 attrs = OrderedDict()
                 for name, value in sorted(token["data"].items(),
-                                          key=lambda x: x[0]):
+                                          key=_attr_key):
                     attrs[name] = value
                 token["data"] = attrs
             yield token
diff --git a/lib/html5lib/filters/_base.py b/lib/html5lib/filters/base.py
similarity index 100%
rename from lib/html5lib/filters/_base.py
rename to lib/html5lib/filters/base.py
diff --git a/lib/html5lib/filters/inject_meta_charset.py b/lib/html5lib/filters/inject_meta_charset.py
index ca33b70b..aefb5c84 100644
--- a/lib/html5lib/filters/inject_meta_charset.py
+++ b/lib/html5lib/filters/inject_meta_charset.py
@@ -1,11 +1,19 @@
 from __future__ import absolute_import, division, unicode_literals
 
-from . import _base
+from . import base
 
 
-class Filter(_base.Filter):
+class Filter(base.Filter):
+    """Injects ``<meta charset=ENCODING>`` tag into head of document"""
     def __init__(self, source, encoding):
-        _base.Filter.__init__(self, source)
+        """Creates a Filter
+
+        :arg source: the source token stream
+
+        :arg encoding: the encoding to set
+
+        """
+        base.Filter.__init__(self, source)
         self.encoding = encoding
 
     def __iter__(self):
@@ -13,7 +21,7 @@ class Filter(_base.Filter):
         meta_found = (self.encoding is None)
         pending = []
 
-        for token in _base.Filter.__iter__(self):
+        for token in base.Filter.__iter__(self):
             type = token["type"]
             if type == "StartTag":
                 if token["name"].lower() == "head":
diff --git a/lib/html5lib/filters/lint.py b/lib/html5lib/filters/lint.py
index 7cc99a4b..acd4d7a2 100644
--- a/lib/html5lib/filters/lint.py
+++ b/lib/html5lib/filters/lint.py
@@ -1,93 +1,93 @@
 from __future__ import absolute_import, division, unicode_literals
 
-from gettext import gettext
-_ = gettext
+from six import text_type
 
-from . import _base
-from ..constants import cdataElements, rcdataElements, voidElements
+from . import base
+from ..constants import namespaces, voidElements
 
 from ..constants import spaceCharacters
 spaceCharacters = "".join(spaceCharacters)
 
 
-class LintError(Exception):
-    pass
+class Filter(base.Filter):
+    """Lints the token stream for errors
 
+    If it finds any errors, it'll raise an ``AssertionError``.
+
+    """
+    def __init__(self, source, require_matching_tags=True):
+        """Creates a Filter
+
+        :arg source: the source token stream
+
+        :arg require_matching_tags: whether or not to require matching tags
+
+        """
+        super(Filter, self).__init__(source)
+        self.require_matching_tags = require_matching_tags
 
-class Filter(_base.Filter):
     def __iter__(self):
         open_elements = []
-        contentModelFlag = "PCDATA"
-        for token in _base.Filter.__iter__(self):
+        for token in base.Filter.__iter__(self):
             type = token["type"]
             if type in ("StartTag", "EmptyTag"):
+                namespace = token["namespace"]
                 name = token["name"]
-                if contentModelFlag != "PCDATA":
-                    raise LintError(_("StartTag not in PCDATA content model flag: %(tag)s") % {"tag": name})
-                if not isinstance(name, str):
-                    raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
-                if not name:
-                    raise LintError(_("Empty tag name"))
-                if type == "StartTag" and name in voidElements:
-                    raise LintError(_("Void element reported as StartTag token: %(tag)s") % {"tag": name})
-                elif type == "EmptyTag" and name not in voidElements:
-                    raise LintError(_("Non-void element reported as EmptyTag token: %(tag)s") % {"tag": token["name"]})
-                if type == "StartTag":
-                    open_elements.append(name)
-                for name, value in token["data"]:
-                    if not isinstance(name, str):
-                        raise LintError(_("Attribute name is not a string: %(name)r") % {"name": name})
-                    if not name:
-                        raise LintError(_("Empty attribute name"))
-                    if not isinstance(value, str):
-                        raise LintError(_("Attribute value is not a string: %(value)r") % {"value": value})
-                if name in cdataElements:
-                    contentModelFlag = "CDATA"
-                elif name in rcdataElements:
-                    contentModelFlag = "RCDATA"
-                elif name == "plaintext":
-                    contentModelFlag = "PLAINTEXT"
+                assert namespace is None or isinstance(namespace, text_type)
+                assert namespace != ""
+                assert isinstance(name, text_type)
+                assert name != ""
+                assert isinstance(token["data"], dict)
+                if (not namespace or namespace == namespaces["html"]) and name in voidElements:
+                    assert type == "EmptyTag"
+                else:
+                    assert type == "StartTag"
+                if type == "StartTag" and self.require_matching_tags:
+                    open_elements.append((namespace, name))
+                for (namespace, name), value in token["data"].items():
+                    assert namespace is None or isinstance(namespace, text_type)
+                    assert namespace != ""
+                    assert isinstance(name, text_type)
+                    assert name != ""
+                    assert isinstance(value, text_type)
 
             elif type == "EndTag":
+                namespace = token["namespace"]
                 name = token["name"]
-                if not isinstance(name, str):
-                    raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
-                if not name:
-                    raise LintError(_("Empty tag name"))
-                if name in voidElements:
-                    raise LintError(_("Void element reported as EndTag token: %(tag)s") % {"tag": name})
-                start_name = open_elements.pop()
-                if start_name != name:
-                    raise LintError(_("EndTag (%(end)s) does not match StartTag (%(start)s)") % {"end": name, "start": start_name})
-                contentModelFlag = "PCDATA"
+                assert namespace is None or isinstance(namespace, text_type)
+                assert namespace != ""
+                assert isinstance(name, text_type)
+                assert name != ""
+                if (not namespace or namespace == namespaces["html"]) and name in voidElements:
+                    assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name}
+                elif self.require_matching_tags:
+                    start = open_elements.pop()
+                    assert start == (namespace, name)
 
             elif type == "Comment":
-                if contentModelFlag != "PCDATA":
-                    raise LintError(_("Comment not in PCDATA content model flag"))
+                data = token["data"]
+                assert isinstance(data, text_type)
 
             elif type in ("Characters", "SpaceCharacters"):
                 data = token["data"]
-                if not isinstance(data, str):
-                    raise LintError(_("Attribute name is not a string: %(name)r") % {"name": data})
-                if not data:
-                    raise LintError(_("%(type)s token with empty data") % {"type": type})
+                assert isinstance(data, text_type)
+                assert data != ""
                 if type == "SpaceCharacters":
-                    data = data.strip(spaceCharacters)
-                    if data:
-                        raise LintError(_("Non-space character(s) found in SpaceCharacters token: %(token)r") % {"token": data})
+                    assert data.strip(spaceCharacters) == ""
 
             elif type == "Doctype":
                 name = token["name"]
-                if contentModelFlag != "PCDATA":
-                    raise LintError(_("Doctype not in PCDATA content model flag: %(name)s") % {"name": name})
-                if not isinstance(name, str):
-                    raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
-                # XXX: what to do with token["data"] ?
+                assert name is None or isinstance(name, text_type)
+                assert token["publicId"] is None or isinstance(token["publicId"], text_type)
+                assert token["systemId"] is None or isinstance(token["systemId"], text_type)
 
-            elif type in ("ParseError", "SerializeError"):
-                pass
+            elif type == "Entity":
+                assert isinstance(token["name"], text_type)
+
+            elif type in ("SerializeError", "SerializerError"):
+                assert isinstance(token["data"], text_type)
 
             else:
-                raise LintError(_("Unknown token type: %(type)s") % {"type": type})
+                assert False, "Unknown token type: %(type)s" % {"type": type}
 
             yield token
diff --git a/lib/html5lib/filters/optionaltags.py b/lib/html5lib/filters/optionaltags.py
index fefe0b30..4a865012 100644
--- a/lib/html5lib/filters/optionaltags.py
+++ b/lib/html5lib/filters/optionaltags.py
@@ -1,9 +1,10 @@
 from __future__ import absolute_import, division, unicode_literals
 
-from . import _base
+from . import base
 
 
-class Filter(_base.Filter):
+class Filter(base.Filter):
+    """Removes optional tags from the token stream"""
     def slider(self):
         previous1 = previous2 = None
         for token in self.source:
@@ -11,7 +12,8 @@ class Filter(_base.Filter):
                 yield previous2, previous1, token
             previous2 = previous1
             previous1 = token
-        yield previous2, previous1, None
+        if previous1 is not None:
+            yield previous2, previous1, None
 
     def __iter__(self):
         for previous, token, next in self.slider():
@@ -58,7 +60,7 @@ class Filter(_base.Filter):
         elif tagname == 'colgroup':
             # A colgroup element's start tag may be omitted if the first thing
             # inside the colgroup element is a col element, and if the element
-            # is not immediately preceeded by another colgroup element whose
+            # is not immediately preceded by another colgroup element whose
             # end tag has been omitted.
             if type in ("StartTag", "EmptyTag"):
                 # XXX: we do not look at the preceding event, so instead we never
@@ -70,7 +72,7 @@ class Filter(_base.Filter):
         elif tagname == 'tbody':
             # A tbody element's start tag may be omitted if the first thing
             # inside the tbody element is a tr element, and if the element is
-            # not immediately preceeded by a tbody, thead, or tfoot element
+            # not immediately preceded by a tbody, thead, or tfoot element
             # whose end tag has been omitted.
             if type == "StartTag":
                 # omit the thead and tfoot elements' end tag when they are
diff --git a/lib/html5lib/filters/sanitizer.py b/lib/html5lib/filters/sanitizer.py
index b206b54e..70ef9066 100644
--- a/lib/html5lib/filters/sanitizer.py
+++ b/lib/html5lib/filters/sanitizer.py
@@ -1,12 +1,916 @@
+"""Deprecated from html5lib 1.1.
+
+See `here <https://github.com/html5lib/html5lib-python/issues/443>`_ for
+information about its deprecation; `Bleach <https://github.com/mozilla/bleach>`_
+is recommended as a replacement. Please let us know in the aforementioned issue
+if Bleach is unsuitable for your needs.
+
+"""
 from __future__ import absolute_import, division, unicode_literals
 
-from . import _base
-from ..sanitizer import HTMLSanitizerMixin
+import re
+import warnings
+from xml.sax.saxutils import escape, unescape
+
+from six.moves import urllib_parse as urlparse
+
+from . import base
+from ..constants import namespaces, prefixes
+
+__all__ = ["Filter"]
 
 
-class Filter(_base.Filter, HTMLSanitizerMixin):
+_deprecation_msg = (
+    "html5lib's sanitizer is deprecated; see " +
+    "https://github.com/html5lib/html5lib-python/issues/443 and please let " +
+    "us know if Bleach is unsuitable for your needs"
+)
+
+warnings.warn(_deprecation_msg, DeprecationWarning)
+
+allowed_elements = frozenset((
+    (namespaces['html'], 'a'),
+    (namespaces['html'], 'abbr'),
+    (namespaces['html'], 'acronym'),
+    (namespaces['html'], 'address'),
+    (namespaces['html'], 'area'),
+    (namespaces['html'], 'article'),
+    (namespaces['html'], 'aside'),
+    (namespaces['html'], 'audio'),
+    (namespaces['html'], 'b'),
+    (namespaces['html'], 'big'),
+    (namespaces['html'], 'blockquote'),
+    (namespaces['html'], 'br'),
+    (namespaces['html'], 'button'),
+    (namespaces['html'], 'canvas'),
+    (namespaces['html'], 'caption'),
+    (namespaces['html'], 'center'),
+    (namespaces['html'], 'cite'),
+    (namespaces['html'], 'code'),
+    (namespaces['html'], 'col'),
+    (namespaces['html'], 'colgroup'),
+    (namespaces['html'], 'command'),
+    (namespaces['html'], 'datagrid'),
+    (namespaces['html'], 'datalist'),
+    (namespaces['html'], 'dd'),
+    (namespaces['html'], 'del'),
+    (namespaces['html'], 'details'),
+    (namespaces['html'], 'dfn'),
+    (namespaces['html'], 'dialog'),
+    (namespaces['html'], 'dir'),
+    (namespaces['html'], 'div'),
+    (namespaces['html'], 'dl'),
+    (namespaces['html'], 'dt'),
+    (namespaces['html'], 'em'),
+    (namespaces['html'], 'event-source'),
+    (namespaces['html'], 'fieldset'),
+    (namespaces['html'], 'figcaption'),
+    (namespaces['html'], 'figure'),
+    (namespaces['html'], 'footer'),
+    (namespaces['html'], 'font'),
+    (namespaces['html'], 'form'),
+    (namespaces['html'], 'header'),
+    (namespaces['html'], 'h1'),
+    (namespaces['html'], 'h2'),
+    (namespaces['html'], 'h3'),
+    (namespaces['html'], 'h4'),
+    (namespaces['html'], 'h5'),
+    (namespaces['html'], 'h6'),
+    (namespaces['html'], 'hr'),
+    (namespaces['html'], 'i'),
+    (namespaces['html'], 'img'),
+    (namespaces['html'], 'input'),
+    (namespaces['html'], 'ins'),
+    (namespaces['html'], 'keygen'),
+    (namespaces['html'], 'kbd'),
+    (namespaces['html'], 'label'),
+    (namespaces['html'], 'legend'),
+    (namespaces['html'], 'li'),
+    (namespaces['html'], 'm'),
+    (namespaces['html'], 'map'),
+    (namespaces['html'], 'menu'),
+    (namespaces['html'], 'meter'),
+    (namespaces['html'], 'multicol'),
+    (namespaces['html'], 'nav'),
+    (namespaces['html'], 'nextid'),
+    (namespaces['html'], 'ol'),
+    (namespaces['html'], 'output'),
+    (namespaces['html'], 'optgroup'),
+    (namespaces['html'], 'option'),
+    (namespaces['html'], 'p'),
+    (namespaces['html'], 'pre'),
+    (namespaces['html'], 'progress'),
+    (namespaces['html'], 'q'),
+    (namespaces['html'], 's'),
+    (namespaces['html'], 'samp'),
+    (namespaces['html'], 'section'),
+    (namespaces['html'], 'select'),
+    (namespaces['html'], 'small'),
+    (namespaces['html'], 'sound'),
+    (namespaces['html'], 'source'),
+    (namespaces['html'], 'spacer'),
+    (namespaces['html'], 'span'),
+    (namespaces['html'], 'strike'),
+    (namespaces['html'], 'strong'),
+    (namespaces['html'], 'sub'),
+    (namespaces['html'], 'sup'),
+    (namespaces['html'], 'table'),
+    (namespaces['html'], 'tbody'),
+    (namespaces['html'], 'td'),
+    (namespaces['html'], 'textarea'),
+    (namespaces['html'], 'time'),
+    (namespaces['html'], 'tfoot'),
+    (namespaces['html'], 'th'),
+    (namespaces['html'], 'thead'),
+    (namespaces['html'], 'tr'),
+    (namespaces['html'], 'tt'),
+    (namespaces['html'], 'u'),
+    (namespaces['html'], 'ul'),
+    (namespaces['html'], 'var'),
+    (namespaces['html'], 'video'),
+    (namespaces['mathml'], 'maction'),
+    (namespaces['mathml'], 'math'),
+    (namespaces['mathml'], 'merror'),
+    (namespaces['mathml'], 'mfrac'),
+    (namespaces['mathml'], 'mi'),
+    (namespaces['mathml'], 'mmultiscripts'),
+    (namespaces['mathml'], 'mn'),
+    (namespaces['mathml'], 'mo'),
+    (namespaces['mathml'], 'mover'),
+    (namespaces['mathml'], 'mpadded'),
+    (namespaces['mathml'], 'mphantom'),
+    (namespaces['mathml'], 'mprescripts'),
+    (namespaces['mathml'], 'mroot'),
+    (namespaces['mathml'], 'mrow'),
+    (namespaces['mathml'], 'mspace'),
+    (namespaces['mathml'], 'msqrt'),
+    (namespaces['mathml'], 'mstyle'),
+    (namespaces['mathml'], 'msub'),
+    (namespaces['mathml'], 'msubsup'),
+    (namespaces['mathml'], 'msup'),
+    (namespaces['mathml'], 'mtable'),
+    (namespaces['mathml'], 'mtd'),
+    (namespaces['mathml'], 'mtext'),
+    (namespaces['mathml'], 'mtr'),
+    (namespaces['mathml'], 'munder'),
+    (namespaces['mathml'], 'munderover'),
+    (namespaces['mathml'], 'none'),
+    (namespaces['svg'], 'a'),
+    (namespaces['svg'], 'animate'),
+    (namespaces['svg'], 'animateColor'),
+    (namespaces['svg'], 'animateMotion'),
+    (namespaces['svg'], 'animateTransform'),
+    (namespaces['svg'], 'clipPath'),
+    (namespaces['svg'], 'circle'),
+    (namespaces['svg'], 'defs'),
+    (namespaces['svg'], 'desc'),
+    (namespaces['svg'], 'ellipse'),
+    (namespaces['svg'], 'font-face'),
+    (namespaces['svg'], 'font-face-name'),
+    (namespaces['svg'], 'font-face-src'),
+    (namespaces['svg'], 'g'),
+    (namespaces['svg'], 'glyph'),
+    (namespaces['svg'], 'hkern'),
+    (namespaces['svg'], 'linearGradient'),
+    (namespaces['svg'], 'line'),
+    (namespaces['svg'], 'marker'),
+    (namespaces['svg'], 'metadata'),
+    (namespaces['svg'], 'missing-glyph'),
+    (namespaces['svg'], 'mpath'),
+    (namespaces['svg'], 'path'),
+    (namespaces['svg'], 'polygon'),
+    (namespaces['svg'], 'polyline'),
+    (namespaces['svg'], 'radialGradient'),
+    (namespaces['svg'], 'rect'),
+    (namespaces['svg'], 'set'),
+    (namespaces['svg'], 'stop'),
+    (namespaces['svg'], 'svg'),
+    (namespaces['svg'], 'switch'),
+    (namespaces['svg'], 'text'),
+    (namespaces['svg'], 'title'),
+    (namespaces['svg'], 'tspan'),
+    (namespaces['svg'], 'use'),
+))
+
+allowed_attributes = frozenset((
+    # HTML attributes
+    (None, 'abbr'),
+    (None, 'accept'),
+    (None, 'accept-charset'),
+    (None, 'accesskey'),
+    (None, 'action'),
+    (None, 'align'),
+    (None, 'alt'),
+    (None, 'autocomplete'),
+    (None, 'autofocus'),
+    (None, 'axis'),
+    (None, 'background'),
+    (None, 'balance'),
+    (None, 'bgcolor'),
+    (None, 'bgproperties'),
+    (None, 'border'),
+    (None, 'bordercolor'),
+    (None, 'bordercolordark'),
+    (None, 'bordercolorlight'),
+    (None, 'bottompadding'),
+    (None, 'cellpadding'),
+    (None, 'cellspacing'),
+    (None, 'ch'),
+    (None, 'challenge'),
+    (None, 'char'),
+    (None, 'charoff'),
+    (None, 'choff'),
+    (None, 'charset'),
+    (None, 'checked'),
+    (None, 'cite'),
+    (None, 'class'),
+    (None, 'clear'),
+    (None, 'color'),
+    (None, 'cols'),
+    (None, 'colspan'),
+    (None, 'compact'),
+    (None, 'contenteditable'),
+    (None, 'controls'),
+    (None, 'coords'),
+    (None, 'data'),
+    (None, 'datafld'),
+    (None, 'datapagesize'),
+    (None, 'datasrc'),
+    (None, 'datetime'),
+    (None, 'default'),
+    (None, 'delay'),
+    (None, 'dir'),
+    (None, 'disabled'),
+    (None, 'draggable'),
+    (None, 'dynsrc'),
+    (None, 'enctype'),
+    (None, 'end'),
+    (None, 'face'),
+    (None, 'for'),
+    (None, 'form'),
+    (None, 'frame'),
+    (None, 'galleryimg'),
+    (None, 'gutter'),
+    (None, 'headers'),
+    (None, 'height'),
+    (None, 'hidefocus'),
+    (None, 'hidden'),
+    (None, 'high'),
+    (None, 'href'),
+    (None, 'hreflang'),
+    (None, 'hspace'),
+    (None, 'icon'),
+    (None, 'id'),
+    (None, 'inputmode'),
+    (None, 'ismap'),
+    (None, 'keytype'),
+    (None, 'label'),
+    (None, 'leftspacing'),
+    (None, 'lang'),
+    (None, 'list'),
+    (None, 'longdesc'),
+    (None, 'loop'),
+    (None, 'loopcount'),
+    (None, 'loopend'),
+    (None, 'loopstart'),
+    (None, 'low'),
+    (None, 'lowsrc'),
+    (None, 'max'),
+    (None, 'maxlength'),
+    (None, 'media'),
+    (None, 'method'),
+    (None, 'min'),
+    (None, 'multiple'),
+    (None, 'name'),
+    (None, 'nohref'),
+    (None, 'noshade'),
+    (None, 'nowrap'),
+    (None, 'open'),
+    (None, 'optimum'),
+    (None, 'pattern'),
+    (None, 'ping'),
+    (None, 'point-size'),
+    (None, 'poster'),
+    (None, 'pqg'),
+    (None, 'preload'),
+    (None, 'prompt'),
+    (None, 'radiogroup'),
+    (None, 'readonly'),
+    (None, 'rel'),
+    (None, 'repeat-max'),
+    (None, 'repeat-min'),
+    (None, 'replace'),
+    (None, 'required'),
+    (None, 'rev'),
+    (None, 'rightspacing'),
+    (None, 'rows'),
+    (None, 'rowspan'),
+    (None, 'rules'),
+    (None, 'scope'),
+    (None, 'selected'),
+    (None, 'shape'),
+    (None, 'size'),
+    (None, 'span'),
+    (None, 'src'),
+    (None, 'start'),
+    (None, 'step'),
+    (None, 'style'),
+    (None, 'summary'),
+    (None, 'suppress'),
+    (None, 'tabindex'),
+    (None, 'target'),
+    (None, 'template'),
+    (None, 'title'),
+    (None, 'toppadding'),
+    (None, 'type'),
+    (None, 'unselectable'),
+    (None, 'usemap'),
+    (None, 'urn'),
+    (None, 'valign'),
+    (None, 'value'),
+    (None, 'variable'),
+    (None, 'volume'),
+    (None, 'vspace'),
+    (None, 'vrml'),
+    (None, 'width'),
+    (None, 'wrap'),
+    (namespaces['xml'], 'lang'),
+    # MathML attributes
+    (None, 'actiontype'),
+    (None, 'align'),
+    (None, 'columnalign'),
+    (None, 'columnalign'),
+    (None, 'columnalign'),
+    (None, 'columnlines'),
+    (None, 'columnspacing'),
+    (None, 'columnspan'),
+    (None, 'depth'),
+    (None, 'display'),
+    (None, 'displaystyle'),
+    (None, 'equalcolumns'),
+    (None, 'equalrows'),
+    (None, 'fence'),
+    (None, 'fontstyle'),
+    (None, 'fontweight'),
+    (None, 'frame'),
+    (None, 'height'),
+    (None, 'linethickness'),
+    (None, 'lspace'),
+    (None, 'mathbackground'),
+    (None, 'mathcolor'),
+    (None, 'mathvariant'),
+    (None, 'mathvariant'),
+    (None, 'maxsize'),
+    (None, 'minsize'),
+    (None, 'other'),
+    (None, 'rowalign'),
+    (None, 'rowalign'),
+    (None, 'rowalign'),
+    (None, 'rowlines'),
+    (None, 'rowspacing'),
+    (None, 'rowspan'),
+    (None, 'rspace'),
+    (None, 'scriptlevel'),
+    (None, 'selection'),
+    (None, 'separator'),
+    (None, 'stretchy'),
+    (None, 'width'),
+    (None, 'width'),
+    (namespaces['xlink'], 'href'),
+    (namespaces['xlink'], 'show'),
+    (namespaces['xlink'], 'type'),
+    # SVG attributes
+    (None, 'accent-height'),
+    (None, 'accumulate'),
+    (None, 'additive'),
+    (None, 'alphabetic'),
+    (None, 'arabic-form'),
+    (None, 'ascent'),
+    (None, 'attributeName'),
+    (None, 'attributeType'),
+    (None, 'baseProfile'),
+    (None, 'bbox'),
+    (None, 'begin'),
+    (None, 'by'),
+    (None, 'calcMode'),
+    (None, 'cap-height'),
+    (None, 'class'),
+    (None, 'clip-path'),
+    (None, 'color'),
+    (None, 'color-rendering'),
+    (None, 'content'),
+    (None, 'cx'),
+    (None, 'cy'),
+    (None, 'd'),
+    (None, 'dx'),
+    (None, 'dy'),
+    (None, 'descent'),
+    (None, 'display'),
+    (None, 'dur'),
+    (None, 'end'),
+    (None, 'fill'),
+    (None, 'fill-opacity'),
+    (None, 'fill-rule'),
+    (None, 'font-family'),
+    (None, 'font-size'),
+    (None, 'font-stretch'),
+    (None, 'font-style'),
+    (None, 'font-variant'),
+    (None, 'font-weight'),
+    (None, 'from'),
+    (None, 'fx'),
+    (None, 'fy'),
+    (None, 'g1'),
+    (None, 'g2'),
+    (None, 'glyph-name'),
+    (None, 'gradientUnits'),
+    (None, 'hanging'),
+    (None, 'height'),
+    (None, 'horiz-adv-x'),
+    (None, 'horiz-origin-x'),
+    (None, 'id'),
+    (None, 'ideographic'),
+    (None, 'k'),
+    (None, 'keyPoints'),
+    (None, 'keySplines'),
+    (None, 'keyTimes'),
+    (None, 'lang'),
+    (None, 'marker-end'),
+    (None, 'marker-mid'),
+    (None, 'marker-start'),
+    (None, 'markerHeight'),
+    (None, 'markerUnits'),
+    (None, 'markerWidth'),
+    (None, 'mathematical'),
+    (None, 'max'),
+    (None, 'min'),
+    (None, 'name'),
+    (None, 'offset'),
+    (None, 'opacity'),
+    (None, 'orient'),
+    (None, 'origin'),
+    (None, 'overline-position'),
+    (None, 'overline-thickness'),
+    (None, 'panose-1'),
+    (None, 'path'),
+    (None, 'pathLength'),
+    (None, 'points'),
+    (None, 'preserveAspectRatio'),
+    (None, 'r'),
+    (None, 'refX'),
+    (None, 'refY'),
+    (None, 'repeatCount'),
+    (None, 'repeatDur'),
+    (None, 'requiredExtensions'),
+    (None, 'requiredFeatures'),
+    (None, 'restart'),
+    (None, 'rotate'),
+    (None, 'rx'),
+    (None, 'ry'),
+    (None, 'slope'),
+    (None, 'stemh'),
+    (None, 'stemv'),
+    (None, 'stop-color'),
+    (None, 'stop-opacity'),
+    (None, 'strikethrough-position'),
+    (None, 'strikethrough-thickness'),
+    (None, 'stroke'),
+    (None, 'stroke-dasharray'),
+    (None, 'stroke-dashoffset'),
+    (None, 'stroke-linecap'),
+    (None, 'stroke-linejoin'),
+    (None, 'stroke-miterlimit'),
+    (None, 'stroke-opacity'),
+    (None, 'stroke-width'),
+    (None, 'systemLanguage'),
+    (None, 'target'),
+    (None, 'text-anchor'),
+    (None, 'to'),
+    (None, 'transform'),
+    (None, 'type'),
+    (None, 'u1'),
+    (None, 'u2'),
+    (None, 'underline-position'),
+    (None, 'underline-thickness'),
+    (None, 'unicode'),
+    (None, 'unicode-range'),
+    (None, 'units-per-em'),
+    (None, 'values'),
+    (None, 'version'),
+    (None, 'viewBox'),
+    (None, 'visibility'),
+    (None, 'width'),
+    (None, 'widths'),
+    (None, 'x'),
+    (None, 'x-height'),
+    (None, 'x1'),
+    (None, 'x2'),
+    (namespaces['xlink'], 'actuate'),
+    (namespaces['xlink'], 'arcrole'),
+    (namespaces['xlink'], 'href'),
+    (namespaces['xlink'], 'role'),
+    (namespaces['xlink'], 'show'),
+    (namespaces['xlink'], 'title'),
+    (namespaces['xlink'], 'type'),
+    (namespaces['xml'], 'base'),
+    (namespaces['xml'], 'lang'),
+    (namespaces['xml'], 'space'),
+    (None, 'y'),
+    (None, 'y1'),
+    (None, 'y2'),
+    (None, 'zoomAndPan'),
+))
+
+attr_val_is_uri = frozenset((
+    (None, 'href'),
+    (None, 'src'),
+    (None, 'cite'),
+    (None, 'action'),
+    (None, 'longdesc'),
+    (None, 'poster'),
+    (None, 'background'),
+    (None, 'datasrc'),
+    (None, 'dynsrc'),
+    (None, 'lowsrc'),
+    (None, 'ping'),
+    (namespaces['xlink'], 'href'),
+    (namespaces['xml'], 'base'),
+))
+
+svg_attr_val_allows_ref = frozenset((
+    (None, 'clip-path'),
+    (None, 'color-profile'),
+    (None, 'cursor'),
+    (None, 'fill'),
+    (None, 'filter'),
+    (None, 'marker'),
+    (None, 'marker-start'),
+    (None, 'marker-mid'),
+    (None, 'marker-end'),
+    (None, 'mask'),
+    (None, 'stroke'),
+))
+
+svg_allow_local_href = frozenset((
+    (None, 'altGlyph'),
+    (None, 'animate'),
+    (None, 'animateColor'),
+    (None, 'animateMotion'),
+    (None, 'animateTransform'),
+    (None, 'cursor'),
+    (None, 'feImage'),
+    (None, 'filter'),
+    (None, 'linearGradient'),
+    (None, 'pattern'),
+    (None, 'radialGradient'),
+    (None, 'textpath'),
+    (None, 'tref'),
+    (None, 'set'),
+    (None, 'use')
+))
+
+allowed_css_properties = frozenset((
+    'azimuth',
+    'background-color',
+    'border-bottom-color',
+    'border-collapse',
+    'border-color',
+    'border-left-color',
+    'border-right-color',
+    'border-top-color',
+    'clear',
+    'color',
+    'cursor',
+    'direction',
+    'display',
+    'elevation',
+    'float',
+    'font',
+    'font-family',
+    'font-size',
+    'font-style',
+    'font-variant',
+    'font-weight',
+    'height',
+    'letter-spacing',
+    'line-height',
+    'overflow',
+    'pause',
+    'pause-after',
+    'pause-before',
+    'pitch',
+    'pitch-range',
+    'richness',
+    'speak',
+    'speak-header',
+    'speak-numeral',
+    'speak-punctuation',
+    'speech-rate',
+    'stress',
+    'text-align',
+    'text-decoration',
+    'text-indent',
+    'unicode-bidi',
+    'vertical-align',
+    'voice-family',
+    'volume',
+    'white-space',
+    'width',
+))
+
+allowed_css_keywords = frozenset((
+    'auto',
+    'aqua',
+    'black',
+    'block',
+    'blue',
+    'bold',
+    'both',
+    'bottom',
+    'brown',
+    'center',
+    'collapse',
+    'dashed',
+    'dotted',
+    'fuchsia',
+    'gray',
+    'green',
+    '!important',
+    'italic',
+    'left',
+    'lime',
+    'maroon',
+    'medium',
+    'none',
+    'navy',
+    'normal',
+    'nowrap',
+    'olive',
+    'pointer',
+    'purple',
+    'red',
+    'right',
+    'solid',
+    'silver',
+    'teal',
+    'top',
+    'transparent',
+    'underline',
+    'white',
+    'yellow',
+))
+
+allowed_svg_properties = frozenset((
+    'fill',
+    'fill-opacity',
+    'fill-rule',
+    'stroke',
+    'stroke-width',
+    'stroke-linecap',
+    'stroke-linejoin',
+    'stroke-opacity',
+))
+
+allowed_protocols = frozenset((
+    'ed2k',
+    'ftp',
+    'http',
+    'https',
+    'irc',
+    'mailto',
+    'news',
+    'gopher',
+    'nntp',
+    'telnet',
+    'webcal',
+    'xmpp',
+    'callto',
+    'feed',
+    'urn',
+    'aim',
+    'rsync',
+    'tag',
+    'ssh',
+    'sftp',
+    'rtsp',
+    'afs',
+    'data',
+))
+
+allowed_content_types = frozenset((
+    'image/png',
+    'image/jpeg',
+    'image/gif',
+    'image/webp',
+    'image/bmp',
+    'text/plain',
+))
+
+
+data_content_type = re.compile(r'''
+                                ^
+                                # Match a content type <application>/<type>
+                                (?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
+                                # Match any character set and encoding
+                                (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
+                                  |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
+                                # Assume the rest is data
+                                ,.*
+                                $
+                                ''',
+                               re.VERBOSE)
+
+
+class Filter(base.Filter):
+    """Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes"""
+    def __init__(self,
+                 source,
+                 allowed_elements=allowed_elements,
+                 allowed_attributes=allowed_attributes,
+                 allowed_css_properties=allowed_css_properties,
+                 allowed_css_keywords=allowed_css_keywords,
+                 allowed_svg_properties=allowed_svg_properties,
+                 allowed_protocols=allowed_protocols,
+                 allowed_content_types=allowed_content_types,
+                 attr_val_is_uri=attr_val_is_uri,
+                 svg_attr_val_allows_ref=svg_attr_val_allows_ref,
+                 svg_allow_local_href=svg_allow_local_href):
+        """Creates a Filter
+
+        :arg source: the token stream to sanitize (handed to ``base.Filter``)
+
+        :arg allowed_elements: set of ``(namespace, name)`` elements to
+            allow--everything else will be escaped
+
+        :arg allowed_attributes: set of ``(namespace, name)`` attributes to
+            allow in elements--everything else will be stripped
+
+        :arg allowed_css_properties: set of CSS properties to allow--everything
+            else will be stripped
+
+        :arg allowed_css_keywords: set of CSS keywords to allow--everything
+            else will be stripped
+
+        :arg allowed_svg_properties: SVG properties to allow in ``style``--everything else is removed
+
+        :arg allowed_protocols: set of allowed protocols for URIs
+
+        :arg allowed_content_types: set of allowed content types for ``data`` URIs.
+
+        :arg attr_val_is_uri: set of attributes that have URI values--values
+            that have a scheme not listed in ``allowed_protocols`` are removed
+
+        :arg svg_attr_val_allows_ref: set of SVG attributes that can have
+            ``url(...)`` references--references to non-local targets are blanked
+
+        :arg svg_allow_local_href: set of SVG elements whose ``xlink:href`` may
+            only be a local (``#...``) reference--other values are removed
+        """
+        super(Filter, self).__init__(source)
+
+        warnings.warn(_deprecation_msg, DeprecationWarning)
+
+        self.allowed_elements = allowed_elements
+        self.allowed_attributes = allowed_attributes
+        self.allowed_css_properties = allowed_css_properties
+        self.allowed_css_keywords = allowed_css_keywords
+        self.allowed_svg_properties = allowed_svg_properties
+        self.allowed_protocols = allowed_protocols
+        self.allowed_content_types = allowed_content_types
+        self.attr_val_is_uri = attr_val_is_uri
+        self.svg_attr_val_allows_ref = svg_attr_val_allows_ref
+        self.svg_allow_local_href = svg_allow_local_href
+
     def __iter__(self):
-        for token in _base.Filter.__iter__(self):
+        for token in base.Filter.__iter__(self):
             token = self.sanitize_token(token)
             if token:
                 yield token
+
+    # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
+    # stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes
+    # are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and
+    # ALLOWED_CSS_KEYWORDS, are allowed through. attributes in ATTR_VAL_IS_URI
+    # are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are
+    # allowed.
+    #
+    #   sanitize_html('<script> do_nasty_stuff() </script>')
+    #    => &lt;script> do_nasty_stuff() &lt;/script>
+    #   sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
+    #    => <a>Click here for $100</a>
+    def sanitize_token(self, token):
+        """Sanitize one token; returns the token to emit, or None for comments (which are dropped)."""
+        # accommodate filters which use token_type differently
+        token_type = token["type"]
+        if token_type in ("StartTag", "EndTag", "EmptyTag"):
+            name = token["name"]
+            namespace = token["namespace"]
+            if ((namespace, name) in self.allowed_elements or
+                (namespace is None and  # un-namespaced tags are looked up as HTML elements
+                 (namespaces["html"], name) in self.allowed_elements)):
+                return self.allowed_token(token)
+            else:
+                return self.disallowed_token(token)
+        elif token_type == "Comment":
+            pass  # implicit None: __iter__ only yields truthy results, so comments vanish
+        else:
+            return token
+
+    def allowed_token(self, token):
+        """Drop disallowed or unsafe attributes from ``token["data"]`` in place; return ``token``."""
+        if "data" in token:
+            attrs = token["data"]
+            attr_names = set(attrs.keys())
+
+            # Remove forbidden attributes
+            for to_remove in (attr_names - self.allowed_attributes):
+                del token["data"][to_remove]
+                attr_names.remove(to_remove)
+
+            # Remove attributes with disallowed URL values
+            for attr in (attr_names & self.attr_val_is_uri):
+                assert attr in attrs
+                # Legacy normalisation of unknown origin (the regexp and the unescape call have
+                # always been here); all it can do is remove *more* than would otherwise be removed.
+                val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '',
+                                       unescape(attrs[attr])).lower()
+                # remove replacement characters from unescaped characters
+                val_unescaped = val_unescaped.replace("\ufffd", "")
+                try:
+                    uri = urlparse.urlparse(val_unescaped)
+                except ValueError:
+                    uri = None
+                    del attrs[attr]
+                if uri and uri.scheme:
+                    if uri.scheme not in self.allowed_protocols:
+                        del attrs[attr]
+                    # elif: a disallowed 'data' URI is already deleted above; a second del would raise KeyError
+                    elif uri.scheme == 'data':
+                        m = data_content_type.match(uri.path)
+                        if not m:
+                            del attrs[attr]
+                        elif m.group('content_type') not in self.allowed_content_types:
+                            del attrs[attr]
+
+            # Blank out url(...) references that do not point at a local '#fragment'
+            for attr in self.svg_attr_val_allows_ref:
+                if attr in attrs:
+                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', ' ', unescape(attrs[attr]))
+            # svg_allow_local_href holds (None, name) pairs by default; plain names still work
+            name, xlink_href = token["name"], (namespaces['xlink'], 'href')
+            if ((name in self.svg_allow_local_href or (None, name) in self.svg_allow_local_href) and
+                    xlink_href in attrs and re.search(r'^\s*[^#\s].*', attrs[xlink_href])):
+                del attrs[xlink_href]
+            if (None, 'style') in attrs:
+                attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')])
+            token["data"] = attrs
+        return token
+
+    def disallowed_token(self, token):
+        """Turn a disallowed tag token into a Characters token whose data is the tag's markup."""
+        token_type = token["type"]
+        if token_type == "EndTag":
+            token["data"] = "</%s>" % token["name"]
+        elif token["data"]:
+            assert token_type in ("StartTag", "EmptyTag")
+            attrs = []
+            for (ns, name), v in token["data"].items():
+                attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v)))
+            token["data"] = "<%s%s>" % (token["name"], ''.join(attrs))
+        else:
+            token["data"] = "<%s>" % token["name"]
+        if token.get("selfClosing"):
+            token["data"] = token["data"][:-1] + "/>"  # swap the trailing ">" for "/>"
+
+        token["type"] = "Characters"  # the rebuilt markup is emitted as text, not as a tag
+        del token["name"]
+        return token
+
+    def sanitize_css(self, style):
+        """Return ``style`` reduced to its allowed declarations, or '' if it fails the syntax checks."""
+        # disallow urls
+        style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
+
+        # gauntlet: the whole value is rejected unless both patterns below match it
+        if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
+            return ''
+        if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
+            return ''
+
+        clean = []
+        for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
+            if not value:
+                continue
+            if prop.lower() in self.allowed_css_properties:
+                clean.append(prop + ': ' + value + ';')
+            elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
+                                                'padding']:
+                for keyword in value.split():
+                    if keyword not in self.allowed_css_keywords and \
+                            not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):  # noqa
+                        break
+                else:  # no break: every keyword was an allowed keyword or matched the pattern
+                    clean.append(prop + ': ' + value + ';')
+            elif prop.lower() in self.allowed_svg_properties:
+                clean.append(prop + ': ' + value + ';')
+        return ' '.join(clean)
diff --git a/lib/html5lib/filters/whitespace.py b/lib/html5lib/filters/whitespace.py
index dfc60eeb..0d12584b 100644
--- a/lib/html5lib/filters/whitespace.py
+++ b/lib/html5lib/filters/whitespace.py
@@ -2,20 +2,20 @@ from __future__ import absolute_import, division, unicode_literals
 
 import re
 
-from . import _base
+from . import base
 from ..constants import rcdataElements, spaceCharacters
 spaceCharacters = "".join(spaceCharacters)
 
 SPACES_REGEX = re.compile("[%s]+" % spaceCharacters)
 
 
-class Filter(_base.Filter):
-
+class Filter(base.Filter):
+    """Collapses whitespace except in pre, textarea, and script elements"""
     spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))
 
     def __iter__(self):
         preserve = 0
-        for token in _base.Filter.__iter__(self):
+        for token in base.Filter.__iter__(self):
             type = token["type"]
             if type == "StartTag" \
                     and (preserve or token["name"] in self.spacePreserveElements):
diff --git a/lib/html5lib/html5parser.py b/lib/html5lib/html5parser.py
index b0f14f39..74d829d9 100644
--- a/lib/html5lib/html5parser.py
+++ b/lib/html5lib/html5parser.py
@@ -1,38 +1,74 @@
 from __future__ import absolute_import, division, unicode_literals
-from six import with_metaclass
+from six import with_metaclass, viewkeys
 
 import types
 
-from . import inputstream
-from . import tokenizer
+from . import _inputstream
+from . import _tokenizer
 
 from . import treebuilders
-from .treebuilders._base import Marker
+from .treebuilders.base import Marker
 
-from . import utils
-from . import constants
-from .constants import spaceCharacters, asciiUpper2Lower
-from .constants import specialElements
-from .constants import headingElements
-from .constants import cdataElements, rcdataElements
-from .constants import tokenTypes, ReparseException, namespaces
-from .constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements
-from .constants import adjustForeignAttributes as adjustForeignAttributesMap
+from . import _utils
+from .constants import (
+    spaceCharacters, asciiUpper2Lower,
+    specialElements, headingElements, cdataElements, rcdataElements,
+    tokenTypes, tagTokenTypes,
+    namespaces,
+    htmlIntegrationPointElements, mathmlTextIntegrationPointElements,
+    adjustForeignAttributes as adjustForeignAttributesMap,
+    adjustMathMLAttributes, adjustSVGAttributes,
+    E,
+    _ReparseException
+)
 
 
-def parse(doc, treebuilder="etree", encoding=None,
-          namespaceHTMLElements=True):
-    """Parse a string or file-like object into a tree"""
+def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs):
+    """Parse an HTML document as a string or file-like object into a tree
+
+    :arg doc: the document to parse as a string or file-like object
+
+    :arg treebuilder: the treebuilder to use when parsing
+
+    :arg namespaceHTMLElements: whether or not to namespace HTML elements
+
+    :returns: parsed tree
+
+    Example:
+
+    >>> from html5lib.html5parser import parse
+    >>> parse('<html><body><p>This is a doc</p></body></html>')
+    <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>
+
+    """
     tb = treebuilders.getTreeBuilder(treebuilder)
     p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
-    return p.parse(doc, encoding=encoding)
+    return p.parse(doc, **kwargs)
 
 
-def parseFragment(doc, container="div", treebuilder="etree", encoding=None,
-                  namespaceHTMLElements=True):
+def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs):
+    """Parse an HTML fragment as a string or file-like object into a tree
+
+    :arg doc: the fragment to parse as a string or file-like object
+
+    :arg container: the container context to parse the fragment in
+
+    :arg treebuilder: the treebuilder to use when parsing
+
+    :arg namespaceHTMLElements: whether or not to namespace HTML elements
+
+    :returns: parsed tree
+
+    Example:
+
+    >>> from html5lib.html5parser import parseFragment
+    >>> parseFragment('<b>this is a fragment</b>')
+    <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>
+
+    """
     tb = treebuilders.getTreeBuilder(treebuilder)
     p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
-    return p.parseFragment(doc, container=container, encoding=encoding)
+    return p.parseFragment(doc, container=container, **kwargs)
 
 
 def method_decorator_metaclass(function):
@@ -48,21 +84,30 @@ def method_decorator_metaclass(function):
 
 
 class HTMLParser(object):
-    """HTML parser. Generates a tree structure from a stream of (possibly
-        malformed) HTML"""
+    """HTML parser
 
-    def __init__(self, tree=None, tokenizer=tokenizer.HTMLTokenizer,
-                 strict=False, namespaceHTMLElements=True, debug=False):
+    Generates a tree structure from a stream of (possibly malformed) HTML.
+
+    """
+
+    def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False):
         """
-        strict - raise an exception when a parse error is encountered
+        :arg tree: a treebuilder class controlling the type of tree that will be
+            returned. Built in treebuilders can be accessed through
+            html5lib.treebuilders.getTreeBuilder(treeType)
 
-        tree - a treebuilder class controlling the type of tree that will be
-        returned. Built in treebuilders can be accessed through
-        html5lib.treebuilders.getTreeBuilder(treeType)
+        :arg strict: raise an exception when a parse error is encountered
+
+        :arg namespaceHTMLElements: whether or not to namespace HTML elements
+
+        :arg debug: whether or not to enable debug mode, which logs which phase processes each token
+
+        Example:
+
+        >>> from html5lib import HTMLParser, getTreeBuilder
+        >>> parser = HTMLParser()                     # generates parser with etree builder
+        >>> parser = HTMLParser(getTreeBuilder('lxml'), strict=True)  # strict parser with lxml builder
 
-        tokenizer - a class that provides a stream of tokens to the treebuilder.
-        This may be replaced for e.g. a sanitizer which converts some tags to
-        text
         """
 
         # Raise an exception on the first error encountered
@@ -71,29 +116,24 @@ class HTMLParser(object):
         if tree is None:
             tree = treebuilders.getTreeBuilder("etree")
         self.tree = tree(namespaceHTMLElements)
-        self.tokenizer_class = tokenizer
         self.errors = []
 
-        self.phases = dict([(name, cls(self, self.tree)) for name, cls in
-                            getPhases(debug).items()])
+        self.phases = {name: cls(self, self.tree) for name, cls in
+                       getPhases(debug).items()}
 
-    def _parse(self, stream, innerHTML=False, container="div",
-               encoding=None, parseMeta=True, useChardet=True, **kwargs):
+    def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs):
 
         self.innerHTMLMode = innerHTML
         self.container = container
-        self.tokenizer = self.tokenizer_class(stream, encoding=encoding,
-                                              parseMeta=parseMeta,
-                                              useChardet=useChardet,
-                                              parser=self, **kwargs)
+        self.scripting = scripting
+        self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs)
         self.reset()
 
-        while True:
-            try:
-                self.mainLoop()
-                break
-            except ReparseException:
-                self.reset()
+        try:
+            self.mainLoop()
+        except _ReparseException:
+            self.reset()
+            self.mainLoop()
 
     def reset(self):
         self.tree.reset()
@@ -120,7 +160,7 @@ class HTMLParser(object):
             self.phase.insertHtmlElement()
             self.resetInsertionMode()
         else:
-            self.innerHTML = False
+            self.innerHTML = False  # pylint:disable=redefined-variable-type
             self.phase = self.phases["initial"]
 
         self.lastPhase = None
@@ -129,6 +169,16 @@ class HTMLParser(object):
 
         self.framesetOK = True
 
+    @property
+    def documentEncoding(self):
+        """Name of the character encoding that was used to decode the input stream, or
+        :obj:`None` if that is not determined yet
+
+        """
+        if not hasattr(self, 'tokenizer'):
+            return None
+        return self.tokenizer.stream.charEncoding[0].name
+
     def isHTMLIntegrationPoint(self, element):
         if (element.name == "annotation-xml" and
                 element.namespace == namespaces["mathml"]):
@@ -151,9 +201,11 @@ class HTMLParser(object):
         DoctypeToken = tokenTypes["Doctype"]
         ParseErrorToken = tokenTypes["ParseError"]
 
-        for token in self.normalizedTokens():
+        for token in self.tokenizer:
+            prev_token = None
             new_token = token
             while new_token is not None:
+                prev_token = new_token
                 currentNode = self.tree.openElements[-1] if self.tree.openElements else None
                 currentNodeNamespace = currentNode.namespace if currentNode else None
                 currentNodeName = currentNode.name if currentNode else None
@@ -172,6 +224,7 @@ class HTMLParser(object):
                           type in (CharactersToken, SpaceCharactersToken))) or
                         (currentNodeNamespace == namespaces["mathml"] and
                          currentNodeName == "annotation-xml" and
+                         type == StartTagToken and
                          token["name"] == "svg") or
                         (self.isHTMLIntegrationPoint(currentNode) and
                          type in (StartTagToken, CharactersToken, SpaceCharactersToken))):
@@ -192,10 +245,10 @@ class HTMLParser(object):
                     elif type == DoctypeToken:
                         new_token = phase.processDoctype(new_token)
 
-            if (type == StartTagToken and token["selfClosing"]
-                    and not token["selfClosingAcknowledged"]):
+            if (type == StartTagToken and prev_token["selfClosing"] and
+                    not prev_token["selfClosingAcknowledged"]):
                 self.parseError("non-void-element-with-trailing-solidus",
-                                {"name": token["name"]})
+                                {"name": prev_token["name"]})
 
         # When the loop finishes it's EOF
         reprocess = True
@@ -206,143 +259,78 @@ class HTMLParser(object):
             if reprocess:
                 assert self.phase not in phases
 
-    def normalizedTokens(self):
-        for token in self.tokenizer:
-            yield self.normalizeToken(token)
-
-    def parse(self, stream, encoding=None, parseMeta=True, useChardet=True):
+    def parse(self, stream, *args, **kwargs):
         """Parse a HTML document into a well-formed tree
 
-        stream - a filelike object or string containing the HTML to be parsed
+        :arg stream: a file-like object or string containing the HTML to be parsed
+
+            The optional encoding parameter must be a string that indicates
+            the encoding.  If specified, that encoding will be used,
+            regardless of any BOM or later declaration (such as in a meta
+            element).
+
+        :arg scripting: treat noscript elements as if JavaScript was turned on
+
+        :returns: parsed tree
+
+        Example:
+
+        >>> from html5lib.html5parser import HTMLParser
+        >>> parser = HTMLParser()
+        >>> parser.parse('<html><body><p>This is a doc</p></body></html>')
+        <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>
 
-        The optional encoding parameter must be a string that indicates
-        the encoding.  If specified, that encoding will be used,
-        regardless of any BOM or later declaration (such as in a meta
-        element)
         """
-        self._parse(stream, innerHTML=False, encoding=encoding,
-                    parseMeta=parseMeta, useChardet=useChardet)
+        self._parse(stream, False, None, *args, **kwargs)
         return self.tree.getDocument()
 
-    def parseFragment(self, stream, container="div", encoding=None,
-                      parseMeta=False, useChardet=True):
+    def parseFragment(self, stream, *args, **kwargs):
         """Parse a HTML fragment into a well-formed tree fragment
 
-        container - name of the element we're setting the innerHTML property
-        if set to None, default to 'div'
+        :arg container: name of the element whose innerHTML property is
+            being set; if set to None, defaults to 'div'
 
-        stream - a filelike object or string containing the HTML to be parsed
+        :arg stream: a file-like object or string containing the HTML to be parsed
+
+            The optional encoding parameter must be a string that indicates
+            the encoding.  If specified, that encoding will be used,
+            regardless of any BOM or later declaration (such as in a meta
+            element)
+
+        :arg scripting: treat noscript elements as if JavaScript was turned on
+
+        :returns: parsed tree
+
+        Example:
+
+        >>> from html5lib.html5parser import HTMLParser
+        >>> parser = HTMLParser()
+        >>> parser.parseFragment('<b>this is a fragment</b>')
+        <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>
 
-        The optional encoding parameter must be a string that indicates
-        the encoding.  If specified, that encoding will be used,
-        regardless of any BOM or later declaration (such as in a meta
-        element)
         """
-        self._parse(stream, True, container=container, encoding=encoding)
+        self._parse(stream, True, *args, **kwargs)
         return self.tree.getFragment()
 
-    def parseError(self, errorcode="XXX-undefined-error", datavars={}):
+    def parseError(self, errorcode="XXX-undefined-error", datavars=None):
         # XXX The idea is to make errorcode mandatory.
+        if datavars is None:
+            datavars = {}
         self.errors.append((self.tokenizer.stream.position(), errorcode, datavars))
         if self.strict:
-            raise ParseError
-
-    def normalizeToken(self, token):
-        """ HTML5 specific normalizations to the token stream """
-
-        if token["type"] == tokenTypes["StartTag"]:
-            token["data"] = dict(token["data"][::-1])
-
-        return token
+            raise ParseError(E[errorcode] % datavars)
 
     def adjustMathMLAttributes(self, token):
-        replacements = {"definitionurl": "definitionURL"}
-        for k, v in replacements.items():
-            if k in token["data"]:
-                token["data"][v] = token["data"][k]
-                del token["data"][k]
+        adjust_attributes(token, adjustMathMLAttributes)
 
     def adjustSVGAttributes(self, token):
-        replacements = {
-            "attributename": "attributeName",
-            "attributetype": "attributeType",
-            "basefrequency": "baseFrequency",
-            "baseprofile": "baseProfile",
-            "calcmode": "calcMode",
-            "clippathunits": "clipPathUnits",
-            "contentscripttype": "contentScriptType",
-            "contentstyletype": "contentStyleType",
-            "diffuseconstant": "diffuseConstant",
-            "edgemode": "edgeMode",
-            "externalresourcesrequired": "externalResourcesRequired",
-            "filterres": "filterRes",
-            "filterunits": "filterUnits",
-            "glyphref": "glyphRef",
-            "gradienttransform": "gradientTransform",
-            "gradientunits": "gradientUnits",
-            "kernelmatrix": "kernelMatrix",
-            "kernelunitlength": "kernelUnitLength",
-            "keypoints": "keyPoints",
-            "keysplines": "keySplines",
-            "keytimes": "keyTimes",
-            "lengthadjust": "lengthAdjust",
-            "limitingconeangle": "limitingConeAngle",
-            "markerheight": "markerHeight",
-            "markerunits": "markerUnits",
-            "markerwidth": "markerWidth",
-            "maskcontentunits": "maskContentUnits",
-            "maskunits": "maskUnits",
-            "numoctaves": "numOctaves",
-            "pathlength": "pathLength",
-            "patterncontentunits": "patternContentUnits",
-            "patterntransform": "patternTransform",
-            "patternunits": "patternUnits",
-            "pointsatx": "pointsAtX",
-            "pointsaty": "pointsAtY",
-            "pointsatz": "pointsAtZ",
-            "preservealpha": "preserveAlpha",
-            "preserveaspectratio": "preserveAspectRatio",
-            "primitiveunits": "primitiveUnits",
-            "refx": "refX",
-            "refy": "refY",
-            "repeatcount": "repeatCount",
-            "repeatdur": "repeatDur",
-            "requiredextensions": "requiredExtensions",
-            "requiredfeatures": "requiredFeatures",
-            "specularconstant": "specularConstant",
-            "specularexponent": "specularExponent",
-            "spreadmethod": "spreadMethod",
-            "startoffset": "startOffset",
-            "stddeviation": "stdDeviation",
-            "stitchtiles": "stitchTiles",
-            "surfacescale": "surfaceScale",
-            "systemlanguage": "systemLanguage",
-            "tablevalues": "tableValues",
-            "targetx": "targetX",
-            "targety": "targetY",
-            "textlength": "textLength",
-            "viewbox": "viewBox",
-            "viewtarget": "viewTarget",
-            "xchannelselector": "xChannelSelector",
-            "ychannelselector": "yChannelSelector",
-            "zoomandpan": "zoomAndPan"
-        }
-        for originalName in list(token["data"].keys()):
-            if originalName in replacements:
-                svgName = replacements[originalName]
-                token["data"][svgName] = token["data"][originalName]
-                del token["data"][originalName]
+        adjust_attributes(token, adjustSVGAttributes)
 
     def adjustForeignAttributes(self, token):
-        replacements = adjustForeignAttributesMap
-
-        for originalName in token["data"].keys():
-            if originalName in replacements:
-                foreignName = replacements[originalName]
-                token["data"][foreignName] = token["data"][originalName]
-                del token["data"][originalName]
+        adjust_attributes(token, adjustForeignAttributesMap)
 
     def reparseTokenNormal(self, token):
+        # pylint:disable=unused-argument
         self.parser.phase()
 
     def resetInsertionMode(self):
@@ -390,9 +378,7 @@ class HTMLParser(object):
         self.phase = new_phase
 
     def parseRCDataRawtext(self, token, contentType):
-        """Generic RCDATA/RAWTEXT Parsing algorithm
-        contentType - RCDATA or RAWTEXT
-        """
+        # Generic RCDATA/RAWTEXT Parsing algorithm
         assert contentType in ("RAWTEXT", "RCDATA")
 
         self.tree.insertElement(token)
@@ -407,20 +393,17 @@ class HTMLParser(object):
         self.phase = self.phases["text"]
 
 
+@_utils.memoize
 def getPhases(debug):
     def log(function):
         """Logger that records which phase processes each token"""
-        type_names = dict((value, key) for key, value in
-                          constants.tokenTypes.items())
+        type_names = {value: key for key, value in tokenTypes.items()}
 
         def wrapped(self, *args, **kwargs):
             if function.__name__.startswith("process") and len(args) > 0:
                 token = args[0]
-                try:
-                    info = {"type": type_names[token['type']]}
-                except:
-                    raise
-                if token['type'] in constants.tagTokenTypes:
+                info = {"type": type_names[token['type']]}
+                if token['type'] in tagTokenTypes:
                     info["name"] = token['name']
 
                 self.parser.log.append((self.parser.tokenizer.state.__name__,
@@ -439,13 +422,17 @@ def getPhases(debug):
         else:
             return type
 
+    # pylint:disable=unused-argument
     class Phase(with_metaclass(getMetaclass(debug, log))):
         """Base class for helper object that implements each phase of processing
         """
+        __slots__ = ("parser", "tree", "__startTagCache", "__endTagCache")
 
         def __init__(self, parser, tree):
             self.parser = parser
             self.tree = tree
+            self.__startTagCache = {}
+            self.__endTagCache = {}
 
         def processEOF(self):
             raise NotImplementedError
@@ -465,7 +452,21 @@ def getPhases(debug):
             self.tree.insertText(token["data"])
 
         def processStartTag(self, token):
-            return self.startTagHandler[token["name"]](token)
+            # Note the caching is done here rather than BoundMethodDispatcher as doing it there
+            # requires a circular reference to the Phase, and this ends up with a significant
+            # (CPython 2.7, 3.8) GC cost when parsing many short inputs
+            name = token["name"]
+            # In Py2, using `in` is quicker in general than try/except KeyError
+            # In Py3, `in` is quicker when there are few cache hits (typically short inputs)
+            if name in self.__startTagCache:
+                func = self.__startTagCache[name]
+            else:
+                func = self.__startTagCache[name] = self.startTagHandler[name]
+                # bound the cache size in case we get loads of unknown tags
+                while len(self.__startTagCache) > len(self.startTagHandler) * 1.1:
+                    # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
+                    self.__startTagCache.pop(next(iter(self.__startTagCache)))
+            return func(token)
 
         def startTagHtml(self, token):
             if not self.parser.firstStartTag and token["name"] == "html":
@@ -478,9 +479,25 @@ def getPhases(debug):
             self.parser.firstStartTag = False
 
         def processEndTag(self, token):
-            return self.endTagHandler[token["name"]](token)
+            # Note the caching is done here rather than BoundMethodDispatcher as doing it there
+            # requires a circular reference to the Phase, and this ends up with a significant
+            # (CPython 2.7, 3.8) GC cost when parsing many short inputs
+            name = token["name"]
+            # In Py2, using `in` is quicker in general than try/except KeyError
+            # In Py3, `in` is quicker when there are few cache hits (typically short inputs)
+            if name in self.__endTagCache:
+                func = self.__endTagCache[name]
+            else:
+                func = self.__endTagCache[name] = self.endTagHandler[name]
+                # bound the cache size in case we get loads of unknown tags
+                while len(self.__endTagCache) > len(self.endTagHandler) * 1.1:
+                    # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
+                    self.__endTagCache.pop(next(iter(self.__endTagCache)))
+            return func(token)
 
     class InitialPhase(Phase):
+        __slots__ = tuple()
+
         def processSpaceCharacters(self, token):
             pass
 
@@ -505,77 +522,76 @@ def getPhases(debug):
             if publicId != "":
                 publicId = publicId.translate(asciiUpper2Lower)
 
-            if (not correct or token["name"] != "html"
-                or publicId.startswith(
-                    ("+//silmaril//dtd html pro v0r11 19970101//",
-                     "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
-                     "-//as//dtd html 3.0 aswedit + extensions//",
-                     "-//ietf//dtd html 2.0 level 1//",
-                     "-//ietf//dtd html 2.0 level 2//",
-                     "-//ietf//dtd html 2.0 strict level 1//",
-                     "-//ietf//dtd html 2.0 strict level 2//",
-                     "-//ietf//dtd html 2.0 strict//",
-                     "-//ietf//dtd html 2.0//",
-                     "-//ietf//dtd html 2.1e//",
-                     "-//ietf//dtd html 3.0//",
-                     "-//ietf//dtd html 3.2 final//",
-                     "-//ietf//dtd html 3.2//",
-                     "-//ietf//dtd html 3//",
-                     "-//ietf//dtd html level 0//",
-                     "-//ietf//dtd html level 1//",
-                     "-//ietf//dtd html level 2//",
-                     "-//ietf//dtd html level 3//",
-                     "-//ietf//dtd html strict level 0//",
-                     "-//ietf//dtd html strict level 1//",
-                     "-//ietf//dtd html strict level 2//",
-                     "-//ietf//dtd html strict level 3//",
-                     "-//ietf//dtd html strict//",
-                     "-//ietf//dtd html//",
-                     "-//metrius//dtd metrius presentational//",
-                     "-//microsoft//dtd internet explorer 2.0 html strict//",
-                     "-//microsoft//dtd internet explorer 2.0 html//",
-                     "-//microsoft//dtd internet explorer 2.0 tables//",
-                     "-//microsoft//dtd internet explorer 3.0 html strict//",
-                     "-//microsoft//dtd internet explorer 3.0 html//",
-                     "-//microsoft//dtd internet explorer 3.0 tables//",
-                     "-//netscape comm. corp.//dtd html//",
-                     "-//netscape comm. corp.//dtd strict html//",
-                     "-//o'reilly and associates//dtd html 2.0//",
-                     "-//o'reilly and associates//dtd html extended 1.0//",
-                     "-//o'reilly and associates//dtd html extended relaxed 1.0//",
-                     "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
-                     "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
-                     "-//spyglass//dtd html 2.0 extended//",
-                     "-//sq//dtd html 2.0 hotmetal + extensions//",
-                     "-//sun microsystems corp.//dtd hotjava html//",
-                     "-//sun microsystems corp.//dtd hotjava strict html//",
-                     "-//w3c//dtd html 3 1995-03-24//",
-                     "-//w3c//dtd html 3.2 draft//",
-                     "-//w3c//dtd html 3.2 final//",
-                     "-//w3c//dtd html 3.2//",
-                     "-//w3c//dtd html 3.2s draft//",
-                     "-//w3c//dtd html 4.0 frameset//",
-                     "-//w3c//dtd html 4.0 transitional//",
-                     "-//w3c//dtd html experimental 19960712//",
-                     "-//w3c//dtd html experimental 970421//",
-                     "-//w3c//dtd w3 html//",
-                     "-//w3o//dtd w3 html 3.0//",
-                     "-//webtechs//dtd mozilla html 2.0//",
-                     "-//webtechs//dtd mozilla html//"))
-                or publicId in
-                    ("-//w3o//dtd w3 html strict 3.0//en//",
-                     "-/w3c/dtd html 4.0 transitional/en",
-                     "html")
-                or publicId.startswith(
-                    ("-//w3c//dtd html 4.01 frameset//",
-                     "-//w3c//dtd html 4.01 transitional//")) and
-                    systemId is None
-                    or systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
+            if (not correct or token["name"] != "html" or
+                    publicId.startswith(
+                        ("+//silmaril//dtd html pro v0r11 19970101//",
+                         "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
+                         "-//as//dtd html 3.0 aswedit + extensions//",
+                         "-//ietf//dtd html 2.0 level 1//",
+                         "-//ietf//dtd html 2.0 level 2//",
+                         "-//ietf//dtd html 2.0 strict level 1//",
+                         "-//ietf//dtd html 2.0 strict level 2//",
+                         "-//ietf//dtd html 2.0 strict//",
+                         "-//ietf//dtd html 2.0//",
+                         "-//ietf//dtd html 2.1e//",
+                         "-//ietf//dtd html 3.0//",
+                         "-//ietf//dtd html 3.2 final//",
+                         "-//ietf//dtd html 3.2//",
+                         "-//ietf//dtd html 3//",
+                         "-//ietf//dtd html level 0//",
+                         "-//ietf//dtd html level 1//",
+                         "-//ietf//dtd html level 2//",
+                         "-//ietf//dtd html level 3//",
+                         "-//ietf//dtd html strict level 0//",
+                         "-//ietf//dtd html strict level 1//",
+                         "-//ietf//dtd html strict level 2//",
+                         "-//ietf//dtd html strict level 3//",
+                         "-//ietf//dtd html strict//",
+                         "-//ietf//dtd html//",
+                         "-//metrius//dtd metrius presentational//",
+                         "-//microsoft//dtd internet explorer 2.0 html strict//",
+                         "-//microsoft//dtd internet explorer 2.0 html//",
+                         "-//microsoft//dtd internet explorer 2.0 tables//",
+                         "-//microsoft//dtd internet explorer 3.0 html strict//",
+                         "-//microsoft//dtd internet explorer 3.0 html//",
+                         "-//microsoft//dtd internet explorer 3.0 tables//",
+                         "-//netscape comm. corp.//dtd html//",
+                         "-//netscape comm. corp.//dtd strict html//",
+                         "-//o'reilly and associates//dtd html 2.0//",
+                         "-//o'reilly and associates//dtd html extended 1.0//",
+                         "-//o'reilly and associates//dtd html extended relaxed 1.0//",
+                         "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
+                         "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
+                         "-//spyglass//dtd html 2.0 extended//",
+                         "-//sq//dtd html 2.0 hotmetal + extensions//",
+                         "-//sun microsystems corp.//dtd hotjava html//",
+                         "-//sun microsystems corp.//dtd hotjava strict html//",
+                         "-//w3c//dtd html 3 1995-03-24//",
+                         "-//w3c//dtd html 3.2 draft//",
+                         "-//w3c//dtd html 3.2 final//",
+                         "-//w3c//dtd html 3.2//",
+                         "-//w3c//dtd html 3.2s draft//",
+                         "-//w3c//dtd html 4.0 frameset//",
+                         "-//w3c//dtd html 4.0 transitional//",
+                         "-//w3c//dtd html experimental 19960712//",
+                         "-//w3c//dtd html experimental 970421//",
+                         "-//w3c//dtd w3 html//",
+                         "-//w3o//dtd w3 html 3.0//",
+                         "-//webtechs//dtd mozilla html 2.0//",
+                         "-//webtechs//dtd mozilla html//")) or
+                    publicId in ("-//w3o//dtd w3 html strict 3.0//en//",
+                                 "-/w3c/dtd html 4.0 transitional/en",
+                                 "html") or
+                    publicId.startswith(
+                        ("-//w3c//dtd html 4.01 frameset//",
+                         "-//w3c//dtd html 4.01 transitional//")) and
+                    systemId is None or
+                    systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
                 self.parser.compatMode = "quirks"
             elif (publicId.startswith(
                     ("-//w3c//dtd xhtml 1.0 frameset//",
-                     "-//w3c//dtd xhtml 1.0 transitional//"))
-                  or publicId.startswith(
+                     "-//w3c//dtd xhtml 1.0 transitional//")) or
+                  publicId.startswith(
                       ("-//w3c//dtd html 4.01 frameset//",
                        "-//w3c//dtd html 4.01 transitional//")) and
                   systemId is not None):
@@ -610,6 +626,8 @@ def getPhases(debug):
             return True
 
     class BeforeHtmlPhase(Phase):
+        __slots__ = tuple()
+
         # helper methods
         def insertHtmlElement(self):
             self.tree.insertRoot(impliedTagToken("html", "StartTag"))
@@ -645,19 +663,7 @@ def getPhases(debug):
                 return token
 
     class BeforeHeadPhase(Phase):
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-
-            self.startTagHandler = utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                ("head", self.startTagHead)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = utils.MethodDispatcher([
-                (("head", "body", "html", "br"), self.endTagImplyHead)
-            ])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()
 
         def processEOF(self):
             self.startTagHead(impliedTagToken("head", "StartTag"))
@@ -690,27 +696,19 @@ def getPhases(debug):
             self.parser.parseError("end-tag-after-implied-root",
                                    {"name": token["name"]})
 
+        startTagHandler = _utils.MethodDispatcher([  # class-body table: start-tag name -> plain (unbound) function
+            ("html", startTagHtml),
+            ("head", startTagHead)
+        ])
+        startTagHandler.default = startTagOther  # presumably the fallback for names not listed -- confirm in _utils
+
+        endTagHandler = _utils.MethodDispatcher([  # class-body table: end-tag name(s) -> plain (unbound) function
+            (("head", "body", "html", "br"), endTagImplyHead)
+        ])
+        endTagHandler.default = endTagOther  # presumably the fallback for names not listed -- confirm in _utils
+
     class InHeadPhase(Phase):
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-
-            self.startTagHandler = utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                ("title", self.startTagTitle),
-                (("noscript", "noframes", "style"), self.startTagNoScriptNoFramesStyle),
-                ("script", self.startTagScript),
-                (("base", "basefont", "bgsound", "command", "link"),
-                 self.startTagBaseLinkCommand),
-                ("meta", self.startTagMeta),
-                ("head", self.startTagHead)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self. endTagHandler = utils.MethodDispatcher([
-                ("head", self.endTagHead),
-                (("br", "html", "body"), self.endTagHtmlBodyBr)
-            ])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()
 
         # the real thing
         def processEOF(self):
@@ -748,18 +746,25 @@ def getPhases(debug):
                     # the abstract Unicode string, and just use the
                     # ContentAttrParser on that, but using UTF-8 allows all chars
                     # to be encoded and as a ASCII-superset works.
-                    data = inputstream.EncodingBytes(attributes["content"].encode("utf-8"))
-                    parser = inputstream.ContentAttrParser(data)
+                    data = _inputstream.EncodingBytes(attributes["content"].encode("utf-8"))
+                    parser = _inputstream.ContentAttrParser(data)
                     codec = parser.parse()
                     self.parser.tokenizer.stream.changeEncoding(codec)
 
         def startTagTitle(self, token):
             self.parser.parseRCDataRawtext(token, "RCDATA")
 
-        def startTagNoScriptNoFramesStyle(self, token):
+        def startTagNoFramesStyle(self, token):
             # Need to decide whether to implement the scripting-disabled case
             self.parser.parseRCDataRawtext(token, "RAWTEXT")
 
+        def startTagNoscript(self, token):  # <noscript> start tag seen by the in-head phase
+            if self.parser.scripting:
+                self.parser.parseRCDataRawtext(token, "RAWTEXT")  # scripting on: hand over to the RAWTEXT path
+            else:
+                self.tree.insertElement(token)  # scripting off: insert an element for the tag...
+                self.parser.phase = self.parser.phases["inHeadNoscript"]  # ...and continue in the inHeadNoscript phase
+
         def startTagScript(self, token):
             self.tree.insertElement(token)
             self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState
@@ -785,27 +790,90 @@ def getPhases(debug):
         def anythingElse(self):
             self.endTagHead(impliedTagToken("head"))
 
-    # XXX If we implement a parser for which scripting is disabled we need to
-    # implement this phase.
-    #
-    # class InHeadNoScriptPhase(Phase):
-    class AfterHeadPhase(Phase):
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
+        startTagHandler = _utils.MethodDispatcher([  # class-body table: start-tag name(s) -> plain (unbound) function
+            ("html", startTagHtml),
+            ("title", startTagTitle),
+            (("noframes", "style"), startTagNoFramesStyle),
+            ("noscript", startTagNoscript),  # own handler: its behaviour branches on parser.scripting
+            ("script", startTagScript),
+            (("base", "basefont", "bgsound", "command", "link"),
+             startTagBaseLinkCommand),
+            ("meta", startTagMeta),
+            ("head", startTagHead)
+        ])
+        startTagHandler.default = startTagOther  # presumably the fallback for names not listed -- confirm in _utils
 
-            self.startTagHandler = utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                ("body", self.startTagBody),
-                ("frameset", self.startTagFrameset),
-                (("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
-                  "style", "title"),
-                 self.startTagFromHead),
-                ("head", self.startTagHead)
-            ])
-            self.startTagHandler.default = self.startTagOther
-            self.endTagHandler = utils.MethodDispatcher([(("body", "html", "br"),
-                                                          self.endTagHtmlBodyBr)])
-            self.endTagHandler.default = self.endTagOther
+        endTagHandler = _utils.MethodDispatcher([  # class-body table: end-tag name(s) -> plain (unbound) function
+            ("head", endTagHead),
+            (("br", "html", "body"), endTagHtmlBodyBr)
+        ])
+        endTagHandler.default = endTagOther  # presumably the fallback for names not listed -- confirm in _utils
+
+    class InHeadNoscriptPhase(Phase):  # phase selected by InHeadPhase.startTagNoscript when parser.scripting is false
+        __slots__ = tuple()  # declares no instance attributes of its own
+
+        def processEOF(self):  # EOF while the noscript is open: report it, then close the noscript
+            self.parser.parseError("eof-in-head-noscript")
+            self.anythingElse()
+            return True
+
+        def processComment(self, token):  # comments are handled by the inHead phase
+            return self.parser.phases["inHead"].processComment(token)
+
+        def processCharacters(self, token):  # character token: report, close the noscript, hand the token back
+            self.parser.parseError("char-in-head-noscript")
+            self.anythingElse()
+            return token
+
+        def processSpaceCharacters(self, token):  # whitespace is handled by the inHead phase
+            return self.parser.phases["inHead"].processSpaceCharacters(token)
+
+        def startTagHtml(self, token):  # <html> is delegated to the inBody phase
+            return self.parser.phases["inBody"].processStartTag(token)
+
+        def startTagBaseLinkCommand(self, token):  # delegated to inHead; startTagHandler lists the tags routed here
+            return self.parser.phases["inHead"].processStartTag(token)
+
+        def startTagHeadNoscript(self, token):  # <head>/<noscript> here: parse error only, nothing is returned
+            self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
+
+        def startTagOther(self, token):  # any other start tag: report, close the noscript, hand the token back
+            self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]})
+            self.anythingElse()
+            return token
+
+        def endTagNoscript(self, token):  # pop the current node (asserted to be noscript) and return to inHead
+            node = self.parser.tree.openElements.pop()
+            assert node.name == "noscript", "Expected noscript got %s" % node.name
+            self.parser.phase = self.parser.phases["inHead"]
+
+        def endTagBr(self, token):  # </br>: same steps as startTagOther
+            self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]})
+            self.anythingElse()
+            return token
+
+        def endTagOther(self, token):  # any other end tag: parse error only, nothing is returned
+            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
+
+        def anythingElse(self):  # shared tail: close the noscript through endTagNoscript with an implied token
+            # Caller must raise parse error first!
+            self.endTagNoscript(impliedTagToken("noscript"))
+
+        startTagHandler = _utils.MethodDispatcher([  # class-body table: start-tag name(s) -> plain (unbound) function
+            ("html", startTagHtml),
+            (("basefont", "bgsound", "link", "meta", "noframes", "style"), startTagBaseLinkCommand),
+            (("head", "noscript"), startTagHeadNoscript),
+        ])
+        startTagHandler.default = startTagOther  # presumably the fallback for names not listed -- confirm in _utils
+
+        endTagHandler = _utils.MethodDispatcher([  # class-body table: end-tag name -> plain (unbound) function
+            ("noscript", endTagNoscript),
+            ("br", endTagBr),
+        ])
+        endTagHandler.default = endTagOther  # presumably the fallback for names not listed -- confirm in _utils
+
+    class AfterHeadPhase(Phase):
+        __slots__ = tuple()
 
         def processEOF(self):
             self.anythingElse()
@@ -856,91 +924,34 @@ def getPhases(debug):
             self.parser.phase = self.parser.phases["inBody"]
             self.parser.framesetOK = True
 
+        startTagHandler = _utils.MethodDispatcher([  # class-body table: start-tag name(s) -> plain (unbound) function
+            ("html", startTagHtml),
+            ("body", startTagBody),
+            ("frameset", startTagFrameset),
+            (("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
+              "style", "title"),
+             startTagFromHead),
+            ("head", startTagHead)
+        ])
+        startTagHandler.default = startTagOther  # presumably the fallback for names not listed -- confirm in _utils
+        endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"),  # end-tag counterpart of the table above
+                                                  endTagHtmlBodyBr)])
+        endTagHandler.default = endTagOther  # presumably the fallback for names not listed -- confirm in _utils
+
     class InBodyPhase(Phase):
         # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
         # the really-really-really-very crazy mode
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
+        __slots__ = ("processSpaceCharacters",)
 
-            # Keep a ref to this for special handling of whitespace in <pre>
-            self.processSpaceCharactersNonPre = self.processSpaceCharacters
-
-            self.startTagHandler = utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                (("base", "basefont", "bgsound", "command", "link", "meta",
-                  "noframes", "script", "style", "title"),
-                 self.startTagProcessInHead),
-                ("body", self.startTagBody),
-                ("frameset", self.startTagFrameset),
-                (("address", "article", "aside", "blockquote", "center", "details",
-                  "details", "dir", "div", "dl", "fieldset", "figcaption", "figure",
-                  "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
-                  "section", "summary", "ul"),
-                 self.startTagCloseP),
-                (headingElements, self.startTagHeading),
-                (("pre", "listing"), self.startTagPreListing),
-                ("form", self.startTagForm),
-                (("li", "dd", "dt"), self.startTagListItem),
-                ("plaintext", self.startTagPlaintext),
-                ("a", self.startTagA),
-                (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
-                  "strong", "tt", "u"), self.startTagFormatting),
-                ("nobr", self.startTagNobr),
-                ("button", self.startTagButton),
-                (("applet", "marquee", "object"), self.startTagAppletMarqueeObject),
-                ("xmp", self.startTagXmp),
-                ("table", self.startTagTable),
-                (("area", "br", "embed", "img", "keygen", "wbr"),
-                 self.startTagVoidFormatting),
-                (("param", "source", "track"), self.startTagParamSource),
-                ("input", self.startTagInput),
-                ("hr", self.startTagHr),
-                ("image", self.startTagImage),
-                ("isindex", self.startTagIsIndex),
-                ("textarea", self.startTagTextarea),
-                ("iframe", self.startTagIFrame),
-                (("noembed", "noframes", "noscript"), self.startTagRawtext),
-                ("select", self.startTagSelect),
-                (("rp", "rt"), self.startTagRpRt),
-                (("option", "optgroup"), self.startTagOpt),
-                (("math"), self.startTagMath),
-                (("svg"), self.startTagSvg),
-                (("caption", "col", "colgroup", "frame", "head",
-                  "tbody", "td", "tfoot", "th", "thead",
-                  "tr"), self.startTagMisplaced)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = utils.MethodDispatcher([
-                ("body", self.endTagBody),
-                ("html", self.endTagHtml),
-                (("address", "article", "aside", "blockquote", "button", "center",
-                  "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
-                  "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
-                  "section", "summary", "ul"), self.endTagBlock),
-                ("form", self.endTagForm),
-                ("p", self.endTagP),
-                (("dd", "dt", "li"), self.endTagListItem),
-                (headingElements, self.endTagHeading),
-                (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
-                  "strike", "strong", "tt", "u"), self.endTagFormatting),
-                (("applet", "marquee", "object"), self.endTagAppletMarqueeObject),
-                ("br", self.endTagBr),
-            ])
-            self.endTagHandler.default = self.endTagOther
+        def __init__(self, *args, **kwargs):  # arguments are forwarded unchanged to the base-class initialiser
+            super(InBodyPhase, self).__init__(*args, **kwargs)
+            # Set this to the default handler
+            self.processSpaceCharacters = self.processSpaceCharactersNonPre  # instance attribute named in __slots__
 
         def isMatchingFormattingElement(self, node1, node2):
-            if node1.name != node2.name or node1.namespace != node2.namespace:
-                return False
-            elif len(node1.attributes) != len(node2.attributes):
-                return False
-            else:
-                attributes1 = sorted(node1.attributes.items())
-                attributes2 = sorted(node2.attributes.items())
-                for attr1, attr2 in zip(attributes1, attributes2):
-                    if attr1 != attr2:
-                        return False
-            return True
+            return (node1.name == node2.name and  # True only when name, namespace and attributes all compare equal
+                    node1.namespace == node2.namespace and
+                    node1.attributes == node2.attributes)
 
         # helper
         def addFormattingElement(self, token):
@@ -976,8 +987,8 @@ def getPhases(debug):
             data = token["data"]
             self.processSpaceCharacters = self.processSpaceCharactersNonPre
             if (data.startswith("\n") and
-                self.tree.openElements[-1].name in ("pre", "listing", "textarea")
-                    and not self.tree.openElements[-1].hasContent()):
+                self.tree.openElements[-1].name in ("pre", "listing", "textarea") and
+                    not self.tree.openElements[-1].hasContent()):
                 data = data[1:]
             if data:
                 self.tree.reconstructActiveFormattingElements()
@@ -995,7 +1006,7 @@ def getPhases(debug):
                      for char in token["data"]])):
                 self.parser.framesetOK = False
 
-        def processSpaceCharacters(self, token):
+        def processSpaceCharactersNonPre(self, token):
             self.tree.reconstructActiveFormattingElements()
             self.tree.insertText(token["data"])
 
@@ -1004,8 +1015,8 @@ def getPhases(debug):
 
         def startTagBody(self, token):
             self.parser.parseError("unexpected-start-tag", {"name": "body"})
-            if (len(self.tree.openElements) == 1
-                    or self.tree.openElements[1].name != "body"):
+            if (len(self.tree.openElements) == 1 or
+                    self.tree.openElements[1].name != "body"):
                 assert self.parser.innerHTML
             else:
                 self.parser.framesetOK = False
@@ -1205,8 +1216,7 @@ def getPhases(debug):
             attributes["name"] = "isindex"
             self.processStartTag(impliedTagToken("input", "StartTag",
                                                  attributes=attributes,
-                                                 selfClosing=
-                                                 token["selfClosing"]))
+                                                 selfClosing=token["selfClosing"]))
             self.processEndTag(impliedTagToken("label"))
             self.processStartTag(impliedTagToken("hr", "StartTag"))
             self.processEndTag(impliedTagToken("form"))
@@ -1221,6 +1231,12 @@ def getPhases(debug):
             self.parser.framesetOK = False
             self.startTagRawtext(token)
 
+        def startTagNoscript(self, token):  # <noscript> start tag seen by the in-body phase
+            if self.parser.scripting:
+                self.startTagRawtext(token)  # scripting on: same path as the other raw-text tags
+            else:
+                self.startTagOther(token)  # scripting off: falls through to the generic start-tag handler
+
         def startTagRawtext(self, token):
             """iframe, noembed noframes, noscript(if scripting enabled)"""
             self.parser.parseRCDataRawtext(token, "RAWTEXT")
@@ -1316,7 +1332,7 @@ def getPhases(debug):
                         # Not sure this is the correct name for the parse error
                         self.parser.parseError(
                             "expected-one-end-tag-but-got-another",
-                            {"expectedName": "body", "gotName": node.name})
+                            {"gotName": "body", "expectedName": node.name})
                         break
             self.parser.phase = self.parser.phases["afterBody"]
 
@@ -1581,14 +1597,73 @@ def getPhases(debug):
                         self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
                         break
 
+        startTagHandler = _utils.MethodDispatcher([  # class-body table: start-tag name(s) -> plain (unbound) function
+            ("html", Phase.startTagHtml),  # qualified with Phase; presumably not redefined in this class body
+            (("base", "basefont", "bgsound", "command", "link", "meta",
+              "script", "style", "title"),
+             startTagProcessInHead),
+            ("body", startTagBody),
+            ("frameset", startTagFrameset),
+            (("address", "article", "aside", "blockquote", "center", "details",
+              "dir", "div", "dl", "fieldset", "figcaption", "figure",
+              "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
+              "section", "summary", "ul"),
+             startTagCloseP),
+            (headingElements, startTagHeading),
+            (("pre", "listing"), startTagPreListing),
+            ("form", startTagForm),
+            (("li", "dd", "dt"), startTagListItem),
+            ("plaintext", startTagPlaintext),
+            ("a", startTagA),
+            (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
+              "strong", "tt", "u"), startTagFormatting),
+            ("nobr", startTagNobr),
+            ("button", startTagButton),
+            (("applet", "marquee", "object"), startTagAppletMarqueeObject),
+            ("xmp", startTagXmp),
+            ("table", startTagTable),
+            (("area", "br", "embed", "img", "keygen", "wbr"),
+             startTagVoidFormatting),
+            (("param", "source", "track"), startTagParamSource),
+            ("input", startTagInput),
+            ("hr", startTagHr),
+            ("image", startTagImage),
+            ("isindex", startTagIsIndex),
+            ("textarea", startTagTextarea),
+            ("iframe", startTagIFrame),
+            ("noscript", startTagNoscript),  # own handler: startTagNoscript branches on parser.scripting
+            (("noembed", "noframes"), startTagRawtext),
+            ("select", startTagSelect),
+            (("rp", "rt"), startTagRpRt),
+            (("option", "optgroup"), startTagOpt),
+            (("math"), startTagMath),  # NOTE: ("math") is the plain string "math", not a 1-tuple
+            (("svg"), startTagSvg),  # NOTE: ("svg") is the plain string "svg", not a 1-tuple
+            (("caption", "col", "colgroup", "frame", "head",
+              "tbody", "td", "tfoot", "th", "thead",
+              "tr"), startTagMisplaced)
+        ])
+        startTagHandler.default = startTagOther  # presumably the fallback for names not listed -- confirm in _utils
+
+        endTagHandler = _utils.MethodDispatcher([  # class-body table: end-tag name(s) -> plain (unbound) function
+            ("body", endTagBody),
+            ("html", endTagHtml),
+            (("address", "article", "aside", "blockquote", "button", "center",
+              "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
+              "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
+              "section", "summary", "ul"), endTagBlock),
+            ("form", endTagForm),
+            ("p", endTagP),
+            (("dd", "dt", "li"), endTagListItem),
+            (headingElements, endTagHeading),
+            (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
+              "strike", "strong", "tt", "u"), endTagFormatting),
+            (("applet", "marquee", "object"), endTagAppletMarqueeObject),
+            ("br", endTagBr),
+        ])
+        endTagHandler.default = endTagOther  # presumably the fallback for names not listed -- confirm in _utils
+
     class TextPhase(Phase):
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-            self.startTagHandler = utils.MethodDispatcher([])
-            self.startTagHandler.default = self.startTagOther
-            self.endTagHandler = utils.MethodDispatcher([
-                ("script", self.endTagScript)])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()
 
         def processCharacters(self, token):
             self.tree.insertText(token["data"])
@@ -1614,30 +1689,15 @@ def getPhases(debug):
             self.tree.openElements.pop()
             self.parser.phase = self.parser.originalPhase
 
+        startTagHandler = _utils.MethodDispatcher([])  # no named start-tag entries at all in this phase
+        startTagHandler.default = startTagOther  # presumably reached for every start tag -- confirm in _utils
+        endTagHandler = _utils.MethodDispatcher([  # "script" is the only end tag with a dedicated handler
+            ("script", endTagScript)])
+        endTagHandler.default = endTagOther  # presumably the fallback for names not listed -- confirm in _utils
+
     class InTablePhase(Phase):
         # http://www.whatwg.org/specs/web-apps/current-work/#in-table
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-            self.startTagHandler = utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                ("caption", self.startTagCaption),
-                ("colgroup", self.startTagColgroup),
-                ("col", self.startTagCol),
-                (("tbody", "tfoot", "thead"), self.startTagRowGroup),
-                (("td", "th", "tr"), self.startTagImplyTbody),
-                ("table", self.startTagTable),
-                (("style", "script"), self.startTagStyleScript),
-                ("input", self.startTagInput),
-                ("form", self.startTagForm)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = utils.MethodDispatcher([
-                ("table", self.endTagTable),
-                (("body", "caption", "col", "colgroup", "html", "tbody", "td",
-                  "tfoot", "th", "thead", "tr"), self.endTagIgnore)
-            ])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()
 
         # helper methods
         def clearStackToTableContext(self):
@@ -1759,9 +1819,32 @@ def getPhases(debug):
             self.parser.phases["inBody"].processEndTag(token)
             self.tree.insertFromTable = False
 
+        startTagHandler = _utils.MethodDispatcher([  # class-body table: start-tag name(s) -> plain (unbound) function
+            ("html", Phase.startTagHtml),  # qualified with Phase; presumably not redefined in this class body
+            ("caption", startTagCaption),
+            ("colgroup", startTagColgroup),
+            ("col", startTagCol),
+            (("tbody", "tfoot", "thead"), startTagRowGroup),
+            (("td", "th", "tr"), startTagImplyTbody),
+            ("table", startTagTable),
+            (("style", "script"), startTagStyleScript),
+            ("input", startTagInput),
+            ("form", startTagForm)
+        ])
+        startTagHandler.default = startTagOther  # presumably the fallback for names not listed -- confirm in _utils
+
+        endTagHandler = _utils.MethodDispatcher([  # class-body table: end-tag name(s) -> plain (unbound) function
+            ("table", endTagTable),
+            (("body", "caption", "col", "colgroup", "html", "tbody", "td",
+              "tfoot", "th", "thead", "tr"), endTagIgnore)
+        ])
+        endTagHandler.default = endTagOther  # presumably the fallback for names not listed -- confirm in _utils
+
     class InTableTextPhase(Phase):
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
+        __slots__ = ("originalPhase", "characterTokens")
+
+        def __init__(self, *args, **kwargs):  # arguments are forwarded unchanged to the base-class initialiser
+            super(InTableTextPhase, self).__init__(*args, **kwargs)  # two-argument super(); presumably kept for Python 2
             self.originalPhase = None
             self.characterTokens = []
 
@@ -1806,23 +1889,7 @@ def getPhases(debug):
 
     class InCaptionPhase(Phase):
         # http://www.whatwg.org/specs/web-apps/current-work/#in-caption
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-
-            self.startTagHandler = utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
-                  "thead", "tr"), self.startTagTableElement)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = utils.MethodDispatcher([
-                ("caption", self.endTagCaption),
-                ("table", self.endTagTable),
-                (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
-                  "thead", "tr"), self.endTagIgnore)
-            ])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()
 
         def ignoreEndTagCaption(self):
             return not self.tree.elementInScope("caption", variant="table")
@@ -1875,23 +1942,24 @@ def getPhases(debug):
         def endTagOther(self, token):
             return self.parser.phases["inBody"].processEndTag(token)
 
+        startTagHandler = _utils.MethodDispatcher([  # class-body table: start-tag name(s) -> plain (unbound) function
+            ("html", Phase.startTagHtml),  # qualified with Phase; presumably not redefined in this class body
+            (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
+              "thead", "tr"), startTagTableElement)
+        ])
+        startTagHandler.default = startTagOther  # presumably the fallback for names not listed -- confirm in _utils
+
+        endTagHandler = _utils.MethodDispatcher([  # class-body table: end-tag name(s) -> plain (unbound) function
+            ("caption", endTagCaption),
+            ("table", endTagTable),
+            (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
+              "thead", "tr"), endTagIgnore)
+        ])
+        endTagHandler.default = endTagOther  # presumably the fallback for names not listed -- confirm in _utils
+
     class InColumnGroupPhase(Phase):
         # http://www.whatwg.org/specs/web-apps/current-work/#in-column
-
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-
-            self.startTagHandler = utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                ("col", self.startTagCol)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = utils.MethodDispatcher([
-                ("colgroup", self.endTagColgroup),
-                ("col", self.endTagCol)
-            ])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()
 
         def ignoreEndTagColgroup(self):
             return self.tree.openElements[-1].name == "html"
@@ -1915,6 +1983,7 @@ def getPhases(debug):
         def startTagCol(self, token):
             self.tree.insertElement(token)
             self.tree.openElements.pop()
+            token["selfClosingAcknowledged"] = True  # element was popped right after insertion; flag the token as handled
 
         def startTagOther(self, token):
             ignoreEndTag = self.ignoreEndTagColgroup()
@@ -1940,26 +2009,21 @@ def getPhases(debug):
             if not ignoreEndTag:
                 return token
 
+        startTagHandler = _utils.MethodDispatcher([  # class-body table: start-tag name -> plain (unbound) function
+            ("html", Phase.startTagHtml),  # qualified with Phase; presumably not redefined in this class body
+            ("col", startTagCol)
+        ])
+        startTagHandler.default = startTagOther  # presumably the fallback for names not listed -- confirm in _utils
+
+        endTagHandler = _utils.MethodDispatcher([  # class-body table: end-tag name -> plain (unbound) function
+            ("colgroup", endTagColgroup),
+            ("col", endTagCol)
+        ])
+        endTagHandler.default = endTagOther  # presumably the fallback for names not listed -- confirm in _utils
+
     class InTableBodyPhase(Phase):
         # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-            self.startTagHandler = utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                ("tr", self.startTagTr),
-                (("td", "th"), self.startTagTableCell),
-                (("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
-                 self.startTagTableOther)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = utils.MethodDispatcher([
-                (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
-                ("table", self.endTagTable),
-                (("body", "caption", "col", "colgroup", "html", "td", "th",
-                  "tr"), self.endTagIgnore)
-            ])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()
 
         # helper methods
         def clearStackToTableBodyContext(self):
@@ -2038,26 +2102,26 @@ def getPhases(debug):
         def endTagOther(self, token):
             return self.parser.phases["inTable"].processEndTag(token)
 
+        startTagHandler = _utils.MethodDispatcher([  # class-body table: start-tag name(s) -> plain (unbound) function
+            ("html", Phase.startTagHtml),  # qualified with Phase; presumably not redefined in this class body
+            ("tr", startTagTr),
+            (("td", "th"), startTagTableCell),
+            (("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
+             startTagTableOther)
+        ])
+        startTagHandler.default = startTagOther  # presumably the fallback for names not listed -- confirm in _utils
+
+        endTagHandler = _utils.MethodDispatcher([  # class-body table: end-tag name(s) -> plain (unbound) function
+            (("tbody", "tfoot", "thead"), endTagTableRowGroup),
+            ("table", endTagTable),
+            (("body", "caption", "col", "colgroup", "html", "td", "th",
+              "tr"), endTagIgnore)
+        ])
+        endTagHandler.default = endTagOther  # presumably the fallback for names not listed -- confirm in _utils
+
     class InRowPhase(Phase):
         # http://www.whatwg.org/specs/web-apps/current-work/#in-row
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-            self.startTagHandler = utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                (("td", "th"), self.startTagTableCell),
-                (("caption", "col", "colgroup", "tbody", "tfoot", "thead",
-                  "tr"), self.startTagTableOther)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = utils.MethodDispatcher([
-                ("tr", self.endTagTr),
-                ("table", self.endTagTable),
-                (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
-                (("body", "caption", "col", "colgroup", "html", "td", "th"),
-                 self.endTagIgnore)
-            ])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()
 
         # helper methods (XXX unify this with other table helper methods)
         def clearStackToTableRowContext(self):
@@ -2127,23 +2191,26 @@ def getPhases(debug):
         def endTagOther(self, token):
             return self.parser.phases["inTable"].processEndTag(token)
 
+        startTagHandler = _utils.MethodDispatcher([  # class-body table: start-tag name(s) -> plain (unbound) function
+            ("html", Phase.startTagHtml),  # qualified with Phase; presumably not redefined in this class body
+            (("td", "th"), startTagTableCell),
+            (("caption", "col", "colgroup", "tbody", "tfoot", "thead",
+              "tr"), startTagTableOther)
+        ])
+        startTagHandler.default = startTagOther  # presumably the fallback for names not listed -- confirm in _utils
+
+        endTagHandler = _utils.MethodDispatcher([  # class-body table: end-tag name(s) -> plain (unbound) function
+            ("tr", endTagTr),
+            ("table", endTagTable),
+            (("tbody", "tfoot", "thead"), endTagTableRowGroup),
+            (("body", "caption", "col", "colgroup", "html", "td", "th"),
+             endTagIgnore)
+        ])
+        endTagHandler.default = endTagOther  # presumably the fallback for names not listed -- confirm in _utils
+
     class InCellPhase(Phase):
         # http://www.whatwg.org/specs/web-apps/current-work/#in-cell
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-            self.startTagHandler = utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
-                  "thead", "tr"), self.startTagTableOther)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = utils.MethodDispatcher([
-                (("td", "th"), self.endTagTableCell),
-                (("body", "caption", "col", "colgroup", "html"), self.endTagIgnore),
-                (("table", "tbody", "tfoot", "thead", "tr"), self.endTagImply)
-            ])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()
 
         # helper
         def closeCell(self):
@@ -2203,26 +2270,22 @@ def getPhases(debug):
         def endTagOther(self, token):
             return self.parser.phases["inBody"].processEndTag(token)
 
+        startTagHandler = _utils.MethodDispatcher([
+            ("html", Phase.startTagHtml),
+            (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
+              "thead", "tr"), startTagTableOther)
+        ])
+        startTagHandler.default = startTagOther
+
+        endTagHandler = _utils.MethodDispatcher([
+            (("td", "th"), endTagTableCell),
+            (("body", "caption", "col", "colgroup", "html"), endTagIgnore),
+            (("table", "tbody", "tfoot", "thead", "tr"), endTagImply)
+        ])
+        endTagHandler.default = endTagOther
+
     class InSelectPhase(Phase):
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-
-            self.startTagHandler = utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                ("option", self.startTagOption),
-                ("optgroup", self.startTagOptgroup),
-                ("select", self.startTagSelect),
-                (("input", "keygen", "textarea"), self.startTagInput),
-                ("script", self.startTagScript)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = utils.MethodDispatcher([
-                ("option", self.endTagOption),
-                ("optgroup", self.endTagOptgroup),
-                ("select", self.endTagSelect)
-            ])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()
 
         # http://www.whatwg.org/specs/web-apps/current-work/#in-select
         def processEOF(self):
@@ -2303,21 +2366,25 @@ def getPhases(debug):
             self.parser.parseError("unexpected-end-tag-in-select",
                                    {"name": token["name"]})
 
+        startTagHandler = _utils.MethodDispatcher([
+            ("html", Phase.startTagHtml),
+            ("option", startTagOption),
+            ("optgroup", startTagOptgroup),
+            ("select", startTagSelect),
+            (("input", "keygen", "textarea"), startTagInput),
+            ("script", startTagScript)
+        ])
+        startTagHandler.default = startTagOther
+
+        endTagHandler = _utils.MethodDispatcher([
+            ("option", endTagOption),
+            ("optgroup", endTagOptgroup),
+            ("select", endTagSelect)
+        ])
+        endTagHandler.default = endTagOther
+
     class InSelectInTablePhase(Phase):
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-
-            self.startTagHandler = utils.MethodDispatcher([
-                (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
-                 self.startTagTable)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = utils.MethodDispatcher([
-                (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
-                 self.endTagTable)
-            ])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()
 
         def processEOF(self):
             self.parser.phases["inSelect"].processEOF()
@@ -2342,7 +2409,21 @@ def getPhases(debug):
         def endTagOther(self, token):
             return self.parser.phases["inSelect"].processEndTag(token)
 
+        startTagHandler = _utils.MethodDispatcher([
+            (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
+             startTagTable)
+        ])
+        startTagHandler.default = startTagOther
+
+        endTagHandler = _utils.MethodDispatcher([
+            (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
+             endTagTable)
+        ])
+        endTagHandler.default = endTagOther
+
     class InForeignContentPhase(Phase):
+        __slots__ = tuple()
+
         breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
                                       "center", "code", "dd", "div", "dl", "dt",
                                       "em", "embed", "h1", "h2", "h3",
@@ -2352,9 +2433,6 @@ def getPhases(debug):
                                       "span", "strong", "strike", "sub", "sup",
                                       "table", "tt", "u", "ul", "var"])
 
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-
         def adjustSVGTagNames(self, token):
             replacements = {"altglyph": "altGlyph",
                             "altglyphdef": "altGlyphDef",
@@ -2408,7 +2486,7 @@ def getPhases(debug):
             currentNode = self.tree.openElements[-1]
             if (token["name"] in self.breakoutElements or
                 (token["name"] == "font" and
-                 set(token["data"].keys()) & set(["color", "face", "size"]))):
+                 set(token["data"].keys()) & {"color", "face", "size"})):
                 self.parser.parseError("unexpected-html-element-in-foreign-content",
                                        {"name": token["name"]})
                 while (self.tree.openElements[-1].namespace !=
@@ -2434,7 +2512,7 @@ def getPhases(debug):
         def processEndTag(self, token):
             nodeIndex = len(self.tree.openElements) - 1
             node = self.tree.openElements[-1]
-            if node.name != token["name"]:
+            if node.name.translate(asciiUpper2Lower) != token["name"]:
                 self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
 
             while True:
@@ -2458,16 +2536,7 @@ def getPhases(debug):
             return new_token
 
     class AfterBodyPhase(Phase):
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-
-            self.startTagHandler = utils.MethodDispatcher([
-                ("html", self.startTagHtml)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = utils.MethodDispatcher([("html", self.endTagHtml)])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()
 
         def processEOF(self):
             # Stop parsing
@@ -2504,23 +2573,17 @@ def getPhases(debug):
             self.parser.phase = self.parser.phases["inBody"]
             return token
 
+        startTagHandler = _utils.MethodDispatcher([
+            ("html", startTagHtml)
+        ])
+        startTagHandler.default = startTagOther
+
+        endTagHandler = _utils.MethodDispatcher([("html", endTagHtml)])
+        endTagHandler.default = endTagOther
+
     class InFramesetPhase(Phase):
         # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-
-            self.startTagHandler = utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                ("frameset", self.startTagFrameset),
-                ("frame", self.startTagFrame),
-                ("noframes", self.startTagNoframes)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = utils.MethodDispatcher([
-                ("frameset", self.endTagFrameset)
-            ])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()
 
         def processEOF(self):
             if self.tree.openElements[-1].name != "html":
@@ -2553,7 +2616,7 @@ def getPhases(debug):
                 self.tree.openElements.pop()
             if (not self.parser.innerHTML and
                     self.tree.openElements[-1].name != "frameset"):
-                # If we're not in innerHTML mode and the the current node is not a
+                # If we're not in innerHTML mode and the current node is not a
                 # "frameset" element (anymore) then switch.
                 self.parser.phase = self.parser.phases["afterFrameset"]
 
@@ -2561,21 +2624,22 @@ def getPhases(debug):
             self.parser.parseError("unexpected-end-tag-in-frameset",
                                    {"name": token["name"]})
 
+        startTagHandler = _utils.MethodDispatcher([
+            ("html", Phase.startTagHtml),
+            ("frameset", startTagFrameset),
+            ("frame", startTagFrame),
+            ("noframes", startTagNoframes)
+        ])
+        startTagHandler.default = startTagOther
+
+        endTagHandler = _utils.MethodDispatcher([
+            ("frameset", endTagFrameset)
+        ])
+        endTagHandler.default = endTagOther
+
     class AfterFramesetPhase(Phase):
         # http://www.whatwg.org/specs/web-apps/current-work/#after3
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-
-            self.startTagHandler = utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                ("noframes", self.startTagNoframes)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = utils.MethodDispatcher([
-                ("html", self.endTagHtml)
-            ])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()
 
         def processEOF(self):
             # Stop parsing
@@ -2598,14 +2662,19 @@ def getPhases(debug):
             self.parser.parseError("unexpected-end-tag-after-frameset",
                                    {"name": token["name"]})
 
-    class AfterAfterBodyPhase(Phase):
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
+        startTagHandler = _utils.MethodDispatcher([
+            ("html", Phase.startTagHtml),
+            ("noframes", startTagNoframes)
+        ])
+        startTagHandler.default = startTagOther
 
-            self.startTagHandler = utils.MethodDispatcher([
-                ("html", self.startTagHtml)
-            ])
-            self.startTagHandler.default = self.startTagOther
+        endTagHandler = _utils.MethodDispatcher([
+            ("html", endTagHtml)
+        ])
+        endTagHandler.default = endTagOther
+
+    class AfterAfterBodyPhase(Phase):
+        __slots__ = tuple()
 
         def processEOF(self):
             pass
@@ -2636,15 +2705,13 @@ def getPhases(debug):
             self.parser.phase = self.parser.phases["inBody"]
             return token
 
-    class AfterAfterFramesetPhase(Phase):
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
+        startTagHandler = _utils.MethodDispatcher([
+            ("html", startTagHtml)
+        ])
+        startTagHandler.default = startTagOther
 
-            self.startTagHandler = utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                ("noframes", self.startTagNoFrames)
-            ])
-            self.startTagHandler.default = self.startTagOther
+    class AfterAfterFramesetPhase(Phase):
+        __slots__ = tuple()
 
         def processEOF(self):
             pass
@@ -2672,12 +2739,20 @@ def getPhases(debug):
             self.parser.parseError("expected-eof-but-got-end-tag",
                                    {"name": token["name"]})
 
+        startTagHandler = _utils.MethodDispatcher([
+            ("html", startTagHtml),
+            ("noframes", startTagNoFrames)
+        ])
+        startTagHandler.default = startTagOther
+
+    # pylint:enable=unused-argument
+
     return {
         "initial": InitialPhase,
         "beforeHtml": BeforeHtmlPhase,
         "beforeHead": BeforeHeadPhase,
         "inHead": InHeadPhase,
-        # XXX "inHeadNoscript": InHeadNoScriptPhase,
+        "inHeadNoscript": InHeadNoscriptPhase,
         "afterHead": AfterHeadPhase,
         "inBody": InBodyPhase,
         "text": TextPhase,
@@ -2700,6 +2775,13 @@ def getPhases(debug):
     }
 
 
+def adjust_attributes(token, replacements):
+    needs_adjustment = viewkeys(token['data']) & viewkeys(replacements)
+    if needs_adjustment:
+        token['data'] = type(token['data'])((replacements.get(k, k), v)
+                                            for k, v in token['data'].items())
+
+
 def impliedTagToken(name, type="EndTag", attributes=None,
                     selfClosing=False):
     if attributes is None:
diff --git a/lib/html5lib/sanitizer.py b/lib/html5lib/sanitizer.py
deleted file mode 100644
index 71dc5212..00000000
--- a/lib/html5lib/sanitizer.py
+++ /dev/null
@@ -1,271 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import re
-from xml.sax.saxutils import escape, unescape
-
-from .tokenizer import HTMLTokenizer
-from .constants import tokenTypes
-
-
-class HTMLSanitizerMixin(object):
-    """ sanitization of XHTML+MathML+SVG and of inline style attributes."""
-
-    acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area',
-                           'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
-                           'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
-                           'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
-                           'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
-                           'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
-                           'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
-                           'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter',
-                           'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option',
-                           'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
-                           'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
-                           'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
-                           'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']
-
-    mathml_elements = ['maction', 'math', 'merror', 'mfrac', 'mi',
-                       'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
-                       'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
-                       'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
-                       'munderover', 'none']
-
-    svg_elements = ['a', 'animate', 'animateColor', 'animateMotion',
-                    'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse',
-                    'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
-                    'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
-                    'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
-                    'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use']
-
-    acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
-                             'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
-                             'background', 'balance', 'bgcolor', 'bgproperties', 'border',
-                             'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
-                             'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
-                             'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
-                             'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords',
-                             'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default',
-                             'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end',
-                             'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
-                             'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace',
-                             'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing',
-                             'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
-                             'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
-                             'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
-                             'optimum', 'pattern', 'ping', 'point-size', 'poster', 'pqg', 'preload',
-                             'prompt', 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
-                             'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
-                             'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
-                             'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
-                             'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
-                             'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
-                             'width', 'wrap', 'xml:lang']
-
-    mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign',
-                         'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
-                         'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
-                         'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
-                         'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
-                         'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
-                         'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
-                         'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
-                         'xlink:type', 'xmlns', 'xmlns:xlink']
-
-    svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic',
-                      'arabic-form', 'ascent', 'attributeName', 'attributeType',
-                      'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
-                      'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
-                      'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
-                      'fill-opacity', 'fill-rule', 'font-family', 'font-size',
-                      'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
-                      'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
-                      'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
-                      'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
-                      'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
-                      'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
-                      'opacity', 'orient', 'origin', 'overline-position',
-                      'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
-                      'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
-                      'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
-                      'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
-                      'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
-                      'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
-                      'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
-                      'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
-                      'transform', 'type', 'u1', 'u2', 'underline-position',
-                      'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
-                      'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x',
-                      'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole',
-                      'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type',
-                      'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
-                      'y1', 'y2', 'zoomAndPan']
-
-    attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster',
-                       'xlink:href', 'xml:base']
-
-    svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill',
-                               'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
-                               'mask', 'stroke']
-
-    svg_allow_local_href = ['altGlyph', 'animate', 'animateColor',
-                            'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter',
-                            'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref',
-                            'set', 'use']
-
-    acceptable_css_properties = ['azimuth', 'background-color',
-                                 'border-bottom-color', 'border-collapse', 'border-color',
-                                 'border-left-color', 'border-right-color', 'border-top-color', 'clear',
-                                 'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font',
-                                 'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
-                                 'height', 'letter-spacing', 'line-height', 'overflow', 'pause',
-                                 'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness',
-                                 'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
-                                 'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent',
-                                 'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
-                                 'white-space', 'width']
-
-    acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue',
-                               'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed',
-                               'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left',
-                               'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive',
-                               'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
-                               'transparent', 'underline', 'white', 'yellow']
-
-    acceptable_svg_properties = ['fill', 'fill-opacity', 'fill-rule',
-                                 'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
-                                 'stroke-opacity']
-
-    acceptable_protocols = ['ed2k', 'ftp', 'http', 'https', 'irc',
-                            'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal',
-                            'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag',
-                            'ssh', 'sftp', 'rtsp', 'afs']
-
-    # subclasses may define their own versions of these constants
-    allowed_elements = acceptable_elements + mathml_elements + svg_elements
-    allowed_attributes = acceptable_attributes + mathml_attributes + svg_attributes
-    allowed_css_properties = acceptable_css_properties
-    allowed_css_keywords = acceptable_css_keywords
-    allowed_svg_properties = acceptable_svg_properties
-    allowed_protocols = acceptable_protocols
-
-    # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
-    # stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style
-    # attributes are parsed, and a restricted set, # specified by
-    # ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through.
-    # attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified
-    # in ALLOWED_PROTOCOLS are allowed.
-    #
-    #   sanitize_html('<script> do_nasty_stuff() </script>')
-    #    => &lt;script> do_nasty_stuff() &lt;/script>
-    #   sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
-    #    => <a>Click here for $100</a>
-    def sanitize_token(self, token):
-
-        # accommodate filters which use token_type differently
-        token_type = token["type"]
-        if token_type in list(tokenTypes.keys()):
-            token_type = tokenTypes[token_type]
-
-        if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"],
-                          tokenTypes["EmptyTag"]):
-            if token["name"] in self.allowed_elements:
-                return self.allowed_token(token, token_type)
-            else:
-                return self.disallowed_token(token, token_type)
-        elif token_type == tokenTypes["Comment"]:
-            pass
-        else:
-            return token
-
-    def allowed_token(self, token, token_type):
-        if "data" in token:
-            attrs = dict([(name, val) for name, val in
-                          token["data"][::-1]
-                          if name in self.allowed_attributes])
-            for attr in self.attr_val_is_uri:
-                if attr not in attrs:
-                    continue
-                val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
-                                       unescape(attrs[attr])).lower()
-                # remove replacement characters from unescaped characters
-                val_unescaped = val_unescaped.replace("\ufffd", "")
-                if (re.match("^[a-z0-9][-+.a-z0-9]*:", val_unescaped) and
-                    (val_unescaped.split(':')[0] not in
-                     self.allowed_protocols)):
-                    del attrs[attr]
-            for attr in self.svg_attr_val_allows_ref:
-                if attr in attrs:
-                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
-                                         ' ',
-                                         unescape(attrs[attr]))
-            if (token["name"] in self.svg_allow_local_href and
-                'xlink:href' in attrs and re.search('^\s*[^#\s].*',
-                                                    attrs['xlink:href'])):
-                del attrs['xlink:href']
-            if 'style' in attrs:
-                attrs['style'] = self.sanitize_css(attrs['style'])
-            token["data"] = [[name, val] for name, val in list(attrs.items())]
-        return token
-
-    def disallowed_token(self, token, token_type):
-        if token_type == tokenTypes["EndTag"]:
-            token["data"] = "</%s>" % token["name"]
-        elif token["data"]:
-            attrs = ''.join([' %s="%s"' % (k, escape(v)) for k, v in token["data"]])
-            token["data"] = "<%s%s>" % (token["name"], attrs)
-        else:
-            token["data"] = "<%s>" % token["name"]
-        if token.get("selfClosing"):
-            token["data"] = token["data"][:-1] + "/>"
-
-        if token["type"] in list(tokenTypes.keys()):
-            token["type"] = "Characters"
-        else:
-            token["type"] = tokenTypes["Characters"]
-
-        del token["name"]
-        return token
-
-    def sanitize_css(self, style):
-        # disallow urls
-        style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
-
-        # gauntlet
-        if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
-            return ''
-        if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
-            return ''
-
-        clean = []
-        for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style):
-            if not value:
-                continue
-            if prop.lower() in self.allowed_css_properties:
-                clean.append(prop + ': ' + value + ';')
-            elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
-                                                'padding']:
-                for keyword in value.split():
-                    if not keyword in self.acceptable_css_keywords and \
-                            not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):
-                        break
-                else:
-                    clean.append(prop + ': ' + value + ';')
-            elif prop.lower() in self.allowed_svg_properties:
-                clean.append(prop + ': ' + value + ';')
-
-        return ' '.join(clean)
-
-
-class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin):
-    def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
-                 lowercaseElementName=False, lowercaseAttrName=False, parser=None):
-        # Change case matching defaults as we only output lowercase html anyway
-        # This solution doesn't seem ideal...
-        HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet,
-                               lowercaseElementName, lowercaseAttrName, parser=parser)
-
-    def __iter__(self):
-        for token in HTMLTokenizer.__iter__(self):
-            token = self.sanitize_token(token)
-            if token:
-                yield token
diff --git a/lib/html5lib/serializer.py b/lib/html5lib/serializer.py
new file mode 100644
index 00000000..c66df683
--- /dev/null
+++ b/lib/html5lib/serializer.py
@@ -0,0 +1,409 @@
+from __future__ import absolute_import, division, unicode_literals
+from six import text_type
+
+import re
+
+from codecs import register_error, xmlcharrefreplace_errors
+
+from .constants import voidElements, booleanAttributes, spaceCharacters
+from .constants import rcdataElements, entities, xmlEntities
+from . import treewalkers, _utils
+from xml.sax.saxutils import escape
+
+_quoteAttributeSpecChars = "".join(spaceCharacters) + "\"'=<>`"
+_quoteAttributeSpec = re.compile("[" + _quoteAttributeSpecChars + "]")
+_quoteAttributeLegacy = re.compile("[" + _quoteAttributeSpecChars +
+                                   "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
+                                   "\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15"
+                                   "\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+                                   "\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000"
+                                   "\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
+                                   "\u2008\u2009\u200a\u2028\u2029\u202f\u205f"
+                                   "\u3000]")
+
+
+_encode_entity_map = {}
+_is_ucs4 = len("\U0010FFFF") == 1
+for k, v in list(entities.items()):
+    # skip multi-character entities
+    if ((_is_ucs4 and len(v) > 1) or
+            (not _is_ucs4 and len(v) > 2)):
+        continue
+    if v != "&":
+        if len(v) == 2:
+            v = _utils.surrogatePairToCodepoint(v)
+        else:
+            v = ord(v)
+        if v not in _encode_entity_map or k.islower():
+            # prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
+            _encode_entity_map[v] = k
+
+
+def htmlentityreplace_errors(exc):
+    if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
+        res = []
+        codepoints = []
+        skip = False
+        for i, c in enumerate(exc.object[exc.start:exc.end]):
+            if skip:
+                skip = False
+                continue
+            index = i + exc.start
+            if _utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
+                codepoint = _utils.surrogatePairToCodepoint(exc.object[index:index + 2])
+                skip = True
+            else:
+                codepoint = ord(c)
+            codepoints.append(codepoint)
+        for cp in codepoints:
+            e = _encode_entity_map.get(cp)
+            if e:
+                res.append("&")
+                res.append(e)
+                if not e.endswith(";"):
+                    res.append(";")
+            else:
+                res.append("&#x%s;" % (hex(cp)[2:]))
+        return ("".join(res), exc.end)
+    else:
+        return xmlcharrefreplace_errors(exc)
+
+
+register_error("htmlentityreplace", htmlentityreplace_errors)
+
+
+def serialize(input, tree="etree", encoding=None, **serializer_opts):
+    """Serializes the input token stream using the specified treewalker
+
+    :arg input: the token stream to serialize
+
+    :arg tree: the treewalker to use
+
+    :arg encoding: the encoding to use
+
+    :arg serializer_opts: any options to pass to the
+        :py:class:`html5lib.serializer.HTMLSerializer` that gets created
+
+    :returns: the tree serialized as a string
+
+    Example:
+
+    >>> from html5lib.html5parser import parse
+    >>> from html5lib.serializer import serialize
+    >>> token_stream = parse('<html><body><p>Hi!</p></body></html>')
+    >>> serialize(token_stream, omit_optional_tags=False)
+    '<html><head></head><body><p>Hi!</p></body></html>'
+
+    """
+    # XXX: Should we cache this?
+    walker = treewalkers.getTreeWalker(tree)
+    s = HTMLSerializer(**serializer_opts)
+    return s.render(walker(input), encoding)
+
+
+class HTMLSerializer(object):
+
+    # attribute quoting options
+    quote_attr_values = "legacy"  # be secure by default
+    quote_char = '"'
+    use_best_quote_char = True
+
+    # tag syntax options
+    omit_optional_tags = True
+    minimize_boolean_attributes = True
+    use_trailing_solidus = False
+    space_before_trailing_solidus = True
+
+    # escaping options
+    escape_lt_in_attrs = False
+    escape_rcdata = False
+    resolve_entities = True
+
+    # miscellaneous options
+    alphabetical_attributes = False
+    inject_meta_charset = True
+    strip_whitespace = False
+    sanitize = False
+
+    options = ("quote_attr_values", "quote_char", "use_best_quote_char",
+               "omit_optional_tags", "minimize_boolean_attributes",
+               "use_trailing_solidus", "space_before_trailing_solidus",
+               "escape_lt_in_attrs", "escape_rcdata", "resolve_entities",
+               "alphabetical_attributes", "inject_meta_charset",
+               "strip_whitespace", "sanitize")
+
+    def __init__(self, **kwargs):
+        """Initialize HTMLSerializer
+
+        :arg inject_meta_charset: Whether or not to inject the meta charset.
+
+            Defaults to ``True``.
+
+        :arg quote_attr_values: Whether to quote attribute values that don't
+            require quoting per legacy browser behavior (``"legacy"``), when
+            required by the standard (``"spec"``), or always (``"always"``).
+
+            Defaults to ``"legacy"``.
+
+        :arg quote_char: Use given quote character for attribute quoting.
+
+            Defaults to ``"`` which will use double quotes unless attribute
+            value contains a double quote, in which case single quotes are
+            used.
+
+        :arg escape_lt_in_attrs: Whether or not to escape ``<`` in attribute
+            values.
+
+            Defaults to ``False``.
+
+        :arg escape_rcdata: Whether to escape characters that need to be
+            escaped within normal elements within rcdata elements such as
+            style.
+
+            Defaults to ``False``.
+
+        :arg resolve_entities: Whether to resolve named character entities that
+            appear in the source tree. The XML predefined entities &lt; &gt;
+            &amp; &quot; &apos; are unaffected by this setting.
+
+            Defaults to ``True``.
+
+        :arg strip_whitespace: Whether to remove semantically meaningless
+            whitespace. (This compresses all whitespace to a single space
+            except within ``pre``.)
+
+            Defaults to ``False``.
+
+        :arg minimize_boolean_attributes: Shortens boolean attributes to give
+            just the attribute name, for example::
+
+              <input disabled="disabled">
+
+            becomes::
+
+              <input disabled>
+
+            Defaults to ``True``.
+
+        :arg use_trailing_solidus: Includes a close-tag slash at the end of the
+            start tag of void elements (empty elements whose end tag is
+            forbidden). E.g. ``<hr/>``.
+
+            Defaults to ``False``.
+
+        :arg space_before_trailing_solidus: Places a space immediately before
+            the closing slash in a tag using a trailing solidus. E.g.
+            ``<hr />``. Requires ``use_trailing_solidus=True``.
+
+            Defaults to ``True``.
+
+        :arg sanitize: Strip all unsafe or unknown constructs from output.
+            See :py:class:`html5lib.filters.sanitizer.Filter`.
+
+            Defaults to ``False``.
+
+        :arg omit_optional_tags: Omit start/end tags that are optional.
+
+            Defaults to ``True``.
+
+        :arg alphabetical_attributes: Reorder attributes to be in alphabetical order.
+
+            Defaults to ``False``.
+
+        """
+        unexpected_args = frozenset(kwargs) - frozenset(self.options)
+        if len(unexpected_args) > 0:
+            raise TypeError("__init__() got an unexpected keyword argument '%s'" % next(iter(unexpected_args)))
+        if 'quote_char' in kwargs:
+            self.use_best_quote_char = False
+        for attr in self.options:
+            setattr(self, attr, kwargs.get(attr, getattr(self, attr)))
+        self.errors = []
+        self.strict = False
+
+    def encode(self, string):
+        assert(isinstance(string, text_type))
+        if self.encoding:
+            return string.encode(self.encoding, "htmlentityreplace")
+        else:
+            return string
+
+    def encodeStrict(self, string):
+        assert(isinstance(string, text_type))
+        if self.encoding:
+            return string.encode(self.encoding, "strict")
+        else:
+            return string
+
+    def serialize(self, treewalker, encoding=None):
+        # pylint:disable=too-many-nested-blocks
+        self.encoding = encoding
+        in_cdata = False
+        self.errors = []
+
+        if encoding and self.inject_meta_charset:
+            from .filters.inject_meta_charset import Filter
+            treewalker = Filter(treewalker, encoding)
+        # Alphabetical attributes is here under the assumption that none of
+        # the later filters add or change order of attributes; it needs to be
+        # before the sanitizer so escaped elements come out correctly
+        if self.alphabetical_attributes:
+            from .filters.alphabeticalattributes import Filter
+            treewalker = Filter(treewalker)
+        # WhitespaceFilter should be used before OptionalTagFilter
+        # for maximum efficiency of this latter filter
+        if self.strip_whitespace:
+            from .filters.whitespace import Filter
+            treewalker = Filter(treewalker)
+        if self.sanitize:
+            from .filters.sanitizer import Filter
+            treewalker = Filter(treewalker)
+        if self.omit_optional_tags:
+            from .filters.optionaltags import Filter
+            treewalker = Filter(treewalker)
+
+        for token in treewalker:
+            type = token["type"]
+            if type == "Doctype":
+                doctype = "<!DOCTYPE %s" % token["name"]
+
+                if token["publicId"]:
+                    doctype += ' PUBLIC "%s"' % token["publicId"]
+                elif token["systemId"]:
+                    doctype += " SYSTEM"
+                if token["systemId"]:
+                    if token["systemId"].find('"') >= 0:
+                        if token["systemId"].find("'") >= 0:
+                            self.serializeError("System identifier contains both single and double quote characters")
+                        quote_char = "'"
+                    else:
+                        quote_char = '"'
+                    doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char)
+
+                doctype += ">"
+                yield self.encodeStrict(doctype)
+
+            elif type in ("Characters", "SpaceCharacters"):
+                if type == "SpaceCharacters" or in_cdata:
+                    if in_cdata and token["data"].find("</") >= 0:
+                        self.serializeError("Unexpected </ in CDATA")
+                    yield self.encode(token["data"])
+                else:
+                    yield self.encode(escape(token["data"]))
+
+            elif type in ("StartTag", "EmptyTag"):
+                name = token["name"]
+                yield self.encodeStrict("<%s" % name)
+                if name in rcdataElements and not self.escape_rcdata:
+                    in_cdata = True
+                elif in_cdata:
+                    self.serializeError("Unexpected child element of a CDATA element")
+                for (_, attr_name), attr_value in token["data"].items():
+                    # TODO: Add namespace support here
+                    k = attr_name
+                    v = attr_value
+                    yield self.encodeStrict(' ')
+
+                    yield self.encodeStrict(k)
+                    if not self.minimize_boolean_attributes or \
+                        (k not in booleanAttributes.get(name, tuple()) and
+                         k not in booleanAttributes.get("", tuple())):
+                        yield self.encodeStrict("=")
+                        if self.quote_attr_values == "always" or len(v) == 0:
+                            quote_attr = True
+                        elif self.quote_attr_values == "spec":
+                            quote_attr = _quoteAttributeSpec.search(v) is not None
+                        elif self.quote_attr_values == "legacy":
+                            quote_attr = _quoteAttributeLegacy.search(v) is not None
+                        else:
+                            raise ValueError("quote_attr_values must be one of: "
+                                             "'always', 'spec', or 'legacy'")
+                        v = v.replace("&", "&amp;")
+                        if self.escape_lt_in_attrs:
+                            v = v.replace("<", "&lt;")
+                        if quote_attr:
+                            quote_char = self.quote_char
+                            if self.use_best_quote_char:
+                                if "'" in v and '"' not in v:
+                                    quote_char = '"'
+                                elif '"' in v and "'" not in v:
+                                    quote_char = "'"
+                            if quote_char == "'":
+                                v = v.replace("'", "&#39;")
+                            else:
+                                v = v.replace('"', "&quot;")
+                            yield self.encodeStrict(quote_char)
+                            yield self.encode(v)
+                            yield self.encodeStrict(quote_char)
+                        else:
+                            yield self.encode(v)
+                if name in voidElements and self.use_trailing_solidus:
+                    if self.space_before_trailing_solidus:
+                        yield self.encodeStrict(" /")
+                    else:
+                        yield self.encodeStrict("/")
+                yield self.encode(">")
+
+            elif type == "EndTag":
+                name = token["name"]
+                if name in rcdataElements:
+                    in_cdata = False
+                elif in_cdata:
+                    self.serializeError("Unexpected child element of a CDATA element")
+                yield self.encodeStrict("</%s>" % name)
+
+            elif type == "Comment":
+                data = token["data"]
+                if data.find("--") >= 0:
+                    self.serializeError("Comment contains --")
+                yield self.encodeStrict("<!--%s-->" % token["data"])
+
+            elif type == "Entity":
+                name = token["name"]
+                key = name + ";"
+                if key not in entities:
+                    self.serializeError("Entity %s not recognized" % name)
+                if self.resolve_entities and key not in xmlEntities:
+                    data = entities[key]
+                else:
+                    data = "&%s;" % name
+                yield self.encodeStrict(data)
+
+            else:
+                self.serializeError(token["data"])
+
+    def render(self, treewalker, encoding=None):
+        """Serializes the stream from the treewalker into a string
+
+        :arg treewalker: the treewalker to serialize
+
+        :arg encoding: the string encoding to use
+
+        :returns: the serialized tree
+
+        Example:
+
+        >>> from html5lib import parse, getTreeWalker
+        >>> from html5lib.serializer import HTMLSerializer
+        >>> token_stream = parse('<html><body>Hi!</body></html>')
+        >>> walker = getTreeWalker('etree')
+        >>> serializer = HTMLSerializer(omit_optional_tags=False)
+        >>> serializer.render(walker(token_stream))
+        '<html><head></head><body>Hi!</body></html>'
+
+        """
+        if encoding:
+            return b"".join(list(self.serialize(treewalker, encoding)))
+        else:
+            return "".join(list(self.serialize(treewalker)))
+
+    def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
+        # XXX The idea is to make data mandatory.
+        self.errors.append(data)
+        if self.strict:
+            raise SerializeError
+
+
+class SerializeError(Exception):
+    """Error in serialized tree"""
+    pass
diff --git a/lib/html5lib/serializer/__init__.py b/lib/html5lib/serializer/__init__.py
deleted file mode 100644
index 8380839a..00000000
--- a/lib/html5lib/serializer/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from .. import treewalkers
-
-from .htmlserializer import HTMLSerializer
-
-
-def serialize(input, tree="etree", format="html", encoding=None,
-              **serializer_opts):
-    # XXX: Should we cache this?
-    walker = treewalkers.getTreeWalker(tree)
-    if format == "html":
-        s = HTMLSerializer(**serializer_opts)
-    else:
-        raise ValueError("type must be html")
-    return s.render(walker(input), encoding)
diff --git a/lib/html5lib/serializer/htmlserializer.py b/lib/html5lib/serializer/htmlserializer.py
deleted file mode 100644
index 412a5a22..00000000
--- a/lib/html5lib/serializer/htmlserializer.py
+++ /dev/null
@@ -1,320 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-from six import text_type
-
-import gettext
-_ = gettext.gettext
-
-try:
-    from functools import reduce
-except ImportError:
-    pass
-
-from ..constants import voidElements, booleanAttributes, spaceCharacters
-from ..constants import rcdataElements, entities, xmlEntities
-from .. import utils
-from xml.sax.saxutils import escape
-
-spaceCharacters = "".join(spaceCharacters)
-
-try:
-    from codecs import register_error, xmlcharrefreplace_errors
-except ImportError:
-    unicode_encode_errors = "strict"
-else:
-    unicode_encode_errors = "htmlentityreplace"
-
-    encode_entity_map = {}
-    is_ucs4 = len("\U0010FFFF") == 1
-    for k, v in list(entities.items()):
-        # skip multi-character entities
-        if ((is_ucs4 and len(v) > 1) or
-                (not is_ucs4 and len(v) > 2)):
-            continue
-        if v != "&":
-            if len(v) == 2:
-                v = utils.surrogatePairToCodepoint(v)
-            else:
-                v = ord(v)
-            if not v in encode_entity_map or k.islower():
-                # prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
-                encode_entity_map[v] = k
-
-    def htmlentityreplace_errors(exc):
-        if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
-            res = []
-            codepoints = []
-            skip = False
-            for i, c in enumerate(exc.object[exc.start:exc.end]):
-                if skip:
-                    skip = False
-                    continue
-                index = i + exc.start
-                if utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
-                    codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2])
-                    skip = True
-                else:
-                    codepoint = ord(c)
-                codepoints.append(codepoint)
-            for cp in codepoints:
-                e = encode_entity_map.get(cp)
-                if e:
-                    res.append("&")
-                    res.append(e)
-                    if not e.endswith(";"):
-                        res.append(";")
-                else:
-                    res.append("&#x%s;" % (hex(cp)[2:]))
-            return ("".join(res), exc.end)
-        else:
-            return xmlcharrefreplace_errors(exc)
-
-    register_error(unicode_encode_errors, htmlentityreplace_errors)
-
-    del register_error
-
-
-class HTMLSerializer(object):
-
-    # attribute quoting options
-    quote_attr_values = False
-    quote_char = '"'
-    use_best_quote_char = True
-
-    # tag syntax options
-    omit_optional_tags = True
-    minimize_boolean_attributes = True
-    use_trailing_solidus = False
-    space_before_trailing_solidus = True
-
-    # escaping options
-    escape_lt_in_attrs = False
-    escape_rcdata = False
-    resolve_entities = True
-
-    # miscellaneous options
-    alphabetical_attributes = False
-    inject_meta_charset = True
-    strip_whitespace = False
-    sanitize = False
-
-    options = ("quote_attr_values", "quote_char", "use_best_quote_char",
-               "omit_optional_tags", "minimize_boolean_attributes",
-               "use_trailing_solidus", "space_before_trailing_solidus",
-               "escape_lt_in_attrs", "escape_rcdata", "resolve_entities",
-               "alphabetical_attributes", "inject_meta_charset",
-               "strip_whitespace", "sanitize")
-
-    def __init__(self, **kwargs):
-        """Initialize HTMLSerializer.
-
-        Keyword options (default given first unless specified) include:
-
-        inject_meta_charset=True|False
-          Whether it insert a meta element to define the character set of the
-          document.
-        quote_attr_values=True|False
-          Whether to quote attribute values that don't require quoting
-          per HTML5 parsing rules.
-        quote_char=u'"'|u"'"
-          Use given quote character for attribute quoting. Default is to
-          use double quote unless attribute value contains a double quote,
-          in which case single quotes are used instead.
-        escape_lt_in_attrs=False|True
-          Whether to escape < in attribute values.
-        escape_rcdata=False|True
-          Whether to escape characters that need to be escaped within normal
-          elements within rcdata elements such as style.
-        resolve_entities=True|False
-          Whether to resolve named character entities that appear in the
-          source tree. The XML predefined entities &lt; &gt; &amp; &quot; &apos;
-          are unaffected by this setting.
-        strip_whitespace=False|True
-          Whether to remove semantically meaningless whitespace. (This
-          compresses all whitespace to a single space except within pre.)
-        minimize_boolean_attributes=True|False
-          Shortens boolean attributes to give just the attribute value,
-          for example <input disabled="disabled"> becomes <input disabled>.
-        use_trailing_solidus=False|True
-          Includes a close-tag slash at the end of the start tag of void
-          elements (empty elements whose end tag is forbidden). E.g. <hr/>.
-        space_before_trailing_solidus=True|False
-          Places a space immediately before the closing slash in a tag
-          using a trailing solidus. E.g. <hr />. Requires use_trailing_solidus.
-        sanitize=False|True
-          Strip all unsafe or unknown constructs from output.
-          See `html5lib user documentation`_
-        omit_optional_tags=True|False
-          Omit start/end tags that are optional.
-        alphabetical_attributes=False|True
-          Reorder attributes to be in alphabetical order.
-
-        .. _html5lib user documentation: http://code.google.com/p/html5lib/wiki/UserDocumentation
-        """
-        if 'quote_char' in kwargs:
-            self.use_best_quote_char = False
-        for attr in self.options:
-            setattr(self, attr, kwargs.get(attr, getattr(self, attr)))
-        self.errors = []
-        self.strict = False
-
-    def encode(self, string):
-        assert(isinstance(string, text_type))
-        if self.encoding:
-            return string.encode(self.encoding, unicode_encode_errors)
-        else:
-            return string
-
-    def encodeStrict(self, string):
-        assert(isinstance(string, text_type))
-        if self.encoding:
-            return string.encode(self.encoding, "strict")
-        else:
-            return string
-
-    def serialize(self, treewalker, encoding=None):
-        self.encoding = encoding
-        in_cdata = False
-        self.errors = []
-
-        if encoding and self.inject_meta_charset:
-            from ..filters.inject_meta_charset import Filter
-            treewalker = Filter(treewalker, encoding)
-        # WhitespaceFilter should be used before OptionalTagFilter
-        # for maximum efficiently of this latter filter
-        if self.strip_whitespace:
-            from ..filters.whitespace import Filter
-            treewalker = Filter(treewalker)
-        if self.sanitize:
-            from ..filters.sanitizer import Filter
-            treewalker = Filter(treewalker)
-        if self.omit_optional_tags:
-            from ..filters.optionaltags import Filter
-            treewalker = Filter(treewalker)
-        # Alphabetical attributes must be last, as other filters
-        # could add attributes and alter the order
-        if self.alphabetical_attributes:
-            from ..filters.alphabeticalattributes import Filter
-            treewalker = Filter(treewalker)
-
-        for token in treewalker:
-            type = token["type"]
-            if type == "Doctype":
-                doctype = "<!DOCTYPE %s" % token["name"]
-
-                if token["publicId"]:
-                    doctype += ' PUBLIC "%s"' % token["publicId"]
-                elif token["systemId"]:
-                    doctype += " SYSTEM"
-                if token["systemId"]:
-                    if token["systemId"].find('"') >= 0:
-                        if token["systemId"].find("'") >= 0:
-                            self.serializeError(_("System identifer contains both single and double quote characters"))
-                        quote_char = "'"
-                    else:
-                        quote_char = '"'
-                    doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char)
-
-                doctype += ">"
-                yield self.encodeStrict(doctype)
-
-            elif type in ("Characters", "SpaceCharacters"):
-                if type == "SpaceCharacters" or in_cdata:
-                    if in_cdata and token["data"].find("</") >= 0:
-                        self.serializeError(_("Unexpected </ in CDATA"))
-                    yield self.encode(token["data"])
-                else:
-                    yield self.encode(escape(token["data"]))
-
-            elif type in ("StartTag", "EmptyTag"):
-                name = token["name"]
-                yield self.encodeStrict("<%s" % name)
-                if name in rcdataElements and not self.escape_rcdata:
-                    in_cdata = True
-                elif in_cdata:
-                    self.serializeError(_("Unexpected child element of a CDATA element"))
-                for (attr_namespace, attr_name), attr_value in token["data"].items():
-                    # TODO: Add namespace support here
-                    k = attr_name
-                    v = attr_value
-                    yield self.encodeStrict(' ')
-
-                    yield self.encodeStrict(k)
-                    if not self.minimize_boolean_attributes or \
-                        (k not in booleanAttributes.get(name, tuple())
-                         and k not in booleanAttributes.get("", tuple())):
-                        yield self.encodeStrict("=")
-                        if self.quote_attr_values or not v:
-                            quote_attr = True
-                        else:
-                            quote_attr = reduce(lambda x, y: x or (y in v),
-                                                spaceCharacters + ">\"'=", False)
-                        v = v.replace("&", "&amp;")
-                        if self.escape_lt_in_attrs:
-                            v = v.replace("<", "&lt;")
-                        if quote_attr:
-                            quote_char = self.quote_char
-                            if self.use_best_quote_char:
-                                if "'" in v and '"' not in v:
-                                    quote_char = '"'
-                                elif '"' in v and "'" not in v:
-                                    quote_char = "'"
-                            if quote_char == "'":
-                                v = v.replace("'", "&#39;")
-                            else:
-                                v = v.replace('"', "&quot;")
-                            yield self.encodeStrict(quote_char)
-                            yield self.encode(v)
-                            yield self.encodeStrict(quote_char)
-                        else:
-                            yield self.encode(v)
-                if name in voidElements and self.use_trailing_solidus:
-                    if self.space_before_trailing_solidus:
-                        yield self.encodeStrict(" /")
-                    else:
-                        yield self.encodeStrict("/")
-                yield self.encode(">")
-
-            elif type == "EndTag":
-                name = token["name"]
-                if name in rcdataElements:
-                    in_cdata = False
-                elif in_cdata:
-                    self.serializeError(_("Unexpected child element of a CDATA element"))
-                yield self.encodeStrict("</%s>" % name)
-
-            elif type == "Comment":
-                data = token["data"]
-                if data.find("--") >= 0:
-                    self.serializeError(_("Comment contains --"))
-                yield self.encodeStrict("<!--%s-->" % token["data"])
-
-            elif type == "Entity":
-                name = token["name"]
-                key = name + ";"
-                if not key in entities:
-                    self.serializeError(_("Entity %s not recognized" % name))
-                if self.resolve_entities and key not in xmlEntities:
-                    data = entities[key]
-                else:
-                    data = "&%s;" % name
-                yield self.encodeStrict(data)
-
-            else:
-                self.serializeError(token["data"])
-
-    def render(self, treewalker, encoding=None):
-        if encoding:
-            return b"".join(list(self.serialize(treewalker, encoding)))
-        else:
-            return "".join(list(self.serialize(treewalker)))
-
-    def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
-        # XXX The idea is to make data mandatory.
-        self.errors.append(data)
-        if self.strict:
-            raise SerializeError
-
-
-def SerializeError(Exception):
-    """Error in serialized tree"""
-    pass
diff --git a/lib/html5lib/tests/__init__.py b/lib/html5lib/tests/__init__.py
new file mode 100644
index 00000000..b8ce2de3
--- /dev/null
+++ b/lib/html5lib/tests/__init__.py
@@ -0,0 +1 @@
+from __future__ import absolute_import, division, unicode_literals
diff --git a/lib/html5lib/tests/conftest.py b/lib/html5lib/tests/conftest.py
new file mode 100644
index 00000000..dad167c5
--- /dev/null
+++ b/lib/html5lib/tests/conftest.py
@@ -0,0 +1,108 @@
+from __future__ import print_function
+import os.path
+import sys
+
+import pkg_resources
+import pytest
+
+from .tree_construction import TreeConstructionFile
+from .tokenizer import TokenizerFile
+from .sanitizer import SanitizerFile
+
+_dir = os.path.abspath(os.path.dirname(__file__))
+_root = os.path.join(_dir, "..", "..")
+_testdata = os.path.join(_dir, "testdata")
+_tree_construction = os.path.join(_testdata, "tree-construction")
+_tokenizer = os.path.join(_testdata, "tokenizer")
+_sanitizer_testdata = os.path.join(_dir, "sanitizer-testdata")
+
+
+def fail_if_missing_pytest_expect():
+    """Throws an exception halting pytest if pytest-expect isn't working"""
+    try:
+        from pytest_expect import expect  # noqa
+    except ImportError:
+        header = '*' * 78
+        print(
+            '\n' +
+            header + '\n' +
+            'ERROR: Either pytest-expect or its dependency u-msgpack-python is not\n' +
+            'installed. Please install them both before running pytest.\n' +
+            header + '\n',
+            file=sys.stderr
+        )
+        raise
+
+
+fail_if_missing_pytest_expect()
+
+
+def pytest_configure(config):
+    msgs = []
+
+    if not os.path.exists(_testdata):
+        msg = "testdata not available! "
+        if os.path.exists(os.path.join(_root, ".git")):
+            msg += ("Please run git submodule update --init --recursive " +
+                    "and then run tests again.")
+        else:
+            msg += ("The testdata doesn't appear to be included with this package, " +
+                    "so finding the right version will be hard. :(")
+        msgs.append(msg)
+
+    if config.option.update_xfail:
+        # Check for optional requirements
+        req_file = os.path.join(_root, "requirements-optional.txt")
+        if os.path.exists(req_file):
+            with open(req_file, "r") as fp:
+                for line in fp:
+                    if (line.strip() and
+                        not (line.startswith("-r") or
+                             line.startswith("#"))):
+                        if ";" in line:
+                            spec, marker = line.strip().split(";", 1)
+                        else:
+                            spec, marker = line.strip(), None
+                        req = pkg_resources.Requirement.parse(spec)
+                        if marker and not pkg_resources.evaluate_marker(marker):
+                            msgs.append("%s not available in this environment" % spec)
+                        else:
+                            try:
+                                installed = pkg_resources.working_set.find(req)
+                            except pkg_resources.VersionConflict:
+                                msgs.append("Outdated version of %s installed, need %s" % (req.name, spec))
+                            else:
+                                if not installed:
+                                    msgs.append("Need %s" % spec)
+
+        # Check cElementTree
+        import xml.etree.ElementTree as ElementTree
+
+        try:
+            import xml.etree.cElementTree as cElementTree
+        except ImportError:
+            msgs.append("cElementTree unable to be imported")
+        else:
+            if cElementTree.Element is ElementTree.Element:
+                msgs.append("cElementTree is just an alias for ElementTree")
+
+    if msgs:
+        pytest.exit("\n".join(msgs))
+
+
+def pytest_collect_file(path, parent):
+    """Return the custom collector for data files under a known testdata dir."""
+    # Gather the file's directory and every ancestor; dirname() of the
+    # filesystem root returns the root itself, which terminates the walk.
+    ancestors = set()
+    current = os.path.abspath(path.dirname)
+    while current not in ancestors:
+        ancestors.add(current)
+        current = os.path.dirname(current)
+    if _tree_construction in ancestors:
+        return TreeConstructionFile(path, parent) if path.ext == ".dat" else None
+    if _tokenizer in ancestors:
+        return TokenizerFile(path, parent) if path.ext == ".test" else None
+    if _sanitizer_testdata in ancestors:
+        return SanitizerFile(path, parent) if path.ext == ".dat" else None
+    return None
diff --git a/lib/html5lib/tests/sanitizer.py b/lib/html5lib/tests/sanitizer.py
new file mode 100644
index 00000000..bb483421
--- /dev/null
+++ b/lib/html5lib/tests/sanitizer.py
@@ -0,0 +1,51 @@
+from __future__ import absolute_import, division, unicode_literals
+
+import codecs
+import json
+
+import pytest
+
+from html5lib import parseFragment, serialize
+
+
+class SanitizerFile(pytest.File):
+    def collect(self):
+        with codecs.open(str(self.fspath), "r", encoding="utf-8") as fp:
+            tests = json.load(fp)
+        for i, test in enumerate(tests):
+            yield SanitizerTest(str(i), self, test=test)
+
+
+class SanitizerTest(pytest.Item):
+    def __init__(self, name, parent, test):
+        super(SanitizerTest, self).__init__(name, parent)
+        self.obj = lambda: 1  # this is to hack around skipif needing a function!
+        self.test = test
+
+    def runtest(self):
+        input = self.test["input"]
+        expected = self.test["output"]
+
+        parsed = parseFragment(input)
+        with pytest.deprecated_call():
+            serialized = serialize(parsed,
+                                   sanitize=True,
+                                   omit_optional_tags=False,
+                                   use_trailing_solidus=True,
+                                   space_before_trailing_solidus=False,
+                                   quote_attr_values="always",
+                                   quote_char="'",
+                                   alphabetical_attributes=True)
+        errorMsg = "\n".join(["\n\nInput:", input,
+                              "\nExpected:", expected,
+                              "\nReceived:", serialized])
+        assert expected == serialized, errorMsg
+
+    def repr_failure(self, excinfo):
+        traceback = excinfo.traceback
+        ntraceback = traceback.cut(path=__file__)
+        excinfo.traceback = ntraceback.filter()
+
+        return excinfo.getrepr(funcargs=True,
+                               showlocals=False,
+                               style="short", tbfilter=False)
diff --git a/lib/html5lib/tests/support.py b/lib/html5lib/tests/support.py
new file mode 100644
index 00000000..9cd5afbe
--- /dev/null
+++ b/lib/html5lib/tests/support.py
@@ -0,0 +1,199 @@
+from __future__ import absolute_import, division, unicode_literals
+
+# pylint:disable=wrong-import-position
+
+import os
+import sys
+import codecs
+import glob
+import xml.sax.handler
+
+base_path = os.path.split(__file__)[0]
+
+test_dir = os.path.join(base_path, 'testdata')
+sys.path.insert(0, os.path.abspath(os.path.join(base_path,
+                                                os.path.pardir,
+                                                os.path.pardir)))
+
+from html5lib import treebuilders, treewalkers, treeadapters  # noqa
+del base_path
+
+# Build a dict of available trees
+treeTypes = {}
+
+# DOM impls
+treeTypes["DOM"] = {
+    "builder": treebuilders.getTreeBuilder("dom"),
+    "walker": treewalkers.getTreeWalker("dom")
+}
+
+# ElementTree impls
+import xml.etree.ElementTree as ElementTree  # noqa
+treeTypes['ElementTree'] = {
+    "builder": treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True),
+    "walker": treewalkers.getTreeWalker("etree", ElementTree)
+}
+
+try:
+    import xml.etree.cElementTree as cElementTree  # noqa
+except ImportError:
+    treeTypes['cElementTree'] = None
+else:
+    # On Python 3.3 and above cElementTree is an alias, don't run them twice.
+    if cElementTree.Element is ElementTree.Element:
+        treeTypes['cElementTree'] = None
+    else:
+        treeTypes['cElementTree'] = {
+            "builder": treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True),
+            "walker": treewalkers.getTreeWalker("etree", cElementTree)
+        }
+
+try:
+    import lxml.etree as lxml  # noqa
+except ImportError:
+    treeTypes['lxml'] = None
+else:
+    treeTypes['lxml'] = {
+        "builder": treebuilders.getTreeBuilder("lxml"),
+        "walker": treewalkers.getTreeWalker("lxml")
+    }
+
+# Genshi impls
+try:
+    import genshi  # noqa
+except ImportError:
+    treeTypes["genshi"] = None
+else:
+    treeTypes["genshi"] = {
+        "builder": treebuilders.getTreeBuilder("dom"),
+        "adapter": lambda tree: treeadapters.genshi.to_genshi(treewalkers.getTreeWalker("dom")(tree)),
+        "walker": treewalkers.getTreeWalker("genshi")
+    }
+
+# pylint:enable=wrong-import-position
+
+
+def get_data_files(subdirectory, files='*.dat', search_dir=test_dir):
+    return sorted(glob.glob(os.path.join(search_dir, subdirectory, files)))
+
+
+class DefaultDict(dict):
+    def __init__(self, default, *args, **kwargs):
+        self.default = default
+        dict.__init__(self, *args, **kwargs)
+
+    def __getitem__(self, key):
+        return dict.get(self, key, self.default)
+
+
+class TestData(object):
+    """Iterate over the tests stored in a "#heading"-sectioned data file."""
+
+    def __init__(self, filename, newTestHeading="data", encoding="utf8"):
+        if encoding is None:
+            self.f = open(filename, mode="rb")
+        else:
+            self.f = codecs.open(filename, encoding=encoding)
+        self.encoding = encoding
+        self.newTestHeading = newTestHeading
+
+    def __iter__(self):
+        data = DefaultDict(None)
+        key = None
+        with self.f:  # close the handle once the whole file has been read
+            for line in self.f:
+                heading = self.isSectionHeading(line)
+                if heading:
+                    if data and heading == self.newTestHeading:
+                        # Remove trailing newline
+                        data[key] = data[key][:-1]
+                        yield self.normaliseOutput(data)
+                        data = DefaultDict(None)
+                    key = heading
+                    data[key] = "" if self.encoding else b""
+                elif key is not None:
+                    data[key] += line
+        if data:
+            yield self.normaliseOutput(data)
+
+    def isSectionHeading(self, line):
+        """Return the heading named by a "#"-prefixed line, else False."""
+        if line.startswith("#" if self.encoding else b"#"):
+            return line[1:].strip()
+        return False
+
+    def normaliseOutput(self, data):
+        """Strip one trailing newline from each value of ``data`` in place."""
+        for key, value in data.items():
+            if value.endswith("\n" if self.encoding else b"\n"):
+                data[key] = value[:-1]
+        return data
+
+
+def convert(stripChars):
+    """Return a converter that drops ``stripChars`` leading chars from "|" lines."""
+
+    def convertData(data):
+        """convert the output of str(document) to the format used in the testcases"""
+        # Only lines starting with "|" lose their prefix; all other lines
+        # are passed through untouched.
+        return "\n".join(
+            line[stripChars:] if line.startswith("|") else line
+            for line in data.split("\n")
+        )
+    return convertData
+
+
+convertExpected = convert(2)
+
+
+def errorMessage(input, expected, actual):
+    """Format the reprs of the three values; ASCII-encoded bytes on Python 2."""
+    template = "Input:\n%s\nExpected:\n%s\nReceived\n%s\n"
+    text = template % tuple(repr(value) for value in (input, expected, actual))
+    on_py2 = sys.version_info[0] == 2
+    return text.encode("ascii", "backslashreplace") if on_py2 else text
+
+
+class TracingSaxHandler(xml.sax.handler.ContentHandler):
+    def __init__(self):
+        xml.sax.handler.ContentHandler.__init__(self)
+        self.visited = []
+
+    def startDocument(self):
+        self.visited.append('startDocument')
+
+    def endDocument(self):
+        self.visited.append('endDocument')
+
+    def startPrefixMapping(self, prefix, uri):
+        # These are ignored as their order is not guaranteed
+        pass
+
+    def endPrefixMapping(self, prefix):
+        # These are ignored as their order is not guaranteed
+        pass
+
+    def startElement(self, name, attrs):
+        self.visited.append(('startElement', name, attrs))
+
+    def endElement(self, name):
+        self.visited.append(('endElement', name))
+
+    def startElementNS(self, name, qname, attrs):
+        self.visited.append(('startElementNS', name, qname, dict(attrs)))
+
+    def endElementNS(self, name, qname):
+        self.visited.append(('endElementNS', name, qname))
+
+    def characters(self, content):
+        self.visited.append(('characters', content))
+
+    def ignorableWhitespace(self, whitespace):
+        self.visited.append(('ignorableWhitespace', whitespace))
+
+    def processingInstruction(self, target, data):
+        self.visited.append(('processingInstruction', target, data))
+
+    def skippedEntity(self, name):
+        self.visited.append(('skippedEntity', name))
diff --git a/lib/html5lib/tests/test_alphabeticalattributes.py b/lib/html5lib/tests/test_alphabeticalattributes.py
new file mode 100644
index 00000000..7d5b8e0f
--- /dev/null
+++ b/lib/html5lib/tests/test_alphabeticalattributes.py
@@ -0,0 +1,78 @@
+from __future__ import absolute_import, division, unicode_literals
+
+from collections import OrderedDict
+
+import pytest
+
+import html5lib
+from html5lib.filters.alphabeticalattributes import Filter
+from html5lib.serializer import HTMLSerializer
+
+
+@pytest.mark.parametrize('msg, attrs, expected_attrs', [
+    (
+        'no attrs',
+        {},
+        {}
+    ),
+    (
+        'one attr',
+        {(None, 'alt'): 'image'},
+        OrderedDict([((None, 'alt'), 'image')])
+    ),
+    (
+        'multiple attrs',
+        {
+            (None, 'src'): 'foo',
+            (None, 'alt'): 'image',
+            (None, 'style'): 'border: 1px solid black;'
+        },
+        OrderedDict([
+            ((None, 'alt'), 'image'),
+            ((None, 'src'), 'foo'),
+            ((None, 'style'), 'border: 1px solid black;')
+        ])
+    ),
+])
+def test_alphabetizing(msg, attrs, expected_attrs):
+    tokens = [{'type': 'StartTag', 'name': 'img', 'data': attrs}]
+    output_tokens = list(Filter(tokens))
+
+    attrs = output_tokens[0]['data']
+    assert attrs == expected_attrs
+
+
+def test_with_different_namespaces():
+    tokens = [{
+        'type': 'StartTag',
+        'name': 'pattern',
+        'data': {
+            (None, 'id'): 'patt1',
+            ('http://www.w3.org/1999/xlink', 'href'): '#patt2'
+        }
+    }]
+    output_tokens = list(Filter(tokens))
+
+    attrs = output_tokens[0]['data']
+    assert attrs == OrderedDict([
+        ((None, 'id'), 'patt1'),
+        (('http://www.w3.org/1999/xlink', 'href'), '#patt2')
+    ])
+
+
+def test_with_serializer():
+    """Verify filter works in the context of everything else"""
+    parser = html5lib.HTMLParser()
+    dom = parser.parseFragment('<svg><pattern xlink:href="#patt2" id="patt1"></svg>')
+    walker = html5lib.getTreeWalker('etree')
+    ser = HTMLSerializer(
+        alphabetical_attributes=True,
+        quote_attr_values='always'
+    )
+
+    # FIXME(willkg): The "xlink" namespace gets dropped by the serializer. When
+    # that gets fixed, we can fix this expected result.
+    assert (
+        ser.render(walker(dom)) ==
+        '<svg><pattern id="patt1" href="#patt2"></pattern></svg>'
+    )
diff --git a/lib/html5lib/tests/test_encoding.py b/lib/html5lib/tests/test_encoding.py
new file mode 100644
index 00000000..47c4814a
--- /dev/null
+++ b/lib/html5lib/tests/test_encoding.py
@@ -0,0 +1,117 @@
+from __future__ import absolute_import, division, unicode_literals
+
+import os
+
+import pytest
+
+from .support import get_data_files, test_dir, errorMessage, TestData as _TestData
+from html5lib import HTMLParser, _inputstream
+
+
+def test_basic_prescan_length():
+    data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8')
+    pad = 1024 - len(data) + 1
+    data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
+    assert len(data) == 1024  # Sanity
+    stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
+    assert 'utf-8' == stream.charEncoding[0].name
+
+
+def test_parser_reparse():
+    data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8')
+    pad = 10240 - len(data) + 1
+    data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
+    assert len(data) == 10240  # Sanity
+    stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
+    assert 'windows-1252' == stream.charEncoding[0].name
+    p = HTMLParser(namespaceHTMLElements=False)
+    doc = p.parse(data, useChardet=False)
+    assert 'utf-8' == p.documentEncoding
+    assert doc.find(".//title").text == "Caf\u00E9"
+
+
+@pytest.mark.parametrize("expected,data,kwargs", [
+    ("utf-16le", b"\xFF\xFE", {"override_encoding": "iso-8859-2"}),
+    ("utf-16be", b"\xFE\xFF", {"override_encoding": "iso-8859-2"}),
+    ("utf-8", b"\xEF\xBB\xBF", {"override_encoding": "iso-8859-2"}),
+    ("iso-8859-2", b"", {"override_encoding": "iso-8859-2", "transport_encoding": "iso-8859-3"}),
+    ("iso-8859-2", b"<meta charset=iso-8859-3>", {"transport_encoding": "iso-8859-2"}),
+    ("iso-8859-2", b"<meta charset=iso-8859-2>", {"same_origin_parent_encoding": "iso-8859-3"}),
+    ("iso-8859-2", b"", {"same_origin_parent_encoding": "iso-8859-2", "likely_encoding": "iso-8859-3"}),
+    ("iso-8859-2", b"", {"same_origin_parent_encoding": "utf-16", "likely_encoding": "iso-8859-2"}),
+    ("iso-8859-2", b"", {"same_origin_parent_encoding": "utf-16be", "likely_encoding": "iso-8859-2"}),
+    ("iso-8859-2", b"", {"same_origin_parent_encoding": "utf-16le", "likely_encoding": "iso-8859-2"}),
+    ("iso-8859-2", b"", {"likely_encoding": "iso-8859-2", "default_encoding": "iso-8859-3"}),
+    ("iso-8859-2", b"", {"default_encoding": "iso-8859-2"}),
+    ("windows-1252", b"", {"default_encoding": "totally-bogus-string"}),
+    ("windows-1252", b"", {}),
+])
+def test_parser_args(expected, data, kwargs):
+    stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False, **kwargs)
+    assert expected == stream.charEncoding[0].name
+    p = HTMLParser()
+    p.parse(data, useChardet=False, **kwargs)
+    assert expected == p.documentEncoding
+
+
+@pytest.mark.parametrize("kwargs", [
+    {"override_encoding": "iso-8859-2"},
+    {"override_encoding": None},
+    {"transport_encoding": "iso-8859-2"},
+    {"transport_encoding": None},
+    {"same_origin_parent_encoding": "iso-8859-2"},
+    {"same_origin_parent_encoding": None},
+    {"likely_encoding": "iso-8859-2"},
+    {"likely_encoding": None},
+    {"default_encoding": "iso-8859-2"},
+    {"default_encoding": None},
+    {"foo_encoding": "iso-8859-2"},
+    {"foo_encoding": None},
+])
+def test_parser_args_raises(kwargs):
+    with pytest.raises(TypeError) as exc_info:
+        p = HTMLParser()
+        p.parse("", useChardet=False, **kwargs)
+    assert exc_info.value.args[0].startswith("Cannot set an encoding with a unicode input")
+
+
+def param_encoding():
+    for filename in get_data_files("encoding"):
+        tests = _TestData(filename, b"data", encoding=None)
+        for test in tests:
+            yield test[b'data'], test[b'encoding']
+
+
+@pytest.mark.parametrize("data, encoding", param_encoding())
+def test_parser_encoding(data, encoding):
+    p = HTMLParser()
+    assert p.documentEncoding is None
+    p.parse(data, useChardet=False)
+    encoding = encoding.lower().decode("ascii")
+
+    assert encoding == p.documentEncoding, errorMessage(data, encoding, p.documentEncoding)
+
+
+@pytest.mark.parametrize("data, encoding", param_encoding())
+def test_prescan_encoding(data, encoding):
+    stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
+    encoding = encoding.lower().decode("ascii")
+
+    # Very crude way to ignore irrelevant tests
+    if len(data) > stream.numBytesMeta:
+        return
+
+    assert encoding == stream.charEncoding[0].name, errorMessage(data, encoding, stream.charEncoding[0].name)
+
+
+# pylint:disable=wrong-import-position
+try:
+    import chardet  # noqa
+except ImportError:
+    print("chardet not found, skipping chardet tests")
+else:
+    def test_chardet():
+        with open(os.path.join(test_dir, "encoding", "chardet", "test_big5.txt"), "rb") as fp:
+            encoding = _inputstream.HTMLInputStream(fp.read()).charEncoding
+            assert encoding[0].name == "big5"
+# pylint:enable=wrong-import-position
diff --git a/lib/html5lib/tests/test_meta.py b/lib/html5lib/tests/test_meta.py
new file mode 100644
index 00000000..dd02dd7f
--- /dev/null
+++ b/lib/html5lib/tests/test_meta.py
@@ -0,0 +1,41 @@
+from __future__ import absolute_import, division, unicode_literals
+
+import six
+from mock import Mock
+
+from . import support
+
+
+def _createReprMock(r):
+    """Creates a mock with a __repr__ returning r
+
+    Also provides __str__ mock with default mock behaviour"""
+    mock = Mock()
+    mock.__repr__ = Mock()
+    mock.__repr__.return_value = r
+    mock.__str__ = Mock(wraps=mock.__str__)
+    return mock
+
+
+def test_errorMessage():
+    # Create mock objects to take repr of
+    input = _createReprMock("1")
+    expected = _createReprMock("2")
+    actual = _createReprMock("3")
+
+    # Run the actual test
+    r = support.errorMessage(input, expected, actual)
+
+    # Assertions!
+    if six.PY2:
+        assert b"Input:\n1\nExpected:\n2\nReceived\n3\n" == r
+    else:
+        assert six.PY3
+        assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r
+
+    assert input.__repr__.call_count == 1
+    assert expected.__repr__.call_count == 1
+    assert actual.__repr__.call_count == 1
+    assert not input.__str__.called
+    assert not expected.__str__.called
+    assert not actual.__str__.called
diff --git a/lib/html5lib/tests/test_optionaltags_filter.py b/lib/html5lib/tests/test_optionaltags_filter.py
new file mode 100644
index 00000000..cd282149
--- /dev/null
+++ b/lib/html5lib/tests/test_optionaltags_filter.py
@@ -0,0 +1,7 @@
+from __future__ import absolute_import, division, unicode_literals
+
+from html5lib.filters.optionaltags import Filter
+
+
+def test_empty():
+    assert list(Filter([])) == []
diff --git a/lib/html5lib/tests/test_parser2.py b/lib/html5lib/tests/test_parser2.py
new file mode 100644
index 00000000..879d2447
--- /dev/null
+++ b/lib/html5lib/tests/test_parser2.py
@@ -0,0 +1,94 @@
+from __future__ import absolute_import, division, unicode_literals
+
+from six import PY2, text_type
+
+import io
+
+from . import support  # noqa
+
+from html5lib.constants import namespaces
+from html5lib import parse, parseFragment, HTMLParser
+
+
+# tests that aren't autogenerated from text files
+def test_assertDoctypeCloneable():
+    doc = parse('<!DOCTYPE HTML>', treebuilder="dom")
+    assert doc.cloneNode(True) is not None
+
+
+def test_line_counter():
+    # http://groups.google.com/group/html5lib-discuss/browse_frm/thread/f4f00e4a2f26d5c0
+    assert parse("<pre>\nx\n&gt;\n</pre>") is not None
+
+
+def test_namespace_html_elements_0_dom():
+    doc = parse("<html></html>",
+                treebuilder="dom",
+                namespaceHTMLElements=True)
+    assert doc.childNodes[0].namespaceURI == namespaces["html"]
+
+
+def test_namespace_html_elements_1_dom():
+    doc = parse("<html></html>",
+                treebuilder="dom",
+                namespaceHTMLElements=False)
+    assert doc.childNodes[0].namespaceURI is None
+
+
+def test_namespace_html_elements_0_etree():
+    doc = parse("<html></html>",
+                treebuilder="etree",
+                namespaceHTMLElements=True)
+    assert doc.tag == "{%s}html" % (namespaces["html"],)
+
+
+def test_namespace_html_elements_1_etree():
+    doc = parse("<html></html>",
+                treebuilder="etree",
+                namespaceHTMLElements=False)
+    assert doc.tag == "html"
+
+
+def test_unicode_file():
+    assert parse(io.StringIO("a")) is not None
+
+
+def test_debug_log():
+    parser = HTMLParser(debug=True)
+    parser.parse("<!doctype html><title>a</title><p>b<script>c</script>d</p>e")
+
+    expected = [('dataState', 'InitialPhase', 'InitialPhase', 'processDoctype', {'type': 'Doctype'}),
+                ('dataState', 'BeforeHtmlPhase', 'BeforeHtmlPhase', 'processStartTag', {'name': 'title', 'type': 'StartTag'}),
+                ('dataState', 'BeforeHeadPhase', 'BeforeHeadPhase', 'processStartTag', {'name': 'title', 'type': 'StartTag'}),
+                ('dataState', 'InHeadPhase', 'InHeadPhase', 'processStartTag', {'name': 'title', 'type': 'StartTag'}),
+                ('rcdataState', 'TextPhase', 'TextPhase', 'processCharacters', {'type': 'Characters'}),
+                ('dataState', 'TextPhase', 'TextPhase', 'processEndTag', {'name': 'title', 'type': 'EndTag'}),
+                ('dataState', 'InHeadPhase', 'InHeadPhase', 'processStartTag', {'name': 'p', 'type': 'StartTag'}),
+                ('dataState', 'AfterHeadPhase', 'AfterHeadPhase', 'processStartTag', {'name': 'p', 'type': 'StartTag'}),
+                ('dataState', 'InBodyPhase', 'InBodyPhase', 'processStartTag', {'name': 'p', 'type': 'StartTag'}),
+                ('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'}),
+                ('dataState', 'InBodyPhase', 'InBodyPhase', 'processStartTag', {'name': 'script', 'type': 'StartTag'}),
+                ('dataState', 'InBodyPhase', 'InHeadPhase', 'processStartTag', {'name': 'script', 'type': 'StartTag'}),
+                ('scriptDataState', 'TextPhase', 'TextPhase', 'processCharacters', {'type': 'Characters'}),
+                ('dataState', 'TextPhase', 'TextPhase', 'processEndTag', {'name': 'script', 'type': 'EndTag'}),
+                ('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'}),
+                ('dataState', 'InBodyPhase', 'InBodyPhase', 'processEndTag', {'name': 'p', 'type': 'EndTag'}),
+                ('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'})]
+
+    if PY2:
+        for i, log in enumerate(expected):
+            log = [x.encode("ascii") if isinstance(x, text_type) else x for x in log]
+            expected[i] = tuple(log)
+
+    assert parser.log == expected
+
+
+def test_no_duplicate_clone():
+    frag = parseFragment("<b><em><foo><foob><fooc><aside></b></em>")
+    assert len(frag) == 2
+
+
+def test_self_closing_col():
+    parser = HTMLParser()
+    parser.parseFragment('<table><colgroup><col /></colgroup></table>')
+    assert not parser.errors
diff --git a/lib/html5lib/tests/test_sanitizer.py b/lib/html5lib/tests/test_sanitizer.py
new file mode 100644
index 00000000..f3faeb80
--- /dev/null
+++ b/lib/html5lib/tests/test_sanitizer.py
@@ -0,0 +1,133 @@
+from __future__ import absolute_import, division, unicode_literals
+
+import pytest
+
+from html5lib import constants, parseFragment, serialize
+from html5lib.filters import sanitizer
+
+
+def sanitize_html(stream):
+    parsed = parseFragment(stream)
+    with pytest.deprecated_call():
+        serialized = serialize(parsed,
+                               sanitize=True,
+                               omit_optional_tags=False,
+                               use_trailing_solidus=True,
+                               space_before_trailing_solidus=False,
+                               quote_attr_values="always",
+                               quote_char='"',
+                               alphabetical_attributes=True)
+    return serialized
+
+
+def test_should_handle_astral_plane_characters():
+    sanitized = sanitize_html("<p>&#x1d4b5; &#x1d538;</p>")
+    expected = '<p>\U0001d4b5 \U0001d538</p>'
+    assert expected == sanitized
+
+
+def test_should_allow_relative_uris():
+    sanitized = sanitize_html('<p><a href="/example.com"></a></p>')
+    expected = '<p><a href="/example.com"></a></p>'
+    assert expected == sanitized
+
+
+def test_invalid_data_uri():
+    sanitized = sanitize_html('<audio controls="" src="data:foobar"></audio>')
+    expected = '<audio controls></audio>'
+    assert expected == sanitized
+
+
+def test_invalid_ipv6_url():
+    sanitized = sanitize_html('<a href="h://]">')
+    expected = "<a></a>"
+    assert expected == sanitized
+
+
+def test_data_uri_disallowed_type():
+    sanitized = sanitize_html('<audio controls="" src="data:text/html,<html>"></audio>')
+    expected = "<audio controls></audio>"
+    assert expected == sanitized
+
+
+def param_sanitizer():
+    """Yield (test id, expected markup, markup to sanitize) triples for test_sanitizer."""
+    for ns, tag_name in sanitizer.allowed_elements:
+        if ns != constants.namespaces["html"]:
+            continue
+        if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td',
+                        'tfoot', 'th', 'thead', 'tr', 'select']:
+            continue  # TODO
+        if tag_name == 'image':
+            yield ("test_should_allow_%s_tag" % tag_name,
+                   "<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
+                   "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
+        elif tag_name == 'br':
+            yield ("test_should_allow_%s_tag" % tag_name,
+                   "<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
+                   "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
+        elif tag_name in constants.voidElements:
+            yield ("test_should_allow_%s_tag" % tag_name,
+                   "<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz" % tag_name,
+                   "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
+        else:
+            yield ("test_should_allow_%s_tag" % tag_name,
+                   "<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>" % (tag_name, tag_name),
+                   "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
+
+    for ns, attribute_name in sanitizer.allowed_attributes:
+        if ns is not None:
+            continue
+        if attribute_name != attribute_name.lower() or attribute_name == 'style':
+            continue  # TODO: mixed-case attribute names; 'style' is skipped too
+        attribute_value = 'foo'
+        if attribute_name in sanitizer.attr_val_is_uri:
+            attribute_value = '%s://sub.domain.tld/path/object.ext' % sanitizer.allowed_protocols[0]
+        yield ("test_should_allow_%s_attribute" % attribute_name,
+               "<p %s=\"%s\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>" % (attribute_name, attribute_value),
+               "<p %s='%s'>foo <bad>bar</bad> baz</p>" % (attribute_name, attribute_value))
+
+    for protocol in sanitizer.allowed_protocols:
+        rest_of_uri = '//sub.domain.tld/path/object.ext'
+        if protocol == 'data':
+            rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
+        yield ("test_should_allow_%s_uris" % protocol,
+               "<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
+               """<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))
+
+    # Repeat every URI with its protocol upper-cased.
+    for protocol in sanitizer.allowed_protocols:
+        rest_of_uri = '//sub.domain.tld/path/object.ext'
+        if protocol == 'data':
+            rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
+        protocol = protocol.upper()
+        yield ("test_should_allow_uppercase_%s_uris" % protocol,
+               "<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
+               """<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))
+
+
+@pytest.mark.parametrize("expected, input",
+                         (pytest.param(expected, input, id=id)
+                          for id, expected, input in param_sanitizer()))
+def test_sanitizer(expected, input):
+    parsed = parseFragment(expected)
+    expected = serialize(parsed,
+                         omit_optional_tags=False,
+                         use_trailing_solidus=True,
+                         space_before_trailing_solidus=False,
+                         quote_attr_values="always",
+                         quote_char='"',
+                         alphabetical_attributes=True)
+    assert expected == sanitize_html(input)
+
+
+def test_lowercase_color_codes_in_style():
+    sanitized = sanitize_html("<p style=\"border: 1px solid #a2a2a2;\"></p>")
+    expected = '<p style=\"border: 1px solid #a2a2a2;\"></p>'
+    assert expected == sanitized
+
+
+def test_uppercase_color_codes_in_style():
+    sanitized = sanitize_html("<p style=\"border: 1px solid #A2A2A2;\"></p>")
+    expected = '<p style=\"border: 1px solid #A2A2A2;\"></p>'
+    assert expected == sanitized
diff --git a/lib/html5lib/tests/test_serializer.py b/lib/html5lib/tests/test_serializer.py
new file mode 100644
index 00000000..bce62459
--- /dev/null
+++ b/lib/html5lib/tests/test_serializer.py
@@ -0,0 +1,226 @@
+from __future__ import absolute_import, division, unicode_literals
+
+import os
+import json
+
+import pytest
+
+from .support import get_data_files
+
+from html5lib import constants
+from html5lib.filters.lint import Filter as Lint
+from html5lib.serializer import HTMLSerializer, serialize
+from html5lib.treewalkers.base import TreeWalker
+
+# pylint:disable=wrong-import-position
+optionals_loaded = []
+
+try:
+    from lxml import etree
+    optionals_loaded.append("lxml")
+except ImportError:
+    pass
+# pylint:enable=wrong-import-position
+
+default_namespace = constants.namespaces["html"]
+
+
+class JsonWalker(TreeWalker):
+    def __iter__(self):
+        for token in self.tree:
+            type = token[0]
+            if type == "StartTag":
+                if len(token) == 4:
+                    namespace, name, attrib = token[1:4]
+                else:
+                    namespace = default_namespace
+                    name, attrib = token[1:3]
+                yield self.startTag(namespace, name, self._convertAttrib(attrib))
+            elif type == "EndTag":
+                if len(token) == 3:
+                    namespace, name = token[1:3]
+                else:
+                    namespace = default_namespace
+                    name = token[1]
+                yield self.endTag(namespace, name)
+            elif type == "EmptyTag":
+                if len(token) == 4:
+                    namespace, name, attrib = token[1:]
+                else:
+                    namespace = default_namespace
+                    name, attrib = token[1:]
+                for token in self.emptyTag(namespace, name, self._convertAttrib(attrib)):
+                    yield token
+            elif type == "Comment":
+                yield self.comment(token[1])
+            elif type in ("Characters", "SpaceCharacters"):
+                for token in self.text(token[1]):
+                    yield token
+            elif type == "Doctype":
+                if len(token) == 4:
+                    yield self.doctype(token[1], token[2], token[3])
+                elif len(token) == 3:
+                    yield self.doctype(token[1], token[2])
+                else:
+                    yield self.doctype(token[1])
+            else:
+                raise ValueError("Unknown token type: " + type)
+
+    def _convertAttrib(self, attribs):
+        """html5lib tree-walkers use a dict of (namespace, name): value for
+        attributes, but JSON cannot represent this. Convert from the format
+        in the serializer tests (a list of dicts with "namespace", "name",
+        and "value" as keys) to html5lib's tree-walker format."""
+        attrs = {}
+        for attrib in attribs:
+            name = (attrib["namespace"], attrib["name"])
+            assert(name not in attrs)
+            attrs[name] = attrib["value"]
+        return attrs
+
+
+def serialize_html(input, options):
+    options = {str(k): v for k, v in options.items()}
+    encoding = options.get("encoding", None)
+    if "encoding" in options:
+        del options["encoding"]
+    stream = Lint(JsonWalker(input), False)
+    serializer = HTMLSerializer(alphabetical_attributes=True, **options)
+    return serializer.render(stream, encoding)
+
+
+def throwsWithLatin1(input):
+    with pytest.raises(UnicodeEncodeError):
+        serialize_html(input, {"encoding": "iso-8859-1"})
+
+
+def testDoctypeName():
+    throwsWithLatin1([["Doctype", "\u0101"]])
+
+
+def testDoctypePublicId():
+    throwsWithLatin1([["Doctype", "potato", "\u0101"]])
+
+
+def testDoctypeSystemId():
+    throwsWithLatin1([["Doctype", "potato", "potato", "\u0101"]])
+
+
+def testCdataCharacters():
+    test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]],
+                    ["<style>&amacr;"], {"encoding": "iso-8859-1"})
+
+
+def testCharacters():
+    test_serializer([["Characters", "\u0101"]],
+                    ["&amacr;"], {"encoding": "iso-8859-1"})
+
+
+def testStartTagName():
+    throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "\u0101", []]])
+
+
+def testAttributeName():
+    throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": None, "name": "\u0101", "value": "potato"}]]])
+
+
+def testAttributeValue():
+    test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "span",
+                      [{"namespace": None, "name": "potato", "value": "\u0101"}]]],
+                    ["<span potato=&amacr;>"], {"encoding": "iso-8859-1"})
+
+
+def testEndTagName():
+    throwsWithLatin1([["EndTag", "http://www.w3.org/1999/xhtml", "\u0101"]])
+
+
+def testComment():
+    throwsWithLatin1([["Comment", "\u0101"]])
+
+
+def testThrowsUnknownOption():
+    with pytest.raises(TypeError):
+        HTMLSerializer(foobar=None)
+
+
+@pytest.mark.parametrize("c", list("\t\n\u000C\x20\r\"'=<>`"))
+def testSpecQuoteAttribute(c):
+    input_ = [["StartTag", "http://www.w3.org/1999/xhtml", "span",
+               [{"namespace": None, "name": "foo", "value": c}]]]
+    if c == '"':
+        output_ = ["<span foo='%s'>" % c]
+    else:
+        output_ = ['<span foo="%s">' % c]
+    options_ = {"quote_attr_values": "spec"}
+    test_serializer(input_, output_, options_)
+
+
+@pytest.mark.parametrize("c", list("\t\n\u000C\x20\r\"'=<>`"
+                                   "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
+                                   "\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15"
+                                   "\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+                                   "\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000"
+                                   "\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
+                                   "\u2008\u2009\u200a\u2028\u2029\u202f\u205f"
+                                   "\u3000"))
+def testLegacyQuoteAttribute(c):
+    input_ = [["StartTag", "http://www.w3.org/1999/xhtml", "span",
+               [{"namespace": None, "name": "foo", "value": c}]]]
+    if c == '"':
+        output_ = ["<span foo='%s'>" % c]
+    else:
+        output_ = ['<span foo="%s">' % c]
+    options_ = {"quote_attr_values": "legacy"}
+    test_serializer(input_, output_, options_)
+
+
+@pytest.fixture
+def lxml_parser():
+    return etree.XMLParser(resolve_entities=False)
+
+
+@pytest.mark.skipif("lxml" not in optionals_loaded, reason="lxml not importable")
+def testEntityReplacement(lxml_parser):
+    doc = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>'
+    tree = etree.fromstring(doc, parser=lxml_parser).getroottree()
+    result = serialize(tree, tree="lxml", omit_optional_tags=False)
+    assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>\u03B2</html>'
+
+
+@pytest.mark.skipif("lxml" not in optionals_loaded, reason="lxml not importable")
+def testEntityXML(lxml_parser):
+    doc = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>'
+    tree = etree.fromstring(doc, parser=lxml_parser).getroottree()
+    result = serialize(tree, tree="lxml", omit_optional_tags=False)
+    assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>'
+
+
+@pytest.mark.skipif("lxml" not in optionals_loaded, reason="lxml not importable")
+def testEntityNoResolve(lxml_parser):
+    doc = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>'
+    tree = etree.fromstring(doc, parser=lxml_parser).getroottree()
+    result = serialize(tree, tree="lxml", omit_optional_tags=False,
+                                  resolve_entities=False)
+    assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>'
+
+
+def param_serializer():
+    for filename in get_data_files('serializer-testdata', '*.test', os.path.dirname(__file__)):
+        with open(filename) as fp:
+            tests = json.load(fp)
+            for test in tests['tests']:
+                yield test["input"], test["expected"], test.get("options", {})
+
+
+@pytest.mark.parametrize("input, expected, options", param_serializer())
+def test_serializer(input, expected, options):
+    encoding = options.get("encoding", None)
+
+    if encoding:
+        expected = list(map(lambda x: x.encode(encoding), expected))
+
+    result = serialize_html(input, options)
+    if len(expected) == 1:
+        assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
+    elif result not in expected:
+        assert False, "Expected: %s, Received: %s" % (expected, result)
diff --git a/lib/html5lib/tests/test_stream.py b/lib/html5lib/tests/test_stream.py
new file mode 100644
index 00000000..efe9b472
--- /dev/null
+++ b/lib/html5lib/tests/test_stream.py
@@ -0,0 +1,325 @@
+from __future__ import absolute_import, division, unicode_literals
+
+from . import support  # noqa
+
+import codecs
+import sys
+from io import BytesIO, StringIO
+
+import pytest
+
+import six
+from six.moves import http_client, urllib
+
+from html5lib._inputstream import (BufferedStream, HTMLInputStream,
+                                   HTMLUnicodeInputStream, HTMLBinaryInputStream)
+from html5lib._utils import supports_lone_surrogates
+
+
+def test_basic():
+    s = b"abc"
+    fp = BufferedStream(BytesIO(s))
+    read = fp.read(10)
+    assert read == s
+
+
+def test_read_length():
+    fp = BufferedStream(BytesIO(b"abcdef"))
+    read1 = fp.read(1)
+    assert read1 == b"a"
+    read2 = fp.read(2)
+    assert read2 == b"bc"
+    read3 = fp.read(3)
+    assert read3 == b"def"
+    read4 = fp.read(4)
+    assert read4 == b""
+
+
+def test_tell():
+    fp = BufferedStream(BytesIO(b"abcdef"))
+    read1 = fp.read(1)
+    assert read1 == b"a"
+    assert fp.tell() == 1
+    read2 = fp.read(2)
+    assert read2 == b"bc"
+    assert fp.tell() == 3
+    read3 = fp.read(3)
+    assert read3 == b"def"
+    assert fp.tell() == 6
+    read4 = fp.read(4)
+    assert read4 == b""
+    assert fp.tell() == 6
+
+
+def test_seek():
+    fp = BufferedStream(BytesIO(b"abcdef"))
+    read1 = fp.read(1)
+    assert read1 == b"a"
+    fp.seek(0)
+    read2 = fp.read(1)
+    assert read2 == b"a"
+    read3 = fp.read(2)
+    assert read3 == b"bc"
+    fp.seek(2)
+    read4 = fp.read(2)
+    assert read4 == b"cd"
+    fp.seek(4)
+    read5 = fp.read(2)
+    assert read5 == b"ef"
+
+
+def test_seek_tell():
+    fp = BufferedStream(BytesIO(b"abcdef"))
+    read1 = fp.read(1)
+    assert read1 == b"a"
+    assert fp.tell() == 1
+    fp.seek(0)
+    read2 = fp.read(1)
+    assert read2 == b"a"
+    assert fp.tell() == 1
+    read3 = fp.read(2)
+    assert read3 == b"bc"
+    assert fp.tell() == 3
+    fp.seek(2)
+    read4 = fp.read(2)
+    assert read4 == b"cd"
+    assert fp.tell() == 4
+    fp.seek(4)
+    read5 = fp.read(2)
+    assert read5 == b"ef"
+    assert fp.tell() == 6
+
+
+class HTMLUnicodeInputStreamShortChunk(HTMLUnicodeInputStream):
+    _defaultChunkSize = 2
+
+
+class HTMLBinaryInputStreamShortChunk(HTMLBinaryInputStream):
+    _defaultChunkSize = 2
+
+
+def test_char_ascii():
+    stream = HTMLInputStream(b"'", override_encoding='ascii')
+    assert stream.charEncoding[0].name == 'windows-1252'
+    assert stream.char() == "'"
+
+
+def test_char_utf8():
+    stream = HTMLInputStream('\u2018'.encode('utf-8'), override_encoding='utf-8')
+    assert stream.charEncoding[0].name == 'utf-8'
+    assert stream.char() == '\u2018'
+
+
+def test_char_win1252():
+    stream = HTMLInputStream("\xa9\xf1\u2019".encode('windows-1252'))
+    assert stream.charEncoding[0].name == 'windows-1252'
+    assert stream.char() == "\xa9"
+    assert stream.char() == "\xf1"
+    assert stream.char() == "\u2019"
+
+
+def test_bom():
+    stream = HTMLInputStream(codecs.BOM_UTF8 + b"'")
+    assert stream.charEncoding[0].name == 'utf-8'
+    assert stream.char() == "'"
+
+
+def test_utf_16():
+    stream = HTMLInputStream((' ' * 1025).encode('utf-16'))
+    assert stream.charEncoding[0].name in ['utf-16le', 'utf-16be']
+    assert len(stream.charsUntil(' ', True)) == 1025
+
+
+def test_newlines():
+    stream = HTMLBinaryInputStreamShortChunk(codecs.BOM_UTF8 + b"a\nbb\r\nccc\rddddxe")
+    assert stream.position() == (1, 0)
+    assert stream.charsUntil('c') == "a\nbb\n"
+    assert stream.position() == (3, 0)
+    assert stream.charsUntil('x') == "ccc\ndddd"
+    assert stream.position() == (4, 4)
+    assert stream.charsUntil('e') == "x"
+    assert stream.position() == (4, 5)
+
+
+def test_newlines2():
+    size = HTMLUnicodeInputStream._defaultChunkSize
+    stream = HTMLInputStream("\r" * size + "\n")
+    assert stream.charsUntil('x') == "\n" * size
+
+
+def test_position():
+    stream = HTMLBinaryInputStreamShortChunk(codecs.BOM_UTF8 + b"a\nbb\nccc\nddde\nf\ngh")
+    assert stream.position() == (1, 0)
+    assert stream.charsUntil('c') == "a\nbb\n"
+    assert stream.position() == (3, 0)
+    stream.unget("\n")
+    assert stream.position() == (2, 2)
+    assert stream.charsUntil('c') == "\n"
+    assert stream.position() == (3, 0)
+    stream.unget("\n")
+    assert stream.position() == (2, 2)
+    assert stream.char() == "\n"
+    assert stream.position() == (3, 0)
+    assert stream.charsUntil('e') == "ccc\nddd"
+    assert stream.position() == (4, 3)
+    assert stream.charsUntil('h') == "e\nf\ng"
+    assert stream.position() == (6, 1)
+
+
+def test_position2():
+    stream = HTMLUnicodeInputStreamShortChunk("abc\nd")
+    assert stream.position() == (1, 0)
+    assert stream.char() == "a"
+    assert stream.position() == (1, 1)
+    assert stream.char() == "b"
+    assert stream.position() == (1, 2)
+    assert stream.char() == "c"
+    assert stream.position() == (1, 3)
+    assert stream.char() == "\n"
+    assert stream.position() == (2, 0)
+    assert stream.char() == "d"
+    assert stream.position() == (2, 1)
+
+
+def test_python_issue_20007():
+    """
+    Make sure we have a work-around for Python bug #20007
+    http://bugs.python.org/issue20007
+    """
+    class FakeSocket(object):
+        def makefile(self, _mode, _bufsize=None):
+            # pylint:disable=unused-argument
+            return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText")
+
+    source = http_client.HTTPResponse(FakeSocket())
+    source.begin()
+    stream = HTMLInputStream(source)
+    assert stream.charsUntil(" ") == "Text"
+
+
+def test_python_issue_20007_b():
+    """
+    Make sure we have a work-around for Python bug #20007
+    http://bugs.python.org/issue20007
+    """
+    if six.PY2:
+        return
+
+    class FakeSocket(object):
+        def makefile(self, _mode, _bufsize=None):
+            # pylint:disable=unused-argument
+            return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText")
+
+    source = http_client.HTTPResponse(FakeSocket())
+    source.begin()
+    wrapped = urllib.response.addinfourl(source, source.msg, "http://example.com")
+    stream = HTMLInputStream(wrapped)
+    assert stream.charsUntil(" ") == "Text"
+
+
+@pytest.mark.parametrize("inp,num",
+                         [("\u0000", 0),
+                          ("\u0001", 1),
+                          ("\u0008", 1),
+                          ("\u0009", 0),
+                          ("\u000A", 0),
+                          ("\u000B", 1),
+                          ("\u000C", 0),
+                          ("\u000D", 0),
+                          ("\u000E", 1),
+                          ("\u001F", 1),
+                          ("\u0020", 0),
+                          ("\u007E", 0),
+                          ("\u007F", 1),
+                          ("\u009F", 1),
+                          ("\u00A0", 0),
+                          ("\uFDCF", 0),
+                          ("\uFDD0", 1),
+                          ("\uFDEF", 1),
+                          ("\uFDF0", 0),
+                          ("\uFFFD", 0),
+                          ("\uFFFE", 1),
+                          ("\uFFFF", 1),
+                          ("\U0001FFFD", 0),
+                          ("\U0001FFFE", 1),
+                          ("\U0001FFFF", 1),
+                          ("\U0002FFFD", 0),
+                          ("\U0002FFFE", 1),
+                          ("\U0002FFFF", 1),
+                          ("\U0003FFFD", 0),
+                          ("\U0003FFFE", 1),
+                          ("\U0003FFFF", 1),
+                          ("\U0004FFFD", 0),
+                          ("\U0004FFFE", 1),
+                          ("\U0004FFFF", 1),
+                          ("\U0005FFFD", 0),
+                          ("\U0005FFFE", 1),
+                          ("\U0005FFFF", 1),
+                          ("\U0006FFFD", 0),
+                          ("\U0006FFFE", 1),
+                          ("\U0006FFFF", 1),
+                          ("\U0007FFFD", 0),
+                          ("\U0007FFFE", 1),
+                          ("\U0007FFFF", 1),
+                          ("\U0008FFFD", 0),
+                          ("\U0008FFFE", 1),
+                          ("\U0008FFFF", 1),
+                          ("\U0009FFFD", 0),
+                          ("\U0009FFFE", 1),
+                          ("\U0009FFFF", 1),
+                          ("\U000AFFFD", 0),
+                          ("\U000AFFFE", 1),
+                          ("\U000AFFFF", 1),
+                          ("\U000BFFFD", 0),
+                          ("\U000BFFFE", 1),
+                          ("\U000BFFFF", 1),
+                          ("\U000CFFFD", 0),
+                          ("\U000CFFFE", 1),
+                          ("\U000CFFFF", 1),
+                          ("\U000DFFFD", 0),
+                          ("\U000DFFFE", 1),
+                          ("\U000DFFFF", 1),
+                          ("\U000EFFFD", 0),
+                          ("\U000EFFFE", 1),
+                          ("\U000EFFFF", 1),
+                          ("\U000FFFFD", 0),
+                          ("\U000FFFFE", 1),
+                          ("\U000FFFFF", 1),
+                          ("\U0010FFFD", 0),
+                          ("\U0010FFFE", 1),
+                          ("\U0010FFFF", 1),
+                          ("\x01\x01\x01", 3),
+                          ("a\x01a\x01a\x01a", 3)])
+def test_invalid_codepoints(inp, num):
+    stream = HTMLUnicodeInputStream(StringIO(inp))
+    for _i in range(len(inp)):
+        stream.char()
+    assert len(stream.errors) == num
+
+
+@pytest.mark.skipif(not supports_lone_surrogates, reason="doesn't support lone surrogates")
+@pytest.mark.parametrize("inp,num",
+                         [("'\\uD7FF'", 0),
+                          ("'\\uD800'", 1),
+                          ("'\\uDBFF'", 1),
+                          ("'\\uDC00'", 1),
+                          ("'\\uDFFF'", 1),
+                          ("'\\uE000'", 0),
+                          ("'\\uD800\\uD800\\uD800'", 3),
+                          ("'a\\uD800a\\uD800a\\uD800a'", 3),
+                          ("'\\uDFFF\\uDBFF'", 2),
+                          pytest.param(
+                              "'\\uDBFF\\uDFFF'", 2,
+                              marks=pytest.mark.skipif(
+                                  sys.maxunicode == 0xFFFF,
+                                  reason="narrow Python"))])
+def test_invalid_codepoints_surrogates(inp, num):
+    inp = eval(inp)  # pylint:disable=eval-used
+    fp = StringIO(inp)
+    if ord(max(fp.read())) > 0xFFFF:
+        pytest.skip("StringIO altered string")
+    fp.seek(0)
+    stream = HTMLUnicodeInputStream(fp)
+    for _i in range(len(inp)):
+        stream.char()
+    assert len(stream.errors) == num
diff --git a/lib/html5lib/tests/test_tokenizer2.py b/lib/html5lib/tests/test_tokenizer2.py
new file mode 100644
index 00000000..158d847a
--- /dev/null
+++ b/lib/html5lib/tests/test_tokenizer2.py
@@ -0,0 +1,66 @@
+from __future__ import absolute_import, division, unicode_literals
+
+import io
+
+from six import unichr, text_type
+
+from html5lib._tokenizer import HTMLTokenizer
+from html5lib.constants import tokenTypes
+
+
+def ignore_parse_errors(toks):
+    for tok in toks:
+        if tok['type'] != tokenTypes['ParseError']:
+            yield tok
+
+
+def test_maintain_attribute_order():
+    # generate loads to maximize the chance a hash-based mutation will occur
+    attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
+    stream = io.StringIO("<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + ">")
+
+    toks = HTMLTokenizer(stream)
+    out = list(ignore_parse_errors(toks))
+
+    assert len(out) == 1
+    assert out[0]['type'] == tokenTypes['StartTag']
+
+    attrs_tok = out[0]['data']
+    assert len(attrs_tok) == len(attrs)
+
+    for (in_name, in_value), (out_name, out_value) in zip(attrs, attrs_tok.items()):
+        assert in_name == out_name
+        assert in_value == out_value
+
+
+def test_duplicate_attribute():
+    stream = io.StringIO("<span a=1 a=2 a=3>")
+
+    toks = HTMLTokenizer(stream)
+    out = list(ignore_parse_errors(toks))
+
+    assert len(out) == 1
+    assert out[0]['type'] == tokenTypes['StartTag']
+
+    attrs_tok = out[0]['data']
+    assert len(attrs_tok) == 1
+    assert list(attrs_tok.items()) == [('a', '1')]
+
+
+def test_maintain_duplicate_attribute_order():
+    # generate loads to maximize the chance a hash-based mutation will occur
+    attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
+    stream = io.StringIO("<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + " a=100>")
+
+    toks = HTMLTokenizer(stream)
+    out = list(ignore_parse_errors(toks))
+
+    assert len(out) == 1
+    assert out[0]['type'] == tokenTypes['StartTag']
+
+    attrs_tok = out[0]['data']
+    assert len(attrs_tok) == len(attrs)
+
+    for (in_name, in_value), (out_name, out_value) in zip(attrs, attrs_tok.items()):
+        assert in_name == out_name
+        assert in_value == out_value
diff --git a/lib/html5lib/tests/test_treeadapters.py b/lib/html5lib/tests/test_treeadapters.py
new file mode 100644
index 00000000..95e56c00
--- /dev/null
+++ b/lib/html5lib/tests/test_treeadapters.py
@@ -0,0 +1,40 @@
+from __future__ import absolute_import, division, unicode_literals
+
+from . import support  # noqa
+
+import html5lib
+from html5lib.treeadapters import sax
+from html5lib.treewalkers import getTreeWalker
+
+
+def test_to_sax():
+    handler = support.TracingSaxHandler()
+    tree = html5lib.parse("""<html xml:lang="en">
+        <title>Directory Listing</title>
+        <a href="/"><b/></p>
+    """, treebuilder="etree")
+    walker = getTreeWalker("etree")
+    sax.to_sax(walker(tree), handler)
+    expected = [
+        'startDocument',
+        ('startElementNS', ('http://www.w3.org/1999/xhtml', 'html'),
+            'html', {(None, 'xml:lang'): 'en'}),
+        ('startElementNS', ('http://www.w3.org/1999/xhtml', 'head'), 'head', {}),
+        ('startElementNS', ('http://www.w3.org/1999/xhtml', 'title'), 'title', {}),
+        ('characters', 'Directory Listing'),
+        ('endElementNS', ('http://www.w3.org/1999/xhtml', 'title'), 'title'),
+        ('characters', '\n        '),
+        ('endElementNS', ('http://www.w3.org/1999/xhtml', 'head'), 'head'),
+        ('startElementNS', ('http://www.w3.org/1999/xhtml', 'body'), 'body', {}),
+        ('startElementNS', ('http://www.w3.org/1999/xhtml', 'a'), 'a', {(None, 'href'): '/'}),
+        ('startElementNS', ('http://www.w3.org/1999/xhtml', 'b'), 'b', {}),
+        ('startElementNS', ('http://www.w3.org/1999/xhtml', 'p'), 'p', {}),
+        ('endElementNS', ('http://www.w3.org/1999/xhtml', 'p'), 'p'),
+        ('characters', '\n    '),
+        ('endElementNS', ('http://www.w3.org/1999/xhtml', 'b'), 'b'),
+        ('endElementNS', ('http://www.w3.org/1999/xhtml', 'a'), 'a'),
+        ('endElementNS', ('http://www.w3.org/1999/xhtml', 'body'), 'body'),
+        ('endElementNS', ('http://www.w3.org/1999/xhtml', 'html'), 'html'),
+        'endDocument',
+    ]
+    assert expected == handler.visited
diff --git a/lib/html5lib/tests/test_treewalkers.py b/lib/html5lib/tests/test_treewalkers.py
new file mode 100644
index 00000000..780ca964
--- /dev/null
+++ b/lib/html5lib/tests/test_treewalkers.py
@@ -0,0 +1,205 @@
+from __future__ import absolute_import, division, unicode_literals
+
+import itertools
+import sys
+
+from six import unichr, text_type
+import pytest
+
+try:
+    import lxml.etree
+except ImportError:
+    pass
+
+from .support import treeTypes
+
+from html5lib import html5parser, treewalkers
+from html5lib.filters.lint import Filter as Lint
+
+import re
+attrlist = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+", re.M)
+
+
+def sortattrs(x):
+    lines = x.group(0).split("\n")
+    lines.sort()
+    return "\n".join(lines)
+
+
+def test_all_tokens():
+    expected = [
+        {'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'},
+        {'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
+        {'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
+        {'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
+        {'data': 'a', 'type': 'Characters'},
+        {'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
+        {'data': 'b', 'type': 'Characters'},
+        {'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
+        {'data': 'c', 'type': 'Characters'},
+        {'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
+        {'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'}
+    ]
+    for _, treeCls in sorted(treeTypes.items()):
+        if treeCls is None:
+            continue
+        p = html5parser.HTMLParser(tree=treeCls["builder"])
+        document = p.parse("<html><head></head><body>a<div>b</div>c</body></html>")
+        document = treeCls.get("adapter", lambda x: x)(document)
+        output = Lint(treeCls["walker"](document))
+        for expectedToken, outputToken in zip(expected, output):
+            assert expectedToken == outputToken
+
+
+def set_attribute_on_first_child(docfrag, name, value, treeName):
+    """naively sets an attribute on the first child of the document
+    fragment passed in"""
+    setter = {'ElementTree': lambda d: d[0].set,
+              'DOM': lambda d: d.firstChild.setAttribute}
+    setter['cElementTree'] = setter['ElementTree']
+    try:
+        setter.get(treeName, setter['DOM'])(docfrag)(name, value)
+    except AttributeError:
+        setter['ElementTree'](docfrag)(name, value)
+
+
+def param_treewalker_six_mix():
+    """Yield str/unicode mix cases: native-str attributes get added to a parsed tree."""
+
+    # On Python 2.x string literals are of type str. Unless, like this
+    # file, the programmer imports unicode_literals from __future__.
+    # In that case, string literals become objects of type unicode.
+
+    # This test simulates a Py2 user, modifying attributes on a document
+    # fragment but not using the u'' syntax nor importing unicode_literals
+    sm_tests = [
+        ('<a href="http://example.com">Example</a>',
+         [(str('class'), str('test123'))],
+         '<a>\n  class="test123"\n  href="http://example.com"\n  "Example"'),
+
+        ('<link href="http://example.com/cow">',
+         [(str('rel'), str('alternate'))],
+         '<link>\n  href="http://example.com/cow"\n  rel="alternate"\n  "Example"')
+    ]
+
+    for tree in sorted(treeTypes.items()):
+        for intext, attrs, expected in sm_tests:
+            yield intext, expected, attrs, tree
+
+
+@pytest.mark.parametrize("intext, expected, attrs_to_add, tree", param_treewalker_six_mix())
+def test_treewalker_six_mix(intext, expected, attrs_to_add, tree):
+    """tests what happens when we add attributes to the intext"""
+    treeName, treeClass = tree
+    if treeClass is None:
+        pytest.skip("Treebuilder not loaded")
+    parser = html5parser.HTMLParser(tree=treeClass["builder"])
+    document = parser.parseFragment(intext)
+    for nom, val in attrs_to_add:
+        set_attribute_on_first_child(document, nom, val, treeName)
+
+    document = treeClass.get("adapter", lambda x: x)(document)
+    output = treewalkers.pprint(treeClass["walker"](document))
+    output = attrlist.sub(sortattrs, output)
+    if output not in expected:
+        raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))
+
+
+@pytest.mark.parametrize("tree,char", itertools.product(sorted(treeTypes.items()), ["x", "\u1234"]))
+def test_fragment_single_char(tree, char):
+    expected = [
+        {'data': char, 'type': 'Characters'}
+    ]
+
+    treeName, treeClass = tree
+    if treeClass is None:
+        pytest.skip("Treebuilder not loaded")
+
+    parser = html5parser.HTMLParser(tree=treeClass["builder"])
+    document = parser.parseFragment(char)
+    document = treeClass.get("adapter", lambda x: x)(document)
+    output = Lint(treeClass["walker"](document))
+
+    assert list(output) == expected
+
+
+@pytest.mark.skipif(treeTypes["lxml"] is None, reason="lxml not importable")
+def test_lxml_xml():
+    expected = [
+        {'data': {}, 'name': 'div', 'namespace': None, 'type': 'StartTag'},
+        {'data': {}, 'name': 'div', 'namespace': None, 'type': 'StartTag'},
+        {'name': 'div', 'namespace': None, 'type': 'EndTag'},
+        {'name': 'div', 'namespace': None, 'type': 'EndTag'}
+    ]
+
+    lxmltree = lxml.etree.fromstring('<div><div></div></div>')
+    walker = treewalkers.getTreeWalker('lxml')
+    output = Lint(walker(lxmltree))
+
+    assert list(output) == expected
+
+
+@pytest.mark.parametrize("treeName",
+                         [pytest.param(treeName, marks=[getattr(pytest.mark, treeName),
+                                                        pytest.mark.skipif(
+                                                            treeName != "lxml" or
+                                                            sys.version_info < (3, 7), reason="dict order undef")])
+                          for treeName in sorted(treeTypes.keys())])
+def test_maintain_attribute_order(treeName):
+    treeAPIs = treeTypes[treeName]
+    if treeAPIs is None:
+        pytest.skip("Treebuilder not loaded")
+
+    # generate loads to maximize the chance a hash-based mutation will occur
+    attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
+    data = "<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + ">"
+
+    parser = html5parser.HTMLParser(tree=treeAPIs["builder"])
+    document = parser.parseFragment(data)
+
+    document = treeAPIs.get("adapter", lambda x: x)(document)
+    output = list(Lint(treeAPIs["walker"](document)))
+
+    assert len(output) == 2
+    assert output[0]['type'] == 'StartTag'
+    assert output[1]['type'] == "EndTag"
+
+    attrs_out = output[0]['data']
+    assert len(attrs) == len(attrs_out)
+
+    for (in_name, in_value), (out_name, out_value) in zip(attrs, attrs_out.items()):
+        assert (None, in_name) == out_name
+        assert in_value == out_value
+
+
+@pytest.mark.parametrize("treeName",
+                         [pytest.param(treeName, marks=[getattr(pytest.mark, treeName),
+                                                        pytest.mark.skipif(
+                                                            treeName != "lxml" or
+                                                            sys.version_info < (3, 7), reason="dict order undef")])
+                          for treeName in sorted(treeTypes.keys())])
+def test_maintain_attribute_order_adjusted(treeName):
+    treeAPIs = treeTypes[treeName]
+    if treeAPIs is None:
+        pytest.skip("Treebuilder not loaded")
+
+    # fixed input: attributes whose names get adjusted (refx -> refX, xml:lang) among plain ones
+    data = "<svg a=1 refx=2 b=3 xml:lang=4 c=5>"
+
+    parser = html5parser.HTMLParser(tree=treeAPIs["builder"])
+    document = parser.parseFragment(data)
+
+    document = treeAPIs.get("adapter", lambda x: x)(document)
+    output = list(Lint(treeAPIs["walker"](document)))
+
+    assert len(output) == 2
+    assert output[0]['type'] == 'StartTag'
+    assert output[1]['type'] == "EndTag"
+
+    attrs_out = output[0]['data']
+
+    assert list(attrs_out.items()) == [((None, 'a'), '1'),
+                                       ((None, 'refX'), '2'),
+                                       ((None, 'b'), '3'),
+                                       (('http://www.w3.org/XML/1998/namespace', 'lang'), '4'),
+                                       ((None, 'c'), '5')]
diff --git a/lib/html5lib/tests/test_whitespace_filter.py b/lib/html5lib/tests/test_whitespace_filter.py
new file mode 100644
index 00000000..e9da6140
--- /dev/null
+++ b/lib/html5lib/tests/test_whitespace_filter.py
@@ -0,0 +1,125 @@
+from __future__ import absolute_import, division, unicode_literals
+
+from html5lib.filters.whitespace import Filter
+from html5lib.constants import spaceCharacters
+spaceCharacters = "".join(spaceCharacters)
+
+
+def runTest(input, expected):
+    output = list(Filter(input))
+    errorMsg = "\n".join(["\n\nInput:", str(input),
+                          "\nExpected:", str(expected),
+                          "\nReceived:", str(output)])
+    assert expected == output, errorMsg
+
+
+def runTestUnmodifiedOutput(input):
+    runTest(input, input)
+
+
+def testPhrasingElements():
+    runTestUnmodifiedOutput(
+        [{"type": "Characters", "data": "This is a "},
+         {"type": "StartTag", "name": "span", "data": []},
+         {"type": "Characters", "data": "phrase"},
+         {"type": "EndTag", "name": "span", "data": []},
+         {"type": "SpaceCharacters", "data": " "},
+         {"type": "Characters", "data": "with"},
+         {"type": "SpaceCharacters", "data": " "},
+         {"type": "StartTag", "name": "em", "data": []},
+         {"type": "Characters", "data": "emphasised text"},
+         {"type": "EndTag", "name": "em", "data": []},
+         {"type": "Characters", "data": " and an "},
+         {"type": "StartTag", "name": "img", "data": [["alt", "image"]]},
+         {"type": "Characters", "data": "."}])
+
+
+def testLeadingWhitespace():
+    runTest(
+        [{"type": "StartTag", "name": "p", "data": []},
+         {"type": "SpaceCharacters", "data": spaceCharacters},
+         {"type": "Characters", "data": "foo"},
+         {"type": "EndTag", "name": "p", "data": []}],
+        [{"type": "StartTag", "name": "p", "data": []},
+         {"type": "SpaceCharacters", "data": " "},
+         {"type": "Characters", "data": "foo"},
+         {"type": "EndTag", "name": "p", "data": []}])
+
+
+def testLeadingWhitespaceAsCharacters():
+    runTest(
+        [{"type": "StartTag", "name": "p", "data": []},
+         {"type": "Characters", "data": spaceCharacters + "foo"},
+         {"type": "EndTag", "name": "p", "data": []}],
+        [{"type": "StartTag", "name": "p", "data": []},
+         {"type": "Characters", "data": " foo"},
+         {"type": "EndTag", "name": "p", "data": []}])
+
+
+def testTrailingWhitespace():
+    runTest(
+        [{"type": "StartTag", "name": "p", "data": []},
+         {"type": "Characters", "data": "foo"},
+         {"type": "SpaceCharacters", "data": spaceCharacters},
+         {"type": "EndTag", "name": "p", "data": []}],
+        [{"type": "StartTag", "name": "p", "data": []},
+         {"type": "Characters", "data": "foo"},
+         {"type": "SpaceCharacters", "data": " "},
+         {"type": "EndTag", "name": "p", "data": []}])
+
+
+def testTrailingWhitespaceAsCharacters():
+    runTest(
+        [{"type": "StartTag", "name": "p", "data": []},
+         {"type": "Characters", "data": "foo" + spaceCharacters},
+         {"type": "EndTag", "name": "p", "data": []}],
+        [{"type": "StartTag", "name": "p", "data": []},
+         {"type": "Characters", "data": "foo "},
+         {"type": "EndTag", "name": "p", "data": []}])
+
+
+def testWhitespace():
+    runTest(
+        [{"type": "StartTag", "name": "p", "data": []},
+         {"type": "Characters", "data": "foo" + spaceCharacters + "bar"},
+         {"type": "EndTag", "name": "p", "data": []}],
+        [{"type": "StartTag", "name": "p", "data": []},
+         {"type": "Characters", "data": "foo bar"},
+         {"type": "EndTag", "name": "p", "data": []}])
+
+
+def testLeadingWhitespaceInPre():
+    runTestUnmodifiedOutput(
+        [{"type": "StartTag", "name": "pre", "data": []},
+         {"type": "SpaceCharacters", "data": spaceCharacters},
+         {"type": "Characters", "data": "foo"},
+         {"type": "EndTag", "name": "pre", "data": []}])
+
+
+def testLeadingWhitespaceAsCharactersInPre():
+    runTestUnmodifiedOutput(
+        [{"type": "StartTag", "name": "pre", "data": []},
+         {"type": "Characters", "data": spaceCharacters + "foo"},
+         {"type": "EndTag", "name": "pre", "data": []}])
+
+
+def testTrailingWhitespaceInPre():
+    runTestUnmodifiedOutput(
+        [{"type": "StartTag", "name": "pre", "data": []},
+         {"type": "Characters", "data": "foo"},
+         {"type": "SpaceCharacters", "data": spaceCharacters},
+         {"type": "EndTag", "name": "pre", "data": []}])
+
+
+def testTrailingWhitespaceAsCharactersInPre():
+    runTestUnmodifiedOutput(
+        [{"type": "StartTag", "name": "pre", "data": []},
+         {"type": "Characters", "data": "foo" + spaceCharacters},
+         {"type": "EndTag", "name": "pre", "data": []}])
+
+
+def testWhitespaceInPre():
+    runTestUnmodifiedOutput(
+        [{"type": "StartTag", "name": "pre", "data": []},
+         {"type": "Characters", "data": "foo" + spaceCharacters + "bar"},
+         {"type": "EndTag", "name": "pre", "data": []}])
diff --git a/lib/html5lib/tests/testdata/.gitattributes b/lib/html5lib/tests/testdata/.gitattributes
new file mode 100644
index 00000000..897831b2
--- /dev/null
+++ b/lib/html5lib/tests/testdata/.gitattributes
@@ -0,0 +1,2 @@
+*.dat		-text diff
+*.test		-text diff
diff --git a/lib/html5lib/tests/testdata/AUTHORS.rst b/lib/html5lib/tests/testdata/AUTHORS.rst
new file mode 100644
index 00000000..4a7de17a
--- /dev/null
+++ b/lib/html5lib/tests/testdata/AUTHORS.rst
@@ -0,0 +1,34 @@
+Credits
+=======
+
+The ``html5lib`` test data is maintained by:
+
+- James Graham
+- Geoffrey Sneddon
+
+
+Contributors
+------------
+
+- Adam Barth
+- Andi Sidwell
+- Anne van Kesteren
+- David Flanagan
+- Edward Z. Yang
+- Geoffrey Sneddon
+- Henri Sivonen
+- Ian Hickson
+- Jacques Distler
+- James Graham
+- Lachlan Hunt
+- lantis63
+- Mark Pilgrim
+- Mats Palmgren
+- Ms2ger
+- Nolan Waite
+- Philip Taylor
+- Rafael Weinstein
+- Ryan King
+- Sam Ruby
+- Simon Pieters
+- Thomas Broyer
diff --git a/lib/html5lib/tests/testdata/LICENSE b/lib/html5lib/tests/testdata/LICENSE
new file mode 100644
index 00000000..8812371b
--- /dev/null
+++ b/lib/html5lib/tests/testdata/LICENSE
@@ -0,0 +1,21 @@
+Copyright (c) 2006-2013 James Graham, Geoffrey Sneddon, and
+other contributors
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/lib/html5lib/tests/testdata/encoding/chardet/test_big5.txt b/lib/html5lib/tests/testdata/encoding/chardet/test_big5.txt
new file mode 100644
index 00000000..91074c98
--- /dev/null
+++ b/lib/html5lib/tests/testdata/encoding/chardet/test_big5.txt
@@ -0,0 +1,51 @@
+�Ѥl�m�D�w�g�n �Ĥ@~�|�Q��
+
+�Ѥl�D�g
+
+�Ĥ@��
+
+�D�i�D�A�D�`�D�C�W�i�W�A�D�`�W�C�L�A�W�Ѧa���l�Q���A�W�U�������C
+�G�`�L�A���H�[�䧮�F�`���A���H�[���u�C����̡A�P�X�Ӳ��W�A�P�פ�
+�ȡC�Ȥ��S�ȡA���������C
+
+�ĤG��
+
+�ѤU�Ҫ����������A���c�o�Q�Ҫ����������A�������o�C�G���L�ۥ͡A��
+���ۦ��A���u�ۧΡA���U�۶ɡA���n�۩M�A�e����H�C�O�H�t�H�B�u�L��
+�v���ơA��u�����v���СC�U���@�j�Ӥ���A�ͦӤ����A���Ӥ���A�\��
+�ӥ��~�C�Ұߥ��~�A�O�H���h�C
+
+�ĤT��
+
+���|��A�ϥ������Q���Q���o���f�A�ϥ������s�Q�����i���A�ϥ��ߤ���
+�C�O�H�u�t�H�v���v�A���ߡA��両�A�z��ӡA�j�䰩�C�`�ϥ��L���L
+���C�ϤҴ��̤������]�C���u�L���v�A�h�L���v�C
+
+�ĥ|��
+
+�u�D�v�R�A�ӥΤ��Τ��աC�W���A���U�����v�Q����U�A�Ѩ�ɡA�M���
+�A�P��СQ�祿���Φs�C�^�����֤��l�H�H�Ҥ����C
+
+�Ĥ���
+
+�Ѧa�����A�H�U�����쪯�Q�t�H�����A�H�ʩm���쪯�C�Ѧa�����A��S��
+���G�H��Ӥ��}�A�ʦӷU�X�C�h���ƽa�A���p�u���C
+
+�Ĥ���
+
+���������A�O�ץȦɡC�Ȧɤ����A�O�פѦa�ڡC�����Y�s�A�Τ����ԡC
+
+�ĤC��
+
+�Ѫ��a�[�C�Ѧa�ҥH����B�[�̡A�H�䤣�ۥ͡A�G����[�C�O�H�t�H���
+���Ө����A�~�䨭�Ө��s�C�D�H��L�p���H�G�ন��p�C
+
+�ĤK��
+
+�W���Y���C�����Q�U���Ӥ����C�B���H���Ҵc�A�G�X��D�C�~���a�A�ߵ�
+�W�A�P�����A�����H�A�F���v�A�Ƶ���A�ʵ��ɡC�Ұߤ����A�G�L�סC
+
+�ĤE��
+
+���Ӭդ��A���p��w�Q���ӾU���A���i���O�C���ɺ���A������u�Q�I�Q
+��ź�A�ۿ��S�C�\�E���h�A�Ѥ��D�C
diff --git a/lib/html5lib/tests/testdata/encoding/test-yahoo-jp.dat b/lib/html5lib/tests/testdata/encoding/test-yahoo-jp.dat
new file mode 100644
index 00000000..9b5f1917
--- /dev/null
+++ b/lib/html5lib/tests/testdata/encoding/test-yahoo-jp.dat
@@ -0,0 +1,10 @@
+#data
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=euc-jp">
+<!--京-->
+<title>Yahoo! JAPAN</title>
+<meta name="description" content="日本最大級のポータルサイト。検索、オークション、ニュース、メール、コミュニティ、ショッピング、など80以上のサービスを展開。あなたの生活をより豊かにする「ライフ・エンジン」を目指していきます。">
+<style type="text/css" media="all">
+#encoding
+euc-jp
diff --git a/lib/html5lib/tests/testdata/encoding/tests1.dat b/lib/html5lib/tests/testdata/encoding/tests1.dat
new file mode 100644
index 00000000..1b72666c
--- /dev/null
+++ b/lib/html5lib/tests/testdata/encoding/tests1.dat
@@ -0,0 +1,394 @@
+#data
+<!DOCTYPE HTML>
+<!-- (control test - for the other tests to work, this should pass - you may have to set your defaults appropriately) -->
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta charset="ISO-8859-1">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta charset="iso8859-2">
+#encoding
+iso-8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta charset='iso8859-2'>
+#encoding
+iso-8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta charset=iso8859-2>
+#encoding
+iso-8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta
+charset=iso8859-2>
+#encoding
+iso-8859-2
+
+#data
+<!DOCTYPE HTML>
+<metacharset=iso8859-2>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta http-equiv="Content-Type" content="text/html; charset=iso8859-2">
+<!-- XXX this is a tough one, not sure how to do this one, unless we explictly do content= processing -->
+#encoding
+iso-8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta content="text/html; charset=iso8859-2" http-equiv="Content-Type">
+<!-- XXX this is a tough one, not sure how to do this one, unless we explictly do content= processing -->
+#encoding
+iso-8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta http-equiv="Content-Type" content=text/html; charset=iso8859-2>
+#encoding
+iso-8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta http-equiv="Content-Type content="text/html; charset=iso8859-2">
+#encoding
+windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta http-equiv="Content-Type " content="text/html; charset=iso8859-2">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta content="text/html; charset=iso8859-2" http-equiv="Content-Type ">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta http-equiv="Content-Type>" content="text/html; charset=iso8859-2">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta content="text/html; charset=iso8859-2" http-equiv="Content-Type>">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta http-equiv="Content-Style-Type" content="text/html; charset=iso8859-2">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta content="text/html; charset=iso8859-2" http-equiv="Content-Style-Type">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta name="Content-Style-Type" content="text/html; charset=iso8859-2">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta content="text/html; charset=iso8859-2" name="Content-Style-Type">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta content="text/html; charset=iso8859-2">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta content=" text/html; charset = iso8859-2 ">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta content="
+text/html; charset=iso8859-2
+" http-equiv="Content-Type">
+#encoding
+iso-8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta charset="
+iso8859-2
+">
+#encoding
+iso-8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta charset=
+iso8859-2
+>
+#encoding
+iso-8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta charset="iso8859-2>
+<p>"</p>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta charset=iso8859-2">
+<p>"</p>
+#encoding
+windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta " charset=iso8859-2>
+<p>"</p>
+#encoding
+iso-8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta test" charset=iso8859-2>
+<p>"</p>
+#encoding
+iso-8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta test=" charset=iso8859-2>
+<p>"</p>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta test="' charset=iso8859-2>
+<p>"'</p>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta test='" charset=iso8859-2>
+<p>'"</p>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta test="" charset=iso8859-2>
+#encoding
+iso-8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta test=x" charset=iso8859-2>
+<p>"</p>
+#encoding
+iso-8859-2
+
+#data
+<!DOCTYPE HTML>
+<head></head><p title="x>
+<meta test=x" charset=iso8859-2>
+<p>"</p>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<head></head><p title="x>
+<meta test=x charset=iso8859-2>
+<p>"</p>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<head></head><p title="x>
+<meta charset=iso8859-2>
+<p>"</p>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<head></head><p title="x>">
+<meta charset=iso8859-2>
+<p>"</p>
+#encoding
+iso-8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta charset="ISO-8859-1">
+<meta charset="iso8859-2">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta charset="iso8859-2">
+<meta charset="ISO-8859-1">
+#encoding
+iso-8859-2
+
+#data
+<!DOCTYPE HTML>
+<!--<meta charset="ISO-8859-1">-->
+<meta charset="iso8859-2">
+#encoding
+iso-8859-2
+
+#data
+<!DOCTYPE HTML>
+<!--<meta charset="iso8859-2">-->
+<meta charset="ISO-8859-1">
+#encoding
+Windows-1252
+
+#data
+﻿<!DOCTYPE HTML>
+<!-- Starts with UTF-8 BOM -->
+#encoding
+UTF-8
+
+#data
+﻿<!DOCTYPE HTML>
+<meta charset="ISO-8859-1">
+<!-- Starts with UTF-8 BOM -->
+#encoding
+UTF-8
+
+#data
+<!-- 511 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-->
+<meta charset="iso8859-2">
+#encoding
+iso-8859-2
+
+#data
+<!-- 512 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-->
+<meta charset="iso8859-2">
+#encoding
+iso-8859-2
+
+#data
+<!-- 1024 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-->
+<meta charset="iso8859-2">
+#encoding
+iso-8859-2
+
+#data
+<!-- 1025 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz-->
+<meta charset="iso8859-2">
+#encoding
+iso-8859-2
+
+#data
+<!-- 2048 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxx-->
+<meta charset="iso8859-2">
+#encoding
+iso-8859-2
+
+#data
+<!-- 2049 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxz-->
+<meta charset="iso8859-2">
+#encoding
+iso-8859-2
+
+#data
+<!-- 4096 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-->
+<meta charset="iso8859-2">
+#encoding
+iso-8859-2
+
+#data
+<!-- 4097 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz-->
+<meta charset="iso8859-2">
+#encoding
+iso-8859-2
+
+#data
+<!-- 8192 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-->
+<meta charset="iso8859-2">
+#encoding
+iso-8859-2
+
+#data
+<!-- 8193 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz-->
+<meta charset="iso8859-2">
+#encoding
+iso-8859-2
+
+#data
+<!-- multi-script test -->
+<script>alert('step 1 of 3 ("�")')</script>
+<!-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -->
+<script>alert('step 2 of 3 ("�")')</script>
+<meta charset="iso8859-2">
+<script>alert('step 3 of 3 ("�")')</script>
+#encoding
+iso-8859-2
+
+#data
+<!DOCTYPE HTML>
+<script>document.write('<meta charset="ISO-8859-' + '2">')</script>
+#encoding
+iso-8859-2
+
+#data
+<!DOCTYPE HTML>
+<script>document.write('<meta charset="iso8859-2">')</script>
+#encoding
+iso-8859-2
+
+#data
+<!DOCTYPE HTML>
+<script type="text/plain"><meta charset="iso8859-2"></script>
+#encoding
+iso-8859-2
+
+#data
+<!DOCTYPE HTML>
+<style type="text/plain"><meta charset="iso8859-2"></style>
+#encoding
+iso-8859-2
+
+#data
+<!DOCTYPE HTML>
+<p><meta charset="iso8859-2"></p>
+#encoding
+iso-8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta charset="bogus">
+<meta charset="iso8859-2">
+#encoding
+iso-8859-2
diff --git a/lib/html5lib/tests/testdata/encoding/tests2.dat b/lib/html5lib/tests/testdata/encoding/tests2.dat
new file mode 100644
index 00000000..873bcdcd
--- /dev/null
+++ b/lib/html5lib/tests/testdata/encoding/tests2.dat
@@ -0,0 +1,115 @@
+#data
+<meta
+#encoding
+windows-1252
+
+#data
+<
+#encoding
+windows-1252
+
+#data
+<!
+#encoding
+windows-1252
+
+#data
+<meta charset = "
+#encoding
+windows-1252
+
+#data
+<meta charset=euc-jp
+#encoding
+windows-1252
+
+#data
+<meta <meta charset='euc-jp'>
+#encoding
+euc-jp
+
+#data
+<meta       charset    =     'euc-jp'>
+#encoding
+euc-jp
+
+#data
+<!-- -->
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+#encoding
+utf-8
+
+#data
+<!-- -->
+<meta http-equiv="Content-Type" content="text/html; charset=utf
+#encoding
+windows-1252
+
+#data
+<meta http-equiv="Content-Type<meta charset="utf-8">
+#encoding
+windows-1252
+
+#data
+<meta http-equiv="Content-Type" content="text/html; charset='utf-8'">
+#encoding
+utf-8
+
+#data
+<meta http-equiv="Content-Type" content="text/html; charset='utf-8">
+#encoding
+windows-1252
+
+#data
+<meta                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 
+#encoding
+windows-1252
+
+#data
+<meta charset                    =                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
+#encoding
+windows-1252
+
+#data
+<meta charset=                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            utf-8
+>
+#encoding
+utf-8
+
+#data
+<meta content = "text/html;
+#encoding
+windows-1252
+
+#data
+<meta charset="UTF-16">
+#encoding
+utf-8
+
+#data
+<meta charset="UTF-16LE">
+#encoding
+utf-8
+
+#data
+<meta charset="UTF-16BE">
+#encoding
+utf-8
+
+#data
+<html a=ñ>
+<meta charset="utf-8">
+#encoding
+utf-8
+
+#data
+<html ñ>
+<meta charset="utf-8">
+#encoding
+utf-8
+
+#data
+<html>ñ
+<meta charset="utf-8">
+#encoding
+utf-8
diff --git a/lib/html5lib/tests/testdata/serializer/core.test b/lib/html5lib/tests/testdata/serializer/core.test
new file mode 100644
index 00000000..c0b4222d
--- /dev/null
+++ b/lib/html5lib/tests/testdata/serializer/core.test
@@ -0,0 +1,125 @@
+{"tests": [
+
+{"description": "proper attribute value escaping",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "test \"with\" &quot;"}]]],
+ "expected": ["<span title='test \"with\" &amp;quot;'>"]
+},
+
+{"description": "proper attribute value non-quoting",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo"}]]],
+ "expected": ["<span title=foo>"],
+ "xhtml":    ["<span title=\"foo\">"]
+},
+
+{"description": "proper attribute value non-quoting (with <)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo<bar"}]]],
+ "expected": ["<span title=foo<bar>"],
+ "xhtml":    ["<span title=\"foo&lt;bar\">"]
+},
+
+{"description": "proper attribute value quoting (with =)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo=bar"}]]],
+ "expected": ["<span title=\"foo=bar\">"]
+},
+
+{"description": "proper attribute value quoting (with >)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo>bar"}]]],
+ "expected": ["<span title=\"foo>bar\">"]
+},
+
+{"description": "proper attribute value quoting (with \")",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\"bar"}]]],
+ "expected": ["<span title='foo\"bar'>"]
+},
+
+{"description": "proper attribute value quoting (with ')",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo'bar"}]]],
+ "expected": ["<span title=\"foo'bar\">"]
+},
+
+{"description": "proper attribute value quoting (with both \" and ')",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo'bar\"baz"}]]],
+ "expected": ["<span title=\"foo'bar&quot;baz\">"]
+},
+
+{"description": "proper attribute value quoting (with space)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo bar"}]]],
+ "expected": ["<span title=\"foo bar\">"]
+},
+
+{"description": "proper attribute value quoting (with tab)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\tbar"}]]],
+ "expected": ["<span title=\"foo\tbar\">"]
+},
+
+{"description": "proper attribute value quoting (with LF)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\nbar"}]]],
+ "expected": ["<span title=\"foo\nbar\">"]
+},
+
+{"description": "proper attribute value quoting (with CR)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\rbar"}]]],
+ "expected": ["<span title=\"foo\rbar\">"]
+},
+
+{"description": "proper attribute value non-quoting (with linetab)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\u000Bbar"}]]],
+ "expected": ["<span title=foo\u000Bbar>"],
+ "xhtml": ["<span title=\"foo\u000Bbar\">"]
+},
+
+{"description": "proper attribute value quoting (with form feed)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\u000Cbar"}]]],
+ "expected": ["<span title=\"foo\u000Cbar\">"]
+},
+
+{"description": "void element (as EmptyTag token)",
+ "input": [["EmptyTag", "img", {}]],
+ "expected": ["<img>"],
+ "xhtml":    ["<img />"]
+},
+
+{"description": "void element (as StartTag token)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "img", {}]],
+ "expected": ["<img>"],
+ "xhtml":    ["<img />"]
+},
+
+{"description": "doctype in error",
+ "input": [["Doctype", "foo"]],
+ "expected": ["<!DOCTYPE foo>"]
+},
+
+{"description": "character data",
+ "options": {"encoding":"utf-8"},
+ "input": [["Characters", "a<b>c&d"]],
+ "expected": ["a&lt;b&gt;c&amp;d"]
+},
+
+{"description": "rcdata",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "a<b>c&d"]],
+ "expected": ["<script>a<b>c&d"],
+ "xhtml": ["<script>a&lt;b&gt;c&amp;d"]
+},
+
+{"description": "doctype",
+ "input": [["Doctype", "HTML"]],
+ "expected": ["<!DOCTYPE HTML>"]
+},
+
+{"description": "HTML 4.01 DOCTYPE",
+ "input": [["Doctype", "HTML",  "-//W3C//DTD HTML 4.01//EN", "http://www.w3.org/TR/html4/strict.dtd"]],
+ "expected": ["<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"]
+},
+
+{"description": "HTML 4.01 DOCTYPE without system identifer",
+ "input": [["Doctype", "HTML",  "-//W3C//DTD HTML 4.01//EN"]],
+ "expected": ["<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\">"]
+},
+
+{"description": "IBM DOCTYPE without public identifer",
+ "input": [["Doctype", "html",  "", "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"]],
+ "expected": ["<!DOCTYPE html SYSTEM \"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd\">"]
+}
+
+]}
diff --git a/lib/html5lib/tests/testdata/serializer/injectmeta.test b/lib/html5lib/tests/testdata/serializer/injectmeta.test
new file mode 100644
index 00000000..feaaa44f
--- /dev/null
+++ b/lib/html5lib/tests/testdata/serializer/injectmeta.test
@@ -0,0 +1,66 @@
+{"tests": [
+
+{"description": "no encoding",
+ "options": {"inject_meta_charset": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
+ "expected": [""],
+ "xhtml": ["<head></head>"]
+},
+
+{"description": "empytag head",
+ "options": {"inject_meta_charset": true, "encoding":"utf-8"},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
+ "expected": ["<meta charset=utf-8>"],
+ "xhtml":    ["<head><meta charset=\"utf-8\" /></head>"]
+},
+
+{"description": "head w/title",
+ "options": {"inject_meta_charset": true, "encoding":"utf-8"},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["StartTag", "http://www.w3.org/1999/xhtml","title",{}], ["Characters", "foo"],["EndTag", "http://www.w3.org/1999/xhtml", "title"], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
+ "expected": ["<meta charset=utf-8><title>foo</title>"],
+ "xhtml":    ["<head><meta charset=\"utf-8\" /><title>foo</title></head>"]
+},
+
+{"description": "head w/meta-charset",
+ "options": {"inject_meta_charset": true, "encoding":"utf-8"},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
+ "expected": ["<meta charset=utf-8>"],
+ "xhtml":    ["<head><meta charset=\"utf-8\" /></head>"]
+},
+
+{"description": "head w/ two meta-charset",
+ "options": {"inject_meta_charset": true, "encoding":"utf-8"},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
+ "expected": ["<meta charset=utf-8><meta charset=utf-8>", "<head><meta charset=utf-8><meta charset=ascii>"],
+ "xhtml": ["<head><meta charset=\"utf-8\" /><meta charset=\"utf-8\" /></head>", "<head><meta charset=\"utf-8\" /><meta charset=\"ascii\" /></head>"]
+},
+
+{"description": "head w/robots",
+ "options": {"inject_meta_charset": true, "encoding":"utf-8"},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
+ "expected": ["<meta charset=utf-8><meta content=noindex name=robots>"],
+ "xhtml":    ["<head><meta charset=\"utf-8\" /><meta content=\"noindex\" name=\"robots\" /></head>"]
+},
+
+{"description": "head w/robots & charset",
+ "options": {"inject_meta_charset": true, "encoding":"utf-8"},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
+ "expected": ["<meta content=noindex name=robots><meta charset=utf-8>"],
+ "xhtml":    ["<head><meta content=\"noindex\" name=\"robots\" /><meta charset=\"utf-8\" /></head>"]
+},
+
+{"description": "head w/ charset in http-equiv content-type",
+ "options": {"inject_meta_charset": true, "encoding":"utf-8"},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "http-equiv", "value": "content-type"}, {"namespace": null, "name": "content", "value": "text/html; charset=ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
+ "expected": ["<meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
+ "xhtml":    ["<head><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
+},
+
+{"description": "head w/robots & charset in http-equiv content-type",
+ "options": {"inject_meta_charset": true, "encoding":"utf-8"},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EmptyTag","meta",[{"namespace": null, "name": "http-equiv", "value": "content-type"}, {"namespace": null, "name": "content", "value": "text/html; charset=ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
+ "expected": ["<meta content=noindex name=robots><meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
+ "xhtml": ["<head><meta content=\"noindex\" name=\"robots\" /><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
+}
+
+]}
diff --git a/lib/html5lib/tests/testdata/serializer/optionaltags.test b/lib/html5lib/tests/testdata/serializer/optionaltags.test
new file mode 100644
index 00000000..80a5edf8
--- /dev/null
+++ b/lib/html5lib/tests/testdata/serializer/optionaltags.test
@@ -0,0 +1,965 @@
+{"tests": [
+
+{"description": "html start-tag followed by text, with attributes",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", [{"namespace": null, "name": "lang", "value": "en"}]], ["Characters", "foo"]],
+ "expected": ["<html lang=en>foo"]
+},
+
+
+
+{"description": "html start-tag followed by comment",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Comment", "foo"]],
+ "expected": ["<html><!--foo-->"]
+},
+
+{"description": "html start-tag followed by space character",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Characters", " foo"]],
+ "expected": ["<html> foo"]
+},
+
+{"description": "html start-tag followed by text",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Characters", "foo"]],
+ "expected": ["foo"]
+},
+
+{"description": "html start-tag followed by start-tag",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["<foo>"]
+},
+
+{"description": "html start-tag followed by end-tag",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "html start-tag at EOF (shouldn't ever happen?!)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}]],
+ "expected": [""]
+},
+
+
+
+{"description": "html end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Comment", "foo"]],
+ "expected": ["</html><!--foo-->"]
+},
+
+{"description": "html end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Characters", " foo"]],
+ "expected": ["</html> foo"]
+},
+
+{"description": "html end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Characters", "foo"]],
+ "expected": ["foo"]
+},
+
+{"description": "html end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["<foo>"]
+},
+
+{"description": "html end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "html end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "head start-tag followed by comment",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Comment", "foo"]],
+ "expected": ["<head><!--foo-->"]
+},
+
+{"description": "head start-tag followed by space character",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Characters", " foo"]],
+ "expected": ["<head> foo"]
+},
+
+{"description": "head start-tag followed by text",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Characters", "foo"]],
+ "expected": ["<head>foo"]
+},
+
+{"description": "head start-tag followed by start-tag",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["<foo>"]
+},
+
+{"description": "head start-tag followed by end-tag (shouldn't ever happen?!)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["<head></foo>", "</foo>"]
+},
+
+{"description": "empty head element",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
+ "expected": [""]
+},
+
+{"description": "head start-tag followed by empty-tag",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag", "foo", {}]],
+ "expected": ["<foo>"]
+},
+
+{"description": "head start-tag at EOF (shouldn't ever happen?!)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}]],
+ "expected": ["<head>", ""]
+},
+
+
+
+{"description": "head end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Comment", "foo"]],
+ "expected": ["</head><!--foo-->"]
+},
+
+{"description": "head end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Characters", " foo"]],
+ "expected": ["</head> foo"]
+},
+
+{"description": "head end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Characters", "foo"]],
+ "expected": ["foo"]
+},
+
+{"description": "head end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["<foo>"]
+},
+
+{"description": "head end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "head end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "body start-tag followed by comment",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Comment", "foo"]],
+ "expected": ["<body><!--foo-->"]
+},
+
+{"description": "body start-tag followed by space character",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Characters", " foo"]],
+ "expected": ["<body> foo"]
+},
+
+{"description": "body start-tag followed by text",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Characters", "foo"]],
+ "expected": ["foo"]
+},
+
+{"description": "body start-tag followed by start-tag",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["<foo>"]
+},
+
+{"description": "body start-tag followed by end-tag",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "body start-tag at EOF (shouldn't ever happen?!)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}]],
+ "expected": [""]
+},
+
+
+
+{"description": "body end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Comment", "foo"]],
+ "expected": ["</body><!--foo-->"]
+},
+
+{"description": "body end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Characters", " foo"]],
+ "expected": ["</body> foo"]
+},
+
+{"description": "body end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Characters", "foo"]],
+ "expected": ["foo"]
+},
+
+{"description": "body end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["<foo>"]
+},
+
+{"description": "body end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "body end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "li end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Comment", "foo"]],
+ "expected": ["</li><!--foo-->"]
+},
+
+{"description": "li end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Characters", " foo"]],
+ "expected": ["</li> foo"]
+},
+
+{"description": "li end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Characters", "foo"]],
+ "expected": ["</li>foo"]
+},
+
+{"description": "li end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["</li><foo>"]
+},
+
+{"description": "li end-tag followed by li start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["StartTag", "http://www.w3.org/1999/xhtml", "li", {}]],
+ "expected": ["<li>"]
+},
+
+{"description": "li end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "li end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "dt end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Comment", "foo"]],
+ "expected": ["</dt><!--foo-->"]
+},
+
+{"description": "dt end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Characters", " foo"]],
+ "expected": ["</dt> foo"]
+},
+
+{"description": "dt end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Characters", "foo"]],
+ "expected": ["</dt>foo"]
+},
+
+{"description": "dt end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["</dt><foo>"]
+},
+
+{"description": "dt end-tag followed by dt start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "dt", {}]],
+ "expected": ["<dt>"]
+},
+
+{"description": "dt end-tag followed by dd start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "dd", {}]],
+ "expected": ["<dd>"]
+},
+
+{"description": "dt end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</dt></foo>"]
+},
+
+{"description": "dt end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"]],
+ "expected": ["</dt>"]
+},
+
+
+
+
+{"description": "dd end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Comment", "foo"]],
+ "expected": ["</dd><!--foo-->"]
+},
+
+{"description": "dd end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Characters", " foo"]],
+ "expected": ["</dd> foo"]
+},
+
+{"description": "dd end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Characters", "foo"]],
+ "expected": ["</dd>foo"]
+},
+
+{"description": "dd end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["</dd><foo>"]
+},
+
+{"description": "dd end-tag followed by dd start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "dd", {}]],
+ "expected": ["<dd>"]
+},
+
+{"description": "dd end-tag followed by dt start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "dt", {}]],
+ "expected": ["<dt>"]
+},
+
+{"description": "dd end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "dd end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "p end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Comment", "foo"]],
+ "expected": ["</p><!--foo-->"]
+},
+
+{"description": "p end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Characters", " foo"]],
+ "expected": ["</p> foo"]
+},
+
+{"description": "p end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Characters", "foo"]],
+ "expected": ["</p>foo"]
+},
+
+{"description": "p end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["</p><foo>"]
+},
+
+{"description": "p end-tag followed by address start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "address", {}]],
+ "expected": ["<address>"]
+},
+
+{"description": "p end-tag followed by article start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "article", {}]],
+ "expected": ["<article>"]
+},
+
+{"description": "p end-tag followed by aside start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "aside", {}]],
+ "expected": ["<aside>"]
+},
+
+{"description": "p end-tag followed by blockquote start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "blockquote", {}]],
+ "expected": ["<blockquote>"]
+},
+
+{"description": "p end-tag followed by datagrid start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "datagrid", {}]],
+ "expected": ["<datagrid>"]
+},
+
+{"description": "p end-tag followed by dialog start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dialog", {}]],
+ "expected": ["<dialog>"]
+},
+
+{"description": "p end-tag followed by dir start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dir", {}]],
+ "expected": ["<dir>"]
+},
+
+{"description": "p end-tag followed by div start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "div", {}]],
+ "expected": ["<div>"]
+},
+
+{"description": "p end-tag followed by dl start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dl", {}]],
+ "expected": ["<dl>"]
+},
+
+{"description": "p end-tag followed by fieldset start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "fieldset", {}]],
+ "expected": ["<fieldset>"]
+},
+
+{"description": "p end-tag followed by footer start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "footer", {}]],
+ "expected": ["<footer>"]
+},
+
+{"description": "p end-tag followed by form start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "form", {}]],
+ "expected": ["<form>"]
+},
+
+{"description": "p end-tag followed by h1 start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h1", {}]],
+ "expected": ["<h1>"]
+},
+
+{"description": "p end-tag followed by h2 start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h2", {}]],
+ "expected": ["<h2>"]
+},
+
+{"description": "p end-tag followed by h3 start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h3", {}]],
+ "expected": ["<h3>"]
+},
+
+{"description": "p end-tag followed by h4 start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h4", {}]],
+ "expected": ["<h4>"]
+},
+
+{"description": "p end-tag followed by h5 start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h5", {}]],
+ "expected": ["<h5>"]
+},
+
+{"description": "p end-tag followed by h6 start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h6", {}]],
+ "expected": ["<h6>"]
+},
+
+{"description": "p end-tag followed by header start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "header", {}]],
+ "expected": ["<header>"]
+},
+
+{"description": "p end-tag followed by hr empty-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["EmptyTag", "hr", {}]],
+ "expected": ["<hr>"]
+},
+
+{"description": "p end-tag followed by menu start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "menu", {}]],
+ "expected": ["<menu>"]
+},
+
+{"description": "p end-tag followed by nav start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "nav", {}]],
+ "expected": ["<nav>"]
+},
+
+{"description": "p end-tag followed by ol start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "ol", {}]],
+ "expected": ["<ol>"]
+},
+
+{"description": "p end-tag followed by p start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "p", {}]],
+ "expected": ["<p>"]
+},
+
+{"description": "p end-tag followed by pre start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}]],
+ "expected": ["<pre>"]
+},
+
+{"description": "p end-tag followed by section start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "section", {}]],
+ "expected": ["<section>"]
+},
+
+{"description": "p end-tag followed by table start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "table", {}]],
+ "expected": ["<table>"]
+},
+
+{"description": "p end-tag followed by ul start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "ul", {}]],
+ "expected": ["<ul>"]
+},
+
+{"description": "p end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "p end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "optgroup end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Comment", "foo"]],
+ "expected": ["</optgroup><!--foo-->"]
+},
+
+{"description": "optgroup end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Characters", " foo"]],
+ "expected": ["</optgroup> foo"]
+},
+
+{"description": "optgroup end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Characters", "foo"]],
+ "expected": ["</optgroup>foo"]
+},
+
+{"description": "optgroup end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["</optgroup><foo>"]
+},
+
+{"description": "optgroup end-tag followed by optgroup start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "optgroup", {}]],
+ "expected": ["<optgroup>"]
+},
+
+{"description": "optgroup end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "optgroup end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "option end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Comment", "foo"]],
+ "expected": ["</option><!--foo-->"]
+},
+
+{"description": "option end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Characters", " foo"]],
+ "expected": ["</option> foo"]
+},
+
+{"description": "option end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Characters", "foo"]],
+ "expected": ["</option>foo"]
+},
+
+{"description": "option end-tag followed by optgroup start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "optgroup", {}]],
+ "expected": ["<optgroup>"]
+},
+
+{"description": "option end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["</option><foo>"]
+},
+
+{"description": "option end-tag followed by option start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "option", {}]],
+ "expected": ["<option>"]
+},
+
+{"description": "option end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "option end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "colgroup start-tag followed by comment",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Comment", "foo"]],
+ "expected": ["<colgroup><!--foo-->"]
+},
+
+{"description": "colgroup start-tag followed by space character",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Characters", " foo"]],
+ "expected": ["<colgroup> foo"]
+},
+
+{"description": "colgroup start-tag followed by text",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Characters", "foo"]],
+ "expected": ["<colgroup>foo"]
+},
+
+{"description": "colgroup start-tag followed by start-tag",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["<colgroup><foo>"]
+},
+
+{"description": "first colgroup in a table with a col child",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "table", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["EmptyTag", "col", {}]],
+ "expected": ["<table><col>"]
+},
+
+{"description": "colgroup with a col child, following another colgroup",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "col", {}]],
+ "expected": ["</colgroup><col>", "<colgroup><col>"]
+},
+
+{"description": "colgroup start-tag followed by end-tag",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["<colgroup></foo>"]
+},
+
+{"description": "colgroup start-tag at EOF",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}]],
+ "expected": ["<colgroup>"]
+},
+
+
+
+{"description": "colgroup end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Comment", "foo"]],
+ "expected": ["</colgroup><!--foo-->"]
+},
+
+{"description": "colgroup end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Characters", " foo"]],
+ "expected": ["</colgroup> foo"]
+},
+
+{"description": "colgroup end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Characters", "foo"]],
+ "expected": ["foo"]
+},
+
+{"description": "colgroup end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["<foo>"]
+},
+
+{"description": "colgroup end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "colgroup end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "thead end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Comment", "foo"]],
+ "expected": ["</thead><!--foo-->"]
+},
+
+{"description": "thead end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Characters", " foo"]],
+ "expected": ["</thead> foo"]
+},
+
+{"description": "thead end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Characters", "foo"]],
+ "expected": ["</thead>foo"]
+},
+
+{"description": "thead end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["</thead><foo>"]
+},
+
+{"description": "thead end-tag followed by tbody start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
+ "expected": ["<tbody>"]
+},
+
+{"description": "thead end-tag followed by tfoot start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tfoot", {}]],
+ "expected": ["<tfoot>"]
+},
+
+{"description": "thead end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</thead></foo>"]
+},
+
+{"description": "thead end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"]],
+ "expected": ["</thead>"]
+},
+
+
+
+
+{"description": "tbody start-tag followed by comment",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Comment", "foo"]],
+ "expected": ["<tbody><!--foo-->"]
+},
+
+{"description": "tbody start-tag followed by space character",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Characters", " foo"]],
+ "expected": ["<tbody> foo"]
+},
+
+{"description": "tbody start-tag followed by text",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Characters", "foo"]],
+ "expected": ["<tbody>foo"]
+},
+
+{"description": "tbody start-tag followed by start-tag",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["<tbody><foo>"]
+},
+
+{"description": "first tbody in a table with a tr child",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "table", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
+ "expected": ["<table><tr>"]
+},
+
+{"description": "tbody with a tr child, following another tbody",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
+ "expected": ["<tbody><tr>", "</tbody><tr>"]
+},
+
+{"description": "tbody with a tr child, following a thead",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
+ "expected": ["<tbody><tr>", "</thead><tr>"]
+},
+
+{"description": "tbody with a tr child, following a tfoot",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
+ "expected": ["<tbody><tr>", "</tfoot><tr>"]
+},
+
+{"description": "tbody start-tag followed by end-tag",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["<tbody></foo>"]
+},
+
+{"description": "tbody start-tag at EOF",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
+ "expected": ["<tbody>"]
+},
+
+
+
+{"description": "tbody end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Comment", "foo"]],
+ "expected": ["</tbody><!--foo-->"]
+},
+
+{"description": "tbody end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Characters", " foo"]],
+ "expected": ["</tbody> foo"]
+},
+
+{"description": "tbody end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Characters", "foo"]],
+ "expected": ["</tbody>foo"]
+},
+
+{"description": "tbody end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["</tbody><foo>"]
+},
+
+{"description": "tbody end-tag followed by tbody start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
+ "expected": ["<tbody>", "</tbody>"]
+},
+
+{"description": "tbody end-tag followed by tfoot start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tfoot", {}]],
+ "expected": ["<tfoot>"]
+},
+
+{"description": "tbody end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "tbody end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "tfoot end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Comment", "foo"]],
+ "expected": ["</tfoot><!--foo-->"]
+},
+
+{"description": "tfoot end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Characters", " foo"]],
+ "expected": ["</tfoot> foo"]
+},
+
+{"description": "tfoot end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Characters", "foo"]],
+ "expected": ["</tfoot>foo"]
+},
+
+{"description": "tfoot end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["</tfoot><foo>"]
+},
+
+{"description": "tfoot end-tag followed by tbody start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
+ "expected": ["<tbody>", "</tfoot>"]
+},
+
+{"description": "tfoot end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "tfoot end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "tr end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Comment", "foo"]],
+ "expected": ["</tr><!--foo-->"]
+},
+
+{"description": "tr end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Characters", " foo"]],
+ "expected": ["</tr> foo"]
+},
+
+{"description": "tr end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Characters", "foo"]],
+ "expected": ["</tr>foo"]
+},
+
+{"description": "tr end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["</tr><foo>"]
+},
+
+{"description": "tr end-tag followed by tr start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
+ "expected": ["<tr>", "</tr>"]
+},
+
+{"description": "tr end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "tr end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "td end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Comment", "foo"]],
+ "expected": ["</td><!--foo-->"]
+},
+
+{"description": "td end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Characters", " foo"]],
+ "expected": ["</td> foo"]
+},
+
+{"description": "td end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Characters", "foo"]],
+ "expected": ["</td>foo"]
+},
+
+{"description": "td end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["</td><foo>"]
+},
+
+{"description": "td end-tag followed by td start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "td", {}]],
+ "expected": ["<td>", "</td>"]
+},
+
+{"description": "td end-tag followed by th start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "th", {}]],
+ "expected": ["<th>", "</td>"]
+},
+
+{"description": "td end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "td end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "th end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Comment", "foo"]],
+ "expected": ["</th><!--foo-->"]
+},
+
+{"description": "th end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Characters", " foo"]],
+ "expected": ["</th> foo"]
+},
+
+{"description": "th end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Characters", "foo"]],
+ "expected": ["</th>foo"]
+},
+
+{"description": "th end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["</th><foo>"]
+},
+
+{"description": "th end-tag followed by th start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "th", {}]],
+ "expected": ["<th>", "</th>"]
+},
+
+{"description": "th end-tag followed by td start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "td", {}]],
+ "expected": ["<td>", "</th>"]
+},
+
+{"description": "th end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "th end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml"    , "th"]],
+ "expected": [""]
+}
+
+]}
diff --git a/lib/html5lib/tests/testdata/serializer/options.test b/lib/html5lib/tests/testdata/serializer/options.test
new file mode 100644
index 00000000..6f342dd3
--- /dev/null
+++ b/lib/html5lib/tests/testdata/serializer/options.test
@@ -0,0 +1,60 @@
+{"tests":[
+
+{"description": "quote_char=\"'\"",
+ "options": {"quote_char": "'"},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "test 'with' quote_char"}]]],
+ "expected": ["<span title='test &#39;with&#39; quote_char'>"]
+},
+
+{"description": "quote_attr_values=true",
+ "options": {"quote_attr_values": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "button", [{"namespace": null, "name": "disabled", "value" :"disabled"}]]],
+ "expected": ["<button disabled>"],
+ "xhtml":    ["<button disabled=\"disabled\">"]
+},
+
+{"description": "quote_attr_values=true with irrelevant",
+ "options": {"quote_attr_values": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :"irrelevant"}]]],
+ "expected": ["<div irrelevant>"],
+ "xhtml":    ["<div irrelevant=\"irrelevant\">"]
+},
+
+{"description": "use_trailing_solidus=true with void element",
+ "options": {"use_trailing_solidus": true},
+ "input": [["EmptyTag", "img", {}]],
+ "expected": ["<img />"]
+},
+
+{"description": "use_trailing_solidus=true with non-void element",
+ "options": {"use_trailing_solidus": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", {}]],
+ "expected": ["<div>"]
+},
+
+{"description": "minimize_boolean_attributes=false",
+ "options": {"minimize_boolean_attributes": false},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :"irrelevant"}]]],
+ "expected": ["<div irrelevant=irrelevant>"],
+ "xhtml":    ["<div irrelevant=\"irrelevant\">"]
+},
+
+{"description": "minimize_boolean_attributes=false with empty value",
+ "options": {"minimize_boolean_attributes": false},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :""}]]],
+ "expected": ["<div irrelevant=\"\">"]
+},
+
+{"description": "escape less than signs in attribute values",
+ "options": {"escape_lt_in_attrs": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "a", [{"namespace": null, "name": "title", "value": "a<b>c&d"}]]],
+ "expected": ["<a title=\"a&lt;b>c&amp;d\">"]
+},
+
+{"description": "rcdata",
+ "options": {"escape_rcdata": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "a<b>c&d"]],
+ "expected": ["<script>a&lt;b&gt;c&amp;d"]
+}
+
+]}
diff --git a/lib/html5lib/tests/testdata/serializer/whitespace.test b/lib/html5lib/tests/testdata/serializer/whitespace.test
new file mode 100644
index 00000000..e5d050d3
--- /dev/null
+++ b/lib/html5lib/tests/testdata/serializer/whitespace.test
@@ -0,0 +1,51 @@
+{"tests": [
+
+{"description": "bare text with leading spaces",
+ "options": {"strip_whitespace": true},
+ "input": [["Characters", "\t\r\n\u000C foo"]],
+ "expected": [" foo"]
+},
+
+{"description": "bare text with trailing spaces",
+ "options": {"strip_whitespace": true},
+ "input": [["Characters", "foo \t\r\n\u000C"]],
+ "expected": ["foo "]
+},
+
+{"description": "bare text with inner spaces",
+ "options": {"strip_whitespace": true},
+ "input": [["Characters", "foo \t\r\n\u000C bar"]],
+ "expected": ["foo bar"]
+},
+
+{"description": "text within <pre>",
+ "options": {"strip_whitespace": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "pre"]],
+ "expected": ["<pre>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</pre>"]
+},
+
+{"description": "text within <pre>, with inner markup",
+ "options": {"strip_whitespace": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}], ["Characters", "\t\r\n\u000C fo"], ["StartTag", "http://www.w3.org/1999/xhtml", "span", {}], ["Characters", "o \t\r\n\u000C b"], ["EndTag", "http://www.w3.org/1999/xhtml", "span"], ["Characters", "ar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "pre"]],
+ "expected": ["<pre>\t\r\n\u000C fo<span>o \t\r\n\u000C b</span>ar \t\r\n\u000C</pre>"]
+},
+
+{"description": "text within <textarea>",
+ "options": {"strip_whitespace": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "textarea", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "textarea"]],
+ "expected": ["<textarea>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</textarea>"]
+},
+
+{"description": "text within <script>",
+ "options": {"strip_whitespace": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "script"]],
+ "expected": ["<script>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</script>"]
+},
+
+{"description": "text within <style>",
+ "options": {"strip_whitespace": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "style"]],
+ "expected": ["<style>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</style>"]
+}
+
+]}
\ No newline at end of file
diff --git a/lib/html5lib/tests/testdata/tokenizer/README.md b/lib/html5lib/tests/testdata/tokenizer/README.md
new file mode 100644
index 00000000..66b81e8f
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tokenizer/README.md
@@ -0,0 +1,107 @@
+Tokenizer tests
+===============
+
+The test format is [JSON](http://www.json.org/). This has the advantage
+that the syntax allows backward-compatible extensions to the tests and
+the disadvantage that it is relatively verbose.
+
+Basic Structure
+---------------
+
+    {"tests": [
+        {"description": "Test description",
+        "input": "input_string",
+        "output": [expected_output_tokens],
+        "initialStates": [initial_states],
+        "lastStartTag": last_start_tag,
+        "errors": [parse_errors]
+        }
+    ]}
+
+Multiple tests per file are allowed simply by adding more objects to the
+"tests" list.
+
+Each parse error is an object that contains an error `code` and one-based
+error location indices: `line` and `col`.
+
+`description`, `input` and `output` are always present. The other values
+are optional.
+
+### Test set-up
+
+`test.input` is a string containing the characters to pass to the
+tokenizer. Specifically, it represents the characters of the **input
+stream**, and so implementations are expected to perform the processing
+described in the spec's **Preprocessing the input stream** section
+before feeding the result to the tokenizer.
+
+If `test.doubleEscaped` is present and `true`, then `test.input` is not
+quite as described above. Instead, it must first be subjected to another
+round of unescaping (i.e., in addition to any unescaping involved in the
+JSON import), and the result of *that* represents the characters of the
+input stream. Currently, the only unescaping required by this option is
+to convert each sequence of the form \\uHHHH (where H is a hex digit)
+into the corresponding Unicode code point. (Note that this option also
+affects the interpretation of `test.output`.)
+
+`test.initialStates` is a list of strings, each being the name of a
+tokenizer state which can be one of the following:
+
+-   `Data state`
+-   `PLAINTEXT state`
+-   `RCDATA state`
+-   `RAWTEXT state`
+-   `Script data state`
+-   `CDATA section state`
+
+The test should be run once for each string, using it
+to set the tokenizer's initial state for that run. If
+`test.initialStates` is omitted, it defaults to `["Data state"]`.
+
+`test.lastStartTag` is a lowercase string that should be used as "the
+tag name of the last start tag to have been emitted from this
+tokenizer", referenced in the spec's definition of **appropriate end tag
+token**. If it is omitted, it is treated as if "no start tag has been
+emitted from this tokenizer".
+
+### Test results
+
+`test.output` is a list of tokens, ordered so that the first token
+produced by the tokenizer is first (leftmost) in the list. The list must
+match the **complete** list of tokens that the tokenizer should produce.
+Valid tokens are:
+
+    ["DOCTYPE", name, public_id, system_id, correctness]
+    ["StartTag", name, {attributes}*, true*]
+    ["StartTag", name, {attributes}]
+    ["EndTag", name]
+    ["Comment", data]
+    ["Character", data]
+
+`public_id` and `system_id` are either strings or `null`. `correctness`
+is either `true` or `false`; `true` corresponds to the force-quirks flag
+being false, and vice-versa.
+
+When the self-closing flag is set, the `StartTag` array has `true` as
+its fourth entry. When the flag is not set, the array has only three
+entries for backwards compatibility.
+
+All adjacent character tokens are coalesced into a single
+`["Character", data]` token.
+
+If `test.doubleEscaped` is present and `true`, then every string within
+`test.output` must be further unescaped (as described above) before
+comparing with the tokenizer's output.
+
+xmlViolation tests
+------------------
+
+`tokenizer/xmlViolation.test` differs from the above in a couple of
+ways:
+
+-   The name of the single member of the top-level JSON object is
+    "xmlViolationTests" instead of "tests".
+-   Each test's expected output assumes that the implementation is
+    applying the tweaks given in the spec's "Coercing an HTML DOM into
+    an infoset" section.
+
diff --git a/lib/html5lib/tests/testdata/tokenizer/contentModelFlags.test b/lib/html5lib/tests/testdata/tokenizer/contentModelFlags.test
new file mode 100644
index 00000000..e9cec845
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tokenizer/contentModelFlags.test
@@ -0,0 +1,93 @@
+{"tests": [
+
+{"description":"PLAINTEXT content model flag",
+"initialStates":["PLAINTEXT state"],
+"lastStartTag":"plaintext",
+"input":"<head>&body;",
+"output":[["Character", "<head>&body;"]]},
+
+{"description":"PLAINTEXT with seeming close tag",
+"initialStates":["PLAINTEXT state"],
+"lastStartTag":"plaintext",
+"input":"</plaintext>&body;",
+"output":[["Character", "</plaintext>&body;"]]},
+
+{"description":"End tag closing RCDATA or RAWTEXT",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"foo</xmp>",
+"output":[["Character", "foo"], ["EndTag", "xmp"]]},
+
+{"description":"End tag closing RCDATA or RAWTEXT (case-insensitivity)",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"foo</xMp>",
+"output":[["Character", "foo"], ["EndTag", "xmp"]]},
+
+{"description":"End tag closing RCDATA or RAWTEXT (ending with space)",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"foo</xmp ",
+"output":[["Character", "foo"]],
+"errors":[
+    { "code": "eof-in-tag", "line": 1, "col": 10 }
+]},
+
+{"description":"End tag closing RCDATA or RAWTEXT (ending with EOF)",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"foo</xmp",
+"output":[["Character", "foo</xmp"]]},
+
+{"description":"End tag closing RCDATA or RAWTEXT (ending with slash)",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"foo</xmp/",
+"output":[["Character", "foo"]],
+"errors":[
+    { "code": "eof-in-tag", "line": 1, "col": 10 }
+]},
+
+{"description":"End tag not closing RCDATA or RAWTEXT (ending with left-angle-bracket)",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"foo</xmp<",
+"output":[["Character", "foo</xmp<"]]},
+
+{"description":"End tag with incorrect name in RCDATA or RAWTEXT",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"</foo>bar</xmp>",
+"output":[["Character", "</foo>bar"], ["EndTag", "xmp"]]},
+
+{"description":"Partial end tags leading straight into partial end tags",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"</xmp</xmp</xmp>",
+"output":[["Character", "</xmp</xmp"], ["EndTag", "xmp"]]},
+
+{"description":"End tag with incorrect name in RCDATA or RAWTEXT (starting like correct name)",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"</foo>bar</xmpaar>",
+"output":[["Character", "</foo>bar</xmpaar>"]]},
+
+{"description":"End tag closing RCDATA or RAWTEXT, switching back to PCDATA",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"foo</xmp></baz>",
+"output":[["Character", "foo"], ["EndTag", "xmp"], ["EndTag", "baz"]]},
+
+{"description":"RAWTEXT w/ something looking like an entity",
+"initialStates":["RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"&foo;",
+"output":[["Character", "&foo;"]]},
+
+{"description":"RCDATA w/ an entity",
+"initialStates":["RCDATA state"],
+"lastStartTag":"textarea",
+"input":"&lt;",
+"output":[["Character", "<"]]}
+
+]}
diff --git a/lib/html5lib/tests/testdata/tokenizer/domjs.test b/lib/html5lib/tests/testdata/tokenizer/domjs.test
new file mode 100644
index 00000000..0117baf3
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tokenizer/domjs.test
@@ -0,0 +1,330 @@
+{
+    "tests": [
+        {
+            "description":"CR in bogus comment state",
+            "input":"<?\u000d",
+            "output":[["Comment", "?\u000a"]],
+            "errors":[
+                { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+            ]
+        },
+        {
+            "description":"CRLF in bogus comment state",
+            "input":"<?\u000d\u000a",
+            "output":[["Comment", "?\u000a"]],
+            "errors":[
+                { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+            ]
+        },
+        {
+            "description":"CRLFLF in bogus comment state",
+            "input":"<?\u000d\u000a\u000a",
+            "output":[["Comment", "?\u000a\u000a"]],
+            "errors":[
+                { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+            ]
+        },
+        {
+            "description":"Raw NUL replacement",
+            "doubleEscaped":true,
+            "initialStates":["RCDATA state", "RAWTEXT state", "PLAINTEXT state", "Script data state"],
+            "input":"\\u0000",
+            "output":[["Character", "\\uFFFD"]],
+            "errors":[
+                { "code": "unexpected-null-character", "line": 1, "col": 1 }
+            ]
+        },
+        {
+            "description":"NUL in CDATA section",
+            "doubleEscaped":true,
+            "initialStates":["CDATA section state"],
+            "input":"\\u0000]]>",
+            "output":[["Character", "\\u0000"]]
+        },
+        {
+           "description":"NUL in script HTML comment",
+           "doubleEscaped":true,
+           "initialStates":["Script data state"],
+           "input":"<!--test\\u0000--><!--test-\\u0000--><!--test--\\u0000-->",
+           "output":[["Character", "<!--test\\uFFFD--><!--test-\\uFFFD--><!--test--\\uFFFD-->"]],
+           "errors":[
+               { "code": "unexpected-null-character", "line": 1, "col": 9 },
+               { "code": "unexpected-null-character", "line": 1, "col": 22 },
+               { "code": "unexpected-null-character", "line": 1, "col": 36 }
+           ]
+        },
+        {
+           "description":"NUL in script HTML comment - double escaped",
+           "doubleEscaped":true,
+           "initialStates":["Script data state"],
+           "input":"<!--<script>\\u0000--><!--<script>-\\u0000--><!--<script>--\\u0000-->",
+           "output":[["Character", "<!--<script>\\uFFFD--><!--<script>-\\uFFFD--><!--<script>--\\uFFFD-->"]],
+           "errors":[
+                { "code": "unexpected-null-character", "line": 1, "col": 13 },
+                { "code": "unexpected-null-character", "line": 1, "col": 30 },
+                { "code": "unexpected-null-character", "line": 1, "col": 48 }
+           ]
+        },
+        {
+           "description":"EOF in script HTML comment",
+           "initialStates":["Script data state"],
+           "input":"<!--test",
+           "output":[["Character", "<!--test"]],
+           "errors":[
+               { "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 9 }
+           ]
+        },
+        {
+           "description":"EOF in script HTML comment after dash",
+           "initialStates":["Script data state"],
+           "input":"<!--test-",
+           "output":[["Character", "<!--test-"]],
+           "errors":[
+               { "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 10 }
+           ]
+        },
+        {
+           "description":"EOF in script HTML comment after dash dash",
+           "initialStates":["Script data state"],
+           "input":"<!--test--",
+           "output":[["Character", "<!--test--"]],
+           "errors":[
+               { "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 11 }
+           ]
+        },
+        {
+           "description":"EOF in script HTML comment double escaped after dash",
+           "initialStates":["Script data state"],
+           "input":"<!--<script>-",
+           "output":[["Character", "<!--<script>-"]],
+           "errors":[
+               { "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 14 }
+           ]
+        },
+        {
+           "description":"EOF in script HTML comment double escaped after dash dash",
+           "initialStates":["Script data state"],
+           "input":"<!--<script>--",
+           "output":[["Character", "<!--<script>--"]],
+           "errors":[
+               { "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 15 }
+           ]
+        },
+        {
+           "description":"EOF in script HTML comment - double escaped",
+           "initialStates":["Script data state"],
+           "input":"<!--<script>",
+           "output":[["Character", "<!--<script>"]],
+           "errors":[
+               { "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 13 }
+           ]
+        },
+        {
+            "description":"Dash in script HTML comment",
+            "initialStates":["Script data state"],
+            "input":"<!-- - -->",
+            "output":[["Character", "<!-- - -->"]]
+        },
+        {
+            "description":"Dash less-than in script HTML comment",
+            "initialStates":["Script data state"],
+            "input":"<!-- -< -->",
+            "output":[["Character", "<!-- -< -->"]]
+        },
+        {
+            "description":"Dash at end of script HTML comment",
+            "initialStates":["Script data state"],
+            "input":"<!--test--->",
+            "output":[["Character", "<!--test--->"]]
+        },
+        {
+            "description":"</script> in script HTML comment",
+            "initialStates":["Script data state"],
+            "lastStartTag":"script",
+            "input":"<!-- </script> --></script>",
+            "output":[["Character", "<!-- "], ["EndTag", "script"], ["Character", " -->"], ["EndTag", "script"]]
+        },
+        {
+            "description":"</script> in script HTML comment - double escaped",
+            "initialStates":["Script data state"],
+            "lastStartTag":"script",
+            "input":"<!-- <script></script> --></script>",
+            "output":[["Character", "<!-- <script></script> -->"], ["EndTag", "script"]]
+        },
+        {
+            "description":"</script> in script HTML comment - double escaped with nested <script>",
+            "initialStates":["Script data state"],
+            "lastStartTag":"script",
+            "input":"<!-- <script><script></script></script> --></script>",
+            "output":[["Character", "<!-- <script><script></script>"], ["EndTag", "script"], ["Character", " -->"], ["EndTag", "script"]]
+        },
+        {
+            "description":"</script> in script HTML comment - double escaped with abrupt end",
+            "initialStates":["Script data state"],
+            "lastStartTag":"script",
+            "input":"<!-- <script>--></script> --></script>",
+            "output":[["Character", "<!-- <script>-->"], ["EndTag", "script"], ["Character", " -->"], ["EndTag", "script"]]
+        },
+        {
+            "description":"Incomplete start tag in script HTML comment double escaped",
+            "initialStates":["Script data state"],
+            "lastStartTag":"script",
+            "input":"<!--<scrip></script>-->",
+            "output":[["Character", "<!--<scrip>"], ["EndTag", "script"], ["Character", "-->"]]
+        },
+        {
+            "description":"Unclosed start tag in script HTML comment double escaped",
+            "initialStates":["Script data state"],
+            "lastStartTag":"script",
+            "input":"<!--<script</script>-->",
+            "output":[["Character", "<!--<script"], ["EndTag", "script"], ["Character", "-->"]]
+        },
+        {
+            "description":"Incomplete end tag in script HTML comment double escaped",
+            "initialStates":["Script data state"],
+            "lastStartTag":"script",
+            "input":"<!--<script></scrip>-->",
+            "output":[["Character", "<!--<script></scrip>-->"]]
+        },
+        {
+            "description":"Unclosed end tag in script HTML comment double escaped",
+            "initialStates":["Script data state"],
+            "lastStartTag":"script",
+            "input":"<!--<script></script-->",
+            "output":[["Character", "<!--<script></script-->"]]
+        },
+        {
+            "description":"leading U+FEFF must pass through",
+            "initialStates":["Data state", "RCDATA state", "RAWTEXT state", "Script data state"],
+            "doubleEscaped":true,
+            "input":"\\uFEFFfoo\\uFEFFbar",
+            "output":[["Character", "\\uFEFFfoo\\uFEFFbar"]]
+        },
+        {
+            "description":"Non BMP-charref in RCDATA",
+            "initialStates":["RCDATA state"],
+            "input":"&NotEqualTilde;",
+            "output":[["Character", "\u2242\u0338"]]
+        },
+        {
+            "description":"Bad charref in RCDATA",
+            "initialStates":["RCDATA state"],
+            "input":"&NotEqualTild;",
+            "output":[["Character", "&NotEqualTild;"]],
+            "errors":[
+               { "code": "unknown-named-character-reference", "line": 1, "col": 14 }
+            ]
+        },
+        {
+            "description":"lowercase endtags",
+            "initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
+            "lastStartTag":"xmp",
+            "input":"</XMP>",
+            "output":[["EndTag","xmp"]]
+        },
+        {
+            "description":"bad endtag (space before name)",
+            "initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
+            "lastStartTag":"xmp",
+            "input":"</ XMP>",
+            "output":[["Character","</ XMP>"]]
+        },
+        {
+            "description":"bad endtag (not matching last start tag)",
+            "initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
+            "lastStartTag":"xmp",
+            "input":"</xm>",
+            "output":[["Character","</xm>"]]
+        },
+        {
+            "description":"bad endtag (without close bracket)",
+            "initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
+            "lastStartTag":"xmp",
+            "input":"</xm ",
+            "output":[["Character","</xm "]]
+        },
+        {
+            "description":"bad endtag (trailing solidus)",
+            "initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
+            "lastStartTag":"xmp",
+            "input":"</xm/",
+            "output":[["Character","</xm/"]]
+        },
+        {
+            "description":"Non BMP-charref in attribute",
+            "input":"<p id=\"&NotEqualTilde;\">",
+            "output":[["StartTag", "p", {"id":"\u2242\u0338"}]]
+        },
+        {
+            "description":"--!NUL in comment ",
+            "doubleEscaped":true,
+            "input":"<!----!\\u0000-->",
+            "output":[["Comment", "--!\\uFFFD"]],
+            "errors":[
+                { "code": "unexpected-null-character", "line": 1, "col": 8 }
+            ]
+        },
+        {
+            "description":"space EOF after doctype ",
+            "input":"<!DOCTYPE html ",
+            "output":[["DOCTYPE", "html", null, null , false]],
+            "errors":[
+                { "code": "eof-in-doctype", "line": 1, "col": 16 }
+            ]
+        },
+        {
+            "description":"CDATA in HTML content",
+            "input":"<![CDATA[foo]]>",
+            "output":[["Comment", "[CDATA[foo]]"]],
+            "errors":[
+                { "code": "cdata-in-html-content", "line": 1, "col": 9 }
+            ]
+        },
+        {
+            "description":"CDATA content",
+            "input":"foo&#32;]]>",
+            "initialStates":["CDATA section state"],
+            "output":[["Character", "foo&#32;"]]
+        },
+        {
+            "description":"CDATA followed by HTML content",
+            "input":"foo&#32;]]>&#32;",
+            "initialStates":["CDATA section state"],
+            "output":[["Character", "foo&#32; "]]
+        },
+        {
+            "description":"CDATA with extra bracket",
+            "input":"foo]]]>",
+            "initialStates":["CDATA section state"],
+            "output":[["Character", "foo]"]]
+        },
+        {
+            "description":"CDATA without end marker",
+            "input":"foo",
+            "initialStates":["CDATA section state"],
+            "output":[["Character", "foo"]],
+            "errors":[
+                { "code": "eof-in-cdata", "line": 1, "col": 4 }
+            ]
+        },
+        {
+            "description":"CDATA with single bracket ending",
+            "input":"foo]",
+            "initialStates":["CDATA section state"],
+            "output":[["Character", "foo]"]],
+            "errors":[
+                { "code": "eof-in-cdata", "line": 1, "col": 5 }
+            ]
+        },
+        {
+            "description":"CDATA with two brackets ending",
+            "input":"foo]]",
+            "initialStates":["CDATA section state"],
+            "output":[["Character", "foo]]"]],
+            "errors":[
+                { "code": "eof-in-cdata", "line": 1, "col": 6 }
+            ]
+        }
+
+    ]
+}
diff --git a/lib/html5lib/tests/testdata/tokenizer/entities.test b/lib/html5lib/tests/testdata/tokenizer/entities.test
new file mode 100644
index 00000000..a6469cd0
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tokenizer/entities.test
@@ -0,0 +1,542 @@
+{"tests": [
+
+{"description": "Undefined named entity in a double-quoted attribute value ending in semicolon and whose name starts with a known entity name.",
+"input":"<h a=\"&noti;\">",
+"output": [["StartTag", "h", {"a": "&noti;"}]]},
+
+{"description": "Entity name requiring semicolon instead followed by the equals sign in a double-quoted attribute value.",
+"input":"<h a=\"&lang=\">",
+"output": [["StartTag", "h", {"a": "&lang="}]]},
+
+{"description": "Valid entity name followed by the equals sign in a double-quoted attribute value.",
+"input":"<h a=\"&not=\">",
+"output": [["StartTag", "h", {"a": "&not="}]]},
+
+{"description": "Undefined named entity in a single-quoted attribute value ending in semicolon and whose name starts with a known entity name.",
+"input":"<h a='&noti;'>",
+"output": [["StartTag", "h", {"a": "&noti;"}]]},
+
+{"description": "Entity name requiring semicolon instead followed by the equals sign in a single-quoted attribute value.",
+"input":"<h a='&lang='>",
+"output": [["StartTag", "h", {"a": "&lang="}]]},
+
+{"description": "Valid entity name followed by the equals sign in a single-quoted attribute value.",
+"input":"<h a='&not='>",
+"output": [["StartTag", "h", {"a": "&not="}]]},
+
+{"description": "Undefined named entity in an unquoted attribute value ending in semicolon and whose name starts with a known entity name.",
+"input":"<h a=&noti;>",
+"output": [["StartTag", "h", {"a": "&noti;"}]]},
+
+{"description": "Entity name requiring semicolon instead followed by the equals sign in an unquoted attribute value.",
+"input":"<h a=&lang=>",
+"output": [["StartTag", "h", {"a": "&lang="}]],
+"errors":[
+    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 11 }
+]},
+
+{"description": "Valid entity name followed by the equals sign in an unquoted attribute value.",
+"input":"<h a=&not=>",
+"output": [["StartTag", "h", {"a": "&not="}]],
+"errors":[
+    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 10 }
+]},
+
+{"description": "Ambiguous ampersand.",
+"input":"&rrrraannddom;",
+"output": [["Character", "&rrrraannddom;"]],
+"errors":[
+    { "code": "unknown-named-character-reference", "line": 1, "col": 14 }
+]},
+
+{"description": "Semicolonless named entity 'not' followed by 'i;' in body",
+"input":"&noti;",
+"output": [["Character", "\u00ACi;"]],
+"errors":[
+    { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 }
+]},
+
+{"description": "Very long undefined named entity in body",
+"input":"&ammmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmp;",
+"output": [["Character", "&ammmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmp;"]],
+"errors":[
+    { "code": "unknown-named-character-reference", "line": 1, "col": 950 }
+]},
+
+{"description": "CR as numeric entity",
+"input":"&#013;",
+"output": [["Character", "\r"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 7 }
+]},
+
+{"description": "CR as hexadecimal numeric entity",
+"input":"&#x00D;",
+"output": [["Character", "\r"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 EURO SIGN numeric entity.",
+"input":"&#0128;",
+"output": [["Character", "\u20AC"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
+"input":"&#0129;",
+"output": [["Character", "\u0081"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK numeric entity.",
+"input":"&#0130;",
+"output": [["Character", "\u201A"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK numeric entity.",
+"input":"&#0131;",
+"output": [["Character", "\u0192"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK numeric entity.",
+"input":"&#0132;",
+"output": [["Character", "\u201E"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 HORIZONTAL ELLIPSIS numeric entity.",
+"input":"&#0133;",
+"output": [["Character", "\u2026"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 DAGGER numeric entity.",
+"input":"&#0134;",
+"output": [["Character", "\u2020"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 DOUBLE DAGGER numeric entity.",
+"input":"&#0135;",
+"output": [["Character", "\u2021"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT numeric entity.",
+"input":"&#0136;",
+"output": [["Character", "\u02C6"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 PER MILLE SIGN numeric entity.",
+"input":"&#0137;",
+"output": [["Character", "\u2030"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON numeric entity.",
+"input":"&#0138;",
+"output": [["Character", "\u0160"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK numeric entity.",
+"input":"&#0139;",
+"output": [["Character", "\u2039"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE numeric entity.",
+"input":"&#0140;",
+"output": [["Character", "\u0152"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
+"input":"&#0141;",
+"output": [["Character", "\u008D"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON numeric entity.",
+"input":"&#0142;",
+"output": [["Character", "\u017D"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
+"input":"&#0143;",
+"output": [["Character", "\u008F"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
+"input":"&#0144;",
+"output": [["Character", "\u0090"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK numeric entity.",
+"input":"&#0145;",
+"output": [["Character", "\u2018"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK numeric entity.",
+"input":"&#0146;",
+"output": [["Character", "\u2019"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK numeric entity.",
+"input":"&#0147;",
+"output": [["Character", "\u201C"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK numeric entity.",
+"input":"&#0148;",
+"output": [["Character", "\u201D"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 BULLET numeric entity.",
+"input":"&#0149;",
+"output": [["Character", "\u2022"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 EN DASH numeric entity.",
+"input":"&#0150;",
+"output": [["Character", "\u2013"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 EM DASH numeric entity.",
+"input":"&#0151;",
+"output": [["Character", "\u2014"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 SMALL TILDE numeric entity.",
+"input":"&#0152;",
+"output": [["Character", "\u02DC"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 TRADE MARK SIGN numeric entity.",
+"input":"&#0153;",
+"output": [["Character", "\u2122"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON numeric entity.",
+"input":"&#0154;",
+"output": [["Character", "\u0161"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK numeric entity.",
+"input":"&#0155;",
+"output": [["Character", "\u203A"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 LATIN SMALL LIGATURE OE numeric entity.",
+"input":"&#0156;",
+"output": [["Character", "\u0153"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
+"input":"&#0157;",
+"output": [["Character", "\u009D"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 EURO SIGN hexadecimal numeric entity.",
+"input":"&#x080;",
+"output": [["Character", "\u20AC"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
+"input":"&#x081;",
+"output": [["Character", "\u0081"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
+"input":"&#x082;",
+"output": [["Character", "\u201A"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK hexadecimal numeric entity.",
+"input":"&#x083;",
+"output": [["Character", "\u0192"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
+"input":"&#x084;",
+"output": [["Character", "\u201E"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 HORIZONTAL ELLIPSIS hexadecimal numeric entity.",
+"input":"&#x085;",
+"output": [["Character", "\u2026"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 DAGGER hexadecimal numeric entity.",
+"input":"&#x086;",
+"output": [["Character", "\u2020"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 DOUBLE DAGGER hexadecimal numeric entity.",
+"input":"&#x087;",
+"output": [["Character", "\u2021"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT hexadecimal numeric entity.",
+"input":"&#x088;",
+"output": [["Character", "\u02C6"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 PER MILLE SIGN hexadecimal numeric entity.",
+"input":"&#x089;",
+"output": [["Character", "\u2030"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON hexadecimal numeric entity.",
+"input":"&#x08A;",
+"output": [["Character", "\u0160"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
+"input":"&#x08B;",
+"output": [["Character", "\u2039"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE hexadecimal numeric entity.",
+"input":"&#x08C;",
+"output": [["Character", "\u0152"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
+"input":"&#x08D;",
+"output": [["Character", "\u008D"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON hexadecimal numeric entity.",
+"input":"&#x08E;",
+"output": [["Character", "\u017D"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
+"input":"&#x08F;",
+"output": [["Character", "\u008F"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
+"input":"&#x090;",
+"output": [["Character", "\u0090"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK hexadecimal numeric entity.",
+"input":"&#x091;",
+"output": [["Character", "\u2018"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK hexadecimal numeric entity.",
+"input":"&#x092;",
+"output": [["Character", "\u2019"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
+"input":"&#x093;",
+"output": [["Character", "\u201C"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
+"input":"&#x094;",
+"output": [["Character", "\u201D"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 BULLET hexadecimal numeric entity.",
+"input":"&#x095;",
+"output": [["Character", "\u2022"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 EN DASH hexadecimal numeric entity.",
+"input":"&#x096;",
+"output": [["Character", "\u2013"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 EM DASH hexadecimal numeric entity.",
+"input":"&#x097;",
+"output": [["Character", "\u2014"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 SMALL TILDE hexadecimal numeric entity.",
+"input":"&#x098;",
+"output": [["Character", "\u02DC"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 TRADE MARK SIGN hexadecimal numeric entity.",
+"input":"&#x099;",
+"output": [["Character", "\u2122"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON hexadecimal numeric entity.",
+"input":"&#x09A;",
+"output": [["Character", "\u0161"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
+"input":"&#x09B;",
+"output": [["Character", "\u203A"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 LATIN SMALL LIGATURE OE hexadecimal numeric entity.",
+"input":"&#x09C;",
+"output": [["Character", "\u0153"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
+"input":"&#x09D;",
+"output": [["Character", "\u009D"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 LATIN SMALL LETTER Z WITH CARON hexadecimal numeric entity.",
+"input":"&#x09E;",
+"output": [["Character", "\u017E"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Windows-1252 LATIN CAPITAL LETTER Y WITH DIAERESIS hexadecimal numeric entity.",
+"input":"&#x09F;",
+"output": [["Character", "\u0178"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description": "Decimal numeric entity followed by hex character a.",
+"input":"&#97a",
+"output": [["Character", "aa"]],
+"errors":[
+    { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 }
+]},
+
+{"description": "Decimal numeric entity followed by hex character A.",
+"input":"&#97A",
+"output": [["Character", "aA"]],
+"errors":[
+    { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 }
+]},
+
+{"description": "Decimal numeric entity followed by hex character f.",
+"input":"&#97f",
+"output": [["Character", "af"]],
+"errors":[
+    { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 }
+]},
+
+{"description": "Decimal numeric entity followed by hex character F.",
+"input":"&#97F",
+"output": [["Character", "aF"]],
+"errors":[
+    { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 }
+]}
+
+]}
diff --git a/lib/html5lib/tests/testdata/tokenizer/escapeFlag.test b/lib/html5lib/tests/testdata/tokenizer/escapeFlag.test
new file mode 100644
index 00000000..d7d2c490
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tokenizer/escapeFlag.test
@@ -0,0 +1,36 @@
+{"tests": [
+
+{"description":"Commented close tag in RCDATA or RAWTEXT",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"foo<!--</xmp>--></xmp>",
+"output":[["Character", "foo<!--"], ["EndTag", "xmp"], ["Character", "-->"], ["EndTag", "xmp"]]},
+
+{"description":"Bogus comment in RCDATA or RAWTEXT",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"foo<!-->baz</xmp>",
+"output":[["Character", "foo<!-->baz"], ["EndTag", "xmp"]]},
+
+{"description":"End tag surrounded by bogus comment in RCDATA or RAWTEXT",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"foo<!--></xmp><!-->baz</xmp>",
+"output":[["Character", "foo<!-->"], ["EndTag", "xmp"], ["Comment", ""], ["Character", "baz"], ["EndTag", "xmp"]],
+"errors":[
+    { "code": "abrupt-closing-of-empty-comment", "line": 1, "col": 19 }
+]},
+
+{"description":"Commented entities in RCDATA",
+"initialStates":["RCDATA state"],
+"lastStartTag":"xmp",
+"input":" &amp; <!-- &amp; --> &amp; </xmp>",
+"output":[["Character", " & <!-- & --> & "], ["EndTag", "xmp"]]},
+
+{"description":"Incorrect comment ending sequences in RCDATA or RAWTEXT",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"foo<!-- x --x>x-- >x--!>x--<></xmp>",
+"output":[["Character", "foo<!-- x --x>x-- >x--!>x--<>"], ["EndTag", "xmp"]]}
+
+]}
diff --git a/lib/html5lib/tests/testdata/tokenizer/namedEntities.test b/lib/html5lib/tests/testdata/tokenizer/namedEntities.test
new file mode 100644
index 00000000..f74f5bff
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tokenizer/namedEntities.test
@@ -0,0 +1,42422 @@
+{
+    "tests": [
+        {
+            "input": "&AElig",
+            "description": "Named entity: AElig without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00c6"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 7 }
+            ]
+        },
+        {
+            "input": "&AElig;",
+            "description": "Named entity: AElig; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00c6"
+                ]
+            ]
+        },
+        {
+            "input": "&AMP",
+            "description": "Named entity: AMP without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 5 }
+            ]
+        },
+        {
+            "input": "&AMP;",
+            "description": "Named entity: AMP; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&"
+                ]
+            ]
+        },
+        {
+            "input": "&Aacute",
+            "description": "Named entity: Aacute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00c1"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&Aacute;",
+            "description": "Named entity: Aacute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00c1"
+                ]
+            ]
+        },
+        {
+            "input": "&Abreve",
+            "description": "Bad named entity: Abreve without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Abreve"
+                ]
+            ]
+        },
+        {
+            "input": "&Abreve;",
+            "description": "Named entity: Abreve; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0102"
+                ]
+            ]
+        },
+        {
+            "input": "&Acirc",
+            "description": "Named entity: Acirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00c2"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 7 }
+            ]
+        },
+        {
+            "input": "&Acirc;",
+            "description": "Named entity: Acirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00c2"
+                ]
+            ]
+        },
+        {
+            "input": "&Acy",
+            "description": "Bad named entity: Acy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Acy"
+                ]
+            ]
+        },
+        {
+            "input": "&Acy;",
+            "description": "Named entity: Acy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0410"
+                ]
+            ]
+        },
+        {
+            "input": "&Afr",
+            "description": "Bad named entity: Afr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Afr"
+                ]
+            ]
+        },
+        {
+            "input": "&Afr;",
+            "description": "Named entity: Afr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd04"
+                ]
+            ]
+        },
+        {
+            "input": "&Agrave",
+            "description": "Named entity: Agrave without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00c0"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&Agrave;",
+            "description": "Named entity: Agrave; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00c0"
+                ]
+            ]
+        },
+        {
+            "input": "&Alpha",
+            "description": "Bad named entity: Alpha without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Alpha"
+                ]
+            ]
+        },
+        {
+            "input": "&Alpha;",
+            "description": "Named entity: Alpha; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0391"
+                ]
+            ]
+        },
+        {
+            "input": "&Amacr",
+            "description": "Bad named entity: Amacr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Amacr"
+                ]
+            ]
+        },
+        {
+            "input": "&Amacr;",
+            "description": "Named entity: Amacr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0100"
+                ]
+            ]
+        },
+        {
+            "input": "&And",
+            "description": "Bad named entity: And without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&And"
+                ]
+            ]
+        },
+        {
+            "input": "&And;",
+            "description": "Named entity: And; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a53"
+                ]
+            ]
+        },
+        {
+            "input": "&Aogon",
+            "description": "Bad named entity: Aogon without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Aogon"
+                ]
+            ]
+        },
+        {
+            "input": "&Aogon;",
+            "description": "Named entity: Aogon; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0104"
+                ]
+            ]
+        },
+        {
+            "input": "&Aopf",
+            "description": "Bad named entity: Aopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Aopf"
+                ]
+            ]
+        },
+        {
+            "input": "&Aopf;",
+            "description": "Named entity: Aopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd38"
+                ]
+            ]
+        },
+        {
+            "input": "&ApplyFunction",
+            "description": "Bad named entity: ApplyFunction without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ApplyFunction"
+                ]
+            ]
+        },
+        {
+            "input": "&ApplyFunction;",
+            "description": "Named entity: ApplyFunction; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2061"
+                ]
+            ]
+        },
+        {
+            "input": "&Aring",
+            "description": "Named entity: Aring without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00c5"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 7 }
+            ]
+        },
+        {
+            "input": "&Aring;",
+            "description": "Named entity: Aring; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00c5"
+                ]
+            ]
+        },
+        {
+            "input": "&Ascr",
+            "description": "Bad named entity: Ascr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Ascr"
+                ]
+            ]
+        },
+        {
+            "input": "&Ascr;",
+            "description": "Named entity: Ascr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udc9c"
+                ]
+            ]
+        },
+        {
+            "input": "&Assign",
+            "description": "Bad named entity: Assign without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Assign"
+                ]
+            ]
+        },
+        {
+            "input": "&Assign;",
+            "description": "Named entity: Assign; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2254"
+                ]
+            ]
+        },
+        {
+            "input": "&Atilde",
+            "description": "Named entity: Atilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00c3"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&Atilde;",
+            "description": "Named entity: Atilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00c3"
+                ]
+            ]
+        },
+        {
+            "input": "&Auml",
+            "description": "Named entity: Auml without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00c4"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 6 }
+            ]
+        },
+        {
+            "input": "&Auml;",
+            "description": "Named entity: Auml; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00c4"
+                ]
+            ]
+        },
+        {
+            "input": "&Backslash",
+            "description": "Bad named entity: Backslash without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Backslash"
+                ]
+            ]
+        },
+        {
+            "input": "&Backslash;",
+            "description": "Named entity: Backslash; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2216"
+                ]
+            ]
+        },
+        {
+            "input": "&Barv",
+            "description": "Bad named entity: Barv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Barv"
+                ]
+            ]
+        },
+        {
+            "input": "&Barv;",
+            "description": "Named entity: Barv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ae7"
+                ]
+            ]
+        },
+        {
+            "input": "&Barwed",
+            "description": "Bad named entity: Barwed without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Barwed"
+                ]
+            ]
+        },
+        {
+            "input": "&Barwed;",
+            "description": "Named entity: Barwed; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2306"
+                ]
+            ]
+        },
+        {
+            "input": "&Bcy",
+            "description": "Bad named entity: Bcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Bcy"
+                ]
+            ]
+        },
+        {
+            "input": "&Bcy;",
+            "description": "Named entity: Bcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0411"
+                ]
+            ]
+        },
+        {
+            "input": "&Because",
+            "description": "Bad named entity: Because without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Because"
+                ]
+            ]
+        },
+        {
+            "input": "&Because;",
+            "description": "Named entity: Because; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2235"
+                ]
+            ]
+        },
+        {
+            "input": "&Bernoullis",
+            "description": "Bad named entity: Bernoullis without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Bernoullis"
+                ]
+            ]
+        },
+        {
+            "input": "&Bernoullis;",
+            "description": "Named entity: Bernoullis; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u212c"
+                ]
+            ]
+        },
+        {
+            "input": "&Beta",
+            "description": "Bad named entity: Beta without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Beta"
+                ]
+            ]
+        },
+        {
+            "input": "&Beta;",
+            "description": "Named entity: Beta; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0392"
+                ]
+            ]
+        },
+        {
+            "input": "&Bfr",
+            "description": "Bad named entity: Bfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Bfr"
+                ]
+            ]
+        },
+        {
+            "input": "&Bfr;",
+            "description": "Named entity: Bfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd05"
+                ]
+            ]
+        },
+        {
+            "input": "&Bopf",
+            "description": "Bad named entity: Bopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Bopf"
+                ]
+            ]
+        },
+        {
+            "input": "&Bopf;",
+            "description": "Named entity: Bopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd39"
+                ]
+            ]
+        },
+        {
+            "input": "&Breve",
+            "description": "Bad named entity: Breve without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Breve"
+                ]
+            ]
+        },
+        {
+            "input": "&Breve;",
+            "description": "Named entity: Breve; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u02d8"
+                ]
+            ]
+        },
+        {
+            "input": "&Bscr",
+            "description": "Bad named entity: Bscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Bscr"
+                ]
+            ]
+        },
+        {
+            "input": "&Bscr;",
+            "description": "Named entity: Bscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u212c"
+                ]
+            ]
+        },
+        {
+            "input": "&Bumpeq",
+            "description": "Bad named entity: Bumpeq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Bumpeq"
+                ]
+            ]
+        },
+        {
+            "input": "&Bumpeq;",
+            "description": "Named entity: Bumpeq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u224e"
+                ]
+            ]
+        },
+        {
+            "input": "&CHcy",
+            "description": "Bad named entity: CHcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&CHcy"
+                ]
+            ]
+        },
+        {
+            "input": "&CHcy;",
+            "description": "Named entity: CHcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0427"
+                ]
+            ]
+        },
+        {
+            "input": "&COPY",
+            "description": "Named entity: COPY without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a9"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 6 }
+            ]
+        },
+        {
+            "input": "&COPY;",
+            "description": "Named entity: COPY; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a9"
+                ]
+            ]
+        },
+        {
+            "input": "&Cacute",
+            "description": "Bad named entity: Cacute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Cacute"
+                ]
+            ]
+        },
+        {
+            "input": "&Cacute;",
+            "description": "Named entity: Cacute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0106"
+                ]
+            ]
+        },
+        {
+            "input": "&Cap",
+            "description": "Bad named entity: Cap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Cap"
+                ]
+            ]
+        },
+        {
+            "input": "&Cap;",
+            "description": "Named entity: Cap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22d2"
+                ]
+            ]
+        },
+        {
+            "input": "&CapitalDifferentialD",
+            "description": "Bad named entity: CapitalDifferentialD without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&CapitalDifferentialD"
+                ]
+            ]
+        },
+        {
+            "input": "&CapitalDifferentialD;",
+            "description": "Named entity: CapitalDifferentialD; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2145"
+                ]
+            ]
+        },
+        {
+            "input": "&Cayleys",
+            "description": "Bad named entity: Cayleys without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Cayleys"
+                ]
+            ]
+        },
+        {
+            "input": "&Cayleys;",
+            "description": "Named entity: Cayleys; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u212d"
+                ]
+            ]
+        },
+        {
+            "input": "&Ccaron",
+            "description": "Bad named entity: Ccaron without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Ccaron"
+                ]
+            ]
+        },
+        {
+            "input": "&Ccaron;",
+            "description": "Named entity: Ccaron; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u010c"
+                ]
+            ]
+        },
+        {
+            "input": "&Ccedil",
+            "description": "Named entity: Ccedil without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00c7"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&Ccedil;",
+            "description": "Named entity: Ccedil; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00c7"
+                ]
+            ]
+        },
+        {
+            "input": "&Ccirc",
+            "description": "Bad named entity: Ccirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Ccirc"
+                ]
+            ]
+        },
+        {
+            "input": "&Ccirc;",
+            "description": "Named entity: Ccirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0108"
+                ]
+            ]
+        },
+        {
+            "input": "&Cconint",
+            "description": "Bad named entity: Cconint without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Cconint"
+                ]
+            ]
+        },
+        {
+            "input": "&Cconint;",
+            "description": "Named entity: Cconint; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2230"
+                ]
+            ]
+        },
+        {
+            "input": "&Cdot",
+            "description": "Bad named entity: Cdot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Cdot"
+                ]
+            ]
+        },
+        {
+            "input": "&Cdot;",
+            "description": "Named entity: Cdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u010a"
+                ]
+            ]
+        },
+        {
+            "input": "&Cedilla",
+            "description": "Bad named entity: Cedilla without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Cedilla"
+                ]
+            ]
+        },
+        {
+            "input": "&Cedilla;",
+            "description": "Named entity: Cedilla; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b8"
+                ]
+            ]
+        },
+        {
+            "input": "&CenterDot",
+            "description": "Bad named entity: CenterDot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&CenterDot"
+                ]
+            ]
+        },
+        {
+            "input": "&CenterDot;",
+            "description": "Named entity: CenterDot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b7"
+                ]
+            ]
+        },
+        {
+            "input": "&Cfr",
+            "description": "Bad named entity: Cfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Cfr"
+                ]
+            ]
+        },
+        {
+            "input": "&Cfr;",
+            "description": "Named entity: Cfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u212d"
+                ]
+            ]
+        },
+        {
+            "input": "&Chi",
+            "description": "Bad named entity: Chi without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Chi"
+                ]
+            ]
+        },
+        {
+            "input": "&Chi;",
+            "description": "Named entity: Chi; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03a7"
+                ]
+            ]
+        },
+        {
+            "input": "&CircleDot",
+            "description": "Bad named entity: CircleDot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&CircleDot"
+                ]
+            ]
+        },
+        {
+            "input": "&CircleDot;",
+            "description": "Named entity: CircleDot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2299"
+                ]
+            ]
+        },
+        {
+            "input": "&CircleMinus",
+            "description": "Bad named entity: CircleMinus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&CircleMinus"
+                ]
+            ]
+        },
+        {
+            "input": "&CircleMinus;",
+            "description": "Named entity: CircleMinus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2296"
+                ]
+            ]
+        },
+        {
+            "input": "&CirclePlus",
+            "description": "Bad named entity: CirclePlus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&CirclePlus"
+                ]
+            ]
+        },
+        {
+            "input": "&CirclePlus;",
+            "description": "Named entity: CirclePlus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2295"
+                ]
+            ]
+        },
+        {
+            "input": "&CircleTimes",
+            "description": "Bad named entity: CircleTimes without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&CircleTimes"
+                ]
+            ]
+        },
+        {
+            "input": "&CircleTimes;",
+            "description": "Named entity: CircleTimes; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2297"
+                ]
+            ]
+        },
+        {
+            "input": "&ClockwiseContourIntegral",
+            "description": "Bad named entity: ClockwiseContourIntegral without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ClockwiseContourIntegral"
+                ]
+            ]
+        },
+        {
+            "input": "&ClockwiseContourIntegral;",
+            "description": "Named entity: ClockwiseContourIntegral; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2232"
+                ]
+            ]
+        },
+        {
+            "input": "&CloseCurlyDoubleQuote",
+            "description": "Bad named entity: CloseCurlyDoubleQuote without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&CloseCurlyDoubleQuote"
+                ]
+            ]
+        },
+        {
+            "input": "&CloseCurlyDoubleQuote;",
+            "description": "Named entity: CloseCurlyDoubleQuote; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u201d"
+                ]
+            ]
+        },
+        {
+            "input": "&CloseCurlyQuote",
+            "description": "Bad named entity: CloseCurlyQuote without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&CloseCurlyQuote"
+                ]
+            ]
+        },
+        {
+            "input": "&CloseCurlyQuote;",
+            "description": "Named entity: CloseCurlyQuote; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2019"
+                ]
+            ]
+        },
+        {
+            "input": "&Colon",
+            "description": "Bad named entity: Colon without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Colon"
+                ]
+            ]
+        },
+        {
+            "input": "&Colon;",
+            "description": "Named entity: Colon; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2237"
+                ]
+            ]
+        },
+        {
+            "input": "&Colone",
+            "description": "Bad named entity: Colone without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Colone"
+                ]
+            ]
+        },
+        {
+            "input": "&Colone;",
+            "description": "Named entity: Colone; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a74"
+                ]
+            ]
+        },
+        {
+            "input": "&Congruent",
+            "description": "Bad named entity: Congruent without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Congruent"
+                ]
+            ]
+        },
+        {
+            "input": "&Congruent;",
+            "description": "Named entity: Congruent; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2261"
+                ]
+            ]
+        },
+        {
+            "input": "&Conint",
+            "description": "Bad named entity: Conint without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Conint"
+                ]
+            ]
+        },
+        {
+            "input": "&Conint;",
+            "description": "Named entity: Conint; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u222f"
+                ]
+            ]
+        },
+        {
+            "input": "&ContourIntegral",
+            "description": "Bad named entity: ContourIntegral without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ContourIntegral"
+                ]
+            ]
+        },
+        {
+            "input": "&ContourIntegral;",
+            "description": "Named entity: ContourIntegral; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u222e"
+                ]
+            ]
+        },
+        {
+            "input": "&Copf",
+            "description": "Bad named entity: Copf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Copf"
+                ]
+            ]
+        },
+        {
+            "input": "&Copf;",
+            "description": "Named entity: Copf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2102"
+                ]
+            ]
+        },
+        {
+            "input": "&Coproduct",
+            "description": "Bad named entity: Coproduct without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Coproduct"
+                ]
+            ]
+        },
+        {
+            "input": "&Coproduct;",
+            "description": "Named entity: Coproduct; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2210"
+                ]
+            ]
+        },
+        {
+            "input": "&CounterClockwiseContourIntegral",
+            "description": "Bad named entity: CounterClockwiseContourIntegral without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&CounterClockwiseContourIntegral"
+                ]
+            ]
+        },
+        {
+            "input": "&CounterClockwiseContourIntegral;",
+            "description": "Named entity: CounterClockwiseContourIntegral; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2233"
+                ]
+            ]
+        },
+        {
+            "input": "&Cross",
+            "description": "Bad named entity: Cross without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Cross"
+                ]
+            ]
+        },
+        {
+            "input": "&Cross;",
+            "description": "Named entity: Cross; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a2f"
+                ]
+            ]
+        },
+        {
+            "input": "&Cscr",
+            "description": "Bad named entity: Cscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Cscr"
+                ]
+            ]
+        },
+        {
+            "input": "&Cscr;",
+            "description": "Named entity: Cscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udc9e"
+                ]
+            ]
+        },
+        {
+            "input": "&Cup",
+            "description": "Bad named entity: Cup without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Cup"
+                ]
+            ]
+        },
+        {
+            "input": "&Cup;",
+            "description": "Named entity: Cup; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22d3"
+                ]
+            ]
+        },
+        {
+            "input": "&CupCap",
+            "description": "Bad named entity: CupCap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&CupCap"
+                ]
+            ]
+        },
+        {
+            "input": "&CupCap;",
+            "description": "Named entity: CupCap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u224d"
+                ]
+            ]
+        },
+        {
+            "input": "&DD",
+            "description": "Bad named entity: DD without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DD"
+                ]
+            ]
+        },
+        {
+            "input": "&DD;",
+            "description": "Named entity: DD; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2145"
+                ]
+            ]
+        },
+        {
+            "input": "&DDotrahd",
+            "description": "Bad named entity: DDotrahd without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DDotrahd"
+                ]
+            ]
+        },
+        {
+            "input": "&DDotrahd;",
+            "description": "Named entity: DDotrahd; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2911"
+                ]
+            ]
+        },
+        {
+            "input": "&DJcy",
+            "description": "Bad named entity: DJcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DJcy"
+                ]
+            ]
+        },
+        {
+            "input": "&DJcy;",
+            "description": "Named entity: DJcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0402"
+                ]
+            ]
+        },
+        {
+            "input": "&DScy",
+            "description": "Bad named entity: DScy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DScy"
+                ]
+            ]
+        },
+        {
+            "input": "&DScy;",
+            "description": "Named entity: DScy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0405"
+                ]
+            ]
+        },
+        {
+            "input": "&DZcy",
+            "description": "Bad named entity: DZcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DZcy"
+                ]
+            ]
+        },
+        {
+            "input": "&DZcy;",
+            "description": "Named entity: DZcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u040f"
+                ]
+            ]
+        },
+        {
+            "input": "&Dagger",
+            "description": "Bad named entity: Dagger without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Dagger"
+                ]
+            ]
+        },
+        {
+            "input": "&Dagger;",
+            "description": "Named entity: Dagger; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2021"
+                ]
+            ]
+        },
+        {
+            "input": "&Darr",
+            "description": "Bad named entity: Darr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Darr"
+                ]
+            ]
+        },
+        {
+            "input": "&Darr;",
+            "description": "Named entity: Darr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21a1"
+                ]
+            ]
+        },
+        {
+            "input": "&Dashv",
+            "description": "Bad named entity: Dashv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Dashv"
+                ]
+            ]
+        },
+        {
+            "input": "&Dashv;",
+            "description": "Named entity: Dashv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ae4"
+                ]
+            ]
+        },
+        {
+            "input": "&Dcaron",
+            "description": "Bad named entity: Dcaron without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Dcaron"
+                ]
+            ]
+        },
+        {
+            "input": "&Dcaron;",
+            "description": "Named entity: Dcaron; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u010e"
+                ]
+            ]
+        },
+        {
+            "input": "&Dcy",
+            "description": "Bad named entity: Dcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Dcy"
+                ]
+            ]
+        },
+        {
+            "input": "&Dcy;",
+            "description": "Named entity: Dcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0414"
+                ]
+            ]
+        },
+        {
+            "input": "&Del",
+            "description": "Bad named entity: Del without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Del"
+                ]
+            ]
+        },
+        {
+            "input": "&Del;",
+            "description": "Named entity: Del; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2207"
+                ]
+            ]
+        },
+        {
+            "input": "&Delta",
+            "description": "Bad named entity: Delta without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Delta"
+                ]
+            ]
+        },
+        {
+            "input": "&Delta;",
+            "description": "Named entity: Delta; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0394"
+                ]
+            ]
+        },
+        {
+            "input": "&Dfr",
+            "description": "Bad named entity: Dfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Dfr"
+                ]
+            ]
+        },
+        {
+            "input": "&Dfr;",
+            "description": "Named entity: Dfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd07"
+                ]
+            ]
+        },
+        {
+            "input": "&DiacriticalAcute",
+            "description": "Bad named entity: DiacriticalAcute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DiacriticalAcute"
+                ]
+            ]
+        },
+        {
+            "input": "&DiacriticalAcute;",
+            "description": "Named entity: DiacriticalAcute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b4"
+                ]
+            ]
+        },
+        {
+            "input": "&DiacriticalDot",
+            "description": "Bad named entity: DiacriticalDot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DiacriticalDot"
+                ]
+            ]
+        },
+        {
+            "input": "&DiacriticalDot;",
+            "description": "Named entity: DiacriticalDot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u02d9"
+                ]
+            ]
+        },
+        {
+            "input": "&DiacriticalDoubleAcute",
+            "description": "Bad named entity: DiacriticalDoubleAcute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DiacriticalDoubleAcute"
+                ]
+            ]
+        },
+        {
+            "input": "&DiacriticalDoubleAcute;",
+            "description": "Named entity: DiacriticalDoubleAcute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u02dd"
+                ]
+            ]
+        },
+        {
+            "input": "&DiacriticalGrave",
+            "description": "Bad named entity: DiacriticalGrave without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DiacriticalGrave"
+                ]
+            ]
+        },
+        {
+            "input": "&DiacriticalGrave;",
+            "description": "Named entity: DiacriticalGrave; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "`"
+                ]
+            ]
+        },
+        {
+            "input": "&DiacriticalTilde",
+            "description": "Bad named entity: DiacriticalTilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DiacriticalTilde"
+                ]
+            ]
+        },
+        {
+            "input": "&DiacriticalTilde;",
+            "description": "Named entity: DiacriticalTilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u02dc"
+                ]
+            ]
+        },
+        {
+            "input": "&Diamond",
+            "description": "Bad named entity: Diamond without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Diamond"
+                ]
+            ]
+        },
+        {
+            "input": "&Diamond;",
+            "description": "Named entity: Diamond; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22c4"
+                ]
+            ]
+        },
+        {
+            "input": "&DifferentialD",
+            "description": "Bad named entity: DifferentialD without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DifferentialD"
+                ]
+            ]
+        },
+        {
+            "input": "&DifferentialD;",
+            "description": "Named entity: DifferentialD; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2146"
+                ]
+            ]
+        },
+        {
+            "input": "&Dopf",
+            "description": "Bad named entity: Dopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Dopf"
+                ]
+            ]
+        },
+        {
+            "input": "&Dopf;",
+            "description": "Named entity: Dopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd3b"
+                ]
+            ]
+        },
+        {
+            "input": "&Dot",
+            "description": "Bad named entity: Dot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Dot"
+                ]
+            ]
+        },
+        {
+            "input": "&Dot;",
+            "description": "Named entity: Dot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a8"
+                ]
+            ]
+        },
+        {
+            "input": "&DotDot",
+            "description": "Bad named entity: DotDot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DotDot"
+                ]
+            ]
+        },
+        {
+            "input": "&DotDot;",
+            "description": "Named entity: DotDot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u20dc"
+                ]
+            ]
+        },
+        {
+            "input": "&DotEqual",
+            "description": "Bad named entity: DotEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DotEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&DotEqual;",
+            "description": "Named entity: DotEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2250"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleContourIntegral",
+            "description": "Bad named entity: DoubleContourIntegral without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DoubleContourIntegral"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleContourIntegral;",
+            "description": "Named entity: DoubleContourIntegral; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u222f"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleDot",
+            "description": "Bad named entity: DoubleDot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DoubleDot"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleDot;",
+            "description": "Named entity: DoubleDot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a8"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleDownArrow",
+            "description": "Bad named entity: DoubleDownArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DoubleDownArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleDownArrow;",
+            "description": "Named entity: DoubleDownArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21d3"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleLeftArrow",
+            "description": "Bad named entity: DoubleLeftArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DoubleLeftArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleLeftArrow;",
+            "description": "Named entity: DoubleLeftArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21d0"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleLeftRightArrow",
+            "description": "Bad named entity: DoubleLeftRightArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DoubleLeftRightArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleLeftRightArrow;",
+            "description": "Named entity: DoubleLeftRightArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21d4"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleLeftTee",
+            "description": "Bad named entity: DoubleLeftTee without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DoubleLeftTee"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleLeftTee;",
+            "description": "Named entity: DoubleLeftTee; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ae4"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleLongLeftArrow",
+            "description": "Bad named entity: DoubleLongLeftArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DoubleLongLeftArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleLongLeftArrow;",
+            "description": "Named entity: DoubleLongLeftArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27f8"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleLongLeftRightArrow",
+            "description": "Bad named entity: DoubleLongLeftRightArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DoubleLongLeftRightArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleLongLeftRightArrow;",
+            "description": "Named entity: DoubleLongLeftRightArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27fa"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleLongRightArrow",
+            "description": "Bad named entity: DoubleLongRightArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DoubleLongRightArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleLongRightArrow;",
+            "description": "Named entity: DoubleLongRightArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27f9"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleRightArrow",
+            "description": "Bad named entity: DoubleRightArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DoubleRightArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleRightArrow;",
+            "description": "Named entity: DoubleRightArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21d2"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleRightTee",
+            "description": "Bad named entity: DoubleRightTee without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DoubleRightTee"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleRightTee;",
+            "description": "Named entity: DoubleRightTee; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22a8"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleUpArrow",
+            "description": "Bad named entity: DoubleUpArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DoubleUpArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleUpArrow;",
+            "description": "Named entity: DoubleUpArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21d1"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleUpDownArrow",
+            "description": "Bad named entity: DoubleUpDownArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DoubleUpDownArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleUpDownArrow;",
+            "description": "Named entity: DoubleUpDownArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21d5"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleVerticalBar",
+            "description": "Bad named entity: DoubleVerticalBar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DoubleVerticalBar"
+                ]
+            ]
+        },
+        {
+            "input": "&DoubleVerticalBar;",
+            "description": "Named entity: DoubleVerticalBar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2225"
+                ]
+            ]
+        },
+        {
+            "input": "&DownArrow",
+            "description": "Bad named entity: DownArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DownArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&DownArrow;",
+            "description": "Named entity: DownArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2193"
+                ]
+            ]
+        },
+        {
+            "input": "&DownArrowBar",
+            "description": "Bad named entity: DownArrowBar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DownArrowBar"
+                ]
+            ]
+        },
+        {
+            "input": "&DownArrowBar;",
+            "description": "Named entity: DownArrowBar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2913"
+                ]
+            ]
+        },
+        {
+            "input": "&DownArrowUpArrow",
+            "description": "Bad named entity: DownArrowUpArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DownArrowUpArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&DownArrowUpArrow;",
+            "description": "Named entity: DownArrowUpArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21f5"
+                ]
+            ]
+        },
+        {
+            "input": "&DownBreve",
+            "description": "Bad named entity: DownBreve without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DownBreve"
+                ]
+            ]
+        },
+        {
+            "input": "&DownBreve;",
+            "description": "Named entity: DownBreve; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0311"
+                ]
+            ]
+        },
+        {
+            "input": "&DownLeftRightVector",
+            "description": "Bad named entity: DownLeftRightVector without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DownLeftRightVector"
+                ]
+            ]
+        },
+        {
+            "input": "&DownLeftRightVector;",
+            "description": "Named entity: DownLeftRightVector; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2950"
+                ]
+            ]
+        },
+        {
+            "input": "&DownLeftTeeVector",
+            "description": "Bad named entity: DownLeftTeeVector without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DownLeftTeeVector"
+                ]
+            ]
+        },
+        {
+            "input": "&DownLeftTeeVector;",
+            "description": "Named entity: DownLeftTeeVector; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u295e"
+                ]
+            ]
+        },
+        {
+            "input": "&DownLeftVector",
+            "description": "Bad named entity: DownLeftVector without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DownLeftVector"
+                ]
+            ]
+        },
+        {
+            "input": "&DownLeftVector;",
+            "description": "Named entity: DownLeftVector; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21bd"
+                ]
+            ]
+        },
+        {
+            "input": "&DownLeftVectorBar",
+            "description": "Bad named entity: DownLeftVectorBar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DownLeftVectorBar"
+                ]
+            ]
+        },
+        {
+            "input": "&DownLeftVectorBar;",
+            "description": "Named entity: DownLeftVectorBar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2956"
+                ]
+            ]
+        },
+        {
+            "input": "&DownRightTeeVector",
+            "description": "Bad named entity: DownRightTeeVector without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DownRightTeeVector"
+                ]
+            ]
+        },
+        {
+            "input": "&DownRightTeeVector;",
+            "description": "Named entity: DownRightTeeVector; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u295f"
+                ]
+            ]
+        },
+        {
+            "input": "&DownRightVector",
+            "description": "Bad named entity: DownRightVector without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DownRightVector"
+                ]
+            ]
+        },
+        {
+            "input": "&DownRightVector;",
+            "description": "Named entity: DownRightVector; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c1"
+                ]
+            ]
+        },
+        {
+            "input": "&DownRightVectorBar",
+            "description": "Bad named entity: DownRightVectorBar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DownRightVectorBar"
+                ]
+            ]
+        },
+        {
+            "input": "&DownRightVectorBar;",
+            "description": "Named entity: DownRightVectorBar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2957"
+                ]
+            ]
+        },
+        {
+            "input": "&DownTee",
+            "description": "Bad named entity: DownTee without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DownTee"
+                ]
+            ]
+        },
+        {
+            "input": "&DownTee;",
+            "description": "Named entity: DownTee; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22a4"
+                ]
+            ]
+        },
+        {
+            "input": "&DownTeeArrow",
+            "description": "Bad named entity: DownTeeArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&DownTeeArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&DownTeeArrow;",
+            "description": "Named entity: DownTeeArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21a7"
+                ]
+            ]
+        },
+        {
+            "input": "&Downarrow",
+            "description": "Bad named entity: Downarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Downarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&Downarrow;",
+            "description": "Named entity: Downarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21d3"
+                ]
+            ]
+        },
+        {
+            "input": "&Dscr",
+            "description": "Bad named entity: Dscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Dscr"
+                ]
+            ]
+        },
+        {
+            "input": "&Dscr;",
+            "description": "Named entity: Dscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udc9f"
+                ]
+            ]
+        },
+        {
+            "input": "&Dstrok",
+            "description": "Bad named entity: Dstrok without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Dstrok"
+                ]
+            ]
+        },
+        {
+            "input": "&Dstrok;",
+            "description": "Named entity: Dstrok; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0110"
+                ]
+            ]
+        },
+        {
+            "input": "&ENG",
+            "description": "Bad named entity: ENG without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ENG"
+                ]
+            ]
+        },
+        {
+            "input": "&ENG;",
+            "description": "Named entity: ENG; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u014a"
+                ]
+            ]
+        },
+        {
+            "input": "&ETH",
+            "description": "Named entity: ETH without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00d0"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 5 }
+            ]
+        },
+        {
+            "input": "&ETH;",
+            "description": "Named entity: ETH; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00d0"
+                ]
+            ]
+        },
+        {
+            "input": "&Eacute",
+            "description": "Named entity: Eacute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00c9"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&Eacute;",
+            "description": "Named entity: Eacute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00c9"
+                ]
+            ]
+        },
+        {
+            "input": "&Ecaron",
+            "description": "Bad named entity: Ecaron without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Ecaron"
+                ]
+            ]
+        },
+        {
+            "input": "&Ecaron;",
+            "description": "Named entity: Ecaron; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u011a"
+                ]
+            ]
+        },
+        {
+            "input": "&Ecirc",
+            "description": "Named entity: Ecirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ca"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 7 }
+            ]
+        },
+        {
+            "input": "&Ecirc;",
+            "description": "Named entity: Ecirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ca"
+                ]
+            ]
+        },
+        {
+            "input": "&Ecy",
+            "description": "Bad named entity: Ecy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Ecy"
+                ]
+            ]
+        },
+        {
+            "input": "&Ecy;",
+            "description": "Named entity: Ecy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u042d"
+                ]
+            ]
+        },
+        {
+            "input": "&Edot",
+            "description": "Bad named entity: Edot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Edot"
+                ]
+            ]
+        },
+        {
+            "input": "&Edot;",
+            "description": "Named entity: Edot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0116"
+                ]
+            ]
+        },
+        {
+            "input": "&Efr",
+            "description": "Bad named entity: Efr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Efr"
+                ]
+            ]
+        },
+        {
+            "input": "&Efr;",
+            "description": "Named entity: Efr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd08"
+                ]
+            ]
+        },
+        {
+            "input": "&Egrave",
+            "description": "Named entity: Egrave without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00c8"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&Egrave;",
+            "description": "Named entity: Egrave; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00c8"
+                ]
+            ]
+        },
+        {
+            "input": "&Element",
+            "description": "Bad named entity: Element without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Element"
+                ]
+            ]
+        },
+        {
+            "input": "&Element;",
+            "description": "Named entity: Element; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2208"
+                ]
+            ]
+        },
+        {
+            "input": "&Emacr",
+            "description": "Bad named entity: Emacr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Emacr"
+                ]
+            ]
+        },
+        {
+            "input": "&Emacr;",
+            "description": "Named entity: Emacr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0112"
+                ]
+            ]
+        },
+        {
+            "input": "&EmptySmallSquare",
+            "description": "Bad named entity: EmptySmallSquare without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&EmptySmallSquare"
+                ]
+            ]
+        },
+        {
+            "input": "&EmptySmallSquare;",
+            "description": "Named entity: EmptySmallSquare; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25fb"
+                ]
+            ]
+        },
+        {
+            "input": "&EmptyVerySmallSquare",
+            "description": "Bad named entity: EmptyVerySmallSquare without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&EmptyVerySmallSquare"
+                ]
+            ]
+        },
+        {
+            "input": "&EmptyVerySmallSquare;",
+            "description": "Named entity: EmptyVerySmallSquare; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25ab"
+                ]
+            ]
+        },
+        {
+            "input": "&Eogon",
+            "description": "Bad named entity: Eogon without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Eogon"
+                ]
+            ]
+        },
+        {
+            "input": "&Eogon;",
+            "description": "Named entity: Eogon; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0118"
+                ]
+            ]
+        },
+        {
+            "input": "&Eopf",
+            "description": "Bad named entity: Eopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Eopf"
+                ]
+            ]
+        },
+        {
+            "input": "&Eopf;",
+            "description": "Named entity: Eopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd3c"
+                ]
+            ]
+        },
+        {
+            "input": "&Epsilon",
+            "description": "Bad named entity: Epsilon without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Epsilon"
+                ]
+            ]
+        },
+        {
+            "input": "&Epsilon;",
+            "description": "Named entity: Epsilon; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0395"
+                ]
+            ]
+        },
+        {
+            "input": "&Equal",
+            "description": "Bad named entity: Equal without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Equal"
+                ]
+            ]
+        },
+        {
+            "input": "&Equal;",
+            "description": "Named entity: Equal; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a75"
+                ]
+            ]
+        },
+        {
+            "input": "&EqualTilde",
+            "description": "Bad named entity: EqualTilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&EqualTilde"
+                ]
+            ]
+        },
+        {
+            "input": "&EqualTilde;",
+            "description": "Named entity: EqualTilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2242"
+                ]
+            ]
+        },
+        {
+            "input": "&Equilibrium",
+            "description": "Bad named entity: Equilibrium without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Equilibrium"
+                ]
+            ]
+        },
+        {
+            "input": "&Equilibrium;",
+            "description": "Named entity: Equilibrium; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21cc"
+                ]
+            ]
+        },
+        {
+            "input": "&Escr",
+            "description": "Bad named entity: Escr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Escr"
+                ]
+            ]
+        },
+        {
+            "input": "&Escr;",
+            "description": "Named entity: Escr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2130"
+                ]
+            ]
+        },
+        {
+            "input": "&Esim",
+            "description": "Bad named entity: Esim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Esim"
+                ]
+            ]
+        },
+        {
+            "input": "&Esim;",
+            "description": "Named entity: Esim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a73"
+                ]
+            ]
+        },
+        {
+            "input": "&Eta",
+            "description": "Bad named entity: Eta without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Eta"
+                ]
+            ]
+        },
+        {
+            "input": "&Eta;",
+            "description": "Named entity: Eta; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0397"
+                ]
+            ]
+        },
+        {
+            "input": "&Euml",
+            "description": "Named entity: Euml without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00cb"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 6 }
+            ]
+        },
+        {
+            "input": "&Euml;",
+            "description": "Named entity: Euml; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00cb"
+                ]
+            ]
+        },
+        {
+            "input": "&Exists",
+            "description": "Bad named entity: Exists without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Exists"
+                ]
+            ]
+        },
+        {
+            "input": "&Exists;",
+            "description": "Named entity: Exists; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2203"
+                ]
+            ]
+        },
+        {
+            "input": "&ExponentialE",
+            "description": "Bad named entity: ExponentialE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ExponentialE"
+                ]
+            ]
+        },
+        {
+            "input": "&ExponentialE;",
+            "description": "Named entity: ExponentialE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2147"
+                ]
+            ]
+        },
+        {
+            "input": "&Fcy",
+            "description": "Bad named entity: Fcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Fcy"
+                ]
+            ]
+        },
+        {
+            "input": "&Fcy;",
+            "description": "Named entity: Fcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0424"
+                ]
+            ]
+        },
+        {
+            "input": "&Ffr",
+            "description": "Bad named entity: Ffr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Ffr"
+                ]
+            ]
+        },
+        {
+            "input": "&Ffr;",
+            "description": "Named entity: Ffr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd09"
+                ]
+            ]
+        },
+        {
+            "input": "&FilledSmallSquare",
+            "description": "Bad named entity: FilledSmallSquare without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&FilledSmallSquare"
+                ]
+            ]
+        },
+        {
+            "input": "&FilledSmallSquare;",
+            "description": "Named entity: FilledSmallSquare; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25fc"
+                ]
+            ]
+        },
+        {
+            "input": "&FilledVerySmallSquare",
+            "description": "Bad named entity: FilledVerySmallSquare without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&FilledVerySmallSquare"
+                ]
+            ]
+        },
+        {
+            "input": "&FilledVerySmallSquare;",
+            "description": "Named entity: FilledVerySmallSquare; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25aa"
+                ]
+            ]
+        },
+        {
+            "input": "&Fopf",
+            "description": "Bad named entity: Fopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Fopf"
+                ]
+            ]
+        },
+        {
+            "input": "&Fopf;",
+            "description": "Named entity: Fopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd3d"
+                ]
+            ]
+        },
+        {
+            "input": "&ForAll",
+            "description": "Bad named entity: ForAll without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ForAll"
+                ]
+            ]
+        },
+        {
+            "input": "&ForAll;",
+            "description": "Named entity: ForAll; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2200"
+                ]
+            ]
+        },
+        {
+            "input": "&Fouriertrf",
+            "description": "Bad named entity: Fouriertrf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Fouriertrf"
+                ]
+            ]
+        },
+        {
+            "input": "&Fouriertrf;",
+            "description": "Named entity: Fouriertrf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2131"
+                ]
+            ]
+        },
+        {
+            "input": "&Fscr",
+            "description": "Bad named entity: Fscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Fscr"
+                ]
+            ]
+        },
+        {
+            "input": "&Fscr;",
+            "description": "Named entity: Fscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2131"
+                ]
+            ]
+        },
+        {
+            "input": "&GJcy",
+            "description": "Bad named entity: GJcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&GJcy"
+                ]
+            ]
+        },
+        {
+            "input": "&GJcy;",
+            "description": "Named entity: GJcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0403"
+                ]
+            ]
+        },
+        {
+            "input": "&GT",
+            "description": "Named entity: GT without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    ">"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 4 }
+            ]
+        },
+        {
+            "input": "&GT;",
+            "description": "Named entity: GT; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    ">"
+                ]
+            ]
+        },
+        {
+            "input": "&Gamma",
+            "description": "Bad named entity: Gamma without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Gamma"
+                ]
+            ]
+        },
+        {
+            "input": "&Gamma;",
+            "description": "Named entity: Gamma; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0393"
+                ]
+            ]
+        },
+        {
+            "input": "&Gammad",
+            "description": "Bad named entity: Gammad without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Gammad"
+                ]
+            ]
+        },
+        {
+            "input": "&Gammad;",
+            "description": "Named entity: Gammad; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03dc"
+                ]
+            ]
+        },
+        {
+            "input": "&Gbreve",
+            "description": "Bad named entity: Gbreve without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Gbreve"
+                ]
+            ]
+        },
+        {
+            "input": "&Gbreve;",
+            "description": "Named entity: Gbreve; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u011e"
+                ]
+            ]
+        },
+        {
+            "input": "&Gcedil",
+            "description": "Bad named entity: Gcedil without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Gcedil"
+                ]
+            ]
+        },
+        {
+            "input": "&Gcedil;",
+            "description": "Named entity: Gcedil; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0122"
+                ]
+            ]
+        },
+        {
+            "input": "&Gcirc",
+            "description": "Bad named entity: Gcirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Gcirc"
+                ]
+            ]
+        },
+        {
+            "input": "&Gcirc;",
+            "description": "Named entity: Gcirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u011c"
+                ]
+            ]
+        },
+        {
+            "input": "&Gcy",
+            "description": "Bad named entity: Gcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Gcy"
+                ]
+            ]
+        },
+        {
+            "input": "&Gcy;",
+            "description": "Named entity: Gcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0413"
+                ]
+            ]
+        },
+        {
+            "input": "&Gdot",
+            "description": "Bad named entity: Gdot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Gdot"
+                ]
+            ]
+        },
+        {
+            "input": "&Gdot;",
+            "description": "Named entity: Gdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0120"
+                ]
+            ]
+        },
+        {
+            "input": "&Gfr",
+            "description": "Bad named entity: Gfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Gfr"
+                ]
+            ]
+        },
+        {
+            "input": "&Gfr;",
+            "description": "Named entity: Gfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd0a"
+                ]
+            ]
+        },
+        {
+            "input": "&Gg",
+            "description": "Bad named entity: Gg without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Gg"
+                ]
+            ]
+        },
+        {
+            "input": "&Gg;",
+            "description": "Named entity: Gg; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22d9"
+                ]
+            ]
+        },
+        {
+            "input": "&Gopf",
+            "description": "Bad named entity: Gopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Gopf"
+                ]
+            ]
+        },
+        {
+            "input": "&Gopf;",
+            "description": "Named entity: Gopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd3e"
+                ]
+            ]
+        },
+        {
+            "input": "&GreaterEqual",
+            "description": "Bad named entity: GreaterEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&GreaterEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&GreaterEqual;",
+            "description": "Named entity: GreaterEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2265"
+                ]
+            ]
+        },
+        {
+            "input": "&GreaterEqualLess",
+            "description": "Bad named entity: GreaterEqualLess without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&GreaterEqualLess"
+                ]
+            ]
+        },
+        {
+            "input": "&GreaterEqualLess;",
+            "description": "Named entity: GreaterEqualLess; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22db"
+                ]
+            ]
+        },
+        {
+            "input": "&GreaterFullEqual",
+            "description": "Bad named entity: GreaterFullEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&GreaterFullEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&GreaterFullEqual;",
+            "description": "Named entity: GreaterFullEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2267"
+                ]
+            ]
+        },
+        {
+            "input": "&GreaterGreater",
+            "description": "Bad named entity: GreaterGreater without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&GreaterGreater"
+                ]
+            ]
+        },
+        {
+            "input": "&GreaterGreater;",
+            "description": "Named entity: GreaterGreater; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aa2"
+                ]
+            ]
+        },
+        {
+            "input": "&GreaterLess",
+            "description": "Bad named entity: GreaterLess without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&GreaterLess"
+                ]
+            ]
+        },
+        {
+            "input": "&GreaterLess;",
+            "description": "Named entity: GreaterLess; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2277"
+                ]
+            ]
+        },
+        {
+            "input": "&GreaterSlantEqual",
+            "description": "Bad named entity: GreaterSlantEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&GreaterSlantEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&GreaterSlantEqual;",
+            "description": "Named entity: GreaterSlantEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a7e"
+                ]
+            ]
+        },
+        {
+            "input": "&GreaterTilde",
+            "description": "Bad named entity: GreaterTilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&GreaterTilde"
+                ]
+            ]
+        },
+        {
+            "input": "&GreaterTilde;",
+            "description": "Named entity: GreaterTilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2273"
+                ]
+            ]
+        },
+        {
+            "input": "&Gscr",
+            "description": "Bad named entity: Gscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Gscr"
+                ]
+            ]
+        },
+        {
+            "input": "&Gscr;",
+            "description": "Named entity: Gscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udca2"
+                ]
+            ]
+        },
+        {
+            "input": "&Gt",
+            "description": "Bad named entity: Gt without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Gt"
+                ]
+            ]
+        },
+        {
+            "input": "&Gt;",
+            "description": "Named entity: Gt; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u226b"
+                ]
+            ]
+        },
+        {
+            "input": "&HARDcy",
+            "description": "Bad named entity: HARDcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&HARDcy"
+                ]
+            ]
+        },
+        {
+            "input": "&HARDcy;",
+            "description": "Named entity: HARDcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u042a"
+                ]
+            ]
+        },
+        {
+            "input": "&Hacek",
+            "description": "Bad named entity: Hacek without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Hacek"
+                ]
+            ]
+        },
+        {
+            "input": "&Hacek;",
+            "description": "Named entity: Hacek; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u02c7"
+                ]
+            ]
+        },
+        {
+            "input": "&Hat",
+            "description": "Bad named entity: Hat without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Hat"
+                ]
+            ]
+        },
+        {
+            "input": "&Hat;",
+            "description": "Named entity: Hat; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "^"
+                ]
+            ]
+        },
+        {
+            "input": "&Hcirc",
+            "description": "Bad named entity: Hcirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Hcirc"
+                ]
+            ]
+        },
+        {
+            "input": "&Hcirc;",
+            "description": "Named entity: Hcirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0124"
+                ]
+            ]
+        },
+        {
+            "input": "&Hfr",
+            "description": "Bad named entity: Hfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Hfr"
+                ]
+            ]
+        },
+        {
+            "input": "&Hfr;",
+            "description": "Named entity: Hfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u210c"
+                ]
+            ]
+        },
+        {
+            "input": "&HilbertSpace",
+            "description": "Bad named entity: HilbertSpace without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&HilbertSpace"
+                ]
+            ]
+        },
+        {
+            "input": "&HilbertSpace;",
+            "description": "Named entity: HilbertSpace; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u210b"
+                ]
+            ]
+        },
+        {
+            "input": "&Hopf",
+            "description": "Bad named entity: Hopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Hopf"
+                ]
+            ]
+        },
+        {
+            "input": "&Hopf;",
+            "description": "Named entity: Hopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u210d"
+                ]
+            ]
+        },
+        {
+            "input": "&HorizontalLine",
+            "description": "Bad named entity: HorizontalLine without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&HorizontalLine"
+                ]
+            ]
+        },
+        {
+            "input": "&HorizontalLine;",
+            "description": "Named entity: HorizontalLine; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2500"
+                ]
+            ]
+        },
+        {
+            "input": "&Hscr",
+            "description": "Bad named entity: Hscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Hscr"
+                ]
+            ]
+        },
+        {
+            "input": "&Hscr;",
+            "description": "Named entity: Hscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u210b"
+                ]
+            ]
+        },
+        {
+            "input": "&Hstrok",
+            "description": "Bad named entity: Hstrok without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Hstrok"
+                ]
+            ]
+        },
+        {
+            "input": "&Hstrok;",
+            "description": "Named entity: Hstrok; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0126"
+                ]
+            ]
+        },
+        {
+            "input": "&HumpDownHump",
+            "description": "Bad named entity: HumpDownHump without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&HumpDownHump"
+                ]
+            ]
+        },
+        {
+            "input": "&HumpDownHump;",
+            "description": "Named entity: HumpDownHump; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u224e"
+                ]
+            ]
+        },
+        {
+            "input": "&HumpEqual",
+            "description": "Bad named entity: HumpEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&HumpEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&HumpEqual;",
+            "description": "Named entity: HumpEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u224f"
+                ]
+            ]
+        },
+        {
+            "input": "&IEcy",
+            "description": "Bad named entity: IEcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&IEcy"
+                ]
+            ]
+        },
+        {
+            "input": "&IEcy;",
+            "description": "Named entity: IEcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0415"
+                ]
+            ]
+        },
+        {
+            "input": "&IJlig",
+            "description": "Bad named entity: IJlig without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&IJlig"
+                ]
+            ]
+        },
+        {
+            "input": "&IJlig;",
+            "description": "Named entity: IJlig; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0132"
+                ]
+            ]
+        },
+        {
+            "input": "&IOcy",
+            "description": "Bad named entity: IOcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&IOcy"
+                ]
+            ]
+        },
+        {
+            "input": "&IOcy;",
+            "description": "Named entity: IOcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0401"
+                ]
+            ]
+        },
+        {
+            "input": "&Iacute",
+            "description": "Named entity: Iacute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00cd"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&Iacute;",
+            "description": "Named entity: Iacute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00cd"
+                ]
+            ]
+        },
+        {
+            "input": "&Icirc",
+            "description": "Named entity: Icirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ce"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 7 }
+            ]
+        },
+        {
+            "input": "&Icirc;",
+            "description": "Named entity: Icirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ce"
+                ]
+            ]
+        },
+        {
+            "input": "&Icy",
+            "description": "Bad named entity: Icy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Icy"
+                ]
+            ]
+        },
+        {
+            "input": "&Icy;",
+            "description": "Named entity: Icy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0418"
+                ]
+            ]
+        },
+        {
+            "input": "&Idot",
+            "description": "Bad named entity: Idot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Idot"
+                ]
+            ]
+        },
+        {
+            "input": "&Idot;",
+            "description": "Named entity: Idot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0130"
+                ]
+            ]
+        },
+        {
+            "input": "&Ifr",
+            "description": "Bad named entity: Ifr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Ifr"
+                ]
+            ]
+        },
+        {
+            "input": "&Ifr;",
+            "description": "Named entity: Ifr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2111"
+                ]
+            ]
+        },
+        {
+            "input": "&Igrave",
+            "description": "Named entity: Igrave without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00cc"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&Igrave;",
+            "description": "Named entity: Igrave; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00cc"
+                ]
+            ]
+        },
+        {
+            "input": "&Im",
+            "description": "Bad named entity: Im without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Im"
+                ]
+            ]
+        },
+        {
+            "input": "&Im;",
+            "description": "Named entity: Im; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2111"
+                ]
+            ]
+        },
+        {
+            "input": "&Imacr",
+            "description": "Bad named entity: Imacr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Imacr"
+                ]
+            ]
+        },
+        {
+            "input": "&Imacr;",
+            "description": "Named entity: Imacr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u012a"
+                ]
+            ]
+        },
+        {
+            "input": "&ImaginaryI",
+            "description": "Bad named entity: ImaginaryI without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ImaginaryI"
+                ]
+            ]
+        },
+        {
+            "input": "&ImaginaryI;",
+            "description": "Named entity: ImaginaryI; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2148"
+                ]
+            ]
+        },
+        {
+            "input": "&Implies",
+            "description": "Bad named entity: Implies without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Implies"
+                ]
+            ]
+        },
+        {
+            "input": "&Implies;",
+            "description": "Named entity: Implies; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21d2"
+                ]
+            ]
+        },
+        {
+            "input": "&Int",
+            "description": "Bad named entity: Int without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Int"
+                ]
+            ]
+        },
+        {
+            "input": "&Int;",
+            "description": "Named entity: Int; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u222c"
+                ]
+            ]
+        },
+        {
+            "input": "&Integral",
+            "description": "Bad named entity: Integral without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Integral"
+                ]
+            ]
+        },
+        {
+            "input": "&Integral;",
+            "description": "Named entity: Integral; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u222b"
+                ]
+            ]
+        },
+        {
+            "input": "&Intersection",
+            "description": "Bad named entity: Intersection without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Intersection"
+                ]
+            ]
+        },
+        {
+            "input": "&Intersection;",
+            "description": "Named entity: Intersection; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22c2"
+                ]
+            ]
+        },
+        {
+            "input": "&InvisibleComma",
+            "description": "Bad named entity: InvisibleComma without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&InvisibleComma"
+                ]
+            ]
+        },
+        {
+            "input": "&InvisibleComma;",
+            "description": "Named entity: InvisibleComma; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2063"
+                ]
+            ]
+        },
+        {
+            "input": "&InvisibleTimes",
+            "description": "Bad named entity: InvisibleTimes without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&InvisibleTimes"
+                ]
+            ]
+        },
+        {
+            "input": "&InvisibleTimes;",
+            "description": "Named entity: InvisibleTimes; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2062"
+                ]
+            ]
+        },
+        {
+            "input": "&Iogon",
+            "description": "Bad named entity: Iogon without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Iogon"
+                ]
+            ]
+        },
+        {
+            "input": "&Iogon;",
+            "description": "Named entity: Iogon; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u012e"
+                ]
+            ]
+        },
+        {
+            "input": "&Iopf",
+            "description": "Bad named entity: Iopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Iopf"
+                ]
+            ]
+        },
+        {
+            "input": "&Iopf;",
+            "description": "Named entity: Iopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd40"
+                ]
+            ]
+        },
+        {
+            "input": "&Iota",
+            "description": "Bad named entity: Iota without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Iota"
+                ]
+            ]
+        },
+        {
+            "input": "&Iota;",
+            "description": "Named entity: Iota; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0399"
+                ]
+            ]
+        },
+        {
+            "input": "&Iscr",
+            "description": "Bad named entity: Iscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Iscr"
+                ]
+            ]
+        },
+        {
+            "input": "&Iscr;",
+            "description": "Named entity: Iscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2110"
+                ]
+            ]
+        },
+        {
+            "input": "&Itilde",
+            "description": "Bad named entity: Itilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Itilde"
+                ]
+            ]
+        },
+        {
+            "input": "&Itilde;",
+            "description": "Named entity: Itilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0128"
+                ]
+            ]
+        },
+        {
+            "input": "&Iukcy",
+            "description": "Bad named entity: Iukcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Iukcy"
+                ]
+            ]
+        },
+        {
+            "input": "&Iukcy;",
+            "description": "Named entity: Iukcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0406"
+                ]
+            ]
+        },
+        {
+            "input": "&Iuml",
+            "description": "Named entity: Iuml without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00cf"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 6 }
+            ]
+        },
+        {
+            "input": "&Iuml;",
+            "description": "Named entity: Iuml; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00cf"
+                ]
+            ]
+        },
+        {
+            "input": "&Jcirc",
+            "description": "Bad named entity: Jcirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Jcirc"
+                ]
+            ]
+        },
+        {
+            "input": "&Jcirc;",
+            "description": "Named entity: Jcirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0134"
+                ]
+            ]
+        },
+        {
+            "input": "&Jcy",
+            "description": "Bad named entity: Jcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Jcy"
+                ]
+            ]
+        },
+        {
+            "input": "&Jcy;",
+            "description": "Named entity: Jcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0419"
+                ]
+            ]
+        },
+        {
+            "input": "&Jfr",
+            "description": "Bad named entity: Jfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Jfr"
+                ]
+            ]
+        },
+        {
+            "input": "&Jfr;",
+            "description": "Named entity: Jfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd0d"
+                ]
+            ]
+        },
+        {
+            "input": "&Jopf",
+            "description": "Bad named entity: Jopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Jopf"
+                ]
+            ]
+        },
+        {
+            "input": "&Jopf;",
+            "description": "Named entity: Jopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd41"
+                ]
+            ]
+        },
+        {
+            "input": "&Jscr",
+            "description": "Bad named entity: Jscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Jscr"
+                ]
+            ]
+        },
+        {
+            "input": "&Jscr;",
+            "description": "Named entity: Jscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udca5"
+                ]
+            ]
+        },
+        {
+            "input": "&Jsercy",
+            "description": "Bad named entity: Jsercy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Jsercy"
+                ]
+            ]
+        },
+        {
+            "input": "&Jsercy;",
+            "description": "Named entity: Jsercy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0408"
+                ]
+            ]
+        },
+        {
+            "input": "&Jukcy",
+            "description": "Bad named entity: Jukcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Jukcy"
+                ]
+            ]
+        },
+        {
+            "input": "&Jukcy;",
+            "description": "Named entity: Jukcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0404"
+                ]
+            ]
+        },
+        {
+            "input": "&KHcy",
+            "description": "Bad named entity: KHcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&KHcy"
+                ]
+            ]
+        },
+        {
+            "input": "&KHcy;",
+            "description": "Named entity: KHcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0425"
+                ]
+            ]
+        },
+        {
+            "input": "&KJcy",
+            "description": "Bad named entity: KJcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&KJcy"
+                ]
+            ]
+        },
+        {
+            "input": "&KJcy;",
+            "description": "Named entity: KJcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u040c"
+                ]
+            ]
+        },
+        {
+            "input": "&Kappa",
+            "description": "Bad named entity: Kappa without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Kappa"
+                ]
+            ]
+        },
+        {
+            "input": "&Kappa;",
+            "description": "Named entity: Kappa; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u039a"
+                ]
+            ]
+        },
+        {
+            "input": "&Kcedil",
+            "description": "Bad named entity: Kcedil without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Kcedil"
+                ]
+            ]
+        },
+        {
+            "input": "&Kcedil;",
+            "description": "Named entity: Kcedil; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0136"
+                ]
+            ]
+        },
+        {
+            "input": "&Kcy",
+            "description": "Bad named entity: Kcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Kcy"
+                ]
+            ]
+        },
+        {
+            "input": "&Kcy;",
+            "description": "Named entity: Kcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u041a"
+                ]
+            ]
+        },
+        {
+            "input": "&Kfr",
+            "description": "Bad named entity: Kfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Kfr"
+                ]
+            ]
+        },
+        {
+            "input": "&Kfr;",
+            "description": "Named entity: Kfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd0e"
+                ]
+            ]
+        },
+        {
+            "input": "&Kopf",
+            "description": "Bad named entity: Kopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Kopf"
+                ]
+            ]
+        },
+        {
+            "input": "&Kopf;",
+            "description": "Named entity: Kopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd42"
+                ]
+            ]
+        },
+        {
+            "input": "&Kscr",
+            "description": "Bad named entity: Kscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Kscr"
+                ]
+            ]
+        },
+        {
+            "input": "&Kscr;",
+            "description": "Named entity: Kscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udca6"
+                ]
+            ]
+        },
+        {
+            "input": "&LJcy",
+            "description": "Bad named entity: LJcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LJcy"
+                ]
+            ]
+        },
+        {
+            "input": "&LJcy;",
+            "description": "Named entity: LJcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0409"
+                ]
+            ]
+        },
+        {
+            "input": "&LT",
+            "description": "Named entity: LT without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "<"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 4 }
+            ]
+        },
+        {
+            "input": "&LT;",
+            "description": "Named entity: LT; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "<"
+                ]
+            ]
+        },
+        {
+            "input": "&Lacute",
+            "description": "Bad named entity: Lacute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Lacute"
+                ]
+            ]
+        },
+        {
+            "input": "&Lacute;",
+            "description": "Named entity: Lacute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0139"
+                ]
+            ]
+        },
+        {
+            "input": "&Lambda",
+            "description": "Bad named entity: Lambda without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Lambda"
+                ]
+            ]
+        },
+        {
+            "input": "&Lambda;",
+            "description": "Named entity: Lambda; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u039b"
+                ]
+            ]
+        },
+        {
+            "input": "&Lang",
+            "description": "Bad named entity: Lang without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Lang"
+                ]
+            ]
+        },
+        {
+            "input": "&Lang;",
+            "description": "Named entity: Lang; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27ea"
+                ]
+            ]
+        },
+        {
+            "input": "&Laplacetrf",
+            "description": "Bad named entity: Laplacetrf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Laplacetrf"
+                ]
+            ]
+        },
+        {
+            "input": "&Laplacetrf;",
+            "description": "Named entity: Laplacetrf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2112"
+                ]
+            ]
+        },
+        {
+            "input": "&Larr",
+            "description": "Bad named entity: Larr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Larr"
+                ]
+            ]
+        },
+        {
+            "input": "&Larr;",
+            "description": "Named entity: Larr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u219e"
+                ]
+            ]
+        },
+        {
+            "input": "&Lcaron",
+            "description": "Bad named entity: Lcaron without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Lcaron"
+                ]
+            ]
+        },
+        {
+            "input": "&Lcaron;",
+            "description": "Named entity: Lcaron; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u013d"
+                ]
+            ]
+        },
+        {
+            "input": "&Lcedil",
+            "description": "Bad named entity: Lcedil without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Lcedil"
+                ]
+            ]
+        },
+        {
+            "input": "&Lcedil;",
+            "description": "Named entity: Lcedil; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u013b"
+                ]
+            ]
+        },
+        {
+            "input": "&Lcy",
+            "description": "Bad named entity: Lcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Lcy"
+                ]
+            ]
+        },
+        {
+            "input": "&Lcy;",
+            "description": "Named entity: Lcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u041b"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftAngleBracket",
+            "description": "Bad named entity: LeftAngleBracket without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LeftAngleBracket"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftAngleBracket;",
+            "description": "Named entity: LeftAngleBracket; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27e8"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftArrow",
+            "description": "Bad named entity: LeftArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LeftArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftArrow;",
+            "description": "Named entity: LeftArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2190"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftArrowBar",
+            "description": "Bad named entity: LeftArrowBar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LeftArrowBar"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftArrowBar;",
+            "description": "Named entity: LeftArrowBar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21e4"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftArrowRightArrow",
+            "description": "Bad named entity: LeftArrowRightArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LeftArrowRightArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftArrowRightArrow;",
+            "description": "Named entity: LeftArrowRightArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c6"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftCeiling",
+            "description": "Bad named entity: LeftCeiling without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LeftCeiling"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftCeiling;",
+            "description": "Named entity: LeftCeiling; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2308"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftDoubleBracket",
+            "description": "Bad named entity: LeftDoubleBracket without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LeftDoubleBracket"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftDoubleBracket;",
+            "description": "Named entity: LeftDoubleBracket; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27e6"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftDownTeeVector",
+            "description": "Bad named entity: LeftDownTeeVector without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LeftDownTeeVector"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftDownTeeVector;",
+            "description": "Named entity: LeftDownTeeVector; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2961"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftDownVector",
+            "description": "Bad named entity: LeftDownVector without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LeftDownVector"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftDownVector;",
+            "description": "Named entity: LeftDownVector; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c3"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftDownVectorBar",
+            "description": "Bad named entity: LeftDownVectorBar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LeftDownVectorBar"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftDownVectorBar;",
+            "description": "Named entity: LeftDownVectorBar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2959"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftFloor",
+            "description": "Bad named entity: LeftFloor without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LeftFloor"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftFloor;",
+            "description": "Named entity: LeftFloor; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u230a"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftRightArrow",
+            "description": "Bad named entity: LeftRightArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LeftRightArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftRightArrow;",
+            "description": "Named entity: LeftRightArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2194"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftRightVector",
+            "description": "Bad named entity: LeftRightVector without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LeftRightVector"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftRightVector;",
+            "description": "Named entity: LeftRightVector; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u294e"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftTee",
+            "description": "Bad named entity: LeftTee without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LeftTee"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftTee;",
+            "description": "Named entity: LeftTee; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22a3"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftTeeArrow",
+            "description": "Bad named entity: LeftTeeArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LeftTeeArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftTeeArrow;",
+            "description": "Named entity: LeftTeeArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21a4"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftTeeVector",
+            "description": "Bad named entity: LeftTeeVector without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LeftTeeVector"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftTeeVector;",
+            "description": "Named entity: LeftTeeVector; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u295a"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftTriangle",
+            "description": "Bad named entity: LeftTriangle without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LeftTriangle"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftTriangle;",
+            "description": "Named entity: LeftTriangle; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22b2"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftTriangleBar",
+            "description": "Bad named entity: LeftTriangleBar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LeftTriangleBar"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftTriangleBar;",
+            "description": "Named entity: LeftTriangleBar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29cf"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftTriangleEqual",
+            "description": "Bad named entity: LeftTriangleEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LeftTriangleEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftTriangleEqual;",
+            "description": "Named entity: LeftTriangleEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22b4"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftUpDownVector",
+            "description": "Bad named entity: LeftUpDownVector without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LeftUpDownVector"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftUpDownVector;",
+            "description": "Named entity: LeftUpDownVector; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2951"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftUpTeeVector",
+            "description": "Bad named entity: LeftUpTeeVector without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LeftUpTeeVector"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftUpTeeVector;",
+            "description": "Named entity: LeftUpTeeVector; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2960"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftUpVector",
+            "description": "Bad named entity: LeftUpVector without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LeftUpVector"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftUpVector;",
+            "description": "Named entity: LeftUpVector; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21bf"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftUpVectorBar",
+            "description": "Bad named entity: LeftUpVectorBar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LeftUpVectorBar"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftUpVectorBar;",
+            "description": "Named entity: LeftUpVectorBar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2958"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftVector",
+            "description": "Bad named entity: LeftVector without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LeftVector"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftVector;",
+            "description": "Named entity: LeftVector; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21bc"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftVectorBar",
+            "description": "Bad named entity: LeftVectorBar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LeftVectorBar"
+                ]
+            ]
+        },
+        {
+            "input": "&LeftVectorBar;",
+            "description": "Named entity: LeftVectorBar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2952"
+                ]
+            ]
+        },
+        {
+            "input": "&Leftarrow",
+            "description": "Bad named entity: Leftarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Leftarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&Leftarrow;",
+            "description": "Named entity: Leftarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21d0"
+                ]
+            ]
+        },
+        {
+            "input": "&Leftrightarrow",
+            "description": "Bad named entity: Leftrightarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Leftrightarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&Leftrightarrow;",
+            "description": "Named entity: Leftrightarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21d4"
+                ]
+            ]
+        },
+        {
+            "input": "&LessEqualGreater",
+            "description": "Bad named entity: LessEqualGreater without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LessEqualGreater"
+                ]
+            ]
+        },
+        {
+            "input": "&LessEqualGreater;",
+            "description": "Named entity: LessEqualGreater; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22da"
+                ]
+            ]
+        },
+        {
+            "input": "&LessFullEqual",
+            "description": "Bad named entity: LessFullEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LessFullEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&LessFullEqual;",
+            "description": "Named entity: LessFullEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2266"
+                ]
+            ]
+        },
+        {
+            "input": "&LessGreater",
+            "description": "Bad named entity: LessGreater without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LessGreater"
+                ]
+            ]
+        },
+        {
+            "input": "&LessGreater;",
+            "description": "Named entity: LessGreater; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2276"
+                ]
+            ]
+        },
+        {
+            "input": "&LessLess",
+            "description": "Bad named entity: LessLess without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LessLess"
+                ]
+            ]
+        },
+        {
+            "input": "&LessLess;",
+            "description": "Named entity: LessLess; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aa1"
+                ]
+            ]
+        },
+        {
+            "input": "&LessSlantEqual",
+            "description": "Bad named entity: LessSlantEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LessSlantEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&LessSlantEqual;",
+            "description": "Named entity: LessSlantEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a7d"
+                ]
+            ]
+        },
+        {
+            "input": "&LessTilde",
+            "description": "Bad named entity: LessTilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LessTilde"
+                ]
+            ]
+        },
+        {
+            "input": "&LessTilde;",
+            "description": "Named entity: LessTilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2272"
+                ]
+            ]
+        },
+        {
+            "input": "&Lfr",
+            "description": "Bad named entity: Lfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Lfr"
+                ]
+            ]
+        },
+        {
+            "input": "&Lfr;",
+            "description": "Named entity: Lfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd0f"
+                ]
+            ]
+        },
+        {
+            "input": "&Ll",
+            "description": "Bad named entity: Ll without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Ll"
+                ]
+            ]
+        },
+        {
+            "input": "&Ll;",
+            "description": "Named entity: Ll; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22d8"
+                ]
+            ]
+        },
+        {
+            "input": "&Lleftarrow",
+            "description": "Bad named entity: Lleftarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Lleftarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&Lleftarrow;",
+            "description": "Named entity: Lleftarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21da"
+                ]
+            ]
+        },
+        {
+            "input": "&Lmidot",
+            "description": "Bad named entity: Lmidot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Lmidot"
+                ]
+            ]
+        },
+        {
+            "input": "&Lmidot;",
+            "description": "Named entity: Lmidot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u013f"
+                ]
+            ]
+        },
+        {
+            "input": "&LongLeftArrow",
+            "description": "Bad named entity: LongLeftArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LongLeftArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&LongLeftArrow;",
+            "description": "Named entity: LongLeftArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27f5"
+                ]
+            ]
+        },
+        {
+            "input": "&LongLeftRightArrow",
+            "description": "Bad named entity: LongLeftRightArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LongLeftRightArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&LongLeftRightArrow;",
+            "description": "Named entity: LongLeftRightArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27f7"
+                ]
+            ]
+        },
+        {
+            "input": "&LongRightArrow",
+            "description": "Bad named entity: LongRightArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LongRightArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&LongRightArrow;",
+            "description": "Named entity: LongRightArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27f6"
+                ]
+            ]
+        },
+        {
+            "input": "&Longleftarrow",
+            "description": "Bad named entity: Longleftarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Longleftarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&Longleftarrow;",
+            "description": "Named entity: Longleftarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27f8"
+                ]
+            ]
+        },
+        {
+            "input": "&Longleftrightarrow",
+            "description": "Bad named entity: Longleftrightarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Longleftrightarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&Longleftrightarrow;",
+            "description": "Named entity: Longleftrightarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27fa"
+                ]
+            ]
+        },
+        {
+            "input": "&Longrightarrow",
+            "description": "Bad named entity: Longrightarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Longrightarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&Longrightarrow;",
+            "description": "Named entity: Longrightarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27f9"
+                ]
+            ]
+        },
+        {
+            "input": "&Lopf",
+            "description": "Bad named entity: Lopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Lopf"
+                ]
+            ]
+        },
+        {
+            "input": "&Lopf;",
+            "description": "Named entity: Lopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd43"
+                ]
+            ]
+        },
+        {
+            "input": "&LowerLeftArrow",
+            "description": "Bad named entity: LowerLeftArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LowerLeftArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&LowerLeftArrow;",
+            "description": "Named entity: LowerLeftArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2199"
+                ]
+            ]
+        },
+        {
+            "input": "&LowerRightArrow",
+            "description": "Bad named entity: LowerRightArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&LowerRightArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&LowerRightArrow;",
+            "description": "Named entity: LowerRightArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2198"
+                ]
+            ]
+        },
+        {
+            "input": "&Lscr",
+            "description": "Bad named entity: Lscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Lscr"
+                ]
+            ]
+        },
+        {
+            "input": "&Lscr;",
+            "description": "Named entity: Lscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2112"
+                ]
+            ]
+        },
+        {
+            "input": "&Lsh",
+            "description": "Bad named entity: Lsh without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Lsh"
+                ]
+            ]
+        },
+        {
+            "input": "&Lsh;",
+            "description": "Named entity: Lsh; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21b0"
+                ]
+            ]
+        },
+        {
+            "input": "&Lstrok",
+            "description": "Bad named entity: Lstrok without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Lstrok"
+                ]
+            ]
+        },
+        {
+            "input": "&Lstrok;",
+            "description": "Named entity: Lstrok; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0141"
+                ]
+            ]
+        },
+        {
+            "input": "&Lt",
+            "description": "Bad named entity: Lt without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Lt"
+                ]
+            ]
+        },
+        {
+            "input": "&Lt;",
+            "description": "Named entity: Lt; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u226a"
+                ]
+            ]
+        },
+        {
+            "input": "&Map",
+            "description": "Bad named entity: Map without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Map"
+                ]
+            ]
+        },
+        {
+            "input": "&Map;",
+            "description": "Named entity: Map; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2905"
+                ]
+            ]
+        },
+        {
+            "input": "&Mcy",
+            "description": "Bad named entity: Mcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Mcy"
+                ]
+            ]
+        },
+        {
+            "input": "&Mcy;",
+            "description": "Named entity: Mcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u041c"
+                ]
+            ]
+        },
+        {
+            "input": "&MediumSpace",
+            "description": "Bad named entity: MediumSpace without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&MediumSpace"
+                ]
+            ]
+        },
+        {
+            "input": "&MediumSpace;",
+            "description": "Named entity: MediumSpace; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u205f"
+                ]
+            ]
+        },
+        {
+            "input": "&Mellintrf",
+            "description": "Bad named entity: Mellintrf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Mellintrf"
+                ]
+            ]
+        },
+        {
+            "input": "&Mellintrf;",
+            "description": "Named entity: Mellintrf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2133"
+                ]
+            ]
+        },
+        {
+            "input": "&Mfr",
+            "description": "Bad named entity: Mfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Mfr"
+                ]
+            ]
+        },
+        {
+            "input": "&Mfr;",
+            "description": "Named entity: Mfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd10"
+                ]
+            ]
+        },
+        {
+            "input": "&MinusPlus",
+            "description": "Bad named entity: MinusPlus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&MinusPlus"
+                ]
+            ]
+        },
+        {
+            "input": "&MinusPlus;",
+            "description": "Named entity: MinusPlus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2213"
+                ]
+            ]
+        },
+        {
+            "input": "&Mopf",
+            "description": "Bad named entity: Mopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Mopf"
+                ]
+            ]
+        },
+        {
+            "input": "&Mopf;",
+            "description": "Named entity: Mopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd44"
+                ]
+            ]
+        },
+        {
+            "input": "&Mscr",
+            "description": "Bad named entity: Mscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Mscr"
+                ]
+            ]
+        },
+        {
+            "input": "&Mscr;",
+            "description": "Named entity: Mscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2133"
+                ]
+            ]
+        },
+        {
+            "input": "&Mu",
+            "description": "Bad named entity: Mu without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Mu"
+                ]
+            ]
+        },
+        {
+            "input": "&Mu;",
+            "description": "Named entity: Mu; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u039c"
+                ]
+            ]
+        },
+        {
+            "input": "&NJcy",
+            "description": "Bad named entity: NJcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NJcy"
+                ]
+            ]
+        },
+        {
+            "input": "&NJcy;",
+            "description": "Named entity: NJcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u040a"
+                ]
+            ]
+        },
+        {
+            "input": "&Nacute",
+            "description": "Bad named entity: Nacute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Nacute"
+                ]
+            ]
+        },
+        {
+            "input": "&Nacute;",
+            "description": "Named entity: Nacute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0143"
+                ]
+            ]
+        },
+        {
+            "input": "&Ncaron",
+            "description": "Bad named entity: Ncaron without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Ncaron"
+                ]
+            ]
+        },
+        {
+            "input": "&Ncaron;",
+            "description": "Named entity: Ncaron; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0147"
+                ]
+            ]
+        },
+        {
+            "input": "&Ncedil",
+            "description": "Bad named entity: Ncedil without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Ncedil"
+                ]
+            ]
+        },
+        {
+            "input": "&Ncedil;",
+            "description": "Named entity: Ncedil; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0145"
+                ]
+            ]
+        },
+        {
+            "input": "&Ncy",
+            "description": "Bad named entity: Ncy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Ncy"
+                ]
+            ]
+        },
+        {
+            "input": "&Ncy;",
+            "description": "Named entity: Ncy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u041d"
+                ]
+            ]
+        },
+        {
+            "input": "&NegativeMediumSpace",
+            "description": "Bad named entity: NegativeMediumSpace without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NegativeMediumSpace"
+                ]
+            ]
+        },
+        {
+            "input": "&NegativeMediumSpace;",
+            "description": "Named entity: NegativeMediumSpace; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u200b"
+                ]
+            ]
+        },
+        {
+            "input": "&NegativeThickSpace",
+            "description": "Bad named entity: NegativeThickSpace without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NegativeThickSpace"
+                ]
+            ]
+        },
+        {
+            "input": "&NegativeThickSpace;",
+            "description": "Named entity: NegativeThickSpace; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u200b"
+                ]
+            ]
+        },
+        {
+            "input": "&NegativeThinSpace",
+            "description": "Bad named entity: NegativeThinSpace without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NegativeThinSpace"
+                ]
+            ]
+        },
+        {
+            "input": "&NegativeThinSpace;",
+            "description": "Named entity: NegativeThinSpace; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u200b"
+                ]
+            ]
+        },
+        {
+            "input": "&NegativeVeryThinSpace",
+            "description": "Bad named entity: NegativeVeryThinSpace without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NegativeVeryThinSpace"
+                ]
+            ]
+        },
+        {
+            "input": "&NegativeVeryThinSpace;",
+            "description": "Named entity: NegativeVeryThinSpace; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u200b"
+                ]
+            ]
+        },
+        {
+            "input": "&NestedGreaterGreater",
+            "description": "Bad named entity: NestedGreaterGreater without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NestedGreaterGreater"
+                ]
+            ]
+        },
+        {
+            "input": "&NestedGreaterGreater;",
+            "description": "Named entity: NestedGreaterGreater; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u226b"
+                ]
+            ]
+        },
+        {
+            "input": "&NestedLessLess",
+            "description": "Bad named entity: NestedLessLess without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NestedLessLess"
+                ]
+            ]
+        },
+        {
+            "input": "&NestedLessLess;",
+            "description": "Named entity: NestedLessLess; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u226a"
+                ]
+            ]
+        },
+        {
+            "input": "&NewLine",
+            "description": "Bad named entity: NewLine without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NewLine"
+                ]
+            ]
+        },
+        {
+            "input": "&NewLine;",
+            "description": "Named entity: NewLine; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\n"
+                ]
+            ]
+        },
+        {
+            "input": "&Nfr",
+            "description": "Bad named entity: Nfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Nfr"
+                ]
+            ]
+        },
+        {
+            "input": "&Nfr;",
+            "description": "Named entity: Nfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd11"
+                ]
+            ]
+        },
+        {
+            "input": "&NoBreak",
+            "description": "Bad named entity: NoBreak without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NoBreak"
+                ]
+            ]
+        },
+        {
+            "input": "&NoBreak;",
+            "description": "Named entity: NoBreak; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2060"
+                ]
+            ]
+        },
+        {
+            "input": "&NonBreakingSpace",
+            "description": "Bad named entity: NonBreakingSpace without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NonBreakingSpace"
+                ]
+            ]
+        },
+        {
+            "input": "&NonBreakingSpace;",
+            "description": "Named entity: NonBreakingSpace; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a0"
+                ]
+            ]
+        },
+        {
+            "input": "&Nopf",
+            "description": "Bad named entity: Nopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Nopf"
+                ]
+            ]
+        },
+        {
+            "input": "&Nopf;",
+            "description": "Named entity: Nopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2115"
+                ]
+            ]
+        },
+        {
+            "input": "&Not",
+            "description": "Bad named entity: Not without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Not"
+                ]
+            ]
+        },
+        {
+            "input": "&Not;",
+            "description": "Named entity: Not; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aec"
+                ]
+            ]
+        },
+        {
+            "input": "&NotCongruent",
+            "description": "Bad named entity: NotCongruent without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotCongruent"
+                ]
+            ]
+        },
+        {
+            "input": "&NotCongruent;",
+            "description": "Named entity: NotCongruent; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2262"
+                ]
+            ]
+        },
+        {
+            "input": "&NotCupCap",
+            "description": "Bad named entity: NotCupCap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotCupCap"
+                ]
+            ]
+        },
+        {
+            "input": "&NotCupCap;",
+            "description": "Named entity: NotCupCap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u226d"
+                ]
+            ]
+        },
+        {
+            "input": "&NotDoubleVerticalBar",
+            "description": "Bad named entity: NotDoubleVerticalBar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotDoubleVerticalBar"
+                ]
+            ]
+        },
+        {
+            "input": "&NotDoubleVerticalBar;",
+            "description": "Named entity: NotDoubleVerticalBar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2226"
+                ]
+            ]
+        },
+        {
+            "input": "&NotElement",
+            "description": "Bad named entity: NotElement without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotElement"
+                ]
+            ]
+        },
+        {
+            "input": "&NotElement;",
+            "description": "Named entity: NotElement; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2209"
+                ]
+            ]
+        },
+        {
+            "input": "&NotEqual",
+            "description": "Bad named entity: NotEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&NotEqual;",
+            "description": "Named entity: NotEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2260"
+                ]
+            ]
+        },
+        {
+            "input": "&NotEqualTilde",
+            "description": "Bad named entity: NotEqualTilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotEqualTilde"
+                ]
+            ]
+        },
+        {
+            "input": "&NotEqualTilde;",
+            "description": "Named entity: NotEqualTilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2242\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&NotExists",
+            "description": "Bad named entity: NotExists without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotExists"
+                ]
+            ]
+        },
+        {
+            "input": "&NotExists;",
+            "description": "Named entity: NotExists; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2204"
+                ]
+            ]
+        },
+        {
+            "input": "&NotGreater",
+            "description": "Bad named entity: NotGreater without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotGreater"
+                ]
+            ]
+        },
+        {
+            "input": "&NotGreater;",
+            "description": "Named entity: NotGreater; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u226f"
+                ]
+            ]
+        },
+        {
+            "input": "&NotGreaterEqual",
+            "description": "Bad named entity: NotGreaterEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotGreaterEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&NotGreaterEqual;",
+            "description": "Named entity: NotGreaterEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2271"
+                ]
+            ]
+        },
+        {
+            "input": "&NotGreaterFullEqual",
+            "description": "Bad named entity: NotGreaterFullEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotGreaterFullEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&NotGreaterFullEqual;",
+            "description": "Named entity: NotGreaterFullEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2267\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&NotGreaterGreater",
+            "description": "Bad named entity: NotGreaterGreater without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotGreaterGreater"
+                ]
+            ]
+        },
+        {
+            "input": "&NotGreaterGreater;",
+            "description": "Named entity: NotGreaterGreater; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u226b\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&NotGreaterLess",
+            "description": "Bad named entity: NotGreaterLess without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotGreaterLess"
+                ]
+            ]
+        },
+        {
+            "input": "&NotGreaterLess;",
+            "description": "Named entity: NotGreaterLess; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2279"
+                ]
+            ]
+        },
+        {
+            "input": "&NotGreaterSlantEqual",
+            "description": "Bad named entity: NotGreaterSlantEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotGreaterSlantEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&NotGreaterSlantEqual;",
+            "description": "Named entity: NotGreaterSlantEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a7e\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&NotGreaterTilde",
+            "description": "Bad named entity: NotGreaterTilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotGreaterTilde"
+                ]
+            ]
+        },
+        {
+            "input": "&NotGreaterTilde;",
+            "description": "Named entity: NotGreaterTilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2275"
+                ]
+            ]
+        },
+        {
+            "input": "&NotHumpDownHump",
+            "description": "Bad named entity: NotHumpDownHump without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotHumpDownHump"
+                ]
+            ]
+        },
+        {
+            "input": "&NotHumpDownHump;",
+            "description": "Named entity: NotHumpDownHump; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u224e\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&NotHumpEqual",
+            "description": "Bad named entity: NotHumpEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotHumpEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&NotHumpEqual;",
+            "description": "Named entity: NotHumpEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u224f\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&NotLeftTriangle",
+            "description": "Bad named entity: NotLeftTriangle without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotLeftTriangle"
+                ]
+            ]
+        },
+        {
+            "input": "&NotLeftTriangle;",
+            "description": "Named entity: NotLeftTriangle; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22ea"
+                ]
+            ]
+        },
+        {
+            "input": "&NotLeftTriangleBar",
+            "description": "Bad named entity: NotLeftTriangleBar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotLeftTriangleBar"
+                ]
+            ]
+        },
+        {
+            "input": "&NotLeftTriangleBar;",
+            "description": "Named entity: NotLeftTriangleBar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29cf\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&NotLeftTriangleEqual",
+            "description": "Bad named entity: NotLeftTriangleEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotLeftTriangleEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&NotLeftTriangleEqual;",
+            "description": "Named entity: NotLeftTriangleEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22ec"
+                ]
+            ]
+        },
+        {
+            "input": "&NotLess",
+            "description": "Bad named entity: NotLess without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotLess"
+                ]
+            ]
+        },
+        {
+            "input": "&NotLess;",
+            "description": "Named entity: NotLess; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u226e"
+                ]
+            ]
+        },
+        {
+            "input": "&NotLessEqual",
+            "description": "Bad named entity: NotLessEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotLessEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&NotLessEqual;",
+            "description": "Named entity: NotLessEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2270"
+                ]
+            ]
+        },
+        {
+            "input": "&NotLessGreater",
+            "description": "Bad named entity: NotLessGreater without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotLessGreater"
+                ]
+            ]
+        },
+        {
+            "input": "&NotLessGreater;",
+            "description": "Named entity: NotLessGreater; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2278"
+                ]
+            ]
+        },
+        {
+            "input": "&NotLessLess",
+            "description": "Bad named entity: NotLessLess without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotLessLess"
+                ]
+            ]
+        },
+        {
+            "input": "&NotLessLess;",
+            "description": "Named entity: NotLessLess; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u226a\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&NotLessSlantEqual",
+            "description": "Bad named entity: NotLessSlantEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotLessSlantEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&NotLessSlantEqual;",
+            "description": "Named entity: NotLessSlantEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a7d\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&NotLessTilde",
+            "description": "Bad named entity: NotLessTilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotLessTilde"
+                ]
+            ]
+        },
+        {
+            "input": "&NotLessTilde;",
+            "description": "Named entity: NotLessTilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2274"
+                ]
+            ]
+        },
+        {
+            "input": "&NotNestedGreaterGreater",
+            "description": "Bad named entity: NotNestedGreaterGreater without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotNestedGreaterGreater"
+                ]
+            ]
+        },
+        {
+            "input": "&NotNestedGreaterGreater;",
+            "description": "Named entity: NotNestedGreaterGreater; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aa2\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&NotNestedLessLess",
+            "description": "Bad named entity: NotNestedLessLess without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotNestedLessLess"
+                ]
+            ]
+        },
+        {
+            "input": "&NotNestedLessLess;",
+            "description": "Named entity: NotNestedLessLess; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aa1\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&NotPrecedes",
+            "description": "Bad named entity: NotPrecedes without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotPrecedes"
+                ]
+            ]
+        },
+        {
+            "input": "&NotPrecedes;",
+            "description": "Named entity: NotPrecedes; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2280"
+                ]
+            ]
+        },
+        {
+            "input": "&NotPrecedesEqual",
+            "description": "Bad named entity: NotPrecedesEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotPrecedesEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&NotPrecedesEqual;",
+            "description": "Named entity: NotPrecedesEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aaf\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&NotPrecedesSlantEqual",
+            "description": "Bad named entity: NotPrecedesSlantEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotPrecedesSlantEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&NotPrecedesSlantEqual;",
+            "description": "Named entity: NotPrecedesSlantEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22e0"
+                ]
+            ]
+        },
+        {
+            "input": "&NotReverseElement",
+            "description": "Bad named entity: NotReverseElement without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotReverseElement"
+                ]
+            ]
+        },
+        {
+            "input": "&NotReverseElement;",
+            "description": "Named entity: NotReverseElement; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u220c"
+                ]
+            ]
+        },
+        {
+            "input": "&NotRightTriangle",
+            "description": "Bad named entity: NotRightTriangle without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotRightTriangle"
+                ]
+            ]
+        },
+        {
+            "input": "&NotRightTriangle;",
+            "description": "Named entity: NotRightTriangle; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22eb"
+                ]
+            ]
+        },
+        {
+            "input": "&NotRightTriangleBar",
+            "description": "Bad named entity: NotRightTriangleBar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotRightTriangleBar"
+                ]
+            ]
+        },
+        {
+            "input": "&NotRightTriangleBar;",
+            "description": "Named entity: NotRightTriangleBar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29d0\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&NotRightTriangleEqual",
+            "description": "Bad named entity: NotRightTriangleEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotRightTriangleEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&NotRightTriangleEqual;",
+            "description": "Named entity: NotRightTriangleEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22ed"
+                ]
+            ]
+        },
+        {
+            "input": "&NotSquareSubset",
+            "description": "Bad named entity: NotSquareSubset without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotSquareSubset"
+                ]
+            ]
+        },
+        {
+            "input": "&NotSquareSubset;",
+            "description": "Named entity: NotSquareSubset; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u228f\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&NotSquareSubsetEqual",
+            "description": "Bad named entity: NotSquareSubsetEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotSquareSubsetEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&NotSquareSubsetEqual;",
+            "description": "Named entity: NotSquareSubsetEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22e2"
+                ]
+            ]
+        },
+        {
+            "input": "&NotSquareSuperset",
+            "description": "Bad named entity: NotSquareSuperset without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotSquareSuperset"
+                ]
+            ]
+        },
+        {
+            "input": "&NotSquareSuperset;",
+            "description": "Named entity: NotSquareSuperset; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2290\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&NotSquareSupersetEqual",
+            "description": "Bad named entity: NotSquareSupersetEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotSquareSupersetEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&NotSquareSupersetEqual;",
+            "description": "Named entity: NotSquareSupersetEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22e3"
+                ]
+            ]
+        },
+        {
+            "input": "&NotSubset",
+            "description": "Bad named entity: NotSubset without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotSubset"
+                ]
+            ]
+        },
+        {
+            "input": "&NotSubset;",
+            "description": "Named entity: NotSubset; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2282\u20d2"
+                ]
+            ]
+        },
+        {
+            "input": "&NotSubsetEqual",
+            "description": "Bad named entity: NotSubsetEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotSubsetEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&NotSubsetEqual;",
+            "description": "Named entity: NotSubsetEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2288"
+                ]
+            ]
+        },
+        {
+            "input": "&NotSucceeds",
+            "description": "Bad named entity: NotSucceeds without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotSucceeds"
+                ]
+            ]
+        },
+        {
+            "input": "&NotSucceeds;",
+            "description": "Named entity: NotSucceeds; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2281"
+                ]
+            ]
+        },
+        {
+            "input": "&NotSucceedsEqual",
+            "description": "Bad named entity: NotSucceedsEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotSucceedsEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&NotSucceedsEqual;",
+            "description": "Named entity: NotSucceedsEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ab0\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&NotSucceedsSlantEqual",
+            "description": "Bad named entity: NotSucceedsSlantEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotSucceedsSlantEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&NotSucceedsSlantEqual;",
+            "description": "Named entity: NotSucceedsSlantEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22e1"
+                ]
+            ]
+        },
+        {
+            "input": "&NotSucceedsTilde",
+            "description": "Bad named entity: NotSucceedsTilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotSucceedsTilde"
+                ]
+            ]
+        },
+        {
+            "input": "&NotSucceedsTilde;",
+            "description": "Named entity: NotSucceedsTilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u227f\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&NotSuperset",
+            "description": "Bad named entity: NotSuperset without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotSuperset"
+                ]
+            ]
+        },
+        {
+            "input": "&NotSuperset;",
+            "description": "Named entity: NotSuperset; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2283\u20d2"
+                ]
+            ]
+        },
+        {
+            "input": "&NotSupersetEqual",
+            "description": "Bad named entity: NotSupersetEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotSupersetEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&NotSupersetEqual;",
+            "description": "Named entity: NotSupersetEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2289"
+                ]
+            ]
+        },
+        {
+            "input": "&NotTilde",
+            "description": "Bad named entity: NotTilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotTilde"
+                ]
+            ]
+        },
+        {
+            "input": "&NotTilde;",
+            "description": "Named entity: NotTilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2241"
+                ]
+            ]
+        },
+        {
+            "input": "&NotTildeEqual",
+            "description": "Bad named entity: NotTildeEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotTildeEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&NotTildeEqual;",
+            "description": "Named entity: NotTildeEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2244"
+                ]
+            ]
+        },
+        {
+            "input": "&NotTildeFullEqual",
+            "description": "Bad named entity: NotTildeFullEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotTildeFullEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&NotTildeFullEqual;",
+            "description": "Named entity: NotTildeFullEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2247"
+                ]
+            ]
+        },
+        {
+            "input": "&NotTildeTilde",
+            "description": "Bad named entity: NotTildeTilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotTildeTilde"
+                ]
+            ]
+        },
+        {
+            "input": "&NotTildeTilde;",
+            "description": "Named entity: NotTildeTilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2249"
+                ]
+            ]
+        },
+        {
+            "input": "&NotVerticalBar",
+            "description": "Bad named entity: NotVerticalBar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&NotVerticalBar"
+                ]
+            ]
+        },
+        {
+            "input": "&NotVerticalBar;",
+            "description": "Named entity: NotVerticalBar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2224"
+                ]
+            ]
+        },
+        {
+            "input": "&Nscr",
+            "description": "Bad named entity: Nscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Nscr"
+                ]
+            ]
+        },
+        {
+            "input": "&Nscr;",
+            "description": "Named entity: Nscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udca9"
+                ]
+            ]
+        },
+        {
+            "input": "&Ntilde",
+            "description": "Named entity: Ntilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00d1"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&Ntilde;",
+            "description": "Named entity: Ntilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00d1"
+                ]
+            ]
+        },
+        {
+            "input": "&Nu",
+            "description": "Bad named entity: Nu without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Nu"
+                ]
+            ]
+        },
+        {
+            "input": "&Nu;",
+            "description": "Named entity: Nu; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u039d"
+                ]
+            ]
+        },
+        {
+            "input": "&OElig",
+            "description": "Bad named entity: OElig without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&OElig"
+                ]
+            ]
+        },
+        {
+            "input": "&OElig;",
+            "description": "Named entity: OElig; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0152"
+                ]
+            ]
+        },
+        {
+            "input": "&Oacute",
+            "description": "Named entity: Oacute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00d3"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&Oacute;",
+            "description": "Named entity: Oacute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00d3"
+                ]
+            ]
+        },
+        {
+            "input": "&Ocirc",
+            "description": "Named entity: Ocirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00d4"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 7 }
+            ]
+        },
+        {
+            "input": "&Ocirc;",
+            "description": "Named entity: Ocirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00d4"
+                ]
+            ]
+        },
+        {
+            "input": "&Ocy",
+            "description": "Bad named entity: Ocy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Ocy"
+                ]
+            ]
+        },
+        {
+            "input": "&Ocy;",
+            "description": "Named entity: Ocy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u041e"
+                ]
+            ]
+        },
+        {
+            "input": "&Odblac",
+            "description": "Bad named entity: Odblac without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Odblac"
+                ]
+            ]
+        },
+        {
+            "input": "&Odblac;",
+            "description": "Named entity: Odblac; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0150"
+                ]
+            ]
+        },
+        {
+            "input": "&Ofr",
+            "description": "Bad named entity: Ofr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Ofr"
+                ]
+            ]
+        },
+        {
+            "input": "&Ofr;",
+            "description": "Named entity: Ofr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd12"
+                ]
+            ]
+        },
+        {
+            "input": "&Ograve",
+            "description": "Named entity: Ograve without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00d2"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&Ograve;",
+            "description": "Named entity: Ograve; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00d2"
+                ]
+            ]
+        },
+        {
+            "input": "&Omacr",
+            "description": "Bad named entity: Omacr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Omacr"
+                ]
+            ]
+        },
+        {
+            "input": "&Omacr;",
+            "description": "Named entity: Omacr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u014c"
+                ]
+            ]
+        },
+        {
+            "input": "&Omega",
+            "description": "Bad named entity: Omega without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Omega"
+                ]
+            ]
+        },
+        {
+            "input": "&Omega;",
+            "description": "Named entity: Omega; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03a9"
+                ]
+            ]
+        },
+        {
+            "input": "&Omicron",
+            "description": "Bad named entity: Omicron without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Omicron"
+                ]
+            ]
+        },
+        {
+            "input": "&Omicron;",
+            "description": "Named entity: Omicron; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u039f"
+                ]
+            ]
+        },
+        {
+            "input": "&Oopf",
+            "description": "Bad named entity: Oopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Oopf"
+                ]
+            ]
+        },
+        {
+            "input": "&Oopf;",
+            "description": "Named entity: Oopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd46"
+                ]
+            ]
+        },
+        {
+            "input": "&OpenCurlyDoubleQuote",
+            "description": "Bad named entity: OpenCurlyDoubleQuote without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&OpenCurlyDoubleQuote"
+                ]
+            ]
+        },
+        {
+            "input": "&OpenCurlyDoubleQuote;",
+            "description": "Named entity: OpenCurlyDoubleQuote; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u201c"
+                ]
+            ]
+        },
+        {
+            "input": "&OpenCurlyQuote",
+            "description": "Bad named entity: OpenCurlyQuote without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&OpenCurlyQuote"
+                ]
+            ]
+        },
+        {
+            "input": "&OpenCurlyQuote;",
+            "description": "Named entity: OpenCurlyQuote; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2018"
+                ]
+            ]
+        },
+        {
+            "input": "&Or",
+            "description": "Bad named entity: Or without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Or"
+                ]
+            ]
+        },
+        {
+            "input": "&Or;",
+            "description": "Named entity: Or; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a54"
+                ]
+            ]
+        },
+        {
+            "input": "&Oscr",
+            "description": "Bad named entity: Oscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Oscr"
+                ]
+            ]
+        },
+        {
+            "input": "&Oscr;",
+            "description": "Named entity: Oscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcaa"
+                ]
+            ]
+        },
+        {
+            "input": "&Oslash",
+            "description": "Named entity: Oslash without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00d8"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&Oslash;",
+            "description": "Named entity: Oslash; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00d8"
+                ]
+            ]
+        },
+        {
+            "input": "&Otilde",
+            "description": "Named entity: Otilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00d5"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&Otilde;",
+            "description": "Named entity: Otilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00d5"
+                ]
+            ]
+        },
+        {
+            "input": "&Otimes",
+            "description": "Bad named entity: Otimes without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Otimes"
+                ]
+            ]
+        },
+        {
+            "input": "&Otimes;",
+            "description": "Named entity: Otimes; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a37"
+                ]
+            ]
+        },
+        {
+            "input": "&Ouml",
+            "description": "Named entity: Ouml without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00d6"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 6 }
+            ]
+        },
+        {
+            "input": "&Ouml;",
+            "description": "Named entity: Ouml; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00d6"
+                ]
+            ]
+        },
+        {
+            "input": "&OverBar",
+            "description": "Bad named entity: OverBar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&OverBar"
+                ]
+            ]
+        },
+        {
+            "input": "&OverBar;",
+            "description": "Named entity: OverBar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u203e"
+                ]
+            ]
+        },
+        {
+            "input": "&OverBrace",
+            "description": "Bad named entity: OverBrace without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&OverBrace"
+                ]
+            ]
+        },
+        {
+            "input": "&OverBrace;",
+            "description": "Named entity: OverBrace; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u23de"
+                ]
+            ]
+        },
+        {
+            "input": "&OverBracket",
+            "description": "Bad named entity: OverBracket without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&OverBracket"
+                ]
+            ]
+        },
+        {
+            "input": "&OverBracket;",
+            "description": "Named entity: OverBracket; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u23b4"
+                ]
+            ]
+        },
+        {
+            "input": "&OverParenthesis",
+            "description": "Bad named entity: OverParenthesis without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&OverParenthesis"
+                ]
+            ]
+        },
+        {
+            "input": "&OverParenthesis;",
+            "description": "Named entity: OverParenthesis; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u23dc"
+                ]
+            ]
+        },
+        {
+            "input": "&PartialD",
+            "description": "Bad named entity: PartialD without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&PartialD"
+                ]
+            ]
+        },
+        {
+            "input": "&PartialD;",
+            "description": "Named entity: PartialD; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2202"
+                ]
+            ]
+        },
+        {
+            "input": "&Pcy",
+            "description": "Bad named entity: Pcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Pcy"
+                ]
+            ]
+        },
+        {
+            "input": "&Pcy;",
+            "description": "Named entity: Pcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u041f"
+                ]
+            ]
+        },
+        {
+            "input": "&Pfr",
+            "description": "Bad named entity: Pfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Pfr"
+                ]
+            ]
+        },
+        {
+            "input": "&Pfr;",
+            "description": "Named entity: Pfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd13"
+                ]
+            ]
+        },
+        {
+            "input": "&Phi",
+            "description": "Bad named entity: Phi without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Phi"
+                ]
+            ]
+        },
+        {
+            "input": "&Phi;",
+            "description": "Named entity: Phi; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03a6"
+                ]
+            ]
+        },
+        {
+            "input": "&Pi",
+            "description": "Bad named entity: Pi without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Pi"
+                ]
+            ]
+        },
+        {
+            "input": "&Pi;",
+            "description": "Named entity: Pi; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03a0"
+                ]
+            ]
+        },
+        {
+            "input": "&PlusMinus",
+            "description": "Bad named entity: PlusMinus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&PlusMinus"
+                ]
+            ]
+        },
+        {
+            "input": "&PlusMinus;",
+            "description": "Named entity: PlusMinus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b1"
+                ]
+            ]
+        },
+        {
+            "input": "&Poincareplane",
+            "description": "Bad named entity: Poincareplane without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Poincareplane"
+                ]
+            ]
+        },
+        {
+            "input": "&Poincareplane;",
+            "description": "Named entity: Poincareplane; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u210c"
+                ]
+            ]
+        },
+        {
+            "input": "&Popf",
+            "description": "Bad named entity: Popf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Popf"
+                ]
+            ]
+        },
+        {
+            "input": "&Popf;",
+            "description": "Named entity: Popf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2119"
+                ]
+            ]
+        },
+        {
+            "input": "&Pr",
+            "description": "Bad named entity: Pr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Pr"
+                ]
+            ]
+        },
+        {
+            "input": "&Pr;",
+            "description": "Named entity: Pr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2abb"
+                ]
+            ]
+        },
+        {
+            "input": "&Precedes",
+            "description": "Bad named entity: Precedes without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Precedes"
+                ]
+            ]
+        },
+        {
+            "input": "&Precedes;",
+            "description": "Named entity: Precedes; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u227a"
+                ]
+            ]
+        },
+        {
+            "input": "&PrecedesEqual",
+            "description": "Bad named entity: PrecedesEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&PrecedesEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&PrecedesEqual;",
+            "description": "Named entity: PrecedesEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aaf"
+                ]
+            ]
+        },
+        {
+            "input": "&PrecedesSlantEqual",
+            "description": "Bad named entity: PrecedesSlantEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&PrecedesSlantEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&PrecedesSlantEqual;",
+            "description": "Named entity: PrecedesSlantEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u227c"
+                ]
+            ]
+        },
+        {
+            "input": "&PrecedesTilde",
+            "description": "Bad named entity: PrecedesTilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&PrecedesTilde"
+                ]
+            ]
+        },
+        {
+            "input": "&PrecedesTilde;",
+            "description": "Named entity: PrecedesTilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u227e"
+                ]
+            ]
+        },
+        {
+            "input": "&Prime",
+            "description": "Bad named entity: Prime without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Prime"
+                ]
+            ]
+        },
+        {
+            "input": "&Prime;",
+            "description": "Named entity: Prime; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2033"
+                ]
+            ]
+        },
+        {
+            "input": "&Product",
+            "description": "Bad named entity: Product without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Product"
+                ]
+            ]
+        },
+        {
+            "input": "&Product;",
+            "description": "Named entity: Product; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u220f"
+                ]
+            ]
+        },
+        {
+            "input": "&Proportion",
+            "description": "Bad named entity: Proportion without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Proportion"
+                ]
+            ]
+        },
+        {
+            "input": "&Proportion;",
+            "description": "Named entity: Proportion; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2237"
+                ]
+            ]
+        },
+        {
+            "input": "&Proportional",
+            "description": "Bad named entity: Proportional without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Proportional"
+                ]
+            ]
+        },
+        {
+            "input": "&Proportional;",
+            "description": "Named entity: Proportional; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u221d"
+                ]
+            ]
+        },
+        {
+            "input": "&Pscr",
+            "description": "Bad named entity: Pscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Pscr"
+                ]
+            ]
+        },
+        {
+            "input": "&Pscr;",
+            "description": "Named entity: Pscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcab"
+                ]
+            ]
+        },
+        {
+            "input": "&Psi",
+            "description": "Bad named entity: Psi without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Psi"
+                ]
+            ]
+        },
+        {
+            "input": "&Psi;",
+            "description": "Named entity: Psi; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03a8"
+                ]
+            ]
+        },
+        {
+            "input": "&QUOT",
+            "description": "Named entity: QUOT without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\""
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 6 }
+            ]
+        },
+        {
+            "input": "&QUOT;",
+            "description": "Named entity: QUOT; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\""
+                ]
+            ]
+        },
+        {
+            "input": "&Qfr",
+            "description": "Bad named entity: Qfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Qfr"
+                ]
+            ]
+        },
+        {
+            "input": "&Qfr;",
+            "description": "Named entity: Qfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd14"
+                ]
+            ]
+        },
+        {
+            "input": "&Qopf",
+            "description": "Bad named entity: Qopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Qopf"
+                ]
+            ]
+        },
+        {
+            "input": "&Qopf;",
+            "description": "Named entity: Qopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u211a"
+                ]
+            ]
+        },
+        {
+            "input": "&Qscr",
+            "description": "Bad named entity: Qscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Qscr"
+                ]
+            ]
+        },
+        {
+            "input": "&Qscr;",
+            "description": "Named entity: Qscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcac"
+                ]
+            ]
+        },
+        {
+            "input": "&RBarr",
+            "description": "Bad named entity: RBarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&RBarr"
+                ]
+            ]
+        },
+        {
+            "input": "&RBarr;",
+            "description": "Named entity: RBarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2910"
+                ]
+            ]
+        },
+        {
+            "input": "&REG",
+            "description": "Named entity: REG without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ae"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 5 }
+            ]
+        },
+        {
+            "input": "&REG;",
+            "description": "Named entity: REG; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ae"
+                ]
+            ]
+        },
+        {
+            "input": "&Racute",
+            "description": "Bad named entity: Racute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Racute"
+                ]
+            ]
+        },
+        {
+            "input": "&Racute;",
+            "description": "Named entity: Racute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0154"
+                ]
+            ]
+        },
+        {
+            "input": "&Rang",
+            "description": "Bad named entity: Rang without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Rang"
+                ]
+            ]
+        },
+        {
+            "input": "&Rang;",
+            "description": "Named entity: Rang; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27eb"
+                ]
+            ]
+        },
+        {
+            "input": "&Rarr",
+            "description": "Bad named entity: Rarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Rarr"
+                ]
+            ]
+        },
+        {
+            "input": "&Rarr;",
+            "description": "Named entity: Rarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21a0"
+                ]
+            ]
+        },
+        {
+            "input": "&Rarrtl",
+            "description": "Bad named entity: Rarrtl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Rarrtl"
+                ]
+            ]
+        },
+        {
+            "input": "&Rarrtl;",
+            "description": "Named entity: Rarrtl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2916"
+                ]
+            ]
+        },
+        {
+            "input": "&Rcaron",
+            "description": "Bad named entity: Rcaron without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Rcaron"
+                ]
+            ]
+        },
+        {
+            "input": "&Rcaron;",
+            "description": "Named entity: Rcaron; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0158"
+                ]
+            ]
+        },
+        {
+            "input": "&Rcedil",
+            "description": "Bad named entity: Rcedil without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Rcedil"
+                ]
+            ]
+        },
+        {
+            "input": "&Rcedil;",
+            "description": "Named entity: Rcedil; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0156"
+                ]
+            ]
+        },
+        {
+            "input": "&Rcy",
+            "description": "Bad named entity: Rcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Rcy"
+                ]
+            ]
+        },
+        {
+            "input": "&Rcy;",
+            "description": "Named entity: Rcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0420"
+                ]
+            ]
+        },
+        {
+            "input": "&Re",
+            "description": "Bad named entity: Re without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Re"
+                ]
+            ]
+        },
+        {
+            "input": "&Re;",
+            "description": "Named entity: Re; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u211c"
+                ]
+            ]
+        },
+        {
+            "input": "&ReverseElement",
+            "description": "Bad named entity: ReverseElement without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ReverseElement"
+                ]
+            ]
+        },
+        {
+            "input": "&ReverseElement;",
+            "description": "Named entity: ReverseElement; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u220b"
+                ]
+            ]
+        },
+        {
+            "input": "&ReverseEquilibrium",
+            "description": "Bad named entity: ReverseEquilibrium without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ReverseEquilibrium"
+                ]
+            ]
+        },
+        {
+            "input": "&ReverseEquilibrium;",
+            "description": "Named entity: ReverseEquilibrium; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21cb"
+                ]
+            ]
+        },
+        {
+            "input": "&ReverseUpEquilibrium",
+            "description": "Bad named entity: ReverseUpEquilibrium without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ReverseUpEquilibrium"
+                ]
+            ]
+        },
+        {
+            "input": "&ReverseUpEquilibrium;",
+            "description": "Named entity: ReverseUpEquilibrium; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u296f"
+                ]
+            ]
+        },
+        {
+            "input": "&Rfr",
+            "description": "Bad named entity: Rfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Rfr"
+                ]
+            ]
+        },
+        {
+            "input": "&Rfr;",
+            "description": "Named entity: Rfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u211c"
+                ]
+            ]
+        },
+        {
+            "input": "&Rho",
+            "description": "Bad named entity: Rho without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Rho"
+                ]
+            ]
+        },
+        {
+            "input": "&Rho;",
+            "description": "Named entity: Rho; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03a1"
+                ]
+            ]
+        },
+        {
+            "input": "&RightAngleBracket",
+            "description": "Bad named entity: RightAngleBracket without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&RightAngleBracket"
+                ]
+            ]
+        },
+        {
+            "input": "&RightAngleBracket;",
+            "description": "Named entity: RightAngleBracket; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27e9"
+                ]
+            ]
+        },
+        {
+            "input": "&RightArrow",
+            "description": "Bad named entity: RightArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&RightArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&RightArrow;",
+            "description": "Named entity: RightArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2192"
+                ]
+            ]
+        },
+        {
+            "input": "&RightArrowBar",
+            "description": "Bad named entity: RightArrowBar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&RightArrowBar"
+                ]
+            ]
+        },
+        {
+            "input": "&RightArrowBar;",
+            "description": "Named entity: RightArrowBar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21e5"
+                ]
+            ]
+        },
+        {
+            "input": "&RightArrowLeftArrow",
+            "description": "Bad named entity: RightArrowLeftArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&RightArrowLeftArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&RightArrowLeftArrow;",
+            "description": "Named entity: RightArrowLeftArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c4"
+                ]
+            ]
+        },
+        {
+            "input": "&RightCeiling",
+            "description": "Bad named entity: RightCeiling without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&RightCeiling"
+                ]
+            ]
+        },
+        {
+            "input": "&RightCeiling;",
+            "description": "Named entity: RightCeiling; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2309"
+                ]
+            ]
+        },
+        {
+            "input": "&RightDoubleBracket",
+            "description": "Bad named entity: RightDoubleBracket without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&RightDoubleBracket"
+                ]
+            ]
+        },
+        {
+            "input": "&RightDoubleBracket;",
+            "description": "Named entity: RightDoubleBracket; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27e7"
+                ]
+            ]
+        },
+        {
+            "input": "&RightDownTeeVector",
+            "description": "Bad named entity: RightDownTeeVector without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&RightDownTeeVector"
+                ]
+            ]
+        },
+        {
+            "input": "&RightDownTeeVector;",
+            "description": "Named entity: RightDownTeeVector; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u295d"
+                ]
+            ]
+        },
+        {
+            "input": "&RightDownVector",
+            "description": "Bad named entity: RightDownVector without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&RightDownVector"
+                ]
+            ]
+        },
+        {
+            "input": "&RightDownVector;",
+            "description": "Named entity: RightDownVector; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c2"
+                ]
+            ]
+        },
+        {
+            "input": "&RightDownVectorBar",
+            "description": "Bad named entity: RightDownVectorBar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&RightDownVectorBar"
+                ]
+            ]
+        },
+        {
+            "input": "&RightDownVectorBar;",
+            "description": "Named entity: RightDownVectorBar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2955"
+                ]
+            ]
+        },
+        {
+            "input": "&RightFloor",
+            "description": "Bad named entity: RightFloor without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&RightFloor"
+                ]
+            ]
+        },
+        {
+            "input": "&RightFloor;",
+            "description": "Named entity: RightFloor; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u230b"
+                ]
+            ]
+        },
+        {
+            "input": "&RightTee",
+            "description": "Bad named entity: RightTee without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&RightTee"
+                ]
+            ]
+        },
+        {
+            "input": "&RightTee;",
+            "description": "Named entity: RightTee; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22a2"
+                ]
+            ]
+        },
+        {
+            "input": "&RightTeeArrow",
+            "description": "Bad named entity: RightTeeArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&RightTeeArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&RightTeeArrow;",
+            "description": "Named entity: RightTeeArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21a6"
+                ]
+            ]
+        },
+        {
+            "input": "&RightTeeVector",
+            "description": "Bad named entity: RightTeeVector without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&RightTeeVector"
+                ]
+            ]
+        },
+        {
+            "input": "&RightTeeVector;",
+            "description": "Named entity: RightTeeVector; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u295b"
+                ]
+            ]
+        },
+        {
+            "input": "&RightTriangle",
+            "description": "Bad named entity: RightTriangle without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&RightTriangle"
+                ]
+            ]
+        },
+        {
+            "input": "&RightTriangle;",
+            "description": "Named entity: RightTriangle; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22b3"
+                ]
+            ]
+        },
+        {
+            "input": "&RightTriangleBar",
+            "description": "Bad named entity: RightTriangleBar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&RightTriangleBar"
+                ]
+            ]
+        },
+        {
+            "input": "&RightTriangleBar;",
+            "description": "Named entity: RightTriangleBar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29d0"
+                ]
+            ]
+        },
+        {
+            "input": "&RightTriangleEqual",
+            "description": "Bad named entity: RightTriangleEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&RightTriangleEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&RightTriangleEqual;",
+            "description": "Named entity: RightTriangleEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22b5"
+                ]
+            ]
+        },
+        {
+            "input": "&RightUpDownVector",
+            "description": "Bad named entity: RightUpDownVector without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&RightUpDownVector"
+                ]
+            ]
+        },
+        {
+            "input": "&RightUpDownVector;",
+            "description": "Named entity: RightUpDownVector; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u294f"
+                ]
+            ]
+        },
+        {
+            "input": "&RightUpTeeVector",
+            "description": "Bad named entity: RightUpTeeVector without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&RightUpTeeVector"
+                ]
+            ]
+        },
+        {
+            "input": "&RightUpTeeVector;",
+            "description": "Named entity: RightUpTeeVector; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u295c"
+                ]
+            ]
+        },
+        {
+            "input": "&RightUpVector",
+            "description": "Bad named entity: RightUpVector without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&RightUpVector"
+                ]
+            ]
+        },
+        {
+            "input": "&RightUpVector;",
+            "description": "Named entity: RightUpVector; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21be"
+                ]
+            ]
+        },
+        {
+            "input": "&RightUpVectorBar",
+            "description": "Bad named entity: RightUpVectorBar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&RightUpVectorBar"
+                ]
+            ]
+        },
+        {
+            "input": "&RightUpVectorBar;",
+            "description": "Named entity: RightUpVectorBar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2954"
+                ]
+            ]
+        },
+        {
+            "input": "&RightVector",
+            "description": "Bad named entity: RightVector without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&RightVector"
+                ]
+            ]
+        },
+        {
+            "input": "&RightVector;",
+            "description": "Named entity: RightVector; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c0"
+                ]
+            ]
+        },
+        {
+            "input": "&RightVectorBar",
+            "description": "Bad named entity: RightVectorBar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&RightVectorBar"
+                ]
+            ]
+        },
+        {
+            "input": "&RightVectorBar;",
+            "description": "Named entity: RightVectorBar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2953"
+                ]
+            ]
+        },
+        {
+            "input": "&Rightarrow",
+            "description": "Bad named entity: Rightarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Rightarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&Rightarrow;",
+            "description": "Named entity: Rightarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21d2"
+                ]
+            ]
+        },
+        {
+            "input": "&Ropf",
+            "description": "Bad named entity: Ropf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Ropf"
+                ]
+            ]
+        },
+        {
+            "input": "&Ropf;",
+            "description": "Named entity: Ropf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u211d"
+                ]
+            ]
+        },
+        {
+            "input": "&RoundImplies",
+            "description": "Bad named entity: RoundImplies without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&RoundImplies"
+                ]
+            ]
+        },
+        {
+            "input": "&RoundImplies;",
+            "description": "Named entity: RoundImplies; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2970"
+                ]
+            ]
+        },
+        {
+            "input": "&Rrightarrow",
+            "description": "Bad named entity: Rrightarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Rrightarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&Rrightarrow;",
+            "description": "Named entity: Rrightarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21db"
+                ]
+            ]
+        },
+        {
+            "input": "&Rscr",
+            "description": "Bad named entity: Rscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Rscr"
+                ]
+            ]
+        },
+        {
+            "input": "&Rscr;",
+            "description": "Named entity: Rscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u211b"
+                ]
+            ]
+        },
+        {
+            "input": "&Rsh",
+            "description": "Bad named entity: Rsh without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Rsh"
+                ]
+            ]
+        },
+        {
+            "input": "&Rsh;",
+            "description": "Named entity: Rsh; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21b1"
+                ]
+            ]
+        },
+        {
+            "input": "&RuleDelayed",
+            "description": "Bad named entity: RuleDelayed without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&RuleDelayed"
+                ]
+            ]
+        },
+        {
+            "input": "&RuleDelayed;",
+            "description": "Named entity: RuleDelayed; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29f4"
+                ]
+            ]
+        },
+        {
+            "input": "&SHCHcy",
+            "description": "Bad named entity: SHCHcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&SHCHcy"
+                ]
+            ]
+        },
+        {
+            "input": "&SHCHcy;",
+            "description": "Named entity: SHCHcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0429"
+                ]
+            ]
+        },
+        {
+            "input": "&SHcy",
+            "description": "Bad named entity: SHcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&SHcy"
+                ]
+            ]
+        },
+        {
+            "input": "&SHcy;",
+            "description": "Named entity: SHcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0428"
+                ]
+            ]
+        },
+        {
+            "input": "&SOFTcy",
+            "description": "Bad named entity: SOFTcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&SOFTcy"
+                ]
+            ]
+        },
+        {
+            "input": "&SOFTcy;",
+            "description": "Named entity: SOFTcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u042c"
+                ]
+            ]
+        },
+        {
+            "input": "&Sacute",
+            "description": "Bad named entity: Sacute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Sacute"
+                ]
+            ]
+        },
+        {
+            "input": "&Sacute;",
+            "description": "Named entity: Sacute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u015a"
+                ]
+            ]
+        },
+        {
+            "input": "&Sc",
+            "description": "Bad named entity: Sc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Sc"
+                ]
+            ]
+        },
+        {
+            "input": "&Sc;",
+            "description": "Named entity: Sc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2abc"
+                ]
+            ]
+        },
+        {
+            "input": "&Scaron",
+            "description": "Bad named entity: Scaron without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Scaron"
+                ]
+            ]
+        },
+        {
+            "input": "&Scaron;",
+            "description": "Named entity: Scaron; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0160"
+                ]
+            ]
+        },
+        {
+            "input": "&Scedil",
+            "description": "Bad named entity: Scedil without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Scedil"
+                ]
+            ]
+        },
+        {
+            "input": "&Scedil;",
+            "description": "Named entity: Scedil; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u015e"
+                ]
+            ]
+        },
+        {
+            "input": "&Scirc",
+            "description": "Bad named entity: Scirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Scirc"
+                ]
+            ]
+        },
+        {
+            "input": "&Scirc;",
+            "description": "Named entity: Scirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u015c"
+                ]
+            ]
+        },
+        {
+            "input": "&Scy",
+            "description": "Bad named entity: Scy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Scy"
+                ]
+            ]
+        },
+        {
+            "input": "&Scy;",
+            "description": "Named entity: Scy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0421"
+                ]
+            ]
+        },
+        {
+            "input": "&Sfr",
+            "description": "Bad named entity: Sfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Sfr"
+                ]
+            ]
+        },
+        {
+            "input": "&Sfr;",
+            "description": "Named entity: Sfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd16"
+                ]
+            ]
+        },
+        {
+            "input": "&ShortDownArrow",
+            "description": "Bad named entity: ShortDownArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ShortDownArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&ShortDownArrow;",
+            "description": "Named entity: ShortDownArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2193"
+                ]
+            ]
+        },
+        {
+            "input": "&ShortLeftArrow",
+            "description": "Bad named entity: ShortLeftArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ShortLeftArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&ShortLeftArrow;",
+            "description": "Named entity: ShortLeftArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2190"
+                ]
+            ]
+        },
+        {
+            "input": "&ShortRightArrow",
+            "description": "Bad named entity: ShortRightArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ShortRightArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&ShortRightArrow;",
+            "description": "Named entity: ShortRightArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2192"
+                ]
+            ]
+        },
+        {
+            "input": "&ShortUpArrow",
+            "description": "Bad named entity: ShortUpArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ShortUpArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&ShortUpArrow;",
+            "description": "Named entity: ShortUpArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2191"
+                ]
+            ]
+        },
+        {
+            "input": "&Sigma",
+            "description": "Bad named entity: Sigma without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Sigma"
+                ]
+            ]
+        },
+        {
+            "input": "&Sigma;",
+            "description": "Named entity: Sigma; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03a3"
+                ]
+            ]
+        },
+        {
+            "input": "&SmallCircle",
+            "description": "Bad named entity: SmallCircle without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&SmallCircle"
+                ]
+            ]
+        },
+        {
+            "input": "&SmallCircle;",
+            "description": "Named entity: SmallCircle; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2218"
+                ]
+            ]
+        },
+        {
+            "input": "&Sopf",
+            "description": "Bad named entity: Sopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Sopf"
+                ]
+            ]
+        },
+        {
+            "input": "&Sopf;",
+            "description": "Named entity: Sopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd4a"
+                ]
+            ]
+        },
+        {
+            "input": "&Sqrt",
+            "description": "Bad named entity: Sqrt without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Sqrt"
+                ]
+            ]
+        },
+        {
+            "input": "&Sqrt;",
+            "description": "Named entity: Sqrt; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u221a"
+                ]
+            ]
+        },
+        {
+            "input": "&Square",
+            "description": "Bad named entity: Square without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Square"
+                ]
+            ]
+        },
+        {
+            "input": "&Square;",
+            "description": "Named entity: Square; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25a1"
+                ]
+            ]
+        },
+        {
+            "input": "&SquareIntersection",
+            "description": "Bad named entity: SquareIntersection without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&SquareIntersection"
+                ]
+            ]
+        },
+        {
+            "input": "&SquareIntersection;",
+            "description": "Named entity: SquareIntersection; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2293"
+                ]
+            ]
+        },
+        {
+            "input": "&SquareSubset",
+            "description": "Bad named entity: SquareSubset without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&SquareSubset"
+                ]
+            ]
+        },
+        {
+            "input": "&SquareSubset;",
+            "description": "Named entity: SquareSubset; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u228f"
+                ]
+            ]
+        },
+        {
+            "input": "&SquareSubsetEqual",
+            "description": "Bad named entity: SquareSubsetEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&SquareSubsetEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&SquareSubsetEqual;",
+            "description": "Named entity: SquareSubsetEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2291"
+                ]
+            ]
+        },
+        {
+            "input": "&SquareSuperset",
+            "description": "Bad named entity: SquareSuperset without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&SquareSuperset"
+                ]
+            ]
+        },
+        {
+            "input": "&SquareSuperset;",
+            "description": "Named entity: SquareSuperset; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2290"
+                ]
+            ]
+        },
+        {
+            "input": "&SquareSupersetEqual",
+            "description": "Bad named entity: SquareSupersetEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&SquareSupersetEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&SquareSupersetEqual;",
+            "description": "Named entity: SquareSupersetEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2292"
+                ]
+            ]
+        },
+        {
+            "input": "&SquareUnion",
+            "description": "Bad named entity: SquareUnion without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&SquareUnion"
+                ]
+            ]
+        },
+        {
+            "input": "&SquareUnion;",
+            "description": "Named entity: SquareUnion; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2294"
+                ]
+            ]
+        },
+        {
+            "input": "&Sscr",
+            "description": "Bad named entity: Sscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Sscr"
+                ]
+            ]
+        },
+        {
+            "input": "&Sscr;",
+            "description": "Named entity: Sscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcae"
+                ]
+            ]
+        },
+        {
+            "input": "&Star",
+            "description": "Bad named entity: Star without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Star"
+                ]
+            ]
+        },
+        {
+            "input": "&Star;",
+            "description": "Named entity: Star; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22c6"
+                ]
+            ]
+        },
+        {
+            "input": "&Sub",
+            "description": "Bad named entity: Sub without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Sub"
+                ]
+            ]
+        },
+        {
+            "input": "&Sub;",
+            "description": "Named entity: Sub; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22d0"
+                ]
+            ]
+        },
+        {
+            "input": "&Subset",
+            "description": "Bad named entity: Subset without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Subset"
+                ]
+            ]
+        },
+        {
+            "input": "&Subset;",
+            "description": "Named entity: Subset; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22d0"
+                ]
+            ]
+        },
+        {
+            "input": "&SubsetEqual",
+            "description": "Bad named entity: SubsetEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&SubsetEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&SubsetEqual;",
+            "description": "Named entity: SubsetEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2286"
+                ]
+            ]
+        },
+        {
+            "input": "&Succeeds",
+            "description": "Bad named entity: Succeeds without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Succeeds"
+                ]
+            ]
+        },
+        {
+            "input": "&Succeeds;",
+            "description": "Named entity: Succeeds; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u227b"
+                ]
+            ]
+        },
+        {
+            "input": "&SucceedsEqual",
+            "description": "Bad named entity: SucceedsEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&SucceedsEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&SucceedsEqual;",
+            "description": "Named entity: SucceedsEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ab0"
+                ]
+            ]
+        },
+        {
+            "input": "&SucceedsSlantEqual",
+            "description": "Bad named entity: SucceedsSlantEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&SucceedsSlantEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&SucceedsSlantEqual;",
+            "description": "Named entity: SucceedsSlantEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u227d"
+                ]
+            ]
+        },
+        {
+            "input": "&SucceedsTilde",
+            "description": "Bad named entity: SucceedsTilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&SucceedsTilde"
+                ]
+            ]
+        },
+        {
+            "input": "&SucceedsTilde;",
+            "description": "Named entity: SucceedsTilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u227f"
+                ]
+            ]
+        },
+        {
+            "input": "&SuchThat",
+            "description": "Bad named entity: SuchThat without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&SuchThat"
+                ]
+            ]
+        },
+        {
+            "input": "&SuchThat;",
+            "description": "Named entity: SuchThat; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u220b"
+                ]
+            ]
+        },
+        {
+            "input": "&Sum",
+            "description": "Bad named entity: Sum without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Sum"
+                ]
+            ]
+        },
+        {
+            "input": "&Sum;",
+            "description": "Named entity: Sum; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2211"
+                ]
+            ]
+        },
+        {
+            "input": "&Sup",
+            "description": "Bad named entity: Sup without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Sup"
+                ]
+            ]
+        },
+        {
+            "input": "&Sup;",
+            "description": "Named entity: Sup; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22d1"
+                ]
+            ]
+        },
+        {
+            "input": "&Superset",
+            "description": "Bad named entity: Superset without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Superset"
+                ]
+            ]
+        },
+        {
+            "input": "&Superset;",
+            "description": "Named entity: Superset; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2283"
+                ]
+            ]
+        },
+        {
+            "input": "&SupersetEqual",
+            "description": "Bad named entity: SupersetEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&SupersetEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&SupersetEqual;",
+            "description": "Named entity: SupersetEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2287"
+                ]
+            ]
+        },
+        {
+            "input": "&Supset",
+            "description": "Bad named entity: Supset without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Supset"
+                ]
+            ]
+        },
+        {
+            "input": "&Supset;",
+            "description": "Named entity: Supset; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22d1"
+                ]
+            ]
+        },
+        {
+            "input": "&THORN",
+            "description": "Named entity: THORN without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00de"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 7 }
+            ]
+        },
+        {
+            "input": "&THORN;",
+            "description": "Named entity: THORN; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00de"
+                ]
+            ]
+        },
+        {
+            "input": "&TRADE",
+            "description": "Bad named entity: TRADE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&TRADE"
+                ]
+            ]
+        },
+        {
+            "input": "&TRADE;",
+            "description": "Named entity: TRADE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2122"
+                ]
+            ]
+        },
+        {
+            "input": "&TSHcy",
+            "description": "Bad named entity: TSHcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&TSHcy"
+                ]
+            ]
+        },
+        {
+            "input": "&TSHcy;",
+            "description": "Named entity: TSHcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u040b"
+                ]
+            ]
+        },
+        {
+            "input": "&TScy",
+            "description": "Bad named entity: TScy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&TScy"
+                ]
+            ]
+        },
+        {
+            "input": "&TScy;",
+            "description": "Named entity: TScy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0426"
+                ]
+            ]
+        },
+        {
+            "input": "&Tab",
+            "description": "Bad named entity: Tab without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Tab"
+                ]
+            ]
+        },
+        {
+            "input": "&Tab;",
+            "description": "Named entity: Tab; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\t"
+                ]
+            ]
+        },
+        {
+            "input": "&Tau",
+            "description": "Bad named entity: Tau without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Tau"
+                ]
+            ]
+        },
+        {
+            "input": "&Tau;",
+            "description": "Named entity: Tau; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03a4"
+                ]
+            ]
+        },
+        {
+            "input": "&Tcaron",
+            "description": "Bad named entity: Tcaron without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Tcaron"
+                ]
+            ]
+        },
+        {
+            "input": "&Tcaron;",
+            "description": "Named entity: Tcaron; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0164"
+                ]
+            ]
+        },
+        {
+            "input": "&Tcedil",
+            "description": "Bad named entity: Tcedil without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Tcedil"
+                ]
+            ]
+        },
+        {
+            "input": "&Tcedil;",
+            "description": "Named entity: Tcedil; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0162"
+                ]
+            ]
+        },
+        {
+            "input": "&Tcy",
+            "description": "Bad named entity: Tcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Tcy"
+                ]
+            ]
+        },
+        {
+            "input": "&Tcy;",
+            "description": "Named entity: Tcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0422"
+                ]
+            ]
+        },
+        {
+            "input": "&Tfr",
+            "description": "Bad named entity: Tfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Tfr"
+                ]
+            ]
+        },
+        {
+            "input": "&Tfr;",
+            "description": "Named entity: Tfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd17"
+                ]
+            ]
+        },
+        {
+            "input": "&Therefore",
+            "description": "Bad named entity: Therefore without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Therefore"
+                ]
+            ]
+        },
+        {
+            "input": "&Therefore;",
+            "description": "Named entity: Therefore; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2234"
+                ]
+            ]
+        },
+        {
+            "input": "&Theta",
+            "description": "Bad named entity: Theta without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Theta"
+                ]
+            ]
+        },
+        {
+            "input": "&Theta;",
+            "description": "Named entity: Theta; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0398"
+                ]
+            ]
+        },
+        {
+            "input": "&ThickSpace",
+            "description": "Bad named entity: ThickSpace without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ThickSpace"
+                ]
+            ]
+        },
+        {
+            "input": "&ThickSpace;",
+            "description": "Named entity: ThickSpace; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u205f\u200a"
+                ]
+            ]
+        },
+        {
+            "input": "&ThinSpace",
+            "description": "Bad named entity: ThinSpace without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ThinSpace"
+                ]
+            ]
+        },
+        {
+            "input": "&ThinSpace;",
+            "description": "Named entity: ThinSpace; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2009"
+                ]
+            ]
+        },
+        {
+            "input": "&Tilde",
+            "description": "Bad named entity: Tilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Tilde"
+                ]
+            ]
+        },
+        {
+            "input": "&Tilde;",
+            "description": "Named entity: Tilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u223c"
+                ]
+            ]
+        },
+        {
+            "input": "&TildeEqual",
+            "description": "Bad named entity: TildeEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&TildeEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&TildeEqual;",
+            "description": "Named entity: TildeEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2243"
+                ]
+            ]
+        },
+        {
+            "input": "&TildeFullEqual",
+            "description": "Bad named entity: TildeFullEqual without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&TildeFullEqual"
+                ]
+            ]
+        },
+        {
+            "input": "&TildeFullEqual;",
+            "description": "Named entity: TildeFullEqual; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2245"
+                ]
+            ]
+        },
+        {
+            "input": "&TildeTilde",
+            "description": "Bad named entity: TildeTilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&TildeTilde"
+                ]
+            ]
+        },
+        {
+            "input": "&TildeTilde;",
+            "description": "Named entity: TildeTilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2248"
+                ]
+            ]
+        },
+        {
+            "input": "&Topf",
+            "description": "Bad named entity: Topf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Topf"
+                ]
+            ]
+        },
+        {
+            "input": "&Topf;",
+            "description": "Named entity: Topf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd4b"
+                ]
+            ]
+        },
+        {
+            "input": "&TripleDot",
+            "description": "Bad named entity: TripleDot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&TripleDot"
+                ]
+            ]
+        },
+        {
+            "input": "&TripleDot;",
+            "description": "Named entity: TripleDot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u20db"
+                ]
+            ]
+        },
+        {
+            "input": "&Tscr",
+            "description": "Bad named entity: Tscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Tscr"
+                ]
+            ]
+        },
+        {
+            "input": "&Tscr;",
+            "description": "Named entity: Tscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcaf"
+                ]
+            ]
+        },
+        {
+            "input": "&Tstrok",
+            "description": "Bad named entity: Tstrok without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Tstrok"
+                ]
+            ]
+        },
+        {
+            "input": "&Tstrok;",
+            "description": "Named entity: Tstrok; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0166"
+                ]
+            ]
+        },
+        {
+            "input": "&Uacute",
+            "description": "Named entity: Uacute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00da"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&Uacute;",
+            "description": "Named entity: Uacute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00da"
+                ]
+            ]
+        },
+        {
+            "input": "&Uarr",
+            "description": "Bad named entity: Uarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Uarr"
+                ]
+            ]
+        },
+        {
+            "input": "&Uarr;",
+            "description": "Named entity: Uarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u219f"
+                ]
+            ]
+        },
+        {
+            "input": "&Uarrocir",
+            "description": "Bad named entity: Uarrocir without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Uarrocir"
+                ]
+            ]
+        },
+        {
+            "input": "&Uarrocir;",
+            "description": "Named entity: Uarrocir; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2949"
+                ]
+            ]
+        },
+        {
+            "input": "&Ubrcy",
+            "description": "Bad named entity: Ubrcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Ubrcy"
+                ]
+            ]
+        },
+        {
+            "input": "&Ubrcy;",
+            "description": "Named entity: Ubrcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u040e"
+                ]
+            ]
+        },
+        {
+            "input": "&Ubreve",
+            "description": "Bad named entity: Ubreve without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Ubreve"
+                ]
+            ]
+        },
+        {
+            "input": "&Ubreve;",
+            "description": "Named entity: Ubreve; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u016c"
+                ]
+            ]
+        },
+        {
+            "input": "&Ucirc",
+            "description": "Named entity: Ucirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00db"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 7 }
+            ]
+        },
+        {
+            "input": "&Ucirc;",
+            "description": "Named entity: Ucirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00db"
+                ]
+            ]
+        },
+        {
+            "input": "&Ucy",
+            "description": "Bad named entity: Ucy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Ucy"
+                ]
+            ]
+        },
+        {
+            "input": "&Ucy;",
+            "description": "Named entity: Ucy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0423"
+                ]
+            ]
+        },
+        {
+            "input": "&Udblac",
+            "description": "Bad named entity: Udblac without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Udblac"
+                ]
+            ]
+        },
+        {
+            "input": "&Udblac;",
+            "description": "Named entity: Udblac; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0170"
+                ]
+            ]
+        },
+        {
+            "input": "&Ufr",
+            "description": "Bad named entity: Ufr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Ufr"
+                ]
+            ]
+        },
+        {
+            "input": "&Ufr;",
+            "description": "Named entity: Ufr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd18"
+                ]
+            ]
+        },
+        {
+            "input": "&Ugrave",
+            "description": "Named entity: Ugrave without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00d9"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&Ugrave;",
+            "description": "Named entity: Ugrave; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00d9"
+                ]
+            ]
+        },
+        {
+            "input": "&Umacr",
+            "description": "Bad named entity: Umacr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Umacr"
+                ]
+            ]
+        },
+        {
+            "input": "&Umacr;",
+            "description": "Named entity: Umacr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u016a"
+                ]
+            ]
+        },
+        {
+            "input": "&UnderBar",
+            "description": "Bad named entity: UnderBar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&UnderBar"
+                ]
+            ]
+        },
+        {
+            "input": "&UnderBar;",
+            "description": "Named entity: UnderBar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "_"
+                ]
+            ]
+        },
+        {
+            "input": "&UnderBrace",
+            "description": "Bad named entity: UnderBrace without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&UnderBrace"
+                ]
+            ]
+        },
+        {
+            "input": "&UnderBrace;",
+            "description": "Named entity: UnderBrace; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u23df"
+                ]
+            ]
+        },
+        {
+            "input": "&UnderBracket",
+            "description": "Bad named entity: UnderBracket without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&UnderBracket"
+                ]
+            ]
+        },
+        {
+            "input": "&UnderBracket;",
+            "description": "Named entity: UnderBracket; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u23b5"
+                ]
+            ]
+        },
+        {
+            "input": "&UnderParenthesis",
+            "description": "Bad named entity: UnderParenthesis without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&UnderParenthesis"
+                ]
+            ]
+        },
+        {
+            "input": "&UnderParenthesis;",
+            "description": "Named entity: UnderParenthesis; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u23dd"
+                ]
+            ]
+        },
+        {
+            "input": "&Union",
+            "description": "Bad named entity: Union without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Union"
+                ]
+            ]
+        },
+        {
+            "input": "&Union;",
+            "description": "Named entity: Union; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22c3"
+                ]
+            ]
+        },
+        {
+            "input": "&UnionPlus",
+            "description": "Bad named entity: UnionPlus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&UnionPlus"
+                ]
+            ]
+        },
+        {
+            "input": "&UnionPlus;",
+            "description": "Named entity: UnionPlus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u228e"
+                ]
+            ]
+        },
+        {
+            "input": "&Uogon",
+            "description": "Bad named entity: Uogon without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Uogon"
+                ]
+            ]
+        },
+        {
+            "input": "&Uogon;",
+            "description": "Named entity: Uogon; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0172"
+                ]
+            ]
+        },
+        {
+            "input": "&Uopf",
+            "description": "Bad named entity: Uopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Uopf"
+                ]
+            ]
+        },
+        {
+            "input": "&Uopf;",
+            "description": "Named entity: Uopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd4c"
+                ]
+            ]
+        },
+        {
+            "input": "&UpArrow",
+            "description": "Bad named entity: UpArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&UpArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&UpArrow;",
+            "description": "Named entity: UpArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2191"
+                ]
+            ]
+        },
+        {
+            "input": "&UpArrowBar",
+            "description": "Bad named entity: UpArrowBar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&UpArrowBar"
+                ]
+            ]
+        },
+        {
+            "input": "&UpArrowBar;",
+            "description": "Named entity: UpArrowBar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2912"
+                ]
+            ]
+        },
+        {
+            "input": "&UpArrowDownArrow",
+            "description": "Bad named entity: UpArrowDownArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&UpArrowDownArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&UpArrowDownArrow;",
+            "description": "Named entity: UpArrowDownArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c5"
+                ]
+            ]
+        },
+        {
+            "input": "&UpDownArrow",
+            "description": "Bad named entity: UpDownArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&UpDownArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&UpDownArrow;",
+            "description": "Named entity: UpDownArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2195"
+                ]
+            ]
+        },
+        {
+            "input": "&UpEquilibrium",
+            "description": "Bad named entity: UpEquilibrium without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&UpEquilibrium"
+                ]
+            ]
+        },
+        {
+            "input": "&UpEquilibrium;",
+            "description": "Named entity: UpEquilibrium; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u296e"
+                ]
+            ]
+        },
+        {
+            "input": "&UpTee",
+            "description": "Bad named entity: UpTee without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&UpTee"
+                ]
+            ]
+        },
+        {
+            "input": "&UpTee;",
+            "description": "Named entity: UpTee; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22a5"
+                ]
+            ]
+        },
+        {
+            "input": "&UpTeeArrow",
+            "description": "Bad named entity: UpTeeArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&UpTeeArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&UpTeeArrow;",
+            "description": "Named entity: UpTeeArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21a5"
+                ]
+            ]
+        },
+        {
+            "input": "&Uparrow",
+            "description": "Bad named entity: Uparrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Uparrow"
+                ]
+            ]
+        },
+        {
+            "input": "&Uparrow;",
+            "description": "Named entity: Uparrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21d1"
+                ]
+            ]
+        },
+        {
+            "input": "&Updownarrow",
+            "description": "Bad named entity: Updownarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Updownarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&Updownarrow;",
+            "description": "Named entity: Updownarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21d5"
+                ]
+            ]
+        },
+        {
+            "input": "&UpperLeftArrow",
+            "description": "Bad named entity: UpperLeftArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&UpperLeftArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&UpperLeftArrow;",
+            "description": "Named entity: UpperLeftArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2196"
+                ]
+            ]
+        },
+        {
+            "input": "&UpperRightArrow",
+            "description": "Bad named entity: UpperRightArrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&UpperRightArrow"
+                ]
+            ]
+        },
+        {
+            "input": "&UpperRightArrow;",
+            "description": "Named entity: UpperRightArrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2197"
+                ]
+            ]
+        },
+        {
+            "input": "&Upsi",
+            "description": "Bad named entity: Upsi without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Upsi"
+                ]
+            ]
+        },
+        {
+            "input": "&Upsi;",
+            "description": "Named entity: Upsi; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03d2"
+                ]
+            ]
+        },
+        {
+            "input": "&Upsilon",
+            "description": "Bad named entity: Upsilon without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Upsilon"
+                ]
+            ]
+        },
+        {
+            "input": "&Upsilon;",
+            "description": "Named entity: Upsilon; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03a5"
+                ]
+            ]
+        },
+        {
+            "input": "&Uring",
+            "description": "Bad named entity: Uring without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Uring"
+                ]
+            ]
+        },
+        {
+            "input": "&Uring;",
+            "description": "Named entity: Uring; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u016e"
+                ]
+            ]
+        },
+        {
+            "input": "&Uscr",
+            "description": "Bad named entity: Uscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Uscr"
+                ]
+            ]
+        },
+        {
+            "input": "&Uscr;",
+            "description": "Named entity: Uscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcb0"
+                ]
+            ]
+        },
+        {
+            "input": "&Utilde",
+            "description": "Bad named entity: Utilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Utilde"
+                ]
+            ]
+        },
+        {
+            "input": "&Utilde;",
+            "description": "Named entity: Utilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0168"
+                ]
+            ]
+        },
+        {
+            "input": "&Uuml",
+            "description": "Named entity: Uuml without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00dc"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 6 }
+            ]
+        },
+        {
+            "input": "&Uuml;",
+            "description": "Named entity: Uuml; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00dc"
+                ]
+            ]
+        },
+        {
+            "input": "&VDash",
+            "description": "Bad named entity: VDash without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&VDash"
+                ]
+            ]
+        },
+        {
+            "input": "&VDash;",
+            "description": "Named entity: VDash; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22ab"
+                ]
+            ]
+        },
+        {
+            "input": "&Vbar",
+            "description": "Bad named entity: Vbar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Vbar"
+                ]
+            ]
+        },
+        {
+            "input": "&Vbar;",
+            "description": "Named entity: Vbar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aeb"
+                ]
+            ]
+        },
+        {
+            "input": "&Vcy",
+            "description": "Bad named entity: Vcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Vcy"
+                ]
+            ]
+        },
+        {
+            "input": "&Vcy;",
+            "description": "Named entity: Vcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0412"
+                ]
+            ]
+        },
+        {
+            "input": "&Vdash",
+            "description": "Bad named entity: Vdash without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Vdash"
+                ]
+            ]
+        },
+        {
+            "input": "&Vdash;",
+            "description": "Named entity: Vdash; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22a9"
+                ]
+            ]
+        },
+        {
+            "input": "&Vdashl",
+            "description": "Bad named entity: Vdashl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Vdashl"
+                ]
+            ]
+        },
+        {
+            "input": "&Vdashl;",
+            "description": "Named entity: Vdashl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ae6"
+                ]
+            ]
+        },
+        {
+            "input": "&Vee",
+            "description": "Bad named entity: Vee without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Vee"
+                ]
+            ]
+        },
+        {
+            "input": "&Vee;",
+            "description": "Named entity: Vee; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22c1"
+                ]
+            ]
+        },
+        {
+            "input": "&Verbar",
+            "description": "Bad named entity: Verbar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Verbar"
+                ]
+            ]
+        },
+        {
+            "input": "&Verbar;",
+            "description": "Named entity: Verbar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2016"
+                ]
+            ]
+        },
+        {
+            "input": "&Vert",
+            "description": "Bad named entity: Vert without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Vert"
+                ]
+            ]
+        },
+        {
+            "input": "&Vert;",
+            "description": "Named entity: Vert; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2016"
+                ]
+            ]
+        },
+        {
+            "input": "&VerticalBar",
+            "description": "Bad named entity: VerticalBar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&VerticalBar"
+                ]
+            ]
+        },
+        {
+            "input": "&VerticalBar;",
+            "description": "Named entity: VerticalBar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2223"
+                ]
+            ]
+        },
+        {
+            "input": "&VerticalLine",
+            "description": "Bad named entity: VerticalLine without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&VerticalLine"
+                ]
+            ]
+        },
+        {
+            "input": "&VerticalLine;",
+            "description": "Named entity: VerticalLine; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "|"
+                ]
+            ]
+        },
+        {
+            "input": "&VerticalSeparator",
+            "description": "Bad named entity: VerticalSeparator without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&VerticalSeparator"
+                ]
+            ]
+        },
+        {
+            "input": "&VerticalSeparator;",
+            "description": "Named entity: VerticalSeparator; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2758"
+                ]
+            ]
+        },
+        {
+            "input": "&VerticalTilde",
+            "description": "Bad named entity: VerticalTilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&VerticalTilde"
+                ]
+            ]
+        },
+        {
+            "input": "&VerticalTilde;",
+            "description": "Named entity: VerticalTilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2240"
+                ]
+            ]
+        },
+        {
+            "input": "&VeryThinSpace",
+            "description": "Bad named entity: VeryThinSpace without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&VeryThinSpace"
+                ]
+            ]
+        },
+        {
+            "input": "&VeryThinSpace;",
+            "description": "Named entity: VeryThinSpace; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u200a"
+                ]
+            ]
+        },
+        {
+            "input": "&Vfr",
+            "description": "Bad named entity: Vfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Vfr"
+                ]
+            ]
+        },
+        {
+            "input": "&Vfr;",
+            "description": "Named entity: Vfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd19"
+                ]
+            ]
+        },
+        {
+            "input": "&Vopf",
+            "description": "Bad named entity: Vopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Vopf"
+                ]
+            ]
+        },
+        {
+            "input": "&Vopf;",
+            "description": "Named entity: Vopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd4d"
+                ]
+            ]
+        },
+        {
+            "input": "&Vscr",
+            "description": "Bad named entity: Vscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Vscr"
+                ]
+            ]
+        },
+        {
+            "input": "&Vscr;",
+            "description": "Named entity: Vscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcb1"
+                ]
+            ]
+        },
+        {
+            "input": "&Vvdash",
+            "description": "Bad named entity: Vvdash without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Vvdash"
+                ]
+            ]
+        },
+        {
+            "input": "&Vvdash;",
+            "description": "Named entity: Vvdash; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22aa"
+                ]
+            ]
+        },
+        {
+            "input": "&Wcirc",
+            "description": "Bad named entity: Wcirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Wcirc"
+                ]
+            ]
+        },
+        {
+            "input": "&Wcirc;",
+            "description": "Named entity: Wcirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0174"
+                ]
+            ]
+        },
+        {
+            "input": "&Wedge",
+            "description": "Bad named entity: Wedge without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Wedge"
+                ]
+            ]
+        },
+        {
+            "input": "&Wedge;",
+            "description": "Named entity: Wedge; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22c0"
+                ]
+            ]
+        },
+        {
+            "input": "&Wfr",
+            "description": "Bad named entity: Wfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Wfr"
+                ]
+            ]
+        },
+        {
+            "input": "&Wfr;",
+            "description": "Named entity: Wfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd1a"
+                ]
+            ]
+        },
+        {
+            "input": "&Wopf",
+            "description": "Bad named entity: Wopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Wopf"
+                ]
+            ]
+        },
+        {
+            "input": "&Wopf;",
+            "description": "Named entity: Wopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd4e"
+                ]
+            ]
+        },
+        {
+            "input": "&Wscr",
+            "description": "Bad named entity: Wscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Wscr"
+                ]
+            ]
+        },
+        {
+            "input": "&Wscr;",
+            "description": "Named entity: Wscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcb2"
+                ]
+            ]
+        },
+        {
+            "input": "&Xfr",
+            "description": "Bad named entity: Xfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Xfr"
+                ]
+            ]
+        },
+        {
+            "input": "&Xfr;",
+            "description": "Named entity: Xfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd1b"
+                ]
+            ]
+        },
+        {
+            "input": "&Xi",
+            "description": "Bad named entity: Xi without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Xi"
+                ]
+            ]
+        },
+        {
+            "input": "&Xi;",
+            "description": "Named entity: Xi; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u039e"
+                ]
+            ]
+        },
+        {
+            "input": "&Xopf",
+            "description": "Bad named entity: Xopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Xopf"
+                ]
+            ]
+        },
+        {
+            "input": "&Xopf;",
+            "description": "Named entity: Xopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd4f"
+                ]
+            ]
+        },
+        {
+            "input": "&Xscr",
+            "description": "Bad named entity: Xscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Xscr"
+                ]
+            ]
+        },
+        {
+            "input": "&Xscr;",
+            "description": "Named entity: Xscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcb3"
+                ]
+            ]
+        },
+        {
+            "input": "&YAcy",
+            "description": "Bad named entity: YAcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&YAcy"
+                ]
+            ]
+        },
+        {
+            "input": "&YAcy;",
+            "description": "Named entity: YAcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u042f"
+                ]
+            ]
+        },
+        {
+            "input": "&YIcy",
+            "description": "Bad named entity: YIcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&YIcy"
+                ]
+            ]
+        },
+        {
+            "input": "&YIcy;",
+            "description": "Named entity: YIcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0407"
+                ]
+            ]
+        },
+        {
+            "input": "&YUcy",
+            "description": "Bad named entity: YUcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&YUcy"
+                ]
+            ]
+        },
+        {
+            "input": "&YUcy;",
+            "description": "Named entity: YUcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u042e"
+                ]
+            ]
+        },
+        {
+            "input": "&Yacute",
+            "description": "Named entity: Yacute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00dd"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&Yacute;",
+            "description": "Named entity: Yacute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00dd"
+                ]
+            ]
+        },
+        {
+            "input": "&Ycirc",
+            "description": "Bad named entity: Ycirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Ycirc"
+                ]
+            ]
+        },
+        {
+            "input": "&Ycirc;",
+            "description": "Named entity: Ycirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0176"
+                ]
+            ]
+        },
+        {
+            "input": "&Ycy",
+            "description": "Bad named entity: Ycy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Ycy"
+                ]
+            ]
+        },
+        {
+            "input": "&Ycy;",
+            "description": "Named entity: Ycy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u042b"
+                ]
+            ]
+        },
+        {
+            "input": "&Yfr",
+            "description": "Bad named entity: Yfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Yfr"
+                ]
+            ]
+        },
+        {
+            "input": "&Yfr;",
+            "description": "Named entity: Yfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd1c"
+                ]
+            ]
+        },
+        {
+            "input": "&Yopf",
+            "description": "Bad named entity: Yopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Yopf"
+                ]
+            ]
+        },
+        {
+            "input": "&Yopf;",
+            "description": "Named entity: Yopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd50"
+                ]
+            ]
+        },
+        {
+            "input": "&Yscr",
+            "description": "Bad named entity: Yscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Yscr"
+                ]
+            ]
+        },
+        {
+            "input": "&Yscr;",
+            "description": "Named entity: Yscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcb4"
+                ]
+            ]
+        },
+        {
+            "input": "&Yuml",
+            "description": "Bad named entity: Yuml without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Yuml"
+                ]
+            ]
+        },
+        {
+            "input": "&Yuml;",
+            "description": "Named entity: Yuml; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0178"
+                ]
+            ]
+        },
+        {
+            "input": "&ZHcy",
+            "description": "Bad named entity: ZHcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ZHcy"
+                ]
+            ]
+        },
+        {
+            "input": "&ZHcy;",
+            "description": "Named entity: ZHcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0416"
+                ]
+            ]
+        },
+        {
+            "input": "&Zacute",
+            "description": "Bad named entity: Zacute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Zacute"
+                ]
+            ]
+        },
+        {
+            "input": "&Zacute;",
+            "description": "Named entity: Zacute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0179"
+                ]
+            ]
+        },
+        {
+            "input": "&Zcaron",
+            "description": "Bad named entity: Zcaron without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Zcaron"
+                ]
+            ]
+        },
+        {
+            "input": "&Zcaron;",
+            "description": "Named entity: Zcaron; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u017d"
+                ]
+            ]
+        },
+        {
+            "input": "&Zcy",
+            "description": "Bad named entity: Zcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Zcy"
+                ]
+            ]
+        },
+        {
+            "input": "&Zcy;",
+            "description": "Named entity: Zcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0417"
+                ]
+            ]
+        },
+        {
+            "input": "&Zdot",
+            "description": "Bad named entity: Zdot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Zdot"
+                ]
+            ]
+        },
+        {
+            "input": "&Zdot;",
+            "description": "Named entity: Zdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u017b"
+                ]
+            ]
+        },
+        {
+            "input": "&ZeroWidthSpace",
+            "description": "Bad named entity: ZeroWidthSpace without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ZeroWidthSpace"
+                ]
+            ]
+        },
+        {
+            "input": "&ZeroWidthSpace;",
+            "description": "Named entity: ZeroWidthSpace; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u200b"
+                ]
+            ]
+        },
+        {
+            "input": "&Zeta",
+            "description": "Bad named entity: Zeta without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Zeta"
+                ]
+            ]
+        },
+        {
+            "input": "&Zeta;",
+            "description": "Named entity: Zeta; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0396"
+                ]
+            ]
+        },
+        {
+            "input": "&Zfr",
+            "description": "Bad named entity: Zfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Zfr"
+                ]
+            ]
+        },
+        {
+            "input": "&Zfr;",
+            "description": "Named entity: Zfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2128"
+                ]
+            ]
+        },
+        {
+            "input": "&Zopf",
+            "description": "Bad named entity: Zopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Zopf"
+                ]
+            ]
+        },
+        {
+            "input": "&Zopf;",
+            "description": "Named entity: Zopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2124"
+                ]
+            ]
+        },
+        {
+            "input": "&Zscr",
+            "description": "Bad named entity: Zscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&Zscr"
+                ]
+            ]
+        },
+        {
+            "input": "&Zscr;",
+            "description": "Named entity: Zscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcb5"
+                ]
+            ]
+        },
+        {
+            "input": "&aacute",
+            "description": "Named entity: aacute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00e1"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&aacute;",
+            "description": "Named entity: aacute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00e1"
+                ]
+            ]
+        },
+        {
+            "input": "&abreve",
+            "description": "Bad named entity: abreve without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&abreve"
+                ]
+            ]
+        },
+        {
+            "input": "&abreve;",
+            "description": "Named entity: abreve; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0103"
+                ]
+            ]
+        },
+        {
+            "input": "&ac",
+            "description": "Bad named entity: ac without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ac"
+                ]
+            ]
+        },
+        {
+            "input": "&ac;",
+            "description": "Named entity: ac; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u223e"
+                ]
+            ]
+        },
+        {
+            "input": "&acE",
+            "description": "Bad named entity: acE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&acE"
+                ]
+            ]
+        },
+        {
+            "input": "&acE;",
+            "description": "Named entity: acE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u223e\u0333"
+                ]
+            ]
+        },
+        {
+            "input": "&acd",
+            "description": "Bad named entity: acd without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&acd"
+                ]
+            ]
+        },
+        {
+            "input": "&acd;",
+            "description": "Named entity: acd; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u223f"
+                ]
+            ]
+        },
+        {
+            "input": "&acirc",
+            "description": "Named entity: acirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00e2"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 7 }
+            ]
+        },
+        {
+            "input": "&acirc;",
+            "description": "Named entity: acirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00e2"
+                ]
+            ]
+        },
+        {
+            "input": "&acute",
+            "description": "Named entity: acute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b4"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 7 }
+            ]
+        },
+        {
+            "input": "&acute;",
+            "description": "Named entity: acute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b4"
+                ]
+            ]
+        },
+        {
+            "input": "&acy",
+            "description": "Bad named entity: acy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&acy"
+                ]
+            ]
+        },
+        {
+            "input": "&acy;",
+            "description": "Named entity: acy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0430"
+                ]
+            ]
+        },
+        {
+            "input": "&aelig",
+            "description": "Named entity: aelig without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00e6"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 7 }
+            ]
+        },
+        {
+            "input": "&aelig;",
+            "description": "Named entity: aelig; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00e6"
+                ]
+            ]
+        },
+        {
+            "input": "&af",
+            "description": "Bad named entity: af without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&af"
+                ]
+            ]
+        },
+        {
+            "input": "&af;",
+            "description": "Named entity: af; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2061"
+                ]
+            ]
+        },
+        {
+            "input": "&afr",
+            "description": "Bad named entity: afr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&afr"
+                ]
+            ]
+        },
+        {
+            "input": "&afr;",
+            "description": "Named entity: afr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd1e"
+                ]
+            ]
+        },
+        {
+            "input": "&agrave",
+            "description": "Named entity: agrave without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00e0"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&agrave;",
+            "description": "Named entity: agrave; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00e0"
+                ]
+            ]
+        },
+        {
+            "input": "&alefsym",
+            "description": "Bad named entity: alefsym without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&alefsym"
+                ]
+            ]
+        },
+        {
+            "input": "&alefsym;",
+            "description": "Named entity: alefsym; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2135"
+                ]
+            ]
+        },
+        {
+            "input": "&aleph",
+            "description": "Bad named entity: aleph without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&aleph"
+                ]
+            ]
+        },
+        {
+            "input": "&aleph;",
+            "description": "Named entity: aleph; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2135"
+                ]
+            ]
+        },
+        {
+            "input": "&alpha",
+            "description": "Bad named entity: alpha without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&alpha"
+                ]
+            ]
+        },
+        {
+            "input": "&alpha;",
+            "description": "Named entity: alpha; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03b1"
+                ]
+            ]
+        },
+        {
+            "input": "&amacr",
+            "description": "Bad named entity: amacr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&amacr"
+                ]
+            ]
+        },
+        {
+            "input": "&amacr;",
+            "description": "Named entity: amacr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0101"
+                ]
+            ]
+        },
+        {
+            "input": "&amalg",
+            "description": "Bad named entity: amalg without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&amalg"
+                ]
+            ]
+        },
+        {
+            "input": "&amalg;",
+            "description": "Named entity: amalg; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a3f"
+                ]
+            ]
+        },
+        {
+            "input": "&amp",
+            "description": "Named entity: amp without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 5 }
+            ]
+        },
+        {
+            "input": "&amp;",
+            "description": "Named entity: amp; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&"
+                ]
+            ]
+        },
+        {
+            "input": "&and",
+            "description": "Bad named entity: and without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&and"
+                ]
+            ]
+        },
+        {
+            "input": "&and;",
+            "description": "Named entity: and; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2227"
+                ]
+            ]
+        },
+        {
+            "input": "&andand",
+            "description": "Bad named entity: andand without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&andand"
+                ]
+            ]
+        },
+        {
+            "input": "&andand;",
+            "description": "Named entity: andand; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a55"
+                ]
+            ]
+        },
+        {
+            "input": "&andd",
+            "description": "Bad named entity: andd without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&andd"
+                ]
+            ]
+        },
+        {
+            "input": "&andd;",
+            "description": "Named entity: andd; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a5c"
+                ]
+            ]
+        },
+        {
+            "input": "&andslope",
+            "description": "Bad named entity: andslope without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&andslope"
+                ]
+            ]
+        },
+        {
+            "input": "&andslope;",
+            "description": "Named entity: andslope; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a58"
+                ]
+            ]
+        },
+        {
+            "input": "&andv",
+            "description": "Bad named entity: andv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&andv"
+                ]
+            ]
+        },
+        {
+            "input": "&andv;",
+            "description": "Named entity: andv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a5a"
+                ]
+            ]
+        },
+        {
+            "input": "&ang",
+            "description": "Bad named entity: ang without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ang"
+                ]
+            ]
+        },
+        {
+            "input": "&ang;",
+            "description": "Named entity: ang; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2220"
+                ]
+            ]
+        },
+        {
+            "input": "&ange",
+            "description": "Bad named entity: ange without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ange"
+                ]
+            ]
+        },
+        {
+            "input": "&ange;",
+            "description": "Named entity: ange; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29a4"
+                ]
+            ]
+        },
+        {
+            "input": "&angle",
+            "description": "Bad named entity: angle without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&angle"
+                ]
+            ]
+        },
+        {
+            "input": "&angle;",
+            "description": "Named entity: angle; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2220"
+                ]
+            ]
+        },
+        {
+            "input": "&angmsd",
+            "description": "Bad named entity: angmsd without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&angmsd"
+                ]
+            ]
+        },
+        {
+            "input": "&angmsd;",
+            "description": "Named entity: angmsd; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2221"
+                ]
+            ]
+        },
+        {
+            "input": "&angmsdaa",
+            "description": "Bad named entity: angmsdaa without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&angmsdaa"
+                ]
+            ]
+        },
+        {
+            "input": "&angmsdaa;",
+            "description": "Named entity: angmsdaa; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29a8"
+                ]
+            ]
+        },
+        {
+            "input": "&angmsdab",
+            "description": "Bad named entity: angmsdab without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&angmsdab"
+                ]
+            ]
+        },
+        {
+            "input": "&angmsdab;",
+            "description": "Named entity: angmsdab; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29a9"
+                ]
+            ]
+        },
+        {
+            "input": "&angmsdac",
+            "description": "Bad named entity: angmsdac without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&angmsdac"
+                ]
+            ]
+        },
+        {
+            "input": "&angmsdac;",
+            "description": "Named entity: angmsdac; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29aa"
+                ]
+            ]
+        },
+        {
+            "input": "&angmsdad",
+            "description": "Bad named entity: angmsdad without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&angmsdad"
+                ]
+            ]
+        },
+        {
+            "input": "&angmsdad;",
+            "description": "Named entity: angmsdad; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29ab"
+                ]
+            ]
+        },
+        {
+            "input": "&angmsdae",
+            "description": "Bad named entity: angmsdae without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&angmsdae"
+                ]
+            ]
+        },
+        {
+            "input": "&angmsdae;",
+            "description": "Named entity: angmsdae; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29ac"
+                ]
+            ]
+        },
+        {
+            "input": "&angmsdaf",
+            "description": "Bad named entity: angmsdaf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&angmsdaf"
+                ]
+            ]
+        },
+        {
+            "input": "&angmsdaf;",
+            "description": "Named entity: angmsdaf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29ad"
+                ]
+            ]
+        },
+        {
+            "input": "&angmsdag",
+            "description": "Bad named entity: angmsdag without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&angmsdag"
+                ]
+            ]
+        },
+        {
+            "input": "&angmsdag;",
+            "description": "Named entity: angmsdag; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29ae"
+                ]
+            ]
+        },
+        {
+            "input": "&angmsdah",
+            "description": "Bad named entity: angmsdah without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&angmsdah"
+                ]
+            ]
+        },
+        {
+            "input": "&angmsdah;",
+            "description": "Named entity: angmsdah; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29af"
+                ]
+            ]
+        },
+        {
+            "input": "&angrt",
+            "description": "Bad named entity: angrt without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&angrt"
+                ]
+            ]
+        },
+        {
+            "input": "&angrt;",
+            "description": "Named entity: angrt; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u221f"
+                ]
+            ]
+        },
+        {
+            "input": "&angrtvb",
+            "description": "Bad named entity: angrtvb without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&angrtvb"
+                ]
+            ]
+        },
+        {
+            "input": "&angrtvb;",
+            "description": "Named entity: angrtvb; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22be"
+                ]
+            ]
+        },
+        {
+            "input": "&angrtvbd",
+            "description": "Bad named entity: angrtvbd without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&angrtvbd"
+                ]
+            ]
+        },
+        {
+            "input": "&angrtvbd;",
+            "description": "Named entity: angrtvbd; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u299d"
+                ]
+            ]
+        },
+        {
+            "input": "&angsph",
+            "description": "Bad named entity: angsph without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&angsph"
+                ]
+            ]
+        },
+        {
+            "input": "&angsph;",
+            "description": "Named entity: angsph; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2222"
+                ]
+            ]
+        },
+        {
+            "input": "&angst",
+            "description": "Bad named entity: angst without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&angst"
+                ]
+            ]
+        },
+        {
+            "input": "&angst;",
+            "description": "Named entity: angst; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00c5"
+                ]
+            ]
+        },
+        {
+            "input": "&angzarr",
+            "description": "Bad named entity: angzarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&angzarr"
+                ]
+            ]
+        },
+        {
+            "input": "&angzarr;",
+            "description": "Named entity: angzarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u237c"
+                ]
+            ]
+        },
+        {
+            "input": "&aogon",
+            "description": "Bad named entity: aogon without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&aogon"
+                ]
+            ]
+        },
+        {
+            "input": "&aogon;",
+            "description": "Named entity: aogon; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0105"
+                ]
+            ]
+        },
+        {
+            "input": "&aopf",
+            "description": "Bad named entity: aopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&aopf"
+                ]
+            ]
+        },
+        {
+            "input": "&aopf;",
+            "description": "Named entity: aopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd52"
+                ]
+            ]
+        },
+        {
+            "input": "&ap",
+            "description": "Bad named entity: ap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ap"
+                ]
+            ]
+        },
+        {
+            "input": "&ap;",
+            "description": "Named entity: ap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2248"
+                ]
+            ]
+        },
+        {
+            "input": "&apE",
+            "description": "Bad named entity: apE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&apE"
+                ]
+            ]
+        },
+        {
+            "input": "&apE;",
+            "description": "Named entity: apE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a70"
+                ]
+            ]
+        },
+        {
+            "input": "&apacir",
+            "description": "Bad named entity: apacir without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&apacir"
+                ]
+            ]
+        },
+        {
+            "input": "&apacir;",
+            "description": "Named entity: apacir; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a6f"
+                ]
+            ]
+        },
+        {
+            "input": "&ape",
+            "description": "Bad named entity: ape without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ape"
+                ]
+            ]
+        },
+        {
+            "input": "&ape;",
+            "description": "Named entity: ape; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u224a"
+                ]
+            ]
+        },
+        {
+            "input": "&apid",
+            "description": "Bad named entity: apid without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&apid"
+                ]
+            ]
+        },
+        {
+            "input": "&apid;",
+            "description": "Named entity: apid; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u224b"
+                ]
+            ]
+        },
+        {
+            "input": "&apos",
+            "description": "Bad named entity: apos without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&apos"
+                ]
+            ]
+        },
+        {
+            "input": "&apos;",
+            "description": "Named entity: apos; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "'"
+                ]
+            ]
+        },
+        {
+            "input": "&approx",
+            "description": "Bad named entity: approx without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&approx"
+                ]
+            ]
+        },
+        {
+            "input": "&approx;",
+            "description": "Named entity: approx; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2248"
+                ]
+            ]
+        },
+        {
+            "input": "&approxeq",
+            "description": "Bad named entity: approxeq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&approxeq"
+                ]
+            ]
+        },
+        {
+            "input": "&approxeq;",
+            "description": "Named entity: approxeq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u224a"
+                ]
+            ]
+        },
+        {
+            "input": "&aring",
+            "description": "Named entity: aring without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00e5"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 7 }
+            ]
+        },
+        {
+            "input": "&aring;",
+            "description": "Named entity: aring; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00e5"
+                ]
+            ]
+        },
+        {
+            "input": "&ascr",
+            "description": "Bad named entity: ascr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ascr"
+                ]
+            ]
+        },
+        {
+            "input": "&ascr;",
+            "description": "Named entity: ascr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcb6"
+                ]
+            ]
+        },
+        {
+            "input": "&ast",
+            "description": "Bad named entity: ast without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ast"
+                ]
+            ]
+        },
+        {
+            "input": "&ast;",
+            "description": "Named entity: ast; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "*"
+                ]
+            ]
+        },
+        {
+            "input": "&asymp",
+            "description": "Bad named entity: asymp without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&asymp"
+                ]
+            ]
+        },
+        {
+            "input": "&asymp;",
+            "description": "Named entity: asymp; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2248"
+                ]
+            ]
+        },
+        {
+            "input": "&asympeq",
+            "description": "Bad named entity: asympeq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&asympeq"
+                ]
+            ]
+        },
+        {
+            "input": "&asympeq;",
+            "description": "Named entity: asympeq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u224d"
+                ]
+            ]
+        },
+        {
+            "input": "&atilde",
+            "description": "Named entity: atilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00e3"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&atilde;",
+            "description": "Named entity: atilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00e3"
+                ]
+            ]
+        },
+        {
+            "input": "&auml",
+            "description": "Named entity: auml without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00e4"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 6 }
+            ]
+        },
+        {
+            "input": "&auml;",
+            "description": "Named entity: auml; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00e4"
+                ]
+            ]
+        },
+        {
+            "input": "&awconint",
+            "description": "Bad named entity: awconint without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&awconint"
+                ]
+            ]
+        },
+        {
+            "input": "&awconint;",
+            "description": "Named entity: awconint; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2233"
+                ]
+            ]
+        },
+        {
+            "input": "&awint",
+            "description": "Bad named entity: awint without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&awint"
+                ]
+            ]
+        },
+        {
+            "input": "&awint;",
+            "description": "Named entity: awint; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a11"
+                ]
+            ]
+        },
+        {
+            "input": "&bNot",
+            "description": "Bad named entity: bNot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bNot"
+                ]
+            ]
+        },
+        {
+            "input": "&bNot;",
+            "description": "Named entity: bNot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aed"
+                ]
+            ]
+        },
+        {
+            "input": "&backcong",
+            "description": "Bad named entity: backcong without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&backcong"
+                ]
+            ]
+        },
+        {
+            "input": "&backcong;",
+            "description": "Named entity: backcong; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u224c"
+                ]
+            ]
+        },
+        {
+            "input": "&backepsilon",
+            "description": "Bad named entity: backepsilon without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&backepsilon"
+                ]
+            ]
+        },
+        {
+            "input": "&backepsilon;",
+            "description": "Named entity: backepsilon; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03f6"
+                ]
+            ]
+        },
+        {
+            "input": "&backprime",
+            "description": "Bad named entity: backprime without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&backprime"
+                ]
+            ]
+        },
+        {
+            "input": "&backprime;",
+            "description": "Named entity: backprime; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2035"
+                ]
+            ]
+        },
+        {
+            "input": "&backsim",
+            "description": "Bad named entity: backsim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&backsim"
+                ]
+            ]
+        },
+        {
+            "input": "&backsim;",
+            "description": "Named entity: backsim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u223d"
+                ]
+            ]
+        },
+        {
+            "input": "&backsimeq",
+            "description": "Bad named entity: backsimeq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&backsimeq"
+                ]
+            ]
+        },
+        {
+            "input": "&backsimeq;",
+            "description": "Named entity: backsimeq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22cd"
+                ]
+            ]
+        },
+        {
+            "input": "&barvee",
+            "description": "Bad named entity: barvee without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&barvee"
+                ]
+            ]
+        },
+        {
+            "input": "&barvee;",
+            "description": "Named entity: barvee; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22bd"
+                ]
+            ]
+        },
+        {
+            "input": "&barwed",
+            "description": "Bad named entity: barwed without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&barwed"
+                ]
+            ]
+        },
+        {
+            "input": "&barwed;",
+            "description": "Named entity: barwed; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2305"
+                ]
+            ]
+        },
+        {
+            "input": "&barwedge",
+            "description": "Bad named entity: barwedge without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&barwedge"
+                ]
+            ]
+        },
+        {
+            "input": "&barwedge;",
+            "description": "Named entity: barwedge; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2305"
+                ]
+            ]
+        },
+        {
+            "input": "&bbrk",
+            "description": "Bad named entity: bbrk without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bbrk"
+                ]
+            ]
+        },
+        {
+            "input": "&bbrk;",
+            "description": "Named entity: bbrk; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u23b5"
+                ]
+            ]
+        },
+        {
+            "input": "&bbrktbrk",
+            "description": "Bad named entity: bbrktbrk without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bbrktbrk"
+                ]
+            ]
+        },
+        {
+            "input": "&bbrktbrk;",
+            "description": "Named entity: bbrktbrk; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u23b6"
+                ]
+            ]
+        },
+        {
+            "input": "&bcong",
+            "description": "Bad named entity: bcong without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bcong"
+                ]
+            ]
+        },
+        {
+            "input": "&bcong;",
+            "description": "Named entity: bcong; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u224c"
+                ]
+            ]
+        },
+        {
+            "input": "&bcy",
+            "description": "Bad named entity: bcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bcy"
+                ]
+            ]
+        },
+        {
+            "input": "&bcy;",
+            "description": "Named entity: bcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0431"
+                ]
+            ]
+        },
+        {
+            "input": "&bdquo",
+            "description": "Bad named entity: bdquo without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bdquo"
+                ]
+            ]
+        },
+        {
+            "input": "&bdquo;",
+            "description": "Named entity: bdquo; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u201e"
+                ]
+            ]
+        },
+        {
+            "input": "&becaus",
+            "description": "Bad named entity: becaus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&becaus"
+                ]
+            ]
+        },
+        {
+            "input": "&becaus;",
+            "description": "Named entity: becaus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2235"
+                ]
+            ]
+        },
+        {
+            "input": "&because",
+            "description": "Bad named entity: because without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&because"
+                ]
+            ]
+        },
+        {
+            "input": "&because;",
+            "description": "Named entity: because; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2235"
+                ]
+            ]
+        },
+        {
+            "input": "&bemptyv",
+            "description": "Bad named entity: bemptyv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bemptyv"
+                ]
+            ]
+        },
+        {
+            "input": "&bemptyv;",
+            "description": "Named entity: bemptyv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29b0"
+                ]
+            ]
+        },
+        {
+            "input": "&bepsi",
+            "description": "Bad named entity: bepsi without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bepsi"
+                ]
+            ]
+        },
+        {
+            "input": "&bepsi;",
+            "description": "Named entity: bepsi; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03f6"
+                ]
+            ]
+        },
+        {
+            "input": "&bernou",
+            "description": "Bad named entity: bernou without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bernou"
+                ]
+            ]
+        },
+        {
+            "input": "&bernou;",
+            "description": "Named entity: bernou; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u212c"
+                ]
+            ]
+        },
+        {
+            "input": "&beta",
+            "description": "Bad named entity: beta without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&beta"
+                ]
+            ]
+        },
+        {
+            "input": "&beta;",
+            "description": "Named entity: beta; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03b2"
+                ]
+            ]
+        },
+        {
+            "input": "&beth",
+            "description": "Bad named entity: beth without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&beth"
+                ]
+            ]
+        },
+        {
+            "input": "&beth;",
+            "description": "Named entity: beth; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2136"
+                ]
+            ]
+        },
+        {
+            "input": "&between",
+            "description": "Bad named entity: between without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&between"
+                ]
+            ]
+        },
+        {
+            "input": "&between;",
+            "description": "Named entity: between; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u226c"
+                ]
+            ]
+        },
+        {
+            "input": "&bfr",
+            "description": "Bad named entity: bfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bfr"
+                ]
+            ]
+        },
+        {
+            "input": "&bfr;",
+            "description": "Named entity: bfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd1f"
+                ]
+            ]
+        },
+        {
+            "input": "&bigcap",
+            "description": "Bad named entity: bigcap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bigcap"
+                ]
+            ]
+        },
+        {
+            "input": "&bigcap;",
+            "description": "Named entity: bigcap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22c2"
+                ]
+            ]
+        },
+        {
+            "input": "&bigcirc",
+            "description": "Bad named entity: bigcirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bigcirc"
+                ]
+            ]
+        },
+        {
+            "input": "&bigcirc;",
+            "description": "Named entity: bigcirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25ef"
+                ]
+            ]
+        },
+        {
+            "input": "&bigcup",
+            "description": "Bad named entity: bigcup without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bigcup"
+                ]
+            ]
+        },
+        {
+            "input": "&bigcup;",
+            "description": "Named entity: bigcup; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22c3"
+                ]
+            ]
+        },
+        {
+            "input": "&bigodot",
+            "description": "Bad named entity: bigodot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bigodot"
+                ]
+            ]
+        },
+        {
+            "input": "&bigodot;",
+            "description": "Named entity: bigodot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a00"
+                ]
+            ]
+        },
+        {
+            "input": "&bigoplus",
+            "description": "Bad named entity: bigoplus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bigoplus"
+                ]
+            ]
+        },
+        {
+            "input": "&bigoplus;",
+            "description": "Named entity: bigoplus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a01"
+                ]
+            ]
+        },
+        {
+            "input": "&bigotimes",
+            "description": "Bad named entity: bigotimes without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bigotimes"
+                ]
+            ]
+        },
+        {
+            "input": "&bigotimes;",
+            "description": "Named entity: bigotimes; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a02"
+                ]
+            ]
+        },
+        {
+            "input": "&bigsqcup",
+            "description": "Bad named entity: bigsqcup without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bigsqcup"
+                ]
+            ]
+        },
+        {
+            "input": "&bigsqcup;",
+            "description": "Named entity: bigsqcup; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a06"
+                ]
+            ]
+        },
+        {
+            "input": "&bigstar",
+            "description": "Bad named entity: bigstar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bigstar"
+                ]
+            ]
+        },
+        {
+            "input": "&bigstar;",
+            "description": "Named entity: bigstar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2605"
+                ]
+            ]
+        },
+        {
+            "input": "&bigtriangledown",
+            "description": "Bad named entity: bigtriangledown without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bigtriangledown"
+                ]
+            ]
+        },
+        {
+            "input": "&bigtriangledown;",
+            "description": "Named entity: bigtriangledown; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25bd"
+                ]
+            ]
+        },
+        {
+            "input": "&bigtriangleup",
+            "description": "Bad named entity: bigtriangleup without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bigtriangleup"
+                ]
+            ]
+        },
+        {
+            "input": "&bigtriangleup;",
+            "description": "Named entity: bigtriangleup; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25b3"
+                ]
+            ]
+        },
+        {
+            "input": "&biguplus",
+            "description": "Bad named entity: biguplus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&biguplus"
+                ]
+            ]
+        },
+        {
+            "input": "&biguplus;",
+            "description": "Named entity: biguplus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a04"
+                ]
+            ]
+        },
+        {
+            "input": "&bigvee",
+            "description": "Bad named entity: bigvee without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bigvee"
+                ]
+            ]
+        },
+        {
+            "input": "&bigvee;",
+            "description": "Named entity: bigvee; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22c1"
+                ]
+            ]
+        },
+        {
+            "input": "&bigwedge",
+            "description": "Bad named entity: bigwedge without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bigwedge"
+                ]
+            ]
+        },
+        {
+            "input": "&bigwedge;",
+            "description": "Named entity: bigwedge; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22c0"
+                ]
+            ]
+        },
+        {
+            "input": "&bkarow",
+            "description": "Bad named entity: bkarow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bkarow"
+                ]
+            ]
+        },
+        {
+            "input": "&bkarow;",
+            "description": "Named entity: bkarow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u290d"
+                ]
+            ]
+        },
+        {
+            "input": "&blacklozenge",
+            "description": "Bad named entity: blacklozenge without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&blacklozenge"
+                ]
+            ]
+        },
+        {
+            "input": "&blacklozenge;",
+            "description": "Named entity: blacklozenge; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29eb"
+                ]
+            ]
+        },
+        {
+            "input": "&blacksquare",
+            "description": "Bad named entity: blacksquare without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&blacksquare"
+                ]
+            ]
+        },
+        {
+            "input": "&blacksquare;",
+            "description": "Named entity: blacksquare; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25aa"
+                ]
+            ]
+        },
+        {
+            "input": "&blacktriangle",
+            "description": "Bad named entity: blacktriangle without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&blacktriangle"
+                ]
+            ]
+        },
+        {
+            "input": "&blacktriangle;",
+            "description": "Named entity: blacktriangle; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25b4"
+                ]
+            ]
+        },
+        {
+            "input": "&blacktriangledown",
+            "description": "Bad named entity: blacktriangledown without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&blacktriangledown"
+                ]
+            ]
+        },
+        {
+            "input": "&blacktriangledown;",
+            "description": "Named entity: blacktriangledown; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25be"
+                ]
+            ]
+        },
+        {
+            "input": "&blacktriangleleft",
+            "description": "Bad named entity: blacktriangleleft without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&blacktriangleleft"
+                ]
+            ]
+        },
+        {
+            "input": "&blacktriangleleft;",
+            "description": "Named entity: blacktriangleleft; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25c2"
+                ]
+            ]
+        },
+        {
+            "input": "&blacktriangleright",
+            "description": "Bad named entity: blacktriangleright without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&blacktriangleright"
+                ]
+            ]
+        },
+        {
+            "input": "&blacktriangleright;",
+            "description": "Named entity: blacktriangleright; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25b8"
+                ]
+            ]
+        },
+        {
+            "input": "&blank",
+            "description": "Bad named entity: blank without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&blank"
+                ]
+            ]
+        },
+        {
+            "input": "&blank;",
+            "description": "Named entity: blank; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2423"
+                ]
+            ]
+        },
+        {
+            "input": "&blk12",
+            "description": "Bad named entity: blk12 without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&blk12"
+                ]
+            ]
+        },
+        {
+            "input": "&blk12;",
+            "description": "Named entity: blk12; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2592"
+                ]
+            ]
+        },
+        {
+            "input": "&blk14",
+            "description": "Bad named entity: blk14 without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&blk14"
+                ]
+            ]
+        },
+        {
+            "input": "&blk14;",
+            "description": "Named entity: blk14; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2591"
+                ]
+            ]
+        },
+        {
+            "input": "&blk34",
+            "description": "Bad named entity: blk34 without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&blk34"
+                ]
+            ]
+        },
+        {
+            "input": "&blk34;",
+            "description": "Named entity: blk34; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2593"
+                ]
+            ]
+        },
+        {
+            "input": "&block",
+            "description": "Bad named entity: block without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&block"
+                ]
+            ]
+        },
+        {
+            "input": "&block;",
+            "description": "Named entity: block; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2588"
+                ]
+            ]
+        },
+        {
+            "input": "&bne",
+            "description": "Bad named entity: bne without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bne"
+                ]
+            ]
+        },
+        {
+            "input": "&bne;",
+            "description": "Named entity: bne; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "=\u20e5"
+                ]
+            ]
+        },
+        {
+            "input": "&bnequiv",
+            "description": "Bad named entity: bnequiv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bnequiv"
+                ]
+            ]
+        },
+        {
+            "input": "&bnequiv;",
+            "description": "Named entity: bnequiv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2261\u20e5"
+                ]
+            ]
+        },
+        {
+            "input": "&bnot",
+            "description": "Bad named entity: bnot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bnot"
+                ]
+            ]
+        },
+        {
+            "input": "&bnot;",
+            "description": "Named entity: bnot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2310"
+                ]
+            ]
+        },
+        {
+            "input": "&bopf",
+            "description": "Bad named entity: bopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bopf"
+                ]
+            ]
+        },
+        {
+            "input": "&bopf;",
+            "description": "Named entity: bopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd53"
+                ]
+            ]
+        },
+        {
+            "input": "&bot",
+            "description": "Bad named entity: bot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bot"
+                ]
+            ]
+        },
+        {
+            "input": "&bot;",
+            "description": "Named entity: bot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22a5"
+                ]
+            ]
+        },
+        {
+            "input": "&bottom",
+            "description": "Bad named entity: bottom without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bottom"
+                ]
+            ]
+        },
+        {
+            "input": "&bottom;",
+            "description": "Named entity: bottom; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22a5"
+                ]
+            ]
+        },
+        {
+            "input": "&bowtie",
+            "description": "Bad named entity: bowtie without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bowtie"
+                ]
+            ]
+        },
+        {
+            "input": "&bowtie;",
+            "description": "Named entity: bowtie; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22c8"
+                ]
+            ]
+        },
+        {
+            "input": "&boxDL",
+            "description": "Bad named entity: boxDL without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxDL"
+                ]
+            ]
+        },
+        {
+            "input": "&boxDL;",
+            "description": "Named entity: boxDL; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2557"
+                ]
+            ]
+        },
+        {
+            "input": "&boxDR",
+            "description": "Bad named entity: boxDR without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxDR"
+                ]
+            ]
+        },
+        {
+            "input": "&boxDR;",
+            "description": "Named entity: boxDR; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2554"
+                ]
+            ]
+        },
+        {
+            "input": "&boxDl",
+            "description": "Bad named entity: boxDl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxDl"
+                ]
+            ]
+        },
+        {
+            "input": "&boxDl;",
+            "description": "Named entity: boxDl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2556"
+                ]
+            ]
+        },
+        {
+            "input": "&boxDr",
+            "description": "Bad named entity: boxDr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxDr"
+                ]
+            ]
+        },
+        {
+            "input": "&boxDr;",
+            "description": "Named entity: boxDr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2553"
+                ]
+            ]
+        },
+        {
+            "input": "&boxH",
+            "description": "Bad named entity: boxH without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxH"
+                ]
+            ]
+        },
+        {
+            "input": "&boxH;",
+            "description": "Named entity: boxH; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2550"
+                ]
+            ]
+        },
+        {
+            "input": "&boxHD",
+            "description": "Bad named entity: boxHD without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxHD"
+                ]
+            ]
+        },
+        {
+            "input": "&boxHD;",
+            "description": "Named entity: boxHD; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2566"
+                ]
+            ]
+        },
+        {
+            "input": "&boxHU",
+            "description": "Bad named entity: boxHU without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxHU"
+                ]
+            ]
+        },
+        {
+            "input": "&boxHU;",
+            "description": "Named entity: boxHU; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2569"
+                ]
+            ]
+        },
+        {
+            "input": "&boxHd",
+            "description": "Bad named entity: boxHd without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxHd"
+                ]
+            ]
+        },
+        {
+            "input": "&boxHd;",
+            "description": "Named entity: boxHd; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2564"
+                ]
+            ]
+        },
+        {
+            "input": "&boxHu",
+            "description": "Bad named entity: boxHu without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxHu"
+                ]
+            ]
+        },
+        {
+            "input": "&boxHu;",
+            "description": "Named entity: boxHu; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2567"
+                ]
+            ]
+        },
+        {
+            "input": "&boxUL",
+            "description": "Bad named entity: boxUL without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxUL"
+                ]
+            ]
+        },
+        {
+            "input": "&boxUL;",
+            "description": "Named entity: boxUL; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u255d"
+                ]
+            ]
+        },
+        {
+            "input": "&boxUR",
+            "description": "Bad named entity: boxUR without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxUR"
+                ]
+            ]
+        },
+        {
+            "input": "&boxUR;",
+            "description": "Named entity: boxUR; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u255a"
+                ]
+            ]
+        },
+        {
+            "input": "&boxUl",
+            "description": "Bad named entity: boxUl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxUl"
+                ]
+            ]
+        },
+        {
+            "input": "&boxUl;",
+            "description": "Named entity: boxUl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u255c"
+                ]
+            ]
+        },
+        {
+            "input": "&boxUr",
+            "description": "Bad named entity: boxUr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxUr"
+                ]
+            ]
+        },
+        {
+            "input": "&boxUr;",
+            "description": "Named entity: boxUr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2559"
+                ]
+            ]
+        },
+        {
+            "input": "&boxV",
+            "description": "Bad named entity: boxV without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxV"
+                ]
+            ]
+        },
+        {
+            "input": "&boxV;",
+            "description": "Named entity: boxV; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2551"
+                ]
+            ]
+        },
+        {
+            "input": "&boxVH",
+            "description": "Bad named entity: boxVH without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxVH"
+                ]
+            ]
+        },
+        {
+            "input": "&boxVH;",
+            "description": "Named entity: boxVH; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u256c"
+                ]
+            ]
+        },
+        {
+            "input": "&boxVL",
+            "description": "Bad named entity: boxVL without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxVL"
+                ]
+            ]
+        },
+        {
+            "input": "&boxVL;",
+            "description": "Named entity: boxVL; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2563"
+                ]
+            ]
+        },
+        {
+            "input": "&boxVR",
+            "description": "Bad named entity: boxVR without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxVR"
+                ]
+            ]
+        },
+        {
+            "input": "&boxVR;",
+            "description": "Named entity: boxVR; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2560"
+                ]
+            ]
+        },
+        {
+            "input": "&boxVh",
+            "description": "Bad named entity: boxVh without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxVh"
+                ]
+            ]
+        },
+        {
+            "input": "&boxVh;",
+            "description": "Named entity: boxVh; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u256b"
+                ]
+            ]
+        },
+        {
+            "input": "&boxVl",
+            "description": "Bad named entity: boxVl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxVl"
+                ]
+            ]
+        },
+        {
+            "input": "&boxVl;",
+            "description": "Named entity: boxVl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2562"
+                ]
+            ]
+        },
+        {
+            "input": "&boxVr",
+            "description": "Bad named entity: boxVr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxVr"
+                ]
+            ]
+        },
+        {
+            "input": "&boxVr;",
+            "description": "Named entity: boxVr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u255f"
+                ]
+            ]
+        },
+        {
+            "input": "&boxbox",
+            "description": "Bad named entity: boxbox without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxbox"
+                ]
+            ]
+        },
+        {
+            "input": "&boxbox;",
+            "description": "Named entity: boxbox; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29c9"
+                ]
+            ]
+        },
+        {
+            "input": "&boxdL",
+            "description": "Bad named entity: boxdL without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxdL"
+                ]
+            ]
+        },
+        {
+            "input": "&boxdL;",
+            "description": "Named entity: boxdL; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2555"
+                ]
+            ]
+        },
+        {
+            "input": "&boxdR",
+            "description": "Bad named entity: boxdR without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxdR"
+                ]
+            ]
+        },
+        {
+            "input": "&boxdR;",
+            "description": "Named entity: boxdR; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2552"
+                ]
+            ]
+        },
+        {
+            "input": "&boxdl",
+            "description": "Bad named entity: boxdl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxdl"
+                ]
+            ]
+        },
+        {
+            "input": "&boxdl;",
+            "description": "Named entity: boxdl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2510"
+                ]
+            ]
+        },
+        {
+            "input": "&boxdr",
+            "description": "Bad named entity: boxdr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxdr"
+                ]
+            ]
+        },
+        {
+            "input": "&boxdr;",
+            "description": "Named entity: boxdr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u250c"
+                ]
+            ]
+        },
+        {
+            "input": "&boxh",
+            "description": "Bad named entity: boxh without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxh"
+                ]
+            ]
+        },
+        {
+            "input": "&boxh;",
+            "description": "Named entity: boxh; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2500"
+                ]
+            ]
+        },
+        {
+            "input": "&boxhD",
+            "description": "Bad named entity: boxhD without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxhD"
+                ]
+            ]
+        },
+        {
+            "input": "&boxhD;",
+            "description": "Named entity: boxhD; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2565"
+                ]
+            ]
+        },
+        {
+            "input": "&boxhU",
+            "description": "Bad named entity: boxhU without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxhU"
+                ]
+            ]
+        },
+        {
+            "input": "&boxhU;",
+            "description": "Named entity: boxhU; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2568"
+                ]
+            ]
+        },
+        {
+            "input": "&boxhd",
+            "description": "Bad named entity: boxhd without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxhd"
+                ]
+            ]
+        },
+        {
+            "input": "&boxhd;",
+            "description": "Named entity: boxhd; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u252c"
+                ]
+            ]
+        },
+        {
+            "input": "&boxhu",
+            "description": "Bad named entity: boxhu without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxhu"
+                ]
+            ]
+        },
+        {
+            "input": "&boxhu;",
+            "description": "Named entity: boxhu; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2534"
+                ]
+            ]
+        },
+        {
+            "input": "&boxminus",
+            "description": "Bad named entity: boxminus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxminus"
+                ]
+            ]
+        },
+        {
+            "input": "&boxminus;",
+            "description": "Named entity: boxminus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u229f"
+                ]
+            ]
+        },
+        {
+            "input": "&boxplus",
+            "description": "Bad named entity: boxplus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxplus"
+                ]
+            ]
+        },
+        {
+            "input": "&boxplus;",
+            "description": "Named entity: boxplus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u229e"
+                ]
+            ]
+        },
+        {
+            "input": "&boxtimes",
+            "description": "Bad named entity: boxtimes without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxtimes"
+                ]
+            ]
+        },
+        {
+            "input": "&boxtimes;",
+            "description": "Named entity: boxtimes; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22a0"
+                ]
+            ]
+        },
+        {
+            "input": "&boxuL",
+            "description": "Bad named entity: boxuL without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxuL"
+                ]
+            ]
+        },
+        {
+            "input": "&boxuL;",
+            "description": "Named entity: boxuL; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u255b"
+                ]
+            ]
+        },
+        {
+            "input": "&boxuR",
+            "description": "Bad named entity: boxuR without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxuR"
+                ]
+            ]
+        },
+        {
+            "input": "&boxuR;",
+            "description": "Named entity: boxuR; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2558"
+                ]
+            ]
+        },
+        {
+            "input": "&boxul",
+            "description": "Bad named entity: boxul without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxul"
+                ]
+            ]
+        },
+        {
+            "input": "&boxul;",
+            "description": "Named entity: boxul; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2518"
+                ]
+            ]
+        },
+        {
+            "input": "&boxur",
+            "description": "Bad named entity: boxur without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxur"
+                ]
+            ]
+        },
+        {
+            "input": "&boxur;",
+            "description": "Named entity: boxur; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2514"
+                ]
+            ]
+        },
+        {
+            "input": "&boxv",
+            "description": "Bad named entity: boxv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxv"
+                ]
+            ]
+        },
+        {
+            "input": "&boxv;",
+            "description": "Named entity: boxv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2502"
+                ]
+            ]
+        },
+        {
+            "input": "&boxvH",
+            "description": "Bad named entity: boxvH without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxvH"
+                ]
+            ]
+        },
+        {
+            "input": "&boxvH;",
+            "description": "Named entity: boxvH; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u256a"
+                ]
+            ]
+        },
+        {
+            "input": "&boxvL",
+            "description": "Bad named entity: boxvL without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxvL"
+                ]
+            ]
+        },
+        {
+            "input": "&boxvL;",
+            "description": "Named entity: boxvL; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2561"
+                ]
+            ]
+        },
+        {
+            "input": "&boxvR",
+            "description": "Bad named entity: boxvR without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxvR"
+                ]
+            ]
+        },
+        {
+            "input": "&boxvR;",
+            "description": "Named entity: boxvR; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u255e"
+                ]
+            ]
+        },
+        {
+            "input": "&boxvh",
+            "description": "Bad named entity: boxvh without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxvh"
+                ]
+            ]
+        },
+        {
+            "input": "&boxvh;",
+            "description": "Named entity: boxvh; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u253c"
+                ]
+            ]
+        },
+        {
+            "input": "&boxvl",
+            "description": "Bad named entity: boxvl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxvl"
+                ]
+            ]
+        },
+        {
+            "input": "&boxvl;",
+            "description": "Named entity: boxvl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2524"
+                ]
+            ]
+        },
+        {
+            "input": "&boxvr",
+            "description": "Bad named entity: boxvr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&boxvr"
+                ]
+            ]
+        },
+        {
+            "input": "&boxvr;",
+            "description": "Named entity: boxvr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u251c"
+                ]
+            ]
+        },
+        {
+            "input": "&bprime",
+            "description": "Bad named entity: bprime without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bprime"
+                ]
+            ]
+        },
+        {
+            "input": "&bprime;",
+            "description": "Named entity: bprime; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2035"
+                ]
+            ]
+        },
+        {
+            "input": "&breve",
+            "description": "Bad named entity: breve without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&breve"
+                ]
+            ]
+        },
+        {
+            "input": "&breve;",
+            "description": "Named entity: breve; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u02d8"
+                ]
+            ]
+        },
+        {
+            "input": "&brvbar",
+            "description": "Named entity: brvbar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a6"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&brvbar;",
+            "description": "Named entity: brvbar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a6"
+                ]
+            ]
+        },
+        {
+            "input": "&bscr",
+            "description": "Bad named entity: bscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bscr"
+                ]
+            ]
+        },
+        {
+            "input": "&bscr;",
+            "description": "Named entity: bscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcb7"
+                ]
+            ]
+        },
+        {
+            "input": "&bsemi",
+            "description": "Bad named entity: bsemi without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bsemi"
+                ]
+            ]
+        },
+        {
+            "input": "&bsemi;",
+            "description": "Named entity: bsemi; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u204f"
+                ]
+            ]
+        },
+        {
+            "input": "&bsim",
+            "description": "Bad named entity: bsim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bsim"
+                ]
+            ]
+        },
+        {
+            "input": "&bsim;",
+            "description": "Named entity: bsim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u223d"
+                ]
+            ]
+        },
+        {
+            "input": "&bsime",
+            "description": "Bad named entity: bsime without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bsime"
+                ]
+            ]
+        },
+        {
+            "input": "&bsime;",
+            "description": "Named entity: bsime; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22cd"
+                ]
+            ]
+        },
+        {
+            "input": "&bsol",
+            "description": "Bad named entity: bsol without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bsol"
+                ]
+            ]
+        },
+        {
+            "input": "&bsol;",
+            "description": "Named entity: bsol; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\\"
+                ]
+            ]
+        },
+        {
+            "input": "&bsolb",
+            "description": "Bad named entity: bsolb without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bsolb"
+                ]
+            ]
+        },
+        {
+            "input": "&bsolb;",
+            "description": "Named entity: bsolb; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29c5"
+                ]
+            ]
+        },
+        {
+            "input": "&bsolhsub",
+            "description": "Bad named entity: bsolhsub without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bsolhsub"
+                ]
+            ]
+        },
+        {
+            "input": "&bsolhsub;",
+            "description": "Named entity: bsolhsub; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27c8"
+                ]
+            ]
+        },
+        {
+            "input": "&bull",
+            "description": "Bad named entity: bull without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bull"
+                ]
+            ]
+        },
+        {
+            "input": "&bull;",
+            "description": "Named entity: bull; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2022"
+                ]
+            ]
+        },
+        {
+            "input": "&bullet",
+            "description": "Bad named entity: bullet without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bullet"
+                ]
+            ]
+        },
+        {
+            "input": "&bullet;",
+            "description": "Named entity: bullet; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2022"
+                ]
+            ]
+        },
+        {
+            "input": "&bump",
+            "description": "Bad named entity: bump without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bump"
+                ]
+            ]
+        },
+        {
+            "input": "&bump;",
+            "description": "Named entity: bump; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u224e"
+                ]
+            ]
+        },
+        {
+            "input": "&bumpE",
+            "description": "Bad named entity: bumpE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bumpE"
+                ]
+            ]
+        },
+        {
+            "input": "&bumpE;",
+            "description": "Named entity: bumpE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aae"
+                ]
+            ]
+        },
+        {
+            "input": "&bumpe",
+            "description": "Bad named entity: bumpe without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bumpe"
+                ]
+            ]
+        },
+        {
+            "input": "&bumpe;",
+            "description": "Named entity: bumpe; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u224f"
+                ]
+            ]
+        },
+        {
+            "input": "&bumpeq",
+            "description": "Bad named entity: bumpeq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&bumpeq"
+                ]
+            ]
+        },
+        {
+            "input": "&bumpeq;",
+            "description": "Named entity: bumpeq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u224f"
+                ]
+            ]
+        },
+        {
+            "input": "&cacute",
+            "description": "Bad named entity: cacute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cacute"
+                ]
+            ]
+        },
+        {
+            "input": "&cacute;",
+            "description": "Named entity: cacute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0107"
+                ]
+            ]
+        },
+        {
+            "input": "&cap",
+            "description": "Bad named entity: cap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cap"
+                ]
+            ]
+        },
+        {
+            "input": "&cap;",
+            "description": "Named entity: cap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2229"
+                ]
+            ]
+        },
+        {
+            "input": "&capand",
+            "description": "Bad named entity: capand without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&capand"
+                ]
+            ]
+        },
+        {
+            "input": "&capand;",
+            "description": "Named entity: capand; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a44"
+                ]
+            ]
+        },
+        {
+            "input": "&capbrcup",
+            "description": "Bad named entity: capbrcup without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&capbrcup"
+                ]
+            ]
+        },
+        {
+            "input": "&capbrcup;",
+            "description": "Named entity: capbrcup; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a49"
+                ]
+            ]
+        },
+        {
+            "input": "&capcap",
+            "description": "Bad named entity: capcap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&capcap"
+                ]
+            ]
+        },
+        {
+            "input": "&capcap;",
+            "description": "Named entity: capcap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a4b"
+                ]
+            ]
+        },
+        {
+            "input": "&capcup",
+            "description": "Bad named entity: capcup without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&capcup"
+                ]
+            ]
+        },
+        {
+            "input": "&capcup;",
+            "description": "Named entity: capcup; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a47"
+                ]
+            ]
+        },
+        {
+            "input": "&capdot",
+            "description": "Bad named entity: capdot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&capdot"
+                ]
+            ]
+        },
+        {
+            "input": "&capdot;",
+            "description": "Named entity: capdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a40"
+                ]
+            ]
+        },
+        {
+            "input": "&caps",
+            "description": "Bad named entity: caps without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&caps"
+                ]
+            ]
+        },
+        {
+            "input": "&caps;",
+            "description": "Named entity: caps; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2229\ufe00"
+                ]
+            ]
+        },
+        {
+            "input": "&caret",
+            "description": "Bad named entity: caret without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&caret"
+                ]
+            ]
+        },
+        {
+            "input": "&caret;",
+            "description": "Named entity: caret; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2041"
+                ]
+            ]
+        },
+        {
+            "input": "&caron",
+            "description": "Bad named entity: caron without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&caron"
+                ]
+            ]
+        },
+        {
+            "input": "&caron;",
+            "description": "Named entity: caron; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u02c7"
+                ]
+            ]
+        },
+        {
+            "input": "&ccaps",
+            "description": "Bad named entity: ccaps without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ccaps"
+                ]
+            ]
+        },
+        {
+            "input": "&ccaps;",
+            "description": "Named entity: ccaps; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a4d"
+                ]
+            ]
+        },
+        {
+            "input": "&ccaron",
+            "description": "Bad named entity: ccaron without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ccaron"
+                ]
+            ]
+        },
+        {
+            "input": "&ccaron;",
+            "description": "Named entity: ccaron; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u010d"
+                ]
+            ]
+        },
+        {
+            "input": "&ccedil",
+            "description": "Named entity: ccedil without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00e7"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&ccedil;",
+            "description": "Named entity: ccedil; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00e7"
+                ]
+            ]
+        },
+        {
+            "input": "&ccirc",
+            "description": "Bad named entity: ccirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ccirc"
+                ]
+            ]
+        },
+        {
+            "input": "&ccirc;",
+            "description": "Named entity: ccirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0109"
+                ]
+            ]
+        },
+        {
+            "input": "&ccups",
+            "description": "Bad named entity: ccups without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ccups"
+                ]
+            ]
+        },
+        {
+            "input": "&ccups;",
+            "description": "Named entity: ccups; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a4c"
+                ]
+            ]
+        },
+        {
+            "input": "&ccupssm",
+            "description": "Bad named entity: ccupssm without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ccupssm"
+                ]
+            ]
+        },
+        {
+            "input": "&ccupssm;",
+            "description": "Named entity: ccupssm; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a50"
+                ]
+            ]
+        },
+        {
+            "input": "&cdot",
+            "description": "Bad named entity: cdot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cdot"
+                ]
+            ]
+        },
+        {
+            "input": "&cdot;",
+            "description": "Named entity: cdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u010b"
+                ]
+            ]
+        },
+        {
+            "input": "&cedil",
+            "description": "Named entity: cedil without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b8"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 7 }
+            ]
+        },
+        {
+            "input": "&cedil;",
+            "description": "Named entity: cedil; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b8"
+                ]
+            ]
+        },
+        {
+            "input": "&cemptyv",
+            "description": "Bad named entity: cemptyv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cemptyv"
+                ]
+            ]
+        },
+        {
+            "input": "&cemptyv;",
+            "description": "Named entity: cemptyv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29b2"
+                ]
+            ]
+        },
+        {
+            "input": "&cent",
+            "description": "Named entity: cent without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a2"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 6 }
+            ]
+        },
+        {
+            "input": "&cent;",
+            "description": "Named entity: cent; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a2"
+                ]
+            ]
+        },
+        {
+            "input": "&centerdot;",
+            "description": "Named entity: centerdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b7"
+                ]
+            ]
+        },
+        {
+            "input": "&cfr",
+            "description": "Bad named entity: cfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cfr"
+                ]
+            ]
+        },
+        {
+            "input": "&cfr;",
+            "description": "Named entity: cfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd20"
+                ]
+            ]
+        },
+        {
+            "input": "&chcy",
+            "description": "Bad named entity: chcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&chcy"
+                ]
+            ]
+        },
+        {
+            "input": "&chcy;",
+            "description": "Named entity: chcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0447"
+                ]
+            ]
+        },
+        {
+            "input": "&check",
+            "description": "Bad named entity: check without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&check"
+                ]
+            ]
+        },
+        {
+            "input": "&check;",
+            "description": "Named entity: check; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2713"
+                ]
+            ]
+        },
+        {
+            "input": "&checkmark",
+            "description": "Bad named entity: checkmark without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&checkmark"
+                ]
+            ]
+        },
+        {
+            "input": "&checkmark;",
+            "description": "Named entity: checkmark; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2713"
+                ]
+            ]
+        },
+        {
+            "input": "&chi",
+            "description": "Bad named entity: chi without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&chi"
+                ]
+            ]
+        },
+        {
+            "input": "&chi;",
+            "description": "Named entity: chi; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03c7"
+                ]
+            ]
+        },
+        {
+            "input": "&cir",
+            "description": "Bad named entity: cir without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cir"
+                ]
+            ]
+        },
+        {
+            "input": "&cir;",
+            "description": "Named entity: cir; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25cb"
+                ]
+            ]
+        },
+        {
+            "input": "&cirE",
+            "description": "Bad named entity: cirE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cirE"
+                ]
+            ]
+        },
+        {
+            "input": "&cirE;",
+            "description": "Named entity: cirE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29c3"
+                ]
+            ]
+        },
+        {
+            "input": "&circ",
+            "description": "Bad named entity: circ without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&circ"
+                ]
+            ]
+        },
+        {
+            "input": "&circ;",
+            "description": "Named entity: circ; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u02c6"
+                ]
+            ]
+        },
+        {
+            "input": "&circeq",
+            "description": "Bad named entity: circeq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&circeq"
+                ]
+            ]
+        },
+        {
+            "input": "&circeq;",
+            "description": "Named entity: circeq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2257"
+                ]
+            ]
+        },
+        {
+            "input": "&circlearrowleft",
+            "description": "Bad named entity: circlearrowleft without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&circlearrowleft"
+                ]
+            ]
+        },
+        {
+            "input": "&circlearrowleft;",
+            "description": "Named entity: circlearrowleft; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21ba"
+                ]
+            ]
+        },
+        {
+            "input": "&circlearrowright",
+            "description": "Bad named entity: circlearrowright without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&circlearrowright"
+                ]
+            ]
+        },
+        {
+            "input": "&circlearrowright;",
+            "description": "Named entity: circlearrowright; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21bb"
+                ]
+            ]
+        },
+        {
+            "input": "&circledR",
+            "description": "Bad named entity: circledR without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&circledR"
+                ]
+            ]
+        },
+        {
+            "input": "&circledR;",
+            "description": "Named entity: circledR; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ae"
+                ]
+            ]
+        },
+        {
+            "input": "&circledS",
+            "description": "Bad named entity: circledS without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&circledS"
+                ]
+            ]
+        },
+        {
+            "input": "&circledS;",
+            "description": "Named entity: circledS; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u24c8"
+                ]
+            ]
+        },
+        {
+            "input": "&circledast",
+            "description": "Bad named entity: circledast without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&circledast"
+                ]
+            ]
+        },
+        {
+            "input": "&circledast;",
+            "description": "Named entity: circledast; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u229b"
+                ]
+            ]
+        },
+        {
+            "input": "&circledcirc",
+            "description": "Bad named entity: circledcirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&circledcirc"
+                ]
+            ]
+        },
+        {
+            "input": "&circledcirc;",
+            "description": "Named entity: circledcirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u229a"
+                ]
+            ]
+        },
+        {
+            "input": "&circleddash",
+            "description": "Bad named entity: circleddash without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&circleddash"
+                ]
+            ]
+        },
+        {
+            "input": "&circleddash;",
+            "description": "Named entity: circleddash; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u229d"
+                ]
+            ]
+        },
+        {
+            "input": "&cire",
+            "description": "Bad named entity: cire without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cire"
+                ]
+            ]
+        },
+        {
+            "input": "&cire;",
+            "description": "Named entity: cire; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2257"
+                ]
+            ]
+        },
+        {
+            "input": "&cirfnint",
+            "description": "Bad named entity: cirfnint without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cirfnint"
+                ]
+            ]
+        },
+        {
+            "input": "&cirfnint;",
+            "description": "Named entity: cirfnint; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a10"
+                ]
+            ]
+        },
+        {
+            "input": "&cirmid",
+            "description": "Bad named entity: cirmid without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cirmid"
+                ]
+            ]
+        },
+        {
+            "input": "&cirmid;",
+            "description": "Named entity: cirmid; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aef"
+                ]
+            ]
+        },
+        {
+            "input": "&cirscir",
+            "description": "Bad named entity: cirscir without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cirscir"
+                ]
+            ]
+        },
+        {
+            "input": "&cirscir;",
+            "description": "Named entity: cirscir; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29c2"
+                ]
+            ]
+        },
+        {
+            "input": "&clubs",
+            "description": "Bad named entity: clubs without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&clubs"
+                ]
+            ]
+        },
+        {
+            "input": "&clubs;",
+            "description": "Named entity: clubs; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2663"
+                ]
+            ]
+        },
+        {
+            "input": "&clubsuit",
+            "description": "Bad named entity: clubsuit without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&clubsuit"
+                ]
+            ]
+        },
+        {
+            "input": "&clubsuit;",
+            "description": "Named entity: clubsuit; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2663"
+                ]
+            ]
+        },
+        {
+            "input": "&colon",
+            "description": "Bad named entity: colon without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&colon"
+                ]
+            ]
+        },
+        {
+            "input": "&colon;",
+            "description": "Named entity: colon; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    ":"
+                ]
+            ]
+        },
+        {
+            "input": "&colone",
+            "description": "Bad named entity: colone without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&colone"
+                ]
+            ]
+        },
+        {
+            "input": "&colone;",
+            "description": "Named entity: colone; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2254"
+                ]
+            ]
+        },
+        {
+            "input": "&coloneq",
+            "description": "Bad named entity: coloneq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&coloneq"
+                ]
+            ]
+        },
+        {
+            "input": "&coloneq;",
+            "description": "Named entity: coloneq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2254"
+                ]
+            ]
+        },
+        {
+            "input": "&comma",
+            "description": "Bad named entity: comma without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&comma"
+                ]
+            ]
+        },
+        {
+            "input": "&comma;",
+            "description": "Named entity: comma; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    ","
+                ]
+            ]
+        },
+        {
+            "input": "&commat",
+            "description": "Bad named entity: commat without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&commat"
+                ]
+            ]
+        },
+        {
+            "input": "&commat;",
+            "description": "Named entity: commat; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "@"
+                ]
+            ]
+        },
+        {
+            "input": "&comp",
+            "description": "Bad named entity: comp without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&comp"
+                ]
+            ]
+        },
+        {
+            "input": "&comp;",
+            "description": "Named entity: comp; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2201"
+                ]
+            ]
+        },
+        {
+            "input": "&compfn",
+            "description": "Bad named entity: compfn without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&compfn"
+                ]
+            ]
+        },
+        {
+            "input": "&compfn;",
+            "description": "Named entity: compfn; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2218"
+                ]
+            ]
+        },
+        {
+            "input": "&complement",
+            "description": "Bad named entity: complement without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&complement"
+                ]
+            ]
+        },
+        {
+            "input": "&complement;",
+            "description": "Named entity: complement; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2201"
+                ]
+            ]
+        },
+        {
+            "input": "&complexes",
+            "description": "Bad named entity: complexes without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&complexes"
+                ]
+            ]
+        },
+        {
+            "input": "&complexes;",
+            "description": "Named entity: complexes; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2102"
+                ]
+            ]
+        },
+        {
+            "input": "&cong",
+            "description": "Bad named entity: cong without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cong"
+                ]
+            ]
+        },
+        {
+            "input": "&cong;",
+            "description": "Named entity: cong; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2245"
+                ]
+            ]
+        },
+        {
+            "input": "&congdot",
+            "description": "Bad named entity: congdot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&congdot"
+                ]
+            ]
+        },
+        {
+            "input": "&congdot;",
+            "description": "Named entity: congdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a6d"
+                ]
+            ]
+        },
+        {
+            "input": "&conint",
+            "description": "Bad named entity: conint without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&conint"
+                ]
+            ]
+        },
+        {
+            "input": "&conint;",
+            "description": "Named entity: conint; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u222e"
+                ]
+            ]
+        },
+        {
+            "input": "&copf",
+            "description": "Bad named entity: copf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&copf"
+                ]
+            ]
+        },
+        {
+            "input": "&copf;",
+            "description": "Named entity: copf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd54"
+                ]
+            ]
+        },
+        {
+            "input": "&coprod",
+            "description": "Bad named entity: coprod without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&coprod"
+                ]
+            ]
+        },
+        {
+            "input": "&coprod;",
+            "description": "Named entity: coprod; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2210"
+                ]
+            ]
+        },
+        {
+            "input": "&copy",
+            "description": "Named entity: copy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a9"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 6 }
+            ]
+        },
+        {
+            "input": "&copy;",
+            "description": "Named entity: copy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a9"
+                ]
+            ]
+        },
+        {
+            "input": "&copysr;",
+            "description": "Named entity: copysr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2117"
+                ]
+            ]
+        },
+        {
+            "input": "&crarr",
+            "description": "Bad named entity: crarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&crarr"
+                ]
+            ]
+        },
+        {
+            "input": "&crarr;",
+            "description": "Named entity: crarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21b5"
+                ]
+            ]
+        },
+        {
+            "input": "&cross",
+            "description": "Bad named entity: cross without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cross"
+                ]
+            ]
+        },
+        {
+            "input": "&cross;",
+            "description": "Named entity: cross; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2717"
+                ]
+            ]
+        },
+        {
+            "input": "&cscr",
+            "description": "Bad named entity: cscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cscr"
+                ]
+            ]
+        },
+        {
+            "input": "&cscr;",
+            "description": "Named entity: cscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcb8"
+                ]
+            ]
+        },
+        {
+            "input": "&csub",
+            "description": "Bad named entity: csub without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&csub"
+                ]
+            ]
+        },
+        {
+            "input": "&csub;",
+            "description": "Named entity: csub; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2acf"
+                ]
+            ]
+        },
+        {
+            "input": "&csube",
+            "description": "Bad named entity: csube without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&csube"
+                ]
+            ]
+        },
+        {
+            "input": "&csube;",
+            "description": "Named entity: csube; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ad1"
+                ]
+            ]
+        },
+        {
+            "input": "&csup",
+            "description": "Bad named entity: csup without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&csup"
+                ]
+            ]
+        },
+        {
+            "input": "&csup;",
+            "description": "Named entity: csup; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ad0"
+                ]
+            ]
+        },
+        {
+            "input": "&csupe",
+            "description": "Bad named entity: csupe without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&csupe"
+                ]
+            ]
+        },
+        {
+            "input": "&csupe;",
+            "description": "Named entity: csupe; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ad2"
+                ]
+            ]
+        },
+        {
+            "input": "&ctdot",
+            "description": "Bad named entity: ctdot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ctdot"
+                ]
+            ]
+        },
+        {
+            "input": "&ctdot;",
+            "description": "Named entity: ctdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22ef"
+                ]
+            ]
+        },
+        {
+            "input": "&cudarrl",
+            "description": "Bad named entity: cudarrl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cudarrl"
+                ]
+            ]
+        },
+        {
+            "input": "&cudarrl;",
+            "description": "Named entity: cudarrl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2938"
+                ]
+            ]
+        },
+        {
+            "input": "&cudarrr",
+            "description": "Bad named entity: cudarrr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cudarrr"
+                ]
+            ]
+        },
+        {
+            "input": "&cudarrr;",
+            "description": "Named entity: cudarrr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2935"
+                ]
+            ]
+        },
+        {
+            "input": "&cuepr",
+            "description": "Bad named entity: cuepr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cuepr"
+                ]
+            ]
+        },
+        {
+            "input": "&cuepr;",
+            "description": "Named entity: cuepr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22de"
+                ]
+            ]
+        },
+        {
+            "input": "&cuesc",
+            "description": "Bad named entity: cuesc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cuesc"
+                ]
+            ]
+        },
+        {
+            "input": "&cuesc;",
+            "description": "Named entity: cuesc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22df"
+                ]
+            ]
+        },
+        {
+            "input": "&cularr",
+            "description": "Bad named entity: cularr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cularr"
+                ]
+            ]
+        },
+        {
+            "input": "&cularr;",
+            "description": "Named entity: cularr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21b6"
+                ]
+            ]
+        },
+        {
+            "input": "&cularrp",
+            "description": "Bad named entity: cularrp without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cularrp"
+                ]
+            ]
+        },
+        {
+            "input": "&cularrp;",
+            "description": "Named entity: cularrp; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u293d"
+                ]
+            ]
+        },
+        {
+            "input": "&cup",
+            "description": "Bad named entity: cup without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cup"
+                ]
+            ]
+        },
+        {
+            "input": "&cup;",
+            "description": "Named entity: cup; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u222a"
+                ]
+            ]
+        },
+        {
+            "input": "&cupbrcap",
+            "description": "Bad named entity: cupbrcap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cupbrcap"
+                ]
+            ]
+        },
+        {
+            "input": "&cupbrcap;",
+            "description": "Named entity: cupbrcap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a48"
+                ]
+            ]
+        },
+        {
+            "input": "&cupcap",
+            "description": "Bad named entity: cupcap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cupcap"
+                ]
+            ]
+        },
+        {
+            "input": "&cupcap;",
+            "description": "Named entity: cupcap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a46"
+                ]
+            ]
+        },
+        {
+            "input": "&cupcup",
+            "description": "Bad named entity: cupcup without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cupcup"
+                ]
+            ]
+        },
+        {
+            "input": "&cupcup;",
+            "description": "Named entity: cupcup; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a4a"
+                ]
+            ]
+        },
+        {
+            "input": "&cupdot",
+            "description": "Bad named entity: cupdot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cupdot"
+                ]
+            ]
+        },
+        {
+            "input": "&cupdot;",
+            "description": "Named entity: cupdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u228d"
+                ]
+            ]
+        },
+        {
+            "input": "&cupor",
+            "description": "Bad named entity: cupor without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cupor"
+                ]
+            ]
+        },
+        {
+            "input": "&cupor;",
+            "description": "Named entity: cupor; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a45"
+                ]
+            ]
+        },
+        {
+            "input": "&cups",
+            "description": "Bad named entity: cups without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cups"
+                ]
+            ]
+        },
+        {
+            "input": "&cups;",
+            "description": "Named entity: cups; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u222a\ufe00"
+                ]
+            ]
+        },
+        {
+            "input": "&curarr",
+            "description": "Bad named entity: curarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&curarr"
+                ]
+            ]
+        },
+        {
+            "input": "&curarr;",
+            "description": "Named entity: curarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21b7"
+                ]
+            ]
+        },
+        {
+            "input": "&curarrm",
+            "description": "Bad named entity: curarrm without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&curarrm"
+                ]
+            ]
+        },
+        {
+            "input": "&curarrm;",
+            "description": "Named entity: curarrm; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u293c"
+                ]
+            ]
+        },
+        {
+            "input": "&curlyeqprec",
+            "description": "Bad named entity: curlyeqprec without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&curlyeqprec"
+                ]
+            ]
+        },
+        {
+            "input": "&curlyeqprec;",
+            "description": "Named entity: curlyeqprec; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22de"
+                ]
+            ]
+        },
+        {
+            "input": "&curlyeqsucc",
+            "description": "Bad named entity: curlyeqsucc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&curlyeqsucc"
+                ]
+            ]
+        },
+        {
+            "input": "&curlyeqsucc;",
+            "description": "Named entity: curlyeqsucc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22df"
+                ]
+            ]
+        },
+        {
+            "input": "&curlyvee",
+            "description": "Bad named entity: curlyvee without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&curlyvee"
+                ]
+            ]
+        },
+        {
+            "input": "&curlyvee;",
+            "description": "Named entity: curlyvee; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22ce"
+                ]
+            ]
+        },
+        {
+            "input": "&curlywedge",
+            "description": "Bad named entity: curlywedge without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&curlywedge"
+                ]
+            ]
+        },
+        {
+            "input": "&curlywedge;",
+            "description": "Named entity: curlywedge; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22cf"
+                ]
+            ]
+        },
+        {
+            "input": "&curren",
+            "description": "Named entity: curren without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a4"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&curren;",
+            "description": "Named entity: curren; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a4"
+                ]
+            ]
+        },
+        {
+            "input": "&curvearrowleft",
+            "description": "Bad named entity: curvearrowleft without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&curvearrowleft"
+                ]
+            ]
+        },
+        {
+            "input": "&curvearrowleft;",
+            "description": "Named entity: curvearrowleft; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21b6"
+                ]
+            ]
+        },
+        {
+            "input": "&curvearrowright",
+            "description": "Bad named entity: curvearrowright without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&curvearrowright"
+                ]
+            ]
+        },
+        {
+            "input": "&curvearrowright;",
+            "description": "Named entity: curvearrowright; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21b7"
+                ]
+            ]
+        },
+        {
+            "input": "&cuvee",
+            "description": "Bad named entity: cuvee without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cuvee"
+                ]
+            ]
+        },
+        {
+            "input": "&cuvee;",
+            "description": "Named entity: cuvee; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22ce"
+                ]
+            ]
+        },
+        {
+            "input": "&cuwed",
+            "description": "Bad named entity: cuwed without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cuwed"
+                ]
+            ]
+        },
+        {
+            "input": "&cuwed;",
+            "description": "Named entity: cuwed; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22cf"
+                ]
+            ]
+        },
+        {
+            "input": "&cwconint",
+            "description": "Bad named entity: cwconint without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cwconint"
+                ]
+            ]
+        },
+        {
+            "input": "&cwconint;",
+            "description": "Named entity: cwconint; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2232"
+                ]
+            ]
+        },
+        {
+            "input": "&cwint",
+            "description": "Bad named entity: cwint without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cwint"
+                ]
+            ]
+        },
+        {
+            "input": "&cwint;",
+            "description": "Named entity: cwint; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2231"
+                ]
+            ]
+        },
+        {
+            "input": "&cylcty",
+            "description": "Bad named entity: cylcty without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&cylcty"
+                ]
+            ]
+        },
+        {
+            "input": "&cylcty;",
+            "description": "Named entity: cylcty; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u232d"
+                ]
+            ]
+        },
+        {
+            "input": "&dArr",
+            "description": "Bad named entity: dArr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dArr"
+                ]
+            ]
+        },
+        {
+            "input": "&dArr;",
+            "description": "Named entity: dArr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21d3"
+                ]
+            ]
+        },
+        {
+            "input": "&dHar",
+            "description": "Bad named entity: dHar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dHar"
+                ]
+            ]
+        },
+        {
+            "input": "&dHar;",
+            "description": "Named entity: dHar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2965"
+                ]
+            ]
+        },
+        {
+            "input": "&dagger",
+            "description": "Bad named entity: dagger without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dagger"
+                ]
+            ]
+        },
+        {
+            "input": "&dagger;",
+            "description": "Named entity: dagger; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2020"
+                ]
+            ]
+        },
+        {
+            "input": "&daleth",
+            "description": "Bad named entity: daleth without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&daleth"
+                ]
+            ]
+        },
+        {
+            "input": "&daleth;",
+            "description": "Named entity: daleth; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2138"
+                ]
+            ]
+        },
+        {
+            "input": "&darr",
+            "description": "Bad named entity: darr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&darr"
+                ]
+            ]
+        },
+        {
+            "input": "&darr;",
+            "description": "Named entity: darr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2193"
+                ]
+            ]
+        },
+        {
+            "input": "&dash",
+            "description": "Bad named entity: dash without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dash"
+                ]
+            ]
+        },
+        {
+            "input": "&dash;",
+            "description": "Named entity: dash; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2010"
+                ]
+            ]
+        },
+        {
+            "input": "&dashv",
+            "description": "Bad named entity: dashv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dashv"
+                ]
+            ]
+        },
+        {
+            "input": "&dashv;",
+            "description": "Named entity: dashv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22a3"
+                ]
+            ]
+        },
+        {
+            "input": "&dbkarow",
+            "description": "Bad named entity: dbkarow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dbkarow"
+                ]
+            ]
+        },
+        {
+            "input": "&dbkarow;",
+            "description": "Named entity: dbkarow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u290f"
+                ]
+            ]
+        },
+        {
+            "input": "&dblac",
+            "description": "Bad named entity: dblac without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dblac"
+                ]
+            ]
+        },
+        {
+            "input": "&dblac;",
+            "description": "Named entity: dblac; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u02dd"
+                ]
+            ]
+        },
+        {
+            "input": "&dcaron",
+            "description": "Bad named entity: dcaron without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dcaron"
+                ]
+            ]
+        },
+        {
+            "input": "&dcaron;",
+            "description": "Named entity: dcaron; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u010f"
+                ]
+            ]
+        },
+        {
+            "input": "&dcy",
+            "description": "Bad named entity: dcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dcy"
+                ]
+            ]
+        },
+        {
+            "input": "&dcy;",
+            "description": "Named entity: dcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0434"
+                ]
+            ]
+        },
+        {
+            "input": "&dd",
+            "description": "Bad named entity: dd without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dd"
+                ]
+            ]
+        },
+        {
+            "input": "&dd;",
+            "description": "Named entity: dd; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2146"
+                ]
+            ]
+        },
+        {
+            "input": "&ddagger",
+            "description": "Bad named entity: ddagger without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ddagger"
+                ]
+            ]
+        },
+        {
+            "input": "&ddagger;",
+            "description": "Named entity: ddagger; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2021"
+                ]
+            ]
+        },
+        {
+            "input": "&ddarr",
+            "description": "Bad named entity: ddarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ddarr"
+                ]
+            ]
+        },
+        {
+            "input": "&ddarr;",
+            "description": "Named entity: ddarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21ca"
+                ]
+            ]
+        },
+        {
+            "input": "&ddotseq",
+            "description": "Bad named entity: ddotseq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ddotseq"
+                ]
+            ]
+        },
+        {
+            "input": "&ddotseq;",
+            "description": "Named entity: ddotseq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a77"
+                ]
+            ]
+        },
+        {
+            "input": "&deg",
+            "description": "Named entity: deg without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b0"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 5 }
+            ]
+        },
+        {
+            "input": "&deg;",
+            "description": "Named entity: deg; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b0"
+                ]
+            ]
+        },
+        {
+            "input": "&delta",
+            "description": "Bad named entity: delta without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&delta"
+                ]
+            ]
+        },
+        {
+            "input": "&delta;",
+            "description": "Named entity: delta; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03b4"
+                ]
+            ]
+        },
+        {
+            "input": "&demptyv",
+            "description": "Bad named entity: demptyv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&demptyv"
+                ]
+            ]
+        },
+        {
+            "input": "&demptyv;",
+            "description": "Named entity: demptyv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29b1"
+                ]
+            ]
+        },
+        {
+            "input": "&dfisht",
+            "description": "Bad named entity: dfisht without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dfisht"
+                ]
+            ]
+        },
+        {
+            "input": "&dfisht;",
+            "description": "Named entity: dfisht; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u297f"
+                ]
+            ]
+        },
+        {
+            "input": "&dfr",
+            "description": "Bad named entity: dfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dfr"
+                ]
+            ]
+        },
+        {
+            "input": "&dfr;",
+            "description": "Named entity: dfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd21"
+                ]
+            ]
+        },
+        {
+            "input": "&dharl",
+            "description": "Bad named entity: dharl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dharl"
+                ]
+            ]
+        },
+        {
+            "input": "&dharl;",
+            "description": "Named entity: dharl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c3"
+                ]
+            ]
+        },
+        {
+            "input": "&dharr",
+            "description": "Bad named entity: dharr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dharr"
+                ]
+            ]
+        },
+        {
+            "input": "&dharr;",
+            "description": "Named entity: dharr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c2"
+                ]
+            ]
+        },
+        {
+            "input": "&diam",
+            "description": "Bad named entity: diam without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&diam"
+                ]
+            ]
+        },
+        {
+            "input": "&diam;",
+            "description": "Named entity: diam; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22c4"
+                ]
+            ]
+        },
+        {
+            "input": "&diamond",
+            "description": "Bad named entity: diamond without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&diamond"
+                ]
+            ]
+        },
+        {
+            "input": "&diamond;",
+            "description": "Named entity: diamond; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22c4"
+                ]
+            ]
+        },
+        {
+            "input": "&diamondsuit",
+            "description": "Bad named entity: diamondsuit without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&diamondsuit"
+                ]
+            ]
+        },
+        {
+            "input": "&diamondsuit;",
+            "description": "Named entity: diamondsuit; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2666"
+                ]
+            ]
+        },
+        {
+            "input": "&diams",
+            "description": "Bad named entity: diams without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&diams"
+                ]
+            ]
+        },
+        {
+            "input": "&diams;",
+            "description": "Named entity: diams; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2666"
+                ]
+            ]
+        },
+        {
+            "input": "&die",
+            "description": "Bad named entity: die without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&die"
+                ]
+            ]
+        },
+        {
+            "input": "&die;",
+            "description": "Named entity: die; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a8"
+                ]
+            ]
+        },
+        {
+            "input": "&digamma",
+            "description": "Bad named entity: digamma without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&digamma"
+                ]
+            ]
+        },
+        {
+            "input": "&digamma;",
+            "description": "Named entity: digamma; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03dd"
+                ]
+            ]
+        },
+        {
+            "input": "&disin",
+            "description": "Bad named entity: disin without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&disin"
+                ]
+            ]
+        },
+        {
+            "input": "&disin;",
+            "description": "Named entity: disin; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22f2"
+                ]
+            ]
+        },
+        {
+            "input": "&div",
+            "description": "Bad named entity: div without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&div"
+                ]
+            ]
+        },
+        {
+            "input": "&div;",
+            "description": "Named entity: div; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00f7"
+                ]
+            ]
+        },
+        {
+            "input": "&divide",
+            "description": "Named entity: divide without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00f7"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&divide;",
+            "description": "Named entity: divide; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00f7"
+                ]
+            ]
+        },
+        {
+            "input": "&divideontimes;",
+            "description": "Named entity: divideontimes; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22c7"
+                ]
+            ]
+        },
+        {
+            "input": "&divonx",
+            "description": "Bad named entity: divonx without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&divonx"
+                ]
+            ]
+        },
+        {
+            "input": "&divonx;",
+            "description": "Named entity: divonx; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22c7"
+                ]
+            ]
+        },
+        {
+            "input": "&djcy",
+            "description": "Bad named entity: djcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&djcy"
+                ]
+            ]
+        },
+        {
+            "input": "&djcy;",
+            "description": "Named entity: djcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0452"
+                ]
+            ]
+        },
+        {
+            "input": "&dlcorn",
+            "description": "Bad named entity: dlcorn without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dlcorn"
+                ]
+            ]
+        },
+        {
+            "input": "&dlcorn;",
+            "description": "Named entity: dlcorn; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u231e"
+                ]
+            ]
+        },
+        {
+            "input": "&dlcrop",
+            "description": "Bad named entity: dlcrop without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dlcrop"
+                ]
+            ]
+        },
+        {
+            "input": "&dlcrop;",
+            "description": "Named entity: dlcrop; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u230d"
+                ]
+            ]
+        },
+        {
+            "input": "&dollar",
+            "description": "Bad named entity: dollar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dollar"
+                ]
+            ]
+        },
+        {
+            "input": "&dollar;",
+            "description": "Named entity: dollar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "$"
+                ]
+            ]
+        },
+        {
+            "input": "&dopf",
+            "description": "Bad named entity: dopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dopf"
+                ]
+            ]
+        },
+        {
+            "input": "&dopf;",
+            "description": "Named entity: dopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd55"
+                ]
+            ]
+        },
+        {
+            "input": "&dot",
+            "description": "Bad named entity: dot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dot"
+                ]
+            ]
+        },
+        {
+            "input": "&dot;",
+            "description": "Named entity: dot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u02d9"
+                ]
+            ]
+        },
+        {
+            "input": "&doteq",
+            "description": "Bad named entity: doteq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&doteq"
+                ]
+            ]
+        },
+        {
+            "input": "&doteq;",
+            "description": "Named entity: doteq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2250"
+                ]
+            ]
+        },
+        {
+            "input": "&doteqdot",
+            "description": "Bad named entity: doteqdot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&doteqdot"
+                ]
+            ]
+        },
+        {
+            "input": "&doteqdot;",
+            "description": "Named entity: doteqdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2251"
+                ]
+            ]
+        },
+        {
+            "input": "&dotminus",
+            "description": "Bad named entity: dotminus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dotminus"
+                ]
+            ]
+        },
+        {
+            "input": "&dotminus;",
+            "description": "Named entity: dotminus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2238"
+                ]
+            ]
+        },
+        {
+            "input": "&dotplus",
+            "description": "Bad named entity: dotplus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dotplus"
+                ]
+            ]
+        },
+        {
+            "input": "&dotplus;",
+            "description": "Named entity: dotplus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2214"
+                ]
+            ]
+        },
+        {
+            "input": "&dotsquare",
+            "description": "Bad named entity: dotsquare without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dotsquare"
+                ]
+            ]
+        },
+        {
+            "input": "&dotsquare;",
+            "description": "Named entity: dotsquare; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22a1"
+                ]
+            ]
+        },
+        {
+            "input": "&doublebarwedge",
+            "description": "Bad named entity: doublebarwedge without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&doublebarwedge"
+                ]
+            ]
+        },
+        {
+            "input": "&doublebarwedge;",
+            "description": "Named entity: doublebarwedge; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2306"
+                ]
+            ]
+        },
+        {
+            "input": "&downarrow",
+            "description": "Bad named entity: downarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&downarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&downarrow;",
+            "description": "Named entity: downarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2193"
+                ]
+            ]
+        },
+        {
+            "input": "&downdownarrows",
+            "description": "Bad named entity: downdownarrows without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&downdownarrows"
+                ]
+            ]
+        },
+        {
+            "input": "&downdownarrows;",
+            "description": "Named entity: downdownarrows; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21ca"
+                ]
+            ]
+        },
+        {
+            "input": "&downharpoonleft",
+            "description": "Bad named entity: downharpoonleft without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&downharpoonleft"
+                ]
+            ]
+        },
+        {
+            "input": "&downharpoonleft;",
+            "description": "Named entity: downharpoonleft; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c3"
+                ]
+            ]
+        },
+        {
+            "input": "&downharpoonright",
+            "description": "Bad named entity: downharpoonright without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&downharpoonright"
+                ]
+            ]
+        },
+        {
+            "input": "&downharpoonright;",
+            "description": "Named entity: downharpoonright; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c2"
+                ]
+            ]
+        },
+        {
+            "input": "&drbkarow",
+            "description": "Bad named entity: drbkarow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&drbkarow"
+                ]
+            ]
+        },
+        {
+            "input": "&drbkarow;",
+            "description": "Named entity: drbkarow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2910"
+                ]
+            ]
+        },
+        {
+            "input": "&drcorn",
+            "description": "Bad named entity: drcorn without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&drcorn"
+                ]
+            ]
+        },
+        {
+            "input": "&drcorn;",
+            "description": "Named entity: drcorn; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u231f"
+                ]
+            ]
+        },
+        {
+            "input": "&drcrop",
+            "description": "Bad named entity: drcrop without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&drcrop"
+                ]
+            ]
+        },
+        {
+            "input": "&drcrop;",
+            "description": "Named entity: drcrop; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u230c"
+                ]
+            ]
+        },
+        {
+            "input": "&dscr",
+            "description": "Bad named entity: dscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dscr"
+                ]
+            ]
+        },
+        {
+            "input": "&dscr;",
+            "description": "Named entity: dscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcb9"
+                ]
+            ]
+        },
+        {
+            "input": "&dscy",
+            "description": "Bad named entity: dscy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dscy"
+                ]
+            ]
+        },
+        {
+            "input": "&dscy;",
+            "description": "Named entity: dscy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0455"
+                ]
+            ]
+        },
+        {
+            "input": "&dsol",
+            "description": "Bad named entity: dsol without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dsol"
+                ]
+            ]
+        },
+        {
+            "input": "&dsol;",
+            "description": "Named entity: dsol; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29f6"
+                ]
+            ]
+        },
+        {
+            "input": "&dstrok",
+            "description": "Bad named entity: dstrok without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dstrok"
+                ]
+            ]
+        },
+        {
+            "input": "&dstrok;",
+            "description": "Named entity: dstrok; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0111"
+                ]
+            ]
+        },
+        {
+            "input": "&dtdot",
+            "description": "Bad named entity: dtdot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dtdot"
+                ]
+            ]
+        },
+        {
+            "input": "&dtdot;",
+            "description": "Named entity: dtdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22f1"
+                ]
+            ]
+        },
+        {
+            "input": "&dtri",
+            "description": "Bad named entity: dtri without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dtri"
+                ]
+            ]
+        },
+        {
+            "input": "&dtri;",
+            "description": "Named entity: dtri; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25bf"
+                ]
+            ]
+        },
+        {
+            "input": "&dtrif",
+            "description": "Bad named entity: dtrif without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dtrif"
+                ]
+            ]
+        },
+        {
+            "input": "&dtrif;",
+            "description": "Named entity: dtrif; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25be"
+                ]
+            ]
+        },
+        {
+            "input": "&duarr",
+            "description": "Bad named entity: duarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&duarr"
+                ]
+            ]
+        },
+        {
+            "input": "&duarr;",
+            "description": "Named entity: duarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21f5"
+                ]
+            ]
+        },
+        {
+            "input": "&duhar",
+            "description": "Bad named entity: duhar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&duhar"
+                ]
+            ]
+        },
+        {
+            "input": "&duhar;",
+            "description": "Named entity: duhar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u296f"
+                ]
+            ]
+        },
+        {
+            "input": "&dwangle",
+            "description": "Bad named entity: dwangle without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dwangle"
+                ]
+            ]
+        },
+        {
+            "input": "&dwangle;",
+            "description": "Named entity: dwangle; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29a6"
+                ]
+            ]
+        },
+        {
+            "input": "&dzcy",
+            "description": "Bad named entity: dzcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dzcy"
+                ]
+            ]
+        },
+        {
+            "input": "&dzcy;",
+            "description": "Named entity: dzcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u045f"
+                ]
+            ]
+        },
+        {
+            "input": "&dzigrarr",
+            "description": "Bad named entity: dzigrarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&dzigrarr"
+                ]
+            ]
+        },
+        {
+            "input": "&dzigrarr;",
+            "description": "Named entity: dzigrarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27ff"
+                ]
+            ]
+        },
+        {
+            "input": "&eDDot",
+            "description": "Bad named entity: eDDot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&eDDot"
+                ]
+            ]
+        },
+        {
+            "input": "&eDDot;",
+            "description": "Named entity: eDDot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a77"
+                ]
+            ]
+        },
+        {
+            "input": "&eDot",
+            "description": "Bad named entity: eDot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&eDot"
+                ]
+            ]
+        },
+        {
+            "input": "&eDot;",
+            "description": "Named entity: eDot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2251"
+                ]
+            ]
+        },
+        {
+            "input": "&eacute",
+            "description": "Named entity: eacute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00e9"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&eacute;",
+            "description": "Named entity: eacute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00e9"
+                ]
+            ]
+        },
+        {
+            "input": "&easter",
+            "description": "Bad named entity: easter without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&easter"
+                ]
+            ]
+        },
+        {
+            "input": "&easter;",
+            "description": "Named entity: easter; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a6e"
+                ]
+            ]
+        },
+        {
+            "input": "&ecaron",
+            "description": "Bad named entity: ecaron without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ecaron"
+                ]
+            ]
+        },
+        {
+            "input": "&ecaron;",
+            "description": "Named entity: ecaron; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u011b"
+                ]
+            ]
+        },
+        {
+            "input": "&ecir",
+            "description": "Bad named entity: ecir without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ecir"
+                ]
+            ]
+        },
+        {
+            "input": "&ecir;",
+            "description": "Named entity: ecir; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2256"
+                ]
+            ]
+        },
+        {
+            "input": "&ecirc",
+            "description": "Named entity: ecirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ea"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 7 }
+            ]
+        },
+        {
+            "input": "&ecirc;",
+            "description": "Named entity: ecirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ea"
+                ]
+            ]
+        },
+        {
+            "input": "&ecolon",
+            "description": "Bad named entity: ecolon without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ecolon"
+                ]
+            ]
+        },
+        {
+            "input": "&ecolon;",
+            "description": "Named entity: ecolon; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2255"
+                ]
+            ]
+        },
+        {
+            "input": "&ecy",
+            "description": "Bad named entity: ecy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ecy"
+                ]
+            ]
+        },
+        {
+            "input": "&ecy;",
+            "description": "Named entity: ecy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u044d"
+                ]
+            ]
+        },
+        {
+            "input": "&edot",
+            "description": "Bad named entity: edot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&edot"
+                ]
+            ]
+        },
+        {
+            "input": "&edot;",
+            "description": "Named entity: edot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0117"
+                ]
+            ]
+        },
+        {
+            "input": "&ee",
+            "description": "Bad named entity: ee without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ee"
+                ]
+            ]
+        },
+        {
+            "input": "&ee;",
+            "description": "Named entity: ee; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2147"
+                ]
+            ]
+        },
+        {
+            "input": "&efDot",
+            "description": "Bad named entity: efDot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&efDot"
+                ]
+            ]
+        },
+        {
+            "input": "&efDot;",
+            "description": "Named entity: efDot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2252"
+                ]
+            ]
+        },
+        {
+            "input": "&efr",
+            "description": "Bad named entity: efr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&efr"
+                ]
+            ]
+        },
+        {
+            "input": "&efr;",
+            "description": "Named entity: efr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd22"
+                ]
+            ]
+        },
+        {
+            "input": "&eg",
+            "description": "Bad named entity: eg without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&eg"
+                ]
+            ]
+        },
+        {
+            "input": "&eg;",
+            "description": "Named entity: eg; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a9a"
+                ]
+            ]
+        },
+        {
+            "input": "&egrave",
+            "description": "Named entity: egrave without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00e8"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&egrave;",
+            "description": "Named entity: egrave; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00e8"
+                ]
+            ]
+        },
+        {
+            "input": "&egs",
+            "description": "Bad named entity: egs without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&egs"
+                ]
+            ]
+        },
+        {
+            "input": "&egs;",
+            "description": "Named entity: egs; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a96"
+                ]
+            ]
+        },
+        {
+            "input": "&egsdot",
+            "description": "Bad named entity: egsdot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&egsdot"
+                ]
+            ]
+        },
+        {
+            "input": "&egsdot;",
+            "description": "Named entity: egsdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a98"
+                ]
+            ]
+        },
+        {
+            "input": "&el",
+            "description": "Bad named entity: el without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&el"
+                ]
+            ]
+        },
+        {
+            "input": "&el;",
+            "description": "Named entity: el; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a99"
+                ]
+            ]
+        },
+        {
+            "input": "&elinters",
+            "description": "Bad named entity: elinters without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&elinters"
+                ]
+            ]
+        },
+        {
+            "input": "&elinters;",
+            "description": "Named entity: elinters; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u23e7"
+                ]
+            ]
+        },
+        {
+            "input": "&ell",
+            "description": "Bad named entity: ell without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ell"
+                ]
+            ]
+        },
+        {
+            "input": "&ell;",
+            "description": "Named entity: ell; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2113"
+                ]
+            ]
+        },
+        {
+            "input": "&els",
+            "description": "Bad named entity: els without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&els"
+                ]
+            ]
+        },
+        {
+            "input": "&els;",
+            "description": "Named entity: els; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a95"
+                ]
+            ]
+        },
+        {
+            "input": "&elsdot",
+            "description": "Bad named entity: elsdot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&elsdot"
+                ]
+            ]
+        },
+        {
+            "input": "&elsdot;",
+            "description": "Named entity: elsdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a97"
+                ]
+            ]
+        },
+        {
+            "input": "&emacr",
+            "description": "Bad named entity: emacr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&emacr"
+                ]
+            ]
+        },
+        {
+            "input": "&emacr;",
+            "description": "Named entity: emacr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0113"
+                ]
+            ]
+        },
+        {
+            "input": "&empty",
+            "description": "Bad named entity: empty without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&empty"
+                ]
+            ]
+        },
+        {
+            "input": "&empty;",
+            "description": "Named entity: empty; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2205"
+                ]
+            ]
+        },
+        {
+            "input": "&emptyset",
+            "description": "Bad named entity: emptyset without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&emptyset"
+                ]
+            ]
+        },
+        {
+            "input": "&emptyset;",
+            "description": "Named entity: emptyset; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2205"
+                ]
+            ]
+        },
+        {
+            "input": "&emptyv",
+            "description": "Bad named entity: emptyv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&emptyv"
+                ]
+            ]
+        },
+        {
+            "input": "&emptyv;",
+            "description": "Named entity: emptyv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2205"
+                ]
+            ]
+        },
+        {
+            "input": "&emsp",
+            "description": "Bad named entity: emsp without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&emsp"
+                ]
+            ]
+        },
+        {
+            "input": "&emsp13",
+            "description": "Bad named entity: emsp13 without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&emsp13"
+                ]
+            ]
+        },
+        {
+            "input": "&emsp13;",
+            "description": "Named entity: emsp13; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2004"
+                ]
+            ]
+        },
+        {
+            "input": "&emsp14",
+            "description": "Bad named entity: emsp14 without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&emsp14"
+                ]
+            ]
+        },
+        {
+            "input": "&emsp14;",
+            "description": "Named entity: emsp14; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2005"
+                ]
+            ]
+        },
+        {
+            "input": "&emsp;",
+            "description": "Named entity: emsp; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2003"
+                ]
+            ]
+        },
+        {
+            "input": "&eng",
+            "description": "Bad named entity: eng without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&eng"
+                ]
+            ]
+        },
+        {
+            "input": "&eng;",
+            "description": "Named entity: eng; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u014b"
+                ]
+            ]
+        },
+        {
+            "input": "&ensp",
+            "description": "Bad named entity: ensp without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ensp"
+                ]
+            ]
+        },
+        {
+            "input": "&ensp;",
+            "description": "Named entity: ensp; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2002"
+                ]
+            ]
+        },
+        {
+            "input": "&eogon",
+            "description": "Bad named entity: eogon without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&eogon"
+                ]
+            ]
+        },
+        {
+            "input": "&eogon;",
+            "description": "Named entity: eogon; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0119"
+                ]
+            ]
+        },
+        {
+            "input": "&eopf",
+            "description": "Bad named entity: eopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&eopf"
+                ]
+            ]
+        },
+        {
+            "input": "&eopf;",
+            "description": "Named entity: eopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd56"
+                ]
+            ]
+        },
+        {
+            "input": "&epar",
+            "description": "Bad named entity: epar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&epar"
+                ]
+            ]
+        },
+        {
+            "input": "&epar;",
+            "description": "Named entity: epar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22d5"
+                ]
+            ]
+        },
+        {
+            "input": "&eparsl",
+            "description": "Bad named entity: eparsl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&eparsl"
+                ]
+            ]
+        },
+        {
+            "input": "&eparsl;",
+            "description": "Named entity: eparsl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29e3"
+                ]
+            ]
+        },
+        {
+            "input": "&eplus",
+            "description": "Bad named entity: eplus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&eplus"
+                ]
+            ]
+        },
+        {
+            "input": "&eplus;",
+            "description": "Named entity: eplus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a71"
+                ]
+            ]
+        },
+        {
+            "input": "&epsi",
+            "description": "Bad named entity: epsi without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&epsi"
+                ]
+            ]
+        },
+        {
+            "input": "&epsi;",
+            "description": "Named entity: epsi; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03b5"
+                ]
+            ]
+        },
+        {
+            "input": "&epsilon",
+            "description": "Bad named entity: epsilon without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&epsilon"
+                ]
+            ]
+        },
+        {
+            "input": "&epsilon;",
+            "description": "Named entity: epsilon; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03b5"
+                ]
+            ]
+        },
+        {
+            "input": "&epsiv",
+            "description": "Bad named entity: epsiv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&epsiv"
+                ]
+            ]
+        },
+        {
+            "input": "&epsiv;",
+            "description": "Named entity: epsiv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03f5"
+                ]
+            ]
+        },
+        {
+            "input": "&eqcirc",
+            "description": "Bad named entity: eqcirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&eqcirc"
+                ]
+            ]
+        },
+        {
+            "input": "&eqcirc;",
+            "description": "Named entity: eqcirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2256"
+                ]
+            ]
+        },
+        {
+            "input": "&eqcolon",
+            "description": "Bad named entity: eqcolon without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&eqcolon"
+                ]
+            ]
+        },
+        {
+            "input": "&eqcolon;",
+            "description": "Named entity: eqcolon; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2255"
+                ]
+            ]
+        },
+        {
+            "input": "&eqsim",
+            "description": "Bad named entity: eqsim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&eqsim"
+                ]
+            ]
+        },
+        {
+            "input": "&eqsim;",
+            "description": "Named entity: eqsim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2242"
+                ]
+            ]
+        },
+        {
+            "input": "&eqslantgtr",
+            "description": "Bad named entity: eqslantgtr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&eqslantgtr"
+                ]
+            ]
+        },
+        {
+            "input": "&eqslantgtr;",
+            "description": "Named entity: eqslantgtr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a96"
+                ]
+            ]
+        },
+        {
+            "input": "&eqslantless",
+            "description": "Bad named entity: eqslantless without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&eqslantless"
+                ]
+            ]
+        },
+        {
+            "input": "&eqslantless;",
+            "description": "Named entity: eqslantless; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a95"
+                ]
+            ]
+        },
+        {
+            "input": "&equals",
+            "description": "Bad named entity: equals without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&equals"
+                ]
+            ]
+        },
+        {
+            "input": "&equals;",
+            "description": "Named entity: equals; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "="
+                ]
+            ]
+        },
+        {
+            "input": "&equest",
+            "description": "Bad named entity: equest without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&equest"
+                ]
+            ]
+        },
+        {
+            "input": "&equest;",
+            "description": "Named entity: equest; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u225f"
+                ]
+            ]
+        },
+        {
+            "input": "&equiv",
+            "description": "Bad named entity: equiv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&equiv"
+                ]
+            ]
+        },
+        {
+            "input": "&equiv;",
+            "description": "Named entity: equiv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2261"
+                ]
+            ]
+        },
+        {
+            "input": "&equivDD",
+            "description": "Bad named entity: equivDD without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&equivDD"
+                ]
+            ]
+        },
+        {
+            "input": "&equivDD;",
+            "description": "Named entity: equivDD; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a78"
+                ]
+            ]
+        },
+        {
+            "input": "&eqvparsl",
+            "description": "Bad named entity: eqvparsl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&eqvparsl"
+                ]
+            ]
+        },
+        {
+            "input": "&eqvparsl;",
+            "description": "Named entity: eqvparsl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29e5"
+                ]
+            ]
+        },
+        {
+            "input": "&erDot",
+            "description": "Bad named entity: erDot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&erDot"
+                ]
+            ]
+        },
+        {
+            "input": "&erDot;",
+            "description": "Named entity: erDot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2253"
+                ]
+            ]
+        },
+        {
+            "input": "&erarr",
+            "description": "Bad named entity: erarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&erarr"
+                ]
+            ]
+        },
+        {
+            "input": "&erarr;",
+            "description": "Named entity: erarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2971"
+                ]
+            ]
+        },
+        {
+            "input": "&escr",
+            "description": "Bad named entity: escr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&escr"
+                ]
+            ]
+        },
+        {
+            "input": "&escr;",
+            "description": "Named entity: escr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u212f"
+                ]
+            ]
+        },
+        {
+            "input": "&esdot",
+            "description": "Bad named entity: esdot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&esdot"
+                ]
+            ]
+        },
+        {
+            "input": "&esdot;",
+            "description": "Named entity: esdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2250"
+                ]
+            ]
+        },
+        {
+            "input": "&esim",
+            "description": "Bad named entity: esim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&esim"
+                ]
+            ]
+        },
+        {
+            "input": "&esim;",
+            "description": "Named entity: esim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2242"
+                ]
+            ]
+        },
+        {
+            "input": "&eta",
+            "description": "Bad named entity: eta without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&eta"
+                ]
+            ]
+        },
+        {
+            "input": "&eta;",
+            "description": "Named entity: eta; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03b7"
+                ]
+            ]
+        },
+        {
+            "input": "&eth",
+            "description": "Named entity: eth without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00f0"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 5 }
+            ]
+        },
+        {
+            "input": "&eth;",
+            "description": "Named entity: eth; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00f0"
+                ]
+            ]
+        },
+        {
+            "input": "&euml",
+            "description": "Named entity: euml without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00eb"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 6 }
+            ]
+        },
+        {
+            "input": "&euml;",
+            "description": "Named entity: euml; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00eb"
+                ]
+            ]
+        },
+        {
+            "input": "&euro",
+            "description": "Bad named entity: euro without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&euro"
+                ]
+            ]
+        },
+        {
+            "input": "&euro;",
+            "description": "Named entity: euro; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u20ac"
+                ]
+            ]
+        },
+        {
+            "input": "&excl",
+            "description": "Bad named entity: excl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&excl"
+                ]
+            ]
+        },
+        {
+            "input": "&excl;",
+            "description": "Named entity: excl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "!"
+                ]
+            ]
+        },
+        {
+            "input": "&exist",
+            "description": "Bad named entity: exist without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&exist"
+                ]
+            ]
+        },
+        {
+            "input": "&exist;",
+            "description": "Named entity: exist; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2203"
+                ]
+            ]
+        },
+        {
+            "input": "&expectation",
+            "description": "Bad named entity: expectation without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&expectation"
+                ]
+            ]
+        },
+        {
+            "input": "&expectation;",
+            "description": "Named entity: expectation; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2130"
+                ]
+            ]
+        },
+        {
+            "input": "&exponentiale",
+            "description": "Bad named entity: exponentiale without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&exponentiale"
+                ]
+            ]
+        },
+        {
+            "input": "&exponentiale;",
+            "description": "Named entity: exponentiale; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2147"
+                ]
+            ]
+        },
+        {
+            "input": "&fallingdotseq",
+            "description": "Bad named entity: fallingdotseq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&fallingdotseq"
+                ]
+            ]
+        },
+        {
+            "input": "&fallingdotseq;",
+            "description": "Named entity: fallingdotseq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2252"
+                ]
+            ]
+        },
+        {
+            "input": "&fcy",
+            "description": "Bad named entity: fcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&fcy"
+                ]
+            ]
+        },
+        {
+            "input": "&fcy;",
+            "description": "Named entity: fcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0444"
+                ]
+            ]
+        },
+        {
+            "input": "&female",
+            "description": "Bad named entity: female without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&female"
+                ]
+            ]
+        },
+        {
+            "input": "&female;",
+            "description": "Named entity: female; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2640"
+                ]
+            ]
+        },
+        {
+            "input": "&ffilig",
+            "description": "Bad named entity: ffilig without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ffilig"
+                ]
+            ]
+        },
+        {
+            "input": "&ffilig;",
+            "description": "Named entity: ffilig; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ufb03"
+                ]
+            ]
+        },
+        {
+            "input": "&fflig",
+            "description": "Bad named entity: fflig without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&fflig"
+                ]
+            ]
+        },
+        {
+            "input": "&fflig;",
+            "description": "Named entity: fflig; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ufb00"
+                ]
+            ]
+        },
+        {
+            "input": "&ffllig",
+            "description": "Bad named entity: ffllig without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ffllig"
+                ]
+            ]
+        },
+        {
+            "input": "&ffllig;",
+            "description": "Named entity: ffllig; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ufb04"
+                ]
+            ]
+        },
+        {
+            "input": "&ffr",
+            "description": "Bad named entity: ffr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ffr"
+                ]
+            ]
+        },
+        {
+            "input": "&ffr;",
+            "description": "Named entity: ffr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd23"
+                ]
+            ]
+        },
+        {
+            "input": "&filig",
+            "description": "Bad named entity: filig without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&filig"
+                ]
+            ]
+        },
+        {
+            "input": "&filig;",
+            "description": "Named entity: filig; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ufb01"
+                ]
+            ]
+        },
+        {
+            "input": "&fjlig",
+            "description": "Bad named entity: fjlig without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&fjlig"
+                ]
+            ]
+        },
+        {
+            "input": "&fjlig;",
+            "description": "Named entity: fjlig; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "fj"
+                ]
+            ]
+        },
+        {
+            "input": "&flat",
+            "description": "Bad named entity: flat without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&flat"
+                ]
+            ]
+        },
+        {
+            "input": "&flat;",
+            "description": "Named entity: flat; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u266d"
+                ]
+            ]
+        },
+        {
+            "input": "&fllig",
+            "description": "Bad named entity: fllig without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&fllig"
+                ]
+            ]
+        },
+        {
+            "input": "&fllig;",
+            "description": "Named entity: fllig; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ufb02"
+                ]
+            ]
+        },
+        {
+            "input": "&fltns",
+            "description": "Bad named entity: fltns without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&fltns"
+                ]
+            ]
+        },
+        {
+            "input": "&fltns;",
+            "description": "Named entity: fltns; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25b1"
+                ]
+            ]
+        },
+        {
+            "input": "&fnof",
+            "description": "Bad named entity: fnof without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&fnof"
+                ]
+            ]
+        },
+        {
+            "input": "&fnof;",
+            "description": "Named entity: fnof; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0192"
+                ]
+            ]
+        },
+        {
+            "input": "&fopf",
+            "description": "Bad named entity: fopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&fopf"
+                ]
+            ]
+        },
+        {
+            "input": "&fopf;",
+            "description": "Named entity: fopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd57"
+                ]
+            ]
+        },
+        {
+            "input": "&forall",
+            "description": "Bad named entity: forall without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&forall"
+                ]
+            ]
+        },
+        {
+            "input": "&forall;",
+            "description": "Named entity: forall; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2200"
+                ]
+            ]
+        },
+        {
+            "input": "&fork",
+            "description": "Bad named entity: fork without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&fork"
+                ]
+            ]
+        },
+        {
+            "input": "&fork;",
+            "description": "Named entity: fork; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22d4"
+                ]
+            ]
+        },
+        {
+            "input": "&forkv",
+            "description": "Bad named entity: forkv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&forkv"
+                ]
+            ]
+        },
+        {
+            "input": "&forkv;",
+            "description": "Named entity: forkv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ad9"
+                ]
+            ]
+        },
+        {
+            "input": "&fpartint",
+            "description": "Bad named entity: fpartint without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&fpartint"
+                ]
+            ]
+        },
+        {
+            "input": "&fpartint;",
+            "description": "Named entity: fpartint; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a0d"
+                ]
+            ]
+        },
+        {
+            "input": "&frac12",
+            "description": "Named entity: frac12 without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00bd"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&frac12;",
+            "description": "Named entity: frac12; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00bd"
+                ]
+            ]
+        },
+        {
+            "input": "&frac13",
+            "description": "Bad named entity: frac13 without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&frac13"
+                ]
+            ]
+        },
+        {
+            "input": "&frac13;",
+            "description": "Named entity: frac13; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2153"
+                ]
+            ]
+        },
+        {
+            "input": "&frac14",
+            "description": "Named entity: frac14 without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00bc"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&frac14;",
+            "description": "Named entity: frac14; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00bc"
+                ]
+            ]
+        },
+        {
+            "input": "&frac15",
+            "description": "Bad named entity: frac15 without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&frac15"
+                ]
+            ]
+        },
+        {
+            "input": "&frac15;",
+            "description": "Named entity: frac15; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2155"
+                ]
+            ]
+        },
+        {
+            "input": "&frac16",
+            "description": "Bad named entity: frac16 without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&frac16"
+                ]
+            ]
+        },
+        {
+            "input": "&frac16;",
+            "description": "Named entity: frac16; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2159"
+                ]
+            ]
+        },
+        {
+            "input": "&frac18",
+            "description": "Bad named entity: frac18 without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&frac18"
+                ]
+            ]
+        },
+        {
+            "input": "&frac18;",
+            "description": "Named entity: frac18; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u215b"
+                ]
+            ]
+        },
+        {
+            "input": "&frac23",
+            "description": "Bad named entity: frac23 without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&frac23"
+                ]
+            ]
+        },
+        {
+            "input": "&frac23;",
+            "description": "Named entity: frac23; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2154"
+                ]
+            ]
+        },
+        {
+            "input": "&frac25",
+            "description": "Bad named entity: frac25 without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&frac25"
+                ]
+            ]
+        },
+        {
+            "input": "&frac25;",
+            "description": "Named entity: frac25; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2156"
+                ]
+            ]
+        },
+        {
+            "input": "&frac34",
+            "description": "Named entity: frac34 without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00be"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&frac34;",
+            "description": "Named entity: frac34; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00be"
+                ]
+            ]
+        },
+        {
+            "input": "&frac35",
+            "description": "Bad named entity: frac35 without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&frac35"
+                ]
+            ]
+        },
+        {
+            "input": "&frac35;",
+            "description": "Named entity: frac35; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2157"
+                ]
+            ]
+        },
+        {
+            "input": "&frac38",
+            "description": "Bad named entity: frac38 without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&frac38"
+                ]
+            ]
+        },
+        {
+            "input": "&frac38;",
+            "description": "Named entity: frac38; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u215c"
+                ]
+            ]
+        },
+        {
+            "input": "&frac45",
+            "description": "Bad named entity: frac45 without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&frac45"
+                ]
+            ]
+        },
+        {
+            "input": "&frac45;",
+            "description": "Named entity: frac45; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2158"
+                ]
+            ]
+        },
+        {
+            "input": "&frac56",
+            "description": "Bad named entity: frac56 without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&frac56"
+                ]
+            ]
+        },
+        {
+            "input": "&frac56;",
+            "description": "Named entity: frac56; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u215a"
+                ]
+            ]
+        },
+        {
+            "input": "&frac58",
+            "description": "Bad named entity: frac58 without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&frac58"
+                ]
+            ]
+        },
+        {
+            "input": "&frac58;",
+            "description": "Named entity: frac58; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u215d"
+                ]
+            ]
+        },
+        {
+            "input": "&frac78",
+            "description": "Bad named entity: frac78 without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&frac78"
+                ]
+            ]
+        },
+        {
+            "input": "&frac78;",
+            "description": "Named entity: frac78; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u215e"
+                ]
+            ]
+        },
+        {
+            "input": "&frasl",
+            "description": "Bad named entity: frasl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&frasl"
+                ]
+            ]
+        },
+        {
+            "input": "&frasl;",
+            "description": "Named entity: frasl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2044"
+                ]
+            ]
+        },
+        {
+            "input": "&frown",
+            "description": "Bad named entity: frown without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&frown"
+                ]
+            ]
+        },
+        {
+            "input": "&frown;",
+            "description": "Named entity: frown; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2322"
+                ]
+            ]
+        },
+        {
+            "input": "&fscr",
+            "description": "Bad named entity: fscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&fscr"
+                ]
+            ]
+        },
+        {
+            "input": "&fscr;",
+            "description": "Named entity: fscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcbb"
+                ]
+            ]
+        },
+        {
+            "input": "&gE",
+            "description": "Bad named entity: gE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gE"
+                ]
+            ]
+        },
+        {
+            "input": "&gE;",
+            "description": "Named entity: gE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2267"
+                ]
+            ]
+        },
+        {
+            "input": "&gEl",
+            "description": "Bad named entity: gEl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gEl"
+                ]
+            ]
+        },
+        {
+            "input": "&gEl;",
+            "description": "Named entity: gEl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a8c"
+                ]
+            ]
+        },
+        {
+            "input": "&gacute",
+            "description": "Bad named entity: gacute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gacute"
+                ]
+            ]
+        },
+        {
+            "input": "&gacute;",
+            "description": "Named entity: gacute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u01f5"
+                ]
+            ]
+        },
+        {
+            "input": "&gamma",
+            "description": "Bad named entity: gamma without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gamma"
+                ]
+            ]
+        },
+        {
+            "input": "&gamma;",
+            "description": "Named entity: gamma; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03b3"
+                ]
+            ]
+        },
+        {
+            "input": "&gammad",
+            "description": "Bad named entity: gammad without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gammad"
+                ]
+            ]
+        },
+        {
+            "input": "&gammad;",
+            "description": "Named entity: gammad; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03dd"
+                ]
+            ]
+        },
+        {
+            "input": "&gap",
+            "description": "Bad named entity: gap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gap"
+                ]
+            ]
+        },
+        {
+            "input": "&gap;",
+            "description": "Named entity: gap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a86"
+                ]
+            ]
+        },
+        {
+            "input": "&gbreve",
+            "description": "Bad named entity: gbreve without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gbreve"
+                ]
+            ]
+        },
+        {
+            "input": "&gbreve;",
+            "description": "Named entity: gbreve; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u011f"
+                ]
+            ]
+        },
+        {
+            "input": "&gcirc",
+            "description": "Bad named entity: gcirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gcirc"
+                ]
+            ]
+        },
+        {
+            "input": "&gcirc;",
+            "description": "Named entity: gcirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u011d"
+                ]
+            ]
+        },
+        {
+            "input": "&gcy",
+            "description": "Bad named entity: gcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gcy"
+                ]
+            ]
+        },
+        {
+            "input": "&gcy;",
+            "description": "Named entity: gcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0433"
+                ]
+            ]
+        },
+        {
+            "input": "&gdot",
+            "description": "Bad named entity: gdot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gdot"
+                ]
+            ]
+        },
+        {
+            "input": "&gdot;",
+            "description": "Named entity: gdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0121"
+                ]
+            ]
+        },
+        {
+            "input": "&ge",
+            "description": "Bad named entity: ge without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ge"
+                ]
+            ]
+        },
+        {
+            "input": "&ge;",
+            "description": "Named entity: ge; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2265"
+                ]
+            ]
+        },
+        {
+            "input": "&gel",
+            "description": "Bad named entity: gel without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gel"
+                ]
+            ]
+        },
+        {
+            "input": "&gel;",
+            "description": "Named entity: gel; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22db"
+                ]
+            ]
+        },
+        {
+            "input": "&geq",
+            "description": "Bad named entity: geq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&geq"
+                ]
+            ]
+        },
+        {
+            "input": "&geq;",
+            "description": "Named entity: geq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2265"
+                ]
+            ]
+        },
+        {
+            "input": "&geqq",
+            "description": "Bad named entity: geqq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&geqq"
+                ]
+            ]
+        },
+        {
+            "input": "&geqq;",
+            "description": "Named entity: geqq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2267"
+                ]
+            ]
+        },
+        {
+            "input": "&geqslant",
+            "description": "Bad named entity: geqslant without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&geqslant"
+                ]
+            ]
+        },
+        {
+            "input": "&geqslant;",
+            "description": "Named entity: geqslant; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a7e"
+                ]
+            ]
+        },
+        {
+            "input": "&ges",
+            "description": "Bad named entity: ges without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ges"
+                ]
+            ]
+        },
+        {
+            "input": "&ges;",
+            "description": "Named entity: ges; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a7e"
+                ]
+            ]
+        },
+        {
+            "input": "&gescc",
+            "description": "Bad named entity: gescc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gescc"
+                ]
+            ]
+        },
+        {
+            "input": "&gescc;",
+            "description": "Named entity: gescc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aa9"
+                ]
+            ]
+        },
+        {
+            "input": "&gesdot",
+            "description": "Bad named entity: gesdot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gesdot"
+                ]
+            ]
+        },
+        {
+            "input": "&gesdot;",
+            "description": "Named entity: gesdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a80"
+                ]
+            ]
+        },
+        {
+            "input": "&gesdoto",
+            "description": "Bad named entity: gesdoto without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gesdoto"
+                ]
+            ]
+        },
+        {
+            "input": "&gesdoto;",
+            "description": "Named entity: gesdoto; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a82"
+                ]
+            ]
+        },
+        {
+            "input": "&gesdotol",
+            "description": "Bad named entity: gesdotol without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gesdotol"
+                ]
+            ]
+        },
+        {
+            "input": "&gesdotol;",
+            "description": "Named entity: gesdotol; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a84"
+                ]
+            ]
+        },
+        {
+            "input": "&gesl",
+            "description": "Bad named entity: gesl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gesl"
+                ]
+            ]
+        },
+        {
+            "input": "&gesl;",
+            "description": "Named entity: gesl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22db\ufe00"
+                ]
+            ]
+        },
+        {
+            "input": "&gesles",
+            "description": "Bad named entity: gesles without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gesles"
+                ]
+            ]
+        },
+        {
+            "input": "&gesles;",
+            "description": "Named entity: gesles; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a94"
+                ]
+            ]
+        },
+        {
+            "input": "&gfr",
+            "description": "Bad named entity: gfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gfr"
+                ]
+            ]
+        },
+        {
+            "input": "&gfr;",
+            "description": "Named entity: gfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd24"
+                ]
+            ]
+        },
+        {
+            "input": "&gg",
+            "description": "Bad named entity: gg without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gg"
+                ]
+            ]
+        },
+        {
+            "input": "&gg;",
+            "description": "Named entity: gg; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u226b"
+                ]
+            ]
+        },
+        {
+            "input": "&ggg",
+            "description": "Bad named entity: ggg without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ggg"
+                ]
+            ]
+        },
+        {
+            "input": "&ggg;",
+            "description": "Named entity: ggg; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22d9"
+                ]
+            ]
+        },
+        {
+            "input": "&gimel",
+            "description": "Bad named entity: gimel without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gimel"
+                ]
+            ]
+        },
+        {
+            "input": "&gimel;",
+            "description": "Named entity: gimel; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2137"
+                ]
+            ]
+        },
+        {
+            "input": "&gjcy",
+            "description": "Bad named entity: gjcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gjcy"
+                ]
+            ]
+        },
+        {
+            "input": "&gjcy;",
+            "description": "Named entity: gjcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0453"
+                ]
+            ]
+        },
+        {
+            "input": "&gl",
+            "description": "Bad named entity: gl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gl"
+                ]
+            ]
+        },
+        {
+            "input": "&gl;",
+            "description": "Named entity: gl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2277"
+                ]
+            ]
+        },
+        {
+            "input": "&glE",
+            "description": "Bad named entity: glE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&glE"
+                ]
+            ]
+        },
+        {
+            "input": "&glE;",
+            "description": "Named entity: glE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a92"
+                ]
+            ]
+        },
+        {
+            "input": "&gla",
+            "description": "Bad named entity: gla without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gla"
+                ]
+            ]
+        },
+        {
+            "input": "&gla;",
+            "description": "Named entity: gla; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aa5"
+                ]
+            ]
+        },
+        {
+            "input": "&glj",
+            "description": "Bad named entity: glj without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&glj"
+                ]
+            ]
+        },
+        {
+            "input": "&glj;",
+            "description": "Named entity: glj; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aa4"
+                ]
+            ]
+        },
+        {
+            "input": "&gnE",
+            "description": "Bad named entity: gnE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gnE"
+                ]
+            ]
+        },
+        {
+            "input": "&gnE;",
+            "description": "Named entity: gnE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2269"
+                ]
+            ]
+        },
+        {
+            "input": "&gnap",
+            "description": "Bad named entity: gnap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gnap"
+                ]
+            ]
+        },
+        {
+            "input": "&gnap;",
+            "description": "Named entity: gnap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a8a"
+                ]
+            ]
+        },
+        {
+            "input": "&gnapprox",
+            "description": "Bad named entity: gnapprox without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gnapprox"
+                ]
+            ]
+        },
+        {
+            "input": "&gnapprox;",
+            "description": "Named entity: gnapprox; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a8a"
+                ]
+            ]
+        },
+        {
+            "input": "&gne",
+            "description": "Bad named entity: gne without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gne"
+                ]
+            ]
+        },
+        {
+            "input": "&gne;",
+            "description": "Named entity: gne; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a88"
+                ]
+            ]
+        },
+        {
+            "input": "&gneq",
+            "description": "Bad named entity: gneq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gneq"
+                ]
+            ]
+        },
+        {
+            "input": "&gneq;",
+            "description": "Named entity: gneq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a88"
+                ]
+            ]
+        },
+        {
+            "input": "&gneqq",
+            "description": "Bad named entity: gneqq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gneqq"
+                ]
+            ]
+        },
+        {
+            "input": "&gneqq;",
+            "description": "Named entity: gneqq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2269"
+                ]
+            ]
+        },
+        {
+            "input": "&gnsim",
+            "description": "Bad named entity: gnsim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gnsim"
+                ]
+            ]
+        },
+        {
+            "input": "&gnsim;",
+            "description": "Named entity: gnsim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22e7"
+                ]
+            ]
+        },
+        {
+            "input": "&gopf",
+            "description": "Bad named entity: gopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gopf"
+                ]
+            ]
+        },
+        {
+            "input": "&gopf;",
+            "description": "Named entity: gopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd58"
+                ]
+            ]
+        },
+        {
+            "input": "&grave",
+            "description": "Bad named entity: grave without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&grave"
+                ]
+            ]
+        },
+        {
+            "input": "&grave;",
+            "description": "Named entity: grave; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "`"
+                ]
+            ]
+        },
+        {
+            "input": "&gscr",
+            "description": "Bad named entity: gscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gscr"
+                ]
+            ]
+        },
+        {
+            "input": "&gscr;",
+            "description": "Named entity: gscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u210a"
+                ]
+            ]
+        },
+        {
+            "input": "&gsim",
+            "description": "Bad named entity: gsim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gsim"
+                ]
+            ]
+        },
+        {
+            "input": "&gsim;",
+            "description": "Named entity: gsim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2273"
+                ]
+            ]
+        },
+        {
+            "input": "&gsime",
+            "description": "Bad named entity: gsime without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gsime"
+                ]
+            ]
+        },
+        {
+            "input": "&gsime;",
+            "description": "Named entity: gsime; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a8e"
+                ]
+            ]
+        },
+        {
+            "input": "&gsiml",
+            "description": "Bad named entity: gsiml without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gsiml"
+                ]
+            ]
+        },
+        {
+            "input": "&gsiml;",
+            "description": "Named entity: gsiml; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a90"
+                ]
+            ]
+        },
+        {
+            "input": "&gt",
+            "description": "Named entity: gt without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    ">"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 4 }
+            ]
+        },
+        {
+            "input": "&gt;",
+            "description": "Named entity: gt; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    ">"
+                ]
+            ]
+        },
+        {
+            "input": "&gtcc;",
+            "description": "Named entity: gtcc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aa7"
+                ]
+            ]
+        },
+        {
+            "input": "&gtcir;",
+            "description": "Named entity: gtcir; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a7a"
+                ]
+            ]
+        },
+        {
+            "input": "&gtdot;",
+            "description": "Named entity: gtdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22d7"
+                ]
+            ]
+        },
+        {
+            "input": "&gtlPar;",
+            "description": "Named entity: gtlPar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2995"
+                ]
+            ]
+        },
+        {
+            "input": "&gtquest;",
+            "description": "Named entity: gtquest; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a7c"
+                ]
+            ]
+        },
+        {
+            "input": "&gtrapprox;",
+            "description": "Named entity: gtrapprox; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a86"
+                ]
+            ]
+        },
+        {
+            "input": "&gtrarr;",
+            "description": "Named entity: gtrarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2978"
+                ]
+            ]
+        },
+        {
+            "input": "&gtrdot;",
+            "description": "Named entity: gtrdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22d7"
+                ]
+            ]
+        },
+        {
+            "input": "&gtreqless;",
+            "description": "Named entity: gtreqless; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22db"
+                ]
+            ]
+        },
+        {
+            "input": "&gtreqqless;",
+            "description": "Named entity: gtreqqless; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a8c"
+                ]
+            ]
+        },
+        {
+            "input": "&gtrless;",
+            "description": "Named entity: gtrless; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2277"
+                ]
+            ]
+        },
+        {
+            "input": "&gtrsim;",
+            "description": "Named entity: gtrsim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2273"
+                ]
+            ]
+        },
+        {
+            "input": "&gvertneqq",
+            "description": "Bad named entity: gvertneqq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gvertneqq"
+                ]
+            ]
+        },
+        {
+            "input": "&gvertneqq;",
+            "description": "Named entity: gvertneqq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2269\ufe00"
+                ]
+            ]
+        },
+        {
+            "input": "&gvnE",
+            "description": "Bad named entity: gvnE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&gvnE"
+                ]
+            ]
+        },
+        {
+            "input": "&gvnE;",
+            "description": "Named entity: gvnE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2269\ufe00"
+                ]
+            ]
+        },
+        {
+            "input": "&hArr",
+            "description": "Bad named entity: hArr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&hArr"
+                ]
+            ]
+        },
+        {
+            "input": "&hArr;",
+            "description": "Named entity: hArr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21d4"
+                ]
+            ]
+        },
+        {
+            "input": "&hairsp",
+            "description": "Bad named entity: hairsp without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&hairsp"
+                ]
+            ]
+        },
+        {
+            "input": "&hairsp;",
+            "description": "Named entity: hairsp; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u200a"
+                ]
+            ]
+        },
+        {
+            "input": "&half",
+            "description": "Bad named entity: half without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&half"
+                ]
+            ]
+        },
+        {
+            "input": "&half;",
+            "description": "Named entity: half; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00bd"
+                ]
+            ]
+        },
+        {
+            "input": "&hamilt",
+            "description": "Bad named entity: hamilt without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&hamilt"
+                ]
+            ]
+        },
+        {
+            "input": "&hamilt;",
+            "description": "Named entity: hamilt; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u210b"
+                ]
+            ]
+        },
+        {
+            "input": "&hardcy",
+            "description": "Bad named entity: hardcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&hardcy"
+                ]
+            ]
+        },
+        {
+            "input": "&hardcy;",
+            "description": "Named entity: hardcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u044a"
+                ]
+            ]
+        },
+        {
+            "input": "&harr",
+            "description": "Bad named entity: harr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&harr"
+                ]
+            ]
+        },
+        {
+            "input": "&harr;",
+            "description": "Named entity: harr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2194"
+                ]
+            ]
+        },
+        {
+            "input": "&harrcir",
+            "description": "Bad named entity: harrcir without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&harrcir"
+                ]
+            ]
+        },
+        {
+            "input": "&harrcir;",
+            "description": "Named entity: harrcir; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2948"
+                ]
+            ]
+        },
+        {
+            "input": "&harrw",
+            "description": "Bad named entity: harrw without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&harrw"
+                ]
+            ]
+        },
+        {
+            "input": "&harrw;",
+            "description": "Named entity: harrw; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21ad"
+                ]
+            ]
+        },
+        {
+            "input": "&hbar",
+            "description": "Bad named entity: hbar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&hbar"
+                ]
+            ]
+        },
+        {
+            "input": "&hbar;",
+            "description": "Named entity: hbar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u210f"
+                ]
+            ]
+        },
+        {
+            "input": "&hcirc",
+            "description": "Bad named entity: hcirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&hcirc"
+                ]
+            ]
+        },
+        {
+            "input": "&hcirc;",
+            "description": "Named entity: hcirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0125"
+                ]
+            ]
+        },
+        {
+            "input": "&hearts",
+            "description": "Bad named entity: hearts without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&hearts"
+                ]
+            ]
+        },
+        {
+            "input": "&hearts;",
+            "description": "Named entity: hearts; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2665"
+                ]
+            ]
+        },
+        {
+            "input": "&heartsuit",
+            "description": "Bad named entity: heartsuit without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&heartsuit"
+                ]
+            ]
+        },
+        {
+            "input": "&heartsuit;",
+            "description": "Named entity: heartsuit; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2665"
+                ]
+            ]
+        },
+        {
+            "input": "&hellip",
+            "description": "Bad named entity: hellip without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&hellip"
+                ]
+            ]
+        },
+        {
+            "input": "&hellip;",
+            "description": "Named entity: hellip; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2026"
+                ]
+            ]
+        },
+        {
+            "input": "&hercon",
+            "description": "Bad named entity: hercon without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&hercon"
+                ]
+            ]
+        },
+        {
+            "input": "&hercon;",
+            "description": "Named entity: hercon; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22b9"
+                ]
+            ]
+        },
+        {
+            "input": "&hfr",
+            "description": "Bad named entity: hfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&hfr"
+                ]
+            ]
+        },
+        {
+            "input": "&hfr;",
+            "description": "Named entity: hfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd25"
+                ]
+            ]
+        },
+        {
+            "input": "&hksearow",
+            "description": "Bad named entity: hksearow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&hksearow"
+                ]
+            ]
+        },
+        {
+            "input": "&hksearow;",
+            "description": "Named entity: hksearow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2925"
+                ]
+            ]
+        },
+        {
+            "input": "&hkswarow",
+            "description": "Bad named entity: hkswarow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&hkswarow"
+                ]
+            ]
+        },
+        {
+            "input": "&hkswarow;",
+            "description": "Named entity: hkswarow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2926"
+                ]
+            ]
+        },
+        {
+            "input": "&hoarr",
+            "description": "Bad named entity: hoarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&hoarr"
+                ]
+            ]
+        },
+        {
+            "input": "&hoarr;",
+            "description": "Named entity: hoarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21ff"
+                ]
+            ]
+        },
+        {
+            "input": "&homtht",
+            "description": "Bad named entity: homtht without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&homtht"
+                ]
+            ]
+        },
+        {
+            "input": "&homtht;",
+            "description": "Named entity: homtht; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u223b"
+                ]
+            ]
+        },
+        {
+            "input": "&hookleftarrow",
+            "description": "Bad named entity: hookleftarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&hookleftarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&hookleftarrow;",
+            "description": "Named entity: hookleftarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21a9"
+                ]
+            ]
+        },
+        {
+            "input": "&hookrightarrow",
+            "description": "Bad named entity: hookrightarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&hookrightarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&hookrightarrow;",
+            "description": "Named entity: hookrightarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21aa"
+                ]
+            ]
+        },
+        {
+            "input": "&hopf",
+            "description": "Bad named entity: hopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&hopf"
+                ]
+            ]
+        },
+        {
+            "input": "&hopf;",
+            "description": "Named entity: hopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd59"
+                ]
+            ]
+        },
+        {
+            "input": "&horbar",
+            "description": "Bad named entity: horbar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&horbar"
+                ]
+            ]
+        },
+        {
+            "input": "&horbar;",
+            "description": "Named entity: horbar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2015"
+                ]
+            ]
+        },
+        {
+            "input": "&hscr",
+            "description": "Bad named entity: hscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&hscr"
+                ]
+            ]
+        },
+        {
+            "input": "&hscr;",
+            "description": "Named entity: hscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcbd"
+                ]
+            ]
+        },
+        {
+            "input": "&hslash",
+            "description": "Bad named entity: hslash without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&hslash"
+                ]
+            ]
+        },
+        {
+            "input": "&hslash;",
+            "description": "Named entity: hslash; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u210f"
+                ]
+            ]
+        },
+        {
+            "input": "&hstrok",
+            "description": "Bad named entity: hstrok without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&hstrok"
+                ]
+            ]
+        },
+        {
+            "input": "&hstrok;",
+            "description": "Named entity: hstrok; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0127"
+                ]
+            ]
+        },
+        {
+            "input": "&hybull",
+            "description": "Bad named entity: hybull without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&hybull"
+                ]
+            ]
+        },
+        {
+            "input": "&hybull;",
+            "description": "Named entity: hybull; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2043"
+                ]
+            ]
+        },
+        {
+            "input": "&hyphen",
+            "description": "Bad named entity: hyphen without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&hyphen"
+                ]
+            ]
+        },
+        {
+            "input": "&hyphen;",
+            "description": "Named entity: hyphen; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2010"
+                ]
+            ]
+        },
+        {
+            "input": "&iacute",
+            "description": "Named entity: iacute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ed"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&iacute;",
+            "description": "Named entity: iacute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ed"
+                ]
+            ]
+        },
+        {
+            "input": "&ic",
+            "description": "Bad named entity: ic without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ic"
+                ]
+            ]
+        },
+        {
+            "input": "&ic;",
+            "description": "Named entity: ic; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2063"
+                ]
+            ]
+        },
+        {
+            "input": "&icirc",
+            "description": "Named entity: icirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ee"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 7 }
+            ]
+        },
+        {
+            "input": "&icirc;",
+            "description": "Named entity: icirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ee"
+                ]
+            ]
+        },
+        {
+            "input": "&icy",
+            "description": "Bad named entity: icy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&icy"
+                ]
+            ]
+        },
+        {
+            "input": "&icy;",
+            "description": "Named entity: icy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0438"
+                ]
+            ]
+        },
+        {
+            "input": "&iecy",
+            "description": "Bad named entity: iecy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&iecy"
+                ]
+            ]
+        },
+        {
+            "input": "&iecy;",
+            "description": "Named entity: iecy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0435"
+                ]
+            ]
+        },
+        {
+            "input": "&iexcl",
+            "description": "Named entity: iexcl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a1"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 7 }
+            ]
+        },
+        {
+            "input": "&iexcl;",
+            "description": "Named entity: iexcl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a1"
+                ]
+            ]
+        },
+        {
+            "input": "&iff",
+            "description": "Bad named entity: iff without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&iff"
+                ]
+            ]
+        },
+        {
+            "input": "&iff;",
+            "description": "Named entity: iff; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21d4"
+                ]
+            ]
+        },
+        {
+            "input": "&ifr",
+            "description": "Bad named entity: ifr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ifr"
+                ]
+            ]
+        },
+        {
+            "input": "&ifr;",
+            "description": "Named entity: ifr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd26"
+                ]
+            ]
+        },
+        {
+            "input": "&igrave",
+            "description": "Named entity: igrave without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ec"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&igrave;",
+            "description": "Named entity: igrave; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ec"
+                ]
+            ]
+        },
+        {
+            "input": "&ii",
+            "description": "Bad named entity: ii without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ii"
+                ]
+            ]
+        },
+        {
+            "input": "&ii;",
+            "description": "Named entity: ii; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2148"
+                ]
+            ]
+        },
+        {
+            "input": "&iiiint",
+            "description": "Bad named entity: iiiint without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&iiiint"
+                ]
+            ]
+        },
+        {
+            "input": "&iiiint;",
+            "description": "Named entity: iiiint; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a0c"
+                ]
+            ]
+        },
+        {
+            "input": "&iiint",
+            "description": "Bad named entity: iiint without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&iiint"
+                ]
+            ]
+        },
+        {
+            "input": "&iiint;",
+            "description": "Named entity: iiint; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u222d"
+                ]
+            ]
+        },
+        {
+            "input": "&iinfin",
+            "description": "Bad named entity: iinfin without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&iinfin"
+                ]
+            ]
+        },
+        {
+            "input": "&iinfin;",
+            "description": "Named entity: iinfin; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29dc"
+                ]
+            ]
+        },
+        {
+            "input": "&iiota",
+            "description": "Bad named entity: iiota without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&iiota"
+                ]
+            ]
+        },
+        {
+            "input": "&iiota;",
+            "description": "Named entity: iiota; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2129"
+                ]
+            ]
+        },
+        {
+            "input": "&ijlig",
+            "description": "Bad named entity: ijlig without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ijlig"
+                ]
+            ]
+        },
+        {
+            "input": "&ijlig;",
+            "description": "Named entity: ijlig; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0133"
+                ]
+            ]
+        },
+        {
+            "input": "&imacr",
+            "description": "Bad named entity: imacr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&imacr"
+                ]
+            ]
+        },
+        {
+            "input": "&imacr;",
+            "description": "Named entity: imacr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u012b"
+                ]
+            ]
+        },
+        {
+            "input": "&image",
+            "description": "Bad named entity: image without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&image"
+                ]
+            ]
+        },
+        {
+            "input": "&image;",
+            "description": "Named entity: image; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2111"
+                ]
+            ]
+        },
+        {
+            "input": "&imagline",
+            "description": "Bad named entity: imagline without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&imagline"
+                ]
+            ]
+        },
+        {
+            "input": "&imagline;",
+            "description": "Named entity: imagline; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2110"
+                ]
+            ]
+        },
+        {
+            "input": "&imagpart",
+            "description": "Bad named entity: imagpart without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&imagpart"
+                ]
+            ]
+        },
+        {
+            "input": "&imagpart;",
+            "description": "Named entity: imagpart; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2111"
+                ]
+            ]
+        },
+        {
+            "input": "&imath",
+            "description": "Bad named entity: imath without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&imath"
+                ]
+            ]
+        },
+        {
+            "input": "&imath;",
+            "description": "Named entity: imath; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0131"
+                ]
+            ]
+        },
+        {
+            "input": "&imof",
+            "description": "Bad named entity: imof without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&imof"
+                ]
+            ]
+        },
+        {
+            "input": "&imof;",
+            "description": "Named entity: imof; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22b7"
+                ]
+            ]
+        },
+        {
+            "input": "&imped",
+            "description": "Bad named entity: imped without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&imped"
+                ]
+            ]
+        },
+        {
+            "input": "&imped;",
+            "description": "Named entity: imped; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u01b5"
+                ]
+            ]
+        },
+        {
+            "input": "&in",
+            "description": "Bad named entity: in without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&in"
+                ]
+            ]
+        },
+        {
+            "input": "&in;",
+            "description": "Named entity: in; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2208"
+                ]
+            ]
+        },
+        {
+            "input": "&incare",
+            "description": "Bad named entity: incare without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&incare"
+                ]
+            ]
+        },
+        {
+            "input": "&incare;",
+            "description": "Named entity: incare; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2105"
+                ]
+            ]
+        },
+        {
+            "input": "&infin",
+            "description": "Bad named entity: infin without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&infin"
+                ]
+            ]
+        },
+        {
+            "input": "&infin;",
+            "description": "Named entity: infin; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u221e"
+                ]
+            ]
+        },
+        {
+            "input": "&infintie",
+            "description": "Bad named entity: infintie without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&infintie"
+                ]
+            ]
+        },
+        {
+            "input": "&infintie;",
+            "description": "Named entity: infintie; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29dd"
+                ]
+            ]
+        },
+        {
+            "input": "&inodot",
+            "description": "Bad named entity: inodot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&inodot"
+                ]
+            ]
+        },
+        {
+            "input": "&inodot;",
+            "description": "Named entity: inodot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0131"
+                ]
+            ]
+        },
+        {
+            "input": "&int",
+            "description": "Bad named entity: int without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&int"
+                ]
+            ]
+        },
+        {
+            "input": "&int;",
+            "description": "Named entity: int; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u222b"
+                ]
+            ]
+        },
+        {
+            "input": "&intcal",
+            "description": "Bad named entity: intcal without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&intcal"
+                ]
+            ]
+        },
+        {
+            "input": "&intcal;",
+            "description": "Named entity: intcal; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22ba"
+                ]
+            ]
+        },
+        {
+            "input": "&integers",
+            "description": "Bad named entity: integers without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&integers"
+                ]
+            ]
+        },
+        {
+            "input": "&integers;",
+            "description": "Named entity: integers; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2124"
+                ]
+            ]
+        },
+        {
+            "input": "&intercal",
+            "description": "Bad named entity: intercal without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&intercal"
+                ]
+            ]
+        },
+        {
+            "input": "&intercal;",
+            "description": "Named entity: intercal; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22ba"
+                ]
+            ]
+        },
+        {
+            "input": "&intlarhk",
+            "description": "Bad named entity: intlarhk without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&intlarhk"
+                ]
+            ]
+        },
+        {
+            "input": "&intlarhk;",
+            "description": "Named entity: intlarhk; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a17"
+                ]
+            ]
+        },
+        {
+            "input": "&intprod",
+            "description": "Bad named entity: intprod without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&intprod"
+                ]
+            ]
+        },
+        {
+            "input": "&intprod;",
+            "description": "Named entity: intprod; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a3c"
+                ]
+            ]
+        },
+        {
+            "input": "&iocy",
+            "description": "Bad named entity: iocy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&iocy"
+                ]
+            ]
+        },
+        {
+            "input": "&iocy;",
+            "description": "Named entity: iocy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0451"
+                ]
+            ]
+        },
+        {
+            "input": "&iogon",
+            "description": "Bad named entity: iogon without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&iogon"
+                ]
+            ]
+        },
+        {
+            "input": "&iogon;",
+            "description": "Named entity: iogon; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u012f"
+                ]
+            ]
+        },
+        {
+            "input": "&iopf",
+            "description": "Bad named entity: iopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&iopf"
+                ]
+            ]
+        },
+        {
+            "input": "&iopf;",
+            "description": "Named entity: iopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd5a"
+                ]
+            ]
+        },
+        {
+            "input": "&iota",
+            "description": "Bad named entity: iota without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&iota"
+                ]
+            ]
+        },
+        {
+            "input": "&iota;",
+            "description": "Named entity: iota; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03b9"
+                ]
+            ]
+        },
+        {
+            "input": "&iprod",
+            "description": "Bad named entity: iprod without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&iprod"
+                ]
+            ]
+        },
+        {
+            "input": "&iprod;",
+            "description": "Named entity: iprod; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a3c"
+                ]
+            ]
+        },
+        {
+            "input": "&iquest",
+            "description": "Named entity: iquest without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00bf"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&iquest;",
+            "description": "Named entity: iquest; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00bf"
+                ]
+            ]
+        },
+        {
+            "input": "&iscr",
+            "description": "Bad named entity: iscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&iscr"
+                ]
+            ]
+        },
+        {
+            "input": "&iscr;",
+            "description": "Named entity: iscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcbe"
+                ]
+            ]
+        },
+        {
+            "input": "&isin",
+            "description": "Bad named entity: isin without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&isin"
+                ]
+            ]
+        },
+        {
+            "input": "&isin;",
+            "description": "Named entity: isin; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2208"
+                ]
+            ]
+        },
+        {
+            "input": "&isinE",
+            "description": "Bad named entity: isinE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&isinE"
+                ]
+            ]
+        },
+        {
+            "input": "&isinE;",
+            "description": "Named entity: isinE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22f9"
+                ]
+            ]
+        },
+        {
+            "input": "&isindot",
+            "description": "Bad named entity: isindot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&isindot"
+                ]
+            ]
+        },
+        {
+            "input": "&isindot;",
+            "description": "Named entity: isindot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22f5"
+                ]
+            ]
+        },
+        {
+            "input": "&isins",
+            "description": "Bad named entity: isins without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&isins"
+                ]
+            ]
+        },
+        {
+            "input": "&isins;",
+            "description": "Named entity: isins; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22f4"
+                ]
+            ]
+        },
+        {
+            "input": "&isinsv",
+            "description": "Bad named entity: isinsv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&isinsv"
+                ]
+            ]
+        },
+        {
+            "input": "&isinsv;",
+            "description": "Named entity: isinsv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22f3"
+                ]
+            ]
+        },
+        {
+            "input": "&isinv",
+            "description": "Bad named entity: isinv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&isinv"
+                ]
+            ]
+        },
+        {
+            "input": "&isinv;",
+            "description": "Named entity: isinv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2208"
+                ]
+            ]
+        },
+        {
+            "input": "&it",
+            "description": "Bad named entity: it without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&it"
+                ]
+            ]
+        },
+        {
+            "input": "&it;",
+            "description": "Named entity: it; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2062"
+                ]
+            ]
+        },
+        {
+            "input": "&itilde",
+            "description": "Bad named entity: itilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&itilde"
+                ]
+            ]
+        },
+        {
+            "input": "&itilde;",
+            "description": "Named entity: itilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0129"
+                ]
+            ]
+        },
+        {
+            "input": "&iukcy",
+            "description": "Bad named entity: iukcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&iukcy"
+                ]
+            ]
+        },
+        {
+            "input": "&iukcy;",
+            "description": "Named entity: iukcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0456"
+                ]
+            ]
+        },
+        {
+            "input": "&iuml",
+            "description": "Named entity: iuml without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ef"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 6 }
+            ]
+        },
+        {
+            "input": "&iuml;",
+            "description": "Named entity: iuml; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ef"
+                ]
+            ]
+        },
+        {
+            "input": "&jcirc",
+            "description": "Bad named entity: jcirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&jcirc"
+                ]
+            ]
+        },
+        {
+            "input": "&jcirc;",
+            "description": "Named entity: jcirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0135"
+                ]
+            ]
+        },
+        {
+            "input": "&jcy",
+            "description": "Bad named entity: jcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&jcy"
+                ]
+            ]
+        },
+        {
+            "input": "&jcy;",
+            "description": "Named entity: jcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0439"
+                ]
+            ]
+        },
+        {
+            "input": "&jfr",
+            "description": "Bad named entity: jfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&jfr"
+                ]
+            ]
+        },
+        {
+            "input": "&jfr;",
+            "description": "Named entity: jfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd27"
+                ]
+            ]
+        },
+        {
+            "input": "&jmath",
+            "description": "Bad named entity: jmath without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&jmath"
+                ]
+            ]
+        },
+        {
+            "input": "&jmath;",
+            "description": "Named entity: jmath; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0237"
+                ]
+            ]
+        },
+        {
+            "input": "&jopf",
+            "description": "Bad named entity: jopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&jopf"
+                ]
+            ]
+        },
+        {
+            "input": "&jopf;",
+            "description": "Named entity: jopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd5b"
+                ]
+            ]
+        },
+        {
+            "input": "&jscr",
+            "description": "Bad named entity: jscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&jscr"
+                ]
+            ]
+        },
+        {
+            "input": "&jscr;",
+            "description": "Named entity: jscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcbf"
+                ]
+            ]
+        },
+        {
+            "input": "&jsercy",
+            "description": "Bad named entity: jsercy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&jsercy"
+                ]
+            ]
+        },
+        {
+            "input": "&jsercy;",
+            "description": "Named entity: jsercy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0458"
+                ]
+            ]
+        },
+        {
+            "input": "&jukcy",
+            "description": "Bad named entity: jukcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&jukcy"
+                ]
+            ]
+        },
+        {
+            "input": "&jukcy;",
+            "description": "Named entity: jukcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0454"
+                ]
+            ]
+        },
+        {
+            "input": "&kappa",
+            "description": "Bad named entity: kappa without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&kappa"
+                ]
+            ]
+        },
+        {
+            "input": "&kappa;",
+            "description": "Named entity: kappa; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03ba"
+                ]
+            ]
+        },
+        {
+            "input": "&kappav",
+            "description": "Bad named entity: kappav without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&kappav"
+                ]
+            ]
+        },
+        {
+            "input": "&kappav;",
+            "description": "Named entity: kappav; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03f0"
+                ]
+            ]
+        },
+        {
+            "input": "&kcedil",
+            "description": "Bad named entity: kcedil without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&kcedil"
+                ]
+            ]
+        },
+        {
+            "input": "&kcedil;",
+            "description": "Named entity: kcedil; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0137"
+                ]
+            ]
+        },
+        {
+            "input": "&kcy",
+            "description": "Bad named entity: kcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&kcy"
+                ]
+            ]
+        },
+        {
+            "input": "&kcy;",
+            "description": "Named entity: kcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u043a"
+                ]
+            ]
+        },
+        {
+            "input": "&kfr",
+            "description": "Bad named entity: kfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&kfr"
+                ]
+            ]
+        },
+        {
+            "input": "&kfr;",
+            "description": "Named entity: kfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd28"
+                ]
+            ]
+        },
+        {
+            "input": "&kgreen",
+            "description": "Bad named entity: kgreen without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&kgreen"
+                ]
+            ]
+        },
+        {
+            "input": "&kgreen;",
+            "description": "Named entity: kgreen; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0138"
+                ]
+            ]
+        },
+        {
+            "input": "&khcy",
+            "description": "Bad named entity: khcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&khcy"
+                ]
+            ]
+        },
+        {
+            "input": "&khcy;",
+            "description": "Named entity: khcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0445"
+                ]
+            ]
+        },
+        {
+            "input": "&kjcy",
+            "description": "Bad named entity: kjcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&kjcy"
+                ]
+            ]
+        },
+        {
+            "input": "&kjcy;",
+            "description": "Named entity: kjcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u045c"
+                ]
+            ]
+        },
+        {
+            "input": "&kopf",
+            "description": "Bad named entity: kopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&kopf"
+                ]
+            ]
+        },
+        {
+            "input": "&kopf;",
+            "description": "Named entity: kopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd5c"
+                ]
+            ]
+        },
+        {
+            "input": "&kscr",
+            "description": "Bad named entity: kscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&kscr"
+                ]
+            ]
+        },
+        {
+            "input": "&kscr;",
+            "description": "Named entity: kscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcc0"
+                ]
+            ]
+        },
+        {
+            "input": "&lAarr",
+            "description": "Bad named entity: lAarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lAarr"
+                ]
+            ]
+        },
+        {
+            "input": "&lAarr;",
+            "description": "Named entity: lAarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21da"
+                ]
+            ]
+        },
+        {
+            "input": "&lArr",
+            "description": "Bad named entity: lArr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lArr"
+                ]
+            ]
+        },
+        {
+            "input": "&lArr;",
+            "description": "Named entity: lArr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21d0"
+                ]
+            ]
+        },
+        {
+            "input": "&lAtail",
+            "description": "Bad named entity: lAtail without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lAtail"
+                ]
+            ]
+        },
+        {
+            "input": "&lAtail;",
+            "description": "Named entity: lAtail; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u291b"
+                ]
+            ]
+        },
+        {
+            "input": "&lBarr",
+            "description": "Bad named entity: lBarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lBarr"
+                ]
+            ]
+        },
+        {
+            "input": "&lBarr;",
+            "description": "Named entity: lBarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u290e"
+                ]
+            ]
+        },
+        {
+            "input": "&lE",
+            "description": "Bad named entity: lE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lE"
+                ]
+            ]
+        },
+        {
+            "input": "&lE;",
+            "description": "Named entity: lE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2266"
+                ]
+            ]
+        },
+        {
+            "input": "&lEg",
+            "description": "Bad named entity: lEg without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lEg"
+                ]
+            ]
+        },
+        {
+            "input": "&lEg;",
+            "description": "Named entity: lEg; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a8b"
+                ]
+            ]
+        },
+        {
+            "input": "&lHar",
+            "description": "Bad named entity: lHar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lHar"
+                ]
+            ]
+        },
+        {
+            "input": "&lHar;",
+            "description": "Named entity: lHar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2962"
+                ]
+            ]
+        },
+        {
+            "input": "&lacute",
+            "description": "Bad named entity: lacute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lacute"
+                ]
+            ]
+        },
+        {
+            "input": "&lacute;",
+            "description": "Named entity: lacute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u013a"
+                ]
+            ]
+        },
+        {
+            "input": "&laemptyv",
+            "description": "Bad named entity: laemptyv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&laemptyv"
+                ]
+            ]
+        },
+        {
+            "input": "&laemptyv;",
+            "description": "Named entity: laemptyv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29b4"
+                ]
+            ]
+        },
+        {
+            "input": "&lagran",
+            "description": "Bad named entity: lagran without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lagran"
+                ]
+            ]
+        },
+        {
+            "input": "&lagran;",
+            "description": "Named entity: lagran; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2112"
+                ]
+            ]
+        },
+        {
+            "input": "&lambda",
+            "description": "Bad named entity: lambda without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lambda"
+                ]
+            ]
+        },
+        {
+            "input": "&lambda;",
+            "description": "Named entity: lambda; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03bb"
+                ]
+            ]
+        },
+        {
+            "input": "&lang",
+            "description": "Bad named entity: lang without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lang"
+                ]
+            ]
+        },
+        {
+            "input": "&lang;",
+            "description": "Named entity: lang; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27e8"
+                ]
+            ]
+        },
+        {
+            "input": "&langd",
+            "description": "Bad named entity: langd without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&langd"
+                ]
+            ]
+        },
+        {
+            "input": "&langd;",
+            "description": "Named entity: langd; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2991"
+                ]
+            ]
+        },
+        {
+            "input": "&langle",
+            "description": "Bad named entity: langle without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&langle"
+                ]
+            ]
+        },
+        {
+            "input": "&langle;",
+            "description": "Named entity: langle; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27e8"
+                ]
+            ]
+        },
+        {
+            "input": "&lap",
+            "description": "Bad named entity: lap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lap"
+                ]
+            ]
+        },
+        {
+            "input": "&lap;",
+            "description": "Named entity: lap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a85"
+                ]
+            ]
+        },
+        {
+            "input": "&laquo",
+            "description": "Named entity: laquo without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ab"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 7 }
+            ]
+        },
+        {
+            "input": "&laquo;",
+            "description": "Named entity: laquo; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ab"
+                ]
+            ]
+        },
+        {
+            "input": "&larr",
+            "description": "Bad named entity: larr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&larr"
+                ]
+            ]
+        },
+        {
+            "input": "&larr;",
+            "description": "Named entity: larr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2190"
+                ]
+            ]
+        },
+        {
+            "input": "&larrb",
+            "description": "Bad named entity: larrb without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&larrb"
+                ]
+            ]
+        },
+        {
+            "input": "&larrb;",
+            "description": "Named entity: larrb; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21e4"
+                ]
+            ]
+        },
+        {
+            "input": "&larrbfs",
+            "description": "Bad named entity: larrbfs without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&larrbfs"
+                ]
+            ]
+        },
+        {
+            "input": "&larrbfs;",
+            "description": "Named entity: larrbfs; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u291f"
+                ]
+            ]
+        },
+        {
+            "input": "&larrfs",
+            "description": "Bad named entity: larrfs without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&larrfs"
+                ]
+            ]
+        },
+        {
+            "input": "&larrfs;",
+            "description": "Named entity: larrfs; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u291d"
+                ]
+            ]
+        },
+        {
+            "input": "&larrhk",
+            "description": "Bad named entity: larrhk without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&larrhk"
+                ]
+            ]
+        },
+        {
+            "input": "&larrhk;",
+            "description": "Named entity: larrhk; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21a9"
+                ]
+            ]
+        },
+        {
+            "input": "&larrlp",
+            "description": "Bad named entity: larrlp without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&larrlp"
+                ]
+            ]
+        },
+        {
+            "input": "&larrlp;",
+            "description": "Named entity: larrlp; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21ab"
+                ]
+            ]
+        },
+        {
+            "input": "&larrpl",
+            "description": "Bad named entity: larrpl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&larrpl"
+                ]
+            ]
+        },
+        {
+            "input": "&larrpl;",
+            "description": "Named entity: larrpl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2939"
+                ]
+            ]
+        },
+        {
+            "input": "&larrsim",
+            "description": "Bad named entity: larrsim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&larrsim"
+                ]
+            ]
+        },
+        {
+            "input": "&larrsim;",
+            "description": "Named entity: larrsim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2973"
+                ]
+            ]
+        },
+        {
+            "input": "&larrtl",
+            "description": "Bad named entity: larrtl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&larrtl"
+                ]
+            ]
+        },
+        {
+            "input": "&larrtl;",
+            "description": "Named entity: larrtl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21a2"
+                ]
+            ]
+        },
+        {
+            "input": "&lat",
+            "description": "Bad named entity: lat without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lat"
+                ]
+            ]
+        },
+        {
+            "input": "&lat;",
+            "description": "Named entity: lat; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aab"
+                ]
+            ]
+        },
+        {
+            "input": "&latail",
+            "description": "Bad named entity: latail without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&latail"
+                ]
+            ]
+        },
+        {
+            "input": "&latail;",
+            "description": "Named entity: latail; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2919"
+                ]
+            ]
+        },
+        {
+            "input": "&late",
+            "description": "Bad named entity: late without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&late"
+                ]
+            ]
+        },
+        {
+            "input": "&late;",
+            "description": "Named entity: late; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aad"
+                ]
+            ]
+        },
+        {
+            "input": "&lates",
+            "description": "Bad named entity: lates without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lates"
+                ]
+            ]
+        },
+        {
+            "input": "&lates;",
+            "description": "Named entity: lates; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aad\ufe00"
+                ]
+            ]
+        },
+        {
+            "input": "&lbarr",
+            "description": "Bad named entity: lbarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lbarr"
+                ]
+            ]
+        },
+        {
+            "input": "&lbarr;",
+            "description": "Named entity: lbarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u290c"
+                ]
+            ]
+        },
+        {
+            "input": "&lbbrk",
+            "description": "Bad named entity: lbbrk without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lbbrk"
+                ]
+            ]
+        },
+        {
+            "input": "&lbbrk;",
+            "description": "Named entity: lbbrk; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2772"
+                ]
+            ]
+        },
+        {
+            "input": "&lbrace",
+            "description": "Bad named entity: lbrace without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lbrace"
+                ]
+            ]
+        },
+        {
+            "input": "&lbrace;",
+            "description": "Named entity: lbrace; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "{"
+                ]
+            ]
+        },
+        {
+            "input": "&lbrack",
+            "description": "Bad named entity: lbrack without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lbrack"
+                ]
+            ]
+        },
+        {
+            "input": "&lbrack;",
+            "description": "Named entity: lbrack; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "["
+                ]
+            ]
+        },
+        {
+            "input": "&lbrke",
+            "description": "Bad named entity: lbrke without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lbrke"
+                ]
+            ]
+        },
+        {
+            "input": "&lbrke;",
+            "description": "Named entity: lbrke; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u298b"
+                ]
+            ]
+        },
+        {
+            "input": "&lbrksld",
+            "description": "Bad named entity: lbrksld without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lbrksld"
+                ]
+            ]
+        },
+        {
+            "input": "&lbrksld;",
+            "description": "Named entity: lbrksld; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u298f"
+                ]
+            ]
+        },
+        {
+            "input": "&lbrkslu",
+            "description": "Bad named entity: lbrkslu without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lbrkslu"
+                ]
+            ]
+        },
+        {
+            "input": "&lbrkslu;",
+            "description": "Named entity: lbrkslu; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u298d"
+                ]
+            ]
+        },
+        {
+            "input": "&lcaron",
+            "description": "Bad named entity: lcaron without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lcaron"
+                ]
+            ]
+        },
+        {
+            "input": "&lcaron;",
+            "description": "Named entity: lcaron; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u013e"
+                ]
+            ]
+        },
+        {
+            "input": "&lcedil",
+            "description": "Bad named entity: lcedil without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lcedil"
+                ]
+            ]
+        },
+        {
+            "input": "&lcedil;",
+            "description": "Named entity: lcedil; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u013c"
+                ]
+            ]
+        },
+        {
+            "input": "&lceil",
+            "description": "Bad named entity: lceil without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lceil"
+                ]
+            ]
+        },
+        {
+            "input": "&lceil;",
+            "description": "Named entity: lceil; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2308"
+                ]
+            ]
+        },
+        {
+            "input": "&lcub",
+            "description": "Bad named entity: lcub without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lcub"
+                ]
+            ]
+        },
+        {
+            "input": "&lcub;",
+            "description": "Named entity: lcub; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "{"
+                ]
+            ]
+        },
+        {
+            "input": "&lcy",
+            "description": "Bad named entity: lcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lcy"
+                ]
+            ]
+        },
+        {
+            "input": "&lcy;",
+            "description": "Named entity: lcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u043b"
+                ]
+            ]
+        },
+        {
+            "input": "&ldca",
+            "description": "Bad named entity: ldca without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ldca"
+                ]
+            ]
+        },
+        {
+            "input": "&ldca;",
+            "description": "Named entity: ldca; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2936"
+                ]
+            ]
+        },
+        {
+            "input": "&ldquo",
+            "description": "Bad named entity: ldquo without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ldquo"
+                ]
+            ]
+        },
+        {
+            "input": "&ldquo;",
+            "description": "Named entity: ldquo; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u201c"
+                ]
+            ]
+        },
+        {
+            "input": "&ldquor",
+            "description": "Bad named entity: ldquor without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ldquor"
+                ]
+            ]
+        },
+        {
+            "input": "&ldquor;",
+            "description": "Named entity: ldquor; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u201e"
+                ]
+            ]
+        },
+        {
+            "input": "&ldrdhar",
+            "description": "Bad named entity: ldrdhar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ldrdhar"
+                ]
+            ]
+        },
+        {
+            "input": "&ldrdhar;",
+            "description": "Named entity: ldrdhar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2967"
+                ]
+            ]
+        },
+        {
+            "input": "&ldrushar",
+            "description": "Bad named entity: ldrushar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ldrushar"
+                ]
+            ]
+        },
+        {
+            "input": "&ldrushar;",
+            "description": "Named entity: ldrushar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u294b"
+                ]
+            ]
+        },
+        {
+            "input": "&ldsh",
+            "description": "Bad named entity: ldsh without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ldsh"
+                ]
+            ]
+        },
+        {
+            "input": "&ldsh;",
+            "description": "Named entity: ldsh; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21b2"
+                ]
+            ]
+        },
+        {
+            "input": "&le",
+            "description": "Bad named entity: le without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&le"
+                ]
+            ]
+        },
+        {
+            "input": "&le;",
+            "description": "Named entity: le; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2264"
+                ]
+            ]
+        },
+        {
+            "input": "&leftarrow",
+            "description": "Bad named entity: leftarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&leftarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&leftarrow;",
+            "description": "Named entity: leftarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2190"
+                ]
+            ]
+        },
+        {
+            "input": "&leftarrowtail",
+            "description": "Bad named entity: leftarrowtail without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&leftarrowtail"
+                ]
+            ]
+        },
+        {
+            "input": "&leftarrowtail;",
+            "description": "Named entity: leftarrowtail; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21a2"
+                ]
+            ]
+        },
+        {
+            "input": "&leftharpoondown",
+            "description": "Bad named entity: leftharpoondown without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&leftharpoondown"
+                ]
+            ]
+        },
+        {
+            "input": "&leftharpoondown;",
+            "description": "Named entity: leftharpoondown; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21bd"
+                ]
+            ]
+        },
+        {
+            "input": "&leftharpoonup",
+            "description": "Bad named entity: leftharpoonup without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&leftharpoonup"
+                ]
+            ]
+        },
+        {
+            "input": "&leftharpoonup;",
+            "description": "Named entity: leftharpoonup; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21bc"
+                ]
+            ]
+        },
+        {
+            "input": "&leftleftarrows",
+            "description": "Bad named entity: leftleftarrows without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&leftleftarrows"
+                ]
+            ]
+        },
+        {
+            "input": "&leftleftarrows;",
+            "description": "Named entity: leftleftarrows; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c7"
+                ]
+            ]
+        },
+        {
+            "input": "&leftrightarrow",
+            "description": "Bad named entity: leftrightarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&leftrightarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&leftrightarrow;",
+            "description": "Named entity: leftrightarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2194"
+                ]
+            ]
+        },
+        {
+            "input": "&leftrightarrows",
+            "description": "Bad named entity: leftrightarrows without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&leftrightarrows"
+                ]
+            ]
+        },
+        {
+            "input": "&leftrightarrows;",
+            "description": "Named entity: leftrightarrows; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c6"
+                ]
+            ]
+        },
+        {
+            "input": "&leftrightharpoons",
+            "description": "Bad named entity: leftrightharpoons without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&leftrightharpoons"
+                ]
+            ]
+        },
+        {
+            "input": "&leftrightharpoons;",
+            "description": "Named entity: leftrightharpoons; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21cb"
+                ]
+            ]
+        },
+        {
+            "input": "&leftrightsquigarrow",
+            "description": "Bad named entity: leftrightsquigarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&leftrightsquigarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&leftrightsquigarrow;",
+            "description": "Named entity: leftrightsquigarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21ad"
+                ]
+            ]
+        },
+        {
+            "input": "&leftthreetimes",
+            "description": "Bad named entity: leftthreetimes without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&leftthreetimes"
+                ]
+            ]
+        },
+        {
+            "input": "&leftthreetimes;",
+            "description": "Named entity: leftthreetimes; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22cb"
+                ]
+            ]
+        },
+        {
+            "input": "&leg",
+            "description": "Bad named entity: leg without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&leg"
+                ]
+            ]
+        },
+        {
+            "input": "&leg;",
+            "description": "Named entity: leg; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22da"
+                ]
+            ]
+        },
+        {
+            "input": "&leq",
+            "description": "Bad named entity: leq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&leq"
+                ]
+            ]
+        },
+        {
+            "input": "&leq;",
+            "description": "Named entity: leq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2264"
+                ]
+            ]
+        },
+        {
+            "input": "&leqq",
+            "description": "Bad named entity: leqq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&leqq"
+                ]
+            ]
+        },
+        {
+            "input": "&leqq;",
+            "description": "Named entity: leqq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2266"
+                ]
+            ]
+        },
+        {
+            "input": "&leqslant",
+            "description": "Bad named entity: leqslant without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&leqslant"
+                ]
+            ]
+        },
+        {
+            "input": "&leqslant;",
+            "description": "Named entity: leqslant; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a7d"
+                ]
+            ]
+        },
+        {
+            "input": "&les",
+            "description": "Bad named entity: les without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&les"
+                ]
+            ]
+        },
+        {
+            "input": "&les;",
+            "description": "Named entity: les; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a7d"
+                ]
+            ]
+        },
+        {
+            "input": "&lescc",
+            "description": "Bad named entity: lescc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lescc"
+                ]
+            ]
+        },
+        {
+            "input": "&lescc;",
+            "description": "Named entity: lescc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aa8"
+                ]
+            ]
+        },
+        {
+            "input": "&lesdot",
+            "description": "Bad named entity: lesdot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lesdot"
+                ]
+            ]
+        },
+        {
+            "input": "&lesdot;",
+            "description": "Named entity: lesdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a7f"
+                ]
+            ]
+        },
+        {
+            "input": "&lesdoto",
+            "description": "Bad named entity: lesdoto without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lesdoto"
+                ]
+            ]
+        },
+        {
+            "input": "&lesdoto;",
+            "description": "Named entity: lesdoto; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a81"
+                ]
+            ]
+        },
+        {
+            "input": "&lesdotor",
+            "description": "Bad named entity: lesdotor without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lesdotor"
+                ]
+            ]
+        },
+        {
+            "input": "&lesdotor;",
+            "description": "Named entity: lesdotor; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a83"
+                ]
+            ]
+        },
+        {
+            "input": "&lesg",
+            "description": "Bad named entity: lesg without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lesg"
+                ]
+            ]
+        },
+        {
+            "input": "&lesg;",
+            "description": "Named entity: lesg; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22da\ufe00"
+                ]
+            ]
+        },
+        {
+            "input": "&lesges",
+            "description": "Bad named entity: lesges without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lesges"
+                ]
+            ]
+        },
+        {
+            "input": "&lesges;",
+            "description": "Named entity: lesges; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a93"
+                ]
+            ]
+        },
+        {
+            "input": "&lessapprox",
+            "description": "Bad named entity: lessapprox without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lessapprox"
+                ]
+            ]
+        },
+        {
+            "input": "&lessapprox;",
+            "description": "Named entity: lessapprox; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a85"
+                ]
+            ]
+        },
+        {
+            "input": "&lessdot",
+            "description": "Bad named entity: lessdot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lessdot"
+                ]
+            ]
+        },
+        {
+            "input": "&lessdot;",
+            "description": "Named entity: lessdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22d6"
+                ]
+            ]
+        },
+        {
+            "input": "&lesseqgtr",
+            "description": "Bad named entity: lesseqgtr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lesseqgtr"
+                ]
+            ]
+        },
+        {
+            "input": "&lesseqgtr;",
+            "description": "Named entity: lesseqgtr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22da"
+                ]
+            ]
+        },
+        {
+            "input": "&lesseqqgtr",
+            "description": "Bad named entity: lesseqqgtr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lesseqqgtr"
+                ]
+            ]
+        },
+        {
+            "input": "&lesseqqgtr;",
+            "description": "Named entity: lesseqqgtr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a8b"
+                ]
+            ]
+        },
+        {
+            "input": "&lessgtr",
+            "description": "Bad named entity: lessgtr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lessgtr"
+                ]
+            ]
+        },
+        {
+            "input": "&lessgtr;",
+            "description": "Named entity: lessgtr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2276"
+                ]
+            ]
+        },
+        {
+            "input": "&lesssim",
+            "description": "Bad named entity: lesssim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lesssim"
+                ]
+            ]
+        },
+        {
+            "input": "&lesssim;",
+            "description": "Named entity: lesssim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2272"
+                ]
+            ]
+        },
+        {
+            "input": "&lfisht",
+            "description": "Bad named entity: lfisht without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lfisht"
+                ]
+            ]
+        },
+        {
+            "input": "&lfisht;",
+            "description": "Named entity: lfisht; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u297c"
+                ]
+            ]
+        },
+        {
+            "input": "&lfloor",
+            "description": "Bad named entity: lfloor without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lfloor"
+                ]
+            ]
+        },
+        {
+            "input": "&lfloor;",
+            "description": "Named entity: lfloor; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u230a"
+                ]
+            ]
+        },
+        {
+            "input": "&lfr",
+            "description": "Bad named entity: lfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lfr"
+                ]
+            ]
+        },
+        {
+            "input": "&lfr;",
+            "description": "Named entity: lfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd29"
+                ]
+            ]
+        },
+        {
+            "input": "&lg",
+            "description": "Bad named entity: lg without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lg"
+                ]
+            ]
+        },
+        {
+            "input": "&lg;",
+            "description": "Named entity: lg; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2276"
+                ]
+            ]
+        },
+        {
+            "input": "&lgE",
+            "description": "Bad named entity: lgE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lgE"
+                ]
+            ]
+        },
+        {
+            "input": "&lgE;",
+            "description": "Named entity: lgE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a91"
+                ]
+            ]
+        },
+        {
+            "input": "&lhard",
+            "description": "Bad named entity: lhard without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lhard"
+                ]
+            ]
+        },
+        {
+            "input": "&lhard;",
+            "description": "Named entity: lhard; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21bd"
+                ]
+            ]
+        },
+        {
+            "input": "&lharu",
+            "description": "Bad named entity: lharu without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lharu"
+                ]
+            ]
+        },
+        {
+            "input": "&lharu;",
+            "description": "Named entity: lharu; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21bc"
+                ]
+            ]
+        },
+        {
+            "input": "&lharul",
+            "description": "Bad named entity: lharul without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lharul"
+                ]
+            ]
+        },
+        {
+            "input": "&lharul;",
+            "description": "Named entity: lharul; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u296a"
+                ]
+            ]
+        },
+        {
+            "input": "&lhblk",
+            "description": "Bad named entity: lhblk without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lhblk"
+                ]
+            ]
+        },
+        {
+            "input": "&lhblk;",
+            "description": "Named entity: lhblk; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2584"
+                ]
+            ]
+        },
+        {
+            "input": "&ljcy",
+            "description": "Bad named entity: ljcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ljcy"
+                ]
+            ]
+        },
+        {
+            "input": "&ljcy;",
+            "description": "Named entity: ljcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0459"
+                ]
+            ]
+        },
+        {
+            "input": "&ll",
+            "description": "Bad named entity: ll without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ll"
+                ]
+            ]
+        },
+        {
+            "input": "&ll;",
+            "description": "Named entity: ll; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u226a"
+                ]
+            ]
+        },
+        {
+            "input": "&llarr",
+            "description": "Bad named entity: llarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&llarr"
+                ]
+            ]
+        },
+        {
+            "input": "&llarr;",
+            "description": "Named entity: llarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c7"
+                ]
+            ]
+        },
+        {
+            "input": "&llcorner",
+            "description": "Bad named entity: llcorner without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&llcorner"
+                ]
+            ]
+        },
+        {
+            "input": "&llcorner;",
+            "description": "Named entity: llcorner; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u231e"
+                ]
+            ]
+        },
+        {
+            "input": "&llhard",
+            "description": "Bad named entity: llhard without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&llhard"
+                ]
+            ]
+        },
+        {
+            "input": "&llhard;",
+            "description": "Named entity: llhard; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u296b"
+                ]
+            ]
+        },
+        {
+            "input": "&lltri",
+            "description": "Bad named entity: lltri without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lltri"
+                ]
+            ]
+        },
+        {
+            "input": "&lltri;",
+            "description": "Named entity: lltri; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25fa"
+                ]
+            ]
+        },
+        {
+            "input": "&lmidot",
+            "description": "Bad named entity: lmidot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lmidot"
+                ]
+            ]
+        },
+        {
+            "input": "&lmidot;",
+            "description": "Named entity: lmidot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0140"
+                ]
+            ]
+        },
+        {
+            "input": "&lmoust",
+            "description": "Bad named entity: lmoust without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lmoust"
+                ]
+            ]
+        },
+        {
+            "input": "&lmoust;",
+            "description": "Named entity: lmoust; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u23b0"
+                ]
+            ]
+        },
+        {
+            "input": "&lmoustache",
+            "description": "Bad named entity: lmoustache without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lmoustache"
+                ]
+            ]
+        },
+        {
+            "input": "&lmoustache;",
+            "description": "Named entity: lmoustache; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u23b0"
+                ]
+            ]
+        },
+        {
+            "input": "&lnE",
+            "description": "Bad named entity: lnE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lnE"
+                ]
+            ]
+        },
+        {
+            "input": "&lnE;",
+            "description": "Named entity: lnE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2268"
+                ]
+            ]
+        },
+        {
+            "input": "&lnap",
+            "description": "Bad named entity: lnap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lnap"
+                ]
+            ]
+        },
+        {
+            "input": "&lnap;",
+            "description": "Named entity: lnap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a89"
+                ]
+            ]
+        },
+        {
+            "input": "&lnapprox",
+            "description": "Bad named entity: lnapprox without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lnapprox"
+                ]
+            ]
+        },
+        {
+            "input": "&lnapprox;",
+            "description": "Named entity: lnapprox; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a89"
+                ]
+            ]
+        },
+        {
+            "input": "&lne",
+            "description": "Bad named entity: lne without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lne"
+                ]
+            ]
+        },
+        {
+            "input": "&lne;",
+            "description": "Named entity: lne; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a87"
+                ]
+            ]
+        },
+        {
+            "input": "&lneq",
+            "description": "Bad named entity: lneq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lneq"
+                ]
+            ]
+        },
+        {
+            "input": "&lneq;",
+            "description": "Named entity: lneq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a87"
+                ]
+            ]
+        },
+        {
+            "input": "&lneqq",
+            "description": "Bad named entity: lneqq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lneqq"
+                ]
+            ]
+        },
+        {
+            "input": "&lneqq;",
+            "description": "Named entity: lneqq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2268"
+                ]
+            ]
+        },
+        {
+            "input": "&lnsim",
+            "description": "Bad named entity: lnsim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lnsim"
+                ]
+            ]
+        },
+        {
+            "input": "&lnsim;",
+            "description": "Named entity: lnsim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22e6"
+                ]
+            ]
+        },
+        {
+            "input": "&loang",
+            "description": "Bad named entity: loang without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&loang"
+                ]
+            ]
+        },
+        {
+            "input": "&loang;",
+            "description": "Named entity: loang; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27ec"
+                ]
+            ]
+        },
+        {
+            "input": "&loarr",
+            "description": "Bad named entity: loarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&loarr"
+                ]
+            ]
+        },
+        {
+            "input": "&loarr;",
+            "description": "Named entity: loarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21fd"
+                ]
+            ]
+        },
+        {
+            "input": "&lobrk",
+            "description": "Bad named entity: lobrk without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lobrk"
+                ]
+            ]
+        },
+        {
+            "input": "&lobrk;",
+            "description": "Named entity: lobrk; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27e6"
+                ]
+            ]
+        },
+        {
+            "input": "&longleftarrow",
+            "description": "Bad named entity: longleftarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&longleftarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&longleftarrow;",
+            "description": "Named entity: longleftarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27f5"
+                ]
+            ]
+        },
+        {
+            "input": "&longleftrightarrow",
+            "description": "Bad named entity: longleftrightarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&longleftrightarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&longleftrightarrow;",
+            "description": "Named entity: longleftrightarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27f7"
+                ]
+            ]
+        },
+        {
+            "input": "&longmapsto",
+            "description": "Bad named entity: longmapsto without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&longmapsto"
+                ]
+            ]
+        },
+        {
+            "input": "&longmapsto;",
+            "description": "Named entity: longmapsto; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27fc"
+                ]
+            ]
+        },
+        {
+            "input": "&longrightarrow",
+            "description": "Bad named entity: longrightarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&longrightarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&longrightarrow;",
+            "description": "Named entity: longrightarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27f6"
+                ]
+            ]
+        },
+        {
+            "input": "&looparrowleft",
+            "description": "Bad named entity: looparrowleft without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&looparrowleft"
+                ]
+            ]
+        },
+        {
+            "input": "&looparrowleft;",
+            "description": "Named entity: looparrowleft; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21ab"
+                ]
+            ]
+        },
+        {
+            "input": "&looparrowright",
+            "description": "Bad named entity: looparrowright without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&looparrowright"
+                ]
+            ]
+        },
+        {
+            "input": "&looparrowright;",
+            "description": "Named entity: looparrowright; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21ac"
+                ]
+            ]
+        },
+        {
+            "input": "&lopar",
+            "description": "Bad named entity: lopar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lopar"
+                ]
+            ]
+        },
+        {
+            "input": "&lopar;",
+            "description": "Named entity: lopar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2985"
+                ]
+            ]
+        },
+        {
+            "input": "&lopf",
+            "description": "Bad named entity: lopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lopf"
+                ]
+            ]
+        },
+        {
+            "input": "&lopf;",
+            "description": "Named entity: lopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd5d"
+                ]
+            ]
+        },
+        {
+            "input": "&loplus",
+            "description": "Bad named entity: loplus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&loplus"
+                ]
+            ]
+        },
+        {
+            "input": "&loplus;",
+            "description": "Named entity: loplus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a2d"
+                ]
+            ]
+        },
+        {
+            "input": "&lotimes",
+            "description": "Bad named entity: lotimes without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lotimes"
+                ]
+            ]
+        },
+        {
+            "input": "&lotimes;",
+            "description": "Named entity: lotimes; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a34"
+                ]
+            ]
+        },
+        {
+            "input": "&lowast",
+            "description": "Bad named entity: lowast without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lowast"
+                ]
+            ]
+        },
+        {
+            "input": "&lowast;",
+            "description": "Named entity: lowast; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2217"
+                ]
+            ]
+        },
+        {
+            "input": "&lowbar",
+            "description": "Bad named entity: lowbar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lowbar"
+                ]
+            ]
+        },
+        {
+            "input": "&lowbar;",
+            "description": "Named entity: lowbar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "_"
+                ]
+            ]
+        },
+        {
+            "input": "&loz",
+            "description": "Bad named entity: loz without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&loz"
+                ]
+            ]
+        },
+        {
+            "input": "&loz;",
+            "description": "Named entity: loz; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25ca"
+                ]
+            ]
+        },
+        {
+            "input": "&lozenge",
+            "description": "Bad named entity: lozenge without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lozenge"
+                ]
+            ]
+        },
+        {
+            "input": "&lozenge;",
+            "description": "Named entity: lozenge; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25ca"
+                ]
+            ]
+        },
+        {
+            "input": "&lozf",
+            "description": "Bad named entity: lozf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lozf"
+                ]
+            ]
+        },
+        {
+            "input": "&lozf;",
+            "description": "Named entity: lozf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29eb"
+                ]
+            ]
+        },
+        {
+            "input": "&lpar",
+            "description": "Bad named entity: lpar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lpar"
+                ]
+            ]
+        },
+        {
+            "input": "&lpar;",
+            "description": "Named entity: lpar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "("
+                ]
+            ]
+        },
+        {
+            "input": "&lparlt",
+            "description": "Bad named entity: lparlt without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lparlt"
+                ]
+            ]
+        },
+        {
+            "input": "&lparlt;",
+            "description": "Named entity: lparlt; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2993"
+                ]
+            ]
+        },
+        {
+            "input": "&lrarr",
+            "description": "Bad named entity: lrarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lrarr"
+                ]
+            ]
+        },
+        {
+            "input": "&lrarr;",
+            "description": "Named entity: lrarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c6"
+                ]
+            ]
+        },
+        {
+            "input": "&lrcorner",
+            "description": "Bad named entity: lrcorner without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lrcorner"
+                ]
+            ]
+        },
+        {
+            "input": "&lrcorner;",
+            "description": "Named entity: lrcorner; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u231f"
+                ]
+            ]
+        },
+        {
+            "input": "&lrhar",
+            "description": "Bad named entity: lrhar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lrhar"
+                ]
+            ]
+        },
+        {
+            "input": "&lrhar;",
+            "description": "Named entity: lrhar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21cb"
+                ]
+            ]
+        },
+        {
+            "input": "&lrhard",
+            "description": "Bad named entity: lrhard without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lrhard"
+                ]
+            ]
+        },
+        {
+            "input": "&lrhard;",
+            "description": "Named entity: lrhard; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u296d"
+                ]
+            ]
+        },
+        {
+            "input": "&lrm",
+            "description": "Bad named entity: lrm without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lrm"
+                ]
+            ]
+        },
+        {
+            "input": "&lrm;",
+            "description": "Named entity: lrm; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u200e"
+                ]
+            ]
+        },
+        {
+            "input": "&lrtri",
+            "description": "Bad named entity: lrtri without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lrtri"
+                ]
+            ]
+        },
+        {
+            "input": "&lrtri;",
+            "description": "Named entity: lrtri; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22bf"
+                ]
+            ]
+        },
+        {
+            "input": "&lsaquo",
+            "description": "Bad named entity: lsaquo without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lsaquo"
+                ]
+            ]
+        },
+        {
+            "input": "&lsaquo;",
+            "description": "Named entity: lsaquo; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2039"
+                ]
+            ]
+        },
+        {
+            "input": "&lscr",
+            "description": "Bad named entity: lscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lscr"
+                ]
+            ]
+        },
+        {
+            "input": "&lscr;",
+            "description": "Named entity: lscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcc1"
+                ]
+            ]
+        },
+        {
+            "input": "&lsh",
+            "description": "Bad named entity: lsh without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lsh"
+                ]
+            ]
+        },
+        {
+            "input": "&lsh;",
+            "description": "Named entity: lsh; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21b0"
+                ]
+            ]
+        },
+        {
+            "input": "&lsim",
+            "description": "Bad named entity: lsim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lsim"
+                ]
+            ]
+        },
+        {
+            "input": "&lsim;",
+            "description": "Named entity: lsim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2272"
+                ]
+            ]
+        },
+        {
+            "input": "&lsime",
+            "description": "Bad named entity: lsime without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lsime"
+                ]
+            ]
+        },
+        {
+            "input": "&lsime;",
+            "description": "Named entity: lsime; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a8d"
+                ]
+            ]
+        },
+        {
+            "input": "&lsimg",
+            "description": "Bad named entity: lsimg without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lsimg"
+                ]
+            ]
+        },
+        {
+            "input": "&lsimg;",
+            "description": "Named entity: lsimg; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a8f"
+                ]
+            ]
+        },
+        {
+            "input": "&lsqb",
+            "description": "Bad named entity: lsqb without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lsqb"
+                ]
+            ]
+        },
+        {
+            "input": "&lsqb;",
+            "description": "Named entity: lsqb; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "["
+                ]
+            ]
+        },
+        {
+            "input": "&lsquo",
+            "description": "Bad named entity: lsquo without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lsquo"
+                ]
+            ]
+        },
+        {
+            "input": "&lsquo;",
+            "description": "Named entity: lsquo; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2018"
+                ]
+            ]
+        },
+        {
+            "input": "&lsquor",
+            "description": "Bad named entity: lsquor without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lsquor"
+                ]
+            ]
+        },
+        {
+            "input": "&lsquor;",
+            "description": "Named entity: lsquor; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u201a"
+                ]
+            ]
+        },
+        {
+            "input": "&lstrok",
+            "description": "Bad named entity: lstrok without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lstrok"
+                ]
+            ]
+        },
+        {
+            "input": "&lstrok;",
+            "description": "Named entity: lstrok; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0142"
+                ]
+            ]
+        },
+        {
+            "input": "&lt",
+            "description": "Named entity: lt without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "<"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 4 }
+            ]
+        },
+        {
+            "input": "&lt;",
+            "description": "Named entity: lt; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "<"
+                ]
+            ]
+        },
+        {
+            "input": "&ltcc;",
+            "description": "Named entity: ltcc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aa6"
+                ]
+            ]
+        },
+        {
+            "input": "&ltcir;",
+            "description": "Named entity: ltcir; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a79"
+                ]
+            ]
+        },
+        {
+            "input": "&ltdot;",
+            "description": "Named entity: ltdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22d6"
+                ]
+            ]
+        },
+        {
+            "input": "&lthree;",
+            "description": "Named entity: lthree; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22cb"
+                ]
+            ]
+        },
+        {
+            "input": "&ltimes;",
+            "description": "Named entity: ltimes; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22c9"
+                ]
+            ]
+        },
+        {
+            "input": "&ltlarr;",
+            "description": "Named entity: ltlarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2976"
+                ]
+            ]
+        },
+        {
+            "input": "&ltquest;",
+            "description": "Named entity: ltquest; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a7b"
+                ]
+            ]
+        },
+        {
+            "input": "&ltrPar;",
+            "description": "Named entity: ltrPar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2996"
+                ]
+            ]
+        },
+        {
+            "input": "&ltri;",
+            "description": "Named entity: ltri; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25c3"
+                ]
+            ]
+        },
+        {
+            "input": "&ltrie;",
+            "description": "Named entity: ltrie; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22b4"
+                ]
+            ]
+        },
+        {
+            "input": "&ltrif;",
+            "description": "Named entity: ltrif; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25c2"
+                ]
+            ]
+        },
+        {
+            "input": "&lurdshar",
+            "description": "Bad named entity: lurdshar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lurdshar"
+                ]
+            ]
+        },
+        {
+            "input": "&lurdshar;",
+            "description": "Named entity: lurdshar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u294a"
+                ]
+            ]
+        },
+        {
+            "input": "&luruhar",
+            "description": "Bad named entity: luruhar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&luruhar"
+                ]
+            ]
+        },
+        {
+            "input": "&luruhar;",
+            "description": "Named entity: luruhar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2966"
+                ]
+            ]
+        },
+        {
+            "input": "&lvertneqq",
+            "description": "Bad named entity: lvertneqq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lvertneqq"
+                ]
+            ]
+        },
+        {
+            "input": "&lvertneqq;",
+            "description": "Named entity: lvertneqq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2268\ufe00"
+                ]
+            ]
+        },
+        {
+            "input": "&lvnE",
+            "description": "Bad named entity: lvnE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&lvnE"
+                ]
+            ]
+        },
+        {
+            "input": "&lvnE;",
+            "description": "Named entity: lvnE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2268\ufe00"
+                ]
+            ]
+        },
+        {
+            "input": "&mDDot",
+            "description": "Bad named entity: mDDot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&mDDot"
+                ]
+            ]
+        },
+        {
+            "input": "&mDDot;",
+            "description": "Named entity: mDDot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u223a"
+                ]
+            ]
+        },
+        {
+            "input": "&macr",
+            "description": "Named entity: macr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00af"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 6 }
+            ]
+        },
+        {
+            "input": "&macr;",
+            "description": "Named entity: macr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00af"
+                ]
+            ]
+        },
+        {
+            "input": "&male",
+            "description": "Bad named entity: male without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&male"
+                ]
+            ]
+        },
+        {
+            "input": "&male;",
+            "description": "Named entity: male; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2642"
+                ]
+            ]
+        },
+        {
+            "input": "&malt",
+            "description": "Bad named entity: malt without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&malt"
+                ]
+            ]
+        },
+        {
+            "input": "&malt;",
+            "description": "Named entity: malt; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2720"
+                ]
+            ]
+        },
+        {
+            "input": "&maltese",
+            "description": "Bad named entity: maltese without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&maltese"
+                ]
+            ]
+        },
+        {
+            "input": "&maltese;",
+            "description": "Named entity: maltese; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2720"
+                ]
+            ]
+        },
+        {
+            "input": "&map",
+            "description": "Bad named entity: map without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&map"
+                ]
+            ]
+        },
+        {
+            "input": "&map;",
+            "description": "Named entity: map; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21a6"
+                ]
+            ]
+        },
+        {
+            "input": "&mapsto",
+            "description": "Bad named entity: mapsto without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&mapsto"
+                ]
+            ]
+        },
+        {
+            "input": "&mapsto;",
+            "description": "Named entity: mapsto; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21a6"
+                ]
+            ]
+        },
+        {
+            "input": "&mapstodown",
+            "description": "Bad named entity: mapstodown without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&mapstodown"
+                ]
+            ]
+        },
+        {
+            "input": "&mapstodown;",
+            "description": "Named entity: mapstodown; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21a7"
+                ]
+            ]
+        },
+        {
+            "input": "&mapstoleft",
+            "description": "Bad named entity: mapstoleft without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&mapstoleft"
+                ]
+            ]
+        },
+        {
+            "input": "&mapstoleft;",
+            "description": "Named entity: mapstoleft; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21a4"
+                ]
+            ]
+        },
+        {
+            "input": "&mapstoup",
+            "description": "Bad named entity: mapstoup without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&mapstoup"
+                ]
+            ]
+        },
+        {
+            "input": "&mapstoup;",
+            "description": "Named entity: mapstoup; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21a5"
+                ]
+            ]
+        },
+        {
+            "input": "&marker",
+            "description": "Bad named entity: marker without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&marker"
+                ]
+            ]
+        },
+        {
+            "input": "&marker;",
+            "description": "Named entity: marker; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25ae"
+                ]
+            ]
+        },
+        {
+            "input": "&mcomma",
+            "description": "Bad named entity: mcomma without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&mcomma"
+                ]
+            ]
+        },
+        {
+            "input": "&mcomma;",
+            "description": "Named entity: mcomma; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a29"
+                ]
+            ]
+        },
+        {
+            "input": "&mcy",
+            "description": "Bad named entity: mcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&mcy"
+                ]
+            ]
+        },
+        {
+            "input": "&mcy;",
+            "description": "Named entity: mcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u043c"
+                ]
+            ]
+        },
+        {
+            "input": "&mdash",
+            "description": "Bad named entity: mdash without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&mdash"
+                ]
+            ]
+        },
+        {
+            "input": "&mdash;",
+            "description": "Named entity: mdash; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2014"
+                ]
+            ]
+        },
+        {
+            "input": "&measuredangle",
+            "description": "Bad named entity: measuredangle without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&measuredangle"
+                ]
+            ]
+        },
+        {
+            "input": "&measuredangle;",
+            "description": "Named entity: measuredangle; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2221"
+                ]
+            ]
+        },
+        {
+            "input": "&mfr",
+            "description": "Bad named entity: mfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&mfr"
+                ]
+            ]
+        },
+        {
+            "input": "&mfr;",
+            "description": "Named entity: mfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd2a"
+                ]
+            ]
+        },
+        {
+            "input": "&mho",
+            "description": "Bad named entity: mho without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&mho"
+                ]
+            ]
+        },
+        {
+            "input": "&mho;",
+            "description": "Named entity: mho; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2127"
+                ]
+            ]
+        },
+        {
+            "input": "&micro",
+            "description": "Named entity: micro without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b5"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 7 }
+            ]
+        },
+        {
+            "input": "&micro;",
+            "description": "Named entity: micro; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b5"
+                ]
+            ]
+        },
+        {
+            "input": "&mid",
+            "description": "Bad named entity: mid without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&mid"
+                ]
+            ]
+        },
+        {
+            "input": "&mid;",
+            "description": "Named entity: mid; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2223"
+                ]
+            ]
+        },
+        {
+            "input": "&midast",
+            "description": "Bad named entity: midast without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&midast"
+                ]
+            ]
+        },
+        {
+            "input": "&midast;",
+            "description": "Named entity: midast; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "*"
+                ]
+            ]
+        },
+        {
+            "input": "&midcir",
+            "description": "Bad named entity: midcir without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&midcir"
+                ]
+            ]
+        },
+        {
+            "input": "&midcir;",
+            "description": "Named entity: midcir; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2af0"
+                ]
+            ]
+        },
+        {
+            "input": "&middot",
+            "description": "Named entity: middot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b7"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&middot;",
+            "description": "Named entity: middot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b7"
+                ]
+            ]
+        },
+        {
+            "input": "&minus",
+            "description": "Bad named entity: minus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&minus"
+                ]
+            ]
+        },
+        {
+            "input": "&minus;",
+            "description": "Named entity: minus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2212"
+                ]
+            ]
+        },
+        {
+            "input": "&minusb",
+            "description": "Bad named entity: minusb without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&minusb"
+                ]
+            ]
+        },
+        {
+            "input": "&minusb;",
+            "description": "Named entity: minusb; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u229f"
+                ]
+            ]
+        },
+        {
+            "input": "&minusd",
+            "description": "Bad named entity: minusd without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&minusd"
+                ]
+            ]
+        },
+        {
+            "input": "&minusd;",
+            "description": "Named entity: minusd; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2238"
+                ]
+            ]
+        },
+        {
+            "input": "&minusdu",
+            "description": "Bad named entity: minusdu without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&minusdu"
+                ]
+            ]
+        },
+        {
+            "input": "&minusdu;",
+            "description": "Named entity: minusdu; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a2a"
+                ]
+            ]
+        },
+        {
+            "input": "&mlcp",
+            "description": "Bad named entity: mlcp without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&mlcp"
+                ]
+            ]
+        },
+        {
+            "input": "&mlcp;",
+            "description": "Named entity: mlcp; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2adb"
+                ]
+            ]
+        },
+        {
+            "input": "&mldr",
+            "description": "Bad named entity: mldr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&mldr"
+                ]
+            ]
+        },
+        {
+            "input": "&mldr;",
+            "description": "Named entity: mldr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2026"
+                ]
+            ]
+        },
+        {
+            "input": "&mnplus",
+            "description": "Bad named entity: mnplus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&mnplus"
+                ]
+            ]
+        },
+        {
+            "input": "&mnplus;",
+            "description": "Named entity: mnplus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2213"
+                ]
+            ]
+        },
+        {
+            "input": "&models",
+            "description": "Bad named entity: models without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&models"
+                ]
+            ]
+        },
+        {
+            "input": "&models;",
+            "description": "Named entity: models; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22a7"
+                ]
+            ]
+        },
+        {
+            "input": "&mopf",
+            "description": "Bad named entity: mopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&mopf"
+                ]
+            ]
+        },
+        {
+            "input": "&mopf;",
+            "description": "Named entity: mopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd5e"
+                ]
+            ]
+        },
+        {
+            "input": "&mp",
+            "description": "Bad named entity: mp without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&mp"
+                ]
+            ]
+        },
+        {
+            "input": "&mp;",
+            "description": "Named entity: mp; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2213"
+                ]
+            ]
+        },
+        {
+            "input": "&mscr",
+            "description": "Bad named entity: mscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&mscr"
+                ]
+            ]
+        },
+        {
+            "input": "&mscr;",
+            "description": "Named entity: mscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcc2"
+                ]
+            ]
+        },
+        {
+            "input": "&mstpos",
+            "description": "Bad named entity: mstpos without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&mstpos"
+                ]
+            ]
+        },
+        {
+            "input": "&mstpos;",
+            "description": "Named entity: mstpos; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u223e"
+                ]
+            ]
+        },
+        {
+            "input": "&mu",
+            "description": "Bad named entity: mu without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&mu"
+                ]
+            ]
+        },
+        {
+            "input": "&mu;",
+            "description": "Named entity: mu; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03bc"
+                ]
+            ]
+        },
+        {
+            "input": "&multimap",
+            "description": "Bad named entity: multimap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&multimap"
+                ]
+            ]
+        },
+        {
+            "input": "&multimap;",
+            "description": "Named entity: multimap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22b8"
+                ]
+            ]
+        },
+        {
+            "input": "&mumap",
+            "description": "Bad named entity: mumap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&mumap"
+                ]
+            ]
+        },
+        {
+            "input": "&mumap;",
+            "description": "Named entity: mumap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22b8"
+                ]
+            ]
+        },
+        {
+            "input": "&nGg",
+            "description": "Bad named entity: nGg without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nGg"
+                ]
+            ]
+        },
+        {
+            "input": "&nGg;",
+            "description": "Named entity: nGg; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22d9\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&nGt",
+            "description": "Bad named entity: nGt without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nGt"
+                ]
+            ]
+        },
+        {
+            "input": "&nGt;",
+            "description": "Named entity: nGt; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u226b\u20d2"
+                ]
+            ]
+        },
+        {
+            "input": "&nGtv",
+            "description": "Bad named entity: nGtv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nGtv"
+                ]
+            ]
+        },
+        {
+            "input": "&nGtv;",
+            "description": "Named entity: nGtv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u226b\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&nLeftarrow",
+            "description": "Bad named entity: nLeftarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nLeftarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&nLeftarrow;",
+            "description": "Named entity: nLeftarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21cd"
+                ]
+            ]
+        },
+        {
+            "input": "&nLeftrightarrow",
+            "description": "Bad named entity: nLeftrightarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nLeftrightarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&nLeftrightarrow;",
+            "description": "Named entity: nLeftrightarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21ce"
+                ]
+            ]
+        },
+        {
+            "input": "&nLl",
+            "description": "Bad named entity: nLl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nLl"
+                ]
+            ]
+        },
+        {
+            "input": "&nLl;",
+            "description": "Named entity: nLl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22d8\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&nLt",
+            "description": "Bad named entity: nLt without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nLt"
+                ]
+            ]
+        },
+        {
+            "input": "&nLt;",
+            "description": "Named entity: nLt; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u226a\u20d2"
+                ]
+            ]
+        },
+        {
+            "input": "&nLtv",
+            "description": "Bad named entity: nLtv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nLtv"
+                ]
+            ]
+        },
+        {
+            "input": "&nLtv;",
+            "description": "Named entity: nLtv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u226a\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&nRightarrow",
+            "description": "Bad named entity: nRightarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nRightarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&nRightarrow;",
+            "description": "Named entity: nRightarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21cf"
+                ]
+            ]
+        },
+        {
+            "input": "&nVDash",
+            "description": "Bad named entity: nVDash without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nVDash"
+                ]
+            ]
+        },
+        {
+            "input": "&nVDash;",
+            "description": "Named entity: nVDash; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22af"
+                ]
+            ]
+        },
+        {
+            "input": "&nVdash",
+            "description": "Bad named entity: nVdash without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nVdash"
+                ]
+            ]
+        },
+        {
+            "input": "&nVdash;",
+            "description": "Named entity: nVdash; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22ae"
+                ]
+            ]
+        },
+        {
+            "input": "&nabla",
+            "description": "Bad named entity: nabla without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nabla"
+                ]
+            ]
+        },
+        {
+            "input": "&nabla;",
+            "description": "Named entity: nabla; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2207"
+                ]
+            ]
+        },
+        {
+            "input": "&nacute",
+            "description": "Bad named entity: nacute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nacute"
+                ]
+            ]
+        },
+        {
+            "input": "&nacute;",
+            "description": "Named entity: nacute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0144"
+                ]
+            ]
+        },
+        {
+            "input": "&nang",
+            "description": "Bad named entity: nang without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nang"
+                ]
+            ]
+        },
+        {
+            "input": "&nang;",
+            "description": "Named entity: nang; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2220\u20d2"
+                ]
+            ]
+        },
+        {
+            "input": "&nap",
+            "description": "Bad named entity: nap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nap"
+                ]
+            ]
+        },
+        {
+            "input": "&nap;",
+            "description": "Named entity: nap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2249"
+                ]
+            ]
+        },
+        {
+            "input": "&napE",
+            "description": "Bad named entity: napE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&napE"
+                ]
+            ]
+        },
+        {
+            "input": "&napE;",
+            "description": "Named entity: napE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a70\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&napid",
+            "description": "Bad named entity: napid without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&napid"
+                ]
+            ]
+        },
+        {
+            "input": "&napid;",
+            "description": "Named entity: napid; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u224b\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&napos",
+            "description": "Bad named entity: napos without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&napos"
+                ]
+            ]
+        },
+        {
+            "input": "&napos;",
+            "description": "Named entity: napos; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0149"
+                ]
+            ]
+        },
+        {
+            "input": "&napprox",
+            "description": "Bad named entity: napprox without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&napprox"
+                ]
+            ]
+        },
+        {
+            "input": "&napprox;",
+            "description": "Named entity: napprox; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2249"
+                ]
+            ]
+        },
+        {
+            "input": "&natur",
+            "description": "Bad named entity: natur without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&natur"
+                ]
+            ]
+        },
+        {
+            "input": "&natur;",
+            "description": "Named entity: natur; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u266e"
+                ]
+            ]
+        },
+        {
+            "input": "&natural",
+            "description": "Bad named entity: natural without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&natural"
+                ]
+            ]
+        },
+        {
+            "input": "&natural;",
+            "description": "Named entity: natural; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u266e"
+                ]
+            ]
+        },
+        {
+            "input": "&naturals",
+            "description": "Bad named entity: naturals without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&naturals"
+                ]
+            ]
+        },
+        {
+            "input": "&naturals;",
+            "description": "Named entity: naturals; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2115"
+                ]
+            ]
+        },
+        {
+            "input": "&nbsp",
+            "description": "Named entity: nbsp without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a0"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 6 }
+            ]
+        },
+        {
+            "input": "&nbsp;",
+            "description": "Named entity: nbsp; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a0"
+                ]
+            ]
+        },
+        {
+            "input": "&nbump",
+            "description": "Bad named entity: nbump without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nbump"
+                ]
+            ]
+        },
+        {
+            "input": "&nbump;",
+            "description": "Named entity: nbump; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u224e\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&nbumpe",
+            "description": "Bad named entity: nbumpe without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nbumpe"
+                ]
+            ]
+        },
+        {
+            "input": "&nbumpe;",
+            "description": "Named entity: nbumpe; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u224f\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&ncap",
+            "description": "Bad named entity: ncap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ncap"
+                ]
+            ]
+        },
+        {
+            "input": "&ncap;",
+            "description": "Named entity: ncap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a43"
+                ]
+            ]
+        },
+        {
+            "input": "&ncaron",
+            "description": "Bad named entity: ncaron without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ncaron"
+                ]
+            ]
+        },
+        {
+            "input": "&ncaron;",
+            "description": "Named entity: ncaron; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0148"
+                ]
+            ]
+        },
+        {
+            "input": "&ncedil",
+            "description": "Bad named entity: ncedil without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ncedil"
+                ]
+            ]
+        },
+        {
+            "input": "&ncedil;",
+            "description": "Named entity: ncedil; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0146"
+                ]
+            ]
+        },
+        {
+            "input": "&ncong",
+            "description": "Bad named entity: ncong without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ncong"
+                ]
+            ]
+        },
+        {
+            "input": "&ncong;",
+            "description": "Named entity: ncong; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2247"
+                ]
+            ]
+        },
+        {
+            "input": "&ncongdot",
+            "description": "Bad named entity: ncongdot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ncongdot"
+                ]
+            ]
+        },
+        {
+            "input": "&ncongdot;",
+            "description": "Named entity: ncongdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a6d\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&ncup",
+            "description": "Bad named entity: ncup without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ncup"
+                ]
+            ]
+        },
+        {
+            "input": "&ncup;",
+            "description": "Named entity: ncup; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a42"
+                ]
+            ]
+        },
+        {
+            "input": "&ncy",
+            "description": "Bad named entity: ncy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ncy"
+                ]
+            ]
+        },
+        {
+            "input": "&ncy;",
+            "description": "Named entity: ncy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u043d"
+                ]
+            ]
+        },
+        {
+            "input": "&ndash",
+            "description": "Bad named entity: ndash without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ndash"
+                ]
+            ]
+        },
+        {
+            "input": "&ndash;",
+            "description": "Named entity: ndash; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2013"
+                ]
+            ]
+        },
+        {
+            "input": "&ne",
+            "description": "Bad named entity: ne without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ne"
+                ]
+            ]
+        },
+        {
+            "input": "&ne;",
+            "description": "Named entity: ne; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2260"
+                ]
+            ]
+        },
+        {
+            "input": "&neArr",
+            "description": "Bad named entity: neArr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&neArr"
+                ]
+            ]
+        },
+        {
+            "input": "&neArr;",
+            "description": "Named entity: neArr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21d7"
+                ]
+            ]
+        },
+        {
+            "input": "&nearhk",
+            "description": "Bad named entity: nearhk without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nearhk"
+                ]
+            ]
+        },
+        {
+            "input": "&nearhk;",
+            "description": "Named entity: nearhk; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2924"
+                ]
+            ]
+        },
+        {
+            "input": "&nearr",
+            "description": "Bad named entity: nearr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nearr"
+                ]
+            ]
+        },
+        {
+            "input": "&nearr;",
+            "description": "Named entity: nearr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2197"
+                ]
+            ]
+        },
+        {
+            "input": "&nearrow",
+            "description": "Bad named entity: nearrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nearrow"
+                ]
+            ]
+        },
+        {
+            "input": "&nearrow;",
+            "description": "Named entity: nearrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2197"
+                ]
+            ]
+        },
+        {
+            "input": "&nedot",
+            "description": "Bad named entity: nedot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nedot"
+                ]
+            ]
+        },
+        {
+            "input": "&nedot;",
+            "description": "Named entity: nedot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2250\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&nequiv",
+            "description": "Bad named entity: nequiv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nequiv"
+                ]
+            ]
+        },
+        {
+            "input": "&nequiv;",
+            "description": "Named entity: nequiv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2262"
+                ]
+            ]
+        },
+        {
+            "input": "&nesear",
+            "description": "Bad named entity: nesear without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nesear"
+                ]
+            ]
+        },
+        {
+            "input": "&nesear;",
+            "description": "Named entity: nesear; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2928"
+                ]
+            ]
+        },
+        {
+            "input": "&nesim",
+            "description": "Bad named entity: nesim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nesim"
+                ]
+            ]
+        },
+        {
+            "input": "&nesim;",
+            "description": "Named entity: nesim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2242\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&nexist",
+            "description": "Bad named entity: nexist without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nexist"
+                ]
+            ]
+        },
+        {
+            "input": "&nexist;",
+            "description": "Named entity: nexist; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2204"
+                ]
+            ]
+        },
+        {
+            "input": "&nexists",
+            "description": "Bad named entity: nexists without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nexists"
+                ]
+            ]
+        },
+        {
+            "input": "&nexists;",
+            "description": "Named entity: nexists; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2204"
+                ]
+            ]
+        },
+        {
+            "input": "&nfr",
+            "description": "Bad named entity: nfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nfr"
+                ]
+            ]
+        },
+        {
+            "input": "&nfr;",
+            "description": "Named entity: nfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd2b"
+                ]
+            ]
+        },
+        {
+            "input": "&ngE",
+            "description": "Bad named entity: ngE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ngE"
+                ]
+            ]
+        },
+        {
+            "input": "&ngE;",
+            "description": "Named entity: ngE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2267\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&nge",
+            "description": "Bad named entity: nge without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nge"
+                ]
+            ]
+        },
+        {
+            "input": "&nge;",
+            "description": "Named entity: nge; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2271"
+                ]
+            ]
+        },
+        {
+            "input": "&ngeq",
+            "description": "Bad named entity: ngeq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ngeq"
+                ]
+            ]
+        },
+        {
+            "input": "&ngeq;",
+            "description": "Named entity: ngeq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2271"
+                ]
+            ]
+        },
+        {
+            "input": "&ngeqq",
+            "description": "Bad named entity: ngeqq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ngeqq"
+                ]
+            ]
+        },
+        {
+            "input": "&ngeqq;",
+            "description": "Named entity: ngeqq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2267\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&ngeqslant",
+            "description": "Bad named entity: ngeqslant without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ngeqslant"
+                ]
+            ]
+        },
+        {
+            "input": "&ngeqslant;",
+            "description": "Named entity: ngeqslant; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a7e\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&nges",
+            "description": "Bad named entity: nges without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nges"
+                ]
+            ]
+        },
+        {
+            "input": "&nges;",
+            "description": "Named entity: nges; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a7e\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&ngsim",
+            "description": "Bad named entity: ngsim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ngsim"
+                ]
+            ]
+        },
+        {
+            "input": "&ngsim;",
+            "description": "Named entity: ngsim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2275"
+                ]
+            ]
+        },
+        {
+            "input": "&ngt",
+            "description": "Bad named entity: ngt without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ngt"
+                ]
+            ]
+        },
+        {
+            "input": "&ngt;",
+            "description": "Named entity: ngt; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u226f"
+                ]
+            ]
+        },
+        {
+            "input": "&ngtr",
+            "description": "Bad named entity: ngtr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ngtr"
+                ]
+            ]
+        },
+        {
+            "input": "&ngtr;",
+            "description": "Named entity: ngtr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u226f"
+                ]
+            ]
+        },
+        {
+            "input": "&nhArr",
+            "description": "Bad named entity: nhArr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nhArr"
+                ]
+            ]
+        },
+        {
+            "input": "&nhArr;",
+            "description": "Named entity: nhArr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21ce"
+                ]
+            ]
+        },
+        {
+            "input": "&nharr",
+            "description": "Bad named entity: nharr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nharr"
+                ]
+            ]
+        },
+        {
+            "input": "&nharr;",
+            "description": "Named entity: nharr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21ae"
+                ]
+            ]
+        },
+        {
+            "input": "&nhpar",
+            "description": "Bad named entity: nhpar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nhpar"
+                ]
+            ]
+        },
+        {
+            "input": "&nhpar;",
+            "description": "Named entity: nhpar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2af2"
+                ]
+            ]
+        },
+        {
+            "input": "&ni",
+            "description": "Bad named entity: ni without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ni"
+                ]
+            ]
+        },
+        {
+            "input": "&ni;",
+            "description": "Named entity: ni; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u220b"
+                ]
+            ]
+        },
+        {
+            "input": "&nis",
+            "description": "Bad named entity: nis without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nis"
+                ]
+            ]
+        },
+        {
+            "input": "&nis;",
+            "description": "Named entity: nis; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22fc"
+                ]
+            ]
+        },
+        {
+            "input": "&nisd",
+            "description": "Bad named entity: nisd without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nisd"
+                ]
+            ]
+        },
+        {
+            "input": "&nisd;",
+            "description": "Named entity: nisd; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22fa"
+                ]
+            ]
+        },
+        {
+            "input": "&niv",
+            "description": "Bad named entity: niv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&niv"
+                ]
+            ]
+        },
+        {
+            "input": "&niv;",
+            "description": "Named entity: niv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u220b"
+                ]
+            ]
+        },
+        {
+            "input": "&njcy",
+            "description": "Bad named entity: njcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&njcy"
+                ]
+            ]
+        },
+        {
+            "input": "&njcy;",
+            "description": "Named entity: njcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u045a"
+                ]
+            ]
+        },
+        {
+            "input": "&nlArr",
+            "description": "Bad named entity: nlArr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nlArr"
+                ]
+            ]
+        },
+        {
+            "input": "&nlArr;",
+            "description": "Named entity: nlArr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21cd"
+                ]
+            ]
+        },
+        {
+            "input": "&nlE",
+            "description": "Bad named entity: nlE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nlE"
+                ]
+            ]
+        },
+        {
+            "input": "&nlE;",
+            "description": "Named entity: nlE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2266\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&nlarr",
+            "description": "Bad named entity: nlarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nlarr"
+                ]
+            ]
+        },
+        {
+            "input": "&nlarr;",
+            "description": "Named entity: nlarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u219a"
+                ]
+            ]
+        },
+        {
+            "input": "&nldr",
+            "description": "Bad named entity: nldr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nldr"
+                ]
+            ]
+        },
+        {
+            "input": "&nldr;",
+            "description": "Named entity: nldr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2025"
+                ]
+            ]
+        },
+        {
+            "input": "&nle",
+            "description": "Bad named entity: nle without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nle"
+                ]
+            ]
+        },
+        {
+            "input": "&nle;",
+            "description": "Named entity: nle; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2270"
+                ]
+            ]
+        },
+        {
+            "input": "&nleftarrow",
+            "description": "Bad named entity: nleftarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nleftarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&nleftarrow;",
+            "description": "Named entity: nleftarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u219a"
+                ]
+            ]
+        },
+        {
+            "input": "&nleftrightarrow",
+            "description": "Bad named entity: nleftrightarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nleftrightarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&nleftrightarrow;",
+            "description": "Named entity: nleftrightarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21ae"
+                ]
+            ]
+        },
+        {
+            "input": "&nleq",
+            "description": "Bad named entity: nleq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nleq"
+                ]
+            ]
+        },
+        {
+            "input": "&nleq;",
+            "description": "Named entity: nleq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2270"
+                ]
+            ]
+        },
+        {
+            "input": "&nleqq",
+            "description": "Bad named entity: nleqq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nleqq"
+                ]
+            ]
+        },
+        {
+            "input": "&nleqq;",
+            "description": "Named entity: nleqq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2266\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&nleqslant",
+            "description": "Bad named entity: nleqslant without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nleqslant"
+                ]
+            ]
+        },
+        {
+            "input": "&nleqslant;",
+            "description": "Named entity: nleqslant; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a7d\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&nles",
+            "description": "Bad named entity: nles without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nles"
+                ]
+            ]
+        },
+        {
+            "input": "&nles;",
+            "description": "Named entity: nles; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a7d\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&nless",
+            "description": "Bad named entity: nless without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nless"
+                ]
+            ]
+        },
+        {
+            "input": "&nless;",
+            "description": "Named entity: nless; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u226e"
+                ]
+            ]
+        },
+        {
+            "input": "&nlsim",
+            "description": "Bad named entity: nlsim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nlsim"
+                ]
+            ]
+        },
+        {
+            "input": "&nlsim;",
+            "description": "Named entity: nlsim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2274"
+                ]
+            ]
+        },
+        {
+            "input": "&nlt",
+            "description": "Bad named entity: nlt without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nlt"
+                ]
+            ]
+        },
+        {
+            "input": "&nlt;",
+            "description": "Named entity: nlt; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u226e"
+                ]
+            ]
+        },
+        {
+            "input": "&nltri",
+            "description": "Bad named entity: nltri without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nltri"
+                ]
+            ]
+        },
+        {
+            "input": "&nltri;",
+            "description": "Named entity: nltri; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22ea"
+                ]
+            ]
+        },
+        {
+            "input": "&nltrie",
+            "description": "Bad named entity: nltrie without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nltrie"
+                ]
+            ]
+        },
+        {
+            "input": "&nltrie;",
+            "description": "Named entity: nltrie; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22ec"
+                ]
+            ]
+        },
+        {
+            "input": "&nmid",
+            "description": "Bad named entity: nmid without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nmid"
+                ]
+            ]
+        },
+        {
+            "input": "&nmid;",
+            "description": "Named entity: nmid; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2224"
+                ]
+            ]
+        },
+        {
+            "input": "&nopf",
+            "description": "Bad named entity: nopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nopf"
+                ]
+            ]
+        },
+        {
+            "input": "&nopf;",
+            "description": "Named entity: nopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd5f"
+                ]
+            ]
+        },
+        {
+            "input": "&not",
+            "description": "Named entity: not without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ac"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 5 }
+            ]
+        },
+        {
+            "input": "&not;",
+            "description": "Named entity: not; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ac"
+                ]
+            ]
+        },
+        {
+            "input": "&notin;",
+            "description": "Named entity: notin; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2209"
+                ]
+            ]
+        },
+        {
+            "input": "&notinE;",
+            "description": "Named entity: notinE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22f9\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&notindot;",
+            "description": "Named entity: notindot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22f5\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&notinva;",
+            "description": "Named entity: notinva; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2209"
+                ]
+            ]
+        },
+        {
+            "input": "&notinvb;",
+            "description": "Named entity: notinvb; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22f7"
+                ]
+            ]
+        },
+        {
+            "input": "&notinvc;",
+            "description": "Named entity: notinvc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22f6"
+                ]
+            ]
+        },
+        {
+            "input": "&notni;",
+            "description": "Named entity: notni; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u220c"
+                ]
+            ]
+        },
+        {
+            "input": "&notniva;",
+            "description": "Named entity: notniva; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u220c"
+                ]
+            ]
+        },
+        {
+            "input": "&notnivb;",
+            "description": "Named entity: notnivb; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22fe"
+                ]
+            ]
+        },
+        {
+            "input": "&notnivc;",
+            "description": "Named entity: notnivc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22fd"
+                ]
+            ]
+        },
+        {
+            "input": "&npar",
+            "description": "Bad named entity: npar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&npar"
+                ]
+            ]
+        },
+        {
+            "input": "&npar;",
+            "description": "Named entity: npar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2226"
+                ]
+            ]
+        },
+        {
+            "input": "&nparallel",
+            "description": "Bad named entity: nparallel without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nparallel"
+                ]
+            ]
+        },
+        {
+            "input": "&nparallel;",
+            "description": "Named entity: nparallel; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2226"
+                ]
+            ]
+        },
+        {
+            "input": "&nparsl",
+            "description": "Bad named entity: nparsl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nparsl"
+                ]
+            ]
+        },
+        {
+            "input": "&nparsl;",
+            "description": "Named entity: nparsl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2afd\u20e5"
+                ]
+            ]
+        },
+        {
+            "input": "&npart",
+            "description": "Bad named entity: npart without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&npart"
+                ]
+            ]
+        },
+        {
+            "input": "&npart;",
+            "description": "Named entity: npart; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2202\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&npolint",
+            "description": "Bad named entity: npolint without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&npolint"
+                ]
+            ]
+        },
+        {
+            "input": "&npolint;",
+            "description": "Named entity: npolint; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a14"
+                ]
+            ]
+        },
+        {
+            "input": "&npr",
+            "description": "Bad named entity: npr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&npr"
+                ]
+            ]
+        },
+        {
+            "input": "&npr;",
+            "description": "Named entity: npr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2280"
+                ]
+            ]
+        },
+        {
+            "input": "&nprcue",
+            "description": "Bad named entity: nprcue without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nprcue"
+                ]
+            ]
+        },
+        {
+            "input": "&nprcue;",
+            "description": "Named entity: nprcue; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22e0"
+                ]
+            ]
+        },
+        {
+            "input": "&npre",
+            "description": "Bad named entity: npre without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&npre"
+                ]
+            ]
+        },
+        {
+            "input": "&npre;",
+            "description": "Named entity: npre; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aaf\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&nprec",
+            "description": "Bad named entity: nprec without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nprec"
+                ]
+            ]
+        },
+        {
+            "input": "&nprec;",
+            "description": "Named entity: nprec; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2280"
+                ]
+            ]
+        },
+        {
+            "input": "&npreceq",
+            "description": "Bad named entity: npreceq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&npreceq"
+                ]
+            ]
+        },
+        {
+            "input": "&npreceq;",
+            "description": "Named entity: npreceq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aaf\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&nrArr",
+            "description": "Bad named entity: nrArr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nrArr"
+                ]
+            ]
+        },
+        {
+            "input": "&nrArr;",
+            "description": "Named entity: nrArr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21cf"
+                ]
+            ]
+        },
+        {
+            "input": "&nrarr",
+            "description": "Bad named entity: nrarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nrarr"
+                ]
+            ]
+        },
+        {
+            "input": "&nrarr;",
+            "description": "Named entity: nrarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u219b"
+                ]
+            ]
+        },
+        {
+            "input": "&nrarrc",
+            "description": "Bad named entity: nrarrc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nrarrc"
+                ]
+            ]
+        },
+        {
+            "input": "&nrarrc;",
+            "description": "Named entity: nrarrc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2933\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&nrarrw",
+            "description": "Bad named entity: nrarrw without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nrarrw"
+                ]
+            ]
+        },
+        {
+            "input": "&nrarrw;",
+            "description": "Named entity: nrarrw; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u219d\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&nrightarrow",
+            "description": "Bad named entity: nrightarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nrightarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&nrightarrow;",
+            "description": "Named entity: nrightarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u219b"
+                ]
+            ]
+        },
+        {
+            "input": "&nrtri",
+            "description": "Bad named entity: nrtri without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nrtri"
+                ]
+            ]
+        },
+        {
+            "input": "&nrtri;",
+            "description": "Named entity: nrtri; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22eb"
+                ]
+            ]
+        },
+        {
+            "input": "&nrtrie",
+            "description": "Bad named entity: nrtrie without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nrtrie"
+                ]
+            ]
+        },
+        {
+            "input": "&nrtrie;",
+            "description": "Named entity: nrtrie; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22ed"
+                ]
+            ]
+        },
+        {
+            "input": "&nsc",
+            "description": "Bad named entity: nsc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nsc"
+                ]
+            ]
+        },
+        {
+            "input": "&nsc;",
+            "description": "Named entity: nsc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2281"
+                ]
+            ]
+        },
+        {
+            "input": "&nsccue",
+            "description": "Bad named entity: nsccue without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nsccue"
+                ]
+            ]
+        },
+        {
+            "input": "&nsccue;",
+            "description": "Named entity: nsccue; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22e1"
+                ]
+            ]
+        },
+        {
+            "input": "&nsce",
+            "description": "Bad named entity: nsce without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nsce"
+                ]
+            ]
+        },
+        {
+            "input": "&nsce;",
+            "description": "Named entity: nsce; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ab0\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&nscr",
+            "description": "Bad named entity: nscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nscr"
+                ]
+            ]
+        },
+        {
+            "input": "&nscr;",
+            "description": "Named entity: nscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcc3"
+                ]
+            ]
+        },
+        {
+            "input": "&nshortmid",
+            "description": "Bad named entity: nshortmid without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nshortmid"
+                ]
+            ]
+        },
+        {
+            "input": "&nshortmid;",
+            "description": "Named entity: nshortmid; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2224"
+                ]
+            ]
+        },
+        {
+            "input": "&nshortparallel",
+            "description": "Bad named entity: nshortparallel without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nshortparallel"
+                ]
+            ]
+        },
+        {
+            "input": "&nshortparallel;",
+            "description": "Named entity: nshortparallel; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2226"
+                ]
+            ]
+        },
+        {
+            "input": "&nsim",
+            "description": "Bad named entity: nsim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nsim"
+                ]
+            ]
+        },
+        {
+            "input": "&nsim;",
+            "description": "Named entity: nsim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2241"
+                ]
+            ]
+        },
+        {
+            "input": "&nsime",
+            "description": "Bad named entity: nsime without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nsime"
+                ]
+            ]
+        },
+        {
+            "input": "&nsime;",
+            "description": "Named entity: nsime; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2244"
+                ]
+            ]
+        },
+        {
+            "input": "&nsimeq",
+            "description": "Bad named entity: nsimeq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nsimeq"
+                ]
+            ]
+        },
+        {
+            "input": "&nsimeq;",
+            "description": "Named entity: nsimeq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2244"
+                ]
+            ]
+        },
+        {
+            "input": "&nsmid",
+            "description": "Bad named entity: nsmid without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nsmid"
+                ]
+            ]
+        },
+        {
+            "input": "&nsmid;",
+            "description": "Named entity: nsmid; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2224"
+                ]
+            ]
+        },
+        {
+            "input": "&nspar",
+            "description": "Bad named entity: nspar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nspar"
+                ]
+            ]
+        },
+        {
+            "input": "&nspar;",
+            "description": "Named entity: nspar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2226"
+                ]
+            ]
+        },
+        {
+            "input": "&nsqsube",
+            "description": "Bad named entity: nsqsube without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nsqsube"
+                ]
+            ]
+        },
+        {
+            "input": "&nsqsube;",
+            "description": "Named entity: nsqsube; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22e2"
+                ]
+            ]
+        },
+        {
+            "input": "&nsqsupe",
+            "description": "Bad named entity: nsqsupe without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nsqsupe"
+                ]
+            ]
+        },
+        {
+            "input": "&nsqsupe;",
+            "description": "Named entity: nsqsupe; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22e3"
+                ]
+            ]
+        },
+        {
+            "input": "&nsub",
+            "description": "Bad named entity: nsub without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nsub"
+                ]
+            ]
+        },
+        {
+            "input": "&nsub;",
+            "description": "Named entity: nsub; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2284"
+                ]
+            ]
+        },
+        {
+            "input": "&nsubE",
+            "description": "Bad named entity: nsubE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nsubE"
+                ]
+            ]
+        },
+        {
+            "input": "&nsubE;",
+            "description": "Named entity: nsubE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ac5\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&nsube",
+            "description": "Bad named entity: nsube without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nsube"
+                ]
+            ]
+        },
+        {
+            "input": "&nsube;",
+            "description": "Named entity: nsube; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2288"
+                ]
+            ]
+        },
+        {
+            "input": "&nsubset",
+            "description": "Bad named entity: nsubset without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nsubset"
+                ]
+            ]
+        },
+        {
+            "input": "&nsubset;",
+            "description": "Named entity: nsubset; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2282\u20d2"
+                ]
+            ]
+        },
+        {
+            "input": "&nsubseteq",
+            "description": "Bad named entity: nsubseteq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nsubseteq"
+                ]
+            ]
+        },
+        {
+            "input": "&nsubseteq;",
+            "description": "Named entity: nsubseteq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2288"
+                ]
+            ]
+        },
+        {
+            "input": "&nsubseteqq",
+            "description": "Bad named entity: nsubseteqq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nsubseteqq"
+                ]
+            ]
+        },
+        {
+            "input": "&nsubseteqq;",
+            "description": "Named entity: nsubseteqq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ac5\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&nsucc",
+            "description": "Bad named entity: nsucc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nsucc"
+                ]
+            ]
+        },
+        {
+            "input": "&nsucc;",
+            "description": "Named entity: nsucc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2281"
+                ]
+            ]
+        },
+        {
+            "input": "&nsucceq",
+            "description": "Bad named entity: nsucceq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nsucceq"
+                ]
+            ]
+        },
+        {
+            "input": "&nsucceq;",
+            "description": "Named entity: nsucceq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ab0\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&nsup",
+            "description": "Bad named entity: nsup without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nsup"
+                ]
+            ]
+        },
+        {
+            "input": "&nsup;",
+            "description": "Named entity: nsup; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2285"
+                ]
+            ]
+        },
+        {
+            "input": "&nsupE",
+            "description": "Bad named entity: nsupE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nsupE"
+                ]
+            ]
+        },
+        {
+            "input": "&nsupE;",
+            "description": "Named entity: nsupE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ac6\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&nsupe",
+            "description": "Bad named entity: nsupe without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nsupe"
+                ]
+            ]
+        },
+        {
+            "input": "&nsupe;",
+            "description": "Named entity: nsupe; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2289"
+                ]
+            ]
+        },
+        {
+            "input": "&nsupset",
+            "description": "Bad named entity: nsupset without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nsupset"
+                ]
+            ]
+        },
+        {
+            "input": "&nsupset;",
+            "description": "Named entity: nsupset; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2283\u20d2"
+                ]
+            ]
+        },
+        {
+            "input": "&nsupseteq",
+            "description": "Bad named entity: nsupseteq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nsupseteq"
+                ]
+            ]
+        },
+        {
+            "input": "&nsupseteq;",
+            "description": "Named entity: nsupseteq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2289"
+                ]
+            ]
+        },
+        {
+            "input": "&nsupseteqq",
+            "description": "Bad named entity: nsupseteqq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nsupseteqq"
+                ]
+            ]
+        },
+        {
+            "input": "&nsupseteqq;",
+            "description": "Named entity: nsupseteqq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ac6\u0338"
+                ]
+            ]
+        },
+        {
+            "input": "&ntgl",
+            "description": "Bad named entity: ntgl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ntgl"
+                ]
+            ]
+        },
+        {
+            "input": "&ntgl;",
+            "description": "Named entity: ntgl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2279"
+                ]
+            ]
+        },
+        {
+            "input": "&ntilde",
+            "description": "Named entity: ntilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00f1"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&ntilde;",
+            "description": "Named entity: ntilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00f1"
+                ]
+            ]
+        },
+        {
+            "input": "&ntlg",
+            "description": "Bad named entity: ntlg without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ntlg"
+                ]
+            ]
+        },
+        {
+            "input": "&ntlg;",
+            "description": "Named entity: ntlg; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2278"
+                ]
+            ]
+        },
+        {
+            "input": "&ntriangleleft",
+            "description": "Bad named entity: ntriangleleft without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ntriangleleft"
+                ]
+            ]
+        },
+        {
+            "input": "&ntriangleleft;",
+            "description": "Named entity: ntriangleleft; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22ea"
+                ]
+            ]
+        },
+        {
+            "input": "&ntrianglelefteq",
+            "description": "Bad named entity: ntrianglelefteq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ntrianglelefteq"
+                ]
+            ]
+        },
+        {
+            "input": "&ntrianglelefteq;",
+            "description": "Named entity: ntrianglelefteq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22ec"
+                ]
+            ]
+        },
+        {
+            "input": "&ntriangleright",
+            "description": "Bad named entity: ntriangleright without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ntriangleright"
+                ]
+            ]
+        },
+        {
+            "input": "&ntriangleright;",
+            "description": "Named entity: ntriangleright; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22eb"
+                ]
+            ]
+        },
+        {
+            "input": "&ntrianglerighteq",
+            "description": "Bad named entity: ntrianglerighteq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ntrianglerighteq"
+                ]
+            ]
+        },
+        {
+            "input": "&ntrianglerighteq;",
+            "description": "Named entity: ntrianglerighteq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22ed"
+                ]
+            ]
+        },
+        {
+            "input": "&nu",
+            "description": "Bad named entity: nu without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nu"
+                ]
+            ]
+        },
+        {
+            "input": "&nu;",
+            "description": "Named entity: nu; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03bd"
+                ]
+            ]
+        },
+        {
+            "input": "&num",
+            "description": "Bad named entity: num without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&num"
+                ]
+            ]
+        },
+        {
+            "input": "&num;",
+            "description": "Named entity: num; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "#"
+                ]
+            ]
+        },
+        {
+            "input": "&numero",
+            "description": "Bad named entity: numero without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&numero"
+                ]
+            ]
+        },
+        {
+            "input": "&numero;",
+            "description": "Named entity: numero; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2116"
+                ]
+            ]
+        },
+        {
+            "input": "&numsp",
+            "description": "Bad named entity: numsp without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&numsp"
+                ]
+            ]
+        },
+        {
+            "input": "&numsp;",
+            "description": "Named entity: numsp; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2007"
+                ]
+            ]
+        },
+        {
+            "input": "&nvDash",
+            "description": "Bad named entity: nvDash without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nvDash"
+                ]
+            ]
+        },
+        {
+            "input": "&nvDash;",
+            "description": "Named entity: nvDash; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22ad"
+                ]
+            ]
+        },
+        {
+            "input": "&nvHarr",
+            "description": "Bad named entity: nvHarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nvHarr"
+                ]
+            ]
+        },
+        {
+            "input": "&nvHarr;",
+            "description": "Named entity: nvHarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2904"
+                ]
+            ]
+        },
+        {
+            "input": "&nvap",
+            "description": "Bad named entity: nvap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nvap"
+                ]
+            ]
+        },
+        {
+            "input": "&nvap;",
+            "description": "Named entity: nvap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u224d\u20d2"
+                ]
+            ]
+        },
+        {
+            "input": "&nvdash",
+            "description": "Bad named entity: nvdash without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nvdash"
+                ]
+            ]
+        },
+        {
+            "input": "&nvdash;",
+            "description": "Named entity: nvdash; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22ac"
+                ]
+            ]
+        },
+        {
+            "input": "&nvge",
+            "description": "Bad named entity: nvge without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nvge"
+                ]
+            ]
+        },
+        {
+            "input": "&nvge;",
+            "description": "Named entity: nvge; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2265\u20d2"
+                ]
+            ]
+        },
+        {
+            "input": "&nvgt",
+            "description": "Bad named entity: nvgt without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nvgt"
+                ]
+            ]
+        },
+        {
+            "input": "&nvgt;",
+            "description": "Named entity: nvgt; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    ">\u20d2"
+                ]
+            ]
+        },
+        {
+            "input": "&nvinfin",
+            "description": "Bad named entity: nvinfin without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nvinfin"
+                ]
+            ]
+        },
+        {
+            "input": "&nvinfin;",
+            "description": "Named entity: nvinfin; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29de"
+                ]
+            ]
+        },
+        {
+            "input": "&nvlArr",
+            "description": "Bad named entity: nvlArr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nvlArr"
+                ]
+            ]
+        },
+        {
+            "input": "&nvlArr;",
+            "description": "Named entity: nvlArr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2902"
+                ]
+            ]
+        },
+        {
+            "input": "&nvle",
+            "description": "Bad named entity: nvle without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nvle"
+                ]
+            ]
+        },
+        {
+            "input": "&nvle;",
+            "description": "Named entity: nvle; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2264\u20d2"
+                ]
+            ]
+        },
+        {
+            "input": "&nvlt",
+            "description": "Bad named entity: nvlt without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nvlt"
+                ]
+            ]
+        },
+        {
+            "input": "&nvlt;",
+            "description": "Named entity: nvlt; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "<\u20d2"
+                ]
+            ]
+        },
+        {
+            "input": "&nvltrie",
+            "description": "Bad named entity: nvltrie without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nvltrie"
+                ]
+            ]
+        },
+        {
+            "input": "&nvltrie;",
+            "description": "Named entity: nvltrie; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22b4\u20d2"
+                ]
+            ]
+        },
+        {
+            "input": "&nvrArr",
+            "description": "Bad named entity: nvrArr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nvrArr"
+                ]
+            ]
+        },
+        {
+            "input": "&nvrArr;",
+            "description": "Named entity: nvrArr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2903"
+                ]
+            ]
+        },
+        {
+            "input": "&nvrtrie",
+            "description": "Bad named entity: nvrtrie without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nvrtrie"
+                ]
+            ]
+        },
+        {
+            "input": "&nvrtrie;",
+            "description": "Named entity: nvrtrie; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22b5\u20d2"
+                ]
+            ]
+        },
+        {
+            "input": "&nvsim",
+            "description": "Bad named entity: nvsim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nvsim"
+                ]
+            ]
+        },
+        {
+            "input": "&nvsim;",
+            "description": "Named entity: nvsim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u223c\u20d2"
+                ]
+            ]
+        },
+        {
+            "input": "&nwArr",
+            "description": "Bad named entity: nwArr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nwArr"
+                ]
+            ]
+        },
+        {
+            "input": "&nwArr;",
+            "description": "Named entity: nwArr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21d6"
+                ]
+            ]
+        },
+        {
+            "input": "&nwarhk",
+            "description": "Bad named entity: nwarhk without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nwarhk"
+                ]
+            ]
+        },
+        {
+            "input": "&nwarhk;",
+            "description": "Named entity: nwarhk; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2923"
+                ]
+            ]
+        },
+        {
+            "input": "&nwarr",
+            "description": "Bad named entity: nwarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nwarr"
+                ]
+            ]
+        },
+        {
+            "input": "&nwarr;",
+            "description": "Named entity: nwarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2196"
+                ]
+            ]
+        },
+        {
+            "input": "&nwarrow",
+            "description": "Bad named entity: nwarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nwarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&nwarrow;",
+            "description": "Named entity: nwarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2196"
+                ]
+            ]
+        },
+        {
+            "input": "&nwnear",
+            "description": "Bad named entity: nwnear without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&nwnear"
+                ]
+            ]
+        },
+        {
+            "input": "&nwnear;",
+            "description": "Named entity: nwnear; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2927"
+                ]
+            ]
+        },
+        {
+            "input": "&oS",
+            "description": "Bad named entity: oS without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&oS"
+                ]
+            ]
+        },
+        {
+            "input": "&oS;",
+            "description": "Named entity: oS; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u24c8"
+                ]
+            ]
+        },
+        {
+            "input": "&oacute",
+            "description": "Named entity: oacute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00f3"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&oacute;",
+            "description": "Named entity: oacute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00f3"
+                ]
+            ]
+        },
+        {
+            "input": "&oast",
+            "description": "Bad named entity: oast without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&oast"
+                ]
+            ]
+        },
+        {
+            "input": "&oast;",
+            "description": "Named entity: oast; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u229b"
+                ]
+            ]
+        },
+        {
+            "input": "&ocir",
+            "description": "Bad named entity: ocir without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ocir"
+                ]
+            ]
+        },
+        {
+            "input": "&ocir;",
+            "description": "Named entity: ocir; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u229a"
+                ]
+            ]
+        },
+        {
+            "input": "&ocirc",
+            "description": "Named entity: ocirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00f4"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 7 }
+            ]
+        },
+        {
+            "input": "&ocirc;",
+            "description": "Named entity: ocirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00f4"
+                ]
+            ]
+        },
+        {
+            "input": "&ocy",
+            "description": "Bad named entity: ocy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ocy"
+                ]
+            ]
+        },
+        {
+            "input": "&ocy;",
+            "description": "Named entity: ocy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u043e"
+                ]
+            ]
+        },
+        {
+            "input": "&odash",
+            "description": "Bad named entity: odash without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&odash"
+                ]
+            ]
+        },
+        {
+            "input": "&odash;",
+            "description": "Named entity: odash; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u229d"
+                ]
+            ]
+        },
+        {
+            "input": "&odblac",
+            "description": "Bad named entity: odblac without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&odblac"
+                ]
+            ]
+        },
+        {
+            "input": "&odblac;",
+            "description": "Named entity: odblac; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0151"
+                ]
+            ]
+        },
+        {
+            "input": "&odiv",
+            "description": "Bad named entity: odiv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&odiv"
+                ]
+            ]
+        },
+        {
+            "input": "&odiv;",
+            "description": "Named entity: odiv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a38"
+                ]
+            ]
+        },
+        {
+            "input": "&odot",
+            "description": "Bad named entity: odot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&odot"
+                ]
+            ]
+        },
+        {
+            "input": "&odot;",
+            "description": "Named entity: odot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2299"
+                ]
+            ]
+        },
+        {
+            "input": "&odsold",
+            "description": "Bad named entity: odsold without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&odsold"
+                ]
+            ]
+        },
+        {
+            "input": "&odsold;",
+            "description": "Named entity: odsold; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29bc"
+                ]
+            ]
+        },
+        {
+            "input": "&oelig",
+            "description": "Bad named entity: oelig without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&oelig"
+                ]
+            ]
+        },
+        {
+            "input": "&oelig;",
+            "description": "Named entity: oelig; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0153"
+                ]
+            ]
+        },
+        {
+            "input": "&ofcir",
+            "description": "Bad named entity: ofcir without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ofcir"
+                ]
+            ]
+        },
+        {
+            "input": "&ofcir;",
+            "description": "Named entity: ofcir; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29bf"
+                ]
+            ]
+        },
+        {
+            "input": "&ofr",
+            "description": "Bad named entity: ofr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ofr"
+                ]
+            ]
+        },
+        {
+            "input": "&ofr;",
+            "description": "Named entity: ofr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd2c"
+                ]
+            ]
+        },
+        {
+            "input": "&ogon",
+            "description": "Bad named entity: ogon without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ogon"
+                ]
+            ]
+        },
+        {
+            "input": "&ogon;",
+            "description": "Named entity: ogon; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u02db"
+                ]
+            ]
+        },
+        {
+            "input": "&ograve",
+            "description": "Named entity: ograve without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00f2"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&ograve;",
+            "description": "Named entity: ograve; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00f2"
+                ]
+            ]
+        },
+        {
+            "input": "&ogt",
+            "description": "Bad named entity: ogt without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ogt"
+                ]
+            ]
+        },
+        {
+            "input": "&ogt;",
+            "description": "Named entity: ogt; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29c1"
+                ]
+            ]
+        },
+        {
+            "input": "&ohbar",
+            "description": "Bad named entity: ohbar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ohbar"
+                ]
+            ]
+        },
+        {
+            "input": "&ohbar;",
+            "description": "Named entity: ohbar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29b5"
+                ]
+            ]
+        },
+        {
+            "input": "&ohm",
+            "description": "Bad named entity: ohm without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ohm"
+                ]
+            ]
+        },
+        {
+            "input": "&ohm;",
+            "description": "Named entity: ohm; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03a9"
+                ]
+            ]
+        },
+        {
+            "input": "&oint",
+            "description": "Bad named entity: oint without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&oint"
+                ]
+            ]
+        },
+        {
+            "input": "&oint;",
+            "description": "Named entity: oint; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u222e"
+                ]
+            ]
+        },
+        {
+            "input": "&olarr",
+            "description": "Bad named entity: olarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&olarr"
+                ]
+            ]
+        },
+        {
+            "input": "&olarr;",
+            "description": "Named entity: olarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21ba"
+                ]
+            ]
+        },
+        {
+            "input": "&olcir",
+            "description": "Bad named entity: olcir without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&olcir"
+                ]
+            ]
+        },
+        {
+            "input": "&olcir;",
+            "description": "Named entity: olcir; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29be"
+                ]
+            ]
+        },
+        {
+            "input": "&olcross",
+            "description": "Bad named entity: olcross without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&olcross"
+                ]
+            ]
+        },
+        {
+            "input": "&olcross;",
+            "description": "Named entity: olcross; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29bb"
+                ]
+            ]
+        },
+        {
+            "input": "&oline",
+            "description": "Bad named entity: oline without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&oline"
+                ]
+            ]
+        },
+        {
+            "input": "&oline;",
+            "description": "Named entity: oline; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u203e"
+                ]
+            ]
+        },
+        {
+            "input": "&olt",
+            "description": "Bad named entity: olt without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&olt"
+                ]
+            ]
+        },
+        {
+            "input": "&olt;",
+            "description": "Named entity: olt; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29c0"
+                ]
+            ]
+        },
+        {
+            "input": "&omacr",
+            "description": "Bad named entity: omacr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&omacr"
+                ]
+            ]
+        },
+        {
+            "input": "&omacr;",
+            "description": "Named entity: omacr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u014d"
+                ]
+            ]
+        },
+        {
+            "input": "&omega",
+            "description": "Bad named entity: omega without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&omega"
+                ]
+            ]
+        },
+        {
+            "input": "&omega;",
+            "description": "Named entity: omega; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03c9"
+                ]
+            ]
+        },
+        {
+            "input": "&omicron",
+            "description": "Bad named entity: omicron without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&omicron"
+                ]
+            ]
+        },
+        {
+            "input": "&omicron;",
+            "description": "Named entity: omicron; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03bf"
+                ]
+            ]
+        },
+        {
+            "input": "&omid",
+            "description": "Bad named entity: omid without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&omid"
+                ]
+            ]
+        },
+        {
+            "input": "&omid;",
+            "description": "Named entity: omid; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29b6"
+                ]
+            ]
+        },
+        {
+            "input": "&ominus",
+            "description": "Bad named entity: ominus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ominus"
+                ]
+            ]
+        },
+        {
+            "input": "&ominus;",
+            "description": "Named entity: ominus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2296"
+                ]
+            ]
+        },
+        {
+            "input": "&oopf",
+            "description": "Bad named entity: oopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&oopf"
+                ]
+            ]
+        },
+        {
+            "input": "&oopf;",
+            "description": "Named entity: oopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd60"
+                ]
+            ]
+        },
+        {
+            "input": "&opar",
+            "description": "Bad named entity: opar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&opar"
+                ]
+            ]
+        },
+        {
+            "input": "&opar;",
+            "description": "Named entity: opar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29b7"
+                ]
+            ]
+        },
+        {
+            "input": "&operp",
+            "description": "Bad named entity: operp without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&operp"
+                ]
+            ]
+        },
+        {
+            "input": "&operp;",
+            "description": "Named entity: operp; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29b9"
+                ]
+            ]
+        },
+        {
+            "input": "&oplus",
+            "description": "Bad named entity: oplus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&oplus"
+                ]
+            ]
+        },
+        {
+            "input": "&oplus;",
+            "description": "Named entity: oplus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2295"
+                ]
+            ]
+        },
+        {
+            "input": "&or",
+            "description": "Bad named entity: or without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&or"
+                ]
+            ]
+        },
+        {
+            "input": "&or;",
+            "description": "Named entity: or; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2228"
+                ]
+            ]
+        },
+        {
+            "input": "&orarr",
+            "description": "Bad named entity: orarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&orarr"
+                ]
+            ]
+        },
+        {
+            "input": "&orarr;",
+            "description": "Named entity: orarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21bb"
+                ]
+            ]
+        },
+        {
+            "input": "&ord",
+            "description": "Bad named entity: ord without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ord"
+                ]
+            ]
+        },
+        {
+            "input": "&ord;",
+            "description": "Named entity: ord; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a5d"
+                ]
+            ]
+        },
+        {
+            "input": "&order",
+            "description": "Bad named entity: order without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&order"
+                ]
+            ]
+        },
+        {
+            "input": "&order;",
+            "description": "Named entity: order; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2134"
+                ]
+            ]
+        },
+        {
+            "input": "&orderof",
+            "description": "Bad named entity: orderof without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&orderof"
+                ]
+            ]
+        },
+        {
+            "input": "&orderof;",
+            "description": "Named entity: orderof; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2134"
+                ]
+            ]
+        },
+        {
+            "input": "&ordf",
+            "description": "Named entity: ordf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00aa"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 6 }
+            ]
+        },
+        {
+            "input": "&ordf;",
+            "description": "Named entity: ordf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00aa"
+                ]
+            ]
+        },
+        {
+            "input": "&ordm",
+            "description": "Named entity: ordm without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ba"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 6 }
+            ]
+        },
+        {
+            "input": "&ordm;",
+            "description": "Named entity: ordm; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ba"
+                ]
+            ]
+        },
+        {
+            "input": "&origof",
+            "description": "Bad named entity: origof without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&origof"
+                ]
+            ]
+        },
+        {
+            "input": "&origof;",
+            "description": "Named entity: origof; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22b6"
+                ]
+            ]
+        },
+        {
+            "input": "&oror",
+            "description": "Bad named entity: oror without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&oror"
+                ]
+            ]
+        },
+        {
+            "input": "&oror;",
+            "description": "Named entity: oror; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a56"
+                ]
+            ]
+        },
+        {
+            "input": "&orslope",
+            "description": "Bad named entity: orslope without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&orslope"
+                ]
+            ]
+        },
+        {
+            "input": "&orslope;",
+            "description": "Named entity: orslope; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a57"
+                ]
+            ]
+        },
+        {
+            "input": "&orv",
+            "description": "Bad named entity: orv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&orv"
+                ]
+            ]
+        },
+        {
+            "input": "&orv;",
+            "description": "Named entity: orv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a5b"
+                ]
+            ]
+        },
+        {
+            "input": "&oscr",
+            "description": "Bad named entity: oscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&oscr"
+                ]
+            ]
+        },
+        {
+            "input": "&oscr;",
+            "description": "Named entity: oscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2134"
+                ]
+            ]
+        },
+        {
+            "input": "&oslash",
+            "description": "Named entity: oslash without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00f8"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&oslash;",
+            "description": "Named entity: oslash; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00f8"
+                ]
+            ]
+        },
+        {
+            "input": "&osol",
+            "description": "Bad named entity: osol without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&osol"
+                ]
+            ]
+        },
+        {
+            "input": "&osol;",
+            "description": "Named entity: osol; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2298"
+                ]
+            ]
+        },
+        {
+            "input": "&otilde",
+            "description": "Named entity: otilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00f5"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&otilde;",
+            "description": "Named entity: otilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00f5"
+                ]
+            ]
+        },
+        {
+            "input": "&otimes",
+            "description": "Bad named entity: otimes without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&otimes"
+                ]
+            ]
+        },
+        {
+            "input": "&otimes;",
+            "description": "Named entity: otimes; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2297"
+                ]
+            ]
+        },
+        {
+            "input": "&otimesas",
+            "description": "Bad named entity: otimesas without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&otimesas"
+                ]
+            ]
+        },
+        {
+            "input": "&otimesas;",
+            "description": "Named entity: otimesas; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a36"
+                ]
+            ]
+        },
+        {
+            "input": "&ouml",
+            "description": "Named entity: ouml without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00f6"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 6 }
+            ]
+        },
+        {
+            "input": "&ouml;",
+            "description": "Named entity: ouml; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00f6"
+                ]
+            ]
+        },
+        {
+            "input": "&ovbar",
+            "description": "Bad named entity: ovbar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ovbar"
+                ]
+            ]
+        },
+        {
+            "input": "&ovbar;",
+            "description": "Named entity: ovbar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u233d"
+                ]
+            ]
+        },
+        {
+            "input": "&par",
+            "description": "Bad named entity: par without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&par"
+                ]
+            ]
+        },
+        {
+            "input": "&par;",
+            "description": "Named entity: par; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2225"
+                ]
+            ]
+        },
+        {
+            "input": "&para",
+            "description": "Named entity: para without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b6"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 6 }
+            ]
+        },
+        {
+            "input": "&para;",
+            "description": "Named entity: para; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b6"
+                ]
+            ]
+        },
+        {
+            "input": "&parallel;",
+            "description": "Named entity: parallel; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2225"
+                ]
+            ]
+        },
+        {
+            "input": "&parsim",
+            "description": "Bad named entity: parsim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&parsim"
+                ]
+            ]
+        },
+        {
+            "input": "&parsim;",
+            "description": "Named entity: parsim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2af3"
+                ]
+            ]
+        },
+        {
+            "input": "&parsl",
+            "description": "Bad named entity: parsl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&parsl"
+                ]
+            ]
+        },
+        {
+            "input": "&parsl;",
+            "description": "Named entity: parsl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2afd"
+                ]
+            ]
+        },
+        {
+            "input": "&part",
+            "description": "Bad named entity: part without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&part"
+                ]
+            ]
+        },
+        {
+            "input": "&part;",
+            "description": "Named entity: part; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2202"
+                ]
+            ]
+        },
+        {
+            "input": "&pcy",
+            "description": "Bad named entity: pcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&pcy"
+                ]
+            ]
+        },
+        {
+            "input": "&pcy;",
+            "description": "Named entity: pcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u043f"
+                ]
+            ]
+        },
+        {
+            "input": "&percnt",
+            "description": "Bad named entity: percnt without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&percnt"
+                ]
+            ]
+        },
+        {
+            "input": "&percnt;",
+            "description": "Named entity: percnt; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "%"
+                ]
+            ]
+        },
+        {
+            "input": "&period",
+            "description": "Bad named entity: period without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&period"
+                ]
+            ]
+        },
+        {
+            "input": "&period;",
+            "description": "Named entity: period; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "."
+                ]
+            ]
+        },
+        {
+            "input": "&permil",
+            "description": "Bad named entity: permil without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&permil"
+                ]
+            ]
+        },
+        {
+            "input": "&permil;",
+            "description": "Named entity: permil; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2030"
+                ]
+            ]
+        },
+        {
+            "input": "&perp",
+            "description": "Bad named entity: perp without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&perp"
+                ]
+            ]
+        },
+        {
+            "input": "&perp;",
+            "description": "Named entity: perp; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22a5"
+                ]
+            ]
+        },
+        {
+            "input": "&pertenk",
+            "description": "Bad named entity: pertenk without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&pertenk"
+                ]
+            ]
+        },
+        {
+            "input": "&pertenk;",
+            "description": "Named entity: pertenk; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2031"
+                ]
+            ]
+        },
+        {
+            "input": "&pfr",
+            "description": "Bad named entity: pfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&pfr"
+                ]
+            ]
+        },
+        {
+            "input": "&pfr;",
+            "description": "Named entity: pfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd2d"
+                ]
+            ]
+        },
+        {
+            "input": "&phi",
+            "description": "Bad named entity: phi without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&phi"
+                ]
+            ]
+        },
+        {
+            "input": "&phi;",
+            "description": "Named entity: phi; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03c6"
+                ]
+            ]
+        },
+        {
+            "input": "&phiv",
+            "description": "Bad named entity: phiv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&phiv"
+                ]
+            ]
+        },
+        {
+            "input": "&phiv;",
+            "description": "Named entity: phiv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03d5"
+                ]
+            ]
+        },
+        {
+            "input": "&phmmat",
+            "description": "Bad named entity: phmmat without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&phmmat"
+                ]
+            ]
+        },
+        {
+            "input": "&phmmat;",
+            "description": "Named entity: phmmat; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2133"
+                ]
+            ]
+        },
+        {
+            "input": "&phone",
+            "description": "Bad named entity: phone without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&phone"
+                ]
+            ]
+        },
+        {
+            "input": "&phone;",
+            "description": "Named entity: phone; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u260e"
+                ]
+            ]
+        },
+        {
+            "input": "&pi",
+            "description": "Bad named entity: pi without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&pi"
+                ]
+            ]
+        },
+        {
+            "input": "&pi;",
+            "description": "Named entity: pi; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03c0"
+                ]
+            ]
+        },
+        {
+            "input": "&pitchfork",
+            "description": "Bad named entity: pitchfork without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&pitchfork"
+                ]
+            ]
+        },
+        {
+            "input": "&pitchfork;",
+            "description": "Named entity: pitchfork; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22d4"
+                ]
+            ]
+        },
+        {
+            "input": "&piv",
+            "description": "Bad named entity: piv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&piv"
+                ]
+            ]
+        },
+        {
+            "input": "&piv;",
+            "description": "Named entity: piv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03d6"
+                ]
+            ]
+        },
+        {
+            "input": "&planck",
+            "description": "Bad named entity: planck without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&planck"
+                ]
+            ]
+        },
+        {
+            "input": "&planck;",
+            "description": "Named entity: planck; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u210f"
+                ]
+            ]
+        },
+        {
+            "input": "&planckh",
+            "description": "Bad named entity: planckh without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&planckh"
+                ]
+            ]
+        },
+        {
+            "input": "&planckh;",
+            "description": "Named entity: planckh; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u210e"
+                ]
+            ]
+        },
+        {
+            "input": "&plankv",
+            "description": "Bad named entity: plankv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&plankv"
+                ]
+            ]
+        },
+        {
+            "input": "&plankv;",
+            "description": "Named entity: plankv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u210f"
+                ]
+            ]
+        },
+        {
+            "input": "&plus",
+            "description": "Bad named entity: plus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&plus"
+                ]
+            ]
+        },
+        {
+            "input": "&plus;",
+            "description": "Named entity: plus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "+"
+                ]
+            ]
+        },
+        {
+            "input": "&plusacir",
+            "description": "Bad named entity: plusacir without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&plusacir"
+                ]
+            ]
+        },
+        {
+            "input": "&plusacir;",
+            "description": "Named entity: plusacir; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a23"
+                ]
+            ]
+        },
+        {
+            "input": "&plusb",
+            "description": "Bad named entity: plusb without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&plusb"
+                ]
+            ]
+        },
+        {
+            "input": "&plusb;",
+            "description": "Named entity: plusb; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u229e"
+                ]
+            ]
+        },
+        {
+            "input": "&pluscir",
+            "description": "Bad named entity: pluscir without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&pluscir"
+                ]
+            ]
+        },
+        {
+            "input": "&pluscir;",
+            "description": "Named entity: pluscir; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a22"
+                ]
+            ]
+        },
+        {
+            "input": "&plusdo",
+            "description": "Bad named entity: plusdo without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&plusdo"
+                ]
+            ]
+        },
+        {
+            "input": "&plusdo;",
+            "description": "Named entity: plusdo; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2214"
+                ]
+            ]
+        },
+        {
+            "input": "&plusdu",
+            "description": "Bad named entity: plusdu without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&plusdu"
+                ]
+            ]
+        },
+        {
+            "input": "&plusdu;",
+            "description": "Named entity: plusdu; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a25"
+                ]
+            ]
+        },
+        {
+            "input": "&pluse",
+            "description": "Bad named entity: pluse without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&pluse"
+                ]
+            ]
+        },
+        {
+            "input": "&pluse;",
+            "description": "Named entity: pluse; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a72"
+                ]
+            ]
+        },
+        {
+            "input": "&plusmn",
+            "description": "Named entity: plusmn without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b1"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&plusmn;",
+            "description": "Named entity: plusmn; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b1"
+                ]
+            ]
+        },
+        {
+            "input": "&plussim",
+            "description": "Bad named entity: plussim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&plussim"
+                ]
+            ]
+        },
+        {
+            "input": "&plussim;",
+            "description": "Named entity: plussim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a26"
+                ]
+            ]
+        },
+        {
+            "input": "&plustwo",
+            "description": "Bad named entity: plustwo without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&plustwo"
+                ]
+            ]
+        },
+        {
+            "input": "&plustwo;",
+            "description": "Named entity: plustwo; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a27"
+                ]
+            ]
+        },
+        {
+            "input": "&pm",
+            "description": "Bad named entity: pm without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&pm"
+                ]
+            ]
+        },
+        {
+            "input": "&pm;",
+            "description": "Named entity: pm; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b1"
+                ]
+            ]
+        },
+        {
+            "input": "&pointint",
+            "description": "Bad named entity: pointint without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&pointint"
+                ]
+            ]
+        },
+        {
+            "input": "&pointint;",
+            "description": "Named entity: pointint; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a15"
+                ]
+            ]
+        },
+        {
+            "input": "&popf",
+            "description": "Bad named entity: popf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&popf"
+                ]
+            ]
+        },
+        {
+            "input": "&popf;",
+            "description": "Named entity: popf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd61"
+                ]
+            ]
+        },
+        {
+            "input": "&pound",
+            "description": "Named entity: pound without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a3"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 7 }
+            ]
+        },
+        {
+            "input": "&pound;",
+            "description": "Named entity: pound; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a3"
+                ]
+            ]
+        },
+        {
+            "input": "&pr",
+            "description": "Bad named entity: pr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&pr"
+                ]
+            ]
+        },
+        {
+            "input": "&pr;",
+            "description": "Named entity: pr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u227a"
+                ]
+            ]
+        },
+        {
+            "input": "&prE",
+            "description": "Bad named entity: prE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&prE"
+                ]
+            ]
+        },
+        {
+            "input": "&prE;",
+            "description": "Named entity: prE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ab3"
+                ]
+            ]
+        },
+        {
+            "input": "&prap",
+            "description": "Bad named entity: prap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&prap"
+                ]
+            ]
+        },
+        {
+            "input": "&prap;",
+            "description": "Named entity: prap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ab7"
+                ]
+            ]
+        },
+        {
+            "input": "&prcue",
+            "description": "Bad named entity: prcue without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&prcue"
+                ]
+            ]
+        },
+        {
+            "input": "&prcue;",
+            "description": "Named entity: prcue; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u227c"
+                ]
+            ]
+        },
+        {
+            "input": "&pre",
+            "description": "Bad named entity: pre without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&pre"
+                ]
+            ]
+        },
+        {
+            "input": "&pre;",
+            "description": "Named entity: pre; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aaf"
+                ]
+            ]
+        },
+        {
+            "input": "&prec",
+            "description": "Bad named entity: prec without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&prec"
+                ]
+            ]
+        },
+        {
+            "input": "&prec;",
+            "description": "Named entity: prec; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u227a"
+                ]
+            ]
+        },
+        {
+            "input": "&precapprox",
+            "description": "Bad named entity: precapprox without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&precapprox"
+                ]
+            ]
+        },
+        {
+            "input": "&precapprox;",
+            "description": "Named entity: precapprox; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ab7"
+                ]
+            ]
+        },
+        {
+            "input": "&preccurlyeq",
+            "description": "Bad named entity: preccurlyeq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&preccurlyeq"
+                ]
+            ]
+        },
+        {
+            "input": "&preccurlyeq;",
+            "description": "Named entity: preccurlyeq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u227c"
+                ]
+            ]
+        },
+        {
+            "input": "&preceq",
+            "description": "Bad named entity: preceq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&preceq"
+                ]
+            ]
+        },
+        {
+            "input": "&preceq;",
+            "description": "Named entity: preceq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aaf"
+                ]
+            ]
+        },
+        {
+            "input": "&precnapprox",
+            "description": "Bad named entity: precnapprox without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&precnapprox"
+                ]
+            ]
+        },
+        {
+            "input": "&precnapprox;",
+            "description": "Named entity: precnapprox; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ab9"
+                ]
+            ]
+        },
+        {
+            "input": "&precneqq",
+            "description": "Bad named entity: precneqq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&precneqq"
+                ]
+            ]
+        },
+        {
+            "input": "&precneqq;",
+            "description": "Named entity: precneqq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ab5"
+                ]
+            ]
+        },
+        {
+            "input": "&precnsim",
+            "description": "Bad named entity: precnsim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&precnsim"
+                ]
+            ]
+        },
+        {
+            "input": "&precnsim;",
+            "description": "Named entity: precnsim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22e8"
+                ]
+            ]
+        },
+        {
+            "input": "&precsim",
+            "description": "Bad named entity: precsim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&precsim"
+                ]
+            ]
+        },
+        {
+            "input": "&precsim;",
+            "description": "Named entity: precsim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u227e"
+                ]
+            ]
+        },
+        {
+            "input": "&prime",
+            "description": "Bad named entity: prime without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&prime"
+                ]
+            ]
+        },
+        {
+            "input": "&prime;",
+            "description": "Named entity: prime; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2032"
+                ]
+            ]
+        },
+        {
+            "input": "&primes",
+            "description": "Bad named entity: primes without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&primes"
+                ]
+            ]
+        },
+        {
+            "input": "&primes;",
+            "description": "Named entity: primes; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2119"
+                ]
+            ]
+        },
+        {
+            "input": "&prnE",
+            "description": "Bad named entity: prnE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&prnE"
+                ]
+            ]
+        },
+        {
+            "input": "&prnE;",
+            "description": "Named entity: prnE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ab5"
+                ]
+            ]
+        },
+        {
+            "input": "&prnap",
+            "description": "Bad named entity: prnap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&prnap"
+                ]
+            ]
+        },
+        {
+            "input": "&prnap;",
+            "description": "Named entity: prnap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ab9"
+                ]
+            ]
+        },
+        {
+            "input": "&prnsim",
+            "description": "Bad named entity: prnsim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&prnsim"
+                ]
+            ]
+        },
+        {
+            "input": "&prnsim;",
+            "description": "Named entity: prnsim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22e8"
+                ]
+            ]
+        },
+        {
+            "input": "&prod",
+            "description": "Bad named entity: prod without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&prod"
+                ]
+            ]
+        },
+        {
+            "input": "&prod;",
+            "description": "Named entity: prod; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u220f"
+                ]
+            ]
+        },
+        {
+            "input": "&profalar",
+            "description": "Bad named entity: profalar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&profalar"
+                ]
+            ]
+        },
+        {
+            "input": "&profalar;",
+            "description": "Named entity: profalar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u232e"
+                ]
+            ]
+        },
+        {
+            "input": "&profline",
+            "description": "Bad named entity: profline without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&profline"
+                ]
+            ]
+        },
+        {
+            "input": "&profline;",
+            "description": "Named entity: profline; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2312"
+                ]
+            ]
+        },
+        {
+            "input": "&profsurf",
+            "description": "Bad named entity: profsurf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&profsurf"
+                ]
+            ]
+        },
+        {
+            "input": "&profsurf;",
+            "description": "Named entity: profsurf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2313"
+                ]
+            ]
+        },
+        {
+            "input": "&prop",
+            "description": "Bad named entity: prop without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&prop"
+                ]
+            ]
+        },
+        {
+            "input": "&prop;",
+            "description": "Named entity: prop; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u221d"
+                ]
+            ]
+        },
+        {
+            "input": "&propto",
+            "description": "Bad named entity: propto without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&propto"
+                ]
+            ]
+        },
+        {
+            "input": "&propto;",
+            "description": "Named entity: propto; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u221d"
+                ]
+            ]
+        },
+        {
+            "input": "&prsim",
+            "description": "Bad named entity: prsim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&prsim"
+                ]
+            ]
+        },
+        {
+            "input": "&prsim;",
+            "description": "Named entity: prsim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u227e"
+                ]
+            ]
+        },
+        {
+            "input": "&prurel",
+            "description": "Bad named entity: prurel without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&prurel"
+                ]
+            ]
+        },
+        {
+            "input": "&prurel;",
+            "description": "Named entity: prurel; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22b0"
+                ]
+            ]
+        },
+        {
+            "input": "&pscr",
+            "description": "Bad named entity: pscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&pscr"
+                ]
+            ]
+        },
+        {
+            "input": "&pscr;",
+            "description": "Named entity: pscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcc5"
+                ]
+            ]
+        },
+        {
+            "input": "&psi",
+            "description": "Bad named entity: psi without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&psi"
+                ]
+            ]
+        },
+        {
+            "input": "&psi;",
+            "description": "Named entity: psi; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03c8"
+                ]
+            ]
+        },
+        {
+            "input": "&puncsp",
+            "description": "Bad named entity: puncsp without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&puncsp"
+                ]
+            ]
+        },
+        {
+            "input": "&puncsp;",
+            "description": "Named entity: puncsp; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2008"
+                ]
+            ]
+        },
+        {
+            "input": "&qfr",
+            "description": "Bad named entity: qfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&qfr"
+                ]
+            ]
+        },
+        {
+            "input": "&qfr;",
+            "description": "Named entity: qfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd2e"
+                ]
+            ]
+        },
+        {
+            "input": "&qint",
+            "description": "Bad named entity: qint without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&qint"
+                ]
+            ]
+        },
+        {
+            "input": "&qint;",
+            "description": "Named entity: qint; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a0c"
+                ]
+            ]
+        },
+        {
+            "input": "&qopf",
+            "description": "Bad named entity: qopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&qopf"
+                ]
+            ]
+        },
+        {
+            "input": "&qopf;",
+            "description": "Named entity: qopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd62"
+                ]
+            ]
+        },
+        {
+            "input": "&qprime",
+            "description": "Bad named entity: qprime without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&qprime"
+                ]
+            ]
+        },
+        {
+            "input": "&qprime;",
+            "description": "Named entity: qprime; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2057"
+                ]
+            ]
+        },
+        {
+            "input": "&qscr",
+            "description": "Bad named entity: qscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&qscr"
+                ]
+            ]
+        },
+        {
+            "input": "&qscr;",
+            "description": "Named entity: qscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcc6"
+                ]
+            ]
+        },
+        {
+            "input": "&quaternions",
+            "description": "Bad named entity: quaternions without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&quaternions"
+                ]
+            ]
+        },
+        {
+            "input": "&quaternions;",
+            "description": "Named entity: quaternions; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u210d"
+                ]
+            ]
+        },
+        {
+            "input": "&quatint",
+            "description": "Bad named entity: quatint without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&quatint"
+                ]
+            ]
+        },
+        {
+            "input": "&quatint;",
+            "description": "Named entity: quatint; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a16"
+                ]
+            ]
+        },
+        {
+            "input": "&quest",
+            "description": "Bad named entity: quest without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&quest"
+                ]
+            ]
+        },
+        {
+            "input": "&quest;",
+            "description": "Named entity: quest; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "?"
+                ]
+            ]
+        },
+        {
+            "input": "&questeq",
+            "description": "Bad named entity: questeq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&questeq"
+                ]
+            ]
+        },
+        {
+            "input": "&questeq;",
+            "description": "Named entity: questeq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u225f"
+                ]
+            ]
+        },
+        {
+            "input": "&quot",
+            "description": "Named entity: quot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\""
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 6 }
+            ]
+        },
+        {
+            "input": "&quot;",
+            "description": "Named entity: quot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\""
+                ]
+            ]
+        },
+        {
+            "input": "&rAarr",
+            "description": "Bad named entity: rAarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rAarr"
+                ]
+            ]
+        },
+        {
+            "input": "&rAarr;",
+            "description": "Named entity: rAarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21db"
+                ]
+            ]
+        },
+        {
+            "input": "&rArr",
+            "description": "Bad named entity: rArr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rArr"
+                ]
+            ]
+        },
+        {
+            "input": "&rArr;",
+            "description": "Named entity: rArr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21d2"
+                ]
+            ]
+        },
+        {
+            "input": "&rAtail",
+            "description": "Bad named entity: rAtail without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rAtail"
+                ]
+            ]
+        },
+        {
+            "input": "&rAtail;",
+            "description": "Named entity: rAtail; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u291c"
+                ]
+            ]
+        },
+        {
+            "input": "&rBarr",
+            "description": "Bad named entity: rBarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rBarr"
+                ]
+            ]
+        },
+        {
+            "input": "&rBarr;",
+            "description": "Named entity: rBarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u290f"
+                ]
+            ]
+        },
+        {
+            "input": "&rHar",
+            "description": "Bad named entity: rHar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rHar"
+                ]
+            ]
+        },
+        {
+            "input": "&rHar;",
+            "description": "Named entity: rHar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2964"
+                ]
+            ]
+        },
+        {
+            "input": "&race",
+            "description": "Bad named entity: race without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&race"
+                ]
+            ]
+        },
+        {
+            "input": "&race;",
+            "description": "Named entity: race; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u223d\u0331"
+                ]
+            ]
+        },
+        {
+            "input": "&racute",
+            "description": "Bad named entity: racute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&racute"
+                ]
+            ]
+        },
+        {
+            "input": "&racute;",
+            "description": "Named entity: racute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0155"
+                ]
+            ]
+        },
+        {
+            "input": "&radic",
+            "description": "Bad named entity: radic without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&radic"
+                ]
+            ]
+        },
+        {
+            "input": "&radic;",
+            "description": "Named entity: radic; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u221a"
+                ]
+            ]
+        },
+        {
+            "input": "&raemptyv",
+            "description": "Bad named entity: raemptyv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&raemptyv"
+                ]
+            ]
+        },
+        {
+            "input": "&raemptyv;",
+            "description": "Named entity: raemptyv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29b3"
+                ]
+            ]
+        },
+        {
+            "input": "&rang",
+            "description": "Bad named entity: rang without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rang"
+                ]
+            ]
+        },
+        {
+            "input": "&rang;",
+            "description": "Named entity: rang; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27e9"
+                ]
+            ]
+        },
+        {
+            "input": "&rangd",
+            "description": "Bad named entity: rangd without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rangd"
+                ]
+            ]
+        },
+        {
+            "input": "&rangd;",
+            "description": "Named entity: rangd; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2992"
+                ]
+            ]
+        },
+        {
+            "input": "&range",
+            "description": "Bad named entity: range without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&range"
+                ]
+            ]
+        },
+        {
+            "input": "&range;",
+            "description": "Named entity: range; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29a5"
+                ]
+            ]
+        },
+        {
+            "input": "&rangle",
+            "description": "Bad named entity: rangle without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rangle"
+                ]
+            ]
+        },
+        {
+            "input": "&rangle;",
+            "description": "Named entity: rangle; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27e9"
+                ]
+            ]
+        },
+        {
+            "input": "&raquo",
+            "description": "Named entity: raquo without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00bb"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 7 }
+            ]
+        },
+        {
+            "input": "&raquo;",
+            "description": "Named entity: raquo; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00bb"
+                ]
+            ]
+        },
+        {
+            "input": "&rarr",
+            "description": "Bad named entity: rarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rarr"
+                ]
+            ]
+        },
+        {
+            "input": "&rarr;",
+            "description": "Named entity: rarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2192"
+                ]
+            ]
+        },
+        {
+            "input": "&rarrap",
+            "description": "Bad named entity: rarrap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rarrap"
+                ]
+            ]
+        },
+        {
+            "input": "&rarrap;",
+            "description": "Named entity: rarrap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2975"
+                ]
+            ]
+        },
+        {
+            "input": "&rarrb",
+            "description": "Bad named entity: rarrb without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rarrb"
+                ]
+            ]
+        },
+        {
+            "input": "&rarrb;",
+            "description": "Named entity: rarrb; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21e5"
+                ]
+            ]
+        },
+        {
+            "input": "&rarrbfs",
+            "description": "Bad named entity: rarrbfs without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rarrbfs"
+                ]
+            ]
+        },
+        {
+            "input": "&rarrbfs;",
+            "description": "Named entity: rarrbfs; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2920"
+                ]
+            ]
+        },
+        {
+            "input": "&rarrc",
+            "description": "Bad named entity: rarrc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rarrc"
+                ]
+            ]
+        },
+        {
+            "input": "&rarrc;",
+            "description": "Named entity: rarrc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2933"
+                ]
+            ]
+        },
+        {
+            "input": "&rarrfs",
+            "description": "Bad named entity: rarrfs without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rarrfs"
+                ]
+            ]
+        },
+        {
+            "input": "&rarrfs;",
+            "description": "Named entity: rarrfs; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u291e"
+                ]
+            ]
+        },
+        {
+            "input": "&rarrhk",
+            "description": "Bad named entity: rarrhk without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rarrhk"
+                ]
+            ]
+        },
+        {
+            "input": "&rarrhk;",
+            "description": "Named entity: rarrhk; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21aa"
+                ]
+            ]
+        },
+        {
+            "input": "&rarrlp",
+            "description": "Bad named entity: rarrlp without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rarrlp"
+                ]
+            ]
+        },
+        {
+            "input": "&rarrlp;",
+            "description": "Named entity: rarrlp; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21ac"
+                ]
+            ]
+        },
+        {
+            "input": "&rarrpl",
+            "description": "Bad named entity: rarrpl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rarrpl"
+                ]
+            ]
+        },
+        {
+            "input": "&rarrpl;",
+            "description": "Named entity: rarrpl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2945"
+                ]
+            ]
+        },
+        {
+            "input": "&rarrsim",
+            "description": "Bad named entity: rarrsim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rarrsim"
+                ]
+            ]
+        },
+        {
+            "input": "&rarrsim;",
+            "description": "Named entity: rarrsim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2974"
+                ]
+            ]
+        },
+        {
+            "input": "&rarrtl",
+            "description": "Bad named entity: rarrtl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rarrtl"
+                ]
+            ]
+        },
+        {
+            "input": "&rarrtl;",
+            "description": "Named entity: rarrtl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21a3"
+                ]
+            ]
+        },
+        {
+            "input": "&rarrw",
+            "description": "Bad named entity: rarrw without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rarrw"
+                ]
+            ]
+        },
+        {
+            "input": "&rarrw;",
+            "description": "Named entity: rarrw; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u219d"
+                ]
+            ]
+        },
+        {
+            "input": "&ratail",
+            "description": "Bad named entity: ratail without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ratail"
+                ]
+            ]
+        },
+        {
+            "input": "&ratail;",
+            "description": "Named entity: ratail; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u291a"
+                ]
+            ]
+        },
+        {
+            "input": "&ratio",
+            "description": "Bad named entity: ratio without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ratio"
+                ]
+            ]
+        },
+        {
+            "input": "&ratio;",
+            "description": "Named entity: ratio; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2236"
+                ]
+            ]
+        },
+        {
+            "input": "&rationals",
+            "description": "Bad named entity: rationals without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rationals"
+                ]
+            ]
+        },
+        {
+            "input": "&rationals;",
+            "description": "Named entity: rationals; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u211a"
+                ]
+            ]
+        },
+        {
+            "input": "&rbarr",
+            "description": "Bad named entity: rbarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rbarr"
+                ]
+            ]
+        },
+        {
+            "input": "&rbarr;",
+            "description": "Named entity: rbarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u290d"
+                ]
+            ]
+        },
+        {
+            "input": "&rbbrk",
+            "description": "Bad named entity: rbbrk without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rbbrk"
+                ]
+            ]
+        },
+        {
+            "input": "&rbbrk;",
+            "description": "Named entity: rbbrk; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2773"
+                ]
+            ]
+        },
+        {
+            "input": "&rbrace",
+            "description": "Bad named entity: rbrace without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rbrace"
+                ]
+            ]
+        },
+        {
+            "input": "&rbrace;",
+            "description": "Named entity: rbrace; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "}"
+                ]
+            ]
+        },
+        {
+            "input": "&rbrack",
+            "description": "Bad named entity: rbrack without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rbrack"
+                ]
+            ]
+        },
+        {
+            "input": "&rbrack;",
+            "description": "Named entity: rbrack; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "]"
+                ]
+            ]
+        },
+        {
+            "input": "&rbrke",
+            "description": "Bad named entity: rbrke without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rbrke"
+                ]
+            ]
+        },
+        {
+            "input": "&rbrke;",
+            "description": "Named entity: rbrke; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u298c"
+                ]
+            ]
+        },
+        {
+            "input": "&rbrksld",
+            "description": "Bad named entity: rbrksld without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rbrksld"
+                ]
+            ]
+        },
+        {
+            "input": "&rbrksld;",
+            "description": "Named entity: rbrksld; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u298e"
+                ]
+            ]
+        },
+        {
+            "input": "&rbrkslu",
+            "description": "Bad named entity: rbrkslu without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rbrkslu"
+                ]
+            ]
+        },
+        {
+            "input": "&rbrkslu;",
+            "description": "Named entity: rbrkslu; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2990"
+                ]
+            ]
+        },
+        {
+            "input": "&rcaron",
+            "description": "Bad named entity: rcaron without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rcaron"
+                ]
+            ]
+        },
+        {
+            "input": "&rcaron;",
+            "description": "Named entity: rcaron; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0159"
+                ]
+            ]
+        },
+        {
+            "input": "&rcedil",
+            "description": "Bad named entity: rcedil without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rcedil"
+                ]
+            ]
+        },
+        {
+            "input": "&rcedil;",
+            "description": "Named entity: rcedil; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0157"
+                ]
+            ]
+        },
+        {
+            "input": "&rceil",
+            "description": "Bad named entity: rceil without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rceil"
+                ]
+            ]
+        },
+        {
+            "input": "&rceil;",
+            "description": "Named entity: rceil; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2309"
+                ]
+            ]
+        },
+        {
+            "input": "&rcub",
+            "description": "Bad named entity: rcub without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rcub"
+                ]
+            ]
+        },
+        {
+            "input": "&rcub;",
+            "description": "Named entity: rcub; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "}"
+                ]
+            ]
+        },
+        {
+            "input": "&rcy",
+            "description": "Bad named entity: rcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rcy"
+                ]
+            ]
+        },
+        {
+            "input": "&rcy;",
+            "description": "Named entity: rcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0440"
+                ]
+            ]
+        },
+        {
+            "input": "&rdca",
+            "description": "Bad named entity: rdca without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rdca"
+                ]
+            ]
+        },
+        {
+            "input": "&rdca;",
+            "description": "Named entity: rdca; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2937"
+                ]
+            ]
+        },
+        {
+            "input": "&rdldhar",
+            "description": "Bad named entity: rdldhar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rdldhar"
+                ]
+            ]
+        },
+        {
+            "input": "&rdldhar;",
+            "description": "Named entity: rdldhar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2969"
+                ]
+            ]
+        },
+        {
+            "input": "&rdquo",
+            "description": "Bad named entity: rdquo without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rdquo"
+                ]
+            ]
+        },
+        {
+            "input": "&rdquo;",
+            "description": "Named entity: rdquo; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u201d"
+                ]
+            ]
+        },
+        {
+            "input": "&rdquor",
+            "description": "Bad named entity: rdquor without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rdquor"
+                ]
+            ]
+        },
+        {
+            "input": "&rdquor;",
+            "description": "Named entity: rdquor; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u201d"
+                ]
+            ]
+        },
+        {
+            "input": "&rdsh",
+            "description": "Bad named entity: rdsh without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rdsh"
+                ]
+            ]
+        },
+        {
+            "input": "&rdsh;",
+            "description": "Named entity: rdsh; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21b3"
+                ]
+            ]
+        },
+        {
+            "input": "&real",
+            "description": "Bad named entity: real without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&real"
+                ]
+            ]
+        },
+        {
+            "input": "&real;",
+            "description": "Named entity: real; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u211c"
+                ]
+            ]
+        },
+        {
+            "input": "&realine",
+            "description": "Bad named entity: realine without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&realine"
+                ]
+            ]
+        },
+        {
+            "input": "&realine;",
+            "description": "Named entity: realine; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u211b"
+                ]
+            ]
+        },
+        {
+            "input": "&realpart",
+            "description": "Bad named entity: realpart without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&realpart"
+                ]
+            ]
+        },
+        {
+            "input": "&realpart;",
+            "description": "Named entity: realpart; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u211c"
+                ]
+            ]
+        },
+        {
+            "input": "&reals",
+            "description": "Bad named entity: reals without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&reals"
+                ]
+            ]
+        },
+        {
+            "input": "&reals;",
+            "description": "Named entity: reals; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u211d"
+                ]
+            ]
+        },
+        {
+            "input": "&rect",
+            "description": "Bad named entity: rect without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rect"
+                ]
+            ]
+        },
+        {
+            "input": "&rect;",
+            "description": "Named entity: rect; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25ad"
+                ]
+            ]
+        },
+        {
+            "input": "&reg",
+            "description": "Named entity: reg without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ae"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 5 }
+            ]
+        },
+        {
+            "input": "&reg;",
+            "description": "Named entity: reg; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ae"
+                ]
+            ]
+        },
+        {
+            "input": "&rfisht",
+            "description": "Bad named entity: rfisht without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rfisht"
+                ]
+            ]
+        },
+        {
+            "input": "&rfisht;",
+            "description": "Named entity: rfisht; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u297d"
+                ]
+            ]
+        },
+        {
+            "input": "&rfloor",
+            "description": "Bad named entity: rfloor without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rfloor"
+                ]
+            ]
+        },
+        {
+            "input": "&rfloor;",
+            "description": "Named entity: rfloor; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u230b"
+                ]
+            ]
+        },
+        {
+            "input": "&rfr",
+            "description": "Bad named entity: rfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rfr"
+                ]
+            ]
+        },
+        {
+            "input": "&rfr;",
+            "description": "Named entity: rfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd2f"
+                ]
+            ]
+        },
+        {
+            "input": "&rhard",
+            "description": "Bad named entity: rhard without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rhard"
+                ]
+            ]
+        },
+        {
+            "input": "&rhard;",
+            "description": "Named entity: rhard; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c1"
+                ]
+            ]
+        },
+        {
+            "input": "&rharu",
+            "description": "Bad named entity: rharu without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rharu"
+                ]
+            ]
+        },
+        {
+            "input": "&rharu;",
+            "description": "Named entity: rharu; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c0"
+                ]
+            ]
+        },
+        {
+            "input": "&rharul",
+            "description": "Bad named entity: rharul without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rharul"
+                ]
+            ]
+        },
+        {
+            "input": "&rharul;",
+            "description": "Named entity: rharul; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u296c"
+                ]
+            ]
+        },
+        {
+            "input": "&rho",
+            "description": "Bad named entity: rho without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rho"
+                ]
+            ]
+        },
+        {
+            "input": "&rho;",
+            "description": "Named entity: rho; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03c1"
+                ]
+            ]
+        },
+        {
+            "input": "&rhov",
+            "description": "Bad named entity: rhov without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rhov"
+                ]
+            ]
+        },
+        {
+            "input": "&rhov;",
+            "description": "Named entity: rhov; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03f1"
+                ]
+            ]
+        },
+        {
+            "input": "&rightarrow",
+            "description": "Bad named entity: rightarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rightarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&rightarrow;",
+            "description": "Named entity: rightarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2192"
+                ]
+            ]
+        },
+        {
+            "input": "&rightarrowtail",
+            "description": "Bad named entity: rightarrowtail without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rightarrowtail"
+                ]
+            ]
+        },
+        {
+            "input": "&rightarrowtail;",
+            "description": "Named entity: rightarrowtail; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21a3"
+                ]
+            ]
+        },
+        {
+            "input": "&rightharpoondown",
+            "description": "Bad named entity: rightharpoondown without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rightharpoondown"
+                ]
+            ]
+        },
+        {
+            "input": "&rightharpoondown;",
+            "description": "Named entity: rightharpoondown; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c1"
+                ]
+            ]
+        },
+        {
+            "input": "&rightharpoonup",
+            "description": "Bad named entity: rightharpoonup without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rightharpoonup"
+                ]
+            ]
+        },
+        {
+            "input": "&rightharpoonup;",
+            "description": "Named entity: rightharpoonup; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c0"
+                ]
+            ]
+        },
+        {
+            "input": "&rightleftarrows",
+            "description": "Bad named entity: rightleftarrows without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rightleftarrows"
+                ]
+            ]
+        },
+        {
+            "input": "&rightleftarrows;",
+            "description": "Named entity: rightleftarrows; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c4"
+                ]
+            ]
+        },
+        {
+            "input": "&rightleftharpoons",
+            "description": "Bad named entity: rightleftharpoons without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rightleftharpoons"
+                ]
+            ]
+        },
+        {
+            "input": "&rightleftharpoons;",
+            "description": "Named entity: rightleftharpoons; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21cc"
+                ]
+            ]
+        },
+        {
+            "input": "&rightrightarrows",
+            "description": "Bad named entity: rightrightarrows without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rightrightarrows"
+                ]
+            ]
+        },
+        {
+            "input": "&rightrightarrows;",
+            "description": "Named entity: rightrightarrows; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c9"
+                ]
+            ]
+        },
+        {
+            "input": "&rightsquigarrow",
+            "description": "Bad named entity: rightsquigarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rightsquigarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&rightsquigarrow;",
+            "description": "Named entity: rightsquigarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u219d"
+                ]
+            ]
+        },
+        {
+            "input": "&rightthreetimes",
+            "description": "Bad named entity: rightthreetimes without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rightthreetimes"
+                ]
+            ]
+        },
+        {
+            "input": "&rightthreetimes;",
+            "description": "Named entity: rightthreetimes; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22cc"
+                ]
+            ]
+        },
+        {
+            "input": "&ring",
+            "description": "Bad named entity: ring without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ring"
+                ]
+            ]
+        },
+        {
+            "input": "&ring;",
+            "description": "Named entity: ring; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u02da"
+                ]
+            ]
+        },
+        {
+            "input": "&risingdotseq",
+            "description": "Bad named entity: risingdotseq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&risingdotseq"
+                ]
+            ]
+        },
+        {
+            "input": "&risingdotseq;",
+            "description": "Named entity: risingdotseq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2253"
+                ]
+            ]
+        },
+        {
+            "input": "&rlarr",
+            "description": "Bad named entity: rlarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rlarr"
+                ]
+            ]
+        },
+        {
+            "input": "&rlarr;",
+            "description": "Named entity: rlarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c4"
+                ]
+            ]
+        },
+        {
+            "input": "&rlhar",
+            "description": "Bad named entity: rlhar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rlhar"
+                ]
+            ]
+        },
+        {
+            "input": "&rlhar;",
+            "description": "Named entity: rlhar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21cc"
+                ]
+            ]
+        },
+        {
+            "input": "&rlm",
+            "description": "Bad named entity: rlm without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rlm"
+                ]
+            ]
+        },
+        {
+            "input": "&rlm;",
+            "description": "Named entity: rlm; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u200f"
+                ]
+            ]
+        },
+        {
+            "input": "&rmoust",
+            "description": "Bad named entity: rmoust without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rmoust"
+                ]
+            ]
+        },
+        {
+            "input": "&rmoust;",
+            "description": "Named entity: rmoust; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u23b1"
+                ]
+            ]
+        },
+        {
+            "input": "&rmoustache",
+            "description": "Bad named entity: rmoustache without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rmoustache"
+                ]
+            ]
+        },
+        {
+            "input": "&rmoustache;",
+            "description": "Named entity: rmoustache; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u23b1"
+                ]
+            ]
+        },
+        {
+            "input": "&rnmid",
+            "description": "Bad named entity: rnmid without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rnmid"
+                ]
+            ]
+        },
+        {
+            "input": "&rnmid;",
+            "description": "Named entity: rnmid; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aee"
+                ]
+            ]
+        },
+        {
+            "input": "&roang",
+            "description": "Bad named entity: roang without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&roang"
+                ]
+            ]
+        },
+        {
+            "input": "&roang;",
+            "description": "Named entity: roang; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27ed"
+                ]
+            ]
+        },
+        {
+            "input": "&roarr",
+            "description": "Bad named entity: roarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&roarr"
+                ]
+            ]
+        },
+        {
+            "input": "&roarr;",
+            "description": "Named entity: roarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21fe"
+                ]
+            ]
+        },
+        {
+            "input": "&robrk",
+            "description": "Bad named entity: robrk without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&robrk"
+                ]
+            ]
+        },
+        {
+            "input": "&robrk;",
+            "description": "Named entity: robrk; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27e7"
+                ]
+            ]
+        },
+        {
+            "input": "&ropar",
+            "description": "Bad named entity: ropar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ropar"
+                ]
+            ]
+        },
+        {
+            "input": "&ropar;",
+            "description": "Named entity: ropar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2986"
+                ]
+            ]
+        },
+        {
+            "input": "&ropf",
+            "description": "Bad named entity: ropf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ropf"
+                ]
+            ]
+        },
+        {
+            "input": "&ropf;",
+            "description": "Named entity: ropf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd63"
+                ]
+            ]
+        },
+        {
+            "input": "&roplus",
+            "description": "Bad named entity: roplus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&roplus"
+                ]
+            ]
+        },
+        {
+            "input": "&roplus;",
+            "description": "Named entity: roplus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a2e"
+                ]
+            ]
+        },
+        {
+            "input": "&rotimes",
+            "description": "Bad named entity: rotimes without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rotimes"
+                ]
+            ]
+        },
+        {
+            "input": "&rotimes;",
+            "description": "Named entity: rotimes; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a35"
+                ]
+            ]
+        },
+        {
+            "input": "&rpar",
+            "description": "Bad named entity: rpar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rpar"
+                ]
+            ]
+        },
+        {
+            "input": "&rpar;",
+            "description": "Named entity: rpar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    ")"
+                ]
+            ]
+        },
+        {
+            "input": "&rpargt",
+            "description": "Bad named entity: rpargt without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rpargt"
+                ]
+            ]
+        },
+        {
+            "input": "&rpargt;",
+            "description": "Named entity: rpargt; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2994"
+                ]
+            ]
+        },
+        {
+            "input": "&rppolint",
+            "description": "Bad named entity: rppolint without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rppolint"
+                ]
+            ]
+        },
+        {
+            "input": "&rppolint;",
+            "description": "Named entity: rppolint; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a12"
+                ]
+            ]
+        },
+        {
+            "input": "&rrarr",
+            "description": "Bad named entity: rrarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rrarr"
+                ]
+            ]
+        },
+        {
+            "input": "&rrarr;",
+            "description": "Named entity: rrarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c9"
+                ]
+            ]
+        },
+        {
+            "input": "&rsaquo",
+            "description": "Bad named entity: rsaquo without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rsaquo"
+                ]
+            ]
+        },
+        {
+            "input": "&rsaquo;",
+            "description": "Named entity: rsaquo; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u203a"
+                ]
+            ]
+        },
+        {
+            "input": "&rscr",
+            "description": "Bad named entity: rscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rscr"
+                ]
+            ]
+        },
+        {
+            "input": "&rscr;",
+            "description": "Named entity: rscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcc7"
+                ]
+            ]
+        },
+        {
+            "input": "&rsh",
+            "description": "Bad named entity: rsh without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rsh"
+                ]
+            ]
+        },
+        {
+            "input": "&rsh;",
+            "description": "Named entity: rsh; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21b1"
+                ]
+            ]
+        },
+        {
+            "input": "&rsqb",
+            "description": "Bad named entity: rsqb without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rsqb"
+                ]
+            ]
+        },
+        {
+            "input": "&rsqb;",
+            "description": "Named entity: rsqb; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "]"
+                ]
+            ]
+        },
+        {
+            "input": "&rsquo",
+            "description": "Bad named entity: rsquo without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rsquo"
+                ]
+            ]
+        },
+        {
+            "input": "&rsquo;",
+            "description": "Named entity: rsquo; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2019"
+                ]
+            ]
+        },
+        {
+            "input": "&rsquor",
+            "description": "Bad named entity: rsquor without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rsquor"
+                ]
+            ]
+        },
+        {
+            "input": "&rsquor;",
+            "description": "Named entity: rsquor; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2019"
+                ]
+            ]
+        },
+        {
+            "input": "&rthree",
+            "description": "Bad named entity: rthree without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rthree"
+                ]
+            ]
+        },
+        {
+            "input": "&rthree;",
+            "description": "Named entity: rthree; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22cc"
+                ]
+            ]
+        },
+        {
+            "input": "&rtimes",
+            "description": "Bad named entity: rtimes without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rtimes"
+                ]
+            ]
+        },
+        {
+            "input": "&rtimes;",
+            "description": "Named entity: rtimes; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22ca"
+                ]
+            ]
+        },
+        {
+            "input": "&rtri",
+            "description": "Bad named entity: rtri without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rtri"
+                ]
+            ]
+        },
+        {
+            "input": "&rtri;",
+            "description": "Named entity: rtri; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25b9"
+                ]
+            ]
+        },
+        {
+            "input": "&rtrie",
+            "description": "Bad named entity: rtrie without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rtrie"
+                ]
+            ]
+        },
+        {
+            "input": "&rtrie;",
+            "description": "Named entity: rtrie; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22b5"
+                ]
+            ]
+        },
+        {
+            "input": "&rtrif",
+            "description": "Bad named entity: rtrif without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rtrif"
+                ]
+            ]
+        },
+        {
+            "input": "&rtrif;",
+            "description": "Named entity: rtrif; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25b8"
+                ]
+            ]
+        },
+        {
+            "input": "&rtriltri",
+            "description": "Bad named entity: rtriltri without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rtriltri"
+                ]
+            ]
+        },
+        {
+            "input": "&rtriltri;",
+            "description": "Named entity: rtriltri; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29ce"
+                ]
+            ]
+        },
+        {
+            "input": "&ruluhar",
+            "description": "Bad named entity: ruluhar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ruluhar"
+                ]
+            ]
+        },
+        {
+            "input": "&ruluhar;",
+            "description": "Named entity: ruluhar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2968"
+                ]
+            ]
+        },
+        {
+            "input": "&rx",
+            "description": "Bad named entity: rx without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&rx"
+                ]
+            ]
+        },
+        {
+            "input": "&rx;",
+            "description": "Named entity: rx; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u211e"
+                ]
+            ]
+        },
+        {
+            "input": "&sacute",
+            "description": "Bad named entity: sacute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sacute"
+                ]
+            ]
+        },
+        {
+            "input": "&sacute;",
+            "description": "Named entity: sacute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u015b"
+                ]
+            ]
+        },
+        {
+            "input": "&sbquo",
+            "description": "Bad named entity: sbquo without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sbquo"
+                ]
+            ]
+        },
+        {
+            "input": "&sbquo;",
+            "description": "Named entity: sbquo; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u201a"
+                ]
+            ]
+        },
+        {
+            "input": "&sc",
+            "description": "Bad named entity: sc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sc"
+                ]
+            ]
+        },
+        {
+            "input": "&sc;",
+            "description": "Named entity: sc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u227b"
+                ]
+            ]
+        },
+        {
+            "input": "&scE",
+            "description": "Bad named entity: scE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&scE"
+                ]
+            ]
+        },
+        {
+            "input": "&scE;",
+            "description": "Named entity: scE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ab4"
+                ]
+            ]
+        },
+        {
+            "input": "&scap",
+            "description": "Bad named entity: scap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&scap"
+                ]
+            ]
+        },
+        {
+            "input": "&scap;",
+            "description": "Named entity: scap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ab8"
+                ]
+            ]
+        },
+        {
+            "input": "&scaron",
+            "description": "Bad named entity: scaron without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&scaron"
+                ]
+            ]
+        },
+        {
+            "input": "&scaron;",
+            "description": "Named entity: scaron; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0161"
+                ]
+            ]
+        },
+        {
+            "input": "&sccue",
+            "description": "Bad named entity: sccue without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sccue"
+                ]
+            ]
+        },
+        {
+            "input": "&sccue;",
+            "description": "Named entity: sccue; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u227d"
+                ]
+            ]
+        },
+        {
+            "input": "&sce",
+            "description": "Bad named entity: sce without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sce"
+                ]
+            ]
+        },
+        {
+            "input": "&sce;",
+            "description": "Named entity: sce; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ab0"
+                ]
+            ]
+        },
+        {
+            "input": "&scedil",
+            "description": "Bad named entity: scedil without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&scedil"
+                ]
+            ]
+        },
+        {
+            "input": "&scedil;",
+            "description": "Named entity: scedil; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u015f"
+                ]
+            ]
+        },
+        {
+            "input": "&scirc",
+            "description": "Bad named entity: scirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&scirc"
+                ]
+            ]
+        },
+        {
+            "input": "&scirc;",
+            "description": "Named entity: scirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u015d"
+                ]
+            ]
+        },
+        {
+            "input": "&scnE",
+            "description": "Bad named entity: scnE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&scnE"
+                ]
+            ]
+        },
+        {
+            "input": "&scnE;",
+            "description": "Named entity: scnE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ab6"
+                ]
+            ]
+        },
+        {
+            "input": "&scnap",
+            "description": "Bad named entity: scnap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&scnap"
+                ]
+            ]
+        },
+        {
+            "input": "&scnap;",
+            "description": "Named entity: scnap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aba"
+                ]
+            ]
+        },
+        {
+            "input": "&scnsim",
+            "description": "Bad named entity: scnsim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&scnsim"
+                ]
+            ]
+        },
+        {
+            "input": "&scnsim;",
+            "description": "Named entity: scnsim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22e9"
+                ]
+            ]
+        },
+        {
+            "input": "&scpolint",
+            "description": "Bad named entity: scpolint without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&scpolint"
+                ]
+            ]
+        },
+        {
+            "input": "&scpolint;",
+            "description": "Named entity: scpolint; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a13"
+                ]
+            ]
+        },
+        {
+            "input": "&scsim",
+            "description": "Bad named entity: scsim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&scsim"
+                ]
+            ]
+        },
+        {
+            "input": "&scsim;",
+            "description": "Named entity: scsim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u227f"
+                ]
+            ]
+        },
+        {
+            "input": "&scy",
+            "description": "Bad named entity: scy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&scy"
+                ]
+            ]
+        },
+        {
+            "input": "&scy;",
+            "description": "Named entity: scy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0441"
+                ]
+            ]
+        },
+        {
+            "input": "&sdot",
+            "description": "Bad named entity: sdot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sdot"
+                ]
+            ]
+        },
+        {
+            "input": "&sdot;",
+            "description": "Named entity: sdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22c5"
+                ]
+            ]
+        },
+        {
+            "input": "&sdotb",
+            "description": "Bad named entity: sdotb without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sdotb"
+                ]
+            ]
+        },
+        {
+            "input": "&sdotb;",
+            "description": "Named entity: sdotb; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22a1"
+                ]
+            ]
+        },
+        {
+            "input": "&sdote",
+            "description": "Bad named entity: sdote without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sdote"
+                ]
+            ]
+        },
+        {
+            "input": "&sdote;",
+            "description": "Named entity: sdote; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a66"
+                ]
+            ]
+        },
+        {
+            "input": "&seArr",
+            "description": "Bad named entity: seArr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&seArr"
+                ]
+            ]
+        },
+        {
+            "input": "&seArr;",
+            "description": "Named entity: seArr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21d8"
+                ]
+            ]
+        },
+        {
+            "input": "&searhk",
+            "description": "Bad named entity: searhk without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&searhk"
+                ]
+            ]
+        },
+        {
+            "input": "&searhk;",
+            "description": "Named entity: searhk; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2925"
+                ]
+            ]
+        },
+        {
+            "input": "&searr",
+            "description": "Bad named entity: searr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&searr"
+                ]
+            ]
+        },
+        {
+            "input": "&searr;",
+            "description": "Named entity: searr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2198"
+                ]
+            ]
+        },
+        {
+            "input": "&searrow",
+            "description": "Bad named entity: searrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&searrow"
+                ]
+            ]
+        },
+        {
+            "input": "&searrow;",
+            "description": "Named entity: searrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2198"
+                ]
+            ]
+        },
+        {
+            "input": "&sect",
+            "description": "Named entity: sect without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a7"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 6 }
+            ]
+        },
+        {
+            "input": "&sect;",
+            "description": "Named entity: sect; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a7"
+                ]
+            ]
+        },
+        {
+            "input": "&semi",
+            "description": "Bad named entity: semi without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&semi"
+                ]
+            ]
+        },
+        {
+            "input": "&semi;",
+            "description": "Named entity: semi; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    ";"
+                ]
+            ]
+        },
+        {
+            "input": "&seswar",
+            "description": "Bad named entity: seswar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&seswar"
+                ]
+            ]
+        },
+        {
+            "input": "&seswar;",
+            "description": "Named entity: seswar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2929"
+                ]
+            ]
+        },
+        {
+            "input": "&setminus",
+            "description": "Bad named entity: setminus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&setminus"
+                ]
+            ]
+        },
+        {
+            "input": "&setminus;",
+            "description": "Named entity: setminus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2216"
+                ]
+            ]
+        },
+        {
+            "input": "&setmn",
+            "description": "Bad named entity: setmn without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&setmn"
+                ]
+            ]
+        },
+        {
+            "input": "&setmn;",
+            "description": "Named entity: setmn; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2216"
+                ]
+            ]
+        },
+        {
+            "input": "&sext",
+            "description": "Bad named entity: sext without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sext"
+                ]
+            ]
+        },
+        {
+            "input": "&sext;",
+            "description": "Named entity: sext; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2736"
+                ]
+            ]
+        },
+        {
+            "input": "&sfr",
+            "description": "Bad named entity: sfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sfr"
+                ]
+            ]
+        },
+        {
+            "input": "&sfr;",
+            "description": "Named entity: sfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd30"
+                ]
+            ]
+        },
+        {
+            "input": "&sfrown",
+            "description": "Bad named entity: sfrown without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sfrown"
+                ]
+            ]
+        },
+        {
+            "input": "&sfrown;",
+            "description": "Named entity: sfrown; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2322"
+                ]
+            ]
+        },
+        {
+            "input": "&sharp",
+            "description": "Bad named entity: sharp without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sharp"
+                ]
+            ]
+        },
+        {
+            "input": "&sharp;",
+            "description": "Named entity: sharp; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u266f"
+                ]
+            ]
+        },
+        {
+            "input": "&shchcy",
+            "description": "Bad named entity: shchcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&shchcy"
+                ]
+            ]
+        },
+        {
+            "input": "&shchcy;",
+            "description": "Named entity: shchcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0449"
+                ]
+            ]
+        },
+        {
+            "input": "&shcy",
+            "description": "Bad named entity: shcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&shcy"
+                ]
+            ]
+        },
+        {
+            "input": "&shcy;",
+            "description": "Named entity: shcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0448"
+                ]
+            ]
+        },
+        {
+            "input": "&shortmid",
+            "description": "Bad named entity: shortmid without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&shortmid"
+                ]
+            ]
+        },
+        {
+            "input": "&shortmid;",
+            "description": "Named entity: shortmid; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2223"
+                ]
+            ]
+        },
+        {
+            "input": "&shortparallel",
+            "description": "Bad named entity: shortparallel without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&shortparallel"
+                ]
+            ]
+        },
+        {
+            "input": "&shortparallel;",
+            "description": "Named entity: shortparallel; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2225"
+                ]
+            ]
+        },
+        {
+            "input": "&shy",
+            "description": "Named entity: shy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ad"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 5 }
+            ]
+        },
+        {
+            "input": "&shy;",
+            "description": "Named entity: shy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ad"
+                ]
+            ]
+        },
+        {
+            "input": "&sigma",
+            "description": "Bad named entity: sigma without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sigma"
+                ]
+            ]
+        },
+        {
+            "input": "&sigma;",
+            "description": "Named entity: sigma; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03c3"
+                ]
+            ]
+        },
+        {
+            "input": "&sigmaf",
+            "description": "Bad named entity: sigmaf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sigmaf"
+                ]
+            ]
+        },
+        {
+            "input": "&sigmaf;",
+            "description": "Named entity: sigmaf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03c2"
+                ]
+            ]
+        },
+        {
+            "input": "&sigmav",
+            "description": "Bad named entity: sigmav without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sigmav"
+                ]
+            ]
+        },
+        {
+            "input": "&sigmav;",
+            "description": "Named entity: sigmav; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03c2"
+                ]
+            ]
+        },
+        {
+            "input": "&sim",
+            "description": "Bad named entity: sim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sim"
+                ]
+            ]
+        },
+        {
+            "input": "&sim;",
+            "description": "Named entity: sim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u223c"
+                ]
+            ]
+        },
+        {
+            "input": "&simdot",
+            "description": "Bad named entity: simdot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&simdot"
+                ]
+            ]
+        },
+        {
+            "input": "&simdot;",
+            "description": "Named entity: simdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a6a"
+                ]
+            ]
+        },
+        {
+            "input": "&sime",
+            "description": "Bad named entity: sime without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sime"
+                ]
+            ]
+        },
+        {
+            "input": "&sime;",
+            "description": "Named entity: sime; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2243"
+                ]
+            ]
+        },
+        {
+            "input": "&simeq",
+            "description": "Bad named entity: simeq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&simeq"
+                ]
+            ]
+        },
+        {
+            "input": "&simeq;",
+            "description": "Named entity: simeq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2243"
+                ]
+            ]
+        },
+        {
+            "input": "&simg",
+            "description": "Bad named entity: simg without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&simg"
+                ]
+            ]
+        },
+        {
+            "input": "&simg;",
+            "description": "Named entity: simg; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a9e"
+                ]
+            ]
+        },
+        {
+            "input": "&simgE",
+            "description": "Bad named entity: simgE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&simgE"
+                ]
+            ]
+        },
+        {
+            "input": "&simgE;",
+            "description": "Named entity: simgE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aa0"
+                ]
+            ]
+        },
+        {
+            "input": "&siml",
+            "description": "Bad named entity: siml without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&siml"
+                ]
+            ]
+        },
+        {
+            "input": "&siml;",
+            "description": "Named entity: siml; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a9d"
+                ]
+            ]
+        },
+        {
+            "input": "&simlE",
+            "description": "Bad named entity: simlE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&simlE"
+                ]
+            ]
+        },
+        {
+            "input": "&simlE;",
+            "description": "Named entity: simlE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a9f"
+                ]
+            ]
+        },
+        {
+            "input": "&simne",
+            "description": "Bad named entity: simne without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&simne"
+                ]
+            ]
+        },
+        {
+            "input": "&simne;",
+            "description": "Named entity: simne; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2246"
+                ]
+            ]
+        },
+        {
+            "input": "&simplus",
+            "description": "Bad named entity: simplus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&simplus"
+                ]
+            ]
+        },
+        {
+            "input": "&simplus;",
+            "description": "Named entity: simplus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a24"
+                ]
+            ]
+        },
+        {
+            "input": "&simrarr",
+            "description": "Bad named entity: simrarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&simrarr"
+                ]
+            ]
+        },
+        {
+            "input": "&simrarr;",
+            "description": "Named entity: simrarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2972"
+                ]
+            ]
+        },
+        {
+            "input": "&slarr",
+            "description": "Bad named entity: slarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&slarr"
+                ]
+            ]
+        },
+        {
+            "input": "&slarr;",
+            "description": "Named entity: slarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2190"
+                ]
+            ]
+        },
+        {
+            "input": "&smallsetminus",
+            "description": "Bad named entity: smallsetminus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&smallsetminus"
+                ]
+            ]
+        },
+        {
+            "input": "&smallsetminus;",
+            "description": "Named entity: smallsetminus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2216"
+                ]
+            ]
+        },
+        {
+            "input": "&smashp",
+            "description": "Bad named entity: smashp without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&smashp"
+                ]
+            ]
+        },
+        {
+            "input": "&smashp;",
+            "description": "Named entity: smashp; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a33"
+                ]
+            ]
+        },
+        {
+            "input": "&smeparsl",
+            "description": "Bad named entity: smeparsl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&smeparsl"
+                ]
+            ]
+        },
+        {
+            "input": "&smeparsl;",
+            "description": "Named entity: smeparsl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29e4"
+                ]
+            ]
+        },
+        {
+            "input": "&smid",
+            "description": "Bad named entity: smid without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&smid"
+                ]
+            ]
+        },
+        {
+            "input": "&smid;",
+            "description": "Named entity: smid; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2223"
+                ]
+            ]
+        },
+        {
+            "input": "&smile",
+            "description": "Bad named entity: smile without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&smile"
+                ]
+            ]
+        },
+        {
+            "input": "&smile;",
+            "description": "Named entity: smile; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2323"
+                ]
+            ]
+        },
+        {
+            "input": "&smt",
+            "description": "Bad named entity: smt without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&smt"
+                ]
+            ]
+        },
+        {
+            "input": "&smt;",
+            "description": "Named entity: smt; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aaa"
+                ]
+            ]
+        },
+        {
+            "input": "&smte",
+            "description": "Bad named entity: smte without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&smte"
+                ]
+            ]
+        },
+        {
+            "input": "&smte;",
+            "description": "Named entity: smte; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aac"
+                ]
+            ]
+        },
+        {
+            "input": "&smtes",
+            "description": "Bad named entity: smtes without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&smtes"
+                ]
+            ]
+        },
+        {
+            "input": "&smtes;",
+            "description": "Named entity: smtes; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aac\ufe00"
+                ]
+            ]
+        },
+        {
+            "input": "&softcy",
+            "description": "Bad named entity: softcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&softcy"
+                ]
+            ]
+        },
+        {
+            "input": "&softcy;",
+            "description": "Named entity: softcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u044c"
+                ]
+            ]
+        },
+        {
+            "input": "&sol",
+            "description": "Bad named entity: sol without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sol"
+                ]
+            ]
+        },
+        {
+            "input": "&sol;",
+            "description": "Named entity: sol; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "/"
+                ]
+            ]
+        },
+        {
+            "input": "&solb",
+            "description": "Bad named entity: solb without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&solb"
+                ]
+            ]
+        },
+        {
+            "input": "&solb;",
+            "description": "Named entity: solb; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29c4"
+                ]
+            ]
+        },
+        {
+            "input": "&solbar",
+            "description": "Bad named entity: solbar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&solbar"
+                ]
+            ]
+        },
+        {
+            "input": "&solbar;",
+            "description": "Named entity: solbar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u233f"
+                ]
+            ]
+        },
+        {
+            "input": "&sopf",
+            "description": "Bad named entity: sopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sopf"
+                ]
+            ]
+        },
+        {
+            "input": "&sopf;",
+            "description": "Named entity: sopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd64"
+                ]
+            ]
+        },
+        {
+            "input": "&spades",
+            "description": "Bad named entity: spades without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&spades"
+                ]
+            ]
+        },
+        {
+            "input": "&spades;",
+            "description": "Named entity: spades; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2660"
+                ]
+            ]
+        },
+        {
+            "input": "&spadesuit",
+            "description": "Bad named entity: spadesuit without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&spadesuit"
+                ]
+            ]
+        },
+        {
+            "input": "&spadesuit;",
+            "description": "Named entity: spadesuit; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2660"
+                ]
+            ]
+        },
+        {
+            "input": "&spar",
+            "description": "Bad named entity: spar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&spar"
+                ]
+            ]
+        },
+        {
+            "input": "&spar;",
+            "description": "Named entity: spar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2225"
+                ]
+            ]
+        },
+        {
+            "input": "&sqcap",
+            "description": "Bad named entity: sqcap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sqcap"
+                ]
+            ]
+        },
+        {
+            "input": "&sqcap;",
+            "description": "Named entity: sqcap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2293"
+                ]
+            ]
+        },
+        {
+            "input": "&sqcaps",
+            "description": "Bad named entity: sqcaps without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sqcaps"
+                ]
+            ]
+        },
+        {
+            "input": "&sqcaps;",
+            "description": "Named entity: sqcaps; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2293\ufe00"
+                ]
+            ]
+        },
+        {
+            "input": "&sqcup",
+            "description": "Bad named entity: sqcup without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sqcup"
+                ]
+            ]
+        },
+        {
+            "input": "&sqcup;",
+            "description": "Named entity: sqcup; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2294"
+                ]
+            ]
+        },
+        {
+            "input": "&sqcups",
+            "description": "Bad named entity: sqcups without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sqcups"
+                ]
+            ]
+        },
+        {
+            "input": "&sqcups;",
+            "description": "Named entity: sqcups; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2294\ufe00"
+                ]
+            ]
+        },
+        {
+            "input": "&sqsub",
+            "description": "Bad named entity: sqsub without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sqsub"
+                ]
+            ]
+        },
+        {
+            "input": "&sqsub;",
+            "description": "Named entity: sqsub; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u228f"
+                ]
+            ]
+        },
+        {
+            "input": "&sqsube",
+            "description": "Bad named entity: sqsube without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sqsube"
+                ]
+            ]
+        },
+        {
+            "input": "&sqsube;",
+            "description": "Named entity: sqsube; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2291"
+                ]
+            ]
+        },
+        {
+            "input": "&sqsubset",
+            "description": "Bad named entity: sqsubset without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sqsubset"
+                ]
+            ]
+        },
+        {
+            "input": "&sqsubset;",
+            "description": "Named entity: sqsubset; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u228f"
+                ]
+            ]
+        },
+        {
+            "input": "&sqsubseteq",
+            "description": "Bad named entity: sqsubseteq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sqsubseteq"
+                ]
+            ]
+        },
+        {
+            "input": "&sqsubseteq;",
+            "description": "Named entity: sqsubseteq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2291"
+                ]
+            ]
+        },
+        {
+            "input": "&sqsup",
+            "description": "Bad named entity: sqsup without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sqsup"
+                ]
+            ]
+        },
+        {
+            "input": "&sqsup;",
+            "description": "Named entity: sqsup; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2290"
+                ]
+            ]
+        },
+        {
+            "input": "&sqsupe",
+            "description": "Bad named entity: sqsupe without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sqsupe"
+                ]
+            ]
+        },
+        {
+            "input": "&sqsupe;",
+            "description": "Named entity: sqsupe; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2292"
+                ]
+            ]
+        },
+        {
+            "input": "&sqsupset",
+            "description": "Bad named entity: sqsupset without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sqsupset"
+                ]
+            ]
+        },
+        {
+            "input": "&sqsupset;",
+            "description": "Named entity: sqsupset; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2290"
+                ]
+            ]
+        },
+        {
+            "input": "&sqsupseteq",
+            "description": "Bad named entity: sqsupseteq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sqsupseteq"
+                ]
+            ]
+        },
+        {
+            "input": "&sqsupseteq;",
+            "description": "Named entity: sqsupseteq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2292"
+                ]
+            ]
+        },
+        {
+            "input": "&squ",
+            "description": "Bad named entity: squ without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&squ"
+                ]
+            ]
+        },
+        {
+            "input": "&squ;",
+            "description": "Named entity: squ; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25a1"
+                ]
+            ]
+        },
+        {
+            "input": "&square",
+            "description": "Bad named entity: square without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&square"
+                ]
+            ]
+        },
+        {
+            "input": "&square;",
+            "description": "Named entity: square; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25a1"
+                ]
+            ]
+        },
+        {
+            "input": "&squarf",
+            "description": "Bad named entity: squarf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&squarf"
+                ]
+            ]
+        },
+        {
+            "input": "&squarf;",
+            "description": "Named entity: squarf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25aa"
+                ]
+            ]
+        },
+        {
+            "input": "&squf",
+            "description": "Bad named entity: squf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&squf"
+                ]
+            ]
+        },
+        {
+            "input": "&squf;",
+            "description": "Named entity: squf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25aa"
+                ]
+            ]
+        },
+        {
+            "input": "&srarr",
+            "description": "Bad named entity: srarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&srarr"
+                ]
+            ]
+        },
+        {
+            "input": "&srarr;",
+            "description": "Named entity: srarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2192"
+                ]
+            ]
+        },
+        {
+            "input": "&sscr",
+            "description": "Bad named entity: sscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sscr"
+                ]
+            ]
+        },
+        {
+            "input": "&sscr;",
+            "description": "Named entity: sscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcc8"
+                ]
+            ]
+        },
+        {
+            "input": "&ssetmn",
+            "description": "Bad named entity: ssetmn without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ssetmn"
+                ]
+            ]
+        },
+        {
+            "input": "&ssetmn;",
+            "description": "Named entity: ssetmn; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2216"
+                ]
+            ]
+        },
+        {
+            "input": "&ssmile",
+            "description": "Bad named entity: ssmile without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ssmile"
+                ]
+            ]
+        },
+        {
+            "input": "&ssmile;",
+            "description": "Named entity: ssmile; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2323"
+                ]
+            ]
+        },
+        {
+            "input": "&sstarf",
+            "description": "Bad named entity: sstarf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sstarf"
+                ]
+            ]
+        },
+        {
+            "input": "&sstarf;",
+            "description": "Named entity: sstarf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22c6"
+                ]
+            ]
+        },
+        {
+            "input": "&star",
+            "description": "Bad named entity: star without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&star"
+                ]
+            ]
+        },
+        {
+            "input": "&star;",
+            "description": "Named entity: star; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2606"
+                ]
+            ]
+        },
+        {
+            "input": "&starf",
+            "description": "Bad named entity: starf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&starf"
+                ]
+            ]
+        },
+        {
+            "input": "&starf;",
+            "description": "Named entity: starf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2605"
+                ]
+            ]
+        },
+        {
+            "input": "&straightepsilon",
+            "description": "Bad named entity: straightepsilon without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&straightepsilon"
+                ]
+            ]
+        },
+        {
+            "input": "&straightepsilon;",
+            "description": "Named entity: straightepsilon; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03f5"
+                ]
+            ]
+        },
+        {
+            "input": "&straightphi",
+            "description": "Bad named entity: straightphi without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&straightphi"
+                ]
+            ]
+        },
+        {
+            "input": "&straightphi;",
+            "description": "Named entity: straightphi; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03d5"
+                ]
+            ]
+        },
+        {
+            "input": "&strns",
+            "description": "Bad named entity: strns without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&strns"
+                ]
+            ]
+        },
+        {
+            "input": "&strns;",
+            "description": "Named entity: strns; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00af"
+                ]
+            ]
+        },
+        {
+            "input": "&sub",
+            "description": "Bad named entity: sub without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sub"
+                ]
+            ]
+        },
+        {
+            "input": "&sub;",
+            "description": "Named entity: sub; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2282"
+                ]
+            ]
+        },
+        {
+            "input": "&subE",
+            "description": "Bad named entity: subE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&subE"
+                ]
+            ]
+        },
+        {
+            "input": "&subE;",
+            "description": "Named entity: subE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ac5"
+                ]
+            ]
+        },
+        {
+            "input": "&subdot",
+            "description": "Bad named entity: subdot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&subdot"
+                ]
+            ]
+        },
+        {
+            "input": "&subdot;",
+            "description": "Named entity: subdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2abd"
+                ]
+            ]
+        },
+        {
+            "input": "&sube",
+            "description": "Bad named entity: sube without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sube"
+                ]
+            ]
+        },
+        {
+            "input": "&sube;",
+            "description": "Named entity: sube; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2286"
+                ]
+            ]
+        },
+        {
+            "input": "&subedot",
+            "description": "Bad named entity: subedot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&subedot"
+                ]
+            ]
+        },
+        {
+            "input": "&subedot;",
+            "description": "Named entity: subedot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ac3"
+                ]
+            ]
+        },
+        {
+            "input": "&submult",
+            "description": "Bad named entity: submult without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&submult"
+                ]
+            ]
+        },
+        {
+            "input": "&submult;",
+            "description": "Named entity: submult; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ac1"
+                ]
+            ]
+        },
+        {
+            "input": "&subnE",
+            "description": "Bad named entity: subnE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&subnE"
+                ]
+            ]
+        },
+        {
+            "input": "&subnE;",
+            "description": "Named entity: subnE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2acb"
+                ]
+            ]
+        },
+        {
+            "input": "&subne",
+            "description": "Bad named entity: subne without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&subne"
+                ]
+            ]
+        },
+        {
+            "input": "&subne;",
+            "description": "Named entity: subne; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u228a"
+                ]
+            ]
+        },
+        {
+            "input": "&subplus",
+            "description": "Bad named entity: subplus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&subplus"
+                ]
+            ]
+        },
+        {
+            "input": "&subplus;",
+            "description": "Named entity: subplus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2abf"
+                ]
+            ]
+        },
+        {
+            "input": "&subrarr",
+            "description": "Bad named entity: subrarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&subrarr"
+                ]
+            ]
+        },
+        {
+            "input": "&subrarr;",
+            "description": "Named entity: subrarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2979"
+                ]
+            ]
+        },
+        {
+            "input": "&subset",
+            "description": "Bad named entity: subset without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&subset"
+                ]
+            ]
+        },
+        {
+            "input": "&subset;",
+            "description": "Named entity: subset; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2282"
+                ]
+            ]
+        },
+        {
+            "input": "&subseteq",
+            "description": "Bad named entity: subseteq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&subseteq"
+                ]
+            ]
+        },
+        {
+            "input": "&subseteq;",
+            "description": "Named entity: subseteq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2286"
+                ]
+            ]
+        },
+        {
+            "input": "&subseteqq",
+            "description": "Bad named entity: subseteqq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&subseteqq"
+                ]
+            ]
+        },
+        {
+            "input": "&subseteqq;",
+            "description": "Named entity: subseteqq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ac5"
+                ]
+            ]
+        },
+        {
+            "input": "&subsetneq",
+            "description": "Bad named entity: subsetneq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&subsetneq"
+                ]
+            ]
+        },
+        {
+            "input": "&subsetneq;",
+            "description": "Named entity: subsetneq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u228a"
+                ]
+            ]
+        },
+        {
+            "input": "&subsetneqq",
+            "description": "Bad named entity: subsetneqq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&subsetneqq"
+                ]
+            ]
+        },
+        {
+            "input": "&subsetneqq;",
+            "description": "Named entity: subsetneqq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2acb"
+                ]
+            ]
+        },
+        {
+            "input": "&subsim",
+            "description": "Bad named entity: subsim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&subsim"
+                ]
+            ]
+        },
+        {
+            "input": "&subsim;",
+            "description": "Named entity: subsim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ac7"
+                ]
+            ]
+        },
+        {
+            "input": "&subsub",
+            "description": "Bad named entity: subsub without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&subsub"
+                ]
+            ]
+        },
+        {
+            "input": "&subsub;",
+            "description": "Named entity: subsub; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ad5"
+                ]
+            ]
+        },
+        {
+            "input": "&subsup",
+            "description": "Bad named entity: subsup without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&subsup"
+                ]
+            ]
+        },
+        {
+            "input": "&subsup;",
+            "description": "Named entity: subsup; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ad3"
+                ]
+            ]
+        },
+        {
+            "input": "&succ",
+            "description": "Bad named entity: succ without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&succ"
+                ]
+            ]
+        },
+        {
+            "input": "&succ;",
+            "description": "Named entity: succ; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u227b"
+                ]
+            ]
+        },
+        {
+            "input": "&succapprox",
+            "description": "Bad named entity: succapprox without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&succapprox"
+                ]
+            ]
+        },
+        {
+            "input": "&succapprox;",
+            "description": "Named entity: succapprox; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ab8"
+                ]
+            ]
+        },
+        {
+            "input": "&succcurlyeq",
+            "description": "Bad named entity: succcurlyeq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&succcurlyeq"
+                ]
+            ]
+        },
+        {
+            "input": "&succcurlyeq;",
+            "description": "Named entity: succcurlyeq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u227d"
+                ]
+            ]
+        },
+        {
+            "input": "&succeq",
+            "description": "Bad named entity: succeq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&succeq"
+                ]
+            ]
+        },
+        {
+            "input": "&succeq;",
+            "description": "Named entity: succeq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ab0"
+                ]
+            ]
+        },
+        {
+            "input": "&succnapprox",
+            "description": "Bad named entity: succnapprox without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&succnapprox"
+                ]
+            ]
+        },
+        {
+            "input": "&succnapprox;",
+            "description": "Named entity: succnapprox; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2aba"
+                ]
+            ]
+        },
+        {
+            "input": "&succneqq",
+            "description": "Bad named entity: succneqq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&succneqq"
+                ]
+            ]
+        },
+        {
+            "input": "&succneqq;",
+            "description": "Named entity: succneqq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ab6"
+                ]
+            ]
+        },
+        {
+            "input": "&succnsim",
+            "description": "Bad named entity: succnsim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&succnsim"
+                ]
+            ]
+        },
+        {
+            "input": "&succnsim;",
+            "description": "Named entity: succnsim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22e9"
+                ]
+            ]
+        },
+        {
+            "input": "&succsim",
+            "description": "Bad named entity: succsim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&succsim"
+                ]
+            ]
+        },
+        {
+            "input": "&succsim;",
+            "description": "Named entity: succsim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u227f"
+                ]
+            ]
+        },
+        {
+            "input": "&sum",
+            "description": "Bad named entity: sum without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sum"
+                ]
+            ]
+        },
+        {
+            "input": "&sum;",
+            "description": "Named entity: sum; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2211"
+                ]
+            ]
+        },
+        {
+            "input": "&sung",
+            "description": "Bad named entity: sung without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sung"
+                ]
+            ]
+        },
+        {
+            "input": "&sung;",
+            "description": "Named entity: sung; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u266a"
+                ]
+            ]
+        },
+        {
+            "input": "&sup",
+            "description": "Bad named entity: sup without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&sup"
+                ]
+            ]
+        },
+        {
+            "input": "&sup1",
+            "description": "Named entity: sup1 without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b9"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 6 }
+            ]
+        },
+        {
+            "input": "&sup1;",
+            "description": "Named entity: sup1; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b9"
+                ]
+            ]
+        },
+        {
+            "input": "&sup2",
+            "description": "Named entity: sup2 without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b2"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 6 }
+            ]
+        },
+        {
+            "input": "&sup2;",
+            "description": "Named entity: sup2; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b2"
+                ]
+            ]
+        },
+        {
+            "input": "&sup3",
+            "description": "Named entity: sup3 without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b3"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 6 }
+            ]
+        },
+        {
+            "input": "&sup3;",
+            "description": "Named entity: sup3; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00b3"
+                ]
+            ]
+        },
+        {
+            "input": "&sup;",
+            "description": "Named entity: sup; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2283"
+                ]
+            ]
+        },
+        {
+            "input": "&supE",
+            "description": "Bad named entity: supE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&supE"
+                ]
+            ]
+        },
+        {
+            "input": "&supE;",
+            "description": "Named entity: supE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ac6"
+                ]
+            ]
+        },
+        {
+            "input": "&supdot",
+            "description": "Bad named entity: supdot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&supdot"
+                ]
+            ]
+        },
+        {
+            "input": "&supdot;",
+            "description": "Named entity: supdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2abe"
+                ]
+            ]
+        },
+        {
+            "input": "&supdsub",
+            "description": "Bad named entity: supdsub without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&supdsub"
+                ]
+            ]
+        },
+        {
+            "input": "&supdsub;",
+            "description": "Named entity: supdsub; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ad8"
+                ]
+            ]
+        },
+        {
+            "input": "&supe",
+            "description": "Bad named entity: supe without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&supe"
+                ]
+            ]
+        },
+        {
+            "input": "&supe;",
+            "description": "Named entity: supe; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2287"
+                ]
+            ]
+        },
+        {
+            "input": "&supedot",
+            "description": "Bad named entity: supedot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&supedot"
+                ]
+            ]
+        },
+        {
+            "input": "&supedot;",
+            "description": "Named entity: supedot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ac4"
+                ]
+            ]
+        },
+        {
+            "input": "&suphsol",
+            "description": "Bad named entity: suphsol without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&suphsol"
+                ]
+            ]
+        },
+        {
+            "input": "&suphsol;",
+            "description": "Named entity: suphsol; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27c9"
+                ]
+            ]
+        },
+        {
+            "input": "&suphsub",
+            "description": "Bad named entity: suphsub without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&suphsub"
+                ]
+            ]
+        },
+        {
+            "input": "&suphsub;",
+            "description": "Named entity: suphsub; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ad7"
+                ]
+            ]
+        },
+        {
+            "input": "&suplarr",
+            "description": "Bad named entity: suplarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&suplarr"
+                ]
+            ]
+        },
+        {
+            "input": "&suplarr;",
+            "description": "Named entity: suplarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u297b"
+                ]
+            ]
+        },
+        {
+            "input": "&supmult",
+            "description": "Bad named entity: supmult without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&supmult"
+                ]
+            ]
+        },
+        {
+            "input": "&supmult;",
+            "description": "Named entity: supmult; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ac2"
+                ]
+            ]
+        },
+        {
+            "input": "&supnE",
+            "description": "Bad named entity: supnE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&supnE"
+                ]
+            ]
+        },
+        {
+            "input": "&supnE;",
+            "description": "Named entity: supnE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2acc"
+                ]
+            ]
+        },
+        {
+            "input": "&supne",
+            "description": "Bad named entity: supne without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&supne"
+                ]
+            ]
+        },
+        {
+            "input": "&supne;",
+            "description": "Named entity: supne; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u228b"
+                ]
+            ]
+        },
+        {
+            "input": "&supplus",
+            "description": "Bad named entity: supplus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&supplus"
+                ]
+            ]
+        },
+        {
+            "input": "&supplus;",
+            "description": "Named entity: supplus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ac0"
+                ]
+            ]
+        },
+        {
+            "input": "&supset",
+            "description": "Bad named entity: supset without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&supset"
+                ]
+            ]
+        },
+        {
+            "input": "&supset;",
+            "description": "Named entity: supset; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2283"
+                ]
+            ]
+        },
+        {
+            "input": "&supseteq",
+            "description": "Bad named entity: supseteq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&supseteq"
+                ]
+            ]
+        },
+        {
+            "input": "&supseteq;",
+            "description": "Named entity: supseteq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2287"
+                ]
+            ]
+        },
+        {
+            "input": "&supseteqq",
+            "description": "Bad named entity: supseteqq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&supseteqq"
+                ]
+            ]
+        },
+        {
+            "input": "&supseteqq;",
+            "description": "Named entity: supseteqq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ac6"
+                ]
+            ]
+        },
+        {
+            "input": "&supsetneq",
+            "description": "Bad named entity: supsetneq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&supsetneq"
+                ]
+            ]
+        },
+        {
+            "input": "&supsetneq;",
+            "description": "Named entity: supsetneq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u228b"
+                ]
+            ]
+        },
+        {
+            "input": "&supsetneqq",
+            "description": "Bad named entity: supsetneqq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&supsetneqq"
+                ]
+            ]
+        },
+        {
+            "input": "&supsetneqq;",
+            "description": "Named entity: supsetneqq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2acc"
+                ]
+            ]
+        },
+        {
+            "input": "&supsim",
+            "description": "Bad named entity: supsim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&supsim"
+                ]
+            ]
+        },
+        {
+            "input": "&supsim;",
+            "description": "Named entity: supsim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ac8"
+                ]
+            ]
+        },
+        {
+            "input": "&supsub",
+            "description": "Bad named entity: supsub without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&supsub"
+                ]
+            ]
+        },
+        {
+            "input": "&supsub;",
+            "description": "Named entity: supsub; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ad4"
+                ]
+            ]
+        },
+        {
+            "input": "&supsup",
+            "description": "Bad named entity: supsup without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&supsup"
+                ]
+            ]
+        },
+        {
+            "input": "&supsup;",
+            "description": "Named entity: supsup; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ad6"
+                ]
+            ]
+        },
+        {
+            "input": "&swArr",
+            "description": "Bad named entity: swArr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&swArr"
+                ]
+            ]
+        },
+        {
+            "input": "&swArr;",
+            "description": "Named entity: swArr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21d9"
+                ]
+            ]
+        },
+        {
+            "input": "&swarhk",
+            "description": "Bad named entity: swarhk without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&swarhk"
+                ]
+            ]
+        },
+        {
+            "input": "&swarhk;",
+            "description": "Named entity: swarhk; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2926"
+                ]
+            ]
+        },
+        {
+            "input": "&swarr",
+            "description": "Bad named entity: swarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&swarr"
+                ]
+            ]
+        },
+        {
+            "input": "&swarr;",
+            "description": "Named entity: swarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2199"
+                ]
+            ]
+        },
+        {
+            "input": "&swarrow",
+            "description": "Bad named entity: swarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&swarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&swarrow;",
+            "description": "Named entity: swarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2199"
+                ]
+            ]
+        },
+        {
+            "input": "&swnwar",
+            "description": "Bad named entity: swnwar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&swnwar"
+                ]
+            ]
+        },
+        {
+            "input": "&swnwar;",
+            "description": "Named entity: swnwar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u292a"
+                ]
+            ]
+        },
+        {
+            "input": "&szlig",
+            "description": "Named entity: szlig without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00df"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 7 }
+            ]
+        },
+        {
+            "input": "&szlig;",
+            "description": "Named entity: szlig; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00df"
+                ]
+            ]
+        },
+        {
+            "input": "&target",
+            "description": "Bad named entity: target without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&target"
+                ]
+            ]
+        },
+        {
+            "input": "&target;",
+            "description": "Named entity: target; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2316"
+                ]
+            ]
+        },
+        {
+            "input": "&tau",
+            "description": "Bad named entity: tau without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&tau"
+                ]
+            ]
+        },
+        {
+            "input": "&tau;",
+            "description": "Named entity: tau; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03c4"
+                ]
+            ]
+        },
+        {
+            "input": "&tbrk",
+            "description": "Bad named entity: tbrk without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&tbrk"
+                ]
+            ]
+        },
+        {
+            "input": "&tbrk;",
+            "description": "Named entity: tbrk; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u23b4"
+                ]
+            ]
+        },
+        {
+            "input": "&tcaron",
+            "description": "Bad named entity: tcaron without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&tcaron"
+                ]
+            ]
+        },
+        {
+            "input": "&tcaron;",
+            "description": "Named entity: tcaron; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0165"
+                ]
+            ]
+        },
+        {
+            "input": "&tcedil",
+            "description": "Bad named entity: tcedil without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&tcedil"
+                ]
+            ]
+        },
+        {
+            "input": "&tcedil;",
+            "description": "Named entity: tcedil; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0163"
+                ]
+            ]
+        },
+        {
+            "input": "&tcy",
+            "description": "Bad named entity: tcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&tcy"
+                ]
+            ]
+        },
+        {
+            "input": "&tcy;",
+            "description": "Named entity: tcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0442"
+                ]
+            ]
+        },
+        {
+            "input": "&tdot",
+            "description": "Bad named entity: tdot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&tdot"
+                ]
+            ]
+        },
+        {
+            "input": "&tdot;",
+            "description": "Named entity: tdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u20db"
+                ]
+            ]
+        },
+        {
+            "input": "&telrec",
+            "description": "Bad named entity: telrec without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&telrec"
+                ]
+            ]
+        },
+        {
+            "input": "&telrec;",
+            "description": "Named entity: telrec; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2315"
+                ]
+            ]
+        },
+        {
+            "input": "&tfr",
+            "description": "Bad named entity: tfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&tfr"
+                ]
+            ]
+        },
+        {
+            "input": "&tfr;",
+            "description": "Named entity: tfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd31"
+                ]
+            ]
+        },
+        {
+            "input": "&there4",
+            "description": "Bad named entity: there4 without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&there4"
+                ]
+            ]
+        },
+        {
+            "input": "&there4;",
+            "description": "Named entity: there4; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2234"
+                ]
+            ]
+        },
+        {
+            "input": "&therefore",
+            "description": "Bad named entity: therefore without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&therefore"
+                ]
+            ]
+        },
+        {
+            "input": "&therefore;",
+            "description": "Named entity: therefore; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2234"
+                ]
+            ]
+        },
+        {
+            "input": "&theta",
+            "description": "Bad named entity: theta without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&theta"
+                ]
+            ]
+        },
+        {
+            "input": "&theta;",
+            "description": "Named entity: theta; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03b8"
+                ]
+            ]
+        },
+        {
+            "input": "&thetasym",
+            "description": "Bad named entity: thetasym without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&thetasym"
+                ]
+            ]
+        },
+        {
+            "input": "&thetasym;",
+            "description": "Named entity: thetasym; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03d1"
+                ]
+            ]
+        },
+        {
+            "input": "&thetav",
+            "description": "Bad named entity: thetav without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&thetav"
+                ]
+            ]
+        },
+        {
+            "input": "&thetav;",
+            "description": "Named entity: thetav; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03d1"
+                ]
+            ]
+        },
+        {
+            "input": "&thickapprox",
+            "description": "Bad named entity: thickapprox without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&thickapprox"
+                ]
+            ]
+        },
+        {
+            "input": "&thickapprox;",
+            "description": "Named entity: thickapprox; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2248"
+                ]
+            ]
+        },
+        {
+            "input": "&thicksim",
+            "description": "Bad named entity: thicksim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&thicksim"
+                ]
+            ]
+        },
+        {
+            "input": "&thicksim;",
+            "description": "Named entity: thicksim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u223c"
+                ]
+            ]
+        },
+        {
+            "input": "&thinsp",
+            "description": "Bad named entity: thinsp without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&thinsp"
+                ]
+            ]
+        },
+        {
+            "input": "&thinsp;",
+            "description": "Named entity: thinsp; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2009"
+                ]
+            ]
+        },
+        {
+            "input": "&thkap",
+            "description": "Bad named entity: thkap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&thkap"
+                ]
+            ]
+        },
+        {
+            "input": "&thkap;",
+            "description": "Named entity: thkap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2248"
+                ]
+            ]
+        },
+        {
+            "input": "&thksim",
+            "description": "Bad named entity: thksim without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&thksim"
+                ]
+            ]
+        },
+        {
+            "input": "&thksim;",
+            "description": "Named entity: thksim; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u223c"
+                ]
+            ]
+        },
+        {
+            "input": "&thorn",
+            "description": "Named entity: thorn without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00fe"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 7 }
+            ]
+        },
+        {
+            "input": "&thorn;",
+            "description": "Named entity: thorn; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00fe"
+                ]
+            ]
+        },
+        {
+            "input": "&tilde",
+            "description": "Bad named entity: tilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&tilde"
+                ]
+            ]
+        },
+        {
+            "input": "&tilde;",
+            "description": "Named entity: tilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u02dc"
+                ]
+            ]
+        },
+        {
+            "input": "&times",
+            "description": "Named entity: times without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00d7"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 7 }
+            ]
+        },
+        {
+            "input": "&times;",
+            "description": "Named entity: times; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00d7"
+                ]
+            ]
+        },
+        {
+            "input": "&timesb;",
+            "description": "Named entity: timesb; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22a0"
+                ]
+            ]
+        },
+        {
+            "input": "&timesbar;",
+            "description": "Named entity: timesbar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a31"
+                ]
+            ]
+        },
+        {
+            "input": "&timesd;",
+            "description": "Named entity: timesd; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a30"
+                ]
+            ]
+        },
+        {
+            "input": "&tint",
+            "description": "Bad named entity: tint without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&tint"
+                ]
+            ]
+        },
+        {
+            "input": "&tint;",
+            "description": "Named entity: tint; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u222d"
+                ]
+            ]
+        },
+        {
+            "input": "&toea",
+            "description": "Bad named entity: toea without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&toea"
+                ]
+            ]
+        },
+        {
+            "input": "&toea;",
+            "description": "Named entity: toea; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2928"
+                ]
+            ]
+        },
+        {
+            "input": "&top",
+            "description": "Bad named entity: top without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&top"
+                ]
+            ]
+        },
+        {
+            "input": "&top;",
+            "description": "Named entity: top; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22a4"
+                ]
+            ]
+        },
+        {
+            "input": "&topbot",
+            "description": "Bad named entity: topbot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&topbot"
+                ]
+            ]
+        },
+        {
+            "input": "&topbot;",
+            "description": "Named entity: topbot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2336"
+                ]
+            ]
+        },
+        {
+            "input": "&topcir",
+            "description": "Bad named entity: topcir without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&topcir"
+                ]
+            ]
+        },
+        {
+            "input": "&topcir;",
+            "description": "Named entity: topcir; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2af1"
+                ]
+            ]
+        },
+        {
+            "input": "&topf",
+            "description": "Bad named entity: topf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&topf"
+                ]
+            ]
+        },
+        {
+            "input": "&topf;",
+            "description": "Named entity: topf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd65"
+                ]
+            ]
+        },
+        {
+            "input": "&topfork",
+            "description": "Bad named entity: topfork without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&topfork"
+                ]
+            ]
+        },
+        {
+            "input": "&topfork;",
+            "description": "Named entity: topfork; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ada"
+                ]
+            ]
+        },
+        {
+            "input": "&tosa",
+            "description": "Bad named entity: tosa without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&tosa"
+                ]
+            ]
+        },
+        {
+            "input": "&tosa;",
+            "description": "Named entity: tosa; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2929"
+                ]
+            ]
+        },
+        {
+            "input": "&tprime",
+            "description": "Bad named entity: tprime without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&tprime"
+                ]
+            ]
+        },
+        {
+            "input": "&tprime;",
+            "description": "Named entity: tprime; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2034"
+                ]
+            ]
+        },
+        {
+            "input": "&trade",
+            "description": "Bad named entity: trade without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&trade"
+                ]
+            ]
+        },
+        {
+            "input": "&trade;",
+            "description": "Named entity: trade; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2122"
+                ]
+            ]
+        },
+        {
+            "input": "&triangle",
+            "description": "Bad named entity: triangle without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&triangle"
+                ]
+            ]
+        },
+        {
+            "input": "&triangle;",
+            "description": "Named entity: triangle; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25b5"
+                ]
+            ]
+        },
+        {
+            "input": "&triangledown",
+            "description": "Bad named entity: triangledown without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&triangledown"
+                ]
+            ]
+        },
+        {
+            "input": "&triangledown;",
+            "description": "Named entity: triangledown; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25bf"
+                ]
+            ]
+        },
+        {
+            "input": "&triangleleft",
+            "description": "Bad named entity: triangleleft without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&triangleleft"
+                ]
+            ]
+        },
+        {
+            "input": "&triangleleft;",
+            "description": "Named entity: triangleleft; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25c3"
+                ]
+            ]
+        },
+        {
+            "input": "&trianglelefteq",
+            "description": "Bad named entity: trianglelefteq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&trianglelefteq"
+                ]
+            ]
+        },
+        {
+            "input": "&trianglelefteq;",
+            "description": "Named entity: trianglelefteq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22b4"
+                ]
+            ]
+        },
+        {
+            "input": "&triangleq",
+            "description": "Bad named entity: triangleq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&triangleq"
+                ]
+            ]
+        },
+        {
+            "input": "&triangleq;",
+            "description": "Named entity: triangleq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u225c"
+                ]
+            ]
+        },
+        {
+            "input": "&triangleright",
+            "description": "Bad named entity: triangleright without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&triangleright"
+                ]
+            ]
+        },
+        {
+            "input": "&triangleright;",
+            "description": "Named entity: triangleright; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25b9"
+                ]
+            ]
+        },
+        {
+            "input": "&trianglerighteq",
+            "description": "Bad named entity: trianglerighteq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&trianglerighteq"
+                ]
+            ]
+        },
+        {
+            "input": "&trianglerighteq;",
+            "description": "Named entity: trianglerighteq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22b5"
+                ]
+            ]
+        },
+        {
+            "input": "&tridot",
+            "description": "Bad named entity: tridot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&tridot"
+                ]
+            ]
+        },
+        {
+            "input": "&tridot;",
+            "description": "Named entity: tridot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25ec"
+                ]
+            ]
+        },
+        {
+            "input": "&trie",
+            "description": "Bad named entity: trie without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&trie"
+                ]
+            ]
+        },
+        {
+            "input": "&trie;",
+            "description": "Named entity: trie; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u225c"
+                ]
+            ]
+        },
+        {
+            "input": "&triminus",
+            "description": "Bad named entity: triminus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&triminus"
+                ]
+            ]
+        },
+        {
+            "input": "&triminus;",
+            "description": "Named entity: triminus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a3a"
+                ]
+            ]
+        },
+        {
+            "input": "&triplus",
+            "description": "Bad named entity: triplus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&triplus"
+                ]
+            ]
+        },
+        {
+            "input": "&triplus;",
+            "description": "Named entity: triplus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a39"
+                ]
+            ]
+        },
+        {
+            "input": "&trisb",
+            "description": "Bad named entity: trisb without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&trisb"
+                ]
+            ]
+        },
+        {
+            "input": "&trisb;",
+            "description": "Named entity: trisb; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29cd"
+                ]
+            ]
+        },
+        {
+            "input": "&tritime",
+            "description": "Bad named entity: tritime without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&tritime"
+                ]
+            ]
+        },
+        {
+            "input": "&tritime;",
+            "description": "Named entity: tritime; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a3b"
+                ]
+            ]
+        },
+        {
+            "input": "&trpezium",
+            "description": "Bad named entity: trpezium without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&trpezium"
+                ]
+            ]
+        },
+        {
+            "input": "&trpezium;",
+            "description": "Named entity: trpezium; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u23e2"
+                ]
+            ]
+        },
+        {
+            "input": "&tscr",
+            "description": "Bad named entity: tscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&tscr"
+                ]
+            ]
+        },
+        {
+            "input": "&tscr;",
+            "description": "Named entity: tscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcc9"
+                ]
+            ]
+        },
+        {
+            "input": "&tscy",
+            "description": "Bad named entity: tscy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&tscy"
+                ]
+            ]
+        },
+        {
+            "input": "&tscy;",
+            "description": "Named entity: tscy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0446"
+                ]
+            ]
+        },
+        {
+            "input": "&tshcy",
+            "description": "Bad named entity: tshcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&tshcy"
+                ]
+            ]
+        },
+        {
+            "input": "&tshcy;",
+            "description": "Named entity: tshcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u045b"
+                ]
+            ]
+        },
+        {
+            "input": "&tstrok",
+            "description": "Bad named entity: tstrok without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&tstrok"
+                ]
+            ]
+        },
+        {
+            "input": "&tstrok;",
+            "description": "Named entity: tstrok; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0167"
+                ]
+            ]
+        },
+        {
+            "input": "&twixt",
+            "description": "Bad named entity: twixt without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&twixt"
+                ]
+            ]
+        },
+        {
+            "input": "&twixt;",
+            "description": "Named entity: twixt; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u226c"
+                ]
+            ]
+        },
+        {
+            "input": "&twoheadleftarrow",
+            "description": "Bad named entity: twoheadleftarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&twoheadleftarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&twoheadleftarrow;",
+            "description": "Named entity: twoheadleftarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u219e"
+                ]
+            ]
+        },
+        {
+            "input": "&twoheadrightarrow",
+            "description": "Bad named entity: twoheadrightarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&twoheadrightarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&twoheadrightarrow;",
+            "description": "Named entity: twoheadrightarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21a0"
+                ]
+            ]
+        },
+        {
+            "input": "&uArr",
+            "description": "Bad named entity: uArr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&uArr"
+                ]
+            ]
+        },
+        {
+            "input": "&uArr;",
+            "description": "Named entity: uArr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21d1"
+                ]
+            ]
+        },
+        {
+            "input": "&uHar",
+            "description": "Bad named entity: uHar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&uHar"
+                ]
+            ]
+        },
+        {
+            "input": "&uHar;",
+            "description": "Named entity: uHar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2963"
+                ]
+            ]
+        },
+        {
+            "input": "&uacute",
+            "description": "Named entity: uacute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00fa"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&uacute;",
+            "description": "Named entity: uacute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00fa"
+                ]
+            ]
+        },
+        {
+            "input": "&uarr",
+            "description": "Bad named entity: uarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&uarr"
+                ]
+            ]
+        },
+        {
+            "input": "&uarr;",
+            "description": "Named entity: uarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2191"
+                ]
+            ]
+        },
+        {
+            "input": "&ubrcy",
+            "description": "Bad named entity: ubrcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ubrcy"
+                ]
+            ]
+        },
+        {
+            "input": "&ubrcy;",
+            "description": "Named entity: ubrcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u045e"
+                ]
+            ]
+        },
+        {
+            "input": "&ubreve",
+            "description": "Bad named entity: ubreve without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ubreve"
+                ]
+            ]
+        },
+        {
+            "input": "&ubreve;",
+            "description": "Named entity: ubreve; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u016d"
+                ]
+            ]
+        },
+        {
+            "input": "&ucirc",
+            "description": "Named entity: ucirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00fb"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 7 }
+            ]
+        },
+        {
+            "input": "&ucirc;",
+            "description": "Named entity: ucirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00fb"
+                ]
+            ]
+        },
+        {
+            "input": "&ucy",
+            "description": "Bad named entity: ucy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ucy"
+                ]
+            ]
+        },
+        {
+            "input": "&ucy;",
+            "description": "Named entity: ucy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0443"
+                ]
+            ]
+        },
+        {
+            "input": "&udarr",
+            "description": "Bad named entity: udarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&udarr"
+                ]
+            ]
+        },
+        {
+            "input": "&udarr;",
+            "description": "Named entity: udarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c5"
+                ]
+            ]
+        },
+        {
+            "input": "&udblac",
+            "description": "Bad named entity: udblac without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&udblac"
+                ]
+            ]
+        },
+        {
+            "input": "&udblac;",
+            "description": "Named entity: udblac; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0171"
+                ]
+            ]
+        },
+        {
+            "input": "&udhar",
+            "description": "Bad named entity: udhar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&udhar"
+                ]
+            ]
+        },
+        {
+            "input": "&udhar;",
+            "description": "Named entity: udhar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u296e"
+                ]
+            ]
+        },
+        {
+            "input": "&ufisht",
+            "description": "Bad named entity: ufisht without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ufisht"
+                ]
+            ]
+        },
+        {
+            "input": "&ufisht;",
+            "description": "Named entity: ufisht; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u297e"
+                ]
+            ]
+        },
+        {
+            "input": "&ufr",
+            "description": "Bad named entity: ufr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ufr"
+                ]
+            ]
+        },
+        {
+            "input": "&ufr;",
+            "description": "Named entity: ufr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd32"
+                ]
+            ]
+        },
+        {
+            "input": "&ugrave",
+            "description": "Named entity: ugrave without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00f9"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&ugrave;",
+            "description": "Named entity: ugrave; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00f9"
+                ]
+            ]
+        },
+        {
+            "input": "&uharl",
+            "description": "Bad named entity: uharl without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&uharl"
+                ]
+            ]
+        },
+        {
+            "input": "&uharl;",
+            "description": "Named entity: uharl; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21bf"
+                ]
+            ]
+        },
+        {
+            "input": "&uharr",
+            "description": "Bad named entity: uharr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&uharr"
+                ]
+            ]
+        },
+        {
+            "input": "&uharr;",
+            "description": "Named entity: uharr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21be"
+                ]
+            ]
+        },
+        {
+            "input": "&uhblk",
+            "description": "Bad named entity: uhblk without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&uhblk"
+                ]
+            ]
+        },
+        {
+            "input": "&uhblk;",
+            "description": "Named entity: uhblk; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2580"
+                ]
+            ]
+        },
+        {
+            "input": "&ulcorn",
+            "description": "Bad named entity: ulcorn without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ulcorn"
+                ]
+            ]
+        },
+        {
+            "input": "&ulcorn;",
+            "description": "Named entity: ulcorn; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u231c"
+                ]
+            ]
+        },
+        {
+            "input": "&ulcorner",
+            "description": "Bad named entity: ulcorner without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ulcorner"
+                ]
+            ]
+        },
+        {
+            "input": "&ulcorner;",
+            "description": "Named entity: ulcorner; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u231c"
+                ]
+            ]
+        },
+        {
+            "input": "&ulcrop",
+            "description": "Bad named entity: ulcrop without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ulcrop"
+                ]
+            ]
+        },
+        {
+            "input": "&ulcrop;",
+            "description": "Named entity: ulcrop; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u230f"
+                ]
+            ]
+        },
+        {
+            "input": "&ultri",
+            "description": "Bad named entity: ultri without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ultri"
+                ]
+            ]
+        },
+        {
+            "input": "&ultri;",
+            "description": "Named entity: ultri; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25f8"
+                ]
+            ]
+        },
+        {
+            "input": "&umacr",
+            "description": "Bad named entity: umacr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&umacr"
+                ]
+            ]
+        },
+        {
+            "input": "&umacr;",
+            "description": "Named entity: umacr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u016b"
+                ]
+            ]
+        },
+        {
+            "input": "&uml",
+            "description": "Named entity: uml without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a8"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 5 }
+            ]
+        },
+        {
+            "input": "&uml;",
+            "description": "Named entity: uml; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a8"
+                ]
+            ]
+        },
+        {
+            "input": "&uogon",
+            "description": "Bad named entity: uogon without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&uogon"
+                ]
+            ]
+        },
+        {
+            "input": "&uogon;",
+            "description": "Named entity: uogon; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0173"
+                ]
+            ]
+        },
+        {
+            "input": "&uopf",
+            "description": "Bad named entity: uopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&uopf"
+                ]
+            ]
+        },
+        {
+            "input": "&uopf;",
+            "description": "Named entity: uopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd66"
+                ]
+            ]
+        },
+        {
+            "input": "&uparrow",
+            "description": "Bad named entity: uparrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&uparrow"
+                ]
+            ]
+        },
+        {
+            "input": "&uparrow;",
+            "description": "Named entity: uparrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2191"
+                ]
+            ]
+        },
+        {
+            "input": "&updownarrow",
+            "description": "Bad named entity: updownarrow without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&updownarrow"
+                ]
+            ]
+        },
+        {
+            "input": "&updownarrow;",
+            "description": "Named entity: updownarrow; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2195"
+                ]
+            ]
+        },
+        {
+            "input": "&upharpoonleft",
+            "description": "Bad named entity: upharpoonleft without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&upharpoonleft"
+                ]
+            ]
+        },
+        {
+            "input": "&upharpoonleft;",
+            "description": "Named entity: upharpoonleft; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21bf"
+                ]
+            ]
+        },
+        {
+            "input": "&upharpoonright",
+            "description": "Bad named entity: upharpoonright without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&upharpoonright"
+                ]
+            ]
+        },
+        {
+            "input": "&upharpoonright;",
+            "description": "Named entity: upharpoonright; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21be"
+                ]
+            ]
+        },
+        {
+            "input": "&uplus",
+            "description": "Bad named entity: uplus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&uplus"
+                ]
+            ]
+        },
+        {
+            "input": "&uplus;",
+            "description": "Named entity: uplus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u228e"
+                ]
+            ]
+        },
+        {
+            "input": "&upsi",
+            "description": "Bad named entity: upsi without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&upsi"
+                ]
+            ]
+        },
+        {
+            "input": "&upsi;",
+            "description": "Named entity: upsi; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03c5"
+                ]
+            ]
+        },
+        {
+            "input": "&upsih",
+            "description": "Bad named entity: upsih without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&upsih"
+                ]
+            ]
+        },
+        {
+            "input": "&upsih;",
+            "description": "Named entity: upsih; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03d2"
+                ]
+            ]
+        },
+        {
+            "input": "&upsilon",
+            "description": "Bad named entity: upsilon without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&upsilon"
+                ]
+            ]
+        },
+        {
+            "input": "&upsilon;",
+            "description": "Named entity: upsilon; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03c5"
+                ]
+            ]
+        },
+        {
+            "input": "&upuparrows",
+            "description": "Bad named entity: upuparrows without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&upuparrows"
+                ]
+            ]
+        },
+        {
+            "input": "&upuparrows;",
+            "description": "Named entity: upuparrows; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c8"
+                ]
+            ]
+        },
+        {
+            "input": "&urcorn",
+            "description": "Bad named entity: urcorn without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&urcorn"
+                ]
+            ]
+        },
+        {
+            "input": "&urcorn;",
+            "description": "Named entity: urcorn; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u231d"
+                ]
+            ]
+        },
+        {
+            "input": "&urcorner",
+            "description": "Bad named entity: urcorner without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&urcorner"
+                ]
+            ]
+        },
+        {
+            "input": "&urcorner;",
+            "description": "Named entity: urcorner; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u231d"
+                ]
+            ]
+        },
+        {
+            "input": "&urcrop",
+            "description": "Bad named entity: urcrop without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&urcrop"
+                ]
+            ]
+        },
+        {
+            "input": "&urcrop;",
+            "description": "Named entity: urcrop; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u230e"
+                ]
+            ]
+        },
+        {
+            "input": "&uring",
+            "description": "Bad named entity: uring without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&uring"
+                ]
+            ]
+        },
+        {
+            "input": "&uring;",
+            "description": "Named entity: uring; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u016f"
+                ]
+            ]
+        },
+        {
+            "input": "&urtri",
+            "description": "Bad named entity: urtri without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&urtri"
+                ]
+            ]
+        },
+        {
+            "input": "&urtri;",
+            "description": "Named entity: urtri; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25f9"
+                ]
+            ]
+        },
+        {
+            "input": "&uscr",
+            "description": "Bad named entity: uscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&uscr"
+                ]
+            ]
+        },
+        {
+            "input": "&uscr;",
+            "description": "Named entity: uscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcca"
+                ]
+            ]
+        },
+        {
+            "input": "&utdot",
+            "description": "Bad named entity: utdot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&utdot"
+                ]
+            ]
+        },
+        {
+            "input": "&utdot;",
+            "description": "Named entity: utdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22f0"
+                ]
+            ]
+        },
+        {
+            "input": "&utilde",
+            "description": "Bad named entity: utilde without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&utilde"
+                ]
+            ]
+        },
+        {
+            "input": "&utilde;",
+            "description": "Named entity: utilde; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0169"
+                ]
+            ]
+        },
+        {
+            "input": "&utri",
+            "description": "Bad named entity: utri without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&utri"
+                ]
+            ]
+        },
+        {
+            "input": "&utri;",
+            "description": "Named entity: utri; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25b5"
+                ]
+            ]
+        },
+        {
+            "input": "&utrif",
+            "description": "Bad named entity: utrif without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&utrif"
+                ]
+            ]
+        },
+        {
+            "input": "&utrif;",
+            "description": "Named entity: utrif; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25b4"
+                ]
+            ]
+        },
+        {
+            "input": "&uuarr",
+            "description": "Bad named entity: uuarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&uuarr"
+                ]
+            ]
+        },
+        {
+            "input": "&uuarr;",
+            "description": "Named entity: uuarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21c8"
+                ]
+            ]
+        },
+        {
+            "input": "&uuml",
+            "description": "Named entity: uuml without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00fc"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 6 }
+            ]
+        },
+        {
+            "input": "&uuml;",
+            "description": "Named entity: uuml; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00fc"
+                ]
+            ]
+        },
+        {
+            "input": "&uwangle",
+            "description": "Bad named entity: uwangle without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&uwangle"
+                ]
+            ]
+        },
+        {
+            "input": "&uwangle;",
+            "description": "Named entity: uwangle; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u29a7"
+                ]
+            ]
+        },
+        {
+            "input": "&vArr",
+            "description": "Bad named entity: vArr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vArr"
+                ]
+            ]
+        },
+        {
+            "input": "&vArr;",
+            "description": "Named entity: vArr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21d5"
+                ]
+            ]
+        },
+        {
+            "input": "&vBar",
+            "description": "Bad named entity: vBar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vBar"
+                ]
+            ]
+        },
+        {
+            "input": "&vBar;",
+            "description": "Named entity: vBar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ae8"
+                ]
+            ]
+        },
+        {
+            "input": "&vBarv",
+            "description": "Bad named entity: vBarv without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vBarv"
+                ]
+            ]
+        },
+        {
+            "input": "&vBarv;",
+            "description": "Named entity: vBarv; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2ae9"
+                ]
+            ]
+        },
+        {
+            "input": "&vDash",
+            "description": "Bad named entity: vDash without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vDash"
+                ]
+            ]
+        },
+        {
+            "input": "&vDash;",
+            "description": "Named entity: vDash; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22a8"
+                ]
+            ]
+        },
+        {
+            "input": "&vangrt",
+            "description": "Bad named entity: vangrt without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vangrt"
+                ]
+            ]
+        },
+        {
+            "input": "&vangrt;",
+            "description": "Named entity: vangrt; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u299c"
+                ]
+            ]
+        },
+        {
+            "input": "&varepsilon",
+            "description": "Bad named entity: varepsilon without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&varepsilon"
+                ]
+            ]
+        },
+        {
+            "input": "&varepsilon;",
+            "description": "Named entity: varepsilon; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03f5"
+                ]
+            ]
+        },
+        {
+            "input": "&varkappa",
+            "description": "Bad named entity: varkappa without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&varkappa"
+                ]
+            ]
+        },
+        {
+            "input": "&varkappa;",
+            "description": "Named entity: varkappa; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03f0"
+                ]
+            ]
+        },
+        {
+            "input": "&varnothing",
+            "description": "Bad named entity: varnothing without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&varnothing"
+                ]
+            ]
+        },
+        {
+            "input": "&varnothing;",
+            "description": "Named entity: varnothing; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2205"
+                ]
+            ]
+        },
+        {
+            "input": "&varphi",
+            "description": "Bad named entity: varphi without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&varphi"
+                ]
+            ]
+        },
+        {
+            "input": "&varphi;",
+            "description": "Named entity: varphi; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03d5"
+                ]
+            ]
+        },
+        {
+            "input": "&varpi",
+            "description": "Bad named entity: varpi without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&varpi"
+                ]
+            ]
+        },
+        {
+            "input": "&varpi;",
+            "description": "Named entity: varpi; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03d6"
+                ]
+            ]
+        },
+        {
+            "input": "&varpropto",
+            "description": "Bad named entity: varpropto without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&varpropto"
+                ]
+            ]
+        },
+        {
+            "input": "&varpropto;",
+            "description": "Named entity: varpropto; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u221d"
+                ]
+            ]
+        },
+        {
+            "input": "&varr",
+            "description": "Bad named entity: varr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&varr"
+                ]
+            ]
+        },
+        {
+            "input": "&varr;",
+            "description": "Named entity: varr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2195"
+                ]
+            ]
+        },
+        {
+            "input": "&varrho",
+            "description": "Bad named entity: varrho without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&varrho"
+                ]
+            ]
+        },
+        {
+            "input": "&varrho;",
+            "description": "Named entity: varrho; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03f1"
+                ]
+            ]
+        },
+        {
+            "input": "&varsigma",
+            "description": "Bad named entity: varsigma without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&varsigma"
+                ]
+            ]
+        },
+        {
+            "input": "&varsigma;",
+            "description": "Named entity: varsigma; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03c2"
+                ]
+            ]
+        },
+        {
+            "input": "&varsubsetneq",
+            "description": "Bad named entity: varsubsetneq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&varsubsetneq"
+                ]
+            ]
+        },
+        {
+            "input": "&varsubsetneq;",
+            "description": "Named entity: varsubsetneq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u228a\ufe00"
+                ]
+            ]
+        },
+        {
+            "input": "&varsubsetneqq",
+            "description": "Bad named entity: varsubsetneqq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&varsubsetneqq"
+                ]
+            ]
+        },
+        {
+            "input": "&varsubsetneqq;",
+            "description": "Named entity: varsubsetneqq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2acb\ufe00"
+                ]
+            ]
+        },
+        {
+            "input": "&varsupsetneq",
+            "description": "Bad named entity: varsupsetneq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&varsupsetneq"
+                ]
+            ]
+        },
+        {
+            "input": "&varsupsetneq;",
+            "description": "Named entity: varsupsetneq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u228b\ufe00"
+                ]
+            ]
+        },
+        {
+            "input": "&varsupsetneqq",
+            "description": "Bad named entity: varsupsetneqq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&varsupsetneqq"
+                ]
+            ]
+        },
+        {
+            "input": "&varsupsetneqq;",
+            "description": "Named entity: varsupsetneqq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2acc\ufe00"
+                ]
+            ]
+        },
+        {
+            "input": "&vartheta",
+            "description": "Bad named entity: vartheta without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vartheta"
+                ]
+            ]
+        },
+        {
+            "input": "&vartheta;",
+            "description": "Named entity: vartheta; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03d1"
+                ]
+            ]
+        },
+        {
+            "input": "&vartriangleleft",
+            "description": "Bad named entity: vartriangleleft without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vartriangleleft"
+                ]
+            ]
+        },
+        {
+            "input": "&vartriangleleft;",
+            "description": "Named entity: vartriangleleft; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22b2"
+                ]
+            ]
+        },
+        {
+            "input": "&vartriangleright",
+            "description": "Bad named entity: vartriangleright without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vartriangleright"
+                ]
+            ]
+        },
+        {
+            "input": "&vartriangleright;",
+            "description": "Named entity: vartriangleright; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22b3"
+                ]
+            ]
+        },
+        {
+            "input": "&vcy",
+            "description": "Bad named entity: vcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vcy"
+                ]
+            ]
+        },
+        {
+            "input": "&vcy;",
+            "description": "Named entity: vcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0432"
+                ]
+            ]
+        },
+        {
+            "input": "&vdash",
+            "description": "Bad named entity: vdash without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vdash"
+                ]
+            ]
+        },
+        {
+            "input": "&vdash;",
+            "description": "Named entity: vdash; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22a2"
+                ]
+            ]
+        },
+        {
+            "input": "&vee",
+            "description": "Bad named entity: vee without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vee"
+                ]
+            ]
+        },
+        {
+            "input": "&vee;",
+            "description": "Named entity: vee; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2228"
+                ]
+            ]
+        },
+        {
+            "input": "&veebar",
+            "description": "Bad named entity: veebar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&veebar"
+                ]
+            ]
+        },
+        {
+            "input": "&veebar;",
+            "description": "Named entity: veebar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22bb"
+                ]
+            ]
+        },
+        {
+            "input": "&veeeq",
+            "description": "Bad named entity: veeeq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&veeeq"
+                ]
+            ]
+        },
+        {
+            "input": "&veeeq;",
+            "description": "Named entity: veeeq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u225a"
+                ]
+            ]
+        },
+        {
+            "input": "&vellip",
+            "description": "Bad named entity: vellip without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vellip"
+                ]
+            ]
+        },
+        {
+            "input": "&vellip;",
+            "description": "Named entity: vellip; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22ee"
+                ]
+            ]
+        },
+        {
+            "input": "&verbar",
+            "description": "Bad named entity: verbar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&verbar"
+                ]
+            ]
+        },
+        {
+            "input": "&verbar;",
+            "description": "Named entity: verbar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "|"
+                ]
+            ]
+        },
+        {
+            "input": "&vert",
+            "description": "Bad named entity: vert without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vert"
+                ]
+            ]
+        },
+        {
+            "input": "&vert;",
+            "description": "Named entity: vert; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "|"
+                ]
+            ]
+        },
+        {
+            "input": "&vfr",
+            "description": "Bad named entity: vfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vfr"
+                ]
+            ]
+        },
+        {
+            "input": "&vfr;",
+            "description": "Named entity: vfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd33"
+                ]
+            ]
+        },
+        {
+            "input": "&vltri",
+            "description": "Bad named entity: vltri without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vltri"
+                ]
+            ]
+        },
+        {
+            "input": "&vltri;",
+            "description": "Named entity: vltri; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22b2"
+                ]
+            ]
+        },
+        {
+            "input": "&vnsub",
+            "description": "Bad named entity: vnsub without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vnsub"
+                ]
+            ]
+        },
+        {
+            "input": "&vnsub;",
+            "description": "Named entity: vnsub; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2282\u20d2"
+                ]
+            ]
+        },
+        {
+            "input": "&vnsup",
+            "description": "Bad named entity: vnsup without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vnsup"
+                ]
+            ]
+        },
+        {
+            "input": "&vnsup;",
+            "description": "Named entity: vnsup; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2283\u20d2"
+                ]
+            ]
+        },
+        {
+            "input": "&vopf",
+            "description": "Bad named entity: vopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vopf"
+                ]
+            ]
+        },
+        {
+            "input": "&vopf;",
+            "description": "Named entity: vopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd67"
+                ]
+            ]
+        },
+        {
+            "input": "&vprop",
+            "description": "Bad named entity: vprop without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vprop"
+                ]
+            ]
+        },
+        {
+            "input": "&vprop;",
+            "description": "Named entity: vprop; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u221d"
+                ]
+            ]
+        },
+        {
+            "input": "&vrtri",
+            "description": "Bad named entity: vrtri without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vrtri"
+                ]
+            ]
+        },
+        {
+            "input": "&vrtri;",
+            "description": "Named entity: vrtri; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22b3"
+                ]
+            ]
+        },
+        {
+            "input": "&vscr",
+            "description": "Bad named entity: vscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vscr"
+                ]
+            ]
+        },
+        {
+            "input": "&vscr;",
+            "description": "Named entity: vscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udccb"
+                ]
+            ]
+        },
+        {
+            "input": "&vsubnE",
+            "description": "Bad named entity: vsubnE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vsubnE"
+                ]
+            ]
+        },
+        {
+            "input": "&vsubnE;",
+            "description": "Named entity: vsubnE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2acb\ufe00"
+                ]
+            ]
+        },
+        {
+            "input": "&vsubne",
+            "description": "Bad named entity: vsubne without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vsubne"
+                ]
+            ]
+        },
+        {
+            "input": "&vsubne;",
+            "description": "Named entity: vsubne; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u228a\ufe00"
+                ]
+            ]
+        },
+        {
+            "input": "&vsupnE",
+            "description": "Bad named entity: vsupnE without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vsupnE"
+                ]
+            ]
+        },
+        {
+            "input": "&vsupnE;",
+            "description": "Named entity: vsupnE; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2acc\ufe00"
+                ]
+            ]
+        },
+        {
+            "input": "&vsupne",
+            "description": "Bad named entity: vsupne without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vsupne"
+                ]
+            ]
+        },
+        {
+            "input": "&vsupne;",
+            "description": "Named entity: vsupne; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u228b\ufe00"
+                ]
+            ]
+        },
+        {
+            "input": "&vzigzag",
+            "description": "Bad named entity: vzigzag without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&vzigzag"
+                ]
+            ]
+        },
+        {
+            "input": "&vzigzag;",
+            "description": "Named entity: vzigzag; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u299a"
+                ]
+            ]
+        },
+        {
+            "input": "&wcirc",
+            "description": "Bad named entity: wcirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&wcirc"
+                ]
+            ]
+        },
+        {
+            "input": "&wcirc;",
+            "description": "Named entity: wcirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0175"
+                ]
+            ]
+        },
+        {
+            "input": "&wedbar",
+            "description": "Bad named entity: wedbar without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&wedbar"
+                ]
+            ]
+        },
+        {
+            "input": "&wedbar;",
+            "description": "Named entity: wedbar; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a5f"
+                ]
+            ]
+        },
+        {
+            "input": "&wedge",
+            "description": "Bad named entity: wedge without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&wedge"
+                ]
+            ]
+        },
+        {
+            "input": "&wedge;",
+            "description": "Named entity: wedge; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2227"
+                ]
+            ]
+        },
+        {
+            "input": "&wedgeq",
+            "description": "Bad named entity: wedgeq without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&wedgeq"
+                ]
+            ]
+        },
+        {
+            "input": "&wedgeq;",
+            "description": "Named entity: wedgeq; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2259"
+                ]
+            ]
+        },
+        {
+            "input": "&weierp",
+            "description": "Bad named entity: weierp without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&weierp"
+                ]
+            ]
+        },
+        {
+            "input": "&weierp;",
+            "description": "Named entity: weierp; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2118"
+                ]
+            ]
+        },
+        {
+            "input": "&wfr",
+            "description": "Bad named entity: wfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&wfr"
+                ]
+            ]
+        },
+        {
+            "input": "&wfr;",
+            "description": "Named entity: wfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd34"
+                ]
+            ]
+        },
+        {
+            "input": "&wopf",
+            "description": "Bad named entity: wopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&wopf"
+                ]
+            ]
+        },
+        {
+            "input": "&wopf;",
+            "description": "Named entity: wopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd68"
+                ]
+            ]
+        },
+        {
+            "input": "&wp",
+            "description": "Bad named entity: wp without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&wp"
+                ]
+            ]
+        },
+        {
+            "input": "&wp;",
+            "description": "Named entity: wp; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2118"
+                ]
+            ]
+        },
+        {
+            "input": "&wr",
+            "description": "Bad named entity: wr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&wr"
+                ]
+            ]
+        },
+        {
+            "input": "&wr;",
+            "description": "Named entity: wr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2240"
+                ]
+            ]
+        },
+        {
+            "input": "&wreath",
+            "description": "Bad named entity: wreath without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&wreath"
+                ]
+            ]
+        },
+        {
+            "input": "&wreath;",
+            "description": "Named entity: wreath; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2240"
+                ]
+            ]
+        },
+        {
+            "input": "&wscr",
+            "description": "Bad named entity: wscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&wscr"
+                ]
+            ]
+        },
+        {
+            "input": "&wscr;",
+            "description": "Named entity: wscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udccc"
+                ]
+            ]
+        },
+        {
+            "input": "&xcap",
+            "description": "Bad named entity: xcap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&xcap"
+                ]
+            ]
+        },
+        {
+            "input": "&xcap;",
+            "description": "Named entity: xcap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22c2"
+                ]
+            ]
+        },
+        {
+            "input": "&xcirc",
+            "description": "Bad named entity: xcirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&xcirc"
+                ]
+            ]
+        },
+        {
+            "input": "&xcirc;",
+            "description": "Named entity: xcirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25ef"
+                ]
+            ]
+        },
+        {
+            "input": "&xcup",
+            "description": "Bad named entity: xcup without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&xcup"
+                ]
+            ]
+        },
+        {
+            "input": "&xcup;",
+            "description": "Named entity: xcup; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22c3"
+                ]
+            ]
+        },
+        {
+            "input": "&xdtri",
+            "description": "Bad named entity: xdtri without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&xdtri"
+                ]
+            ]
+        },
+        {
+            "input": "&xdtri;",
+            "description": "Named entity: xdtri; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25bd"
+                ]
+            ]
+        },
+        {
+            "input": "&xfr",
+            "description": "Bad named entity: xfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&xfr"
+                ]
+            ]
+        },
+        {
+            "input": "&xfr;",
+            "description": "Named entity: xfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd35"
+                ]
+            ]
+        },
+        {
+            "input": "&xhArr",
+            "description": "Bad named entity: xhArr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&xhArr"
+                ]
+            ]
+        },
+        {
+            "input": "&xhArr;",
+            "description": "Named entity: xhArr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27fa"
+                ]
+            ]
+        },
+        {
+            "input": "&xharr",
+            "description": "Bad named entity: xharr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&xharr"
+                ]
+            ]
+        },
+        {
+            "input": "&xharr;",
+            "description": "Named entity: xharr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27f7"
+                ]
+            ]
+        },
+        {
+            "input": "&xi",
+            "description": "Bad named entity: xi without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&xi"
+                ]
+            ]
+        },
+        {
+            "input": "&xi;",
+            "description": "Named entity: xi; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03be"
+                ]
+            ]
+        },
+        {
+            "input": "&xlArr",
+            "description": "Bad named entity: xlArr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&xlArr"
+                ]
+            ]
+        },
+        {
+            "input": "&xlArr;",
+            "description": "Named entity: xlArr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27f8"
+                ]
+            ]
+        },
+        {
+            "input": "&xlarr",
+            "description": "Bad named entity: xlarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&xlarr"
+                ]
+            ]
+        },
+        {
+            "input": "&xlarr;",
+            "description": "Named entity: xlarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27f5"
+                ]
+            ]
+        },
+        {
+            "input": "&xmap",
+            "description": "Bad named entity: xmap without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&xmap"
+                ]
+            ]
+        },
+        {
+            "input": "&xmap;",
+            "description": "Named entity: xmap; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27fc"
+                ]
+            ]
+        },
+        {
+            "input": "&xnis",
+            "description": "Bad named entity: xnis without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&xnis"
+                ]
+            ]
+        },
+        {
+            "input": "&xnis;",
+            "description": "Named entity: xnis; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22fb"
+                ]
+            ]
+        },
+        {
+            "input": "&xodot",
+            "description": "Bad named entity: xodot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&xodot"
+                ]
+            ]
+        },
+        {
+            "input": "&xodot;",
+            "description": "Named entity: xodot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a00"
+                ]
+            ]
+        },
+        {
+            "input": "&xopf",
+            "description": "Bad named entity: xopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&xopf"
+                ]
+            ]
+        },
+        {
+            "input": "&xopf;",
+            "description": "Named entity: xopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd69"
+                ]
+            ]
+        },
+        {
+            "input": "&xoplus",
+            "description": "Bad named entity: xoplus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&xoplus"
+                ]
+            ]
+        },
+        {
+            "input": "&xoplus;",
+            "description": "Named entity: xoplus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a01"
+                ]
+            ]
+        },
+        {
+            "input": "&xotime",
+            "description": "Bad named entity: xotime without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&xotime"
+                ]
+            ]
+        },
+        {
+            "input": "&xotime;",
+            "description": "Named entity: xotime; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a02"
+                ]
+            ]
+        },
+        {
+            "input": "&xrArr",
+            "description": "Bad named entity: xrArr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&xrArr"
+                ]
+            ]
+        },
+        {
+            "input": "&xrArr;",
+            "description": "Named entity: xrArr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27f9"
+                ]
+            ]
+        },
+        {
+            "input": "&xrarr",
+            "description": "Bad named entity: xrarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&xrarr"
+                ]
+            ]
+        },
+        {
+            "input": "&xrarr;",
+            "description": "Named entity: xrarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u27f6"
+                ]
+            ]
+        },
+        {
+            "input": "&xscr",
+            "description": "Bad named entity: xscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&xscr"
+                ]
+            ]
+        },
+        {
+            "input": "&xscr;",
+            "description": "Named entity: xscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udccd"
+                ]
+            ]
+        },
+        {
+            "input": "&xsqcup",
+            "description": "Bad named entity: xsqcup without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&xsqcup"
+                ]
+            ]
+        },
+        {
+            "input": "&xsqcup;",
+            "description": "Named entity: xsqcup; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a06"
+                ]
+            ]
+        },
+        {
+            "input": "&xuplus",
+            "description": "Bad named entity: xuplus without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&xuplus"
+                ]
+            ]
+        },
+        {
+            "input": "&xuplus;",
+            "description": "Named entity: xuplus; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2a04"
+                ]
+            ]
+        },
+        {
+            "input": "&xutri",
+            "description": "Bad named entity: xutri without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&xutri"
+                ]
+            ]
+        },
+        {
+            "input": "&xutri;",
+            "description": "Named entity: xutri; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u25b3"
+                ]
+            ]
+        },
+        {
+            "input": "&xvee",
+            "description": "Bad named entity: xvee without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&xvee"
+                ]
+            ]
+        },
+        {
+            "input": "&xvee;",
+            "description": "Named entity: xvee; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22c1"
+                ]
+            ]
+        },
+        {
+            "input": "&xwedge",
+            "description": "Bad named entity: xwedge without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&xwedge"
+                ]
+            ]
+        },
+        {
+            "input": "&xwedge;",
+            "description": "Named entity: xwedge; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u22c0"
+                ]
+            ]
+        },
+        {
+            "input": "&yacute",
+            "description": "Named entity: yacute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00fd"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 8 }
+            ]
+        },
+        {
+            "input": "&yacute;",
+            "description": "Named entity: yacute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00fd"
+                ]
+            ]
+        },
+        {
+            "input": "&yacy",
+            "description": "Bad named entity: yacy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&yacy"
+                ]
+            ]
+        },
+        {
+            "input": "&yacy;",
+            "description": "Named entity: yacy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u044f"
+                ]
+            ]
+        },
+        {
+            "input": "&ycirc",
+            "description": "Bad named entity: ycirc without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ycirc"
+                ]
+            ]
+        },
+        {
+            "input": "&ycirc;",
+            "description": "Named entity: ycirc; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0177"
+                ]
+            ]
+        },
+        {
+            "input": "&ycy",
+            "description": "Bad named entity: ycy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&ycy"
+                ]
+            ]
+        },
+        {
+            "input": "&ycy;",
+            "description": "Named entity: ycy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u044b"
+                ]
+            ]
+        },
+        {
+            "input": "&yen",
+            "description": "Named entity: yen without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a5"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 5 }
+            ]
+        },
+        {
+            "input": "&yen;",
+            "description": "Named entity: yen; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00a5"
+                ]
+            ]
+        },
+        {
+            "input": "&yfr",
+            "description": "Bad named entity: yfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&yfr"
+                ]
+            ]
+        },
+        {
+            "input": "&yfr;",
+            "description": "Named entity: yfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd36"
+                ]
+            ]
+        },
+        {
+            "input": "&yicy",
+            "description": "Bad named entity: yicy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&yicy"
+                ]
+            ]
+        },
+        {
+            "input": "&yicy;",
+            "description": "Named entity: yicy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0457"
+                ]
+            ]
+        },
+        {
+            "input": "&yopf",
+            "description": "Bad named entity: yopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&yopf"
+                ]
+            ]
+        },
+        {
+            "input": "&yopf;",
+            "description": "Named entity: yopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd6a"
+                ]
+            ]
+        },
+        {
+            "input": "&yscr",
+            "description": "Bad named entity: yscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&yscr"
+                ]
+            ]
+        },
+        {
+            "input": "&yscr;",
+            "description": "Named entity: yscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udcce"
+                ]
+            ]
+        },
+        {
+            "input": "&yucy",
+            "description": "Bad named entity: yucy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&yucy"
+                ]
+            ]
+        },
+        {
+            "input": "&yucy;",
+            "description": "Named entity: yucy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u044e"
+                ]
+            ]
+        },
+        {
+            "input": "&yuml",
+            "description": "Named entity: yuml without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ff"
+                ]
+            ],
+            "errors": [
+                { "code": "missing-semicolon-after-character-reference", "line": 1,  "col": 6 }
+            ]
+        },
+        {
+            "input": "&yuml;",
+            "description": "Named entity: yuml; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u00ff"
+                ]
+            ]
+        },
+        {
+            "input": "&zacute",
+            "description": "Bad named entity: zacute without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&zacute"
+                ]
+            ]
+        },
+        {
+            "input": "&zacute;",
+            "description": "Named entity: zacute; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u017a"
+                ]
+            ]
+        },
+        {
+            "input": "&zcaron",
+            "description": "Bad named entity: zcaron without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&zcaron"
+                ]
+            ]
+        },
+        {
+            "input": "&zcaron;",
+            "description": "Named entity: zcaron; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u017e"
+                ]
+            ]
+        },
+        {
+            "input": "&zcy",
+            "description": "Bad named entity: zcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&zcy"
+                ]
+            ]
+        },
+        {
+            "input": "&zcy;",
+            "description": "Named entity: zcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0437"
+                ]
+            ]
+        },
+        {
+            "input": "&zdot",
+            "description": "Bad named entity: zdot without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&zdot"
+                ]
+            ]
+        },
+        {
+            "input": "&zdot;",
+            "description": "Named entity: zdot; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u017c"
+                ]
+            ]
+        },
+        {
+            "input": "&zeetrf",
+            "description": "Bad named entity: zeetrf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&zeetrf"
+                ]
+            ]
+        },
+        {
+            "input": "&zeetrf;",
+            "description": "Named entity: zeetrf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u2128"
+                ]
+            ]
+        },
+        {
+            "input": "&zeta",
+            "description": "Bad named entity: zeta without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&zeta"
+                ]
+            ]
+        },
+        {
+            "input": "&zeta;",
+            "description": "Named entity: zeta; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u03b6"
+                ]
+            ]
+        },
+        {
+            "input": "&zfr",
+            "description": "Bad named entity: zfr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&zfr"
+                ]
+            ]
+        },
+        {
+            "input": "&zfr;",
+            "description": "Named entity: zfr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd37"
+                ]
+            ]
+        },
+        {
+            "input": "&zhcy",
+            "description": "Bad named entity: zhcy without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&zhcy"
+                ]
+            ]
+        },
+        {
+            "input": "&zhcy;",
+            "description": "Named entity: zhcy; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u0436"
+                ]
+            ]
+        },
+        {
+            "input": "&zigrarr",
+            "description": "Bad named entity: zigrarr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&zigrarr"
+                ]
+            ]
+        },
+        {
+            "input": "&zigrarr;",
+            "description": "Named entity: zigrarr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u21dd"
+                ]
+            ]
+        },
+        {
+            "input": "&zopf",
+            "description": "Bad named entity: zopf without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&zopf"
+                ]
+            ]
+        },
+        {
+            "input": "&zopf;",
+            "description": "Named entity: zopf; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udd6b"
+                ]
+            ]
+        },
+        {
+            "input": "&zscr",
+            "description": "Bad named entity: zscr without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&zscr"
+                ]
+            ]
+        },
+        {
+            "input": "&zscr;",
+            "description": "Named entity: zscr; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\ud835\udccf"
+                ]
+            ]
+        },
+        {
+            "input": "&zwj",
+            "description": "Bad named entity: zwj without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&zwj"
+                ]
+            ]
+        },
+        {
+            "input": "&zwj;",
+            "description": "Named entity: zwj; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u200d"
+                ]
+            ]
+        },
+        {
+            "input": "&zwnj",
+            "description": "Bad named entity: zwnj without a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "&zwnj"
+                ]
+            ]
+        },
+        {
+            "input": "&zwnj;",
+            "description": "Named entity: zwnj; with a semi-colon",
+            "output": [
+                [
+                    "Character",
+                    "\u200c"
+                ]
+            ]
+        }
+    ]
+}
\ No newline at end of file
diff --git a/lib/html5lib/tests/testdata/tokenizer/numericEntities.test b/lib/html5lib/tests/testdata/tokenizer/numericEntities.test
new file mode 100644
index 00000000..085109b7
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tokenizer/numericEntities.test
@@ -0,0 +1,1677 @@
+{"tests": [
+
+{"description": "Invalid unterminated numeric entity character overflow before EOF",
+"input": "&#11111111111",
+"output": [["Character", "\uFFFD"]],
+"errors":[
+    { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 14 },
+    { "code": "character-reference-outside-unicode-range", "line": 1, "col": 14 }
+]},
+
+{"description": "Invalid unterminated numeric entity character overflow before EOF",
+"input": "&#1111111111",
+"output": [["Character", "\uFFFD"]],
+"errors":[
+    { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 13 },
+    { "code": "character-reference-outside-unicode-range", "line": 1, "col": 13 }
+]},
+
+{"description": "Invalid unterminated numeric entity character overflow before EOF",
+"input": "&#111111111111",
+"output": [["Character", "\uFFFD"]],
+"errors":[
+    { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 15 },
+    { "code": "character-reference-outside-unicode-range", "line": 1, "col": 15 }
+]},
+
+{"description": "Invalid unterminated numeric entity character overflow",
+"input": "&#11111111111x",
+"output": [["Character", "\uFFFDx"]],
+"errors":[
+    { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 14 },
+    { "code": "character-reference-outside-unicode-range", "line": 1, "col": 14 }
+]},
+
+{"description": "Invalid unterminated numeric entity character overflow",
+"input": "&#1111111111x",
+"output": [["Character", "\uFFFDx"]],
+"errors":[
+    { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 13 },
+    { "code": "character-reference-outside-unicode-range", "line": 1, "col": 13 }
+]},
+
+{"description": "Invalid unterminated numeric entity character overflow",
+"input": "&#111111111111x",
+"output": [["Character", "\uFFFDx"]],
+"errors":[
+    { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 15 },
+    { "code": "character-reference-outside-unicode-range", "line": 1, "col": 15 }
+]},
+
+{"description": "Invalid numeric entity character overflow",
+"input": "&#11111111111;",
+"output": [["Character", "\uFFFD"]],
+"errors":[
+    { "code": "character-reference-outside-unicode-range", "line": 1, "col": 15 }
+]},
+
+{"description": "Invalid numeric entity character overflow",
+"input": "&#1111111111;",
+"output": [["Character", "\uFFFD"]],
+"errors":[
+    { "code": "character-reference-outside-unicode-range", "line": 1, "col": 14 }
+]},
+
+{"description": "Invalid numeric entity character overflow",
+"input": "&#111111111111;",
+"output": [["Character", "\uFFFD"]],
+"errors":[
+    { "code": "character-reference-outside-unicode-range", "line": 1, "col": 16 }
+]},
+
+{"description": "Invalid numeric entity character U+0000",
+"input": "&#x0000;",
+"output": [["Character", "\uFFFD"]],
+"errors":[
+    { "code": "null-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+0001",
+"input": "&#x0001;",
+"output": [["Character", "\u0001"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+0002",
+"input": "&#x0002;",
+"output": [["Character", "\u0002"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+
+{"description": "Invalid numeric entity character U+0003",
+"input": "&#x0003;",
+"output": [["Character", "\u0003"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+
+{"description": "Invalid numeric entity character U+0004",
+"input": "&#x0004;",
+"output": [["Character", "\u0004"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+
+{"description": "Invalid numeric entity character U+0005",
+"input": "&#x0005;",
+"output": [["Character", "\u0005"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+
+{"description": "Invalid numeric entity character U+0006",
+"input": "&#x0006;",
+"output": [["Character", "\u0006"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+0007",
+"input": "&#x0007;",
+"output": [["Character", "\u0007"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+0008",
+"input": "&#x0008;",
+"output": [["Character", "\u0008"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+000B",
+"input": "&#x000b;",
+"output": [["Character", "\u000b"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+000E",
+"input": "&#x000e;",
+"output": [["Character", "\u000e"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+000F",
+"input": "&#x000f;",
+"output": [["Character", "\u000f"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+0010",
+"input": "&#x0010;",
+"output": [["Character", "\u0010"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+0011",
+"input": "&#x0011;",
+"output": [["Character", "\u0011"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+0012",
+"input": "&#x0012;",
+"output": [["Character", "\u0012"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+0013",
+"input": "&#x0013;",
+"output": [["Character", "\u0013"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+0014",
+"input": "&#x0014;",
+"output": [["Character", "\u0014"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+0015",
+"input": "&#x0015;",
+"output": [["Character", "\u0015"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+0016",
+"input": "&#x0016;",
+"output": [["Character", "\u0016"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+0017",
+"input": "&#x0017;",
+"output": [["Character", "\u0017"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+0018",
+"input": "&#x0018;",
+"output": [["Character", "\u0018"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+0019",
+"input": "&#x0019;",
+"output": [["Character", "\u0019"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+001A",
+"input": "&#x001a;",
+"output": [["Character", "\u001a"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+001B",
+"input": "&#x001b;",
+"output": [["Character", "\u001b"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+001C",
+"input": "&#x001c;",
+"output": [["Character", "\u001c"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+001D",
+"input": "&#x001d;",
+"output": [["Character", "\u001d"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+001E",
+"input": "&#x001e;",
+"output": [["Character", "\u001e"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+001F",
+"input": "&#x001f;",
+"output": [["Character", "\u001f"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+007F",
+"input": "&#x007f;",
+"output": [["Character", "\u007f"]],
+"errors":[
+    { "code": "control-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+D800",
+"input": "&#xd800;",
+"output": [["Character", "\uFFFD"]],
+"errors":[
+    { "code": "surrogate-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+DFFF",
+"input": "&#xdfff;",
+"output": [["Character", "\uFFFD"]],
+"errors":[
+    { "code": "surrogate-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDD0",
+"input": "&#xfdd0;",
+"output": [["Character", "\ufdd0"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDD1",
+"input": "&#xfdd1;",
+"output": [["Character", "\ufdd1"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDD2",
+"input": "&#xfdd2;",
+"output": [["Character", "\ufdd2"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDD3",
+"input": "&#xfdd3;",
+"output": [["Character", "\ufdd3"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDD4",
+"input": "&#xfdd4;",
+"output": [["Character", "\ufdd4"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDD5",
+"input": "&#xfdd5;",
+"output": [["Character", "\ufdd5"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDD6",
+"input": "&#xfdd6;",
+"output": [["Character", "\ufdd6"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDD7",
+"input": "&#xfdd7;",
+"output": [["Character", "\ufdd7"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDD8",
+"input": "&#xfdd8;",
+"output": [["Character", "\ufdd8"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDD9",
+"input": "&#xfdd9;",
+"output": [["Character", "\ufdd9"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDDA",
+"input": "&#xfdda;",
+"output": [["Character", "\ufdda"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDDB",
+"input": "&#xfddb;",
+"output": [["Character", "\ufddb"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDDC",
+"input": "&#xfddc;",
+"output": [["Character", "\ufddc"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDDD",
+"input": "&#xfddd;",
+"output": [["Character", "\ufddd"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDDE",
+"input": "&#xfdde;",
+"output": [["Character", "\ufdde"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDDF",
+"input": "&#xfddf;",
+"output": [["Character", "\ufddf"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDE0",
+"input": "&#xfde0;",
+"output": [["Character", "\ufde0"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDE1",
+"input": "&#xfde1;",
+"output": [["Character", "\ufde1"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDE2",
+"input": "&#xfde2;",
+"output": [["Character", "\ufde2"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDE3",
+"input": "&#xfde3;",
+"output": [["Character", "\ufde3"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDE4",
+"input": "&#xfde4;",
+"output": [["Character", "\ufde4"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDE5",
+"input": "&#xfde5;",
+"output": [["Character", "\ufde5"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDE6",
+"input": "&#xfde6;",
+"output": [["Character", "\ufde6"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDE7",
+"input": "&#xfde7;",
+"output": [["Character", "\ufde7"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDE8",
+"input": "&#xfde8;",
+"output": [["Character", "\ufde8"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDE9",
+"input": "&#xfde9;",
+"output": [["Character", "\ufde9"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDEA",
+"input": "&#xfdea;",
+"output": [["Character", "\ufdea"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDEB",
+"input": "&#xfdeb;",
+"output": [["Character", "\ufdeb"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDEC",
+"input": "&#xfdec;",
+"output": [["Character", "\ufdec"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDED",
+"input": "&#xfded;",
+"output": [["Character", "\ufded"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDEE",
+"input": "&#xfdee;",
+"output": [["Character", "\ufdee"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FDEF",
+"input": "&#xfdef;",
+"output": [["Character", "\ufdef"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FFFE",
+"input": "&#xfffe;",
+"output": [["Character", "\ufffe"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+FFFF",
+"input": "&#xffff;",
+"output": [["Character", "\uffff"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description": "Invalid numeric entity character U+1FFFE",
+"input": "&#x1fffe;",
+"output": [["Character", "\uD83F\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+1FFFF",
+"input": "&#x1ffff;",
+"output": [["Character", "\uD83F\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+2FFFE",
+"input": "&#x2fffe;",
+"output": [["Character", "\uD87F\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+2FFFF",
+"input": "&#x2ffff;",
+"output": [["Character", "\uD87F\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+3FFFE",
+"input": "&#x3fffe;",
+"output": [["Character", "\uD8BF\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+3FFFF",
+"input": "&#x3ffff;",
+"output": [["Character", "\uD8BF\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+4FFFE",
+"input": "&#x4fffe;",
+"output": [["Character", "\uD8FF\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+4FFFF",
+"input": "&#x4ffff;",
+"output": [["Character", "\uD8FF\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+5FFFE",
+"input": "&#x5fffe;",
+"output": [["Character", "\uD93F\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+5FFFF",
+"input": "&#x5ffff;",
+"output": [["Character", "\uD93F\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+6FFFE",
+"input": "&#x6fffe;",
+"output": [["Character", "\uD97F\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+6FFFF",
+"input": "&#x6ffff;",
+"output": [["Character", "\uD97F\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+7FFFE",
+"input": "&#x7fffe;",
+"output": [["Character", "\uD9BF\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+7FFFF",
+"input": "&#x7ffff;",
+"output": [["Character", "\uD9BF\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+8FFFE",
+"input": "&#x8fffe;",
+"output": [["Character", "\uD9FF\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+8FFFF",
+"input": "&#x8ffff;",
+"output": [["Character", "\uD9FF\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+9FFFE",
+"input": "&#x9fffe;",
+"output": [["Character", "\uDA3F\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+9FFFF",
+"input": "&#x9ffff;",
+"output": [["Character", "\uDA3F\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+AFFFE",
+"input": "&#xafffe;",
+"output": [["Character", "\uDA7F\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+AFFFF",
+"input": "&#xaffff;",
+"output": [["Character", "\uDA7F\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+BFFFE",
+"input": "&#xbfffe;",
+"output": [["Character", "\uDABF\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+BFFFF",
+"input": "&#xbffff;",
+"output": [["Character", "\uDABF\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+CFFFE",
+"input": "&#xcfffe;",
+"output": [["Character", "\uDAFF\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+CFFFF",
+"input": "&#xcffff;",
+"output": [["Character", "\uDAFF\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+DFFFE",
+"input": "&#xdfffe;",
+"output": [["Character", "\uDB3F\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+DFFFF",
+"input": "&#xdffff;",
+"output": [["Character", "\uDB3F\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+EFFFE",
+"input": "&#xefffe;",
+"output": [["Character", "\uDB7F\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+EFFFF",
+"input": "&#xeffff;",
+"output": [["Character", "\uDB7F\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+FFFFE",
+"input": "&#xffffe;",
+"output": [["Character", "\uDBBF\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+FFFFF",
+"input": "&#xfffff;",
+"output": [["Character", "\uDBBF\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 10 }
+]},
+
+{"description": "Invalid numeric entity character U+10FFFE",
+"input": "&#x10fffe;",
+"output": [["Character", "\uDBFF\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 11 }
+]},
+
+{"description": "Invalid numeric entity character U+10FFFF",
+"input": "&#x10ffff;",
+"output": [["Character", "\uDBFF\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 11 }
+]},
+
+{"description": "Valid numeric entity character U+0009",
+"input": "&#x0009;",
+"output": [["Character", "\u0009"]]},
+
+{"description": "Valid numeric entity character U+000A",
+"input": "&#x000a;",
+"output": [["Character", "\u000A"]]},
+
+{"description": "Valid numeric entity character U+0020",
+"input": "&#x0020;",
+"output": [["Character", "\u0020"]]},
+
+{"description": "Valid numeric entity character U+0021",
+"input": "&#x0021;",
+"output": [["Character", "\u0021"]]},
+
+{"description": "Valid numeric entity character U+0022",
+"input": "&#x0022;",
+"output": [["Character", "\u0022"]]},
+
+{"description": "Valid numeric entity character U+0023",
+"input": "&#x0023;",
+"output": [["Character", "\u0023"]]},
+
+{"description": "Valid numeric entity character U+0024",
+"input": "&#x0024;",
+"output": [["Character", "\u0024"]]},
+
+{"description": "Valid numeric entity character U+0025",
+"input": "&#x0025;",
+"output": [["Character", "\u0025"]]},
+
+{"description": "Valid numeric entity character U+0026",
+"input": "&#x0026;",
+"output": [["Character", "\u0026"]]},
+
+{"description": "Valid numeric entity character U+0027",
+"input": "&#x0027;",
+"output": [["Character", "\u0027"]]},
+
+{"description": "Valid numeric entity character U+0028",
+"input": "&#x0028;",
+"output": [["Character", "\u0028"]]},
+
+{"description": "Valid numeric entity character U+0029",
+"input": "&#x0029;",
+"output": [["Character", "\u0029"]]},
+
+{"description": "Valid numeric entity character U+002A",
+"input": "&#x002a;",
+"output": [["Character", "\u002A"]]},
+
+{"description": "Valid numeric entity character U+002B",
+"input": "&#x002b;",
+"output": [["Character", "\u002B"]]},
+
+{"description": "Valid numeric entity character U+002C",
+"input": "&#x002c;",
+"output": [["Character", "\u002C"]]},
+
+{"description": "Valid numeric entity character U+002D",
+"input": "&#x002d;",
+"output": [["Character", "\u002D"]]},
+
+{"description": "Valid numeric entity character U+002E",
+"input": "&#x002e;",
+"output": [["Character", "\u002E"]]},
+
+{"description": "Valid numeric entity character U+002F",
+"input": "&#x002f;",
+"output": [["Character", "\u002F"]]},
+
+{"description": "Valid numeric entity character U+0030",
+"input": "&#x0030;",
+"output": [["Character", "\u0030"]]},
+
+{"description": "Valid numeric entity character U+0031",
+"input": "&#x0031;",
+"output": [["Character", "\u0031"]]},
+
+{"description": "Valid numeric entity character U+0032",
+"input": "&#x0032;",
+"output": [["Character", "\u0032"]]},
+
+{"description": "Valid numeric entity character U+0033",
+"input": "&#x0033;",
+"output": [["Character", "\u0033"]]},
+
+{"description": "Valid numeric entity character U+0034",
+"input": "&#x0034;",
+"output": [["Character", "\u0034"]]},
+
+{"description": "Valid numeric entity character U+0035",
+"input": "&#x0035;",
+"output": [["Character", "\u0035"]]},
+
+{"description": "Valid numeric entity character U+0036",
+"input": "&#x0036;",
+"output": [["Character", "\u0036"]]},
+
+{"description": "Valid numeric entity character U+0037",
+"input": "&#x0037;",
+"output": [["Character", "\u0037"]]},
+
+{"description": "Valid numeric entity character U+0038",
+"input": "&#x0038;",
+"output": [["Character", "\u0038"]]},
+
+{"description": "Valid numeric entity character U+0039",
+"input": "&#x0039;",
+"output": [["Character", "\u0039"]]},
+
+{"description": "Valid numeric entity character U+003A",
+"input": "&#x003a;",
+"output": [["Character", "\u003A"]]},
+
+{"description": "Valid numeric entity character U+003B",
+"input": "&#x003b;",
+"output": [["Character", "\u003B"]]},
+
+{"description": "Valid numeric entity character U+003C",
+"input": "&#x003c;",
+"output": [["Character", "\u003C"]]},
+
+{"description": "Valid numeric entity character U+003D",
+"input": "&#x003d;",
+"output": [["Character", "\u003D"]]},
+
+{"description": "Valid numeric entity character U+003E",
+"input": "&#x003e;",
+"output": [["Character", "\u003E"]]},
+
+{"description": "Valid numeric entity character U+003F",
+"input": "&#x003f;",
+"output": [["Character", "\u003F"]]},
+
+{"description": "Valid numeric entity character U+0040",
+"input": "&#x0040;",
+"output": [["Character", "\u0040"]]},
+
+{"description": "Valid numeric entity character U+0041",
+"input": "&#x0041;",
+"output": [["Character", "\u0041"]]},
+
+{"description": "Valid numeric entity character U+0042",
+"input": "&#x0042;",
+"output": [["Character", "\u0042"]]},
+
+{"description": "Valid numeric entity character U+0043",
+"input": "&#x0043;",
+"output": [["Character", "\u0043"]]},
+
+{"description": "Valid numeric entity character U+0044",
+"input": "&#x0044;",
+"output": [["Character", "\u0044"]]},
+
+{"description": "Valid numeric entity character U+0045",
+"input": "&#x0045;",
+"output": [["Character", "\u0045"]]},
+
+{"description": "Valid numeric entity character U+0046",
+"input": "&#x0046;",
+"output": [["Character", "\u0046"]]},
+
+{"description": "Valid numeric entity character U+0047",
+"input": "&#x0047;",
+"output": [["Character", "\u0047"]]},
+
+{"description": "Valid numeric entity character U+0048",
+"input": "&#x0048;",
+"output": [["Character", "\u0048"]]},
+
+{"description": "Valid numeric entity character U+0049",
+"input": "&#x0049;",
+"output": [["Character", "\u0049"]]},
+
+{"description": "Valid numeric entity character U+004A",
+"input": "&#x004a;",
+"output": [["Character", "\u004A"]]},
+
+{"description": "Valid numeric entity character U+004B",
+"input": "&#x004b;",
+"output": [["Character", "\u004B"]]},
+
+{"description": "Valid numeric entity character U+004C",
+"input": "&#x004c;",
+"output": [["Character", "\u004C"]]},
+
+{"description": "Valid numeric entity character U+004D",
+"input": "&#x004d;",
+"output": [["Character", "\u004D"]]},
+
+{"description": "Valid numeric entity character U+004E",
+"input": "&#x004e;",
+"output": [["Character", "\u004E"]]},
+
+{"description": "Valid numeric entity character U+004F",
+"input": "&#x004f;",
+"output": [["Character", "\u004F"]]},
+
+{"description": "Valid numeric entity character U+0050",
+"input": "&#x0050;",
+"output": [["Character", "\u0050"]]},
+
+{"description": "Valid numeric entity character U+0051",
+"input": "&#x0051;",
+"output": [["Character", "\u0051"]]},
+
+{"description": "Valid numeric entity character U+0052",
+"input": "&#x0052;",
+"output": [["Character", "\u0052"]]},
+
+{"description": "Valid numeric entity character U+0053",
+"input": "&#x0053;",
+"output": [["Character", "\u0053"]]},
+
+{"description": "Valid numeric entity character U+0054",
+"input": "&#x0054;",
+"output": [["Character", "\u0054"]]},
+
+{"description": "Valid numeric entity character U+0055",
+"input": "&#x0055;",
+"output": [["Character", "\u0055"]]},
+
+{"description": "Valid numeric entity character U+0056",
+"input": "&#x0056;",
+"output": [["Character", "\u0056"]]},
+
+{"description": "Valid numeric entity character U+0057",
+"input": "&#x0057;",
+"output": [["Character", "\u0057"]]},
+
+{"description": "Valid numeric entity character U+0058",
+"input": "&#x0058;",
+"output": [["Character", "\u0058"]]},
+
+{"description": "Valid numeric entity character U+0059",
+"input": "&#x0059;",
+"output": [["Character", "\u0059"]]},
+
+{"description": "Valid numeric entity character U+005A",
+"input": "&#x005a;",
+"output": [["Character", "\u005A"]]},
+
+{"description": "Valid numeric entity character U+005B",
+"input": "&#x005b;",
+"output": [["Character", "\u005B"]]},
+
+{"description": "Valid numeric entity character U+005C",
+"input": "&#x005c;",
+"output": [["Character", "\u005C"]]},
+
+{"description": "Valid numeric entity character U+005D",
+"input": "&#x005d;",
+"output": [["Character", "\u005D"]]},
+
+{"description": "Valid numeric entity character U+005E",
+"input": "&#x005e;",
+"output": [["Character", "\u005E"]]},
+
+{"description": "Valid numeric entity character U+005F",
+"input": "&#x005f;",
+"output": [["Character", "\u005F"]]},
+
+{"description": "Valid numeric entity character U+0060",
+"input": "&#x0060;",
+"output": [["Character", "\u0060"]]},
+
+{"description": "Valid numeric entity character U+0061",
+"input": "&#x0061;",
+"output": [["Character", "\u0061"]]},
+
+{"description": "Valid numeric entity character U+0062",
+"input": "&#x0062;",
+"output": [["Character", "\u0062"]]},
+
+{"description": "Valid numeric entity character U+0063",
+"input": "&#x0063;",
+"output": [["Character", "\u0063"]]},
+
+{"description": "Valid numeric entity character U+0064",
+"input": "&#x0064;",
+"output": [["Character", "\u0064"]]},
+
+{"description": "Valid numeric entity character U+0065",
+"input": "&#x0065;",
+"output": [["Character", "\u0065"]]},
+
+{"description": "Valid numeric entity character U+0066",
+"input": "&#x0066;",
+"output": [["Character", "\u0066"]]},
+
+{"description": "Valid numeric entity character U+0067",
+"input": "&#x0067;",
+"output": [["Character", "\u0067"]]},
+
+{"description": "Valid numeric entity character U+0068",
+"input": "&#x0068;",
+"output": [["Character", "\u0068"]]},
+
+{"description": "Valid numeric entity character U+0069",
+"input": "&#x0069;",
+"output": [["Character", "\u0069"]]},
+
+{"description": "Valid numeric entity character U+006A",
+"input": "&#x006a;",
+"output": [["Character", "\u006A"]]},
+
+{"description": "Valid numeric entity character U+006B",
+"input": "&#x006b;",
+"output": [["Character", "\u006B"]]},
+
+{"description": "Valid numeric entity character U+006C",
+"input": "&#x006c;",
+"output": [["Character", "\u006C"]]},
+
+{"description": "Valid numeric entity character U+006D",
+"input": "&#x006d;",
+"output": [["Character", "\u006D"]]},
+
+{"description": "Valid numeric entity character U+006E",
+"input": "&#x006e;",
+"output": [["Character", "\u006E"]]},
+
+{"description": "Valid numeric entity character U+006F",
+"input": "&#x006f;",
+"output": [["Character", "\u006F"]]},
+
+{"description": "Valid numeric entity character U+0070",
+"input": "&#x0070;",
+"output": [["Character", "\u0070"]]},
+
+{"description": "Valid numeric entity character U+0071",
+"input": "&#x0071;",
+"output": [["Character", "\u0071"]]},
+
+{"description": "Valid numeric entity character U+0072",
+"input": "&#x0072;",
+"output": [["Character", "\u0072"]]},
+
+{"description": "Valid numeric entity character U+0073",
+"input": "&#x0073;",
+"output": [["Character", "\u0073"]]},
+
+{"description": "Valid numeric entity character U+0074",
+"input": "&#x0074;",
+"output": [["Character", "\u0074"]]},
+
+{"description": "Valid numeric entity character U+0075",
+"input": "&#x0075;",
+"output": [["Character", "\u0075"]]},
+
+{"description": "Valid numeric entity character U+0076",
+"input": "&#x0076;",
+"output": [["Character", "\u0076"]]},
+
+{"description": "Valid numeric entity character U+0077",
+"input": "&#x0077;",
+"output": [["Character", "\u0077"]]},
+
+{"description": "Valid numeric entity character U+0078",
+"input": "&#x0078;",
+"output": [["Character", "\u0078"]]},
+
+{"description": "Valid numeric entity character U+0079",
+"input": "&#x0079;",
+"output": [["Character", "\u0079"]]},
+
+{"description": "Valid numeric entity character U+007A",
+"input": "&#x007a;",
+"output": [["Character", "\u007A"]]},
+
+{"description": "Valid numeric entity character U+007B",
+"input": "&#x007b;",
+"output": [["Character", "\u007B"]]},
+
+{"description": "Valid numeric entity character U+007C",
+"input": "&#x007c;",
+"output": [["Character", "\u007C"]]},
+
+{"description": "Valid numeric entity character U+007D",
+"input": "&#x007d;",
+"output": [["Character", "\u007D"]]},
+
+{"description": "Valid numeric entity character U+007E",
+"input": "&#x007e;",
+"output": [["Character", "\u007E"]]},
+
+{"description": "Valid numeric entity character U+00A0",
+"input": "&#x00a0;",
+"output": [["Character", "\u00A0"]]},
+
+{"description": "Valid numeric entity character U+00A1",
+"input": "&#x00a1;",
+"output": [["Character", "\u00A1"]]},
+
+{"description": "Valid numeric entity character U+00A2",
+"input": "&#x00a2;",
+"output": [["Character", "\u00A2"]]},
+
+{"description": "Valid numeric entity character U+00A3",
+"input": "&#x00a3;",
+"output": [["Character", "\u00A3"]]},
+
+{"description": "Valid numeric entity character U+00A4",
+"input": "&#x00a4;",
+"output": [["Character", "\u00A4"]]},
+
+{"description": "Valid numeric entity character U+00A5",
+"input": "&#x00a5;",
+"output": [["Character", "\u00A5"]]},
+
+{"description": "Valid numeric entity character U+00A6",
+"input": "&#x00a6;",
+"output": [["Character", "\u00A6"]]},
+
+{"description": "Valid numeric entity character U+00A7",
+"input": "&#x00a7;",
+"output": [["Character", "\u00A7"]]},
+
+{"description": "Valid numeric entity character U+00A8",
+"input": "&#x00a8;",
+"output": [["Character", "\u00A8"]]},
+
+{"description": "Valid numeric entity character U+00A9",
+"input": "&#x00a9;",
+"output": [["Character", "\u00A9"]]},
+
+{"description": "Valid numeric entity character U+00AA",
+"input": "&#x00aa;",
+"output": [["Character", "\u00AA"]]},
+
+{"description": "Valid numeric entity character U+00AB",
+"input": "&#x00ab;",
+"output": [["Character", "\u00AB"]]},
+
+{"description": "Valid numeric entity character U+00AC",
+"input": "&#x00ac;",
+"output": [["Character", "\u00AC"]]},
+
+{"description": "Valid numeric entity character U+00AD",
+"input": "&#x00ad;",
+"output": [["Character", "\u00AD"]]},
+
+{"description": "Valid numeric entity character U+00AE",
+"input": "&#x00ae;",
+"output": [["Character", "\u00AE"]]},
+
+{"description": "Valid numeric entity character U+00AF",
+"input": "&#x00af;",
+"output": [["Character", "\u00AF"]]},
+
+{"description": "Valid numeric entity character U+00B0",
+"input": "&#x00b0;",
+"output": [["Character", "\u00B0"]]},
+
+{"description": "Valid numeric entity character U+00B1",
+"input": "&#x00b1;",
+"output": [["Character", "\u00B1"]]},
+
+{"description": "Valid numeric entity character U+00B2",
+"input": "&#x00b2;",
+"output": [["Character", "\u00B2"]]},
+
+{"description": "Valid numeric entity character U+00B3",
+"input": "&#x00b3;",
+"output": [["Character", "\u00B3"]]},
+
+{"description": "Valid numeric entity character U+00B4",
+"input": "&#x00b4;",
+"output": [["Character", "\u00B4"]]},
+
+{"description": "Valid numeric entity character U+00B5",
+"input": "&#x00b5;",
+"output": [["Character", "\u00B5"]]},
+
+{"description": "Valid numeric entity character U+00B6",
+"input": "&#x00b6;",
+"output": [["Character", "\u00B6"]]},
+
+{"description": "Valid numeric entity character U+00B7",
+"input": "&#x00b7;",
+"output": [["Character", "\u00B7"]]},
+
+{"description": "Valid numeric entity character U+00B8",
+"input": "&#x00b8;",
+"output": [["Character", "\u00B8"]]},
+
+{"description": "Valid numeric entity character U+00B9",
+"input": "&#x00b9;",
+"output": [["Character", "\u00B9"]]},
+
+{"description": "Valid numeric entity character U+00BA",
+"input": "&#x00ba;",
+"output": [["Character", "\u00BA"]]},
+
+{"description": "Valid numeric entity character U+00BB",
+"input": "&#x00bb;",
+"output": [["Character", "\u00BB"]]},
+
+{"description": "Valid numeric entity character U+00BC",
+"input": "&#x00bc;",
+"output": [["Character", "\u00BC"]]},
+
+{"description": "Valid numeric entity character U+00BD",
+"input": "&#x00bd;",
+"output": [["Character", "\u00BD"]]},
+
+{"description": "Valid numeric entity character U+00BE",
+"input": "&#x00be;",
+"output": [["Character", "\u00BE"]]},
+
+{"description": "Valid numeric entity character U+00BF",
+"input": "&#x00bf;",
+"output": [["Character", "\u00BF"]]},
+
+{"description": "Valid numeric entity character U+00C0",
+"input": "&#x00c0;",
+"output": [["Character", "\u00C0"]]},
+
+{"description": "Valid numeric entity character U+00C1",
+"input": "&#x00c1;",
+"output": [["Character", "\u00C1"]]},
+
+{"description": "Valid numeric entity character U+00C2",
+"input": "&#x00c2;",
+"output": [["Character", "\u00C2"]]},
+
+{"description": "Valid numeric entity character U+00C3",
+"input": "&#x00c3;",
+"output": [["Character", "\u00C3"]]},
+
+{"description": "Valid numeric entity character U+00C4",
+"input": "&#x00c4;",
+"output": [["Character", "\u00C4"]]},
+
+{"description": "Valid numeric entity character U+00C5",
+"input": "&#x00c5;",
+"output": [["Character", "\u00C5"]]},
+
+{"description": "Valid numeric entity character U+00C6",
+"input": "&#x00c6;",
+"output": [["Character", "\u00C6"]]},
+
+{"description": "Valid numeric entity character U+00C7",
+"input": "&#x00c7;",
+"output": [["Character", "\u00C7"]]},
+
+{"description": "Valid numeric entity character U+00C8",
+"input": "&#x00c8;",
+"output": [["Character", "\u00C8"]]},
+
+{"description": "Valid numeric entity character U+00C9",
+"input": "&#x00c9;",
+"output": [["Character", "\u00C9"]]},
+
+{"description": "Valid numeric entity character U+00CA",
+"input": "&#x00ca;",
+"output": [["Character", "\u00CA"]]},
+
+{"description": "Valid numeric entity character U+00CB",
+"input": "&#x00cb;",
+"output": [["Character", "\u00CB"]]},
+
+{"description": "Valid numeric entity character U+00CC",
+"input": "&#x00cc;",
+"output": [["Character", "\u00CC"]]},
+
+{"description": "Valid numeric entity character U+00CD",
+"input": "&#x00cd;",
+"output": [["Character", "\u00CD"]]},
+
+{"description": "Valid numeric entity character U+00CE",
+"input": "&#x00ce;",
+"output": [["Character", "\u00CE"]]},
+
+{"description": "Valid numeric entity character U+00CF",
+"input": "&#x00cf;",
+"output": [["Character", "\u00CF"]]},
+
+{"description": "Valid numeric entity character U+00D0",
+"input": "&#x00d0;",
+"output": [["Character", "\u00D0"]]},
+
+{"description": "Valid numeric entity character U+00D1",
+"input": "&#x00d1;",
+"output": [["Character", "\u00D1"]]},
+
+{"description": "Valid numeric entity character U+00D2",
+"input": "&#x00d2;",
+"output": [["Character", "\u00D2"]]},
+
+{"description": "Valid numeric entity character U+00D3",
+"input": "&#x00d3;",
+"output": [["Character", "\u00D3"]]},
+
+{"description": "Valid numeric entity character U+00D4",
+"input": "&#x00d4;",
+"output": [["Character", "\u00D4"]]},
+
+{"description": "Valid numeric entity character U+00D5",
+"input": "&#x00d5;",
+"output": [["Character", "\u00D5"]]},
+
+{"description": "Valid numeric entity character U+00D6",
+"input": "&#x00d6;",
+"output": [["Character", "\u00D6"]]},
+
+{"description": "Valid numeric entity character U+00D7",
+"input": "&#x00d7;",
+"output": [["Character", "\u00D7"]]},
+
+{"description": "Valid numeric entity character U+00D8",
+"input": "&#x00d8;",
+"output": [["Character", "\u00D8"]]},
+
+{"description": "Valid numeric entity character U+00D9",
+"input": "&#x00d9;",
+"output": [["Character", "\u00D9"]]},
+
+{"description": "Valid numeric entity character U+00DA",
+"input": "&#x00da;",
+"output": [["Character", "\u00DA"]]},
+
+{"description": "Valid numeric entity character U+00DB",
+"input": "&#x00db;",
+"output": [["Character", "\u00DB"]]},
+
+{"description": "Valid numeric entity character U+00DC",
+"input": "&#x00dc;",
+"output": [["Character", "\u00DC"]]},
+
+{"description": "Valid numeric entity character U+00DD",
+"input": "&#x00dd;",
+"output": [["Character", "\u00DD"]]},
+
+{"description": "Valid numeric entity character U+00DE",
+"input": "&#x00de;",
+"output": [["Character", "\u00DE"]]},
+
+{"description": "Valid numeric entity character U+00DF",
+"input": "&#x00df;",
+"output": [["Character", "\u00DF"]]},
+
+{"description": "Valid numeric entity character U+00E0",
+"input": "&#x00e0;",
+"output": [["Character", "\u00E0"]]},
+
+{"description": "Valid numeric entity character U+00E1",
+"input": "&#x00e1;",
+"output": [["Character", "\u00E1"]]},
+
+{"description": "Valid numeric entity character U+00E2",
+"input": "&#x00e2;",
+"output": [["Character", "\u00E2"]]},
+
+{"description": "Valid numeric entity character U+00E3",
+"input": "&#x00e3;",
+"output": [["Character", "\u00E3"]]},
+
+{"description": "Valid numeric entity character U+00E4",
+"input": "&#x00e4;",
+"output": [["Character", "\u00E4"]]},
+
+{"description": "Valid numeric entity character U+00E5",
+"input": "&#x00e5;",
+"output": [["Character", "\u00E5"]]},
+
+{"description": "Valid numeric entity character U+00E6",
+"input": "&#x00e6;",
+"output": [["Character", "\u00E6"]]},
+
+{"description": "Valid numeric entity character U+00E7",
+"input": "&#x00e7;",
+"output": [["Character", "\u00E7"]]},
+
+{"description": "Valid numeric entity character U+00E8",
+"input": "&#x00e8;",
+"output": [["Character", "\u00E8"]]},
+
+{"description": "Valid numeric entity character U+00E9",
+"input": "&#x00e9;",
+"output": [["Character", "\u00E9"]]},
+
+{"description": "Valid numeric entity character U+00EA",
+"input": "&#x00ea;",
+"output": [["Character", "\u00EA"]]},
+
+{"description": "Valid numeric entity character U+00EB",
+"input": "&#x00eb;",
+"output": [["Character", "\u00EB"]]},
+
+{"description": "Valid numeric entity character U+00EC",
+"input": "&#x00ec;",
+"output": [["Character", "\u00EC"]]},
+
+{"description": "Valid numeric entity character U+00ED",
+"input": "&#x00ed;",
+"output": [["Character", "\u00ED"]]},
+
+{"description": "Valid numeric entity character U+00EE",
+"input": "&#x00ee;",
+"output": [["Character", "\u00EE"]]},
+
+{"description": "Valid numeric entity character U+00EF",
+"input": "&#x00ef;",
+"output": [["Character", "\u00EF"]]},
+
+{"description": "Valid numeric entity character U+00F0",
+"input": "&#x00f0;",
+"output": [["Character", "\u00F0"]]},
+
+{"description": "Valid numeric entity character U+00F1",
+"input": "&#x00f1;",
+"output": [["Character", "\u00F1"]]},
+
+{"description": "Valid numeric entity character U+00F2",
+"input": "&#x00f2;",
+"output": [["Character", "\u00F2"]]},
+
+{"description": "Valid numeric entity character U+00F3",
+"input": "&#x00f3;",
+"output": [["Character", "\u00F3"]]},
+
+{"description": "Valid numeric entity character U+00F4",
+"input": "&#x00f4;",
+"output": [["Character", "\u00F4"]]},
+
+{"description": "Valid numeric entity character U+00F5",
+"input": "&#x00f5;",
+"output": [["Character", "\u00F5"]]},
+
+{"description": "Valid numeric entity character U+00F6",
+"input": "&#x00f6;",
+"output": [["Character", "\u00F6"]]},
+
+{"description": "Valid numeric entity character U+00F7",
+"input": "&#x00f7;",
+"output": [["Character", "\u00F7"]]},
+
+{"description": "Valid numeric entity character U+00F8",
+"input": "&#x00f8;",
+"output": [["Character", "\u00F8"]]},
+
+{"description": "Valid numeric entity character U+00F9",
+"input": "&#x00f9;",
+"output": [["Character", "\u00F9"]]},
+
+{"description": "Valid numeric entity character U+00FA",
+"input": "&#x00fa;",
+"output": [["Character", "\u00FA"]]},
+
+{"description": "Valid numeric entity character U+00FB",
+"input": "&#x00fb;",
+"output": [["Character", "\u00FB"]]},
+
+{"description": "Valid numeric entity character U+00FC",
+"input": "&#x00fc;",
+"output": [["Character", "\u00FC"]]},
+
+{"description": "Valid numeric entity character U+00FD",
+"input": "&#x00fd;",
+"output": [["Character", "\u00FD"]]},
+
+{"description": "Valid numeric entity character U+00FE",
+"input": "&#x00fe;",
+"output": [["Character", "\u00FE"]]},
+
+{"description": "Valid numeric entity character U+00FF",
+"input": "&#x00ff;",
+"output": [["Character", "\u00FF"]]},
+
+{"description": "Valid numeric entity character U+D7FF",
+"input": "&#xd7ff;",
+"output": [["Character", "\uD7FF"]]},
+
+{"description": "Valid numeric entity character U+E000",
+"input": "&#xe000;",
+"output": [["Character", "\uE000"]]},
+
+{"description": "Valid numeric entity character U+FDCF",
+"input": "&#xfdcf;",
+"output": [["Character", "\uFDCF"]]},
+
+{"description": "Valid numeric entity character U+FDF0",
+"input": "&#xfdf0;",
+"output": [["Character", "\uFDF0"]]},
+
+{"description": "Valid numeric entity character U+FFFD",
+"input": "&#xfffd;",
+"output": [["Character", "\uFFFD"]]},
+
+{"description": "Valid numeric entity character U+10000",
+"input": "&#x10000;",
+"output": [["Character", "\uD800\uDC00"]]},
+
+{"description": "Valid numeric entity character U+1FFFD",
+"input": "&#x1fffd;",
+"output": [["Character", "\uD83F\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+20000",
+"input": "&#x20000;",
+"output": [["Character", "\uD840\uDC00"]]},
+
+{"description": "Valid numeric entity character U+2FFFD",
+"input": "&#x2fffd;",
+"output": [["Character", "\uD87F\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+30000",
+"input": "&#x30000;",
+"output": [["Character", "\uD880\uDC00"]]},
+
+{"description": "Valid numeric entity character U+3FFFD",
+"input": "&#x3fffd;",
+"output": [["Character", "\uD8BF\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+40000",
+"input": "&#x40000;",
+"output": [["Character", "\uD8C0\uDC00"]]},
+
+{"description": "Valid numeric entity character U+4FFFD",
+"input": "&#x4fffd;",
+"output": [["Character", "\uD8FF\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+50000",
+"input": "&#x50000;",
+"output": [["Character", "\uD900\uDC00"]]},
+
+{"description": "Valid numeric entity character U+5FFFD",
+"input": "&#x5fffd;",
+"output": [["Character", "\uD93F\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+60000",
+"input": "&#x60000;",
+"output": [["Character", "\uD940\uDC00"]]},
+
+{"description": "Valid numeric entity character U+6FFFD",
+"input": "&#x6fffd;",
+"output": [["Character", "\uD97F\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+70000",
+"input": "&#x70000;",
+"output": [["Character", "\uD980\uDC00"]]},
+
+{"description": "Valid numeric entity character U+7FFFD",
+"input": "&#x7fffd;",
+"output": [["Character", "\uD9BF\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+80000",
+"input": "&#x80000;",
+"output": [["Character", "\uD9C0\uDC00"]]},
+
+{"description": "Valid numeric entity character U+8FFFD",
+"input": "&#x8fffd;",
+"output": [["Character", "\uD9FF\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+90000",
+"input": "&#x90000;",
+"output": [["Character", "\uDA00\uDC00"]]},
+
+{"description": "Valid numeric entity character U+9FFFD",
+"input": "&#x9fffd;",
+"output": [["Character", "\uDA3F\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+A0000",
+"input": "&#xa0000;",
+"output": [["Character", "\uDA40\uDC00"]]},
+
+{"description": "Valid numeric entity character U+AFFFD",
+"input": "&#xafffd;",
+"output": [["Character", "\uDA7F\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+B0000",
+"input": "&#xb0000;",
+"output": [["Character", "\uDA80\uDC00"]]},
+
+{"description": "Valid numeric entity character U+BFFFD",
+"input": "&#xbfffd;",
+"output": [["Character", "\uDABF\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+C0000",
+"input": "&#xc0000;",
+"output": [["Character", "\uDAC0\uDC00"]]},
+
+{"description": "Valid numeric entity character U+CFFFD",
+"input": "&#xcfffd;",
+"output": [["Character", "\uDAFF\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+D0000",
+"input": "&#xd0000;",
+"output": [["Character", "\uDB00\uDC00"]]},
+
+{"description": "Valid numeric entity character U+DFFFD",
+"input": "&#xdfffd;",
+"output": [["Character", "\uDB3F\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+E0000",
+"input": "&#xe0000;",
+"output": [["Character", "\uDB40\uDC00"]]},
+
+{"description": "Valid numeric entity character U+EFFFD",
+"input": "&#xefffd;",
+"output": [["Character", "\uDB7F\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+F0000",
+"input": "&#xf0000;",
+"output": [["Character", "\uDB80\uDC00"]]},
+
+{"description": "Valid numeric entity character U+FFFFD",
+"input": "&#xffffd;",
+"output": [["Character", "\uDBBF\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+100000",
+"input": "&#x100000;",
+"output": [["Character", "\uDBC0\uDC00"]]},
+
+{"description": "Valid numeric entity character U+10FFFD",
+"input": "&#x10fffd;",
+"output": [["Character", "\uDBFF\uDFFD"]]}
+
+]}
+
+
diff --git a/lib/html5lib/tests/testdata/tokenizer/pendingSpecChanges.test b/lib/html5lib/tests/testdata/tokenizer/pendingSpecChanges.test
new file mode 100644
index 00000000..191434f1
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tokenizer/pendingSpecChanges.test
@@ -0,0 +1,9 @@
+{"tests": [
+
+{"description":"<!---- >",
+"input":"<!---- >",
+"output":[["Comment","-- >"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 9 }
+]}
+]}
diff --git a/lib/html5lib/tests/testdata/tokenizer/test1.test b/lib/html5lib/tests/testdata/tokenizer/test1.test
new file mode 100644
index 00000000..cb0eb48a
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tokenizer/test1.test
@@ -0,0 +1,349 @@
+{"tests": [
+
+{"description":"Correct Doctype lowercase",
+"input":"<!DOCTYPE html>",
+"output":[["DOCTYPE", "html", null, null, true]]},
+
+
+{"description":"Correct Doctype uppercase",
+"input":"<!DOCTYPE HTML>",
+"output":[["DOCTYPE", "html", null, null, true]]},
+
+{"description":"Correct Doctype mixed case",
+"input":"<!DOCTYPE HtMl>",
+"output":[["DOCTYPE", "html", null, null, true]]},
+
+{"description":"Correct Doctype case with EOF",
+"input":"<!DOCTYPE HtMl",
+"output":[["DOCTYPE", "html", null, null, false]],
+"errors":[
+    { "code": "eof-in-doctype", "line": 1, "col": 15 }
+]},
+
+{"description":"Truncated doctype start",
+"input":"<!DOC>",
+"output":[["Comment", "DOC"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"Doctype in error",
+"input":"<!DOCTYPE foo>",
+"output":[["DOCTYPE", "foo", null, null, true]]},
+
+{"description":"Single Start Tag",
+"input":"<h>",
+"output":[["StartTag", "h", {}]]},
+
+{"description":"Empty end tag",
+"input":"</>",
+"output":[],
+"errors":[
+    { "code": "missing-end-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"Empty start tag",
+"input":"<>",
+"output":[["Character", "<>"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"Start Tag w/attribute",
+"input":"<h a='b'>",
+"output":[["StartTag", "h", {"a":"b"}]]},
+
+{"description":"Start Tag w/attribute no quotes",
+"input":"<h a=b>",
+"output":[["StartTag", "h", {"a":"b"}]]},
+
+{"description":"Start/End Tag",
+"input":"<h></h>",
+"output":[["StartTag", "h", {}], ["EndTag", "h"]]},
+
+{"description":"Two unclosed start tags",
+"input":"<p>One<p>Two",
+"output":[["StartTag", "p", {}], ["Character", "One"], ["StartTag", "p", {}], ["Character", "Two"]]},
+
+{"description":"End Tag w/attribute",
+"input":"<h></h a='b'>",
+"output":[["StartTag", "h", {}], ["EndTag", "h"]],
+"errors":[
+    { "code": "end-tag-with-attributes", "line": 1, "col": 13 }
+]},
+
+{"description":"Multiple atts",
+"input":"<h a='b' c='d'>",
+"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
+
+{"description":"Multiple atts no space",
+"input":"<h a='b'c='d'>",
+"output":[["StartTag", "h", {"a":"b", "c":"d"}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 9 }
+]},
+
+{"description":"Repeated attr",
+ "input":"<h a='b' a='d'>",
+ "output":[["StartTag", "h", {"a":"b"}]],
+ "errors":[
+    { "code": "duplicate-attribute", "line": 1, "col": 11 }
+]},
+
+{"description":"Simple comment",
+ "input":"<!--comment-->",
+ "output":[["Comment", "comment"]]},
+
+{"description":"Comment, Central dash no space",
+ "input":"<!----->",
+ "output":[["Comment", "-"]]},
+
+{"description":"Comment, two central dashes",
+"input":"<!-- --comment -->",
+"output":[["Comment", " --comment "]]},
+
+{"description":"Comment, central less-than bang",
+"input":"<!--<!-->",
+"output":[["Comment", "<!"]]},
+
+{"description":"Unfinished comment",
+"input":"<!--comment",
+"output":[["Comment", "comment"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 12 }
+]},
+
+{"description":"Unfinished comment after start of nested comment",
+"input":"<!-- <!--",
+"output":[["Comment", " <!"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 10 }
+]},
+
+{"description":"Start of a comment",
+"input":"<!-",
+"output":[["Comment", "-"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"Short comment",
+"input":"<!-->",
+"output":[["Comment", ""]],
+"errors":[
+    { "code": "abrupt-closing-of-empty-comment", "line": 1, "col": 5 }
+]},
+
+{"description":"Short comment two",
+"input":"<!--->",
+"output":[["Comment", ""]],
+"errors":[
+    { "code": "abrupt-closing-of-empty-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"Short comment three",
+ "input":"<!---->",
+ "output":[["Comment", ""]]},
+
+{"description":"< in comment",
+"input":"<!-- <test-->",
+"output":[["Comment", " <test"]]},
+
+{"description":"<! in comment",
+"input":"<!-- <!test-->",
+"output":[["Comment", " <!test"]]},
+
+{"description":"<!- in comment",
+"input":"<!-- <!-test-->",
+"output":[["Comment", " <!-test"]]},
+
+{"description":"Nested comment",
+"input":"<!-- <!--test-->",
+"output":[["Comment", " <!--test"]],
+"errors":[
+    { "code": "nested-comment", "line": 1, "col": 10 }
+]},
+
+{"description":"Nested comment with extra <",
+"input":"<!-- <<!--test-->",
+"output":[["Comment", " <<!--test"]],
+"errors":[
+    { "code": "nested-comment", "line": 1, "col": 11 }
+]},
+
+{"description":"< in script data",
+"initialStates":["Script data state"],
+"input":"<test-->",
+"output":[["Character", "<test-->"]]},
+
+{"description":"<! in script data",
+"initialStates":["Script data state"],
+"input":"<!test-->",
+"output":[["Character", "<!test-->"]]},
+
+{"description":"<!- in script data",
+"initialStates":["Script data state"],
+"input":"<!-test-->",
+"output":[["Character", "<!-test-->"]]},
+
+{"description":"Escaped script data",
+"initialStates":["Script data state"],
+"input":"<!--test-->",
+"output":[["Character", "<!--test-->"]]},
+
+{"description":"< in script HTML comment",
+"initialStates":["Script data state"],
+"input":"<!-- < test -->",
+"output":[["Character", "<!-- < test -->"]]},
+
+{"description":"</ in script HTML comment",
+"initialStates":["Script data state"],
+"input":"<!-- </ test -->",
+"output":[["Character", "<!-- </ test -->"]]},
+
+{"description":"Start tag in script HTML comment",
+"initialStates":["Script data state"],
+"input":"<!-- <test> -->",
+"output":[["Character", "<!-- <test> -->"]]},
+
+{"description":"End tag in script HTML comment",
+"initialStates":["Script data state"],
+"input":"<!-- </test> -->",
+"output":[["Character", "<!-- </test> -->"]]},
+
+{"description":"- in script HTML comment double escaped",
+"initialStates":["Script data state"],
+"input":"<!--<script>-</script>-->",
+"output":[["Character", "<!--<script>-</script>-->"]]},
+
+{"description":"-- in script HTML comment double escaped",
+"initialStates":["Script data state"],
+"input":"<!--<script>--</script>-->",
+"output":[["Character", "<!--<script>--</script>-->"]]},
+
+{"description":"--- in script HTML comment double escaped",
+"initialStates":["Script data state"],
+"input":"<!--<script>---</script>-->",
+"output":[["Character", "<!--<script>---</script>-->"]]},
+
+{"description":"- spaced in script HTML comment double escaped",
+"initialStates":["Script data state"],
+"input":"<!--<script> - </script>-->",
+"output":[["Character", "<!--<script> - </script>-->"]]},
+
+{"description":"-- spaced in script HTML comment double escaped",
+"initialStates":["Script data state"],
+"input":"<!--<script> -- </script>-->",
+"output":[["Character", "<!--<script> -- </script>-->"]]},
+
+{"description":"Ampersand EOF",
+"input":"&",
+"output":[["Character", "&"]]},
+
+{"description":"Ampersand ampersand EOF",
+"input":"&&",
+"output":[["Character", "&&"]]},
+
+{"description":"Ampersand space EOF",
+"input":"& ",
+"output":[["Character", "& "]]},
+
+{"description":"Unfinished entity",
+"input":"&f",
+"output":[["Character", "&f"]]},
+
+{"description":"Ampersand, number sign",
+"input":"&#",
+"output":[["Character", "&#"]],
+"errors":[
+    { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 3 }
+]},
+
+{"description":"Unfinished numeric entity",
+"input":"&#x",
+"output":[["Character", "&#x"]],
+"errors":[
+    { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 4 }
+]},
+
+{"description":"Entity with trailing semicolon (1)",
+"input":"I'm &not;it",
+"output":[["Character","I'm \u00ACit"]]},
+
+{"description":"Entity with trailing semicolon (2)",
+"input":"I'm &notin;",
+"output":[["Character","I'm \u2209"]]},
+
+{"description":"Entity without trailing semicolon (1)",
+"input":"I'm &notit",
+"output":[["Character","I'm \u00ACit"]],
+"errors": [
+    {"code" : "missing-semicolon-after-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description":"Entity without trailing semicolon (2)",
+"input":"I'm &notin",
+"output":[["Character","I'm \u00ACin"]],
+"errors": [
+    {"code" : "missing-semicolon-after-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description":"Partial entity match at end of file",
+"input":"I'm &no",
+"output":[["Character","I'm &no"]]},
+
+{"description":"Non-ASCII character reference name",
+"input":"&\u00AC;",
+"output":[["Character", "&\u00AC;"]]},
+
+{"description":"ASCII decimal entity",
+"input":"&#0036;",
+"output":[["Character","$"]]},
+
+{"description":"ASCII hexadecimal entity",
+"input":"&#x3f;",
+"output":[["Character","?"]]},
+
+{"description":"Hexadecimal entity in attribute",
+"input":"<h a='&#x3f;'></h>",
+"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]},
+
+{"description":"Entity in attribute without semicolon ending in x",
+"input":"<h a='&notx'>",
+"output":[["StartTag", "h", {"a":"&notx"}]]},
+
+{"description":"Entity in attribute without semicolon ending in 1",
+"input":"<h a='&not1'>",
+"output":[["StartTag", "h", {"a":"&not1"}]]},
+
+{"description":"Entity in attribute without semicolon ending in i",
+"input":"<h a='&noti'>",
+"output":[["StartTag", "h", {"a":"&noti"}]]},
+
+{"description":"Entity in attribute without semicolon",
+"input":"<h a='&COPY'>",
+"output":[["StartTag", "h", {"a":"\u00A9"}]],
+"errors": [
+    {"code" : "missing-semicolon-after-character-reference", "line": 1, "col": 12 }
+]},
+
+{"description":"Unquoted attribute ending in ampersand",
+"input":"<s o=& t>",
+"output":[["StartTag","s",{"o":"&","t":""}]]},
+
+{"description":"Unquoted attribute at end of tag with final character of &, with tag followed by characters",
+"input":"<a a=a&>foo",
+"output":[["StartTag", "a", {"a":"a&"}], ["Character", "foo"]]},
+
+{"description":"plaintext element",
+ "input":"<plaintext>foobar",
+ "output":[["StartTag","plaintext",{}], ["Character","foobar"]]},
+
+{"description":"Open angled bracket in unquoted attribute value state",
+ "input":"<a a=f<>",
+ "output":[["StartTag", "a", {"a":"f<"}]],
+ "errors":[
+    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 }
+]}
+
+]}
diff --git a/lib/html5lib/tests/testdata/tokenizer/test2.test b/lib/html5lib/tests/testdata/tokenizer/test2.test
new file mode 100644
index 00000000..f80f27d1
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tokenizer/test2.test
@@ -0,0 +1,275 @@
+{"tests": [
+
+{"description":"DOCTYPE without name",
+"input":"<!DOCTYPE>",
+"output":[["DOCTYPE", null, null, null, false]],
+"errors":[
+    { "code": "missing-doctype-name", "line": 1, "col": 10 }
+]},
+
+{"description":"DOCTYPE without space before name",
+"input":"<!DOCTYPEhtml>",
+"output":[["DOCTYPE", "html", null, null, true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 }
+]},
+
+{"description":"Incorrect DOCTYPE without a space before name",
+"input":"<!DOCTYPEfoo>",
+"output":[["DOCTYPE", "foo", null, null, true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 }
+]},
+
+{"description":"DOCTYPE with publicId",
+"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">",
+"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]},
+
+{"description":"DOCTYPE with EOF after PUBLIC",
+"input":"<!DOCTYPE html PUBLIC",
+"output":[["DOCTYPE", "html", null, null, false]],
+"errors": [
+    { "code": "eof-in-doctype", "col": 22, "line": 1 }
+]},
+
+{"description":"DOCTYPE with EOF after PUBLIC '",
+"input":"<!DOCTYPE html PUBLIC '",
+"output":[["DOCTYPE", "html", "", null, false]],
+"errors": [
+    { "code": "eof-in-doctype", "col": 24, "line": 1 }
+]},
+
+{"description":"DOCTYPE with EOF after PUBLIC 'x",
+"input":"<!DOCTYPE html PUBLIC 'x",
+"output":[["DOCTYPE", "html", "x", null, false]],
+"errors": [
+    { "code": "eof-in-doctype", "col": 25, "line": 1 }
+]},
+
+{"description":"DOCTYPE with systemId",
+"input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">",
+"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
+
+{"description":"DOCTYPE with single-quoted systemId",
+"input":"<!DOCTYPE html SYSTEM '-//W3C//DTD HTML Transitional 4.01//EN'>",
+"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
+
+{"description":"DOCTYPE with publicId and systemId",
+"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">",
+"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
+
+{"description":"DOCTYPE with > in double-quoted publicId",
+"input":"<!DOCTYPE html PUBLIC \">x",
+"output":[["DOCTYPE", "html", "", null, false], ["Character", "x"]],
+"errors": [
+    { "code": "abrupt-doctype-public-identifier", "col": 24, "line": 1 }
+]},
+
+{"description":"DOCTYPE with > in single-quoted publicId",
+"input":"<!DOCTYPE html PUBLIC '>x",
+"output":[["DOCTYPE", "html", "", null, false], ["Character", "x"]],
+"errors": [
+    { "code": "abrupt-doctype-public-identifier", "col": 24, "line": 1 }
+]},
+
+{"description":"DOCTYPE with > in double-quoted systemId",
+"input":"<!DOCTYPE html PUBLIC \"foo\" \">x",
+"output":[["DOCTYPE", "html", "foo", "", false], ["Character", "x"]],
+"errors": [
+    { "code": "abrupt-doctype-system-identifier", "col": 30, "line": 1 }
+]},
+
+{"description":"DOCTYPE with > in single-quoted systemId",
+"input":"<!DOCTYPE html PUBLIC 'foo' '>x",
+"output":[["DOCTYPE", "html", "foo", "", false], ["Character", "x"]],
+"errors": [
+    { "code": "abrupt-doctype-system-identifier", "col": 30, "line": 1 }
+]},
+
+{"description":"Incomplete doctype",
+"input":"<!DOCTYPE html ",
+"output":[["DOCTYPE", "html", null, null, false]],
+"errors":[
+    { "code": "eof-in-doctype", "line": 1, "col": 16 }
+]},
+
+{"description":"Numeric entity representing the NUL character",
+"input":"&#0000;",
+"output":[["Character", "\uFFFD"]],
+"errors":[
+    { "code": "null-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description":"Hexadecimal entity representing the NUL character",
+"input":"&#x0000;",
+"output":[["Character", "\uFFFD"]],
+"errors":[
+    { "code": "null-character-reference", "line": 1, "col": 9 }
+]},
+
+{"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
+"input":"&#2225222;",
+"output":[["Character", "\uFFFD"]],
+"errors":[
+    { "code": "character-reference-outside-unicode-range", "line": 1, "col": 11 }
+]},
+
+{"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)",
+"input":"&#x1010FFFF;",
+"output":[["Character", "\uFFFD"]],
+"errors":[
+    { "code": "character-reference-outside-unicode-range", "line": 1, "col": 13 }
+]},
+
+{"description":"Hexadecimal entity pair representing a surrogate pair",
+"input":"&#xD869;&#xDED6;",
+"output":[["Character", "\uFFFD\uFFFD"]],
+"errors":[
+    { "code": "surrogate-character-reference", "line": 1, "col": 9 },
+    { "code": "surrogate-character-reference", "line": 1, "col": 17 }
+]},
+
+{"description":"Hexadecimal entity with mixed uppercase and lowercase",
+"input":"&#xaBcD;",
+"output":[["Character", "\uABCD"]]},
+
+{"description":"Entity without a name",
+"input":"&;",
+"output":[["Character", "&;"]]},
+
+{"description":"Unescaped ampersand in attribute value",
+"input":"<h a='&'>",
+"output":[["StartTag", "h", { "a":"&" }]]},
+
+
+{"description":"StartTag containing <",
+"input":"<a<b>",
+"output":[["StartTag", "a<b", { }]]},
+
+{"description":"Non-void element containing trailing /",
+"input":"<h/>",
+"output":[["StartTag","h",{},true]]},
+
+{"description":"Void element with permitted slash",
+"input":"<br/>",
+"output":[["StartTag","br",{},true]]},
+
+{"description":"Void element with permitted slash (with attribute)",
+"input":"<br foo='bar'/>",
+"output":[["StartTag","br",{"foo":"bar"},true]]},
+
+{"description":"StartTag containing /",
+"input":"<h/a='b'>",
+"output":[["StartTag", "h", { "a":"b" }]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"Double-quoted attribute value",
+"input":"<h a=\"b\">",
+"output":[["StartTag", "h", { "a":"b" }]]},
+
+{"description":"Unescaped </",
+"input":"</",
+"output":[["Character", "</"]],
+"errors":[
+    { "code": "eof-before-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"Illegal end tag name",
+"input":"</1>",
+"output":[["Comment", "1"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"Simili processing instruction",
+"input":"<?namespace>",
+"output":[["Comment", "?namespace"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"A bogus comment stops at >, even if preceeded by two dashes",
+"input":"<?foo-->",
+"output":[["Comment", "?foo--"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"Unescaped <",
+"input":"foo < bar",
+"output":[["Character", "foo < bar"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 6 }
+]},
+
+{"description":"Null Byte Replacement",
+"input":"\u0000",
+"output":[["Character", "\u0000"]],
+"errors":[
+    { "code": "unexpected-null-character", "line": 1, "col": 1 }
+]},
+
+{"description":"Comment with dash",
+"input":"<!---x",
+"output":[["Comment", "-x"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"Entity + newline",
+"input":"\nx\n&gt;\n",
+"output":[["Character","\nx\n>\n"]]},
+
+{"description":"Start tag with no attributes but space before the greater-than sign",
+"input":"<h >",
+"output":[["StartTag", "h", {}]]},
+
+{"description":"Empty attribute followed by uppercase attribute",
+"input":"<h a B=''>",
+"output":[["StartTag", "h", {"a":"", "b":""}]]},
+
+{"description":"Double-quote after attribute name",
+"input":"<h a \">",
+"output":[["StartTag", "h", {"a":"", "\"":""}]],
+"errors":[
+    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 6 }
+]},
+
+{"description":"Single-quote after attribute name",
+"input":"<h a '>",
+"output":[["StartTag", "h", {"a":"", "'":""}]],
+"errors":[
+    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 6 }
+]},
+
+{"description":"Empty end tag with following characters",
+"input":"a</>bc",
+"output":[["Character", "abc"]],
+"errors":[
+    { "code": "missing-end-tag-name", "line": 1, "col": 4 }
+]},
+
+{"description":"Empty end tag with following tag",
+"input":"a</><b>c",
+"output":[["Character", "a"], ["StartTag", "b", {}], ["Character", "c"]],
+"errors":[
+    { "code": "missing-end-tag-name", "line": 1, "col": 4 }
+]},
+
+{"description":"Empty end tag with following comment",
+"input":"a</><!--b-->c",
+"output":[["Character", "a"], ["Comment", "b"], ["Character", "c"]],
+"errors":[
+    { "code": "missing-end-tag-name", "line": 1, "col": 4 }
+]},
+
+{"description":"Empty end tag with following end tag",
+"input":"a</></b>c",
+"output":[["Character", "a"], ["EndTag", "b"], ["Character", "c"]],
+"errors":[
+    { "code": "missing-end-tag-name", "line": 1, "col": 4 }
+]}
+
+]}
diff --git a/lib/html5lib/tests/testdata/tokenizer/test3.test b/lib/html5lib/tests/testdata/tokenizer/test3.test
new file mode 100644
index 00000000..814482c4
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tokenizer/test3.test
@@ -0,0 +1,11233 @@
+{"tests": [
+
+{"description":"[empty]",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"",
+"output":[]},
+
+{"description":"[empty]",
+"initialStates":["CDATA section state"],
+"input":"",
+"output":[],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"\\u0009",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"\u0009",
+"output":[["Character", "\u0009"]]},
+
+{"description":"\\u0009",
+"initialStates":["CDATA section state"],
+"input":"\u0009",
+"output":[["Character", "\u0009"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"\\u000A",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"\u000A",
+"output":[["Character", "\u000A"]]},
+
+{"description":"\\u000A",
+"initialStates":["CDATA section state"],
+"input":"\u000A",
+"output":[["Character", "\u000A"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"\\u000B",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"\u000B",
+"output":[["Character", "\u000B"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description":"\\u000B",
+"initialStates":["CDATA section state"],
+"input":"\u000B",
+"output":[["Character", "\u000B"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 },
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"\\u000C",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"\u000C",
+"output":[["Character", "\u000C"]]},
+
+{"description":"\\u000C",
+"initialStates":["CDATA section state"],
+"input":"\u000C",
+"output":[["Character", "\u000C"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":" ",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":" ",
+"output":[["Character", " "]]},
+
+{"description":" ",
+"initialStates":["CDATA section state"],
+"input":" ",
+"output":[["Character", " "]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"!",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"!",
+"output":[["Character", "!"]]},
+
+{"description":"!",
+"initialStates":["CDATA section state"],
+"input":"!",
+"output":[["Character", "!"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"\"",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"\"",
+"output":[["Character", "\""]]},
+
+{"description":"\"",
+"initialStates":["CDATA section state"],
+"input":"\"",
+"output":[["Character", "\""]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"%",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"%",
+"output":[["Character", "%"]]},
+
+{"description":"%",
+"initialStates":["CDATA section state"],
+"input":"%",
+"output":[["Character", "%"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"&",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"&",
+"output":[["Character", "&"]]},
+
+{"description":"&",
+"initialStates":["CDATA section state"],
+"input":"&",
+"output":[["Character", "&"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"'",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"'",
+"output":[["Character", "'"]]},
+
+{"description":"'",
+"initialStates":["CDATA section state"],
+"input":"'",
+"output":[["Character", "'"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":",",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":",",
+"output":[["Character", ","]]},
+
+{"description":",",
+"initialStates":["CDATA section state"],
+"input":",",
+"output":[["Character", ","]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"-",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"-",
+"output":[["Character", "-"]]},
+
+{"description":"-",
+"initialStates":["CDATA section state"],
+"input":"-",
+"output":[["Character", "-"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":".",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":".",
+"output":[["Character", "."]]},
+
+{"description":".",
+"initialStates":["CDATA section state"],
+"input":".",
+"output":[["Character", "."]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"/",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"/",
+"output":[["Character", "/"]]},
+
+{"description":"/",
+"initialStates":["CDATA section state"],
+"input":"/",
+"output":[["Character", "/"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"0",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"0",
+"output":[["Character", "0"]]},
+
+{"description":"0",
+"initialStates":["CDATA section state"],
+"input":"0",
+"output":[["Character", "0"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"1",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"1",
+"output":[["Character", "1"]]},
+
+{"description":"1",
+"initialStates":["CDATA section state"],
+"input":"1",
+"output":[["Character", "1"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"9",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"9",
+"output":[["Character", "9"]]},
+
+{"description":"9",
+"initialStates":["CDATA section state"],
+"input":"9",
+"output":[["Character", "9"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":";",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";",
+"output":[["Character", ";"]]},
+
+{"description":";",
+"initialStates":["CDATA section state"],
+"input":";",
+"output":[["Character", ";"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":";=",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";=",
+"output":[["Character", ";="]]},
+
+{"description":";=",
+"initialStates":["CDATA section state"],
+"input":";=",
+"output":[["Character", ";="]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";>",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";>",
+"output":[["Character", ";>"]]},
+
+{"description":";>",
+"initialStates":["CDATA section state"],
+"input":";>",
+"output":[["Character", ";>"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";?",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";?",
+"output":[["Character", ";?"]]},
+
+{"description":";?",
+"initialStates":["CDATA section state"],
+"input":";?",
+"output":[["Character", ";?"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";@",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";@",
+"output":[["Character", ";@"]]},
+
+{"description":";@",
+"initialStates":["CDATA section state"],
+"input":";@",
+"output":[["Character", ";@"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";A",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";A",
+"output":[["Character", ";A"]]},
+
+{"description":";A",
+"initialStates":["CDATA section state"],
+"input":";A",
+"output":[["Character", ";A"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";B",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";B",
+"output":[["Character", ";B"]]},
+
+{"description":";B",
+"initialStates":["CDATA section state"],
+"input":";B",
+"output":[["Character", ";B"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";Y",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";Y",
+"output":[["Character", ";Y"]]},
+
+{"description":";Y",
+"initialStates":["CDATA section state"],
+"input":";Y",
+"output":[["Character", ";Y"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";Z",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";Z",
+"output":[["Character", ";Z"]]},
+
+{"description":";Z",
+"initialStates":["CDATA section state"],
+"input":";Z",
+"output":[["Character", ";Z"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";`",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";`",
+"output":[["Character", ";`"]]},
+
+{"description":";`",
+"initialStates":["CDATA section state"],
+"input":";`",
+"output":[["Character", ";`"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";a",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";a",
+"output":[["Character", ";a"]]},
+
+{"description":";a",
+"initialStates":["CDATA section state"],
+"input":";a",
+"output":[["Character", ";a"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";b",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";b",
+"output":[["Character", ";b"]]},
+
+{"description":";b",
+"initialStates":["CDATA section state"],
+"input":";b",
+"output":[["Character", ";b"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";y",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";y",
+"output":[["Character", ";y"]]},
+
+{"description":";y",
+"initialStates":["CDATA section state"],
+"input":";y",
+"output":[["Character", ";y"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";z",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";z",
+"output":[["Character", ";z"]]},
+
+{"description":";z",
+"initialStates":["CDATA section state"],
+"input":";z",
+"output":[["Character", ";z"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";{",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";{",
+"output":[["Character", ";{"]]},
+
+{"description":";{",
+"initialStates":["CDATA section state"],
+"input":";{",
+"output":[["Character", ";{"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";\\uDBC0\\uDC00",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";\uDBC0\uDC00",
+"output":[["Character", ";\uDBC0\uDC00"]]},
+
+{"description":";\\uDBC0\\uDC00",
+"initialStates":["CDATA section state"],
+"input":";\uDBC0\uDC00",
+"output":[["Character", ";\uDBC0\uDC00"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":"<",
+"input":"<",
+"output":[["Character", "<"]],
+"errors":[
+    { "code": "eof-before-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<\\u0000",
+"input":"<\u0000",
+"output":[["Character", "<\u0000"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 },
+    { "code": "unexpected-null-character", "line": 1, "col": 2 }
+]},
+
+{"description":"<\\u0009",
+"input":"<\u0009",
+"output":[["Character", "<\u0009"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<\\u000A",
+"input":"<\u000A",
+"output":[["Character", "<\u000A"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<\\u000B",
+"input":"<\u000B",
+"output":[["Character", "<\u000B"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 2 },
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<\\u000C",
+"input":"<\u000C",
+"output":[["Character", "<\u000C"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"< ",
+"input":"< ",
+"output":[["Character", "< "]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<!",
+"input":"<!",
+"output":[["Comment", ""]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!\\u0000",
+"input":"<!\u0000",
+"output":[["Comment", "\uFFFD"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
+    { "code": "unexpected-null-character", "line": 1, "col": 3 }
+]},
+
+{"description":"<!\\u0009",
+"input":"<!\u0009",
+"output":[["Comment", "\u0009"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!\\u000A",
+"input":"<!\u000A",
+"output":[["Comment", "\u000A"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!\\u000B",
+"input":"<!\u000B",
+"output":[["Comment", "\u000B"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 3 },
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!\\u000C",
+"input":"<!\u000C",
+"output":[["Comment", "\u000C"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<! ",
+"input":"<! ",
+"output":[["Comment", " "]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<! \\u0000",
+"input":"<! \u0000",
+"output":[["Comment", " \uFFFD"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
+    { "code": "unexpected-null-character", "line": 1, "col": 4 }
+]},
+
+{"description":"<!!",
+"input":"<!!",
+"output":[["Comment", "!"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!\"",
+"input":"<!\"",
+"output":[["Comment", "\""]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!&",
+"input":"<!&",
+"output":[["Comment", "&"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!'",
+"input":"<!'",
+"output":[["Comment", "'"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!-",
+"input":"<!-",
+"output":[["Comment", "-"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!--",
+"input":"<!--",
+"output":[["Comment", ""]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 5 }
+]},
+
+{"description":"<!--\\u0000",
+"input":"<!--\u0000",
+"output":[["Comment", "\uFFFD"]],
+"errors":[
+    { "code": "unexpected-null-character", "line": 1, "col": 5 },
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--\\u0009",
+"input":"<!--\u0009",
+"output":[["Comment", "\u0009"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--\\u000A",
+"input":"<!--\u000A",
+"output":[["Comment", "\u000A"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 2, "col": 1 }
+]},
+
+{"description":"<!--\\u000B",
+"input":"<!--\u000B",
+"output":[["Comment", "\u000B"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 5 },
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--\\u000C",
+"input":"<!--\u000C",
+"output":[["Comment", "\u000C"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!-- ",
+"input":"<!-- ",
+"output":[["Comment", " "]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!-- \\u0000",
+"input":"<!-- \u0000",
+"output":[["Comment", " \uFFFD"]],
+"errors":[
+    { "code": "unexpected-null-character", "line": 1, "col": 6 },
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- \\u0009",
+"input":"<!-- \u0009",
+"output":[["Comment", " \u0009"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- \\u000A",
+"input":"<!-- \u000A",
+"output":[["Comment", " \u000A"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 2, "col": 1 }
+]},
+
+{"description":"<!-- \\u000B",
+"input":"<!-- \u000B",
+"output":[["Comment", " \u000B"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 6 },
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- \\u000C",
+"input":"<!-- \u000C",
+"output":[["Comment", " \u000C"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!--  ",
+"input":"<!--  ",
+"output":[["Comment", "  "]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- !",
+"input":"<!-- !",
+"output":[["Comment", " !"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- \"",
+"input":"<!-- \"",
+"output":[["Comment", " \""]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- &",
+"input":"<!-- &",
+"output":[["Comment", " &"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- '",
+"input":"<!-- '",
+"output":[["Comment", " '"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- ,",
+"input":"<!-- ,",
+"output":[["Comment", " ,"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- -",
+"input":"<!-- -",
+"output":[["Comment", " "]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- -\\u0000",
+"input":"<!-- -\u0000",
+"output":[["Comment", " -\uFFFD"]],
+"errors":[
+    { "code": "unexpected-null-character", "line": 1, "col": 7 },
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -\\u0009",
+"input":"<!-- -\u0009",
+"output":[["Comment", " -\u0009"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -\\u000A",
+"input":"<!-- -\u000A",
+"output":[["Comment", " -\u000A"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 2, "col": 1 }
+]},
+
+{"description":"<!-- -\\u000B",
+"input":"<!-- -\u000B",
+"output":[["Comment", " -\u000B"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 7 },
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -\\u000C",
+"input":"<!-- -\u000C",
+"output":[["Comment", " -\u000C"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- - ",
+"input":"<!-- - ",
+"output":[["Comment", " - "]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -!",
+"input":"<!-- -!",
+"output":[["Comment", " -!"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -\"",
+"input":"<!-- -\"",
+"output":[["Comment", " -\""]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -&",
+"input":"<!-- -&",
+"output":[["Comment", " -&"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -'",
+"input":"<!-- -'",
+"output":[["Comment", " -'"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -,",
+"input":"<!-- -,",
+"output":[["Comment", " -,"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- --",
+"input":"<!-- --",
+"output":[["Comment", " "]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -.",
+"input":"<!-- -.",
+"output":[["Comment", " -."]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -/",
+"input":"<!-- -/",
+"output":[["Comment", " -/"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -0",
+"input":"<!-- -0",
+"output":[["Comment", " -0"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -1",
+"input":"<!-- -1",
+"output":[["Comment", " -1"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -9",
+"input":"<!-- -9",
+"output":[["Comment", " -9"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -<",
+"input":"<!-- -<",
+"output":[["Comment", " -<"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -=",
+"input":"<!-- -=",
+"output":[["Comment", " -="]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- ->",
+"input":"<!-- ->",
+"output":[["Comment", " ->"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -?",
+"input":"<!-- -?",
+"output":[["Comment", " -?"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -@",
+"input":"<!-- -@",
+"output":[["Comment", " -@"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -A",
+"input":"<!-- -A",
+"output":[["Comment", " -A"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -B",
+"input":"<!-- -B",
+"output":[["Comment", " -B"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -Y",
+"input":"<!-- -Y",
+"output":[["Comment", " -Y"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -Z",
+"input":"<!-- -Z",
+"output":[["Comment", " -Z"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -`",
+"input":"<!-- -`",
+"output":[["Comment", " -`"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -a",
+"input":"<!-- -a",
+"output":[["Comment", " -a"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -b",
+"input":"<!-- -b",
+"output":[["Comment", " -b"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -y",
+"input":"<!-- -y",
+"output":[["Comment", " -y"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -z",
+"input":"<!-- -z",
+"output":[["Comment", " -z"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -{",
+"input":"<!-- -{",
+"output":[["Comment", " -{"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-- -\\uDBC0\\uDC00",
+"input":"<!-- -\uDBC0\uDC00",
+"output":[["Comment", " -\uDBC0\uDC00"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 9 }
+]},
+
+{"description":"<!-- .",
+"input":"<!-- .",
+"output":[["Comment", " ."]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- /",
+"input":"<!-- /",
+"output":[["Comment", " /"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- 0",
+"input":"<!-- 0",
+"output":[["Comment", " 0"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- 1",
+"input":"<!-- 1",
+"output":[["Comment", " 1"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- 9",
+"input":"<!-- 9",
+"output":[["Comment", " 9"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- <",
+"input":"<!-- <",
+"output":[["Comment", " <"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- =",
+"input":"<!-- =",
+"output":[["Comment", " ="]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- >",
+"input":"<!-- >",
+"output":[["Comment", " >"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- ?",
+"input":"<!-- ?",
+"output":[["Comment", " ?"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- @",
+"input":"<!-- @",
+"output":[["Comment", " @"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- A",
+"input":"<!-- A",
+"output":[["Comment", " A"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- B",
+"input":"<!-- B",
+"output":[["Comment", " B"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- Y",
+"input":"<!-- Y",
+"output":[["Comment", " Y"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- Z",
+"input":"<!-- Z",
+"output":[["Comment", " Z"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- `",
+"input":"<!-- `",
+"output":[["Comment", " `"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- a",
+"input":"<!-- a",
+"output":[["Comment", " a"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- b",
+"input":"<!-- b",
+"output":[["Comment", " b"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- y",
+"input":"<!-- y",
+"output":[["Comment", " y"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- z",
+"input":"<!-- z",
+"output":[["Comment", " z"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- {",
+"input":"<!-- {",
+"output":[["Comment", " {"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!-- \\uDBC0\\uDC00",
+"input":"<!-- \uDBC0\uDC00",
+"output":[["Comment", " \uDBC0\uDC00"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!--!",
+"input":"<!--!",
+"output":[["Comment", "!"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--\"",
+"input":"<!--\"",
+"output":[["Comment", "\""]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--&",
+"input":"<!--&",
+"output":[["Comment", "&"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--'",
+"input":"<!--'",
+"output":[["Comment", "'"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--,",
+"input":"<!--,",
+"output":[["Comment", ","]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!---",
+"input":"<!---",
+"output":[["Comment", ""]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!---\\u0000",
+"input":"<!---\u0000",
+"output":[["Comment", "-\uFFFD"]],
+"errors":[
+    { "code": "unexpected-null-character", "line": 1, "col": 6 },
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---\\u0009",
+"input":"<!---\u0009",
+"output":[["Comment", "-\u0009"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---\\u000A",
+"input":"<!---\u000A",
+"output":[["Comment", "-\u000A"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 2, "col": 1 }
+]},
+
+{"description":"<!---\\u000B",
+"input":"<!---\u000B",
+"output":[["Comment", "-\u000B"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 6 },
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---\\u000C",
+"input":"<!---\u000C",
+"output":[["Comment", "-\u000C"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!--- ",
+"input":"<!--- ",
+"output":[["Comment", "- "]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---!",
+"input":"<!---!",
+"output":[["Comment", "-!"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---\"",
+"input":"<!---\"",
+"output":[["Comment", "-\""]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---&",
+"input":"<!---&",
+"output":[["Comment", "-&"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---'",
+"input":"<!---'",
+"output":[["Comment", "-'"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---,",
+"input":"<!---,",
+"output":[["Comment", "-,"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!----",
+"input":"<!----",
+"output":[["Comment", ""]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!----\\u0000",
+"input":"<!----\u0000",
+"output":[["Comment", "--\uFFFD"]],
+"errors":[
+    { "code": "unexpected-null-character", "line": 1, "col": 7 },
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----\\u0009",
+"input":"<!----\u0009",
+"output":[["Comment", "--\u0009"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----\\u000A",
+"input":"<!----\u000A",
+"output":[["Comment", "--\u000A"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 2, "col": 1 }
+]},
+
+{"description":"<!----\\u000B",
+"input":"<!----\u000B",
+"output":[["Comment", "--\u000B"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 7 },
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----\\u000C",
+"input":"<!----\u000C",
+"output":[["Comment", "--\u000C"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!---- ",
+"input":"<!---- ",
+"output":[["Comment", "-- "]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!---- -",
+"input":"<!---- -",
+"output":[["Comment", "-- "]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 9 }
+]},
+
+
+{"description":"<!---- --",
+"input":"<!---- --",
+"output":[["Comment", "-- "]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 10 }
+]},
+
+{"description":"<!---- -->",
+"input":"<!---- -->",
+"output":[["Comment", "-- "]]},
+
+{"description":"<!----  -->",
+"input":"<!----  -->",
+"output":[["Comment", "--  "]]},
+
+{"description":"<!---- a-->",
+"input":"<!---- a-->",
+"output":[["Comment", "-- a"]]},
+
+{"description":"<!----!",
+"input":"<!----!",
+"output":[["Comment", ""]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----!>",
+"input":"<!----!>",
+"output":[["Comment", ""]],
+"errors":[
+    { "code": "incorrectly-closed-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----! >",
+"input":"<!----! >",
+"output":[["Comment", "--! >"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 9 }
+]},
+
+{"description":"<!----!LF>",
+"input":"<!----!\n>",
+"output":[["Comment", "--!\n>"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 9 }
+]},
+
+{"description":"<!----!CR>",
+"input":"<!----!\r>",
+"output":[["Comment", "--!\n>"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 9 }
+]},
+
+{"description":"<!----!CRLF>",
+"input":"<!----!\r\n>",
+"output":[["Comment", "--!\n>"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 9 }
+]},
+
+{"description":"<!----!a",
+"input":"<!----!a",
+"output":[["Comment", "--!a"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 9 }
+]},
+
+{"description":"<!----!a-",
+"input":"<!----!a-",
+"output":[["Comment", "--!a"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 10 }
+]},
+
+{"description":"<!----!a--",
+"input":"<!----!a--",
+"output":[["Comment", "--!a"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 11 }
+]},
+
+{"description":"<!----!a-->",
+"input":"<!----!a-->",
+"output":[["Comment", "--!a"]]},
+
+{"description":"<!----!-",
+"input":"<!----!-",
+"output":[["Comment", "--!"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 9 }
+]},
+
+{"description":"<!----!--",
+"input":"<!----!--",
+"output":[["Comment", "--!"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 10 }
+]},
+
+{"description":"<!----!-->",
+"input":"<!----!-->",
+"output":[["Comment", "--!"]]},
+
+{"description":"<!----\"",
+"input":"<!----\"",
+"output":[["Comment", "--\""]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----&",
+"input":"<!----&",
+"output":[["Comment", "--&"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----'",
+"input":"<!----'",
+"output":[["Comment", "--'"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----,",
+"input":"<!----,",
+"output":[["Comment", "--,"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!-----",
+"input":"<!-----",
+"output":[["Comment", "-"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----.",
+"input":"<!----.",
+"output":[["Comment", "--."]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----/",
+"input":"<!----/",
+"output":[["Comment", "--/"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----0",
+"input":"<!----0",
+"output":[["Comment", "--0"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----1",
+"input":"<!----1",
+"output":[["Comment", "--1"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----9",
+"input":"<!----9",
+"output":[["Comment", "--9"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----<",
+"input":"<!----<",
+"output":[["Comment", "--<"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----=",
+"input":"<!----=",
+"output":[["Comment", "--="]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!---->",
+"input":"<!---->",
+"output":[["Comment", ""]]},
+
+{"description":"<!----?",
+"input":"<!----?",
+"output":[["Comment", "--?"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----@",
+"input":"<!----@",
+"output":[["Comment", "--@"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----A",
+"input":"<!----A",
+"output":[["Comment", "--A"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----B",
+"input":"<!----B",
+"output":[["Comment", "--B"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----Y",
+"input":"<!----Y",
+"output":[["Comment", "--Y"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----Z",
+"input":"<!----Z",
+"output":[["Comment", "--Z"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----`",
+"input":"<!----`",
+"output":[["Comment", "--`"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----a",
+"input":"<!----a",
+"output":[["Comment", "--a"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----b",
+"input":"<!----b",
+"output":[["Comment", "--b"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----y",
+"input":"<!----y",
+"output":[["Comment", "--y"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----z",
+"input":"<!----z",
+"output":[["Comment", "--z"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----{",
+"input":"<!----{",
+"output":[["Comment", "--{"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!----\\uDBC0\\uDC00",
+"input":"<!----\uDBC0\uDC00",
+"output":[["Comment", "--\uDBC0\uDC00"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 9 }
+]},
+
+{"description":"<!---.",
+"input":"<!---.",
+"output":[["Comment", "-."]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---/",
+"input":"<!---/",
+"output":[["Comment", "-/"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---0",
+"input":"<!---0",
+"output":[["Comment", "-0"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---1",
+"input":"<!---1",
+"output":[["Comment", "-1"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---9",
+"input":"<!---9",
+"output":[["Comment", "-9"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---<",
+"input":"<!---<",
+"output":[["Comment", "-<"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---=",
+"input":"<!---=",
+"output":[["Comment", "-="]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---?",
+"input":"<!---?",
+"output":[["Comment", "-?"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---@",
+"input":"<!---@",
+"output":[["Comment", "-@"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---A",
+"input":"<!---A",
+"output":[["Comment", "-A"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---B",
+"input":"<!---B",
+"output":[["Comment", "-B"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---Y",
+"input":"<!---Y",
+"output":[["Comment", "-Y"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---Z",
+"input":"<!---Z",
+"output":[["Comment", "-Z"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---`",
+"input":"<!---`",
+"output":[["Comment", "-`"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---a",
+"input":"<!---a",
+"output":[["Comment", "-a"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---b",
+"input":"<!---b",
+"output":[["Comment", "-b"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---y",
+"input":"<!---y",
+"output":[["Comment", "-y"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---z",
+"input":"<!---z",
+"output":[["Comment", "-z"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---{",
+"input":"<!---{",
+"output":[["Comment", "-{"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!---\\uDBC0\\uDC00",
+"input":"<!---\uDBC0\uDC00",
+"output":[["Comment", "-\uDBC0\uDC00"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 8 }
+]},
+
+{"description":"<!--.",
+"input":"<!--.",
+"output":[["Comment", "."]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--/",
+"input":"<!--/",
+"output":[["Comment", "/"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--0",
+"input":"<!--0",
+"output":[["Comment", "0"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--1",
+"input":"<!--1",
+"output":[["Comment", "1"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--9",
+"input":"<!--9",
+"output":[["Comment", "9"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--<",
+"input":"<!--<",
+"output":[["Comment", "<"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--=",
+"input":"<!--=",
+"output":[["Comment", "="]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--?",
+"input":"<!--?",
+"output":[["Comment", "?"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--@",
+"input":"<!--@",
+"output":[["Comment", "@"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--A",
+"input":"<!--A",
+"output":[["Comment", "A"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--B",
+"input":"<!--B",
+"output":[["Comment", "B"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--Y",
+"input":"<!--Y",
+"output":[["Comment", "Y"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--Z",
+"input":"<!--Z",
+"output":[["Comment", "Z"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--`",
+"input":"<!--`",
+"output":[["Comment", "`"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--a",
+"input":"<!--a",
+"output":[["Comment", "a"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--b",
+"input":"<!--b",
+"output":[["Comment", "b"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--y",
+"input":"<!--y",
+"output":[["Comment", "y"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--z",
+"input":"<!--z",
+"output":[["Comment", "z"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--{",
+"input":"<!--{",
+"output":[["Comment", "{"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 6 }
+]},
+
+{"description":"<!--\\uDBC0\\uDC00",
+"input":"<!--\uDBC0\uDC00",
+"output":[["Comment", "\uDBC0\uDC00"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 7 }
+]},
+
+{"description":"<!/",
+"input":"<!/",
+"output":[["Comment", "/"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!0",
+"input":"<!0",
+"output":[["Comment", "0"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!1",
+"input":"<!1",
+"output":[["Comment", "1"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!9",
+"input":"<!9",
+"output":[["Comment", "9"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!<",
+"input":"<!<",
+"output":[["Comment", "<"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!=",
+"input":"<!=",
+"output":[["Comment", "="]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!>",
+"input":"<!>",
+"output":[["Comment", ""]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!?",
+"input":"<!?",
+"output":[["Comment", "?"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!@",
+"input":"<!@",
+"output":[["Comment", "@"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!A",
+"input":"<!A",
+"output":[["Comment", "A"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!B",
+"input":"<!B",
+"output":[["Comment", "B"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!DOCTYPE",
+"input":"<!DOCTYPE",
+"output":[["DOCTYPE", null, null, null, false]],
+"errors":[
+    { "code": "eof-in-doctype", "line": 1, "col": 10 }
+]},
+
+{"description":"<!DOCTYPE\\u0000",
+"input":"<!DOCTYPE\u0000",
+"output":[["DOCTYPE", "\uFFFD", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "unexpected-null-character", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPE\\u0008",
+"input":"<!DOCTYPE\u0008",
+"output":[["DOCTYPE", "\u0008", null, null, false]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPE\\u0009",
+"input":"<!DOCTYPE\u0009",
+"output":[["DOCTYPE", null, null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPE\\u000A",
+"input":"<!DOCTYPE\u000A",
+"output":[["DOCTYPE", null, null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 2, "col": 1 }
+]},
+
+{"description":"<!DOCTYPE\\u000B",
+"input":"<!DOCTYPE\u000B",
+"output":[["DOCTYPE", "\u000B", null, null, false]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPE\\u000C",
+"input":"<!DOCTYPE\u000C",
+"output":[["DOCTYPE", null, null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPE\\u000D",
+"input":"<!DOCTYPE\u000D",
+"output":[["DOCTYPE", null, null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 2, "col": 1 }
+]},
+
+{"description":"<!DOCTYPE\\u001F",
+"input":"<!DOCTYPE\u001F",
+"output":[["DOCTYPE", "\u001F", null, null, false]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPE ",
+"input":"<!DOCTYPE ",
+"output":[["DOCTYPE", null, null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPE \\u0000",
+"input":"<!DOCTYPE \u0000",
+"output":[["DOCTYPE", "\uFFFD", null, null, false]],
+"errors":[
+   { "code": "unexpected-null-character", "line": 1, "col": 11 },
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE \\u0008",
+"input":"<!DOCTYPE \u0008",
+"output":[["DOCTYPE", "\u0008", null, null, false]],
+"errors":[
+   { "code": "control-character-in-input-stream", "line": 1, "col": 11 },
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE \\u0009",
+"input":"<!DOCTYPE \u0009",
+"output":[["DOCTYPE", null, null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE \\u000A",
+"input":"<!DOCTYPE \u000A",
+"output":[["DOCTYPE", null, null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 2, "col": 1 }
+]},
+
+{"description":"<!DOCTYPE \\u000B",
+"input":"<!DOCTYPE \u000B",
+"output":[["DOCTYPE", "\u000B", null, null, false]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 11 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE \\u000C",
+"input":"<!DOCTYPE \u000C",
+"output":[["DOCTYPE", null, null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE \\u000D",
+"input":"<!DOCTYPE \u000D",
+"output":[["DOCTYPE", null, null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 2, "col": 1 }
+]},
+
+{"description":"<!DOCTYPE \\u001F",
+"input":"<!DOCTYPE \u001F",
+"output":[["DOCTYPE", "\u001F", null, null, false]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 11 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE  ",
+"input":"<!DOCTYPE  ",
+"output":[["DOCTYPE", null, null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE !",
+"input":"<!DOCTYPE !",
+"output":[["DOCTYPE", "!", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE \"",
+"input":"<!DOCTYPE \"",
+"output":[["DOCTYPE", "\"", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE &",
+"input":"<!DOCTYPE &",
+"output":[["DOCTYPE", "&", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE '",
+"input":"<!DOCTYPE '",
+"output":[["DOCTYPE", "'", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE -",
+"input":"<!DOCTYPE -",
+"output":[["DOCTYPE", "-", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE /",
+"input":"<!DOCTYPE /",
+"output":[["DOCTYPE", "/", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE 0",
+"input":"<!DOCTYPE 0",
+"output":[["DOCTYPE", "0", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE 1",
+"input":"<!DOCTYPE 1",
+"output":[["DOCTYPE", "1", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE 9",
+"input":"<!DOCTYPE 9",
+"output":[["DOCTYPE", "9", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE <",
+"input":"<!DOCTYPE <",
+"output":[["DOCTYPE", "<", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE =",
+"input":"<!DOCTYPE =",
+"output":[["DOCTYPE", "=", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE >",
+"input":"<!DOCTYPE >",
+"output":[["DOCTYPE", null, null, null, false]],
+"errors":[
+    { "code": "missing-doctype-name", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPE ?",
+"input":"<!DOCTYPE ?",
+"output":[["DOCTYPE", "?", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE @",
+"input":"<!DOCTYPE @",
+"output":[["DOCTYPE", "@", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE A",
+"input":"<!DOCTYPE A",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE B",
+"input":"<!DOCTYPE B",
+"output":[["DOCTYPE", "b", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE Y",
+"input":"<!DOCTYPE Y",
+"output":[["DOCTYPE", "y", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE Z",
+"input":"<!DOCTYPE Z",
+"output":[["DOCTYPE", "z", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE [",
+"input":"<!DOCTYPE [",
+"output":[["DOCTYPE", "[", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE `",
+"input":"<!DOCTYPE `",
+"output":[["DOCTYPE", "`", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE a",
+"input":"<!DOCTYPE a",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE a\\u0000",
+"input":"<!DOCTYPE a\u0000",
+"output":[["DOCTYPE", "a\uFFFD", null, null, false]],
+"errors":[
+    { "code": "unexpected-null-character", "line": 1, "col": 12 },
+    { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a\\u0008",
+"input":"<!DOCTYPE a\u0008",
+"output":[["DOCTYPE", "a\u0008", null, null, false]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 12 },
+    { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a\\u0009",
+"input":"<!DOCTYPE a\u0009",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a\\u000A",
+"input":"<!DOCTYPE a\u000A",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "eof-in-doctype", "line": 2, "col": 1 }
+]},
+
+{"description":"<!DOCTYPE a\\u000B",
+"input":"<!DOCTYPE a\u000B",
+"output":[["DOCTYPE", "a\u000B", null, null, false]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 12 },
+    { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a\\u000C",
+"input":"<!DOCTYPE a\u000C",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a\\u000D",
+"input":"<!DOCTYPE a\u000D",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "eof-in-doctype", "line": 2, "col": 1 }
+]},
+
+{"description":"<!DOCTYPE a\\u001F",
+"input":"<!DOCTYPE a\u001F",
+"output":[["DOCTYPE", "a\u001F", null, null, false]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 12 },
+    { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a ",
+"input":"<!DOCTYPE a ",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a \\u0000",
+"input":"<!DOCTYPE a \u0000",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 },
+    { "code": "unexpected-null-character", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a \\u0008",
+"input":"<!DOCTYPE a \u0008",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 13 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a \\u0009",
+"input":"<!DOCTYPE a \u0009",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "eof-in-doctype", "line": 1, "col": 14 }
+]},
+
+{"description":"<!DOCTYPE a \\u000A",
+"input":"<!DOCTYPE a \u000A",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "eof-in-doctype", "line": 2, "col": 1 }
+]},
+
+{"description":"<!DOCTYPE a \\u000B",
+"input":"<!DOCTYPE a \u000B",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 13 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a \\u000C",
+"input":"<!DOCTYPE a \u000C",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "eof-in-doctype", "line": 1, "col": 14 }
+]},
+
+{"description":"<!DOCTYPE a \\u000D",
+"input":"<!DOCTYPE a \u000D",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "eof-in-doctype", "line": 2, "col": 1 }
+]},
+
+{"description":"<!DOCTYPE a \\u001F",
+"input":"<!DOCTYPE a \u001F",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 13 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a  ",
+"input":"<!DOCTYPE a  ",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "eof-in-doctype", "line": 1, "col": 14 }
+]},
+
+{"description":"<!DOCTYPE a !",
+"input":"<!DOCTYPE a !",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a \"",
+"input":"<!DOCTYPE a \"",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a &",
+"input":"<!DOCTYPE a &",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a '",
+"input":"<!DOCTYPE a '",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a -",
+"input":"<!DOCTYPE a -",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a /",
+"input":"<!DOCTYPE a /",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a 0",
+"input":"<!DOCTYPE a 0",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a 1",
+"input":"<!DOCTYPE a 1",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a 9",
+"input":"<!DOCTYPE a 9",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a <",
+"input":"<!DOCTYPE a <",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a =",
+"input":"<!DOCTYPE a =",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a >",
+"input":"<!DOCTYPE a >",
+"output":[["DOCTYPE", "a", null, null, true]]},
+
+{"description":"<!DOCTYPE a ?",
+"input":"<!DOCTYPE a ?",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a @",
+"input":"<!DOCTYPE a @",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a A",
+"input":"<!DOCTYPE a A",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a B",
+"input":"<!DOCTYPE a B",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC",
+"input":"<!DOCTYPE a PUBLIC",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "eof-in-doctype", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\\u0000",
+"input":"<!DOCTYPE a PUBLIC\u0000",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-public-identifier", "col": 19, "line": 1 },
+    { "code": "unexpected-null-character", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\\u0008",
+"input":"<!DOCTYPE a PUBLIC\u0008",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 19 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\\u0009",
+"input":"<!DOCTYPE a PUBLIC\u0009",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "eof-in-doctype", "col": 20, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\\u000A",
+"input":"<!DOCTYPE a PUBLIC\u000A",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "eof-in-doctype", "col": 1, "line": 2 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\\u000B",
+"input":"<!DOCTYPE a PUBLIC\u000B",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 19 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\\u000C",
+"input":"<!DOCTYPE a PUBLIC\u000C",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "eof-in-doctype", "col": 20, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\\u000D",
+"input":"<!DOCTYPE a PUBLIC\u000D",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "eof-in-doctype", "col": 1, "line": 2 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\\u001F",
+"input":"<!DOCTYPE a PUBLIC\u001F",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 19 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC ",
+"input":"<!DOCTYPE a PUBLIC ",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "eof-in-doctype", "col": 20, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC!",
+"input":"<!DOCTYPE a PUBLIC!",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-public-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"",
+"input":"<!DOCTYPE a PUBLIC\"",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 20, "line": 1 }
+
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"\\u0000",
+"input":"<!DOCTYPE a PUBLIC\"\u0000",
+"output":[["DOCTYPE", "a", "\uFFFD", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "unexpected-null-character", "col": 20, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"\\u0009",
+"input":"<!DOCTYPE a PUBLIC\"\u0009",
+"output":[["DOCTYPE", "a", "\u0009", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"\\u000A",
+"input":"<!DOCTYPE a PUBLIC\"\u000A",
+"output":[["DOCTYPE", "a", "\u000A", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 1, "line": 2 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"\\u000B",
+"input":"<!DOCTYPE a PUBLIC\"\u000B",
+"output":[["DOCTYPE", "a", "\u000B", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 20 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"\\u000C",
+"input":"<!DOCTYPE a PUBLIC\"\u000C",
+"output":[["DOCTYPE", "a", "\u000C", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\" ",
+"input":"<!DOCTYPE a PUBLIC\" ",
+"output":[["DOCTYPE", "a", " ", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"!",
+"input":"<!DOCTYPE a PUBLIC\"!",
+"output":[["DOCTYPE", "a", "!", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"\"",
+"input":"<!DOCTYPE a PUBLIC\"\"",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"\"\\u0000",
+"input":"<!DOCTYPE a PUBLIC\"\"\u0000",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 },
+    { "code": "unexpected-null-character", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"\" \\u0000",
+"input":"<!DOCTYPE a PUBLIC\"\" \u0000",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 22, "line": 1 },
+    { "code": "unexpected-null-character", "col": 22, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"#",
+"input":"<!DOCTYPE a PUBLIC\"#",
+"output":[["DOCTYPE", "a", "#", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"&",
+"input":"<!DOCTYPE a PUBLIC\"&",
+"output":[["DOCTYPE", "a", "&", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"'",
+"input":"<!DOCTYPE a PUBLIC\"'",
+"output":[["DOCTYPE", "a", "'", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"-",
+"input":"<!DOCTYPE a PUBLIC\"-",
+"output":[["DOCTYPE", "a", "-", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"/",
+"input":"<!DOCTYPE a PUBLIC\"/",
+"output":[["DOCTYPE", "a", "/", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"0",
+"input":"<!DOCTYPE a PUBLIC\"0",
+"output":[["DOCTYPE", "a", "0", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"1",
+"input":"<!DOCTYPE a PUBLIC\"1",
+"output":[["DOCTYPE", "a", "1", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"9",
+"input":"<!DOCTYPE a PUBLIC\"9",
+"output":[["DOCTYPE", "a", "9", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"<",
+"input":"<!DOCTYPE a PUBLIC\"<",
+"output":[["DOCTYPE", "a", "<", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"=",
+"input":"<!DOCTYPE a PUBLIC\"=",
+"output":[["DOCTYPE", "a", "=", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\">",
+"input":"<!DOCTYPE a PUBLIC\">",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "abrupt-doctype-public-identifier", "col": 20, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"?",
+"input":"<!DOCTYPE a PUBLIC\"?",
+"output":[["DOCTYPE", "a", "?", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"@",
+"input":"<!DOCTYPE a PUBLIC\"@",
+"output":[["DOCTYPE", "a", "@", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"A",
+"input":"<!DOCTYPE a PUBLIC\"A",
+"output":[["DOCTYPE", "a", "A", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"B",
+"input":"<!DOCTYPE a PUBLIC\"B",
+"output":[["DOCTYPE", "a", "B", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"Y",
+"input":"<!DOCTYPE a PUBLIC\"Y",
+"output":[["DOCTYPE", "a", "Y", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"Z",
+"input":"<!DOCTYPE a PUBLIC\"Z",
+"output":[["DOCTYPE", "a", "Z", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"`",
+"input":"<!DOCTYPE a PUBLIC\"`",
+"output":[["DOCTYPE", "a", "`", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"a",
+"input":"<!DOCTYPE a PUBLIC\"a",
+"output":[["DOCTYPE", "a", "a", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"b",
+"input":"<!DOCTYPE a PUBLIC\"b",
+"output":[["DOCTYPE", "a", "b", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"y",
+"input":"<!DOCTYPE a PUBLIC\"y",
+"output":[["DOCTYPE", "a", "y", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"z",
+"input":"<!DOCTYPE a PUBLIC\"z",
+"output":[["DOCTYPE", "a", "z", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"{",
+"input":"<!DOCTYPE a PUBLIC\"{",
+"output":[["DOCTYPE", "a", "{", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\"\\uDBC0\\uDC00",
+"input":"<!DOCTYPE a PUBLIC\"\uDBC0\uDC00",
+"output":[["DOCTYPE", "a", "\uDBC0\uDC00", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 22, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC#",
+"input":"<!DOCTYPE a PUBLIC#",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-public-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC&",
+"input":"<!DOCTYPE a PUBLIC&",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-public-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'",
+"input":"<!DOCTYPE a PUBLIC'",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 20, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'\\u0000",
+"input":"<!DOCTYPE a PUBLIC'\u0000",
+"output":[["DOCTYPE", "a", "\uFFFD", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "unexpected-null-character", "col": 20, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'\\u0009",
+"input":"<!DOCTYPE a PUBLIC'\u0009",
+"output":[["DOCTYPE", "a", "\u0009", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'\\u000A",
+"input":"<!DOCTYPE a PUBLIC'\u000A",
+"output":[["DOCTYPE", "a", "\u000A", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 1, "line": 2 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'\\u000B",
+"input":"<!DOCTYPE a PUBLIC'\u000B",
+"output":[["DOCTYPE", "a", "\u000B", null, false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 20 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'\\u000C",
+"input":"<!DOCTYPE a PUBLIC'\u000C",
+"output":[["DOCTYPE", "a", "\u000C", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC' ",
+"input":"<!DOCTYPE a PUBLIC' ",
+"output":[["DOCTYPE", "a", " ", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'!",
+"input":"<!DOCTYPE a PUBLIC'!",
+"output":[["DOCTYPE", "a", "!", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'\"",
+"input":"<!DOCTYPE a PUBLIC'\"",
+"output":[["DOCTYPE", "a", "\"", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'&",
+"input":"<!DOCTYPE a PUBLIC'&",
+"output":[["DOCTYPE", "a", "&", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''",
+"input":"<!DOCTYPE a PUBLIC''",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''\\u0000",
+"input":"<!DOCTYPE a PUBLIC''\u0000",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 },
+    { "code": "unexpected-null-character", "line": 1, "col": 21 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''\\u0008",
+"input":"<!DOCTYPE a PUBLIC''\u0008",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 21 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''\\u0009",
+"input":"<!DOCTYPE a PUBLIC''\u0009",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 22, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''\\u000A",
+"input":"<!DOCTYPE a PUBLIC''\u000A",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 1, "line": 2 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''\\u000B",
+"input":"<!DOCTYPE a PUBLIC''\u000B",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 21 },
+    { "code" : "missing-quote-before-doctype-system-identifier", "col": 21, "line" : 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''\\u000C",
+"input":"<!DOCTYPE a PUBLIC''\u000C",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 22, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''\\u000D",
+"input":"<!DOCTYPE a PUBLIC''\u000D",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 1, "line": 2 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''\\u001F",
+"input":"<!DOCTYPE a PUBLIC''\u001F",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 21 },
+    { "code" : "missing-quote-before-doctype-system-identifier", "col": 21, "line" : 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'' ",
+"input":"<!DOCTYPE a PUBLIC'' ",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 22, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''!",
+"input":"<!DOCTYPE a PUBLIC''!",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''\"",
+"input":"<!DOCTYPE a PUBLIC''\"",
+"output":[["DOCTYPE", "a", "", "", false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-whitespace-between-doctype-public-and-system-identifiers", "col": 21, "line": 1 },
+    { "code": "eof-in-doctype", "col": 22, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''#",
+"input":"<!DOCTYPE a PUBLIC''#",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''&",
+"input":"<!DOCTYPE a PUBLIC''&",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'''",
+"input":"<!DOCTYPE a PUBLIC'''",
+"output":[["DOCTYPE", "a", "", "", false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-whitespace-between-doctype-public-and-system-identifiers", "col": 21, "line": 1 },
+    { "code": "eof-in-doctype", "col": 22, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''''\\u0000",
+"input":"<!DOCTYPE a PUBLIC''''\u0000",
+"output":[["DOCTYPE", "a", "", "", true]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-whitespace-between-doctype-public-and-system-identifiers", "col": 21, "line": 1 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 23 },
+    { "code": "unexpected-null-character", "line": 1, "col": 23 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''''x\\u0000",
+"input":"<!DOCTYPE a PUBLIC''''x\u0000",
+"output":[["DOCTYPE", "a", "", "", true]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-whitespace-between-doctype-public-and-system-identifiers", "col": 21, "line": 1 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 23 },
+    { "code": "unexpected-null-character", "line": 1, "col": 24 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'''' \\u0000",
+"input":"<!DOCTYPE a PUBLIC'''' \u0000",
+"output":[["DOCTYPE", "a", "", "", true]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-whitespace-between-doctype-public-and-system-identifiers", "col": 21, "line": 1 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 24 },
+    { "code": "unexpected-null-character", "line": 1, "col": 24 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'''' x\\u0000",
+"input":"<!DOCTYPE a PUBLIC'''' x\u0000",
+"output":[["DOCTYPE", "a", "", "", true]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-whitespace-between-doctype-public-and-system-identifiers", "col": 21, "line": 1 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 24 },
+    { "code": "unexpected-null-character", "line": 1, "col": 25 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''(",
+"input":"<!DOCTYPE a PUBLIC''(",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''-",
+"input":"<!DOCTYPE a PUBLIC''-",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''/",
+"input":"<!DOCTYPE a PUBLIC''/",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''0",
+"input":"<!DOCTYPE a PUBLIC''0",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''1",
+"input":"<!DOCTYPE a PUBLIC''1",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''9",
+"input":"<!DOCTYPE a PUBLIC''9",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''<",
+"input":"<!DOCTYPE a PUBLIC''<",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''=",
+"input":"<!DOCTYPE a PUBLIC''=",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''>",
+"input":"<!DOCTYPE a PUBLIC''>",
+"output":[["DOCTYPE", "a", "", null, true]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''?",
+"input":"<!DOCTYPE a PUBLIC''?",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''@",
+"input":"<!DOCTYPE a PUBLIC''@",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''A",
+"input":"<!DOCTYPE a PUBLIC''A",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''B",
+"input":"<!DOCTYPE a PUBLIC''B",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''Y",
+"input":"<!DOCTYPE a PUBLIC''Y",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''Z",
+"input":"<!DOCTYPE a PUBLIC''Z",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''`",
+"input":"<!DOCTYPE a PUBLIC''`",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''a",
+"input":"<!DOCTYPE a PUBLIC''a",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''b",
+"input":"<!DOCTYPE a PUBLIC''b",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''y",
+"input":"<!DOCTYPE a PUBLIC''y",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''z",
+"input":"<!DOCTYPE a PUBLIC''z",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''{",
+"input":"<!DOCTYPE a PUBLIC''{",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC''\\uDBC0\\uDC00",
+"input":"<!DOCTYPE a PUBLIC''\uDBC0\uDC00",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'(",
+"input":"<!DOCTYPE a PUBLIC'(",
+"output":[["DOCTYPE", "a", "(", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'-",
+"input":"<!DOCTYPE a PUBLIC'-",
+"output":[["DOCTYPE", "a", "-", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'/",
+"input":"<!DOCTYPE a PUBLIC'/",
+"output":[["DOCTYPE", "a", "/", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'0",
+"input":"<!DOCTYPE a PUBLIC'0",
+"output":[["DOCTYPE", "a", "0", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'1",
+"input":"<!DOCTYPE a PUBLIC'1",
+"output":[["DOCTYPE", "a", "1", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'9",
+"input":"<!DOCTYPE a PUBLIC'9",
+"output":[["DOCTYPE", "a", "9", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'<",
+"input":"<!DOCTYPE a PUBLIC'<",
+"output":[["DOCTYPE", "a", "<", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'=",
+"input":"<!DOCTYPE a PUBLIC'=",
+"output":[["DOCTYPE", "a", "=", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'>",
+"input":"<!DOCTYPE a PUBLIC'>",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "abrupt-doctype-public-identifier", "col": 20, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'?",
+"input":"<!DOCTYPE a PUBLIC'?",
+"output":[["DOCTYPE", "a", "?", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'@",
+"input":"<!DOCTYPE a PUBLIC'@",
+"output":[["DOCTYPE", "a", "@", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'A",
+"input":"<!DOCTYPE a PUBLIC'A",
+"output":[["DOCTYPE", "a", "A", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'B",
+"input":"<!DOCTYPE a PUBLIC'B",
+"output":[["DOCTYPE", "a", "B", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'Y",
+"input":"<!DOCTYPE a PUBLIC'Y",
+"output":[["DOCTYPE", "a", "Y", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'Z",
+"input":"<!DOCTYPE a PUBLIC'Z",
+"output":[["DOCTYPE", "a", "Z", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'`",
+"input":"<!DOCTYPE a PUBLIC'`",
+"output":[["DOCTYPE", "a", "`", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'a",
+"input":"<!DOCTYPE a PUBLIC'a",
+"output":[["DOCTYPE", "a", "a", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'b",
+"input":"<!DOCTYPE a PUBLIC'b",
+"output":[["DOCTYPE", "a", "b", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'y",
+"input":"<!DOCTYPE a PUBLIC'y",
+"output":[["DOCTYPE", "a", "y", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'z",
+"input":"<!DOCTYPE a PUBLIC'z",
+"output":[["DOCTYPE", "a", "z", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'{",
+"input":"<!DOCTYPE a PUBLIC'{",
+"output":[["DOCTYPE", "a", "{", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC'\\uDBC0\\uDC00",
+"input":"<!DOCTYPE a PUBLIC'\uDBC0\uDC00",
+"output":[["DOCTYPE", "a", "\uDBC0\uDC00", null, false]],
+"errors": [
+    { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 },
+    { "code": "eof-in-doctype", "col": 22, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC(",
+"input":"<!DOCTYPE a PUBLIC(",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-public-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC-",
+"input":"<!DOCTYPE a PUBLIC-",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-public-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC/",
+"input":"<!DOCTYPE a PUBLIC/",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-public-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC0",
+"input":"<!DOCTYPE a PUBLIC0",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-public-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC1",
+"input":"<!DOCTYPE a PUBLIC1",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-public-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC9",
+"input":"<!DOCTYPE a PUBLIC9",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-public-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC<",
+"input":"<!DOCTYPE a PUBLIC<",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-public-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC=",
+"input":"<!DOCTYPE a PUBLIC=",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-public-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC>",
+"input":"<!DOCTYPE a PUBLIC>",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-doctype-public-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC?",
+"input":"<!DOCTYPE a PUBLIC?",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-public-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC@",
+"input":"<!DOCTYPE a PUBLIC@",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-public-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLICA",
+"input":"<!DOCTYPE a PUBLICA",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-public-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLICB",
+"input":"<!DOCTYPE a PUBLICB",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-public-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLICY",
+"input":"<!DOCTYPE a PUBLICY",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-public-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLICZ",
+"input":"<!DOCTYPE a PUBLICZ",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-public-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC`",
+"input":"<!DOCTYPE a PUBLIC`",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-public-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLICa",
+"input":"<!DOCTYPE a PUBLICa",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-public-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLICb",
+"input":"<!DOCTYPE a PUBLICb",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-public-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLICy",
+"input":"<!DOCTYPE a PUBLICy",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-public-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLICz",
+"input":"<!DOCTYPE a PUBLICz",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-public-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC{",
+"input":"<!DOCTYPE a PUBLIC{",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-public-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a PUBLIC\\uDBC0\\uDC00",
+"input":"<!DOCTYPE a PUBLIC\uDBC0\uDC00",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-public-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM",
+"input":"<!DOCTYPE a SYSTEM",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "eof-in-doctype", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\\u0000",
+"input":"<!DOCTYPE a SYSTEM\u0000",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 19, "line": 1 },
+    { "code": "unexpected-null-character", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM \\u0000",
+"input":"<!DOCTYPE a SYSTEM \u0000",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 20, "line": 1 },
+    { "code": "unexpected-null-character", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM x\\u0000",
+"input":"<!DOCTYPE a SYSTEM x\u0000",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 20, "line": 1 },
+    { "code": "unexpected-null-character", "line": 1, "col": 21 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\\u0008",
+"input":"<!DOCTYPE a SYSTEM\u0008",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 19 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\\u0009",
+"input":"<!DOCTYPE a SYSTEM\u0009",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "eof-in-doctype", "col": 20, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\\u000A",
+"input":"<!DOCTYPE a SYSTEM\u000A",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "eof-in-doctype", "col": 1, "line": 2 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\\u000B",
+"input":"<!DOCTYPE a SYSTEM\u000B",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 19 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\\u000C",
+"input":"<!DOCTYPE a SYSTEM\u000C",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "eof-in-doctype", "col": 20, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\\u000D",
+"input":"<!DOCTYPE a SYSTEM\u000D",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "eof-in-doctype", "col": 1, "line": 2 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\\u001F",
+"input":"<!DOCTYPE a SYSTEM\u001F",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 19 },
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM ",
+"input":"<!DOCTYPE a SYSTEM ",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors": [
+    { "code": "eof-in-doctype", "col": 20, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM!",
+"input":"<!DOCTYPE a SYSTEM!",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-quote-before-doctype-system-identifier", "col": 19, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"",
+"input":"<!DOCTYPE a SYSTEM\"",
+"output":[["DOCTYPE", "a", null, "", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 20, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"\\u0000",
+"input":"<!DOCTYPE a SYSTEM\"\u0000",
+"output":[["DOCTYPE", "a", null, "\uFFFD", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-null-character", "col": 20, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"\\u0009",
+"input":"<!DOCTYPE a SYSTEM\"\u0009",
+"output":[["DOCTYPE", "a", null, "\u0009", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"\\u000A",
+"input":"<!DOCTYPE a SYSTEM\"\u000A",
+"output":[["DOCTYPE", "a", null, "\u000A", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 1, "line": 2 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"\\u000B",
+"input":"<!DOCTYPE a SYSTEM\"\u000B",
+"output":[["DOCTYPE", "a", null, "\u000B", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 20 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"\\u000C",
+"input":"<!DOCTYPE a SYSTEM\"\u000C",
+"output":[["DOCTYPE", "a", null, "\u000C", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\" ",
+"input":"<!DOCTYPE a SYSTEM\" ",
+"output":[["DOCTYPE", "a", null, " ", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"!",
+"input":"<!DOCTYPE a SYSTEM\"!",
+"output":[["DOCTYPE", "a", null, "!", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"\"",
+"input":"<!DOCTYPE a SYSTEM\"\"",
+"output":[["DOCTYPE", "a", null, "", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"#",
+"input":"<!DOCTYPE a SYSTEM\"#",
+"output":[["DOCTYPE", "a", null, "#", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"&",
+"input":"<!DOCTYPE a SYSTEM\"&",
+"output":[["DOCTYPE", "a", null, "&", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"'",
+"input":"<!DOCTYPE a SYSTEM\"'",
+"output":[["DOCTYPE", "a", null, "'", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"-",
+"input":"<!DOCTYPE a SYSTEM\"-",
+"output":[["DOCTYPE", "a", null, "-", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"/",
+"input":"<!DOCTYPE a SYSTEM\"/",
+"output":[["DOCTYPE", "a", null, "/", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"0",
+"input":"<!DOCTYPE a SYSTEM\"0",
+"output":[["DOCTYPE", "a", null, "0", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"1",
+"input":"<!DOCTYPE a SYSTEM\"1",
+"output":[["DOCTYPE", "a", null, "1", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"9",
+"input":"<!DOCTYPE a SYSTEM\"9",
+"output":[["DOCTYPE", "a", null, "9", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"<",
+"input":"<!DOCTYPE a SYSTEM\"<",
+"output":[["DOCTYPE", "a", null, "<", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"=",
+"input":"<!DOCTYPE a SYSTEM\"=",
+"output":[["DOCTYPE", "a", null, "=", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\">",
+"input":"<!DOCTYPE a SYSTEM\">",
+"output":[["DOCTYPE", "a", null, "", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "abrupt-doctype-system-identifier", "col": 20, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"?",
+"input":"<!DOCTYPE a SYSTEM\"?",
+"output":[["DOCTYPE", "a", null, "?", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"@",
+"input":"<!DOCTYPE a SYSTEM\"@",
+"output":[["DOCTYPE", "a", null, "@", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"A",
+"input":"<!DOCTYPE a SYSTEM\"A",
+"output":[["DOCTYPE", "a", null, "A", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"B",
+"input":"<!DOCTYPE a SYSTEM\"B",
+"output":[["DOCTYPE", "a", null, "B", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"Y",
+"input":"<!DOCTYPE a SYSTEM\"Y",
+"output":[["DOCTYPE", "a", null, "Y", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"Z",
+"input":"<!DOCTYPE a SYSTEM\"Z",
+"output":[["DOCTYPE", "a", null, "Z", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"`",
+"input":"<!DOCTYPE a SYSTEM\"`",
+"output":[["DOCTYPE", "a", null, "`", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"a",
+"input":"<!DOCTYPE a SYSTEM\"a",
+"output":[["DOCTYPE", "a", null, "a", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"b",
+"input":"<!DOCTYPE a SYSTEM\"b",
+"output":[["DOCTYPE", "a", null, "b", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"y",
+"input":"<!DOCTYPE a SYSTEM\"y",
+"output":[["DOCTYPE", "a", null, "y", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"z",
+"input":"<!DOCTYPE a SYSTEM\"z",
+"output":[["DOCTYPE", "a", null, "z", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"{",
+"input":"<!DOCTYPE a SYSTEM\"{",
+"output":[["DOCTYPE", "a", null, "{", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\"\\uDBC0\\uDC00",
+"input":"<!DOCTYPE a SYSTEM\"\uDBC0\uDC00",
+"output":[["DOCTYPE", "a", null, "\uDBC0\uDC00", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 22, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM#",
+"input":"<!DOCTYPE a SYSTEM#",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM&",
+"input":"<!DOCTYPE a SYSTEM&",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'",
+"input":"<!DOCTYPE a SYSTEM'",
+"output":[["DOCTYPE", "a", null, "", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 20, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'\\u0000",
+"input":"<!DOCTYPE a SYSTEM'\u0000",
+"output":[["DOCTYPE", "a", null, "\uFFFD", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-null-character", "col": 20, "line": 1 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'\\u0009",
+"input":"<!DOCTYPE a SYSTEM'\u0009",
+"output":[["DOCTYPE", "a", null, "\u0009", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'\\u000A",
+"input":"<!DOCTYPE a SYSTEM'\u000A",
+"output":[["DOCTYPE", "a", null, "\u000A", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 1, "line": 2 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'\\u000B",
+"input":"<!DOCTYPE a SYSTEM'\u000B",
+"output":[["DOCTYPE", "a", null, "\u000B", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 20 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'\\u000C",
+"input":"<!DOCTYPE a SYSTEM'\u000C",
+"output":[["DOCTYPE", "a", null, "\u000C", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM' ",
+"input":"<!DOCTYPE a SYSTEM' ",
+"output":[["DOCTYPE", "a", null, " ", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'!",
+"input":"<!DOCTYPE a SYSTEM'!",
+"output":[["DOCTYPE", "a", null, "!", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'\"",
+"input":"<!DOCTYPE a SYSTEM'\"",
+"output":[["DOCTYPE", "a", null, "\"", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'&",
+"input":"<!DOCTYPE a SYSTEM'&",
+"output":[["DOCTYPE", "a", null, "&", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''",
+"input":"<!DOCTYPE a SYSTEM''",
+"output":[["DOCTYPE", "a", null, "", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''\\u0000",
+"input":"<!DOCTYPE a SYSTEM''\u0000",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 21, "line": 1 },
+    { "code": "unexpected-null-character", "line": 1, "col": 21 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''\\u0008",
+"input":"<!DOCTYPE a SYSTEM''\u0008",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 21 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 21 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''\\u0009",
+"input":"<!DOCTYPE a SYSTEM''\u0009",
+"output":[["DOCTYPE", "a", null, "", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 22, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''\\u000A",
+"input":"<!DOCTYPE a SYSTEM''\u000A",
+"output":[["DOCTYPE", "a", null, "", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 1, "line": 2 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''\\u000B",
+"input":"<!DOCTYPE a SYSTEM''\u000B",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 21 },
+    { "code" : "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 21}
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''\\u000C",
+"input":"<!DOCTYPE a SYSTEM''\u000C",
+"output":[["DOCTYPE", "a", null, "", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 22, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''\\u000D",
+"input":"<!DOCTYPE a SYSTEM''\u000D",
+"output":[["DOCTYPE", "a", null, "", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 1, "line": 2 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''\\u001F",
+"input":"<!DOCTYPE a SYSTEM''\u001F",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 21 },
+    { "code" : "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 21}
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'' ",
+"input":"<!DOCTYPE a SYSTEM'' ",
+"output":[["DOCTYPE", "a", null, "", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 22, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'' \\u0000",
+"input":"<!DOCTYPE a SYSTEM'' \u0000",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 22, "line": 1 },
+    { "code": "unexpected-null-character", "line": 1, "col": 22 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'' x\\u0000",
+"input":"<!DOCTYPE a SYSTEM'' x\u0000",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 22, "line": 1 },
+    { "code": "unexpected-null-character", "line": 1, "col": 23 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''!",
+"input":"<!DOCTYPE a SYSTEM''!",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''\"",
+"input":"<!DOCTYPE a SYSTEM''\"",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''&",
+"input":"<!DOCTYPE a SYSTEM''&",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'''",
+"input":"<!DOCTYPE a SYSTEM'''",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''-",
+"input":"<!DOCTYPE a SYSTEM''-",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''/",
+"input":"<!DOCTYPE a SYSTEM''/",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''0",
+"input":"<!DOCTYPE a SYSTEM''0",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''1",
+"input":"<!DOCTYPE a SYSTEM''1",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''9",
+"input":"<!DOCTYPE a SYSTEM''9",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''<",
+"input":"<!DOCTYPE a SYSTEM''<",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''=",
+"input":"<!DOCTYPE a SYSTEM''=",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''>",
+"input":"<!DOCTYPE a SYSTEM''>",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''?",
+"input":"<!DOCTYPE a SYSTEM''?",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''@",
+"input":"<!DOCTYPE a SYSTEM''@",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''A",
+"input":"<!DOCTYPE a SYSTEM''A",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''B",
+"input":"<!DOCTYPE a SYSTEM''B",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''Y",
+"input":"<!DOCTYPE a SYSTEM''Y",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''Z",
+"input":"<!DOCTYPE a SYSTEM''Z",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''`",
+"input":"<!DOCTYPE a SYSTEM''`",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''a",
+"input":"<!DOCTYPE a SYSTEM''a",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''b",
+"input":"<!DOCTYPE a SYSTEM''b",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''y",
+"input":"<!DOCTYPE a SYSTEM''y",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''z",
+"input":"<!DOCTYPE a SYSTEM''z",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''{",
+"input":"<!DOCTYPE a SYSTEM''{",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM''\\uDBC0\\uDC00",
+"input":"<!DOCTYPE a SYSTEM''\uDBC0\uDC00",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'(",
+"input":"<!DOCTYPE a SYSTEM'(",
+"output":[["DOCTYPE", "a", null, "(", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'-",
+"input":"<!DOCTYPE a SYSTEM'-",
+"output":[["DOCTYPE", "a", null, "-", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'/",
+"input":"<!DOCTYPE a SYSTEM'/",
+"output":[["DOCTYPE", "a", null, "/", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'0",
+"input":"<!DOCTYPE a SYSTEM'0",
+"output":[["DOCTYPE", "a", null, "0", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'1",
+"input":"<!DOCTYPE a SYSTEM'1",
+"output":[["DOCTYPE", "a", null, "1", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'9",
+"input":"<!DOCTYPE a SYSTEM'9",
+"output":[["DOCTYPE", "a", null, "9", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'<",
+"input":"<!DOCTYPE a SYSTEM'<",
+"output":[["DOCTYPE", "a", null, "<", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'=",
+"input":"<!DOCTYPE a SYSTEM'=",
+"output":[["DOCTYPE", "a", null, "=", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'>",
+"input":"<!DOCTYPE a SYSTEM'>",
+"output":[["DOCTYPE", "a", null, "", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "abrupt-doctype-system-identifier", "col": 20, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'?",
+"input":"<!DOCTYPE a SYSTEM'?",
+"output":[["DOCTYPE", "a", null, "?", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'@",
+"input":"<!DOCTYPE a SYSTEM'@",
+"output":[["DOCTYPE", "a", null, "@", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'A",
+"input":"<!DOCTYPE a SYSTEM'A",
+"output":[["DOCTYPE", "a", null, "A", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'B",
+"input":"<!DOCTYPE a SYSTEM'B",
+"output":[["DOCTYPE", "a", null, "B", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'Y",
+"input":"<!DOCTYPE a SYSTEM'Y",
+"output":[["DOCTYPE", "a", null, "Y", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'Z",
+"input":"<!DOCTYPE a SYSTEM'Z",
+"output":[["DOCTYPE", "a", null, "Z", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'`",
+"input":"<!DOCTYPE a SYSTEM'`",
+"output":[["DOCTYPE", "a", null, "`", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'a",
+"input":"<!DOCTYPE a SYSTEM'a",
+"output":[["DOCTYPE", "a", null, "a", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'b",
+"input":"<!DOCTYPE a SYSTEM'b",
+"output":[["DOCTYPE", "a", null, "b", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'y",
+"input":"<!DOCTYPE a SYSTEM'y",
+"output":[["DOCTYPE", "a", null, "y", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'z",
+"input":"<!DOCTYPE a SYSTEM'z",
+"output":[["DOCTYPE", "a", null, "z", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'{",
+"input":"<!DOCTYPE a SYSTEM'{",
+"output":[["DOCTYPE", "a", null, "{", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 21, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM'\\uDBC0\\uDC00",
+"input":"<!DOCTYPE a SYSTEM'\uDBC0\uDC00",
+"output":[["DOCTYPE", "a", null, "\uDBC0\uDC00", false]],
+"errors":[
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "col": 22, "line": 1 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM(",
+"input":"<!DOCTYPE a SYSTEM(",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM-",
+"input":"<!DOCTYPE a SYSTEM-",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM/",
+"input":"<!DOCTYPE a SYSTEM/",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM0",
+"input":"<!DOCTYPE a SYSTEM0",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM1",
+"input":"<!DOCTYPE a SYSTEM1",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM9",
+"input":"<!DOCTYPE a SYSTEM9",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM<",
+"input":"<!DOCTYPE a SYSTEM<",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM=",
+"input":"<!DOCTYPE a SYSTEM=",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM>",
+"input":"<!DOCTYPE a SYSTEM>",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM?",
+"input":"<!DOCTYPE a SYSTEM?",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM@",
+"input":"<!DOCTYPE a SYSTEM@",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEMA",
+"input":"<!DOCTYPE a SYSTEMA",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEMB",
+"input":"<!DOCTYPE a SYSTEMB",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEMY",
+"input":"<!DOCTYPE a SYSTEMY",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEMZ",
+"input":"<!DOCTYPE a SYSTEMZ",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM`",
+"input":"<!DOCTYPE a SYSTEM`",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEMa",
+"input":"<!DOCTYPE a SYSTEMa",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEMb",
+"input":"<!DOCTYPE a SYSTEMb",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEMy",
+"input":"<!DOCTYPE a SYSTEMy",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEMz",
+"input":"<!DOCTYPE a SYSTEMz",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM{",
+"input":"<!DOCTYPE a SYSTEM{",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a SYSTEM\\uDBC0\\uDC00",
+"input":"<!DOCTYPE a SYSTEM\uDBC0\uDC00",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPE a Y",
+"input":"<!DOCTYPE a Y",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a Z",
+"input":"<!DOCTYPE a Z",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a `",
+"input":"<!DOCTYPE a `",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a a",
+"input":"<!DOCTYPE a a",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a a\\u0000",
+"input":"<!DOCTYPE a a\u0000",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 },
+    { "code": "unexpected-null-character", "line": 1, "col": 14 }
+]},
+
+{"description":"<!DOCTYPE a a\\u0009",
+"input":"<!DOCTYPE a a\u0009",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a a\\u000A",
+"input":"<!DOCTYPE a a\u000A",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a a\\u000B",
+"input":"<!DOCTYPE a a\u000B",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 14 }
+]},
+
+{"description":"<!DOCTYPE a a\\u000C",
+"input":"<!DOCTYPE a a\u000C",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a a ",
+"input":"<!DOCTYPE a a ",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a a!",
+"input":"<!DOCTYPE a a!",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a a\"",
+"input":"<!DOCTYPE a a\"",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a a&",
+"input":"<!DOCTYPE a a&",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a a'",
+"input":"<!DOCTYPE a a'",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a a-",
+"input":"<!DOCTYPE a a-",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a a/",
+"input":"<!DOCTYPE a a/",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a a0",
+"input":"<!DOCTYPE a a0",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a a1",
+"input":"<!DOCTYPE a a1",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a a9",
+"input":"<!DOCTYPE a a9",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a a<",
+"input":"<!DOCTYPE a a<",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a a=",
+"input":"<!DOCTYPE a a=",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a a>",
+"input":"<!DOCTYPE a a>",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a a?",
+"input":"<!DOCTYPE a a?",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a a@",
+"input":"<!DOCTYPE a a@",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a aA",
+"input":"<!DOCTYPE a aA",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a aB",
+"input":"<!DOCTYPE a aB",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a aY",
+"input":"<!DOCTYPE a aY",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a aZ",
+"input":"<!DOCTYPE a aZ",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a a`",
+"input":"<!DOCTYPE a a`",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a aa",
+"input":"<!DOCTYPE a aa",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a ab",
+"input":"<!DOCTYPE a ab",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a ay",
+"input":"<!DOCTYPE a ay",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a az",
+"input":"<!DOCTYPE a az",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a a{",
+"input":"<!DOCTYPE a a{",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a a\\uDBC0\\uDC00",
+"input":"<!DOCTYPE a a\uDBC0\uDC00",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a b",
+"input":"<!DOCTYPE a b",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a y",
+"input":"<!DOCTYPE a y",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a z",
+"input":"<!DOCTYPE a z",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a {",
+"input":"<!DOCTYPE a {",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a \\uDBC0\\uDC00",
+"input":"<!DOCTYPE a \uDBC0\uDC00",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a!",
+"input":"<!DOCTYPE a!",
+"output":[["DOCTYPE", "a!", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a\"",
+"input":"<!DOCTYPE a\"",
+"output":[["DOCTYPE", "a\"", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a&",
+"input":"<!DOCTYPE a&",
+"output":[["DOCTYPE", "a&", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a'",
+"input":"<!DOCTYPE a'",
+"output":[["DOCTYPE", "a'", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a-",
+"input":"<!DOCTYPE a-",
+"output":[["DOCTYPE", "a-", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a/",
+"input":"<!DOCTYPE a/",
+"output":[["DOCTYPE", "a/", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a0",
+"input":"<!DOCTYPE a0",
+"output":[["DOCTYPE", "a0", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a1",
+"input":"<!DOCTYPE a1",
+"output":[["DOCTYPE", "a1", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a9",
+"input":"<!DOCTYPE a9",
+"output":[["DOCTYPE", "a9", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a<",
+"input":"<!DOCTYPE a<",
+"output":[["DOCTYPE", "a<", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a=",
+"input":"<!DOCTYPE a=",
+"output":[["DOCTYPE", "a=", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a>",
+"input":"<!DOCTYPE a>",
+"output":[["DOCTYPE", "a", null, null, true]]},
+
+{"description":"<!DOCTYPE a?",
+"input":"<!DOCTYPE a?",
+"output":[["DOCTYPE", "a?", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a@",
+"input":"<!DOCTYPE a@",
+"output":[["DOCTYPE", "a@", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE aA",
+"input":"<!DOCTYPE aA",
+"output":[["DOCTYPE", "aa", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE aB",
+"input":"<!DOCTYPE aB",
+"output":[["DOCTYPE", "ab", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE aY",
+"input":"<!DOCTYPE aY",
+"output":[["DOCTYPE", "ay", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE aZ",
+"input":"<!DOCTYPE aZ",
+"output":[["DOCTYPE", "az", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a[",
+"input":"<!DOCTYPE a[",
+"output":[["DOCTYPE", "a[", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a`",
+"input":"<!DOCTYPE a`",
+"output":[["DOCTYPE", "a`", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE aa",
+"input":"<!DOCTYPE aa",
+"output":[["DOCTYPE", "aa", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE ab",
+"input":"<!DOCTYPE ab",
+"output":[["DOCTYPE", "ab", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE ay",
+"input":"<!DOCTYPE ay",
+"output":[["DOCTYPE", "ay", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE az",
+"input":"<!DOCTYPE az",
+"output":[["DOCTYPE", "az", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a{",
+"input":"<!DOCTYPE a{",
+"output":[["DOCTYPE", "a{", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE a\\uDBC0\\uDC00",
+"input":"<!DOCTYPE a\uDBC0\uDC00",
+"output":[["DOCTYPE", "a\uDBC0\uDC00", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 14 }
+]},
+
+{"description":"<!DOCTYPE b",
+"input":"<!DOCTYPE b",
+"output":[["DOCTYPE", "b", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE y",
+"input":"<!DOCTYPE y",
+"output":[["DOCTYPE", "y", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE z",
+"input":"<!DOCTYPE z",
+"output":[["DOCTYPE", "z", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE {",
+"input":"<!DOCTYPE {",
+"output":[["DOCTYPE", "{", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPE \\uDBC0\\uDC00",
+"input":"<!DOCTYPE \uDBC0\uDC00",
+"output":[["DOCTYPE", "\uDBC0\uDC00", null, null, false]],
+"errors":[
+   { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPE!",
+"input":"<!DOCTYPE!",
+"output":[["DOCTYPE", "!", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPE\"",
+"input":"<!DOCTYPE\"",
+"output":[["DOCTYPE", "\"", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPE&",
+"input":"<!DOCTYPE&",
+"output":[["DOCTYPE", "&", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPE'",
+"input":"<!DOCTYPE'",
+"output":[["DOCTYPE", "'", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPE-",
+"input":"<!DOCTYPE-",
+"output":[["DOCTYPE", "-", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPE/",
+"input":"<!DOCTYPE/",
+"output":[["DOCTYPE", "/", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPE0",
+"input":"<!DOCTYPE0",
+"output":[["DOCTYPE", "0", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPE1",
+"input":"<!DOCTYPE1",
+"output":[["DOCTYPE", "1", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPE9",
+"input":"<!DOCTYPE9",
+"output":[["DOCTYPE", "9", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPE<",
+"input":"<!DOCTYPE<",
+"output":[["DOCTYPE", "<", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPE=",
+"input":"<!DOCTYPE=",
+"output":[["DOCTYPE", "=", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPE>",
+"input":"<!DOCTYPE>",
+"output":[["DOCTYPE", null, null, null, false]],
+"errors":[
+    { "code": "missing-doctype-name", "line": 1, "col": 10 }
+]},
+
+{"description":"<!DOCTYPE?",
+"input":"<!DOCTYPE?",
+"output":[["DOCTYPE", "?", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPE@",
+"input":"<!DOCTYPE@",
+"output":[["DOCTYPE", "@", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPEA",
+"input":"<!DOCTYPEA",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPEB",
+"input":"<!DOCTYPEB",
+"output":[["DOCTYPE", "b", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPEY",
+"input":"<!DOCTYPEY",
+"output":[["DOCTYPE", "y", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPEZ",
+"input":"<!DOCTYPEZ",
+"output":[["DOCTYPE", "z", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPE`",
+"input":"<!DOCTYPE`",
+"output":[["DOCTYPE", "`", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPEa",
+"input":"<!DOCTYPEa",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPEa\\u0000",
+"input":"<!DOCTYPEa\u0000",
+"output":[["DOCTYPE", "a\uFFFD", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "unexpected-null-character", "line": 1, "col": 11 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa\\u0008",
+"input":"<!DOCTYPEa\u0008",
+"output":[["DOCTYPE", "a\u0008", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 11 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa\\u0009",
+"input":"<!DOCTYPEa\u0009",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa\\u000A",
+"input":"<!DOCTYPEa\u000A",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 2, "col": 1 }
+]},
+
+{"description":"<!DOCTYPEa\\u000B",
+"input":"<!DOCTYPEa\u000B",
+"output":[["DOCTYPE", "a\u000B", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 11 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa\\u000C",
+"input":"<!DOCTYPEa\u000C",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa\\u000D",
+"input":"<!DOCTYPEa\u000D",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 2, "col": 1 }
+]},
+
+{"description":"<!DOCTYPEa\\u001F",
+"input":"<!DOCTYPEa\u001F",
+"output":[["DOCTYPE", "a\u001F", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 11 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa ",
+"input":"<!DOCTYPEa ",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa \\u0000",
+"input":"<!DOCTYPEa \u0000",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 },
+    { "code": "unexpected-null-character", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa \\u0008",
+"input":"<!DOCTYPEa \u0008",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 12 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa \\u0009",
+"input":"<!DOCTYPEa \u0009",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPEa \\u000A",
+"input":"<!DOCTYPEa \u000A",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 2, "col": 1 }
+]},
+
+{"description":"<!DOCTYPEa \\u000B",
+"input":"<!DOCTYPEa \u000B",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 12 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa \\u000C",
+"input":"<!DOCTYPEa \u000C",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPEa \\u000D",
+"input":"<!DOCTYPEa \u000D",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 2, "col": 1 }
+]},
+
+{"description":"<!DOCTYPEa \\u001F",
+"input":"<!DOCTYPEa \u001F",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 12 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa  ",
+"input":"<!DOCTYPEa  ",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPEa !",
+"input":"<!DOCTYPEa !",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa \"",
+"input":"<!DOCTYPEa \"",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa &",
+"input":"<!DOCTYPEa &",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa '",
+"input":"<!DOCTYPEa '",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa -",
+"input":"<!DOCTYPEa -",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa /",
+"input":"<!DOCTYPEa /",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa 0",
+"input":"<!DOCTYPEa 0",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa 1",
+"input":"<!DOCTYPEa 1",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa 9",
+"input":"<!DOCTYPEa 9",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa <",
+"input":"<!DOCTYPEa <",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa =",
+"input":"<!DOCTYPEa =",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa >",
+"input":"<!DOCTYPEa >",
+"output":[["DOCTYPE", "a", null, null, true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 }
+]},
+
+{"description":"<!DOCTYPEa ?",
+"input":"<!DOCTYPEa ?",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa @",
+"input":"<!DOCTYPEa @",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa A",
+"input":"<!DOCTYPEa A",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa B",
+"input":"<!DOCTYPEa B",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC",
+"input":"<!DOCTYPEa PUBLIC",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\\u0000",
+"input":"<!DOCTYPEa PUBLIC\u0000",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 },
+    { "code": "unexpected-null-character", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\\u0008",
+"input":"<!DOCTYPEa PUBLIC\u0008",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\\u0009",
+"input":"<!DOCTYPEa PUBLIC\u0009",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\\u000A",
+"input":"<!DOCTYPEa PUBLIC\u000A",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 2, "col": 1 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\\u000B",
+"input":"<!DOCTYPEa PUBLIC\u000B",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\\u000C",
+"input":"<!DOCTYPEa PUBLIC\u000C",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\\u000D",
+"input":"<!DOCTYPEa PUBLIC\u000D",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 2, "col": 1 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\\u001F",
+"input":"<!DOCTYPEa PUBLIC\u001F",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC ",
+"input":"<!DOCTYPEa PUBLIC ",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC!",
+"input":"<!DOCTYPEa PUBLIC!",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"",
+"input":"<!DOCTYPEa PUBLIC\"",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"\\u0000",
+"input":"<!DOCTYPEa PUBLIC\"\u0000",
+"output":[["DOCTYPE", "a", "\uFFFD", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-null-character", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"\\u0009",
+"input":"<!DOCTYPEa PUBLIC\"\u0009",
+"output":[["DOCTYPE", "a", "\u0009", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"\\u000A",
+"input":"<!DOCTYPEa PUBLIC\"\u000A",
+"output":[["DOCTYPE", "a", "\u000A", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 2, "col": 1 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"\\u000B",
+"input":"<!DOCTYPEa PUBLIC\"\u000B",
+"output":[["DOCTYPE", "a", "\u000B", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"\\u000C",
+"input":"<!DOCTYPEa PUBLIC\"\u000C",
+"output":[["DOCTYPE", "a", "\u000C", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\" ",
+"input":"<!DOCTYPEa PUBLIC\" ",
+"output":[["DOCTYPE", "a", " ", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"!",
+"input":"<!DOCTYPEa PUBLIC\"!",
+"output":[["DOCTYPE", "a", "!", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"\"",
+"input":"<!DOCTYPEa PUBLIC\"\"",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"#",
+"input":"<!DOCTYPEa PUBLIC\"#",
+"output":[["DOCTYPE", "a", "#", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"&",
+"input":"<!DOCTYPEa PUBLIC\"&",
+"output":[["DOCTYPE", "a", "&", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"'",
+"input":"<!DOCTYPEa PUBLIC\"'",
+"output":[["DOCTYPE", "a", "'", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"-",
+"input":"<!DOCTYPEa PUBLIC\"-",
+"output":[["DOCTYPE", "a", "-", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"/",
+"input":"<!DOCTYPEa PUBLIC\"/",
+"output":[["DOCTYPE", "a", "/", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"0",
+"input":"<!DOCTYPEa PUBLIC\"0",
+"output":[["DOCTYPE", "a", "0", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"1",
+"input":"<!DOCTYPEa PUBLIC\"1",
+"output":[["DOCTYPE", "a", "1", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"9",
+"input":"<!DOCTYPEa PUBLIC\"9",
+"output":[["DOCTYPE", "a", "9", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"<",
+"input":"<!DOCTYPEa PUBLIC\"<",
+"output":[["DOCTYPE", "a", "<", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"=",
+"input":"<!DOCTYPEa PUBLIC\"=",
+"output":[["DOCTYPE", "a", "=", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\">",
+"input":"<!DOCTYPEa PUBLIC\">",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "abrupt-doctype-public-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"?",
+"input":"<!DOCTYPEa PUBLIC\"?",
+"output":[["DOCTYPE", "a", "?", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"@",
+"input":"<!DOCTYPEa PUBLIC\"@",
+"output":[["DOCTYPE", "a", "@", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"A",
+"input":"<!DOCTYPEa PUBLIC\"A",
+"output":[["DOCTYPE", "a", "A", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"B",
+"input":"<!DOCTYPEa PUBLIC\"B",
+"output":[["DOCTYPE", "a", "B", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"Y",
+"input":"<!DOCTYPEa PUBLIC\"Y",
+"output":[["DOCTYPE", "a", "Y", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"Z",
+"input":"<!DOCTYPEa PUBLIC\"Z",
+"output":[["DOCTYPE", "a", "Z", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"`",
+"input":"<!DOCTYPEa PUBLIC\"`",
+"output":[["DOCTYPE", "a", "`", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"a",
+"input":"<!DOCTYPEa PUBLIC\"a",
+"output":[["DOCTYPE", "a", "a", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"b",
+"input":"<!DOCTYPEa PUBLIC\"b",
+"output":[["DOCTYPE", "a", "b", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"y",
+"input":"<!DOCTYPEa PUBLIC\"y",
+"output":[["DOCTYPE", "a", "y", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"z",
+"input":"<!DOCTYPEa PUBLIC\"z",
+"output":[["DOCTYPE", "a", "z", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"{",
+"input":"<!DOCTYPEa PUBLIC\"{",
+"output":[["DOCTYPE", "a", "{", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\"\\uDBC0\\uDC00",
+"input":"<!DOCTYPEa PUBLIC\"\uDBC0\uDC00",
+"output":[["DOCTYPE", "a", "\uDBC0\uDC00", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 21 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC#",
+"input":"<!DOCTYPEa PUBLIC#",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC&",
+"input":"<!DOCTYPEa PUBLIC&",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'",
+"input":"<!DOCTYPEa PUBLIC'",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'\\u0000",
+"input":"<!DOCTYPEa PUBLIC'\u0000",
+"output":[["DOCTYPE", "a", "\uFFFD", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-null-character", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'\\u0009",
+"input":"<!DOCTYPEa PUBLIC'\u0009",
+"output":[["DOCTYPE", "a", "\u0009", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'\\u000A",
+"input":"<!DOCTYPEa PUBLIC'\u000A",
+"output":[["DOCTYPE", "a", "\u000A", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 2, "col": 1 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'\\u000B",
+"input":"<!DOCTYPEa PUBLIC'\u000B",
+"output":[["DOCTYPE", "a", "\u000B", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'\\u000C",
+"input":"<!DOCTYPEa PUBLIC'\u000C",
+"output":[["DOCTYPE", "a", "\u000C", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC' ",
+"input":"<!DOCTYPEa PUBLIC' ",
+"output":[["DOCTYPE", "a", " ", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'!",
+"input":"<!DOCTYPEa PUBLIC'!",
+"output":[["DOCTYPE", "a", "!", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'\"",
+"input":"<!DOCTYPEa PUBLIC'\"",
+"output":[["DOCTYPE", "a", "\"", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'&",
+"input":"<!DOCTYPEa PUBLIC'&",
+"output":[["DOCTYPE", "a", "&", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''",
+"input":"<!DOCTYPEa PUBLIC''",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''\\u0000",
+"input":"<!DOCTYPEa PUBLIC''\u0000",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 },
+    { "code": "unexpected-null-character", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''\\u0008",
+"input":"<!DOCTYPEa PUBLIC''\u0008",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 20 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''\\u0009",
+"input":"<!DOCTYPEa PUBLIC''\u0009",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 21 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''\\u000A",
+"input":"<!DOCTYPEa PUBLIC''\u000A",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 2, "col": 1 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''\\u000B",
+"input":"<!DOCTYPEa PUBLIC''\u000B",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 20 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''\\u000C",
+"input":"<!DOCTYPEa PUBLIC''\u000C",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 21 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''\\u000D",
+"input":"<!DOCTYPEa PUBLIC''\u000D",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 2, "col": 1 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''\\u001F",
+"input":"<!DOCTYPEa PUBLIC''\u001F",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 20 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'' ",
+"input":"<!DOCTYPEa PUBLIC'' ",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 21 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''!",
+"input":"<!DOCTYPEa PUBLIC''!",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''\"",
+"input":"<!DOCTYPEa PUBLIC''\"",
+"output":[["DOCTYPE", "a", "", "", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-whitespace-between-doctype-public-and-system-identifiers", "line": 1, "col": 20 },
+    { "code": "eof-in-doctype", "line": 1, "col": 21 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''#",
+"input":"<!DOCTYPEa PUBLIC''#",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''&",
+"input":"<!DOCTYPEa PUBLIC''&",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'''",
+"input":"<!DOCTYPEa PUBLIC'''",
+"output":[["DOCTYPE", "a", "", "", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-whitespace-between-doctype-public-and-system-identifiers", "line": 1, "col": 20 },
+    { "code": "eof-in-doctype", "line": 1, "col": 21 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''(",
+"input":"<!DOCTYPEa PUBLIC''(",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''-",
+"input":"<!DOCTYPEa PUBLIC''-",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''/",
+"input":"<!DOCTYPEa PUBLIC''/",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''0",
+"input":"<!DOCTYPEa PUBLIC''0",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''1",
+"input":"<!DOCTYPEa PUBLIC''1",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''9",
+"input":"<!DOCTYPEa PUBLIC''9",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''<",
+"input":"<!DOCTYPEa PUBLIC''<",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''=",
+"input":"<!DOCTYPEa PUBLIC''=",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''>",
+"input":"<!DOCTYPEa PUBLIC''>",
+"output":[["DOCTYPE", "a", "", null, true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''?",
+"input":"<!DOCTYPEa PUBLIC''?",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''@",
+"input":"<!DOCTYPEa PUBLIC''@",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''A",
+"input":"<!DOCTYPEa PUBLIC''A",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''B",
+"input":"<!DOCTYPEa PUBLIC''B",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''Y",
+"input":"<!DOCTYPEa PUBLIC''Y",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''Z",
+"input":"<!DOCTYPEa PUBLIC''Z",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''`",
+"input":"<!DOCTYPEa PUBLIC''`",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''a",
+"input":"<!DOCTYPEa PUBLIC''a",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''b",
+"input":"<!DOCTYPEa PUBLIC''b",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''y",
+"input":"<!DOCTYPEa PUBLIC''y",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''z",
+"input":"<!DOCTYPEa PUBLIC''z",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''{",
+"input":"<!DOCTYPEa PUBLIC''{",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC''\\uDBC0\\uDC00",
+"input":"<!DOCTYPEa PUBLIC''\uDBC0\uDC00",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'(",
+"input":"<!DOCTYPEa PUBLIC'(",
+"output":[["DOCTYPE", "a", "(", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'-",
+"input":"<!DOCTYPEa PUBLIC'-",
+"output":[["DOCTYPE", "a", "-", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'/",
+"input":"<!DOCTYPEa PUBLIC'/",
+"output":[["DOCTYPE", "a", "/", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'0",
+"input":"<!DOCTYPEa PUBLIC'0",
+"output":[["DOCTYPE", "a", "0", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'1",
+"input":"<!DOCTYPEa PUBLIC'1",
+"output":[["DOCTYPE", "a", "1", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'9",
+"input":"<!DOCTYPEa PUBLIC'9",
+"output":[["DOCTYPE", "a", "9", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'<",
+"input":"<!DOCTYPEa PUBLIC'<",
+"output":[["DOCTYPE", "a", "<", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'=",
+"input":"<!DOCTYPEa PUBLIC'=",
+"output":[["DOCTYPE", "a", "=", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'>",
+"input":"<!DOCTYPEa PUBLIC'>",
+"output":[["DOCTYPE", "a", "", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "abrupt-doctype-public-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'?",
+"input":"<!DOCTYPEa PUBLIC'?",
+"output":[["DOCTYPE", "a", "?", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'@",
+"input":"<!DOCTYPEa PUBLIC'@",
+"output":[["DOCTYPE", "a", "@", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'A",
+"input":"<!DOCTYPEa PUBLIC'A",
+"output":[["DOCTYPE", "a", "A", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'B",
+"input":"<!DOCTYPEa PUBLIC'B",
+"output":[["DOCTYPE", "a", "B", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'Y",
+"input":"<!DOCTYPEa PUBLIC'Y",
+"output":[["DOCTYPE", "a", "Y", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'Z",
+"input":"<!DOCTYPEa PUBLIC'Z",
+"output":[["DOCTYPE", "a", "Z", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'`",
+"input":"<!DOCTYPEa PUBLIC'`",
+"output":[["DOCTYPE", "a", "`", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'a",
+"input":"<!DOCTYPEa PUBLIC'a",
+"output":[["DOCTYPE", "a", "a", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'b",
+"input":"<!DOCTYPEa PUBLIC'b",
+"output":[["DOCTYPE", "a", "b", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'y",
+"input":"<!DOCTYPEa PUBLIC'y",
+"output":[["DOCTYPE", "a", "y", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'z",
+"input":"<!DOCTYPEa PUBLIC'z",
+"output":[["DOCTYPE", "a", "z", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'{",
+"input":"<!DOCTYPEa PUBLIC'{",
+"output":[["DOCTYPE", "a", "{", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC'\\uDBC0\\uDC00",
+"input":"<!DOCTYPEa PUBLIC'\uDBC0\uDC00",
+"output":[["DOCTYPE", "a", "\uDBC0\uDC00", null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 21 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC(",
+"input":"<!DOCTYPEa PUBLIC(",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC-",
+"input":"<!DOCTYPEa PUBLIC-",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC/",
+"input":"<!DOCTYPEa PUBLIC/",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC0",
+"input":"<!DOCTYPEa PUBLIC0",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC1",
+"input":"<!DOCTYPEa PUBLIC1",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC9",
+"input":"<!DOCTYPEa PUBLIC9",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC<",
+"input":"<!DOCTYPEa PUBLIC<",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC=",
+"input":"<!DOCTYPEa PUBLIC=",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC>",
+"input":"<!DOCTYPEa PUBLIC>",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC?",
+"input":"<!DOCTYPEa PUBLIC?",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC@",
+"input":"<!DOCTYPEa PUBLIC@",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLICA",
+"input":"<!DOCTYPEa PUBLICA",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLICB",
+"input":"<!DOCTYPEa PUBLICB",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLICY",
+"input":"<!DOCTYPEa PUBLICY",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLICZ",
+"input":"<!DOCTYPEa PUBLICZ",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC`",
+"input":"<!DOCTYPEa PUBLIC`",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLICa",
+"input":"<!DOCTYPEa PUBLICa",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLICb",
+"input":"<!DOCTYPEa PUBLICb",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLICy",
+"input":"<!DOCTYPEa PUBLICy",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLICz",
+"input":"<!DOCTYPEa PUBLICz",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC{",
+"input":"<!DOCTYPEa PUBLIC{",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa PUBLIC\\uDBC0\\uDC00",
+"input":"<!DOCTYPEa PUBLIC\uDBC0\uDC00",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-public-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM",
+"input":"<!DOCTYPEa SYSTEM",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\\u0000",
+"input":"<!DOCTYPEa SYSTEM\u0000",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 },
+    { "code": "unexpected-null-character", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\\u0008",
+"input":"<!DOCTYPEa SYSTEM\u0008",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\\u0009",
+"input":"<!DOCTYPEa SYSTEM\u0009",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\\u000A",
+"input":"<!DOCTYPEa SYSTEM\u000A",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 2, "col": 1 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\\u000B",
+"input":"<!DOCTYPEa SYSTEM\u000B",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\\u000C",
+"input":"<!DOCTYPEa SYSTEM\u000C",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\\u000D",
+"input":"<!DOCTYPEa SYSTEM\u000D",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 2, "col": 1 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\\u001F",
+"input":"<!DOCTYPEa SYSTEM\u001F",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 18 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM ",
+"input":"<!DOCTYPEa SYSTEM ",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM!",
+"input":"<!DOCTYPEa SYSTEM!",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"",
+"input":"<!DOCTYPEa SYSTEM\"",
+"output":[["DOCTYPE", "a", null, "", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"\\u0000",
+"input":"<!DOCTYPEa SYSTEM\"\u0000",
+"output":[["DOCTYPE", "a", null, "\uFFFD", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-null-character", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"\\u0009",
+"input":"<!DOCTYPEa SYSTEM\"\u0009",
+"output":[["DOCTYPE", "a", null, "\u0009", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"\\u000A",
+"input":"<!DOCTYPEa SYSTEM\"\u000A",
+"output":[["DOCTYPE", "a", null, "\u000A", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 2, "col": 1 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"\\u000B",
+"input":"<!DOCTYPEa SYSTEM\"\u000B",
+"output":[["DOCTYPE", "a", null, "\u000B", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"\\u000C",
+"input":"<!DOCTYPEa SYSTEM\"\u000C",
+"output":[["DOCTYPE", "a", null, "\u000C", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\" ",
+"input":"<!DOCTYPEa SYSTEM\" ",
+"output":[["DOCTYPE", "a", null, " ", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"!",
+"input":"<!DOCTYPEa SYSTEM\"!",
+"output":[["DOCTYPE", "a", null, "!", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"\"",
+"input":"<!DOCTYPEa SYSTEM\"\"",
+"output":[["DOCTYPE", "a", null, "", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"#",
+"input":"<!DOCTYPEa SYSTEM\"#",
+"output":[["DOCTYPE", "a", null, "#", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"&",
+"input":"<!DOCTYPEa SYSTEM\"&",
+"output":[["DOCTYPE", "a", null, "&", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"'",
+"input":"<!DOCTYPEa SYSTEM\"'",
+"output":[["DOCTYPE", "a", null, "'", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"-",
+"input":"<!DOCTYPEa SYSTEM\"-",
+"output":[["DOCTYPE", "a", null, "-", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"/",
+"input":"<!DOCTYPEa SYSTEM\"/",
+"output":[["DOCTYPE", "a", null, "/", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"0",
+"input":"<!DOCTYPEa SYSTEM\"0",
+"output":[["DOCTYPE", "a", null, "0", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"1",
+"input":"<!DOCTYPEa SYSTEM\"1",
+"output":[["DOCTYPE", "a", null, "1", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"9",
+"input":"<!DOCTYPEa SYSTEM\"9",
+"output":[["DOCTYPE", "a", null, "9", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"<",
+"input":"<!DOCTYPEa SYSTEM\"<",
+"output":[["DOCTYPE", "a", null, "<", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"=",
+"input":"<!DOCTYPEa SYSTEM\"=",
+"output":[["DOCTYPE", "a", null, "=", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\">",
+"input":"<!DOCTYPEa SYSTEM\">",
+"output":[["DOCTYPE", "a", null, "", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "abrupt-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"?",
+"input":"<!DOCTYPEa SYSTEM\"?",
+"output":[["DOCTYPE", "a", null, "?", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"@",
+"input":"<!DOCTYPEa SYSTEM\"@",
+"output":[["DOCTYPE", "a", null, "@", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"A",
+"input":"<!DOCTYPEa SYSTEM\"A",
+"output":[["DOCTYPE", "a", null, "A", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"B",
+"input":"<!DOCTYPEa SYSTEM\"B",
+"output":[["DOCTYPE", "a", null, "B", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"Y",
+"input":"<!DOCTYPEa SYSTEM\"Y",
+"output":[["DOCTYPE", "a", null, "Y", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"Z",
+"input":"<!DOCTYPEa SYSTEM\"Z",
+"output":[["DOCTYPE", "a", null, "Z", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"`",
+"input":"<!DOCTYPEa SYSTEM\"`",
+"output":[["DOCTYPE", "a", null, "`", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"a",
+"input":"<!DOCTYPEa SYSTEM\"a",
+"output":[["DOCTYPE", "a", null, "a", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"b",
+"input":"<!DOCTYPEa SYSTEM\"b",
+"output":[["DOCTYPE", "a", null, "b", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"y",
+"input":"<!DOCTYPEa SYSTEM\"y",
+"output":[["DOCTYPE", "a", null, "y", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"z",
+"input":"<!DOCTYPEa SYSTEM\"z",
+"output":[["DOCTYPE", "a", null, "z", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"{",
+"input":"<!DOCTYPEa SYSTEM\"{",
+"output":[["DOCTYPE", "a", null, "{", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\"\\uDBC0\\uDC00",
+"input":"<!DOCTYPEa SYSTEM\"\uDBC0\uDC00",
+"output":[["DOCTYPE", "a", null, "\uDBC0\uDC00", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 21 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM#",
+"input":"<!DOCTYPEa SYSTEM#",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM&",
+"input":"<!DOCTYPEa SYSTEM&",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'",
+"input":"<!DOCTYPEa SYSTEM'",
+"output":[["DOCTYPE", "a", null, "", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'\\u0000",
+"input":"<!DOCTYPEa SYSTEM'\u0000",
+"output":[["DOCTYPE", "a", null, "\uFFFD", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-null-character", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'\\u0009",
+"input":"<!DOCTYPEa SYSTEM'\u0009",
+"output":[["DOCTYPE", "a", null, "\u0009", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'\\u000A",
+"input":"<!DOCTYPEa SYSTEM'\u000A",
+"output":[["DOCTYPE", "a", null, "\u000A", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 2, "col": 1 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'\\u000B",
+"input":"<!DOCTYPEa SYSTEM'\u000B",
+"output":[["DOCTYPE", "a", null, "\u000B", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 19 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'\\u000C",
+"input":"<!DOCTYPEa SYSTEM'\u000C",
+"output":[["DOCTYPE", "a", null, "\u000C", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM' ",
+"input":"<!DOCTYPEa SYSTEM' ",
+"output":[["DOCTYPE", "a", null, " ", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'!",
+"input":"<!DOCTYPEa SYSTEM'!",
+"output":[["DOCTYPE", "a", null, "!", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'\"",
+"input":"<!DOCTYPEa SYSTEM'\"",
+"output":[["DOCTYPE", "a", null, "\"", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'&",
+"input":"<!DOCTYPEa SYSTEM'&",
+"output":[["DOCTYPE", "a", null, "&", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''",
+"input":"<!DOCTYPEa SYSTEM''",
+"output":[["DOCTYPE", "a", null, "", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''\\u0000",
+"input":"<!DOCTYPEa SYSTEM''\u0000",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 },
+    { "code": "unexpected-null-character", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''\\u0008",
+"input":"<!DOCTYPEa SYSTEM''\u0008",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 20 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''\\u0009",
+"input":"<!DOCTYPEa SYSTEM''\u0009",
+"output":[["DOCTYPE", "a", null, "", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 21 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''\\u000A",
+"input":"<!DOCTYPEa SYSTEM''\u000A",
+"output":[["DOCTYPE", "a", null, "", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 2, "col": 1 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''\\u000B",
+"input":"<!DOCTYPEa SYSTEM''\u000B",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 20 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''\\u000C",
+"input":"<!DOCTYPEa SYSTEM''\u000C",
+"output":[["DOCTYPE", "a", null, "", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 21 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''\\u000D",
+"input":"<!DOCTYPEa SYSTEM''\u000D",
+"output":[["DOCTYPE", "a", null, "", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 2, "col": 1 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''\\u001F",
+"input":"<!DOCTYPEa SYSTEM''\u001F",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 20 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'' ",
+"input":"<!DOCTYPEa SYSTEM'' ",
+"output":[["DOCTYPE", "a", null, "", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+     { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 21 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''!",
+"input":"<!DOCTYPEa SYSTEM''!",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''\"",
+"input":"<!DOCTYPEa SYSTEM''\"",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''&",
+"input":"<!DOCTYPEa SYSTEM''&",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'''",
+"input":"<!DOCTYPEa SYSTEM'''",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''-",
+"input":"<!DOCTYPEa SYSTEM''-",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''/",
+"input":"<!DOCTYPEa SYSTEM''/",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''0",
+"input":"<!DOCTYPEa SYSTEM''0",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''1",
+"input":"<!DOCTYPEa SYSTEM''1",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''9",
+"input":"<!DOCTYPEa SYSTEM''9",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''<",
+"input":"<!DOCTYPEa SYSTEM''<",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''=",
+"input":"<!DOCTYPEa SYSTEM''=",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''>",
+"input":"<!DOCTYPEa SYSTEM''>",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''?",
+"input":"<!DOCTYPEa SYSTEM''?",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''@",
+"input":"<!DOCTYPEa SYSTEM''@",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''A",
+"input":"<!DOCTYPEa SYSTEM''A",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''B",
+"input":"<!DOCTYPEa SYSTEM''B",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''Y",
+"input":"<!DOCTYPEa SYSTEM''Y",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''Z",
+"input":"<!DOCTYPEa SYSTEM''Z",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''`",
+"input":"<!DOCTYPEa SYSTEM''`",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''a",
+"input":"<!DOCTYPEa SYSTEM''a",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''b",
+"input":"<!DOCTYPEa SYSTEM''b",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''y",
+"input":"<!DOCTYPEa SYSTEM''y",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''z",
+"input":"<!DOCTYPEa SYSTEM''z",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''{",
+"input":"<!DOCTYPEa SYSTEM''{",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM''\\uDBC0\\uDC00",
+"input":"<!DOCTYPEa SYSTEM''\uDBC0\uDC00",
+"output":[["DOCTYPE", "a", null, "", true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "unexpected-character-after-doctype-system-identifier", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'(",
+"input":"<!DOCTYPEa SYSTEM'(",
+"output":[["DOCTYPE", "a", null, "(", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'-",
+"input":"<!DOCTYPEa SYSTEM'-",
+"output":[["DOCTYPE", "a", null, "-", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'/",
+"input":"<!DOCTYPEa SYSTEM'/",
+"output":[["DOCTYPE", "a", null, "/", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'0",
+"input":"<!DOCTYPEa SYSTEM'0",
+"output":[["DOCTYPE", "a", null, "0", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'1",
+"input":"<!DOCTYPEa SYSTEM'1",
+"output":[["DOCTYPE", "a", null, "1", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'9",
+"input":"<!DOCTYPEa SYSTEM'9",
+"output":[["DOCTYPE", "a", null, "9", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'<",
+"input":"<!DOCTYPEa SYSTEM'<",
+"output":[["DOCTYPE", "a", null, "<", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'=",
+"input":"<!DOCTYPEa SYSTEM'=",
+"output":[["DOCTYPE", "a", null, "=", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'>",
+"input":"<!DOCTYPEa SYSTEM'>",
+"output":[["DOCTYPE", "a", null, "", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "abrupt-doctype-system-identifier", "line": 1, "col": 19 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'?",
+"input":"<!DOCTYPEa SYSTEM'?",
+"output":[["DOCTYPE", "a", null, "?", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'@",
+"input":"<!DOCTYPEa SYSTEM'@",
+"output":[["DOCTYPE", "a", null, "@", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'A",
+"input":"<!DOCTYPEa SYSTEM'A",
+"output":[["DOCTYPE", "a", null, "A", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'B",
+"input":"<!DOCTYPEa SYSTEM'B",
+"output":[["DOCTYPE", "a", null, "B", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'Y",
+"input":"<!DOCTYPEa SYSTEM'Y",
+"output":[["DOCTYPE", "a", null, "Y", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'Z",
+"input":"<!DOCTYPEa SYSTEM'Z",
+"output":[["DOCTYPE", "a", null, "Z", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'`",
+"input":"<!DOCTYPEa SYSTEM'`",
+"output":[["DOCTYPE", "a", null, "`", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'a",
+"input":"<!DOCTYPEa SYSTEM'a",
+"output":[["DOCTYPE", "a", null, "a", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'b",
+"input":"<!DOCTYPEa SYSTEM'b",
+"output":[["DOCTYPE", "a", null, "b", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'y",
+"input":"<!DOCTYPEa SYSTEM'y",
+"output":[["DOCTYPE", "a", null, "y", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'z",
+"input":"<!DOCTYPEa SYSTEM'z",
+"output":[["DOCTYPE", "a", null, "z", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'{",
+"input":"<!DOCTYPEa SYSTEM'{",
+"output":[["DOCTYPE", "a", null, "{", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 20 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM'\\uDBC0\\uDC00",
+"input":"<!DOCTYPEa SYSTEM'\uDBC0\uDC00",
+"output":[["DOCTYPE", "a", null, "\uDBC0\uDC00", false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 },
+    { "code": "eof-in-doctype", "line": 1, "col": 21 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM(",
+"input":"<!DOCTYPEa SYSTEM(",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM-",
+"input":"<!DOCTYPEa SYSTEM-",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM/",
+"input":"<!DOCTYPEa SYSTEM/",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM0",
+"input":"<!DOCTYPEa SYSTEM0",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM1",
+"input":"<!DOCTYPEa SYSTEM1",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM9",
+"input":"<!DOCTYPEa SYSTEM9",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM<",
+"input":"<!DOCTYPEa SYSTEM<",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM=",
+"input":"<!DOCTYPEa SYSTEM=",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM>",
+"input":"<!DOCTYPEa SYSTEM>",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM?",
+"input":"<!DOCTYPEa SYSTEM?",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM@",
+"input":"<!DOCTYPEa SYSTEM@",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEMA",
+"input":"<!DOCTYPEa SYSTEMA",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEMB",
+"input":"<!DOCTYPEa SYSTEMB",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEMY",
+"input":"<!DOCTYPEa SYSTEMY",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEMZ",
+"input":"<!DOCTYPEa SYSTEMZ",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM`",
+"input":"<!DOCTYPEa SYSTEM`",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEMa",
+"input":"<!DOCTYPEa SYSTEMa",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEMb",
+"input":"<!DOCTYPEa SYSTEMb",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEMy",
+"input":"<!DOCTYPEa SYSTEMy",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEMz",
+"input":"<!DOCTYPEa SYSTEMz",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM{",
+"input":"<!DOCTYPEa SYSTEM{",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa SYSTEM\\uDBC0\\uDC00",
+"input":"<!DOCTYPEa SYSTEM\uDBC0\uDC00",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "missing-quote-before-doctype-system-identifier", "line": 1, "col": 18 }
+]},
+
+{"description":"<!DOCTYPEa Y",
+"input":"<!DOCTYPEa Y",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa Z",
+"input":"<!DOCTYPEa Z",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa `",
+"input":"<!DOCTYPEa `",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa a",
+"input":"<!DOCTYPEa a",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa a\\u0000",
+"input":"<!DOCTYPEa a\u0000",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 },
+    { "code": "unexpected-null-character", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPEa a\\u0009",
+"input":"<!DOCTYPEa a\u0009",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa a\\u000A",
+"input":"<!DOCTYPEa a\u000A",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa a\\u000B",
+"input":"<!DOCTYPEa a\u000B",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPEa a\\u000C",
+"input":"<!DOCTYPEa a\u000C",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa a ",
+"input":"<!DOCTYPEa a ",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa a!",
+"input":"<!DOCTYPEa a!",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa a\"",
+"input":"<!DOCTYPEa a\"",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa a&",
+"input":"<!DOCTYPEa a&",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa a'",
+"input":"<!DOCTYPEa a'",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa a-",
+"input":"<!DOCTYPEa a-",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa a/",
+"input":"<!DOCTYPEa a/",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa a0",
+"input":"<!DOCTYPEa a0",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa a1",
+"input":"<!DOCTYPEa a1",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa a9",
+"input":"<!DOCTYPEa a9",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa a<",
+"input":"<!DOCTYPEa a<",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa a=",
+"input":"<!DOCTYPEa a=",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa a>",
+"input":"<!DOCTYPEa a>",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa a?",
+"input":"<!DOCTYPEa a?",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa a@",
+"input":"<!DOCTYPEa a@",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa aA",
+"input":"<!DOCTYPEa aA",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa aB",
+"input":"<!DOCTYPEa aB",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa aY",
+"input":"<!DOCTYPEa aY",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa aZ",
+"input":"<!DOCTYPEa aZ",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa a`",
+"input":"<!DOCTYPEa a`",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa aa",
+"input":"<!DOCTYPEa aa",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa ab",
+"input":"<!DOCTYPEa ab",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa ay",
+"input":"<!DOCTYPEa ay",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa az",
+"input":"<!DOCTYPEa az",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa a{",
+"input":"<!DOCTYPEa a{",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa a\\uDBC0\\uDC00",
+"input":"<!DOCTYPEa a\uDBC0\uDC00",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa b",
+"input":"<!DOCTYPEa b",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa y",
+"input":"<!DOCTYPEa y",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa z",
+"input":"<!DOCTYPEa z",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa {",
+"input":"<!DOCTYPEa {",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa \\uDBC0\\uDC00",
+"input":"<!DOCTYPEa \uDBC0\uDC00",
+"output":[["DOCTYPE", "a", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa!",
+"input":"<!DOCTYPEa!",
+"output":[["DOCTYPE", "a!", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa\"",
+"input":"<!DOCTYPEa\"",
+"output":[["DOCTYPE", "a\"", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa&",
+"input":"<!DOCTYPEa&",
+"output":[["DOCTYPE", "a&", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa'",
+"input":"<!DOCTYPEa'",
+"output":[["DOCTYPE", "a'", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa-",
+"input":"<!DOCTYPEa-",
+"output":[["DOCTYPE", "a-", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa/",
+"input":"<!DOCTYPEa/",
+"output":[["DOCTYPE", "a/", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa0",
+"input":"<!DOCTYPEa0",
+"output":[["DOCTYPE", "a0", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa1",
+"input":"<!DOCTYPEa1",
+"output":[["DOCTYPE", "a1", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa9",
+"input":"<!DOCTYPEa9",
+"output":[["DOCTYPE", "a9", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa<",
+"input":"<!DOCTYPEa<",
+"output":[["DOCTYPE", "a<", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa=",
+"input":"<!DOCTYPEa=",
+"output":[["DOCTYPE", "a=", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa>",
+"input":"<!DOCTYPEa>",
+"output":[["DOCTYPE", "a", null, null, true]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 }
+]},
+
+{"description":"<!DOCTYPEa?",
+"input":"<!DOCTYPEa?",
+"output":[["DOCTYPE", "a?", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa@",
+"input":"<!DOCTYPEa@",
+"output":[["DOCTYPE", "a@", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEaA",
+"input":"<!DOCTYPEaA",
+"output":[["DOCTYPE", "aa", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEaB",
+"input":"<!DOCTYPEaB",
+"output":[["DOCTYPE", "ab", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEaY",
+"input":"<!DOCTYPEaY",
+"output":[["DOCTYPE", "ay", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEaZ",
+"input":"<!DOCTYPEaZ",
+"output":[["DOCTYPE", "az", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa[",
+"input":"<!DOCTYPEa[",
+"output":[["DOCTYPE", "a[", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa`",
+"input":"<!DOCTYPEa`",
+"output":[["DOCTYPE", "a`", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEaa",
+"input":"<!DOCTYPEaa",
+"output":[["DOCTYPE", "aa", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEab",
+"input":"<!DOCTYPEab",
+"output":[["DOCTYPE", "ab", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEay",
+"input":"<!DOCTYPEay",
+"output":[["DOCTYPE", "ay", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEaz",
+"input":"<!DOCTYPEaz",
+"output":[["DOCTYPE", "az", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa{",
+"input":"<!DOCTYPEa{",
+"output":[["DOCTYPE", "a{", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!DOCTYPEa\\uDBC0\\uDC00",
+"input":"<!DOCTYPEa\uDBC0\uDC00",
+"output":[["DOCTYPE", "a\uDBC0\uDC00", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 13 }
+]},
+
+{"description":"<!DOCTYPEb",
+"input":"<!DOCTYPEb",
+"output":[["DOCTYPE", "b", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPEy",
+"input":"<!DOCTYPEy",
+"output":[["DOCTYPE", "y", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPEz",
+"input":"<!DOCTYPEz",
+"output":[["DOCTYPE", "z", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPE{",
+"input":"<!DOCTYPE{",
+"output":[["DOCTYPE", "{", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 11 }
+]},
+
+{"description":"<!DOCTYPE\\uDBC0\\uDC00",
+"input":"<!DOCTYPE\uDBC0\uDC00",
+"output":[["DOCTYPE", "\uDBC0\uDC00", null, null, false]],
+"errors":[
+    { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 },
+    { "code": "eof-in-doctype", "line": 1, "col": 12 }
+]},
+
+{"description":"<!Y",
+"input":"<!Y",
+"output":[["Comment", "Y"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!Z",
+"input":"<!Z",
+"output":[["Comment", "Z"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!`",
+"input":"<!`",
+"output":[["Comment", "`"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!a",
+"input":"<!a",
+"output":[["Comment", "a"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!b",
+"input":"<!b",
+"output":[["Comment", "b"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!y",
+"input":"<!y",
+"output":[["Comment", "y"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!z",
+"input":"<!z",
+"output":[["Comment", "z"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!{",
+"input":"<!{",
+"output":[["Comment", "{"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<!\\uDBC0\\uDC00",
+"input":"<!\uDBC0\uDC00",
+"output":[["Comment", "\uDBC0\uDC00"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
+]},
+
+{"description":"<\"",
+"input":"<\"",
+"output":[["Character", "<\""]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<&",
+"input":"<&",
+"output":[["Character", "<&"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<'",
+"input":"<'",
+"output":[["Character", "<'"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<-",
+"input":"<-",
+"output":[["Character", "<-"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<.",
+"input":"<.",
+"output":[["Character", "<."]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"</",
+"input":"</",
+"output":[["Character", "</"]],
+"errors":[
+    { "code": "eof-before-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"</\\u0000",
+"input":"</\u0000",
+"output":[["Comment", "\uFFFD"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 },
+    { "code": "unexpected-null-character", "line": 1, "col": 3 }
+]},
+
+{"description":"</\\u0009",
+"input":"</\u0009",
+"output":[["Comment", "\u0009"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"</\\u000A",
+"input":"</\u000A",
+"output":[["Comment", "\u000A"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"</\\u000B",
+"input":"</\u000B",
+"output":[["Comment", "\u000B"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 3 },
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"</\\u000C",
+"input":"</\u000C",
+"output":[["Comment", "\u000C"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"</ ",
+"input":"</ ",
+"output":[["Comment", " "]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"</ \\u0000",
+"input":"</ \u0000",
+"output":[["Comment", " \uFFFD"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 },
+    { "code": "unexpected-null-character", "line": 1, "col": 4 }
+]},
+
+{"description":"</!",
+"input":"</!",
+"output":[["Comment", "!"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"</\"",
+"input":"</\"",
+"output":[["Comment", "\""]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"</&",
+"input":"</&",
+"output":[["Comment", "&"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"</'",
+"input":"</'",
+"output":[["Comment", "'"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"</-",
+"input":"</-",
+"output":[["Comment", "-"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"<//",
+"input":"<//",
+"output":[["Comment", "/"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"</0",
+"input":"</0",
+"output":[["Comment", "0"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"</1",
+"input":"</1",
+"output":[["Comment", "1"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"</9",
+"input":"</9",
+"output":[["Comment", "9"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"</<",
+"input":"</<",
+"output":[["Comment", "<"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"</=",
+"input":"</=",
+"output":[["Comment", "="]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"</>",
+"input":"</>",
+"output":[],
+"errors":[
+    { "code": "missing-end-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"</?",
+"input":"</?",
+"output":[["Comment", "?"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"</@",
+"input":"</@",
+"output":[["Comment", "@"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"</A>",
+"input":"</A>",
+"output":[["EndTag", "a"]]},
+
+{"description":"</B>",
+"input":"</B>",
+"output":[["EndTag", "b"]]},
+
+{"description":"</Y>",
+"input":"</Y>",
+"output":[["EndTag", "y"]]},
+
+{"description":"</Z>",
+"input":"</Z>",
+"output":[["EndTag", "z"]]},
+
+{"description":"</[",
+"input":"</[",
+"output":[["Comment", "["]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"</`",
+"input":"</`",
+"output":[["Comment", "`"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"</a>",
+"input":"</a>",
+"output":[["EndTag", "a"]]},
+
+{"description":"</b>",
+"input":"</b>",
+"output":[["EndTag", "b"]]},
+
+{"description":"</y>",
+"input":"</y>",
+"output":[["EndTag", "y"]]},
+
+{"description":"</z>",
+"input":"</z>",
+"output":[["EndTag", "z"]]},
+
+{"description":"</{",
+"input":"</{",
+"output":[["Comment", "{"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"</\\uDBC0\\uDC00",
+"input":"</\uDBC0\uDC00",
+"output":[["Comment", "\uDBC0\uDC00"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"<0",
+"input":"<0",
+"output":[["Character", "<0"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<1",
+"input":"<1",
+"output":[["Character", "<1"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<9",
+"input":"<9",
+"output":[["Character", "<9"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<<",
+"input":"<<",
+"output":[["Character", "<<"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 },
+    { "code": "eof-before-tag-name", "line": 1, "col": 3 }
+]},
+
+{"description":"<=",
+"input":"<=",
+"output":[["Character", "<="]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<>",
+"input":"<>",
+"output":[["Character", "<>"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?",
+"input":"<?",
+"output":[["Comment", "?"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?\\u0000",
+"input":"<?\u0000",
+"output":[["Comment", "?\uFFFD"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 },
+    { "code": "unexpected-null-character", "line": 1, "col": 3 }
+]},
+
+{"description":"<?\\u0009",
+"input":"<?\u0009",
+"output":[["Comment", "?\u0009"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?\\u000A",
+"input":"<?\u000A",
+"output":[["Comment", "?\u000A"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?\\u000B",
+"input":"<?\u000B",
+"output":[["Comment", "?\u000B"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 3 }
+]},
+
+{"description":"<?\\u000C",
+"input":"<?\u000C",
+"output":[["Comment", "?\u000C"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<? ",
+"input":"<? ",
+"output":[["Comment", "? "]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<? \\u0000",
+"input":"<? \u0000",
+"output":[["Comment", "? \uFFFD"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 },
+    { "code": "unexpected-null-character", "line": 1, "col": 4 }
+]},
+
+{"description":"<?!",
+"input":"<?!",
+"output":[["Comment", "?!"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?\"",
+"input":"<?\"",
+"output":[["Comment", "?\""]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?&",
+"input":"<?&",
+"output":[["Comment", "?&"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?'",
+"input":"<?'",
+"output":[["Comment", "?'"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?-",
+"input":"<?-",
+"output":[["Comment", "?-"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?/",
+"input":"<?/",
+"output":[["Comment", "?/"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?0",
+"input":"<?0",
+"output":[["Comment", "?0"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?1",
+"input":"<?1",
+"output":[["Comment", "?1"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?9",
+"input":"<?9",
+"output":[["Comment", "?9"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?<",
+"input":"<?<",
+"output":[["Comment", "?<"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?=",
+"input":"<?=",
+"output":[["Comment", "?="]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?>",
+"input":"<?>",
+"output":[["Comment", "?"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<??",
+"input":"<??",
+"output":[["Comment", "??"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?@",
+"input":"<?@",
+"output":[["Comment", "?@"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?A",
+"input":"<?A",
+"output":[["Comment", "?A"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?B",
+"input":"<?B",
+"output":[["Comment", "?B"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?Y",
+"input":"<?Y",
+"output":[["Comment", "?Y"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?Z",
+"input":"<?Z",
+"output":[["Comment", "?Z"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?`",
+"input":"<?`",
+"output":[["Comment", "?`"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?a",
+"input":"<?a",
+"output":[["Comment", "?a"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?b",
+"input":"<?b",
+"output":[["Comment", "?b"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?y",
+"input":"<?y",
+"output":[["Comment", "?y"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?z",
+"input":"<?z",
+"output":[["Comment", "?z"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?{",
+"input":"<?{",
+"output":[["Comment", "?{"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<?\\uDBC0\\uDC00",
+"input":"<?\uDBC0\uDC00",
+"output":[["Comment", "?\uDBC0\uDC00"]],
+"errors":[
+    { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<@",
+"input":"<@",
+"output":[["Character", "<@"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<A>",
+"input":"<A>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<B>",
+"input":"<B>",
+"output":[["StartTag", "b", {}]]},
+
+{"description":"<Y>",
+"input":"<Y>",
+"output":[["StartTag", "y", {}]]},
+
+{"description":"<Z>",
+"input":"<Z>",
+"output":[["StartTag", "z", {}]]},
+
+{"description":"<[",
+"input":"<[",
+"output":[["Character", "<["]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<`",
+"input":"<`",
+"output":[["Character", "<`"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<a>",
+"input":"<a>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a\\u0000>",
+"input":"<a\u0000>",
+"output":[["StartTag", "a\uFFFD", {}]],
+"errors":[
+    { "code": "unexpected-null-character", "line": 1, "col": 3 }
+]},
+
+{"description":"<a\\u0008>",
+"input":"<a\u0008>",
+"output":[["StartTag", "a\u0008", {}]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 3 }
+]},
+
+{"description":"<a\\u0009>",
+"input":"<a\u0009>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a\\u000A>",
+"input":"<a\u000A>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a\\u000B>",
+"input":"<a\u000B>",
+"output":[["StartTag", "a\u000B", {}]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 3 }
+]},
+
+{"description":"<a\\u000C>",
+"input":"<a\u000C>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a\\u000D>",
+"input":"<a\u000D>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a\\u001F>",
+"input":"<a\u001F>",
+"output":[["StartTag", "a\u001F", {}]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 3 }
+]},
+
+{"description":"<a >",
+"input":"<a >",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a \\u0000>",
+"input":"<a \u0000>",
+"output":[["StartTag", "a", {"\uFFFD":""}]],
+"errors":[
+    { "code": "unexpected-null-character", "line": 1, "col": 4 }
+]},
+
+{"description":"<a \\u0008>",
+"input":"<a \u0008>",
+"output":[["StartTag", "a", {"\u0008":""}]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 4 }
+]},
+
+{"description":"<a \\u0009>",
+"input":"<a \u0009>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a \\u000A>",
+"input":"<a \u000A>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a \\u000B>",
+"input":"<a \u000B>",
+"output":[["StartTag", "a", {"\u000B":""}]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 4 }
+]},
+
+{"description":"<a \\u000C>",
+"input":"<a \u000C>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a \\u000D>",
+"input":"<a \u000D>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a \\u001F>",
+"input":"<a \u001F>",
+"output":[["StartTag", "a", {"\u001F":""}]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 4 }
+]},
+
+{"description":"<a  >",
+"input":"<a  >",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a !>",
+"input":"<a !>",
+"output":[["StartTag", "a", {"!":""}]]},
+
+{"description":"<a \">",
+"input":"<a \">",
+"output":[["StartTag", "a", {"\"":""}]],
+"errors":[
+    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 4 }
+]},
+
+{"description":"<a #>",
+"input":"<a #>",
+"output":[["StartTag", "a", {"#":""}]]},
+
+{"description":"<a &>",
+"input":"<a &>",
+"output":[["StartTag", "a", {"&":""}]]},
+
+{"description":"<a '>",
+"input":"<a '>",
+"output":[["StartTag", "a", {"'":""}]],
+"errors":[
+    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 4 }
+]},
+
+{"description":"<a (>",
+"input":"<a (>",
+"output":[["StartTag", "a", {"(":""}]]},
+
+{"description":"<a ->",
+"input":"<a ->",
+"output":[["StartTag", "a", {"-":""}]]},
+
+{"description":"<a .>",
+"input":"<a .>",
+"output":[["StartTag", "a", {".":""}]]},
+
+{"description":"<a />",
+"input":"<a />",
+"output":[["StartTag", "a", {}, true]]},
+
+{"description":"<a 0>",
+"input":"<a 0>",
+"output":[["StartTag", "a", {"0":""}]]},
+
+{"description":"<a 1>",
+"input":"<a 1>",
+"output":[["StartTag", "a", {"1":""}]]},
+
+{"description":"<a 9>",
+"input":"<a 9>",
+"output":[["StartTag", "a", {"9":""}]]},
+
+{"description":"<a <>",
+"input":"<a <>",
+"output":[["StartTag", "a", {"<":""}]],
+"errors":[
+    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 4 }
+]},
+
+{"description":"<a =>",
+"input":"<a =>",
+"output":[["StartTag", "a", {"=":""}]],
+"errors":[
+    { "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 }
+]},
+
+{"description":"<a >",
+"input":"<a >",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a ?>",
+"input":"<a ?>",
+"output":[["StartTag", "a", {"?":""}]]},
+
+{"description":"<a @>",
+"input":"<a @>",
+"output":[["StartTag", "a", {"@":""}]]},
+
+{"description":"<a A>",
+"input":"<a A>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a B>",
+"input":"<a B>",
+"output":[["StartTag", "a", {"b":""}]]},
+
+{"description":"<a Y>",
+"input":"<a Y>",
+"output":[["StartTag", "a", {"y":""}]]},
+
+{"description":"<a Z>",
+"input":"<a Z>",
+"output":[["StartTag", "a", {"z":""}]]},
+
+{"description":"<a [>",
+"input":"<a [>",
+"output":[["StartTag", "a", {"[":""}]]},
+
+{"description":"<a `>",
+"input":"<a `>",
+"output":[["StartTag", "a", {"`":""}]]},
+
+{"description":"<a a>",
+"input":"<a a>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a\\u0000>",
+"input":"<a a\u0000>",
+"output":[["StartTag", "a", {"a\uFFFD":""}]],
+"errors":[
+    { "code": "unexpected-null-character", "line": 1, "col": 5 }
+]},
+
+{"description":"<a a\\u0008>",
+"input":"<a a\u0008>",
+"output":[["StartTag", "a", {"a\u0008":""}]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 5 }
+]},
+
+{"description":"<a a\\u0009>",
+"input":"<a a\u0009>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a\\u000A>",
+"input":"<a a\u000A>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a\\u000B>",
+"input":"<a a\u000B>",
+"output":[["StartTag", "a", {"a\u000B":""}]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 5 }
+]},
+
+{"description":"<a a\\u000C>",
+"input":"<a a\u000C>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a\\u000D>",
+"input":"<a a\u000D>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a\\u001F>",
+"input":"<a a\u001F>",
+"output":[["StartTag", "a", {"a\u001F":""}]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 5 }
+]},
+
+{"description":"<a a >",
+"input":"<a a >",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a \\u0000>",
+"input":"<a a \u0000>",
+"output":[["StartTag", "a", {"a":"", "\uFFFD":""}]],
+"errors":[
+    { "code": "unexpected-null-character", "line": 1, "col": 6 }
+]},
+
+{"description":"<a a \\u0008>",
+"input":"<a a \u0008>",
+"output":[["StartTag", "a", {"a":"", "\u0008":""}]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 6 }
+]},
+
+{"description":"<a a \\u0009>",
+"input":"<a a \u0009>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a \\u000A>",
+"input":"<a a \u000A>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a \\u000B>",
+"input":"<a a \u000B>",
+"output":[["StartTag", "a", {"a":"", "\u000B":""}]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 6 }
+]},
+
+{"description":"<a a \\u000C>",
+"input":"<a a \u000C>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a \\u000D>",
+"input":"<a a \u000D>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a \\u001F>",
+"input":"<a a \u001F>",
+"output":[["StartTag", "a", {"a":"", "\u001F":""}]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 6 }
+]},
+
+{"description":"<a a  >",
+"input":"<a a  >",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a !>",
+"input":"<a a !>",
+"output":[["StartTag", "a", {"a":"", "!":""}]]},
+
+{"description":"<a a \">",
+"input":"<a a \">",
+"output":[["StartTag", "a", {"a":"", "\"":""}]],
+"errors":[
+    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 6 }
+]},
+
+{"description":"<a a #>",
+"input":"<a a #>",
+"output":[["StartTag", "a", {"a":"", "#":""}]]},
+
+{"description":"<a a &>",
+"input":"<a a &>",
+"output":[["StartTag", "a", {"a":"", "&":""}]]},
+
+{"description":"<a a '>",
+"input":"<a a '>",
+"output":[["StartTag", "a", {"a":"", "'":""}]],
+"errors":[
+    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 6 }
+]},
+
+{"description":"<a a (>",
+"input":"<a a (>",
+"output":[["StartTag", "a", {"a":"", "(":""}]]},
+
+{"description":"<a a ->",
+"input":"<a a ->",
+"output":[["StartTag", "a", {"a":"", "-":""}]]},
+
+{"description":"<a a .>",
+"input":"<a a .>",
+"output":[["StartTag", "a", {"a":"", ".":""}]]},
+
+{"description":"<a a />",
+"input":"<a a />",
+"output":[["StartTag", "a", {"a":""}, true]]},
+
+{"description":"<a a 0>",
+"input":"<a a 0>",
+"output":[["StartTag", "a", {"a":"", "0":""}]]},
+
+{"description":"<a a 1>",
+"input":"<a a 1>",
+"output":[["StartTag", "a", {"a":"", "1":""}]]},
+
+{"description":"<a a 9>",
+"input":"<a a 9>",
+"output":[["StartTag", "a", {"a":"", "9":""}]]},
+
+{"description":"<a a <>",
+"input":"<a a <>",
+"output":[["StartTag", "a", {"a":"", "<":""}]],
+"errors":[
+    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 6 }
+]},
+
+{"description":"<a a =>",
+"input":"<a a =>",
+"output":[["StartTag", "a", {"a":""}]],
+"errors":[
+    { "code": "missing-attribute-value", "line": 1, "col": 7 }
+]},
+
+{"description":"<a a >",
+"input":"<a a >",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a ?>",
+"input":"<a a ?>",
+"output":[["StartTag", "a", {"a":"", "?":""}]]},
+
+{"description":"<a a @>",
+"input":"<a a @>",
+"output":[["StartTag", "a", {"a":"", "@":""}]]},
+
+{"description":"<a a A>",
+"input":"<a a A>",
+"output":[["StartTag", "a", {"a":""}]],
+"errors":[
+    { "code": "duplicate-attribute", "line": 1, "col": 7 }
+]},
+
+{"description":"<a a B>",
+"input":"<a a B>",
+"output":[["StartTag", "a", {"a":"", "b":""}]]},
+
+{"description":"<a a Y>",
+"input":"<a a Y>",
+"output":[["StartTag", "a", {"a":"", "y":""}]]},
+
+{"description":"<a a Z>",
+"input":"<a a Z>",
+"output":[["StartTag", "a", {"a":"", "z":""}]]},
+
+{"description":"<a a [>",
+"input":"<a a [>",
+"output":[["StartTag", "a", {"a":"", "[":""}]]},
+
+{"description":"<a a `>",
+"input":"<a a `>",
+"output":[["StartTag", "a", {"a":"", "`":""}]]},
+
+{"description":"<a a a>",
+"input":"<a a a>",
+"output":[["StartTag", "a", {"a":""}]],
+"errors":[
+    { "code": "duplicate-attribute", "line": 1, "col": 7 }
+]},
+
+{"description":"<a a b>",
+"input":"<a a b>",
+"output":[["StartTag", "a", {"a":"", "b":""}]]},
+
+{"description":"<a a y>",
+"input":"<a a y>",
+"output":[["StartTag", "a", {"a":"", "y":""}]]},
+
+{"description":"<a a z>",
+"input":"<a a z>",
+"output":[["StartTag", "a", {"a":"", "z":""}]]},
+
+{"description":"<a a {>",
+"input":"<a a {>",
+"output":[["StartTag", "a", {"a":"", "{":""}]]},
+
+{"description":"<a a \\uDBC0\\uDC00>",
+"input":"<a a \uDBC0\uDC00>",
+"output":[["StartTag", "a", {"a":"", "\uDBC0\uDC00":""}]]},
+
+{"description":"<a a!>",
+"input":"<a a!>",
+"output":[["StartTag", "a", {"a!":""}]]},
+
+{"description":"<a a\">",
+"input":"<a a\">",
+"output":[["StartTag", "a", {"a\"":""}]],
+"errors":[
+    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 5 }
+]},
+
+{"description":"<a a#>",
+"input":"<a a#>",
+"output":[["StartTag", "a", {"a#":""}]]},
+
+{"description":"<a a&>",
+"input":"<a a&>",
+"output":[["StartTag", "a", {"a&":""}]]},
+
+{"description":"<a a'>",
+"input":"<a a'>",
+"output":[["StartTag", "a", {"a'":""}]],
+"errors":[
+    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 5 }
+]},
+
+{"description":"<a a(>",
+"input":"<a a(>",
+"output":[["StartTag", "a", {"a(":""}]]},
+
+{"description":"<a a->",
+"input":"<a a->",
+"output":[["StartTag", "a", {"a-":""}]]},
+
+{"description":"<a a.>",
+"input":"<a a.>",
+"output":[["StartTag", "a", {"a.":""}]]},
+
+{"description":"<a a/>",
+"input":"<a a/>",
+"output":[["StartTag", "a", {"a":""}, true]]},
+
+{"description":"<a a0>",
+"input":"<a a0>",
+"output":[["StartTag", "a", {"a0":""}]]},
+
+{"description":"<a a1>",
+"input":"<a a1>",
+"output":[["StartTag", "a", {"a1":""}]]},
+
+{"description":"<a a9>",
+"input":"<a a9>",
+"output":[["StartTag", "a", {"a9":""}]]},
+
+{"description":"<a a<>",
+"input":"<a a<>",
+"output":[["StartTag", "a", {"a<":""}]],
+"errors":[
+    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 5 }
+]},
+
+{"description":"<a a=>",
+"input":"<a a=>",
+"output":[["StartTag", "a", {"a":""}]],
+"errors":[
+    { "code": "missing-attribute-value", "line": 1, "col": 6 }
+]},
+
+{"description":"<a a=\\u0000>",
+"input":"<a a=\u0000>",
+"output":[["StartTag", "a", {"a":"\uFFFD"}]],
+"errors":[
+    { "code": "unexpected-null-character", "line": 1, "col": 6 }
+]},
+
+{"description":"<a a=\\u0008>",
+"input":"<a a=\u0008>",
+"output":[["StartTag", "a", {"a":"\u0008"}]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 6 }
+]},
+
+{"description":"<a a=\\u0009>",
+"input":"<a a=\u0009>",
+"output":[["StartTag", "a", {"a":""}]],
+"errors":[
+    { "code": "missing-attribute-value", "line": 1, "col": 7 }
+]},
+
+{"description":"<a a=\\u000A>",
+"input":"<a a=\u000A>",
+"output":[["StartTag", "a", {"a":""}]],
+"errors":[
+    { "code": "missing-attribute-value", "line": 2, "col": 1 }
+]},
+
+{"description":"<a a=\\u000B>",
+"input":"<a a=\u000B>",
+"output":[["StartTag", "a", {"a":"\u000B"}]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 6 }
+]},
+
+{"description":"<a a=\\u000C>",
+"input":"<a a=\u000C>",
+"output":[["StartTag", "a", {"a":""}]],
+"errors":[
+    { "code": "missing-attribute-value", "line": 1, "col": 7 }
+]},
+
+{"description":"<a a=\\u000D>",
+"input":"<a a=\u000D>",
+"output":[["StartTag", "a", {"a":""}]],
+"errors":[
+    { "code": "missing-attribute-value", "line": 2, "col": 1 }
+]},
+
+{"description":"<a a=\\u001F>",
+"input":"<a a=\u001F>",
+"output":[["StartTag", "a", {"a":"\u001F"}]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 6 }
+]},
+
+{"description":"<a a= >",
+"input":"<a a= >",
+"output":[["StartTag", "a", {"a":""}]],
+"errors":[
+    { "code": "missing-attribute-value", "line": 1, "col": 7 }
+]},
+
+{"description":"<a a=!>",
+"input":"<a a=!>",
+"output":[["StartTag", "a", {"a":"!"}]]},
+
+{"description":"<a a=\"\">",
+"input":"<a a=\"\">",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=\"\\u0000\">",
+"input":"<a a=\"\u0000\">",
+"output":[["StartTag", "a", {"a":"\uFFFD"}]],
+"errors":[
+    { "code": "unexpected-null-character", "line": 1, "col": 7 }
+]},
+
+{"description":"<a a=\"\\u0009\">",
+"input":"<a a=\"\u0009\">",
+"output":[["StartTag", "a", {"a":"\u0009"}]]},
+
+{"description":"<a a=\"\\u000A\">",
+"input":"<a a=\"\u000A\">",
+"output":[["StartTag", "a", {"a":"\u000A"}]]},
+
+{"description":"<a a=\"\\u000B\">",
+"input":"<a a=\"\u000B\">",
+"output":[["StartTag", "a", {"a":"\u000B"}]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 7 }
+]},
+
+{"description":"<a a=\"\\u000C\">",
+"input":"<a a=\"\u000C\">",
+"output":[["StartTag", "a", {"a":"\u000C"}]]},
+
+{"description":"<a a=\" \">",
+"input":"<a a=\" \">",
+"output":[["StartTag", "a", {"a":" "}]]},
+
+{"description":"<a a=\"!\">",
+"input":"<a a=\"!\">",
+"output":[["StartTag", "a", {"a":"!"}]]},
+
+{"description":"<a a=\"\">",
+"input":"<a a=\"\">",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=\"#\">",
+"input":"<a a=\"#\">",
+"output":[["StartTag", "a", {"a":"#"}]]},
+
+{"description":"<a a=\"%\">",
+"input":"<a a=\"%\">",
+"output":[["StartTag", "a", {"a":"%"}]]},
+
+{"description":"<a a=\"&\">",
+"input":"<a a=\"&\">",
+"output":[["StartTag", "a", {"a":"&"}]]},
+
+{"description":"<a a=\"'\">",
+"input":"<a a=\"'\">",
+"output":[["StartTag", "a", {"a":"'"}]]},
+
+{"description":"<a a=\"-\">",
+"input":"<a a=\"-\">",
+"output":[["StartTag", "a", {"a":"-"}]]},
+
+{"description":"<a a=\"/\">",
+"input":"<a a=\"/\">",
+"output":[["StartTag", "a", {"a":"/"}]]},
+
+{"description":"<a a=\"0\">",
+"input":"<a a=\"0\">",
+"output":[["StartTag", "a", {"a":"0"}]]},
+
+{"description":"<a a=\"1\">",
+"input":"<a a=\"1\">",
+"output":[["StartTag", "a", {"a":"1"}]]},
+
+{"description":"<a a=\"9\">",
+"input":"<a a=\"9\">",
+"output":[["StartTag", "a", {"a":"9"}]]},
+
+{"description":"<a a=\"<\">",
+"input":"<a a=\"<\">",
+"output":[["StartTag", "a", {"a":"<"}]]},
+
+{"description":"<a a=\"=\">",
+"input":"<a a=\"=\">",
+"output":[["StartTag", "a", {"a":"="}]]},
+
+{"description":"<a a=\">\">",
+"input":"<a a=\">\">",
+"output":[["StartTag", "a", {"a":">"}]]},
+
+{"description":"<a a=\"?\">",
+"input":"<a a=\"?\">",
+"output":[["StartTag", "a", {"a":"?"}]]},
+
+{"description":"<a a=\"@\">",
+"input":"<a a=\"@\">",
+"output":[["StartTag", "a", {"a":"@"}]]},
+
+{"description":"<a a=\"A\">",
+"input":"<a a=\"A\">",
+"output":[["StartTag", "a", {"a":"A"}]]},
+
+{"description":"<a a=\"B\">",
+"input":"<a a=\"B\">",
+"output":[["StartTag", "a", {"a":"B"}]]},
+
+{"description":"<a a=\"Y\">",
+"input":"<a a=\"Y\">",
+"output":[["StartTag", "a", {"a":"Y"}]]},
+
+{"description":"<a a=\"Z\">",
+"input":"<a a=\"Z\">",
+"output":[["StartTag", "a", {"a":"Z"}]]},
+
+{"description":"<a a=\"`\">",
+"input":"<a a=\"`\">",
+"output":[["StartTag", "a", {"a":"`"}]]},
+
+{"description":"<a a=\"a\">",
+"input":"<a a=\"a\">",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a=\"b\">",
+"input":"<a a=\"b\">",
+"output":[["StartTag", "a", {"a":"b"}]]},
+
+{"description":"<a a=\"y\">",
+"input":"<a a=\"y\">",
+"output":[["StartTag", "a", {"a":"y"}]]},
+
+{"description":"<a a=\"z\">",
+"input":"<a a=\"z\">",
+"output":[["StartTag", "a", {"a":"z"}]]},
+
+{"description":"<a a=\"{\">",
+"input":"<a a=\"{\">",
+"output":[["StartTag", "a", {"a":"{"}]]},
+
+{"description":"<a a=\"\\uDBC0\\uDC00\">",
+"input":"<a a=\"\uDBC0\uDC00\">",
+"output":[["StartTag", "a", {"a":"\uDBC0\uDC00"}]]},
+
+{"description":"<a a=#>",
+"input":"<a a=#>",
+"output":[["StartTag", "a", {"a":"#"}]]},
+
+{"description":"<a a=%>",
+"input":"<a a=%>",
+"output":[["StartTag", "a", {"a":"%"}]]},
+
+{"description":"<a a=&>",
+"input":"<a a=&>",
+"output":[["StartTag", "a", {"a":"&"}]]},
+
+{"description":"<a a=''>",
+"input":"<a a=''>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a='\\u0000'>",
+"input":"<a a='\u0000'>",
+"output":[["StartTag", "a", {"a":"\uFFFD"}]],
+"errors":[
+    { "code": "unexpected-null-character", "line": 1, "col": 7 }
+]},
+
+{"description":"<a a='\\u0009'>",
+"input":"<a a='\u0009'>",
+"output":[["StartTag", "a", {"a":"\u0009"}]]},
+
+{"description":"<a a='\\u000A'>",
+"input":"<a a='\u000A'>",
+"output":[["StartTag", "a", {"a":"\u000A"}]]},
+
+{"description":"<a a='\\u000B'>",
+"input":"<a a='\u000B'>",
+"output":[["StartTag", "a", {"a":"\u000B"}]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 7 }
+]},
+
+{"description":"<a a='\\u000C'>",
+"input":"<a a='\u000C'>",
+"output":[["StartTag", "a", {"a":"\u000C"}]]},
+
+{"description":"<a a=' '>",
+"input":"<a a=' '>",
+"output":[["StartTag", "a", {"a":" "}]]},
+
+{"description":"<a a='!'>",
+"input":"<a a='!'>",
+"output":[["StartTag", "a", {"a":"!"}]]},
+
+{"description":"<a a='\"'>",
+"input":"<a a='\"'>",
+"output":[["StartTag", "a", {"a":"\""}]]},
+
+{"description":"<a a='%'>",
+"input":"<a a='%'>",
+"output":[["StartTag", "a", {"a":"%"}]]},
+
+{"description":"<a a='&'>",
+"input":"<a a='&'>",
+"output":[["StartTag", "a", {"a":"&"}]]},
+
+{"description":"<a a=''>",
+"input":"<a a=''>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=''\\u0000>",
+"input":"<a a=''\u0000>",
+"output":[["StartTag", "a", {"a":"", "\uFFFD":""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 },
+    { "code": "unexpected-null-character", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a=''\\u0008>",
+"input":"<a a=''\u0008>",
+"output":[["StartTag", "a", {"a":"", "\u0008":""}]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 8 },
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a=''\\u0009>",
+"input":"<a a=''\u0009>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=''\\u000A>",
+"input":"<a a=''\u000A>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=''\\u000B>",
+"input":"<a a=''\u000B>",
+"output":[["StartTag", "a", {"a":"", "\u000B":""}]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 8 },
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a=''\\u000C>",
+"input":"<a a=''\u000C>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=''\\u000D>",
+"input":"<a a=''\u000D>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=''\\u001F>",
+"input":"<a a=''\u001F>",
+"output":[["StartTag", "a", {"a":"", "\u001F":""}]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 8 },
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a='' >",
+"input":"<a a='' >",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=''!>",
+"input":"<a a=''!>",
+"output":[["StartTag", "a", {"a":"", "!":""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a=''\">",
+"input":"<a a=''\">",
+"output":[["StartTag", "a", {"a":"", "\"":""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 },
+    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a=''&>",
+"input":"<a a=''&>",
+"output":[["StartTag", "a", {"a":"", "&":""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a='''>",
+"input":"<a a='''>",
+"output":[["StartTag", "a", {"a":"", "'":""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 },
+    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a=''->",
+"input":"<a a=''->",
+"output":[["StartTag", "a", {"a":"", "-":""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a=''.>",
+"input":"<a a=''.>",
+"output":[["StartTag", "a", {"a":"", ".":""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a=''/>",
+"input":"<a a=''/>",
+"output":[["StartTag", "a", {"a":""}, true]]},
+
+{"description":"<a a=''0>",
+"input":"<a a=''0>",
+"output":[["StartTag", "a", {"a":"", "0":""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a=''1>",
+"input":"<a a=''1>",
+"output":[["StartTag", "a", {"a":"", "1":""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a=''9>",
+"input":"<a a=''9>",
+"output":[["StartTag", "a", {"a":"", "9":""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a=''<>",
+"input":"<a a=''<>",
+"output":[["StartTag", "a", {"a":"", "<":""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 },
+    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a=''=>",
+"input":"<a a=''=>",
+"output":[["StartTag", "a", {"a":"", "=":""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 },
+    { "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a=''>",
+"input":"<a a=''>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=''?>",
+"input":"<a a=''?>",
+"output":[["StartTag", "a", {"a":"", "?":""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a=''@>",
+"input":"<a a=''@>",
+"output":[["StartTag", "a", {"a":"", "@":""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a=''A>",
+"input":"<a a=''A>",
+"output":[["StartTag", "a", {"a":""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 },
+    { "code": "duplicate-attribute", "line": 1, "col": 9 }
+]},
+
+{"description":"<a a=''B>",
+"input":"<a a=''B>",
+"output":[["StartTag", "a", {"a":"", "b":""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a=''Y>",
+"input":"<a a=''Y>",
+"output":[["StartTag", "a", {"a":"", "y":""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a=''Z>",
+"input":"<a a=''Z>",
+"output":[["StartTag", "a", {"a":"", "z":""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a=''`>",
+"input":"<a a=''`>",
+"output":[["StartTag", "a", {"a":"", "`":""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a=''a>",
+"input":"<a a=''a>",
+"output":[["StartTag", "a", {"a":""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 },
+    { "code": "duplicate-attribute", "line": 1, "col": 9 }
+]},
+
+{"description":"<a a=''b>",
+"input":"<a a=''b>",
+"output":[["StartTag", "a", {"a":"", "b":""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a=''y>",
+"input":"<a a=''y>",
+"output":[["StartTag", "a", {"a":"", "y":""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a=''z>",
+"input":"<a a=''z>",
+"output":[["StartTag", "a", {"a":"", "z":""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a=''{>",
+"input":"<a a=''{>",
+"output":[["StartTag", "a", {"a":"", "{":""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a=''\\uDBC0\\uDC00>",
+"input":"<a a=''\uDBC0\uDC00>",
+"output":[["StartTag", "a", {"a":"", "\uDBC0\uDC00":""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }
+]},
+
+{"description":"<a a='('>",
+"input":"<a a='('>",
+"output":[["StartTag", "a", {"a":"("}]]},
+
+{"description":"<a a='-'>",
+"input":"<a a='-'>",
+"output":[["StartTag", "a", {"a":"-"}]]},
+
+{"description":"<a a='/'>",
+"input":"<a a='/'>",
+"output":[["StartTag", "a", {"a":"/"}]]},
+
+{"description":"<a a='0'>",
+"input":"<a a='0'>",
+"output":[["StartTag", "a", {"a":"0"}]]},
+
+{"description":"<a a='1'>",
+"input":"<a a='1'>",
+"output":[["StartTag", "a", {"a":"1"}]]},
+
+{"description":"<a a='9'>",
+"input":"<a a='9'>",
+"output":[["StartTag", "a", {"a":"9"}]]},
+
+{"description":"<a a='<'>",
+"input":"<a a='<'>",
+"output":[["StartTag", "a", {"a":"<"}]]},
+
+{"description":"<a a='='>",
+"input":"<a a='='>",
+"output":[["StartTag", "a", {"a":"="}]]},
+
+{"description":"<a a='>'>",
+"input":"<a a='>'>",
+"output":[["StartTag", "a", {"a":">"}]]},
+
+{"description":"<a a='?'>",
+"input":"<a a='?'>",
+"output":[["StartTag", "a", {"a":"?"}]]},
+
+{"description":"<a a='@'>",
+"input":"<a a='@'>",
+"output":[["StartTag", "a", {"a":"@"}]]},
+
+{"description":"<a a='A'>",
+"input":"<a a='A'>",
+"output":[["StartTag", "a", {"a":"A"}]]},
+
+{"description":"<a a='B'>",
+"input":"<a a='B'>",
+"output":[["StartTag", "a", {"a":"B"}]]},
+
+{"description":"<a a='Y'>",
+"input":"<a a='Y'>",
+"output":[["StartTag", "a", {"a":"Y"}]]},
+
+{"description":"<a a='Z'>",
+"input":"<a a='Z'>",
+"output":[["StartTag", "a", {"a":"Z"}]]},
+
+{"description":"<a a='`'>",
+"input":"<a a='`'>",
+"output":[["StartTag", "a", {"a":"`"}]]},
+
+{"description":"<a a='a'>",
+"input":"<a a='a'>",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a='b'>",
+"input":"<a a='b'>",
+"output":[["StartTag", "a", {"a":"b"}]]},
+
+{"description":"<a a='y'>",
+"input":"<a a='y'>",
+"output":[["StartTag", "a", {"a":"y"}]]},
+
+{"description":"<a a='z'>",
+"input":"<a a='z'>",
+"output":[["StartTag", "a", {"a":"z"}]]},
+
+{"description":"<a a='{'>",
+"input":"<a a='{'>",
+"output":[["StartTag", "a", {"a":"{"}]]},
+
+{"description":"<a a='\\uDBC0\\uDC00'>",
+"input":"<a a='\uDBC0\uDC00'>",
+"output":[["StartTag", "a", {"a":"\uDBC0\uDC00"}]]},
+
+{"description":"<a a=(>",
+"input":"<a a=(>",
+"output":[["StartTag", "a", {"a":"("}]]},
+
+{"description":"<a a=->",
+"input":"<a a=->",
+"output":[["StartTag", "a", {"a":"-"}]]},
+
+{"description":"<a a=/>",
+"input":"<a a=/>",
+"output":[["StartTag", "a", {"a":"/"}]]},
+
+{"description":"<a a=0>",
+"input":"<a a=0>",
+"output":[["StartTag", "a", {"a":"0"}]]},
+
+{"description":"<a a=1>",
+"input":"<a a=1>",
+"output":[["StartTag", "a", {"a":"1"}]]},
+
+{"description":"<a a=9>",
+"input":"<a a=9>",
+"output":[["StartTag", "a", {"a":"9"}]]},
+
+{"description":"<a a=<>",
+"input":"<a a=<>",
+"output":[["StartTag", "a", {"a":"<"}]],
+"errors":[
+    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 }
+]},
+
+{"description":"<a a==>",
+"input":"<a a==>",
+"output":[["StartTag", "a", {"a":"="}]],
+"errors":[
+    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 }
+]},
+
+{"description":"<a a=>",
+"input":"<a a=>",
+"output":[["StartTag", "a", {"a":""}]],
+"errors":[
+    { "code": "missing-attribute-value", "line": 1, "col": 6 }
+]},
+
+{"description":"<a a=?>",
+"input":"<a a=?>",
+"output":[["StartTag", "a", {"a":"?"}]]},
+
+{"description":"<a a=@>",
+"input":"<a a=@>",
+"output":[["StartTag", "a", {"a":"@"}]]},
+
+{"description":"<a a=A>",
+"input":"<a a=A>",
+"output":[["StartTag", "a", {"a":"A"}]]},
+
+{"description":"<a a=B>",
+"input":"<a a=B>",
+"output":[["StartTag", "a", {"a":"B"}]]},
+
+{"description":"<a a=Y>",
+"input":"<a a=Y>",
+"output":[["StartTag", "a", {"a":"Y"}]]},
+
+{"description":"<a a=Z>",
+"input":"<a a=Z>",
+"output":[["StartTag", "a", {"a":"Z"}]]},
+
+{"description":"<a a=`>",
+"input":"<a a=`>",
+"output":[["StartTag", "a", {"a":"`"}]],
+"errors":[
+    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 }
+]},
+
+{"description":"<a a=a>",
+"input":"<a a=a>",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a=a\\u0000>",
+"input":"<a a=a\u0000>",
+"output":[["StartTag", "a", {"a":"a\uFFFD"}]],
+"errors":[
+    { "code": "unexpected-null-character", "line": 1, "col": 7 }
+]},
+
+{"description":"<a a=a\\u0008>",
+"input":"<a a=a\u0008>",
+"output":[["StartTag", "a", {"a":"a\u0008"}]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 7 }
+]},
+
+{"description":"<a a=a\\u0009>",
+"input":"<a a=a\u0009>",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a=a\\u000A>",
+"input":"<a a=a\u000A>",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a=a\\u000B>",
+"input":"<a a=a\u000B>",
+"output":[["StartTag", "a", {"a":"a\u000B"}]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 7 }
+]},
+
+{"description":"<a a=a\\u000C>",
+"input":"<a a=a\u000C>",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a=a\\u000D>",
+"input":"<a a=a\u000D>",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a=a\\u001F>",
+"input":"<a a=a\u001F>",
+"output":[["StartTag", "a", {"a":"a\u001F"}]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 7 }
+]},
+
+{"description":"<a a=a >",
+"input":"<a a=a >",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a=a!>",
+"input":"<a a=a!>",
+"output":[["StartTag", "a", {"a":"a!"}]]},
+
+{"description":"<a a=a\">",
+"input":"<a a=a\">",
+"output":[["StartTag", "a", {"a":"a\""}]],
+"errors":[
+    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 }
+]},
+
+{"description":"<a a=a#>",
+"input":"<a a=a#>",
+"output":[["StartTag", "a", {"a":"a#"}]]},
+
+{"description":"<a a=a%>",
+"input":"<a a=a%>",
+"output":[["StartTag", "a", {"a":"a%"}]]},
+
+{"description":"<a a=a&>",
+"input":"<a a=a&>",
+"output":[["StartTag", "a", {"a":"a&"}]]},
+
+{"description":"<a a=a'>",
+"input":"<a a=a'>",
+"output":[["StartTag", "a", {"a":"a'"}]],
+"errors":[
+    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 }
+]},
+
+{"description":"<a a=a(>",
+"input":"<a a=a(>",
+"output":[["StartTag", "a", {"a":"a("}]]},
+
+{"description":"<a a=a->",
+"input":"<a a=a->",
+"output":[["StartTag", "a", {"a":"a-"}]]},
+
+{"description":"<a a=a/>",
+"input":"<a a=a/>",
+"output":[["StartTag", "a", {"a":"a/"}]]},
+
+{"description":"<a a=a0>",
+"input":"<a a=a0>",
+"output":[["StartTag", "a", {"a":"a0"}]]},
+
+{"description":"<a a=a1>",
+"input":"<a a=a1>",
+"output":[["StartTag", "a", {"a":"a1"}]]},
+
+{"description":"<a a=a9>",
+"input":"<a a=a9>",
+"output":[["StartTag", "a", {"a":"a9"}]]},
+
+{"description":"<a a=a<>",
+"input":"<a a=a<>",
+"output":[["StartTag", "a", {"a":"a<"}]],
+"errors":[
+    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 }
+]},
+
+{"description":"<a a=a=>",
+"input":"<a a=a=>",
+"output":[["StartTag", "a", {"a":"a="}]],
+"errors":[
+    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 }
+]},
+
+{"description":"<a a=a>",
+"input":"<a a=a>",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a=a?>",
+"input":"<a a=a?>",
+"output":[["StartTag", "a", {"a":"a?"}]]},
+
+{"description":"<a a=a@>",
+"input":"<a a=a@>",
+"output":[["StartTag", "a", {"a":"a@"}]]},
+
+{"description":"<a a=aA>",
+"input":"<a a=aA>",
+"output":[["StartTag", "a", {"a":"aA"}]]},
+
+{"description":"<a a=aB>",
+"input":"<a a=aB>",
+"output":[["StartTag", "a", {"a":"aB"}]]},
+
+{"description":"<a a=aY>",
+"input":"<a a=aY>",
+"output":[["StartTag", "a", {"a":"aY"}]]},
+
+{"description":"<a a=aZ>",
+"input":"<a a=aZ>",
+"output":[["StartTag", "a", {"a":"aZ"}]]},
+
+{"description":"<a a=a`>",
+"input":"<a a=a`>",
+"output":[["StartTag", "a", {"a":"a`"}]],
+"errors":[
+    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 }
+]},
+
+{"description":"<a a=aa>",
+"input":"<a a=aa>",
+"output":[["StartTag", "a", {"a":"aa"}]]},
+
+{"description":"<a a=ab>",
+"input":"<a a=ab>",
+"output":[["StartTag", "a", {"a":"ab"}]]},
+
+{"description":"<a a=ay>",
+"input":"<a a=ay>",
+"output":[["StartTag", "a", {"a":"ay"}]]},
+
+{"description":"<a a=az>",
+"input":"<a a=az>",
+"output":[["StartTag", "a", {"a":"az"}]]},
+
+{"description":"<a a=a{>",
+"input":"<a a=a{>",
+"output":[["StartTag", "a", {"a":"a{"}]]},
+
+{"description":"<a a=a\\uDBC0\\uDC00>",
+"input":"<a a=a\uDBC0\uDC00>",
+"output":[["StartTag", "a", {"a":"a\uDBC0\uDC00"}]]},
+
+{"description":"<a a=b>",
+"input":"<a a=b>",
+"output":[["StartTag", "a", {"a":"b"}]]},
+
+{"description":"<a a=y>",
+"input":"<a a=y>",
+"output":[["StartTag", "a", {"a":"y"}]]},
+
+{"description":"<a a=z>",
+"input":"<a a=z>",
+"output":[["StartTag", "a", {"a":"z"}]]},
+
+{"description":"<a a={>",
+"input":"<a a={>",
+"output":[["StartTag", "a", {"a":"{"}]]},
+
+{"description":"<a a=\\uDBC0\\uDC00>",
+"input":"<a a=\uDBC0\uDC00>",
+"output":[["StartTag", "a", {"a":"\uDBC0\uDC00"}]]},
+
+{"description":"<a a>",
+"input":"<a a>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a?>",
+"input":"<a a?>",
+"output":[["StartTag", "a", {"a?":""}]]},
+
+{"description":"<a a@>",
+"input":"<a a@>",
+"output":[["StartTag", "a", {"a@":""}]]},
+
+{"description":"<a aA>",
+"input":"<a aA>",
+"output":[["StartTag", "a", {"aa":""}]]},
+
+{"description":"<a aB>",
+"input":"<a aB>",
+"output":[["StartTag", "a", {"ab":""}]]},
+
+{"description":"<a aY>",
+"input":"<a aY>",
+"output":[["StartTag", "a", {"ay":""}]]},
+
+{"description":"<a aZ>",
+"input":"<a aZ>",
+"output":[["StartTag", "a", {"az":""}]]},
+
+{"description":"<a a[>",
+"input":"<a a[>",
+"output":[["StartTag", "a", {"a[":""}]]},
+
+{"description":"<a a`>",
+"input":"<a a`>",
+"output":[["StartTag", "a", {"a`":""}]]},
+
+{"description":"<a aa>",
+"input":"<a aa>",
+"output":[["StartTag", "a", {"aa":""}]]},
+
+{"description":"<a ab>",
+"input":"<a ab>",
+"output":[["StartTag", "a", {"ab":""}]]},
+
+{"description":"<a ay>",
+"input":"<a ay>",
+"output":[["StartTag", "a", {"ay":""}]]},
+
+{"description":"<a az>",
+"input":"<a az>",
+"output":[["StartTag", "a", {"az":""}]]},
+
+{"description":"<a a{>",
+"input":"<a a{>",
+"output":[["StartTag", "a", {"a{":""}]]},
+
+{"description":"<a a\\uDBC0\\uDC00>",
+"input":"<a a\uDBC0\uDC00>",
+"output":[["StartTag", "a", {"a\uDBC0\uDC00":""}]]},
+
+{"description":"<a b>",
+"input":"<a b>",
+"output":[["StartTag", "a", {"b":""}]]},
+
+{"description":"<a y>",
+"input":"<a y>",
+"output":[["StartTag", "a", {"y":""}]]},
+
+{"description":"<a z>",
+"input":"<a z>",
+"output":[["StartTag", "a", {"z":""}]]},
+
+{"description":"<a {>",
+"input":"<a {>",
+"output":[["StartTag", "a", {"{":""}]]},
+
+{"description":"<a \\uDBC0\\uDC00>",
+"input":"<a \uDBC0\uDC00>",
+"output":[["StartTag", "a", {"\uDBC0\uDC00":""}]]},
+
+{"description":"<a!>",
+"input":"<a!>",
+"output":[["StartTag", "a!", {}]]},
+
+{"description":"<a\">",
+"input":"<a\">",
+"output":[["StartTag", "a\"", {}]]},
+
+{"description":"<a&>",
+"input":"<a&>",
+"output":[["StartTag", "a&", {}]]},
+
+{"description":"<a'>",
+"input":"<a'>",
+"output":[["StartTag", "a'", {}]]},
+
+{"description":"<a->",
+"input":"<a->",
+"output":[["StartTag", "a-", {}]]},
+
+{"description":"<a.>",
+"input":"<a.>",
+"output":[["StartTag", "a.", {}]]},
+
+{"description":"<a/>",
+"input":"<a/>",
+"output":[["StartTag", "a", {}, true]]},
+
+{"description":"<a/\\u0000>",
+"input":"<a/\u0000>",
+"output":[["StartTag", "a", {"\uFFFD":""}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 },
+    { "code": "unexpected-null-character", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/\\u0009>",
+"input":"<a/\u0009>",
+"output":[["StartTag", "a", {}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/\\u000A>",
+"input":"<a/\u000A>",
+"output":[["StartTag", "a", {}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/\\u000B>",
+"input":"<a/\u000B>",
+"output":[["StartTag", "a", {"\u000B":""}]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 4 },
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/\\u000C>",
+"input":"<a/\u000C>",
+"output":[["StartTag", "a", {}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/ >",
+"input":"<a/ >",
+"output":[["StartTag", "a", {}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/!>",
+"input":"<a/!>",
+"output":[["StartTag", "a", {"!":""}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/\">",
+"input":"<a/\">",
+"output":[["StartTag", "a", {"\"":""}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 },
+    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/&>",
+"input":"<a/&>",
+"output":[["StartTag", "a", {"&":""}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/'>",
+"input":"<a/'>",
+"output":[["StartTag", "a", {"'":""}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 },
+    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/->",
+"input":"<a/->",
+"output":[["StartTag", "a", {"-":""}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"<a//>",
+"input":"<a//>",
+"output":[["StartTag", "a", {}, true]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/0>",
+"input":"<a/0>",
+"output":[["StartTag", "a", {"0":""}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/1>",
+"input":"<a/1>",
+"output":[["StartTag", "a", {"1":""}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/9>",
+"input":"<a/9>",
+"output":[["StartTag", "a", {"9":""}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/<>",
+"input":"<a/<>",
+"output":[["StartTag", "a", {"<":""}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 },
+    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/=>",
+"input":"<a/=>",
+"output":[["StartTag", "a", {"=":""}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 },
+    { "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/>",
+"input":"<a/>",
+"output":[["StartTag", "a", {}, true]]},
+
+{"description":"<a/?>",
+"input":"<a/?>",
+"output":[["StartTag", "a", {"?":""}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/@>",
+"input":"<a/@>",
+"output":[["StartTag", "a", {"@":""}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/A>",
+"input":"<a/A>",
+"output":[["StartTag", "a", {"a":""}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/B>",
+"input":"<a/B>",
+"output":[["StartTag", "a", {"b":""}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/Y>",
+"input":"<a/Y>",
+"output":[["StartTag", "a", {"y":""}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/Z>",
+"input":"<a/Z>",
+"output":[["StartTag", "a", {"z":""}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/`>",
+"input":"<a/`>",
+"output":[["StartTag", "a", {"`":""}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/a>",
+"input":"<a/a>",
+"output":[["StartTag", "a", {"a":""}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/b>",
+"input":"<a/b>",
+"output":[["StartTag", "a", {"b":""}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/y>",
+"input":"<a/y>",
+"output":[["StartTag", "a", {"y":""}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/z>",
+"input":"<a/z>",
+"output":[["StartTag", "a", {"z":""}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/{>",
+"input":"<a/{>",
+"output":[["StartTag", "a", {"{":""}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"<a/\\uDBC0\\uDC00>",
+"input":"<a/\uDBC0\uDC00>",
+"output":[["StartTag", "a", {"\uDBC0\uDC00":""}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"<a0>",
+"input":"<a0>",
+"output":[["StartTag", "a0", {}]]},
+
+{"description":"<a1>",
+"input":"<a1>",
+"output":[["StartTag", "a1", {}]]},
+
+{"description":"<a9>",
+"input":"<a9>",
+"output":[["StartTag", "a9", {}]]},
+
+{"description":"<a<>",
+"input":"<a<>",
+"output":[["StartTag", "a<", {}]]},
+
+{"description":"<a=>",
+"input":"<a=>",
+"output":[["StartTag", "a=", {}]]},
+
+{"description":"<a>",
+"input":"<a>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a?>",
+"input":"<a?>",
+"output":[["StartTag", "a?", {}]]},
+
+{"description":"<a@>",
+"input":"<a@>",
+"output":[["StartTag", "a@", {}]]},
+
+{"description":"<aA>",
+"input":"<aA>",
+"output":[["StartTag", "aa", {}]]},
+
+{"description":"<aB>",
+"input":"<aB>",
+"output":[["StartTag", "ab", {}]]},
+
+{"description":"<aY>",
+"input":"<aY>",
+"output":[["StartTag", "ay", {}]]},
+
+{"description":"<aZ>",
+"input":"<aZ>",
+"output":[["StartTag", "az", {}]]},
+
+{"description":"<a[>",
+"input":"<a[>",
+"output":[["StartTag", "a[", {}]]},
+
+{"description":"<a`>",
+"input":"<a`>",
+"output":[["StartTag", "a`", {}]]},
+
+{"description":"<aa>",
+"input":"<aa>",
+"output":[["StartTag", "aa", {}]]},
+
+{"description":"<ab>",
+"input":"<ab>",
+"output":[["StartTag", "ab", {}]]},
+
+{"description":"<ay>",
+"input":"<ay>",
+"output":[["StartTag", "ay", {}]]},
+
+{"description":"<az>",
+"input":"<az>",
+"output":[["StartTag", "az", {}]]},
+
+{"description":"<a{>",
+"input":"<a{>",
+"output":[["StartTag", "a{", {}]]},
+
+{"description":"<a\\uDBC0\\uDC00>",
+"input":"<a\uDBC0\uDC00>",
+"output":[["StartTag", "a\uDBC0\uDC00", {}]]},
+
+{"description":"<b>",
+"input":"<b>",
+"output":[["StartTag", "b", {}]]},
+
+{"description":"<y>",
+"input":"<y>",
+"output":[["StartTag", "y", {}]]},
+
+{"description":"<z>",
+"input":"<z>",
+"output":[["StartTag", "z", {}]]},
+
+{"description":"<{",
+"input":"<{",
+"output":[["Character", "<{"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"<\\uDBC0\\uDC00",
+"input":"<\uDBC0\uDC00",
+"output":[["Character", "<\uDBC0\uDC00"]],
+"errors":[
+    { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 }
+]},
+
+{"description":"=",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"=",
+"output":[["Character", "="]]},
+
+{"description":"=",
+"initialStates":["CDATA section state"],
+"input":"=",
+"output":[["Character", "="]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":">",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":">",
+"output":[["Character", ">"]]},
+
+{"description":">",
+"initialStates":["CDATA section state"],
+"input":">",
+"output":[["Character", ">"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"?",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"?",
+"output":[["Character", "?"]]},
+
+{"description":"?",
+"initialStates":["CDATA section state"],
+"input":"?",
+"output":[["Character", "?"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"@",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"@",
+"output":[["Character", "@"]]},
+
+{"description":"@",
+"initialStates":["CDATA section state"],
+"input":"@",
+"output":[["Character", "@"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"A",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"A",
+"output":[["Character", "A"]]},
+
+{"description":"A",
+"initialStates":["CDATA section state"],
+"input":"A",
+"output":[["Character", "A"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"B",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"B",
+"output":[["Character", "B"]]},
+
+{"description":"B",
+"initialStates":["CDATA section state"],
+"input":"B",
+"output":[["Character", "B"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"Y",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"Y",
+"output":[["Character", "Y"]]},
+
+{"description":"Y",
+"initialStates":["CDATA section state"],
+"input":"Y",
+"output":[["Character", "Y"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"Z",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"Z",
+"output":[["Character", "Z"]]},
+
+{"description":"Z",
+"initialStates":["CDATA section state"],
+"input":"Z",
+"output":[["Character", "Z"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"`",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"`",
+"output":[["Character", "`"]]},
+
+{"description":"`",
+"initialStates":["CDATA section state"],
+"input":"`",
+"output":[["Character", "`"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"a",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"a",
+"output":[["Character", "a"]]},
+
+{"description":"a",
+"initialStates":["CDATA section state"],
+"input":"a",
+"output":[["Character", "a"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"b",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"b",
+"output":[["Character", "b"]]},
+
+{"description":"b",
+"initialStates":["CDATA section state"],
+"input":"b",
+"output":[["Character", "b"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"y",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"y",
+"output":[["Character", "y"]]},
+
+{"description":"y",
+"initialStates":["CDATA section state"],
+"input":"y",
+"output":[["Character", "y"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"z",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"z",
+"output":[["Character", "z"]]},
+
+{"description":"z",
+"initialStates":["CDATA section state"],
+"input":"z",
+"output":[["Character", "z"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"{",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"{",
+"output":[["Character", "{"]]},
+
+{"description":"{",
+"initialStates":["CDATA section state"],
+"input":"{",
+"output":[["Character", "{"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":"\\uDBC0\\uDC00",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":"\uDBC0\uDC00",
+"output":[["Character", "\uDBC0\uDC00"]]},
+
+{"description":"\\uDBC0\\uDC00",
+"initialStates":["CDATA section state"],
+"input":"\uDBC0\uDC00",
+"output":[["Character", "\uDBC0\uDC00"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]}
+
+]}
diff --git a/lib/html5lib/tests/testdata/tokenizer/test4.test b/lib/html5lib/tests/testdata/tokenizer/test4.test
new file mode 100644
index 00000000..8963c747
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tokenizer/test4.test
@@ -0,0 +1,532 @@
+{"tests": [
+
+{"description":"< in attribute name",
+"input":"<z/0  <>",
+"output":[["StartTag", "z", {"0": "", "<": ""}]],
+"errors":[
+    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 },
+    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 7 }
+]},
+
+{"description":"< in unquoted attribute value",
+"input":"<z x=<>",
+"output":[["StartTag", "z", {"x": "<"}]],
+"errors":[
+    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 }
+]},
+
+{"description":"= in unquoted attribute value",
+"input":"<z z=z=z>",
+"output":[["StartTag", "z", {"z": "z=z"}]],
+"errors":[
+    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 }
+]},
+
+{"description":"= attribute",
+"input":"<z =>",
+"output":[["StartTag", "z", {"=": ""}]],
+"errors":[
+    { "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 }
+]},
+
+{"description":"== attribute",
+"input":"<z ==>",
+"output":[["StartTag", "z", {"=": ""}]],
+"errors":[
+    { "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 },
+    { "code": "missing-attribute-value", "line": 1, "col": 6 }
+]},
+
+{"description":"=== attribute",
+"input":"<z ===>",
+"output":[["StartTag", "z", {"=": "="}]],
+"errors":[
+    { "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 },
+    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 }
+]},
+
+{"description":"==== attribute",
+"input":"<z ====>",
+"output":[["StartTag", "z", {"=": "=="}]],
+"errors":[
+    { "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 },
+    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 },
+    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 }
+]},
+
+{"description":"\" after ampersand in double-quoted attribute value",
+"input":"<z z=\"&\">",
+"output":[["StartTag", "z", {"z": "&"}]]},
+
+{"description":"' after ampersand in double-quoted attribute value",
+"input":"<z z=\"&'\">",
+"output":[["StartTag", "z", {"z": "&'"}]]},
+
+{"description":"' after ampersand in single-quoted attribute value",
+"input":"<z z='&'>",
+"output":[["StartTag", "z", {"z": "&"}]]},
+
+{"description":"\" after ampersand in single-quoted attribute value",
+"input":"<z z='&\"'>",
+"output":[["StartTag", "z", {"z": "&\""}]]},
+
+{"description":"Text after bogus character reference",
+"input":"<z z='&xlink_xmlns;'>bar<z>",
+"output":[["StartTag","z",{"z":"&xlink_xmlns;"}],["Character","bar"],["StartTag","z",{}]]},
+
+{"description":"Text after hex character reference",
+"input":"<z z='&#x0020; foo'>bar<z>",
+"output":[["StartTag","z",{"z":"  foo"}],["Character","bar"],["StartTag","z",{}]]},
+
+{"description":"Attribute name starting with \"",
+"input":"<foo \"='bar'>",
+"output":[["StartTag", "foo", {"\"": "bar"}]],
+"errors":[
+    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 6 }
+]},
+
+{"description":"Attribute name starting with '",
+"input":"<foo '='bar'>",
+"output":[["StartTag", "foo", {"'": "bar"}]],
+"errors":[
+    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 6 }
+]},
+
+{"description":"Attribute name containing \"",
+"input":"<foo a\"b='bar'>",
+"output":[["StartTag", "foo", {"a\"b": "bar"}]],
+"errors":[
+    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 7 }
+]},
+
+{"description":"Attribute name containing '",
+"input":"<foo a'b='bar'>",
+"output":[["StartTag", "foo", {"a'b": "bar"}]],
+"errors":[
+    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 7 }
+]},
+
+{"description":"Unquoted attribute value containing '",
+"input":"<foo a=b'c>",
+"output":[["StartTag", "foo", {"a": "b'c"}]],
+"errors":[
+    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 9 }
+]},
+
+
+{"description":"Unquoted attribute value containing \"",
+"input":"<foo a=b\"c>",
+"output":[["StartTag", "foo", {"a": "b\"c"}]],
+"errors":[
+    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 9 }
+]},
+
+{"description":"Double-quoted attribute value not followed by whitespace",
+"input":"<foo a=\"b\"c>",
+"output":[["StartTag", "foo", {"a": "b", "c": ""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 11 }
+]},
+
+{"description":"Single-quoted attribute value not followed by whitespace",
+"input":"<foo a='b'c>",
+"output":[["StartTag", "foo", {"a": "b", "c": ""}]],
+"errors":[
+    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 11 }
+]},
+
+{"description":"Quoted attribute followed by permitted /",
+"input":"<br a='b'/>",
+"output":[["StartTag","br",{"a":"b"},true]]},
+
+{"description":"Quoted attribute followed by non-permitted /",
+"input":"<bar a='b'/>",
+"output":[["StartTag","bar",{"a":"b"},true]]},
+
+{"description":"CR EOF after doctype name",
+"input":"<!doctype html \r",
+"output":[["DOCTYPE", "html", null, null, false]],
+"errors":[
+    { "code": "eof-in-doctype", "line": 2, "col": 1 }
+]},
+
+{"description":"CR EOF in tag name",
+"input":"<z\r",
+"output":[],
+"errors":[
+    { "code": "eof-in-tag", "line": 2, "col": 1 }
+]},
+
+{"description":"Slash EOF in tag name",
+"input":"<z/",
+"output":[],
+"errors":[
+    { "code": "eof-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"Zero hex numeric entity",
+"input":"&#x0",
+"output":[["Character", "\uFFFD"]],
+"errors":[
+    { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 },
+    { "code": "null-character-reference", "line": 1, "col": 5 }
+]},
+
+{"description":"Zero decimal numeric entity",
+"input":"&#0",
+"output":[["Character", "\uFFFD"]],
+"errors":[
+    { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 4 },
+    { "code": "null-character-reference", "line": 1, "col": 4 }
+]},
+
+{"description":"Zero-prefixed hex numeric entity",
+"input":"&#x000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000041;",
+"output":[["Character", "A"]]},
+
+{"description":"Zero-prefixed decimal numeric entity",
+"input":"&#000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000065;",
+"output":[["Character", "A"]]},
+
+{"description":"Empty hex numeric entities",
+"input":"&#x &#X ",
+"output":[["Character", "&#x &#X "]],
+"errors":[
+    { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 4 },
+    { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 8 }
+]},
+
+{"description":"Invalid digit in hex numeric entity",
+"input":"&#xZ",
+"output":[["Character", "&#xZ"]],
+"errors":[
+    { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 4 }
+]},
+
+{"description":"Empty decimal numeric entities",
+"input":"&# &#; ",
+"output":[["Character", "&# &#; "]],
+"errors":[
+    { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 3 },
+    { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 6 }
+]},
+
+{"description":"Invalid digit in decimal numeric entity",
+"input":"&#A",
+"output":[["Character", "&#A"]],
+"errors":[
+    { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 3 }
+]},
+
+{"description":"Non-BMP numeric entity",
+"input":"&#x10000;",
+"output":[["Character", "\uD800\uDC00"]]},
+
+{"description":"Maximum non-BMP numeric entity",
+"input":"&#X10FFFF;",
+"output":[["Character", "\uDBFF\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-character-reference", "line": 1, "col": 11 }
+]},
+
+
+{"description":"Above maximum numeric entity",
+"input":"&#x110000;",
+"output":[["Character", "\uFFFD"]],
+"errors":[
+    { "code": "character-reference-outside-unicode-range", "line": 1, "col": 11 }
+]},
+
+{"description":"32-bit hex numeric entity",
+"input":"&#x80000041;",
+"output":[["Character", "\uFFFD"]],
+"errors":[
+    { "code": "character-reference-outside-unicode-range", "line": 1, "col": 13 }
+]},
+
+{"description":"33-bit hex numeric entity",
+"input":"&#x100000041;",
+"output":[["Character", "\uFFFD"]],
+"errors":[
+    { "code": "character-reference-outside-unicode-range", "line": 1, "col": 14 }
+]},
+
+{"description":"33-bit decimal numeric entity",
+"input":"&#4294967361;",
+"output":[["Character", "\uFFFD"]],
+"errors":[
+    { "code": "character-reference-outside-unicode-range", "line": 1, "col": 14 }
+]},
+
+{"description":"65-bit hex numeric entity",
+"input":"&#x10000000000000041;",
+"output":[["Character", "\uFFFD"]],
+"errors":[
+    { "code": "character-reference-outside-unicode-range", "line": 1, "col": 22 }
+]},
+
+{"description":"65-bit decimal numeric entity",
+"input":"&#18446744073709551681;",
+"output":[["Character", "\uFFFD"]],
+"errors":[
+    { "code": "character-reference-outside-unicode-range", "line": 1, "col": 24 }
+]},
+
+{"description":"Surrogate code point edge cases",
+"input":"&#xD7FF;&#xD800;&#xD801;&#xDFFE;&#xDFFF;&#xE000;",
+"output":[["Character", "\uD7FF\uFFFD\uFFFD\uFFFD\uFFFD\uE000"]],
+"errors":[
+    { "code": "surrogate-character-reference", "line": 1, "col": 17 },
+    { "code": "surrogate-character-reference", "line": 1, "col": 25 },
+    { "code": "surrogate-character-reference", "line": 1, "col": 33 },
+    { "code": "surrogate-character-reference", "line": 1, "col": 41 }
+]},
+
+{"description":"Uppercase start tag name",
+"input":"<X>",
+"output":[["StartTag", "x", {}]]},
+
+{"description":"Uppercase end tag name",
+"input":"</X>",
+"output":[["EndTag", "x"]]},
+
+{"description":"Uppercase attribute name",
+"input":"<x X>",
+"output":[["StartTag", "x", { "x":"" }]]},
+
+{"description":"Tag/attribute name case edge values",
+"input":"<x@AZ[`az{ @AZ[`az{>",
+"output":[["StartTag", "x@az[`az{", { "@az[`az{":"" }]]},
+
+{"description":"Duplicate different-case attributes",
+"input":"<x x=1 x=2 X=3>",
+"output":[["StartTag", "x", { "x":"1" }]],
+"errors":[
+    { "code": "duplicate-attribute", "line": 1, "col": 9 },
+    { "code": "duplicate-attribute", "line": 1, "col": 13 }
+]},
+
+{"description":"Uppercase close tag attributes",
+"input":"</x X>",
+"output":[["EndTag", "x"]],
+"errors":[
+    { "code": "end-tag-with-attributes", "line": 1, "col": 6 }
+]},
+
+{"description":"Duplicate close tag attributes",
+"input":"</x x x>",
+"output":[["EndTag", "x"]],
+"errors":[
+    { "code": "duplicate-attribute", "line": 1, "col": 8 },
+    { "code": "end-tag-with-attributes", "line": 1, "col": 8 }
+]},
+
+{"description":"Permitted slash",
+"input":"<br/>",
+"output":[["StartTag","br",{},true]]},
+
+{"description":"Non-permitted slash",
+"input":"<xr/>",
+"output":[["StartTag","xr",{},true]]},
+
+{"description":"Permitted slash but in close tag",
+"input":"</br/>",
+"output":[["EndTag", "br"]],
+"errors":[
+    { "code": "end-tag-with-trailing-solidus", "line": 1, "col": 6 }
+]},
+
+{"description":"Doctype public case-sensitivity (1)",
+"input":"<!DoCtYpE HtMl PuBlIc \"AbC\" \"XyZ\">",
+"output":[["DOCTYPE", "html", "AbC", "XyZ", true]]},
+
+{"description":"Doctype public case-sensitivity (2)",
+"input":"<!dOcTyPe hTmL pUbLiC \"aBc\" \"xYz\">",
+"output":[["DOCTYPE", "html", "aBc", "xYz", true]]},
+
+{"description":"Doctype system case-sensitivity (1)",
+"input":"<!DoCtYpE HtMl SyStEm \"XyZ\">",
+"output":[["DOCTYPE", "html", null, "XyZ", true]]},
+
+{"description":"Doctype system case-sensitivity (2)",
+"input":"<!dOcTyPe hTmL sYsTeM \"xYz\">",
+"output":[["DOCTYPE", "html", null, "xYz", true]]},
+
+{"description":"U+0000 in lookahead region after non-matching character",
+"input":"<!doc>\u0000",
+"output":[["Comment", "doc"], ["Character", "\u0000"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
+    { "code": "unexpected-null-character", "line": 1, "col": 7 }
+]},
+
+{"description":"U+0000 in lookahead region",
+"input":"<!doc\u0000",
+"output":[["Comment", "doc\uFFFD"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
+    { "code": "unexpected-null-character", "line": 1, "col": 6 }
+]},
+
+{"description":"U+0080 in lookahead region",
+"input":"<!doc\u0080",
+"output":[["Comment", "doc\u0080"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
+    { "code": "control-character-in-input-stream", "line": 1, "col": 6 }
+]},
+
+{"description":"U+FDD1 in lookahead region",
+"input":"<!doc\uFDD1",
+"output":[["Comment", "doc\uFDD1"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 6 }
+]},
+
+{"description":"U+1FFFF in lookahead region",
+"input":"<!doc\uD83F\uDFFF",
+"output":[["Comment", "doc\uD83F\uDFFF"]],
+"errors":[
+    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 6 }
+]},
+
+{"description":"CR followed by non-LF",
+"input":"\r?",
+"output":[["Character", "\n?"]]},
+
+{"description":"CR at EOF",
+"input":"\r",
+"output":[["Character", "\n"]]},
+
+{"description":"LF at EOF",
+"input":"\n",
+"output":[["Character", "\n"]]},
+
+{"description":"CR LF",
+"input":"\r\n",
+"output":[["Character", "\n"]]},
+
+{"description":"CR CR",
+"input":"\r\r",
+"output":[["Character", "\n\n"]]},
+
+{"description":"LF LF",
+"input":"\n\n",
+"output":[["Character", "\n\n"]]},
+
+{"description":"LF CR",
+"input":"\n\r",
+"output":[["Character", "\n\n"]]},
+
+{"description":"text CR CR CR text",
+"input":"text\r\r\rtext",
+"output":[["Character", "text\n\n\ntext"]]},
+
+{"description":"Doctype publik",
+"input":"<!DOCTYPE html PUBLIK \"AbC\" \"XyZ\">",
+"output":[["DOCTYPE", "html", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 }
+]},
+
+{"description":"Doctype publi",
+"input":"<!DOCTYPE html PUBLI",
+"output":[["DOCTYPE", "html", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 }
+]},
+
+{"description":"Doctype sistem",
+"input":"<!DOCTYPE html SISTEM \"AbC\">",
+"output":[["DOCTYPE", "html", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 }
+]},
+
+{"description":"Doctype sys",
+"input":"<!DOCTYPE html SYS",
+"output":[["DOCTYPE", "html", null, null, false]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 }
+]},
+
+{"description":"Doctype html x>text",
+"input":"<!DOCTYPE html x>text",
+"output":[["DOCTYPE", "html", null, null, false], ["Character", "text"]],
+"errors":[
+    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 }
+]},
+
+{"description":"Grave accent in unquoted attribute",
+"input":"<a a=aa`>",
+"output":[["StartTag", "a", {"a":"aa`"}]],
+"errors":[
+    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 8 }
+]},
+
+{"description":"EOF in tag name state ",
+"input":"<a",
+"output":[],
+"errors": [
+    { "code": "eof-in-tag", "line": 1, "col": 3 }
+]},
+
+{"description":"EOF in before attribute name state",
+"input":"<a ",
+"output":[],
+"errors":[
+    { "code": "eof-in-tag", "line": 1, "col": 4 }
+]},
+
+{"description":"EOF in attribute name state",
+"input":"<a a",
+"output":[],
+"errors":[
+    { "code": "eof-in-tag", "line": 1, "col": 5 }
+]},
+
+{"description":"EOF in after attribute name state",
+"input":"<a a ",
+"output":[],
+"errors":[
+    { "code": "eof-in-tag", "line": 1, "col": 6 }
+]},
+
+{"description":"EOF in before attribute value state",
+"input":"<a a =",
+"output":[],
+"errors":[
+    { "code": "eof-in-tag", "line": 1, "col": 7 }
+]},
+
+{"description":"EOF in attribute value (double quoted) state",
+"input":"<a a =\"a",
+"output":[],
+"errors":[
+    { "code": "eof-in-tag", "line": 1, "col": 9 }
+]},
+
+{"description":"EOF in attribute value (single quoted) state",
+"input":"<a a ='a",
+"output":[],
+"errors":[
+    { "code": "eof-in-tag", "line": 1, "col": 9 }
+]},
+
+{"description":"EOF in attribute value (unquoted) state",
+"input":"<a a =a",
+"output":[],
+"errors":[
+    { "code": "eof-in-tag", "line": 1, "col": 8 }
+]},
+
+{"description":"EOF in after attribute value state",
+"input":"<a a ='a'",
+"output":[],
+"errors":[
+    { "code": "eof-in-tag", "line": 1, "col": 10 }
+]}
+
+]}
diff --git a/lib/html5lib/tests/testdata/tokenizer/unicodeChars.test b/lib/html5lib/tests/testdata/tokenizer/unicodeChars.test
new file mode 100644
index 00000000..49a80985
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tokenizer/unicodeChars.test
@@ -0,0 +1,1577 @@
+{"tests": [
+
+{"description": "Invalid Unicode character U+0001",
+"input": "\u0001",
+"output": [["Character", "\u0001"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+0002",
+"input": "\u0002",
+"output": [["Character", "\u0002"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+0003",
+"input": "\u0003",
+"output": [["Character", "\u0003"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+0004",
+"input": "\u0004",
+"output": [["Character", "\u0004"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+0005",
+"input": "\u0005",
+"output": [["Character", "\u0005"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+0006",
+"input": "\u0006",
+"output": [["Character", "\u0006"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+0007",
+"input": "\u0007",
+"output": [["Character", "\u0007"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+0008",
+"input": "\u0008",
+"output": [["Character", "\u0008"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+000B",
+"input": "\u000B",
+"output": [["Character", "\u000B"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+000E",
+"input": "\u000E",
+"output": [["Character", "\u000E"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+000F",
+"input": "\u000F",
+"output": [["Character", "\u000F"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+0010",
+"input": "\u0010",
+"output": [["Character", "\u0010"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+0011",
+"input": "\u0011",
+"output": [["Character", "\u0011"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+0012",
+"input": "\u0012",
+"output": [["Character", "\u0012"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+0013",
+"input": "\u0013",
+"output": [["Character", "\u0013"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+0014",
+"input": "\u0014",
+"output": [["Character", "\u0014"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+0015",
+"input": "\u0015",
+"output": [["Character", "\u0015"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+0016",
+"input": "\u0016",
+"output": [["Character", "\u0016"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+0017",
+"input": "\u0017",
+"output": [["Character", "\u0017"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+0018",
+"input": "\u0018",
+"output": [["Character", "\u0018"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+0019",
+"input": "\u0019",
+"output": [["Character", "\u0019"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+001A",
+"input": "\u001A",
+"output": [["Character", "\u001A"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+001B",
+"input": "\u001B",
+"output": [["Character", "\u001B"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+001C",
+"input": "\u001C",
+"output": [["Character", "\u001C"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+001D",
+"input": "\u001D",
+"output": [["Character", "\u001D"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+001E",
+"input": "\u001E",
+"output": [["Character", "\u001E"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+001F",
+"input": "\u001F",
+"output": [["Character", "\u001F"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+007F",
+"input": "\u007F",
+"output": [["Character", "\u007F"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDD0",
+"input": "\uFDD0",
+"output": [["Character", "\uFDD0"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDD1",
+"input": "\uFDD1",
+"output": [["Character", "\uFDD1"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDD2",
+"input": "\uFDD2",
+"output": [["Character", "\uFDD2"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDD3",
+"input": "\uFDD3",
+"output": [["Character", "\uFDD3"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDD4",
+"input": "\uFDD4",
+"output": [["Character", "\uFDD4"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDD5",
+"input": "\uFDD5",
+"output": [["Character", "\uFDD5"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDD6",
+"input": "\uFDD6",
+"output": [["Character", "\uFDD6"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDD7",
+"input": "\uFDD7",
+"output": [["Character", "\uFDD7"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDD8",
+"input": "\uFDD8",
+"output": [["Character", "\uFDD8"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDD9",
+"input": "\uFDD9",
+"output": [["Character", "\uFDD9"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDDA",
+"input": "\uFDDA",
+"output": [["Character", "\uFDDA"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDDB",
+"input": "\uFDDB",
+"output": [["Character", "\uFDDB"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDDC",
+"input": "\uFDDC",
+"output": [["Character", "\uFDDC"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDDD",
+"input": "\uFDDD",
+"output": [["Character", "\uFDDD"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDDE",
+"input": "\uFDDE",
+"output": [["Character", "\uFDDE"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDDF",
+"input": "\uFDDF",
+"output": [["Character", "\uFDDF"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDE0",
+"input": "\uFDE0",
+"output": [["Character", "\uFDE0"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDE1",
+"input": "\uFDE1",
+"output": [["Character", "\uFDE1"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDE2",
+"input": "\uFDE2",
+"output": [["Character", "\uFDE2"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDE3",
+"input": "\uFDE3",
+"output": [["Character", "\uFDE3"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDE4",
+"input": "\uFDE4",
+"output": [["Character", "\uFDE4"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDE5",
+"input": "\uFDE5",
+"output": [["Character", "\uFDE5"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDE6",
+"input": "\uFDE6",
+"output": [["Character", "\uFDE6"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDE7",
+"input": "\uFDE7",
+"output": [["Character", "\uFDE7"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDE8",
+"input": "\uFDE8",
+"output": [["Character", "\uFDE8"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDE9",
+"input": "\uFDE9",
+"output": [["Character", "\uFDE9"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDEA",
+"input": "\uFDEA",
+"output": [["Character", "\uFDEA"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDEB",
+"input": "\uFDEB",
+"output": [["Character", "\uFDEB"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDEC",
+"input": "\uFDEC",
+"output": [["Character", "\uFDEC"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDED",
+"input": "\uFDED",
+"output": [["Character", "\uFDED"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDEE",
+"input": "\uFDEE",
+"output": [["Character", "\uFDEE"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FDEF",
+"input": "\uFDEF",
+"output": [["Character", "\uFDEF"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FFFE",
+"input": "\uFFFE",
+"output": [["Character", "\uFFFE"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FFFF",
+"input": "\uFFFF",
+"output": [["Character", "\uFFFF"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+1FFFE",
+"input": "\uD83F\uDFFE",
+"output": [["Character", "\uD83F\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+1FFFF",
+"input": "\uD83F\uDFFF",
+"output": [["Character", "\uD83F\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+2FFFE",
+"input": "\uD87F\uDFFE",
+"output": [["Character", "\uD87F\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+2FFFF",
+"input": "\uD87F\uDFFF",
+"output": [["Character", "\uD87F\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+3FFFE",
+"input": "\uD8BF\uDFFE",
+"output": [["Character", "\uD8BF\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+3FFFF",
+"input": "\uD8BF\uDFFF",
+"output": [["Character", "\uD8BF\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+4FFFE",
+"input": "\uD8FF\uDFFE",
+"output": [["Character", "\uD8FF\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+4FFFF",
+"input": "\uD8FF\uDFFF",
+"output": [["Character", "\uD8FF\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+5FFFE",
+"input": "\uD93F\uDFFE",
+"output": [["Character", "\uD93F\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+5FFFF",
+"input": "\uD93F\uDFFF",
+"output": [["Character", "\uD93F\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+6FFFE",
+"input": "\uD97F\uDFFE",
+"output": [["Character", "\uD97F\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+6FFFF",
+"input": "\uD97F\uDFFF",
+"output": [["Character", "\uD97F\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+7FFFE",
+"input": "\uD9BF\uDFFE",
+"output": [["Character", "\uD9BF\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+7FFFF",
+"input": "\uD9BF\uDFFF",
+"output": [["Character", "\uD9BF\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+8FFFE",
+"input": "\uD9FF\uDFFE",
+"output": [["Character", "\uD9FF\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+8FFFF",
+"input": "\uD9FF\uDFFF",
+"output": [["Character", "\uD9FF\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+9FFFE",
+"input": "\uDA3F\uDFFE",
+"output": [["Character", "\uDA3F\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+9FFFF",
+"input": "\uDA3F\uDFFF",
+"output": [["Character", "\uDA3F\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+AFFFE",
+"input": "\uDA7F\uDFFE",
+"output": [["Character", "\uDA7F\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+AFFFF",
+"input": "\uDA7F\uDFFF",
+"output": [["Character", "\uDA7F\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+BFFFE",
+"input": "\uDABF\uDFFE",
+"output": [["Character", "\uDABF\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+BFFFF",
+"input": "\uDABF\uDFFF",
+"output": [["Character", "\uDABF\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+CFFFE",
+"input": "\uDAFF\uDFFE",
+"output": [["Character", "\uDAFF\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+CFFFF",
+"input": "\uDAFF\uDFFF",
+"output": [["Character", "\uDAFF\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+DFFFE",
+"input": "\uDB3F\uDFFE",
+"output": [["Character", "\uDB3F\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+DFFFF",
+"input": "\uDB3F\uDFFF",
+"output": [["Character", "\uDB3F\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+EFFFE",
+"input": "\uDB7F\uDFFE",
+"output": [["Character", "\uDB7F\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+EFFFF",
+"input": "\uDB7F\uDFFF",
+"output": [["Character", "\uDB7F\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FFFFE",
+"input": "\uDBBF\uDFFE",
+"output": [["Character", "\uDBBF\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+FFFFF",
+"input": "\uDBBF\uDFFF",
+"output": [["Character", "\uDBBF\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+10FFFE",
+"input": "\uDBFF\uDFFE",
+"output": [["Character", "\uDBFF\uDFFE"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+10FFFF",
+"input": "\uDBFF\uDFFF",
+"output": [["Character", "\uDBFF\uDFFF"]],
+"errors":[
+    { "code": "noncharacter-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Valid Unicode character U+0009",
+"input": "\u0009",
+"output": [["Character", "\u0009"]]},
+
+{"description": "Valid Unicode character U+000A",
+"input": "\u000A",
+"output": [["Character", "\u000A"]]},
+
+{"description": "Valid Unicode character U+0020",
+"input": "\u0020",
+"output": [["Character", "\u0020"]]},
+
+{"description": "Valid Unicode character U+0021",
+"input": "\u0021",
+"output": [["Character", "\u0021"]]},
+
+{"description": "Valid Unicode character U+0022",
+"input": "\u0022",
+"output": [["Character", "\u0022"]]},
+
+{"description": "Valid Unicode character U+0023",
+"input": "\u0023",
+"output": [["Character", "\u0023"]]},
+
+{"description": "Valid Unicode character U+0024",
+"input": "\u0024",
+"output": [["Character", "\u0024"]]},
+
+{"description": "Valid Unicode character U+0025",
+"input": "\u0025",
+"output": [["Character", "\u0025"]]},
+
+{"description": "Valid Unicode character U+0026",
+"input": "\u0026",
+"output": [["Character", "\u0026"]]},
+
+{"description": "Valid Unicode character U+0027",
+"input": "\u0027",
+"output": [["Character", "\u0027"]]},
+
+{"description": "Valid Unicode character U+0028",
+"input": "\u0028",
+"output": [["Character", "\u0028"]]},
+
+{"description": "Valid Unicode character U+0029",
+"input": "\u0029",
+"output": [["Character", "\u0029"]]},
+
+{"description": "Valid Unicode character U+002A",
+"input": "\u002A",
+"output": [["Character", "\u002A"]]},
+
+{"description": "Valid Unicode character U+002B",
+"input": "\u002B",
+"output": [["Character", "\u002B"]]},
+
+{"description": "Valid Unicode character U+002C",
+"input": "\u002C",
+"output": [["Character", "\u002C"]]},
+
+{"description": "Valid Unicode character U+002D",
+"input": "\u002D",
+"output": [["Character", "\u002D"]]},
+
+{"description": "Valid Unicode character U+002E",
+"input": "\u002E",
+"output": [["Character", "\u002E"]]},
+
+{"description": "Valid Unicode character U+002F",
+"input": "\u002F",
+"output": [["Character", "\u002F"]]},
+
+{"description": "Valid Unicode character U+0030",
+"input": "\u0030",
+"output": [["Character", "\u0030"]]},
+
+{"description": "Valid Unicode character U+0031",
+"input": "\u0031",
+"output": [["Character", "\u0031"]]},
+
+{"description": "Valid Unicode character U+0032",
+"input": "\u0032",
+"output": [["Character", "\u0032"]]},
+
+{"description": "Valid Unicode character U+0033",
+"input": "\u0033",
+"output": [["Character", "\u0033"]]},
+
+{"description": "Valid Unicode character U+0034",
+"input": "\u0034",
+"output": [["Character", "\u0034"]]},
+
+{"description": "Valid Unicode character U+0035",
+"input": "\u0035",
+"output": [["Character", "\u0035"]]},
+
+{"description": "Valid Unicode character U+0036",
+"input": "\u0036",
+"output": [["Character", "\u0036"]]},
+
+{"description": "Valid Unicode character U+0037",
+"input": "\u0037",
+"output": [["Character", "\u0037"]]},
+
+{"description": "Valid Unicode character U+0038",
+"input": "\u0038",
+"output": [["Character", "\u0038"]]},
+
+{"description": "Valid Unicode character U+0039",
+"input": "\u0039",
+"output": [["Character", "\u0039"]]},
+
+{"description": "Valid Unicode character U+003A",
+"input": "\u003A",
+"output": [["Character", "\u003A"]]},
+
+{"description": "Valid Unicode character U+003B",
+"input": "\u003B",
+"output": [["Character", "\u003B"]]},
+
+{"description": "Valid Unicode character U+003D",
+"input": "\u003D",
+"output": [["Character", "\u003D"]]},
+
+{"description": "Valid Unicode character U+003E",
+"input": "\u003E",
+"output": [["Character", "\u003E"]]},
+
+{"description": "Valid Unicode character U+003F",
+"input": "\u003F",
+"output": [["Character", "\u003F"]]},
+
+{"description": "Valid Unicode character U+0040",
+"input": "\u0040",
+"output": [["Character", "\u0040"]]},
+
+{"description": "Valid Unicode character U+0041",
+"input": "\u0041",
+"output": [["Character", "\u0041"]]},
+
+{"description": "Valid Unicode character U+0042",
+"input": "\u0042",
+"output": [["Character", "\u0042"]]},
+
+{"description": "Valid Unicode character U+0043",
+"input": "\u0043",
+"output": [["Character", "\u0043"]]},
+
+{"description": "Valid Unicode character U+0044",
+"input": "\u0044",
+"output": [["Character", "\u0044"]]},
+
+{"description": "Valid Unicode character U+0045",
+"input": "\u0045",
+"output": [["Character", "\u0045"]]},
+
+{"description": "Valid Unicode character U+0046",
+"input": "\u0046",
+"output": [["Character", "\u0046"]]},
+
+{"description": "Valid Unicode character U+0047",
+"input": "\u0047",
+"output": [["Character", "\u0047"]]},
+
+{"description": "Valid Unicode character U+0048",
+"input": "\u0048",
+"output": [["Character", "\u0048"]]},
+
+{"description": "Valid Unicode character U+0049",
+"input": "\u0049",
+"output": [["Character", "\u0049"]]},
+
+{"description": "Valid Unicode character U+004A",
+"input": "\u004A",
+"output": [["Character", "\u004A"]]},
+
+{"description": "Valid Unicode character U+004B",
+"input": "\u004B",
+"output": [["Character", "\u004B"]]},
+
+{"description": "Valid Unicode character U+004C",
+"input": "\u004C",
+"output": [["Character", "\u004C"]]},
+
+{"description": "Valid Unicode character U+004D",
+"input": "\u004D",
+"output": [["Character", "\u004D"]]},
+
+{"description": "Valid Unicode character U+004E",
+"input": "\u004E",
+"output": [["Character", "\u004E"]]},
+
+{"description": "Valid Unicode character U+004F",
+"input": "\u004F",
+"output": [["Character", "\u004F"]]},
+
+{"description": "Valid Unicode character U+0050",
+"input": "\u0050",
+"output": [["Character", "\u0050"]]},
+
+{"description": "Valid Unicode character U+0051",
+"input": "\u0051",
+"output": [["Character", "\u0051"]]},
+
+{"description": "Valid Unicode character U+0052",
+"input": "\u0052",
+"output": [["Character", "\u0052"]]},
+
+{"description": "Valid Unicode character U+0053",
+"input": "\u0053",
+"output": [["Character", "\u0053"]]},
+
+{"description": "Valid Unicode character U+0054",
+"input": "\u0054",
+"output": [["Character", "\u0054"]]},
+
+{"description": "Valid Unicode character U+0055",
+"input": "\u0055",
+"output": [["Character", "\u0055"]]},
+
+{"description": "Valid Unicode character U+0056",
+"input": "\u0056",
+"output": [["Character", "\u0056"]]},
+
+{"description": "Valid Unicode character U+0057",
+"input": "\u0057",
+"output": [["Character", "\u0057"]]},
+
+{"description": "Valid Unicode character U+0058",
+"input": "\u0058",
+"output": [["Character", "\u0058"]]},
+
+{"description": "Valid Unicode character U+0059",
+"input": "\u0059",
+"output": [["Character", "\u0059"]]},
+
+{"description": "Valid Unicode character U+005A",
+"input": "\u005A",
+"output": [["Character", "\u005A"]]},
+
+{"description": "Valid Unicode character U+005B",
+"input": "\u005B",
+"output": [["Character", "\u005B"]]},
+
+{"description": "Valid Unicode character U+005C",
+"input": "\u005C",
+"output": [["Character", "\u005C"]]},
+
+{"description": "Valid Unicode character U+005D",
+"input": "\u005D",
+"output": [["Character", "\u005D"]]},
+
+{"description": "Valid Unicode character U+005E",
+"input": "\u005E",
+"output": [["Character", "\u005E"]]},
+
+{"description": "Valid Unicode character U+005F",
+"input": "\u005F",
+"output": [["Character", "\u005F"]]},
+
+{"description": "Valid Unicode character U+0060",
+"input": "\u0060",
+"output": [["Character", "\u0060"]]},
+
+{"description": "Valid Unicode character U+0061",
+"input": "\u0061",
+"output": [["Character", "\u0061"]]},
+
+{"description": "Valid Unicode character U+0062",
+"input": "\u0062",
+"output": [["Character", "\u0062"]]},
+
+{"description": "Valid Unicode character U+0063",
+"input": "\u0063",
+"output": [["Character", "\u0063"]]},
+
+{"description": "Valid Unicode character U+0064",
+"input": "\u0064",
+"output": [["Character", "\u0064"]]},
+
+{"description": "Valid Unicode character U+0065",
+"input": "\u0065",
+"output": [["Character", "\u0065"]]},
+
+{"description": "Valid Unicode character U+0066",
+"input": "\u0066",
+"output": [["Character", "\u0066"]]},
+
+{"description": "Valid Unicode character U+0067",
+"input": "\u0067",
+"output": [["Character", "\u0067"]]},
+
+{"description": "Valid Unicode character U+0068",
+"input": "\u0068",
+"output": [["Character", "\u0068"]]},
+
+{"description": "Valid Unicode character U+0069",
+"input": "\u0069",
+"output": [["Character", "\u0069"]]},
+
+{"description": "Valid Unicode character U+006A",
+"input": "\u006A",
+"output": [["Character", "\u006A"]]},
+
+{"description": "Valid Unicode character U+006B",
+"input": "\u006B",
+"output": [["Character", "\u006B"]]},
+
+{"description": "Valid Unicode character U+006C",
+"input": "\u006C",
+"output": [["Character", "\u006C"]]},
+
+{"description": "Valid Unicode character U+006D",
+"input": "\u006D",
+"output": [["Character", "\u006D"]]},
+
+{"description": "Valid Unicode character U+006E",
+"input": "\u006E",
+"output": [["Character", "\u006E"]]},
+
+{"description": "Valid Unicode character U+006F",
+"input": "\u006F",
+"output": [["Character", "\u006F"]]},
+
+{"description": "Valid Unicode character U+0070",
+"input": "\u0070",
+"output": [["Character", "\u0070"]]},
+
+{"description": "Valid Unicode character U+0071",
+"input": "\u0071",
+"output": [["Character", "\u0071"]]},
+
+{"description": "Valid Unicode character U+0072",
+"input": "\u0072",
+"output": [["Character", "\u0072"]]},
+
+{"description": "Valid Unicode character U+0073",
+"input": "\u0073",
+"output": [["Character", "\u0073"]]},
+
+{"description": "Valid Unicode character U+0074",
+"input": "\u0074",
+"output": [["Character", "\u0074"]]},
+
+{"description": "Valid Unicode character U+0075",
+"input": "\u0075",
+"output": [["Character", "\u0075"]]},
+
+{"description": "Valid Unicode character U+0076",
+"input": "\u0076",
+"output": [["Character", "\u0076"]]},
+
+{"description": "Valid Unicode character U+0077",
+"input": "\u0077",
+"output": [["Character", "\u0077"]]},
+
+{"description": "Valid Unicode character U+0078",
+"input": "\u0078",
+"output": [["Character", "\u0078"]]},
+
+{"description": "Valid Unicode character U+0079",
+"input": "\u0079",
+"output": [["Character", "\u0079"]]},
+
+{"description": "Valid Unicode character U+007A",
+"input": "\u007A",
+"output": [["Character", "\u007A"]]},
+
+{"description": "Valid Unicode character U+007B",
+"input": "\u007B",
+"output": [["Character", "\u007B"]]},
+
+{"description": "Valid Unicode character U+007C",
+"input": "\u007C",
+"output": [["Character", "\u007C"]]},
+
+{"description": "Valid Unicode character U+007D",
+"input": "\u007D",
+"output": [["Character", "\u007D"]]},
+
+{"description": "Valid Unicode character U+007E",
+"input": "\u007E",
+"output": [["Character", "\u007E"]]},
+
+{"description": "Valid Unicode character U+00A0",
+"input": "\u00A0",
+"output": [["Character", "\u00A0"]]},
+
+{"description": "Valid Unicode character U+00A1",
+"input": "\u00A1",
+"output": [["Character", "\u00A1"]]},
+
+{"description": "Valid Unicode character U+00A2",
+"input": "\u00A2",
+"output": [["Character", "\u00A2"]]},
+
+{"description": "Valid Unicode character U+00A3",
+"input": "\u00A3",
+"output": [["Character", "\u00A3"]]},
+
+{"description": "Valid Unicode character U+00A4",
+"input": "\u00A4",
+"output": [["Character", "\u00A4"]]},
+
+{"description": "Valid Unicode character U+00A5",
+"input": "\u00A5",
+"output": [["Character", "\u00A5"]]},
+
+{"description": "Valid Unicode character U+00A6",
+"input": "\u00A6",
+"output": [["Character", "\u00A6"]]},
+
+{"description": "Valid Unicode character U+00A7",
+"input": "\u00A7",
+"output": [["Character", "\u00A7"]]},
+
+{"description": "Valid Unicode character U+00A8",
+"input": "\u00A8",
+"output": [["Character", "\u00A8"]]},
+
+{"description": "Valid Unicode character U+00A9",
+"input": "\u00A9",
+"output": [["Character", "\u00A9"]]},
+
+{"description": "Valid Unicode character U+00AA",
+"input": "\u00AA",
+"output": [["Character", "\u00AA"]]},
+
+{"description": "Valid Unicode character U+00AB",
+"input": "\u00AB",
+"output": [["Character", "\u00AB"]]},
+
+{"description": "Valid Unicode character U+00AC",
+"input": "\u00AC",
+"output": [["Character", "\u00AC"]]},
+
+{"description": "Valid Unicode character U+00AD",
+"input": "\u00AD",
+"output": [["Character", "\u00AD"]]},
+
+{"description": "Valid Unicode character U+00AE",
+"input": "\u00AE",
+"output": [["Character", "\u00AE"]]},
+
+{"description": "Valid Unicode character U+00AF",
+"input": "\u00AF",
+"output": [["Character", "\u00AF"]]},
+
+{"description": "Valid Unicode character U+00B0",
+"input": "\u00B0",
+"output": [["Character", "\u00B0"]]},
+
+{"description": "Valid Unicode character U+00B1",
+"input": "\u00B1",
+"output": [["Character", "\u00B1"]]},
+
+{"description": "Valid Unicode character U+00B2",
+"input": "\u00B2",
+"output": [["Character", "\u00B2"]]},
+
+{"description": "Valid Unicode character U+00B3",
+"input": "\u00B3",
+"output": [["Character", "\u00B3"]]},
+
+{"description": "Valid Unicode character U+00B4",
+"input": "\u00B4",
+"output": [["Character", "\u00B4"]]},
+
+{"description": "Valid Unicode character U+00B5",
+"input": "\u00B5",
+"output": [["Character", "\u00B5"]]},
+
+{"description": "Valid Unicode character U+00B6",
+"input": "\u00B6",
+"output": [["Character", "\u00B6"]]},
+
+{"description": "Valid Unicode character U+00B7",
+"input": "\u00B7",
+"output": [["Character", "\u00B7"]]},
+
+{"description": "Valid Unicode character U+00B8",
+"input": "\u00B8",
+"output": [["Character", "\u00B8"]]},
+
+{"description": "Valid Unicode character U+00B9",
+"input": "\u00B9",
+"output": [["Character", "\u00B9"]]},
+
+{"description": "Valid Unicode character U+00BA",
+"input": "\u00BA",
+"output": [["Character", "\u00BA"]]},
+
+{"description": "Valid Unicode character U+00BB",
+"input": "\u00BB",
+"output": [["Character", "\u00BB"]]},
+
+{"description": "Valid Unicode character U+00BC",
+"input": "\u00BC",
+"output": [["Character", "\u00BC"]]},
+
+{"description": "Valid Unicode character U+00BD",
+"input": "\u00BD",
+"output": [["Character", "\u00BD"]]},
+
+{"description": "Valid Unicode character U+00BE",
+"input": "\u00BE",
+"output": [["Character", "\u00BE"]]},
+
+{"description": "Valid Unicode character U+00BF",
+"input": "\u00BF",
+"output": [["Character", "\u00BF"]]},
+
+{"description": "Valid Unicode character U+00C0",
+"input": "\u00C0",
+"output": [["Character", "\u00C0"]]},
+
+{"description": "Valid Unicode character U+00C1",
+"input": "\u00C1",
+"output": [["Character", "\u00C1"]]},
+
+{"description": "Valid Unicode character U+00C2",
+"input": "\u00C2",
+"output": [["Character", "\u00C2"]]},
+
+{"description": "Valid Unicode character U+00C3",
+"input": "\u00C3",
+"output": [["Character", "\u00C3"]]},
+
+{"description": "Valid Unicode character U+00C4",
+"input": "\u00C4",
+"output": [["Character", "\u00C4"]]},
+
+{"description": "Valid Unicode character U+00C5",
+"input": "\u00C5",
+"output": [["Character", "\u00C5"]]},
+
+{"description": "Valid Unicode character U+00C6",
+"input": "\u00C6",
+"output": [["Character", "\u00C6"]]},
+
+{"description": "Valid Unicode character U+00C7",
+"input": "\u00C7",
+"output": [["Character", "\u00C7"]]},
+
+{"description": "Valid Unicode character U+00C8",
+"input": "\u00C8",
+"output": [["Character", "\u00C8"]]},
+
+{"description": "Valid Unicode character U+00C9",
+"input": "\u00C9",
+"output": [["Character", "\u00C9"]]},
+
+{"description": "Valid Unicode character U+00CA",
+"input": "\u00CA",
+"output": [["Character", "\u00CA"]]},
+
+{"description": "Valid Unicode character U+00CB",
+"input": "\u00CB",
+"output": [["Character", "\u00CB"]]},
+
+{"description": "Valid Unicode character U+00CC",
+"input": "\u00CC",
+"output": [["Character", "\u00CC"]]},
+
+{"description": "Valid Unicode character U+00CD",
+"input": "\u00CD",
+"output": [["Character", "\u00CD"]]},
+
+{"description": "Valid Unicode character U+00CE",
+"input": "\u00CE",
+"output": [["Character", "\u00CE"]]},
+
+{"description": "Valid Unicode character U+00CF",
+"input": "\u00CF",
+"output": [["Character", "\u00CF"]]},
+
+{"description": "Valid Unicode character U+00D0",
+"input": "\u00D0",
+"output": [["Character", "\u00D0"]]},
+
+{"description": "Valid Unicode character U+00D1",
+"input": "\u00D1",
+"output": [["Character", "\u00D1"]]},
+
+{"description": "Valid Unicode character U+00D2",
+"input": "\u00D2",
+"output": [["Character", "\u00D2"]]},
+
+{"description": "Valid Unicode character U+00D3",
+"input": "\u00D3",
+"output": [["Character", "\u00D3"]]},
+
+{"description": "Valid Unicode character U+00D4",
+"input": "\u00D4",
+"output": [["Character", "\u00D4"]]},
+
+{"description": "Valid Unicode character U+00D5",
+"input": "\u00D5",
+"output": [["Character", "\u00D5"]]},
+
+{"description": "Valid Unicode character U+00D6",
+"input": "\u00D6",
+"output": [["Character", "\u00D6"]]},
+
+{"description": "Valid Unicode character U+00D7",
+"input": "\u00D7",
+"output": [["Character", "\u00D7"]]},
+
+{"description": "Valid Unicode character U+00D8",
+"input": "\u00D8",
+"output": [["Character", "\u00D8"]]},
+
+{"description": "Valid Unicode character U+00D9",
+"input": "\u00D9",
+"output": [["Character", "\u00D9"]]},
+
+{"description": "Valid Unicode character U+00DA",
+"input": "\u00DA",
+"output": [["Character", "\u00DA"]]},
+
+{"description": "Valid Unicode character U+00DB",
+"input": "\u00DB",
+"output": [["Character", "\u00DB"]]},
+
+{"description": "Valid Unicode character U+00DC",
+"input": "\u00DC",
+"output": [["Character", "\u00DC"]]},
+
+{"description": "Valid Unicode character U+00DD",
+"input": "\u00DD",
+"output": [["Character", "\u00DD"]]},
+
+{"description": "Valid Unicode character U+00DE",
+"input": "\u00DE",
+"output": [["Character", "\u00DE"]]},
+
+{"description": "Valid Unicode character U+00DF",
+"input": "\u00DF",
+"output": [["Character", "\u00DF"]]},
+
+{"description": "Valid Unicode character U+00E0",
+"input": "\u00E0",
+"output": [["Character", "\u00E0"]]},
+
+{"description": "Valid Unicode character U+00E1",
+"input": "\u00E1",
+"output": [["Character", "\u00E1"]]},
+
+{"description": "Valid Unicode character U+00E2",
+"input": "\u00E2",
+"output": [["Character", "\u00E2"]]},
+
+{"description": "Valid Unicode character U+00E3",
+"input": "\u00E3",
+"output": [["Character", "\u00E3"]]},
+
+{"description": "Valid Unicode character U+00E4",
+"input": "\u00E4",
+"output": [["Character", "\u00E4"]]},
+
+{"description": "Valid Unicode character U+00E5",
+"input": "\u00E5",
+"output": [["Character", "\u00E5"]]},
+
+{"description": "Valid Unicode character U+00E6",
+"input": "\u00E6",
+"output": [["Character", "\u00E6"]]},
+
+{"description": "Valid Unicode character U+00E7",
+"input": "\u00E7",
+"output": [["Character", "\u00E7"]]},
+
+{"description": "Valid Unicode character U+00E8",
+"input": "\u00E8",
+"output": [["Character", "\u00E8"]]},
+
+{"description": "Valid Unicode character U+00E9",
+"input": "\u00E9",
+"output": [["Character", "\u00E9"]]},
+
+{"description": "Valid Unicode character U+00EA",
+"input": "\u00EA",
+"output": [["Character", "\u00EA"]]},
+
+{"description": "Valid Unicode character U+00EB",
+"input": "\u00EB",
+"output": [["Character", "\u00EB"]]},
+
+{"description": "Valid Unicode character U+00EC",
+"input": "\u00EC",
+"output": [["Character", "\u00EC"]]},
+
+{"description": "Valid Unicode character U+00ED",
+"input": "\u00ED",
+"output": [["Character", "\u00ED"]]},
+
+{"description": "Valid Unicode character U+00EE",
+"input": "\u00EE",
+"output": [["Character", "\u00EE"]]},
+
+{"description": "Valid Unicode character U+00EF",
+"input": "\u00EF",
+"output": [["Character", "\u00EF"]]},
+
+{"description": "Valid Unicode character U+00F0",
+"input": "\u00F0",
+"output": [["Character", "\u00F0"]]},
+
+{"description": "Valid Unicode character U+00F1",
+"input": "\u00F1",
+"output": [["Character", "\u00F1"]]},
+
+{"description": "Valid Unicode character U+00F2",
+"input": "\u00F2",
+"output": [["Character", "\u00F2"]]},
+
+{"description": "Valid Unicode character U+00F3",
+"input": "\u00F3",
+"output": [["Character", "\u00F3"]]},
+
+{"description": "Valid Unicode character U+00F4",
+"input": "\u00F4",
+"output": [["Character", "\u00F4"]]},
+
+{"description": "Valid Unicode character U+00F5",
+"input": "\u00F5",
+"output": [["Character", "\u00F5"]]},
+
+{"description": "Valid Unicode character U+00F6",
+"input": "\u00F6",
+"output": [["Character", "\u00F6"]]},
+
+{"description": "Valid Unicode character U+00F7",
+"input": "\u00F7",
+"output": [["Character", "\u00F7"]]},
+
+{"description": "Valid Unicode character U+00F8",
+"input": "\u00F8",
+"output": [["Character", "\u00F8"]]},
+
+{"description": "Valid Unicode character U+00F9",
+"input": "\u00F9",
+"output": [["Character", "\u00F9"]]},
+
+{"description": "Valid Unicode character U+00FA",
+"input": "\u00FA",
+"output": [["Character", "\u00FA"]]},
+
+{"description": "Valid Unicode character U+00FB",
+"input": "\u00FB",
+"output": [["Character", "\u00FB"]]},
+
+{"description": "Valid Unicode character U+00FC",
+"input": "\u00FC",
+"output": [["Character", "\u00FC"]]},
+
+{"description": "Valid Unicode character U+00FD",
+"input": "\u00FD",
+"output": [["Character", "\u00FD"]]},
+
+{"description": "Valid Unicode character U+00FE",
+"input": "\u00FE",
+"output": [["Character", "\u00FE"]]},
+
+{"description": "Valid Unicode character U+00FF",
+"input": "\u00FF",
+"output": [["Character", "\u00FF"]]},
+
+{"description": "Valid Unicode character U+D7FF",
+"input": "\uD7FF",
+"output": [["Character", "\uD7FF"]]},
+
+{"description": "Valid Unicode character U+E000",
+"input": "\uE000",
+"output": [["Character", "\uE000"]]},
+
+{"description": "Valid Unicode character U+FDCF",
+"input": "\uFDCF",
+"output": [["Character", "\uFDCF"]]},
+
+{"description": "Valid Unicode character U+FDF0",
+"input": "\uFDF0",
+"output": [["Character", "\uFDF0"]]},
+
+{"description": "Valid Unicode character U+FFFD",
+"input": "\uFFFD",
+"output": [["Character", "\uFFFD"]]},
+
+{"description": "Valid Unicode character U+10000",
+"input": "\uD800\uDC00",
+"output": [["Character", "\uD800\uDC00"]]},
+
+{"description": "Valid Unicode character U+1FFFD",
+"input": "\uD83F\uDFFD",
+"output": [["Character", "\uD83F\uDFFD"]]},
+
+{"description": "Valid Unicode character U+20000",
+"input": "\uD840\uDC00",
+"output": [["Character", "\uD840\uDC00"]]},
+
+{"description": "Valid Unicode character U+2FFFD",
+"input": "\uD87F\uDFFD",
+"output": [["Character", "\uD87F\uDFFD"]]},
+
+{"description": "Valid Unicode character U+30000",
+"input": "\uD880\uDC00",
+"output": [["Character", "\uD880\uDC00"]]},
+
+{"description": "Valid Unicode character U+3FFFD",
+"input": "\uD8BF\uDFFD",
+"output": [["Character", "\uD8BF\uDFFD"]]},
+
+{"description": "Valid Unicode character U+40000",
+"input": "\uD8C0\uDC00",
+"output": [["Character", "\uD8C0\uDC00"]]},
+
+{"description": "Valid Unicode character U+4FFFD",
+"input": "\uD8FF\uDFFD",
+"output": [["Character", "\uD8FF\uDFFD"]]},
+
+{"description": "Valid Unicode character U+50000",
+"input": "\uD900\uDC00",
+"output": [["Character", "\uD900\uDC00"]]},
+
+{"description": "Valid Unicode character U+5FFFD",
+"input": "\uD93F\uDFFD",
+"output": [["Character", "\uD93F\uDFFD"]]},
+
+{"description": "Valid Unicode character U+60000",
+"input": "\uD940\uDC00",
+"output": [["Character", "\uD940\uDC00"]]},
+
+{"description": "Valid Unicode character U+6FFFD",
+"input": "\uD97F\uDFFD",
+"output": [["Character", "\uD97F\uDFFD"]]},
+
+{"description": "Valid Unicode character U+70000",
+"input": "\uD980\uDC00",
+"output": [["Character", "\uD980\uDC00"]]},
+
+{"description": "Valid Unicode character U+7FFFD",
+"input": "\uD9BF\uDFFD",
+"output": [["Character", "\uD9BF\uDFFD"]]},
+
+{"description": "Valid Unicode character U+80000",
+"input": "\uD9C0\uDC00",
+"output": [["Character", "\uD9C0\uDC00"]]},
+
+{"description": "Valid Unicode character U+8FFFD",
+"input": "\uD9FF\uDFFD",
+"output": [["Character", "\uD9FF\uDFFD"]]},
+
+{"description": "Valid Unicode character U+90000",
+"input": "\uDA00\uDC00",
+"output": [["Character", "\uDA00\uDC00"]]},
+
+{"description": "Valid Unicode character U+9FFFD",
+"input": "\uDA3F\uDFFD",
+"output": [["Character", "\uDA3F\uDFFD"]]},
+
+{"description": "Valid Unicode character U+A0000",
+"input": "\uDA40\uDC00",
+"output": [["Character", "\uDA40\uDC00"]]},
+
+{"description": "Valid Unicode character U+AFFFD",
+"input": "\uDA7F\uDFFD",
+"output": [["Character", "\uDA7F\uDFFD"]]},
+
+{"description": "Valid Unicode character U+B0000",
+"input": "\uDA80\uDC00",
+"output": [["Character", "\uDA80\uDC00"]]},
+
+{"description": "Valid Unicode character U+BFFFD",
+"input": "\uDABF\uDFFD",
+"output": [["Character", "\uDABF\uDFFD"]]},
+
+{"description": "Valid Unicode character U+C0000",
+"input": "\uDAC0\uDC00",
+"output": [["Character", "\uDAC0\uDC00"]]},
+
+{"description": "Valid Unicode character U+CFFFD",
+"input": "\uDAFF\uDFFD",
+"output": [["Character", "\uDAFF\uDFFD"]]},
+
+{"description": "Valid Unicode character U+D0000",
+"input": "\uDB00\uDC00",
+"output": [["Character", "\uDB00\uDC00"]]},
+
+{"description": "Valid Unicode character U+DFFFD",
+"input": "\uDB3F\uDFFD",
+"output": [["Character", "\uDB3F\uDFFD"]]},
+
+{"description": "Valid Unicode character U+E0000",
+"input": "\uDB40\uDC00",
+"output": [["Character", "\uDB40\uDC00"]]},
+
+{"description": "Valid Unicode character U+EFFFD",
+"input": "\uDB7F\uDFFD",
+"output": [["Character", "\uDB7F\uDFFD"]]},
+
+{"description": "Valid Unicode character U+F0000",
+"input": "\uDB80\uDC00",
+"output": [["Character", "\uDB80\uDC00"]]},
+
+{"description": "Valid Unicode character U+FFFFD",
+"input": "\uDBBF\uDFFD",
+"output": [["Character", "\uDBBF\uDFFD"]]},
+
+{"description": "Valid Unicode character U+100000",
+"input": "\uDBC0\uDC00",
+"output": [["Character", "\uDBC0\uDC00"]]},
+
+{"description": "Valid Unicode character U+10FFFD",
+"input": "\uDBFF\uDFFD",
+"output": [["Character", "\uDBFF\uDFFD"]]}
+
+]}
diff --git a/lib/html5lib/tests/testdata/tokenizer/unicodeCharsProblematic.test b/lib/html5lib/tests/testdata/tokenizer/unicodeCharsProblematic.test
new file mode 100644
index 00000000..3ddb96c0
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tokenizer/unicodeCharsProblematic.test
@@ -0,0 +1,41 @@
+{"tests" : [
+{"description": "Invalid Unicode character U+DFFF",
+"doubleEscaped":true,
+"input": "\\uDFFF",
+"output":[["Character", "\\uDFFF"]],
+"errors":[
+    { "code": "surrogate-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+D800",
+"doubleEscaped":true,
+"input": "\\uD800",
+"output":[["Character", "\\uD800"]],
+"errors":[
+    { "code": "surrogate-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description": "Invalid Unicode character U+DFFF with valid preceding character",
+"doubleEscaped":true,
+"input": "a\\uDFFF",
+"output":[["Character", "a\\uDFFF"]],
+"errors":[
+    { "code": "surrogate-in-input-stream", "line": 1, "col": 2 }
+]},
+
+{"description": "Invalid Unicode character U+D800 with valid following character",
+"doubleEscaped":true,
+"input": "\\uD800a",
+"output":[["Character", "\\uD800a"]],
+"errors":[
+    { "code": "surrogate-in-input-stream", "line": 1, "col": 1 }
+]},
+
+{"description":"CR followed by U+0000",
+"input":"\r\u0000",
+"output":[["Character", "\n\u0000"]],
+"errors":[
+    { "code": "unexpected-null-character", "line": 2, "col": 1 }
+]}
+]
+}
\ No newline at end of file
diff --git a/lib/html5lib/tests/testdata/tokenizer/xmlViolation.test b/lib/html5lib/tests/testdata/tokenizer/xmlViolation.test
new file mode 100644
index 00000000..da6159e2
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tokenizer/xmlViolation.test
@@ -0,0 +1,20 @@
+{"xmlViolationTests": [
+
+{"description":"Non-XML character",
+"input":"a\uFFFFb",
+"output":[["Character","a\uFFFDb"]]},
+
+{"description":"Non-XML space",
+"input":"a\u000Cb",
+"output":[["Character","a b"]]},
+
+{"description":"Double hyphen in comment",
+"input":"<!-- foo -- bar -->",
+"output":[["Comment"," foo - - bar "]]},
+
+{"description":"FF between attributes",
+"input":"<a b=''\u000Cc=''>",
+"output":[["StartTag","a",{"b":"","c":""}]]}
+]}
+
+
diff --git a/lib/html5lib/tests/testdata/tree-construction/README.md b/lib/html5lib/tests/testdata/tree-construction/README.md
new file mode 100644
index 00000000..4737a3a8
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/README.md
@@ -0,0 +1,108 @@
+Tree Construction Tests
+=======================
+
+Each file containing tree construction tests consists of any number of
+tests separated by two newlines (LF) and a single newline before the end
+of the file. For instance:
+
+    [TEST]LF
+    LF
+    [TEST]LF
+    LF
+    [TEST]LF
+
+Where [TEST] has the following format:
+
+Each test must begin with a string "\#data" followed by a newline (LF).
+All subsequent lines until a line that says "\#errors" are the test data
+and must be passed to the system being tested unchanged, except with the
+final newline (on the last line) removed.
+
+Then there must be a line that says "\#errors". It must be followed by
+one line per parse error that a conformant checker would return. It
+doesn't matter what those lines are, although they can't be
+"\#new-errors", "\#document-fragment", "\#document", "\#script-off",
+"\#script-on", or empty; the only thing that matters is that there be
+the right number of parse errors.
+
+Then there \*may\* be a line that says "\#new-errors", which works like
+the "\#errors" section adding more errors to the expected number of
+errors.
+
+Then there \*may\* be a line that says "\#document-fragment", which must
+be followed by a newline (LF), followed by a string of characters that
+indicates the context element, followed by a newline (LF). If the string 
+of characters starts with "svg ", the context element is in the SVG
+namespace and the substring after "svg " is the local name. If the
+string of characters starts with "math ", the context element is in the
+MathML namespace and the substring after "math " is the local name.
+Otherwise, the context element is in the HTML namespace and the string
+is the local name. If this line is present the "\#data" must be parsed
+using the HTML fragment parsing algorithm with the context element as
+context.
+
+Then there \*may\* be a line that says "\#script-off" or
+"\#script-on". If a line that says "\#script-off" is present, the
+parser must set the scripting flag to disabled. If a line that says
+"\#script-on" is present, it must set it to enabled. Otherwise, the
+test should be run in both modes.
+
+Then there must be a line that says "\#document", which must be followed
+by a dump of the tree of the parsed DOM. Each node must be represented
+by a single line. Each line must start with "| ", followed by two spaces
+per parent node that the node has before the root document node.
+
+-   Element nodes must be represented by a "`<`" then the *tag name
+    string* "`>`", and all the attributes must be given, sorted
+    lexicographically by UTF-16 code unit according to their *attribute
+    name string*, on subsequent lines, as if they were children of the
+    element node.
+-   Attribute nodes must have the *attribute name string*, then an "="
+    sign, then the attribute value in double quotes (").
+-   Text nodes must be the string, in double quotes. Newlines aren't
+    escaped.
+-   Comments must be "`<`" then "`!-- `" then the data then "` -->`".
+-   DOCTYPEs must be "`<!DOCTYPE `" then the name then if either of the
+    system id or public id is non-empty a space, public id in
+    double-quotes, another space and the system id in double-quotes, and
+    then in any case "`>`".
+-   Processing instructions must be "`<?`", then the target, then a
+    space, then the data and then "`>`". (The HTML parser cannot emit
+    processing instructions, but scripts can, and the WebVTT to DOM
+    rules can emit them.)
+-   Template contents are represented by the string "content" with the
+    children below it.
+
+The *tag name string* is the local name prefixed by a namespace
+designator. For the HTML namespace, the namespace designator is the
+empty string, i.e. there's no prefix. For the SVG namespace, the
+namespace designator is "svg ". For the MathML namespace, the namespace
+designator is "math ".
+
+The *attribute name string* is the local name prefixed by a namespace
+designator. For no namespace, the namespace designator is the empty
+string, i.e. there's no prefix. For the XLink namespace, the namespace
+designator is "xlink ". For the XML namespace, the namespace designator
+is "xml ". For the XMLNS namespace, the namespace designator is "xmlns
+". Note the difference between "xlink:href" which is an attribute in no
+namespace with the local name "xlink:href" and "xlink href" which is an
+attribute in the xlink namespace with the local name "href".
+
+If there is also a "\#document-fragment" the bit following "\#document"
+must be a representation of the HTML fragment serialization for the
+context element given by "\#document-fragment".
+
+For example:
+
+    #data
+    <p>One<p>Two
+    #errors
+    3: Missing document type declaration
+    #document
+    | <html>
+    |   <head>
+    |   <body>
+    |     <p>
+    |       "One"
+    |     <p>
+    |       "Two"
diff --git a/lib/html5lib/tests/testdata/tree-construction/adoption01.dat b/lib/html5lib/tests/testdata/tree-construction/adoption01.dat
new file mode 100644
index 00000000..38f98efd
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/adoption01.dat
@@ -0,0 +1,354 @@
+#data
+<a><p></a></p>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,10): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <p>
+|       <a>
+
+#data
+<a>1<p>2</a>3</p>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,12): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <p>
+|       <a>
+|         "2"
+|       "3"
+
+#data
+<a>1<button>2</a>3</button>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,17): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <button>
+|       <a>
+|         "2"
+|       "3"
+
+#data
+<a>1<b>2</a>3</b>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,12): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|       <b>
+|         "2"
+|     <b>
+|       "3"
+
+#data
+<a>1<div>2<div>3</a>4</div>5</div>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,20): adoption-agency-1.3
+(1,20): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <div>
+|       <a>
+|         "2"
+|       <div>
+|         <a>
+|           "3"
+|         "4"
+|       "5"
+
+#data
+<table><a>1<p>2</a>3</p>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,10): unexpected-start-tag-implies-table-voodoo
+(1,11): unexpected-character-implies-table-voodoo
+(1,14): unexpected-start-tag-implies-table-voodoo
+(1,15): unexpected-character-implies-table-voodoo
+(1,19): unexpected-end-tag-implies-table-voodoo
+(1,19): adoption-agency-1.3
+(1,20): unexpected-character-implies-table-voodoo
+(1,24): unexpected-end-tag-implies-table-voodoo
+(1,24): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <p>
+|       <a>
+|         "2"
+|       "3"
+|     <table>
+
+#data
+<b><b><a><p></a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,16): adoption-agency-1.3
+(1,16): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <b>
+|         <a>
+|         <p>
+|           <a>
+
+#data
+<b><a><b><p></a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,16): adoption-agency-1.3
+(1,16): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <a>
+|         <b>
+|       <b>
+|         <p>
+|           <a>
+
+#data
+<a><b><b><p></a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,16): adoption-agency-1.3
+(1,16): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|         <b>
+|     <b>
+|       <b>
+|         <p>
+|           <a>
+
+#data
+<p>1<s id="A">2<b id="B">3</p>4</s>5</b>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,30): unexpected-end-tag
+(1,35): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "1"
+|       <s>
+|         id="A"
+|         "2"
+|         <b>
+|           id="B"
+|           "3"
+|     <s>
+|       id="A"
+|       <b>
+|         id="B"
+|         "4"
+|     <b>
+|       id="B"
+|       "5"
+
+#data
+<table><a>1<td>2</td>3</table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,10): unexpected-start-tag-implies-table-voodoo
+(1,11): unexpected-character-implies-table-voodoo
+(1,15): unexpected-cell-in-table-body
+(1,30): unexpected-implied-end-tag-in-table-view
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <a>
+|       "3"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "2"
+
+#data
+<table>A<td>B</td>C</table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,8): unexpected-character-implies-table-voodoo
+(1,12): unexpected-cell-in-table-body
+(1,22): unexpected-character-implies-table-voodoo
+#document
+| <html>
+|   <head>
+|   <body>
+|     "AC"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "B"
+
+#data
+<a><svg><tr><input></a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,23): unexpected-end-tag
+(1,23): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <svg svg>
+|         <svg tr>
+|           <svg input>
+
+#data
+<div><a><b><div><div><div><div><div><div><div><div><div><div></a>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,65): adoption-agency-1.3
+(1,65): adoption-agency-1.3
+(1,65): adoption-agency-1.3
+(1,65): adoption-agency-1.3
+(1,65): adoption-agency-1.3
+(1,65): adoption-agency-1.3
+(1,65): adoption-agency-1.3
+(1,65): adoption-agency-1.3
+(1,65): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <a>
+|         <b>
+|       <b>
+|         <div>
+|           <a>
+|           <div>
+|             <a>
+|             <div>
+|               <a>
+|               <div>
+|                 <a>
+|                 <div>
+|                   <a>
+|                   <div>
+|                     <a>
+|                     <div>
+|                       <a>
+|                       <div>
+|                         <a>
+|                           <div>
+|                             <div>
+
+#data
+<div><a><b><u><i><code><div></a>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,32): adoption-agency-1.3
+(1,32): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <a>
+|         <b>
+|           <u>
+|             <i>
+|               <code>
+|       <u>
+|         <i>
+|           <code>
+|             <div>
+|               <a>
+
+#data
+<b><b><b><b>x</b></b></b></b>y
+#errors
+(1,3): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <b>
+|         <b>
+|           <b>
+|             "x"
+|     "y"
+
+#data
+<p><b><b><b><b><p>x
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,18): unexpected-end-tag
+(1,19): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <b>
+|         <b>
+|           <b>
+|             <b>
+|     <p>
+|       <b>
+|         <b>
+|           <b>
+|             "x"
+
+#data
+<b><em><foo><foob><fooc><aside></b></em>
+#errors
+(1,35): adoption-agency-1.3
+(1,40): adoption-agency-1.3
+(1,40): expected-closing-tag-but-got-eof
+#document-fragment
+div
+#document
+| <b>
+|   <em>
+|     <foo>
+|       <foob>
+|         <fooc>
+| <aside>
+|   <b>
diff --git a/lib/html5lib/tests/testdata/tree-construction/adoption02.dat b/lib/html5lib/tests/testdata/tree-construction/adoption02.dat
new file mode 100644
index 00000000..e54d8033
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/adoption02.dat
@@ -0,0 +1,39 @@
+#data
+<b>1<i>2<p>3</b>4
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,16): adoption-agency-1.3
+(1,17): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "1"
+|       <i>
+|         "2"
+|     <i>
+|       <p>
+|         <b>
+|           "3"
+|         "4"
+
+#data
+<a><div><style></style><address><a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,35): unexpected-start-tag-implies-end-tag
+(1,35): adoption-agency-1.3
+(1,35): adoption-agency-1.3
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <div>
+|       <a>
+|         <style>
+|       <address>
+|         <a>
+|         <a>
diff --git a/lib/html5lib/tests/testdata/tree-construction/blocks.dat b/lib/html5lib/tests/testdata/tree-construction/blocks.dat
new file mode 100644
index 00000000..5d3871ea
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/blocks.dat
@@ -0,0 +1,719 @@
+#data
+<!doctype html><p>foo<address>bar<p>baz
+#errors
+(1,39): expected-closing-tag-but-got-eof
+30: Unclosed element “address”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <address>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><address><p>foo</address>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <address>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<article>bar<p>baz
+#errors
+(1,39): expected-closing-tag-but-got-eof
+30: Unclosed element “article”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <article>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><article><p>foo</article>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <article>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<aside>bar<p>baz
+#errors
+(1,37): expected-closing-tag-but-got-eof
+28: Unclosed element “aside”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <aside>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><aside><p>foo</aside>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <aside>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<blockquote>bar<p>baz
+#errors
+(1,42): expected-closing-tag-but-got-eof
+33: Unclosed element “blockquote”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <blockquote>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><blockquote><p>foo</blockquote>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <blockquote>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<center>bar<p>baz
+#errors
+(1,38): expected-closing-tag-but-got-eof
+29: Unclosed element “center”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <center>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><center><p>foo</center>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <center>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<details>bar<p>baz
+#errors
+(1,39): expected-closing-tag-but-got-eof
+30: Unclosed element “details”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <details>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><details><p>foo</details>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <details>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<dialog>bar<p>baz
+#errors
+(1,38): expected-closing-tag-but-got-eof
+29: Unclosed element “dialog”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <dialog>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><dialog><p>foo</dialog>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <dialog>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<dir>bar<p>baz
+#errors
+(1,35): expected-closing-tag-but-got-eof
+26: Unclosed element “dir”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <dir>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><dir><p>foo</dir>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <dir>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<div>bar<p>baz
+#errors
+(1,35): expected-closing-tag-but-got-eof
+26: Unclosed element “div”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <div>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><div><p>foo</div>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<dl>bar<p>baz
+#errors
+(1,34): expected-closing-tag-but-got-eof
+25: Unclosed element “dl”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <dl>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><dl><p>foo</dl>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <dl>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<fieldset>bar<p>baz
+#errors
+(1,40): expected-closing-tag-but-got-eof
+31: Unclosed element “fieldset”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <fieldset>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><fieldset><p>foo</fieldset>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <fieldset>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<figcaption>bar<p>baz
+#errors
+(1,42): expected-closing-tag-but-got-eof
+33: Unclosed element “figcaption”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <figcaption>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><figcaption><p>foo</figcaption>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <figcaption>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<figure>bar<p>baz
+#errors
+(1,38): expected-closing-tag-but-got-eof
+29: Unclosed element “figure”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <figure>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><figure><p>foo</figure>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <figure>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<footer>bar<p>baz
+#errors
+(1,38): expected-closing-tag-but-got-eof
+29: Unclosed element “footer”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <footer>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><footer><p>foo</footer>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <footer>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<header>bar<p>baz
+#errors
+(1,38): expected-closing-tag-but-got-eof
+29: Unclosed element “header”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <header>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><header><p>foo</header>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <header>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<hgroup>bar<p>baz
+#errors
+(1,38): expected-closing-tag-but-got-eof
+29: Unclosed element “hgroup”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <hgroup>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><hgroup><p>foo</hgroup>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <hgroup>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<listing>bar<p>baz
+#errors
+(1,39): expected-closing-tag-but-got-eof
+30: Unclosed element “listing”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <listing>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><listing><p>foo</listing>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <listing>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<menu>bar<p>baz
+#errors
+(1,36): expected-closing-tag-but-got-eof
+27: Unclosed element “menu”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <menu>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><menu><p>foo</menu>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <menu>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<nav>bar<p>baz
+#errors
+(1,35): expected-closing-tag-but-got-eof
+26: Unclosed element “nav”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <nav>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><nav><p>foo</nav>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <nav>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<ol>bar<p>baz
+#errors
+(1,34): expected-closing-tag-but-got-eof
+25: Unclosed element “ol”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <ol>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><ol><p>foo</ol>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ol>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<pre>bar<p>baz
+#errors
+(1,35): expected-closing-tag-but-got-eof
+26: Unclosed element “pre”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <pre>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><pre><p>foo</pre>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<section>bar<p>baz
+#errors
+(1,39): expected-closing-tag-but-got-eof
+30: Unclosed element “section”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <section>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><section><p>foo</section>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <section>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<summary>bar<p>baz
+#errors
+(1,39): expected-closing-tag-but-got-eof
+30: Unclosed element “summary”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <summary>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><summary><p>foo</summary>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <summary>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<ul>bar<p>baz
+#errors
+(1,34): expected-closing-tag-but-got-eof
+25: Unclosed element “ul”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <ul>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><ul><p>foo</ul>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ul>
+|       <p>
+|         "foo"
+|     "bar"
diff --git a/lib/html5lib/tests/testdata/tree-construction/comments01.dat b/lib/html5lib/tests/testdata/tree-construction/comments01.dat
new file mode 100644
index 00000000..fa79c2b1
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/comments01.dat
@@ -0,0 +1,224 @@
+#data
+FOO<!-- BAR -->BAZ
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR  -->
+|     "BAZ"
+
+#data
+FOO<!-- BAR --!>BAZ
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,15): unexpected-bang-after-double-dash-in-comment
+#new-errors
+(1:16) incorrectly-closed-comment
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR  -->
+|     "BAZ"
+
+#data
+FOO<!-- BAR --! >BAZ
+#errors
+(1,3): expected-doctype-but-got-chars
+#new-errors
+(1:20) eof-in-comment
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR --! >BAZ -->
+
+#data
+FOO<!-- BAR --!
+>BAZ
+#errors
+(1,3): expected-doctype-but-got-chars
+#new-errors
+(1:20) eof-in-comment
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR --!
+>BAZ -->
+
+#data
+FOO<!-- BAR --   >BAZ
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,15): unexpected-char-in-comment
+(1,21): eof-in-comment
+#new-errors
+(1:22) eof-in-comment
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR --   >BAZ -->
+
+#data
+FOO<!-- BAR -- <QUX> -- MUX -->BAZ
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,15): unexpected-char-in-comment
+(1,24): unexpected-char-in-comment
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR -- <QUX> -- MUX  -->
+|     "BAZ"
+
+#data
+FOO<!-- BAR -- <QUX> -- MUX --!>BAZ
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,15): unexpected-char-in-comment
+(1,24): unexpected-char-in-comment
+(1,31): unexpected-bang-after-double-dash-in-comment
+#new-errors
+(1:32) incorrectly-closed-comment
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR -- <QUX> -- MUX  -->
+|     "BAZ"
+
+#data
+FOO<!-- BAR -- <QUX> -- MUX -- >BAZ
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,15): unexpected-char-in-comment
+(1,24): unexpected-char-in-comment
+(1,31): unexpected-char-in-comment
+(1,35): eof-in-comment
+#new-errors
+(1:36) eof-in-comment
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR -- <QUX> -- MUX -- >BAZ -->
+
+#data
+FOO<!---->BAZ
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  -->
+|     "BAZ"
+
+#data
+FOO<!--->BAZ
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,9): incorrect-comment
+#new-errors
+(1:9) abrupt-closing-of-empty-comment
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  -->
+|     "BAZ"
+
+#data
+FOO<!-->BAZ
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,8): incorrect-comment
+#new-errors
+(1:8) abrupt-closing-of-empty-comment
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  -->
+|     "BAZ"
+
+#data
+<?xml version="1.0">Hi
+#errors
+(1,1): expected-tag-name-but-got-question-mark
+(1,22): expected-doctype-but-got-chars
+#new-errors
+(1:2) unexpected-question-mark-instead-of-tag-name
+#document
+| <!-- ?xml version="1.0" -->
+| <html>
+|   <head>
+|   <body>
+|     "Hi"
+
+#data
+<?xml version="1.0">
+#errors
+(1,1): expected-tag-name-but-got-question-mark
+(1,20): expected-doctype-but-got-eof
+#new-errors
+(1:2) unexpected-question-mark-instead-of-tag-name
+#document
+| <!-- ?xml version="1.0" -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<?xml version
+#errors
+(1,1): expected-tag-name-but-got-question-mark
+(1,13): expected-doctype-but-got-eof
+#new-errors
+(1:2) unexpected-question-mark-instead-of-tag-name
+#document
+| <!-- ?xml version -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+FOO<!----->BAZ
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,10): unexpected-dash-after-double-dash-in-comment
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!-- - -->
+|     "BAZ"
+
+#data
+<html><!-- comment --><title>Comment before head</title>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <!--  comment  -->
+|   <head>
+|     <title>
+|       "Comment before head"
+|   <body>
diff --git a/lib/html5lib/tests/testdata/tree-construction/doctype01.dat b/lib/html5lib/tests/testdata/tree-construction/doctype01.dat
new file mode 100644
index 00000000..c845becf
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/doctype01.dat
@@ -0,0 +1,470 @@
+#data
+<!DOCTYPE html>Hello
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!dOctYpE HtMl>Hello
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPEhtml>Hello
+#errors
+(1,9): need-space-after-doctype
+#new-errors
+(1:10) missing-whitespace-before-doctype-name
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE>Hello
+#errors
+(1,9): need-space-after-doctype
+(1,10): expected-doctype-name-but-got-right-bracket
+(1,10): unknown-doctype
+#new-errors
+(1:10) missing-doctype-name
+#document
+| <!DOCTYPE >
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE >Hello
+#errors
+(1,11): expected-doctype-name-but-got-right-bracket
+(1,11): unknown-doctype
+#new-errors
+(1:11) missing-doctype-name
+#document
+| <!DOCTYPE >
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato>Hello
+#errors
+(1,17): unknown-doctype
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato >Hello
+#errors
+(1,18): unknown-doctype
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato taco>Hello
+#errors
+(1,17): expected-space-or-right-bracket-in-doctype
+(1,22): unknown-doctype
+#new-errors
+(1:18) invalid-character-sequence-after-doctype-name
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato taco "ddd>Hello
+#errors
+(1,17): expected-space-or-right-bracket-in-doctype
+(1,27): unknown-doctype
+#new-errors
+(1:18) invalid-character-sequence-after-doctype-name
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato sYstEM>Hello
+#errors
+(1,24): unexpected-char-in-doctype
+(1,24): unknown-doctype
+#new-errors
+(1:24) missing-doctype-system-identifier
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato sYstEM    >Hello
+#errors
+(1,28): unexpected-char-in-doctype
+(1,28): unknown-doctype
+#new-errors
+(1:28) missing-doctype-system-identifier
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE   potato       sYstEM  ggg>Hello
+#errors
+(1,34): unexpected-char-in-doctype
+(1,37): unknown-doctype
+#new-errors
+(1:34) missing-quote-before-doctype-system-identifier
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato SYSTEM taco  >Hello
+#errors
+(1,25): unexpected-char-in-doctype
+(1,31): unknown-doctype
+#new-errors
+(1:25) missing-quote-before-doctype-system-identifier
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato SYSTEM 'taco"'>Hello
+#errors
+(1,32): unknown-doctype
+#document
+| <!DOCTYPE potato "" "taco"">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato SYSTEM "taco">Hello
+#errors
+(1,31): unknown-doctype
+#document
+| <!DOCTYPE potato "" "taco">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato SYSTEM "tai'co">Hello
+#errors
+(1,33): unknown-doctype
+#document
+| <!DOCTYPE potato "" "tai'co">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato SYSTEMtaco "ddd">Hello
+#errors
+(1,24): unexpected-char-in-doctype
+(1,34): unknown-doctype
+#new-errors
+(1:24) missing-quote-before-doctype-system-identifier
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato grass SYSTEM taco>Hello
+#errors
+(1,17): expected-space-or-right-bracket-in-doctype
+(1,35): unknown-doctype
+#new-errors
+(1:18) invalid-character-sequence-after-doctype-name
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato pUbLIc>Hello
+#errors
+(1,24): unexpected-end-of-doctype
+(1,24): unknown-doctype
+#new-errors
+(1:24) missing-doctype-public-identifier
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato pUbLIc >Hello
+#errors
+(1,25): unexpected-end-of-doctype
+(1,25): unknown-doctype
+#new-errors
+(1:25) missing-doctype-public-identifier
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato pUbLIcgoof>Hello
+#errors
+(1,24): unexpected-char-in-doctype
+(1,28): unknown-doctype
+#new-errors
+(1:24) missing-quote-before-doctype-public-identifier
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato PUBLIC goof>Hello
+#errors
+(1,25): unexpected-char-in-doctype
+(1,29): unknown-doctype
+#new-errors
+(1:25) missing-quote-before-doctype-public-identifier
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato PUBLIC "go'of">Hello
+#errors
+(1,32): unknown-doctype
+#document
+| <!DOCTYPE potato "go'of" "">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato PUBLIC 'go'of'>Hello
+#errors
+(1,29): unexpected-char-in-doctype
+(1,32): unknown-doctype
+#new-errors
+(1:29) missing-quote-before-doctype-system-identifier
+#document
+| <!DOCTYPE potato "go" "">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato PUBLIC 'go:hh   of' >Hello
+#errors
+(1,38): unknown-doctype
+#document
+| <!DOCTYPE potato "go:hh   of" "">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato PUBLIC "W3C-//dfdf" SYSTEM ggg>Hello
+#errors
+(1,38): unexpected-char-in-doctype
+(1,48): unknown-doctype
+#new-errors
+(1:38) missing-quote-before-doctype-system-identifier
+#document
+| <!DOCTYPE potato "W3C-//dfdf" "">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+   "http://www.w3.org/TR/html4/strict.dtd">Hello
+#errors
+#document
+| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE ...>Hello
+#errors
+(1,14): unknown-doctype
+#document
+| <!DOCTYPE ...>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+#errors
+(2,58): unknown-doctype
+#document
+| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
+#errors
+(2,54): unknown-doctype
+#document
+| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE root-element [SYSTEM OR PUBLIC FPI] "uri" [ 
+<!-- internal declarations -->
+]>
+#errors
+(1,23): expected-space-or-right-bracket-in-doctype
+(2,30): unknown-doctype
+#new-errors
+(1:24) invalid-character-sequence-after-doctype-name
+#document
+| <!DOCTYPE root-element>
+| <html>
+|   <head>
+|   <body>
+|     "]>"
+
+#data
+<!DOCTYPE html PUBLIC
+  "-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
+    "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
+#errors
+(3,53): unknown-doctype
+#document
+| <!DOCTYPE html "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE HTML SYSTEM "http://www.w3.org/DTD/HTML4-strict.dtd"><body><b>Mine!</b></body>
+#errors
+(1,63): unknown-doctype
+#document
+| <!DOCTYPE html "" "http://www.w3.org/DTD/HTML4-strict.dtd">
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "Mine!"
+
+#data
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN""http://www.w3.org/TR/html4/strict.dtd">
+#errors
+(1,50): unexpected-char-in-doctype
+#new-errors
+(1:50) missing-whitespace-between-doctype-public-and-system-identifiers
+#document
+| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
+#errors
+(1,50): unexpected-char-in-doctype
+#new-errors
+(1:50) missing-whitespace-between-doctype-public-and-system-identifiers
+#document
+| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE HTML PUBLIC"-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
+#errors
+(1,21): unexpected-char-in-doctype
+(1,49): unexpected-char-in-doctype
+#new-errors
+(1:22) missing-whitespace-after-doctype-public-keyword
+(1:49) missing-whitespace-between-doctype-public-and-system-identifiers
+#document
+| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE HTML PUBLIC'-//W3C//DTD HTML 4.01//EN''http://www.w3.org/TR/html4/strict.dtd'>
+#errors
+(1,21): unexpected-char-in-doctype
+(1,49): unexpected-char-in-doctype
+#new-errors
+(1:22) missing-whitespace-after-doctype-public-keyword
+(1:49) missing-whitespace-between-doctype-public-and-system-identifiers
+#document
+| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+| <html>
+|   <head>
+|   <body>
diff --git a/lib/html5lib/tests/testdata/tree-construction/domjs-unsafe.dat b/lib/html5lib/tests/testdata/tree-construction/domjs-unsafe.dat
new file mode 100644
index 00000000..0a1b10f9
Binary files /dev/null and b/lib/html5lib/tests/testdata/tree-construction/domjs-unsafe.dat differ
diff --git a/lib/html5lib/tests/testdata/tree-construction/entities01.dat b/lib/html5lib/tests/testdata/tree-construction/entities01.dat
new file mode 100644
index 00000000..9bacebea
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/entities01.dat
@@ -0,0 +1,943 @@
+#data
+FOO&gt;BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO>BAR"
+
+#data
+FOO&gtBAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,6): named-entity-without-semicolon
+#new-errors
+(1:7) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO>BAR"
+
+#data
+FOO&gt BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,6): named-entity-without-semicolon
+#new-errors
+(1:7) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO> BAR"
+
+#data
+FOO&gt;;;BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO>;;BAR"
+
+#data
+I'm &notit; I tell you
+#errors
+(1,4): expected-doctype-but-got-chars
+(1,9): named-entity-without-semicolon
+#new-errors
+(1:9) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "I'm ¬it; I tell you"
+
+#data
+I'm &notin; I tell you
+#errors
+(1,4): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "I'm ∉ I tell you"
+
+#data
+&ammmp;
+#errors
+(1,1): expected-doctype-but-got-chars
+(1,7): unknown-named-character-reference
+#new-errors
+(1:7) unknown-named-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&ammmp;"
+
+#data
+&ammmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmp;
+#errors
+(1,1): expected-doctype-but-got-chars
+(1,950): unknown-named-character-reference
+#new-errors
+(1:950) unknown-named-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&ammmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmp;"
+
+#data
+FOO& BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO& BAR"
+
+#data
+FOO&<BAR>
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,9): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO&"
+|     <bar>
+
+#data
+FOO&&&&gt;BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO&&&>BAR"
+
+#data
+FOO&#41;BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO)BAR"
+
+#data
+FOO&#x41;BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOABAR"
+
+#data
+FOO&#X41;BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOABAR"
+
+#data
+FOO&#BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,5): expected-numeric-entity
+#new-errors
+(1:6) absence-of-digits-in-numeric-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO&#BAR"
+
+#data
+FOO&#ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,5): expected-numeric-entity
+#new-errors
+(1:6) absence-of-digits-in-numeric-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO&#ZOO"
+
+#data
+FOO&#xBAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,7): expected-numeric-entity
+#new-errors
+(1:9) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOºR"
+
+#data
+FOO&#xZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,6): expected-numeric-entity
+#new-errors
+(1:7) absence-of-digits-in-numeric-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO&#xZOO"
+
+#data
+FOO&#XZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,6): expected-numeric-entity
+#new-errors
+(1:7) absence-of-digits-in-numeric-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO&#XZOO"
+
+#data
+FOO&#41BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,7): numeric-entity-without-semicolon
+#new-errors
+(1:8) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO)BAR"
+
+#data
+FOO&#x41BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,10): numeric-entity-without-semicolon
+#new-errors
+(1:11) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO䆺R"
+
+#data
+FOO&#x41ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,8): numeric-entity-without-semicolon
+#new-errors
+(1:9) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOAZOO"
+
+#data
+FOO&#x0000;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) null-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#x0078;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOxZOO"
+
+#data
+FOO&#x0079;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOyZOO"
+
+#data
+FOO&#x0080;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO€ZOO"
+
+#data
+FOO&#x0081;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOZOO"
+
+#data
+FOO&#x0082;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO‚ZOO"
+
+#data
+FOO&#x0083;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOƒZOO"
+
+#data
+FOO&#x0084;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO„ZOO"
+
+#data
+FOO&#x0085;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO…ZOO"
+
+#data
+FOO&#x0086;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO†ZOO"
+
+#data
+FOO&#x0087;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO‡ZOO"
+
+#data
+FOO&#x0088;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOˆZOO"
+
+#data
+FOO&#x0089;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO‰ZOO"
+
+#data
+FOO&#x008A;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOŠZOO"
+
+#data
+FOO&#x008B;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO‹ZOO"
+
+#data
+FOO&#x008C;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOŒZOO"
+
+#data
+FOO&#x008D;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOZOO"
+
+#data
+FOO&#x008E;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOŽZOO"
+
+#data
+FOO&#x008F;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOZOO"
+
+#data
+FOO&#x0090;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOZOO"
+
+#data
+FOO&#x0091;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO‘ZOO"
+
+#data
+FOO&#x0092;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO’ZOO"
+
+#data
+FOO&#x0093;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO“ZOO"
+
+#data
+FOO&#x0094;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO”ZOO"
+
+#data
+FOO&#x0095;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO•ZOO"
+
+#data
+FOO&#x0096;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO–ZOO"
+
+#data
+FOO&#x0097;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO—ZOO"
+
+#data
+FOO&#x0098;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO˜ZOO"
+
+#data
+FOO&#x0099;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO™ZOO"
+
+#data
+FOO&#x009A;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOšZOO"
+
+#data
+FOO&#x009B;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO›ZOO"
+
+#data
+FOO&#x009C;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOœZOO"
+
+#data
+FOO&#x009D;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOZOO"
+
+#data
+FOO&#x009E;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOžZOO"
+
+#data
+FOO&#x009F;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOŸZOO"
+
+#data
+FOO&#x00A0;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO ZOO"
+
+#data
+FOO&#xD7FF;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO퟿ZOO"
+
+#data
+FOO&#xD800;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) surrogate-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#xD801;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) surrogate-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#xDFFE;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) surrogate-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#xDFFF;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) surrogate-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#xE000;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOZOO"
+
+#data
+FOO&#x10FFFE;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,13): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:14) noncharacter-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO􏿾ZOO"
+
+#data
+FOO&#x1087D4;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO􈟔ZOO"
+
+#data
+FOO&#x10FFFF;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,13): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:14) noncharacter-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO􏿿ZOO"
+
+#data
+FOO&#x110000;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,13): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:14) character-reference-outside-unicode-range
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#xFFFFFF;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,13): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:14) character-reference-outside-unicode-range
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#11111111111
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,13): illegal-codepoint-for-numeric-entity
+(1,13): eof-in-numeric-entity
+#new-errors
+(1:17) missing-semicolon-after-character-reference
+(1:17) character-reference-outside-unicode-range
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�"
+
+#data
+FOO&#1111111111
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,13): illegal-codepoint-for-numeric-entity
+(1,13): eof-in-numeric-entity
+#new-errors
+(1:16) missing-semicolon-after-character-reference
+(1:16) character-reference-outside-unicode-range
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�"
+
+#data
+FOO&#111111111111
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,13): illegal-codepoint-for-numeric-entity
+(1,13): eof-in-numeric-entity
+#new-errors
+(1:18) missing-semicolon-after-character-reference
+(1:18) character-reference-outside-unicode-range
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�"
+
+#data
+FOO&#11111111111ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,16): numeric-entity-without-semicolon
+(1,16): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:17) missing-semicolon-after-character-reference
+(1:17) character-reference-outside-unicode-range
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#1111111111ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,15): numeric-entity-without-semicolon
+(1,15): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:16) missing-semicolon-after-character-reference
+(1:16) character-reference-outside-unicode-range
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#111111111111ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,17): numeric-entity-without-semicolon
+(1,17): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:18) missing-semicolon-after-character-reference
+(1:18) character-reference-outside-unicode-range
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
diff --git a/lib/html5lib/tests/testdata/tree-construction/entities02.dat b/lib/html5lib/tests/testdata/tree-construction/entities02.dat
new file mode 100644
index 00000000..0c6e898c
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/entities02.dat
@@ -0,0 +1,309 @@
+#data
+<div bar="ZZ&gt;YY"></div>
+#errors
+(1,20): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ>YY"
+
+#data
+<div bar="ZZ&"></div>
+#errors
+(1,15): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&"
+
+#data
+<div bar='ZZ&'></div>
+#errors
+(1,15): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&"
+
+#data
+<div bar=ZZ&></div>
+#errors
+(1,13): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&"
+
+#data
+<div bar="ZZ&gt=YY"></div>
+#errors
+(1,15): named-entity-without-semicolon
+(1,20): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&gt=YY"
+
+#data
+<div bar="ZZ&gt0YY"></div>
+#errors
+(1,20): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&gt0YY"
+
+#data
+<div bar="ZZ&gt9YY"></div>
+#errors
+(1,20): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&gt9YY"
+
+#data
+<div bar="ZZ&gtaYY"></div>
+#errors
+(1,20): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&gtaYY"
+
+#data
+<div bar="ZZ&gtZYY"></div>
+#errors
+(1,20): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&gtZYY"
+
+#data
+<div bar="ZZ&gt YY"></div>
+#errors
+(1,15): named-entity-without-semicolon
+(1,20): expected-doctype-but-got-start-tag
+#new-errors
+(1:16) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ> YY"
+
+#data
+<div bar="ZZ&gt"></div>
+#errors
+(1,15): named-entity-without-semicolon
+(1,17): expected-doctype-but-got-start-tag
+#new-errors
+(1:16) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ>"
+
+#data
+<div bar='ZZ&gt'></div>
+#errors
+(1,15): named-entity-without-semicolon
+(1,17): expected-doctype-but-got-start-tag
+#new-errors
+(1:16) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ>"
+
+#data
+<div bar=ZZ&gt></div>
+#errors
+(1,14): named-entity-without-semicolon
+(1,15): expected-doctype-but-got-start-tag
+#new-errors
+(1:15) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ>"
+
+#data
+<div bar="ZZ&pound_id=23"></div>
+#errors
+(1,18): named-entity-without-semicolon
+(1,26): expected-doctype-but-got-start-tag
+#new-errors
+(1:19) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ£_id=23"
+
+#data
+<div bar="ZZ&prod_id=23"></div>
+#errors
+(1,25): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&prod_id=23"
+
+#data
+<div bar="ZZ&pound;_id=23"></div>
+#errors
+(1,27): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ£_id=23"
+
+#data
+<div bar="ZZ&prod;_id=23"></div>
+#errors
+(1,26): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ∏_id=23"
+
+#data
+<div bar="ZZ&pound=23"></div>
+#errors
+(1,18): named-entity-without-semicolon
+(1,23): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&pound=23"
+
+#data
+<div bar="ZZ&prod=23"></div>
+#errors
+(1,22): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&prod=23"
+
+#data
+<div>ZZ&pound_id=23</div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,13): named-entity-without-semicolon
+#new-errors
+(1:14) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZ£_id=23"
+
+#data
+<div>ZZ&prod_id=23</div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZ&prod_id=23"
+
+#data
+<div>ZZ&pound;_id=23</div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZ£_id=23"
+
+#data
+<div>ZZ&prod;_id=23</div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZ∏_id=23"
+
+#data
+<div>ZZ&pound=23</div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,13): named-entity-without-semicolon
+#new-errors
+(1:14) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZ£=23"
+
+#data
+<div>ZZ&prod=23</div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZ&prod=23"
+
+#data
+<div>ZZ&AElig=</div>
+#errors
+#new-errors
+(1:14) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZÆ="
diff --git a/lib/html5lib/tests/testdata/tree-construction/foreign-fragment.dat b/lib/html5lib/tests/testdata/tree-construction/foreign-fragment.dat
new file mode 100644
index 00000000..c81ae817
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/foreign-fragment.dat
@@ -0,0 +1,559 @@
+#data
+<nobr>X
+#errors
+6: HTML start tag “nobr” in a foreign namespace context.
+7: End of file seen and there were open elements.
+6: Unclosed element “nobr”.
+#document-fragment
+svg path
+#document
+| <svg nobr>
+|   "X"
+
+#data
+<font color></font>X
+#errors
+12: HTML start tag “font” in a foreign namespace context.
+#document-fragment
+svg path
+#document
+| <svg font>
+|   color=""
+| "X"
+
+#data
+<font></font>X
+#errors
+#document-fragment
+svg path
+#document
+| <svg font>
+| "X"
+
+#data
+<g></path>X
+#errors
+10: End tag “path” did not match the name of the current open element (“g”).
+11: End of file seen and there were open elements.
+3: Unclosed element “g”.
+#document-fragment
+svg path
+#document
+| <svg g>
+|   "X"
+
+#data
+</path>X
+#errors
+5: Stray end tag “path”.
+#document-fragment
+svg path
+#document
+| "X"
+
+#data
+</foreignObject>X
+#errors
+5: Stray end tag “foreignobject”.
+#document-fragment
+svg foreignObject
+#document
+| "X"
+
+#data
+</desc>X
+#errors
+5: Stray end tag “desc”.
+#document-fragment
+svg desc
+#document
+| "X"
+
+#data
+</title>X
+#errors
+5: Stray end tag “title”.
+#document-fragment
+svg title
+#document
+| "X"
+
+#data
+</svg>X
+#errors
+5: Stray end tag “svg”.
+#document-fragment
+svg svg
+#document
+| "X"
+
+#data
+</mfenced>X
+#errors
+5: Stray end tag “mfenced”.
+#document-fragment
+math mfenced
+#document
+| "X"
+
+#data
+</malignmark>X
+#errors
+5: Stray end tag “malignmark”.
+#document-fragment
+math malignmark
+#document
+| "X"
+
+#data
+</math>X
+#errors
+5: Stray end tag “math”.
+#document-fragment
+math math
+#document
+| "X"
+
+#data
+</annotation-xml>X
+#errors
+5: Stray end tag “annotation-xml”.
+#document-fragment
+math annotation-xml
+#document
+| "X"
+
+#data
+</mtext>X
+#errors
+5: Stray end tag “mtext”.
+#document-fragment
+math mtext
+#document
+| "X"
+
+#data
+</mi>X
+#errors
+5: Stray end tag “mi”.
+#document-fragment
+math mi
+#document
+| "X"
+
+#data
+</mo>X
+#errors
+5: Stray end tag “mo”.
+#document-fragment
+math mo
+#document
+| "X"
+
+#data
+</mn>X
+#errors
+5: Stray end tag “mn”.
+#document-fragment
+math mn
+#document
+| "X"
+
+#data
+</ms>X
+#errors
+5: Stray end tag “ms”.
+#document-fragment
+math ms
+#document
+| "X"
+
+#data
+<b></b><mglyph/><i></i><malignmark/><u></u><ms/>X
+#errors
+51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
+52: End of file seen and there were open elements.
+51: Unclosed element “ms”.
+#new-errors
+(1:44-1:49) non-void-html-element-start-tag-with-trailing-solidus
+#document-fragment
+math ms
+#document
+| <b>
+| <math mglyph>
+| <i>
+| <math malignmark>
+| <u>
+| <ms>
+|   "X"
+
+#data
+<malignmark></malignmark>
+#errors
+#document-fragment
+math ms
+#document
+| <math malignmark>
+
+#data
+<div></div>
+#errors
+#document-fragment
+math ms
+#document
+| <div>
+
+#data
+<figure></figure>
+#errors
+#document-fragment
+math ms
+#document
+| <figure>
+
+#data
+<b></b><mglyph/><i></i><malignmark/><u></u><mn/>X
+#errors
+51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
+52: End of file seen and there were open elements.
+51: Unclosed element “mn”.
+#new-errors
+(1:44-1:49) non-void-html-element-start-tag-with-trailing-solidus
+#document-fragment
+math mn
+#document
+| <b>
+| <math mglyph>
+| <i>
+| <math malignmark>
+| <u>
+| <mn>
+|   "X"
+
+#data
+<malignmark></malignmark>
+#errors
+#document-fragment
+math mn
+#document
+| <math malignmark>
+
+#data
+<div></div>
+#errors
+#document-fragment
+math mn
+#document
+| <div>
+
+#data
+<figure></figure>
+#errors
+#document-fragment
+math mn
+#document
+| <figure>
+
+#data
+<b></b><mglyph/><i></i><malignmark/><u></u><mo/>X
+#errors
+51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
+52: End of file seen and there were open elements.
+51: Unclosed element “mo”.
+#new-errors
+(1:44-1:49) non-void-html-element-start-tag-with-trailing-solidus
+#document-fragment
+math mo
+#document
+| <b>
+| <math mglyph>
+| <i>
+| <math malignmark>
+| <u>
+| <mo>
+|   "X"
+
+#data
+<malignmark></malignmark>
+#errors
+#document-fragment
+math mo
+#document
+| <math malignmark>
+
+#data
+<div></div>
+#errors
+#document-fragment
+math mo
+#document
+| <div>
+
+#data
+<figure></figure>
+#errors
+#document-fragment
+math mo
+#document
+| <figure>
+
+#data
+<b></b><mglyph/><i></i><malignmark/><u></u><mi/>X
+#errors
+51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
+52: End of file seen and there were open elements.
+51: Unclosed element “mi”.
+#new-errors
+(1:44-1:49) non-void-html-element-start-tag-with-trailing-solidus
+#document-fragment
+math mi
+#document
+| <b>
+| <math mglyph>
+| <i>
+| <math malignmark>
+| <u>
+| <mi>
+|   "X"
+
+#data
+<malignmark></malignmark>
+#errors
+#document-fragment
+math mi
+#document
+| <math malignmark>
+
+#data
+<div></div>
+#errors
+#document-fragment
+math mi
+#document
+| <div>
+
+#data
+<figure></figure>
+#errors
+#document-fragment
+math mi
+#document
+| <figure>
+
+#data
+<b></b><mglyph/><i></i><malignmark/><u></u><mtext/>X
+#errors
+51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
+52: End of file seen and there were open elements.
+51: Unclosed element “mtext”.
+#new-errors
+(1:44-1:52) non-void-html-element-start-tag-with-trailing-solidus
+#document-fragment
+math mtext
+#document
+| <b>
+| <math mglyph>
+| <i>
+| <math malignmark>
+| <u>
+| <mtext>
+|   "X"
+
+#data
+<malignmark></malignmark>
+#errors
+#document-fragment
+math mtext
+#document
+| <math malignmark>
+
+#data
+<div></div>
+#errors
+#document-fragment
+math mtext
+#document
+| <div>
+
+#data
+<figure></figure>
+#errors
+#document-fragment
+math mtext
+#document
+| <figure>
+
+#data
+<div></div>
+#errors
+5: HTML start tag “div” in a foreign namespace context.
+#document-fragment
+math annotation-xml
+#document
+| <math div>
+
+#data
+<figure></figure>
+#errors
+#document-fragment
+math annotation-xml
+#document
+| <math figure>
+
+#data
+<div></div>
+#errors
+5: HTML start tag “div” in a foreign namespace context.
+#document-fragment
+math math
+#document
+| <math div>
+
+#data
+<figure></figure>
+#errors
+#document-fragment
+math math
+#document
+| <math figure>
+
+#data
+<div></div>
+#errors
+#document-fragment
+svg foreignObject
+#document
+| <div>
+
+#data
+<figure></figure>
+#errors
+#document-fragment
+svg foreignObject
+#document
+| <figure>
+
+#data
+<div></div>
+#errors
+#document-fragment
+svg title
+#document
+| <div>
+
+#data
+<figure></figure>
+#errors
+#document-fragment
+svg title
+#document
+| <figure>
+
+#data
+<figure></figure>
+#errors
+#document-fragment
+svg desc
+#document
+| <figure>
+
+#data
+<div><h1>X</h1></div>
+#errors
+5: HTML start tag “div” in a foreign namespace context.
+9: HTML start tag “h1” in a foreign namespace context.
+#document-fragment
+svg svg
+#document
+| <svg div>
+|   <svg h1>
+|     "X"
+
+#data
+<div></div>
+#errors
+5: HTML start tag “div” in a foreign namespace context.
+#document-fragment
+svg svg
+#document
+| <svg div>
+
+#data
+<div></div>
+#errors
+#document-fragment
+svg desc
+#document
+| <div>
+
+#data
+<figure></figure>
+#errors
+#document-fragment
+svg desc
+#document
+| <figure>
+
+#data
+<plaintext><foo>
+#errors
+(1,16): expected-closing-tag-but-got-eof
+#document-fragment
+svg desc
+#document
+| <plaintext>
+|   "<foo>"
+
+#data
+<frameset>X
+#errors
+6: Stray start tag “frameset”.
+#document-fragment
+svg desc
+#document
+| "X"
+
+#data
+<head>X
+#errors
+6: Stray start tag “head”.
+#document-fragment
+svg desc
+#document
+| "X"
+
+#data
+<body>X
+#errors
+6: Stray start tag “body”.
+#document-fragment
+svg desc
+#document
+| "X"
+
+#data
+<html>X
+#errors
+6: Stray start tag “html”.
+#document-fragment
+svg desc
+#document
+| "X"
+
+#data
+<html class="foo">X
+#errors
+6: Stray start tag “html”.
+#document-fragment
+svg desc
+#document
+| "X"
+
+#data
+<body class="foo">X
+#errors
+6: Stray start tag “body”.
+#document-fragment
+svg desc
+#document
+| "X"
diff --git a/lib/html5lib/tests/testdata/tree-construction/html5test-com.dat b/lib/html5lib/tests/testdata/tree-construction/html5test-com.dat
new file mode 100644
index 00000000..f7380101
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/html5test-com.dat
@@ -0,0 +1,302 @@
+#data
+<div<div>
+#errors
+(1,9): expected-doctype-but-got-start-tag
+(1,9): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div<div>
+
+#data
+<div foo<bar=''>
+#errors
+(1,9): invalid-character-in-attribute-name
+(1,16): expected-doctype-but-got-start-tag
+(1,16): expected-closing-tag-but-got-eof
+#new-errors
+(1:9) unexpected-character-in-attribute-name
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       foo<bar=""
+
+#data
+<div foo=`bar`>
+#errors
+(1,10): equals-in-unquoted-attribute-value
+(1,14): unexpected-character-in-unquoted-attribute-value
+(1,15): expected-doctype-but-got-start-tag
+(1,15): expected-closing-tag-but-got-eof
+#new-errors
+(1:10) unexpected-character-in-unquoted-attribute-value
+(1:14) unexpected-character-in-unquoted-attribute-value
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       foo="`bar`"
+
+#data
+<div \"foo=''>
+#errors
+(1,7): invalid-character-in-attribute-name
+(1,14): expected-doctype-but-got-start-tag
+(1,14): expected-closing-tag-but-got-eof
+#new-errors
+(1:7) unexpected-character-in-attribute-name
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       \"foo=""
+
+#data
+<a href='\nbar'></a>
+#errors
+(1,16): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       href="\nbar"
+
+#data
+<!DOCTYPE html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+&lang;&rang;
+#errors
+(1,6): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "⟨⟩"
+
+#data
+&apos;
+#errors
+(1,6): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "'"
+
+#data
+&ImaginaryI;
+#errors
+(1,12): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "ⅈ"
+
+#data
+&Kopf;
+#errors
+(1,6): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "𝕂"
+
+#data
+&notinva;
+#errors
+(1,9): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "∉"
+
+#data
+<?import namespace="foo" implementation="#bar">
+#errors
+(1,1): expected-tag-name-but-got-question-mark
+(1,47): expected-doctype-but-got-eof
+#new-errors
+(1:2) unexpected-question-mark-instead-of-tag-name
+#document
+| <!-- ?import namespace="foo" implementation="#bar" -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!--foo--bar-->
+#errors
+(1,10): unexpected-char-in-comment
+(1,15): expected-doctype-but-got-eof
+#document
+| <!-- foo--bar -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<![CDATA[x]]>
+#errors
+(1,2): expected-dashes-or-doctype
+(1,13): expected-doctype-but-got-eof
+#new-errors
+(1:9) cdata-in-html-content
+#document
+| <!-- [CDATA[x]] -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<textarea><!--</textarea>--></textarea>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(1,39): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "<!--"
+|     "-->"
+
+#data
+<textarea><!--</textarea>-->
+#errors
+(1,10): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "<!--"
+|     "-->"
+
+#data
+<style><!--</style>--></style>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,30): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--"
+|   <body>
+|     "-->"
+
+#data
+<style><!--</style>-->
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--"
+|   <body>
+|     "-->"
+
+#data
+<ul><li>A </li> <li>B</li></ul>
+#errors
+(1,4): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ul>
+|       <li>
+|         "A "
+|       " "
+|       <li>
+|         "B"
+
+#data
+<table><form><input type=hidden><input></form><div></div></table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,13): unexpected-form-in-table
+(1,32): unexpected-hidden-input-in-table
+(1,39): unexpected-start-tag-implies-table-voodoo
+(1,46): unexpected-end-tag-implies-table-voodoo
+(1,46): unexpected-end-tag
+(1,51): unexpected-start-tag-implies-table-voodoo
+(1,57): unexpected-end-tag-implies-table-voodoo
+#document
+| <html>
+|   <head>
+|   <body>
+|     <input>
+|     <div>
+|     <table>
+|       <form>
+|       <input>
+|         type="hidden"
+
+#data
+<i>A<b>B</i>C</b>D
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,15): adoption-agency-1.3
+(1,20): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <i>
+|       "A"
+|       <b>
+|         "B"
+|     <b>
+|     
+|       <b>
+|         <i>
+|         "C"
+|       "D"
+
+#data
+<div></div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+
+#data
+<svg></svg>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<math></math>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
diff --git a/lib/html5lib/tests/testdata/tree-construction/inbody01.dat b/lib/html5lib/tests/testdata/tree-construction/inbody01.dat
new file mode 100644
index 00000000..10f6520f
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/inbody01.dat
@@ -0,0 +1,54 @@
+#data
+<button>1</foo>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,15): unexpected-end-tag
+(1,15): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <button>
+|       "1"
+
+#data
+<foo>12</foo>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,16): unexpected-end-tag
+(1,16): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|       "1"
+|       
+|         "2"
+
+#data
+<dd>1</foo>
+#errors
+(1,4): expected-doctype-but-got-start-tag
+(1,11): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <dd>
+|       "1"
+
+#data
+<foo>1<dd>2</foo>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,17): unexpected-end-tag
+(1,17): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|       "1"
+|       <dd>
+|         "2"
diff --git a/lib/html5lib/tests/testdata/tree-construction/isindex.dat b/lib/html5lib/tests/testdata/tree-construction/isindex.dat
new file mode 100644
index 00000000..733f82ea
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/isindex.dat
@@ -0,0 +1,49 @@
+#data
+<isindex>
+#errors
+(1,9): expected-doctype-but-got-start-tag
+(1,9): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <isindex>
+
+#data
+<isindex name="A" action="B" prompt="C" foo="D">
+#errors
+(1,48): expected-doctype-but-got-start-tag
+(1,48): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <isindex>
+|       action="B"
+|       foo="D"
+|       name="A"
+|       prompt="C"
+
+#data
+<form><isindex>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,15): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <isindex>
+
+#data
+<!doctype html><isindex>x</isindex>x
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <isindex>
+|       "x"
+|     "x"
diff --git a/lib/html5lib/tests/testdata/tree-construction/main-element.dat b/lib/html5lib/tests/testdata/tree-construction/main-element.dat
new file mode 100644
index 00000000..4b103bb0
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/main-element.dat
@@ -0,0 +1,46 @@
+#data
+<!doctype html>foo<main>barbaz
+#errors
+(1,36): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       "foo"
+|     <main>
+|       "bar"
+|       
+|         "baz"
+
+#data
+<!doctype html><main>foo</main>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <main>
+|       
+|         "foo"
+|     "bar"
+
+#data
+<!DOCTYPE html>xxx<svg><x><g><a><main><b>
+#errors
+ * (1,42) unexpected HTML-like start tag token in foreign content
+ * (1,42) unexpected end of file
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "xxx"
+|     <svg svg>
+|       <svg x>
+|         <svg g>
+|           <svg a>
+|             <svg main>
+|     <b>
diff --git a/lib/html5lib/tests/testdata/tree-construction/math.dat b/lib/html5lib/tests/testdata/tree-construction/math.dat
new file mode 100644
index 00000000..ae9cd7c6
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/math.dat
@@ -0,0 +1,81 @@
+#data
+<math><tr><td><mo><tr>
+#errors
+#document-fragment
+td
+#document
+| <math math>
+|   <math tr>
+|     <math td>
+|       <math mo>
+
+#data
+<math><tr><td><mo><tr>
+#errors
+#document-fragment
+tr
+#document
+| <math math>
+|   <math tr>
+|     <math td>
+|       <math mo>
+
+#data
+<math><thead><mo><tbody>
+#errors
+#document-fragment
+thead
+#document
+| <math math>
+|   <math thead>
+|     <math mo>
+
+#data
+<math><tfoot><mo><tbody>
+#errors
+#document-fragment
+tfoot
+#document
+| <math math>
+|   <math tfoot>
+|     <math mo>
+
+#data
+<math><tbody><mo><tfoot>
+#errors
+#document-fragment
+tbody
+#document
+| <math math>
+|   <math tbody>
+|     <math mo>
+
+#data
+<math><tbody><mo></table>
+#errors
+#document-fragment
+tbody
+#document
+| <math math>
+|   <math tbody>
+|     <math mo>
+
+#data
+<math><thead><mo></table>
+#errors
+#document-fragment
+tbody
+#document
+| <math math>
+|   <math thead>
+|     <math mo>
+
+#data
+<math><tfoot><mo></table>
+#errors
+#document-fragment
+tbody
+#document
+| <math math>
+|   <math tfoot>
+|     <math mo>
diff --git a/lib/html5lib/tests/testdata/tree-construction/menuitem-element.dat b/lib/html5lib/tests/testdata/tree-construction/menuitem-element.dat
new file mode 100644
index 00000000..43aa0c67
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/menuitem-element.dat
@@ -0,0 +1,257 @@
+#data
+<menuitem>
+#errors
+10: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+10: End of file seen and there were open elements.
+10: Unclosed element “menuitem”.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <menuitem>
+
+#data
+</menuitem>
+#errors
+11: End tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+11: Stray end tag “menuitem”.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><body><menuitem>A
+#errors
+32: End of file seen and there were open elements.
+31: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <menuitem>
+|       "A"
+
+#data
+<!DOCTYPE html><body><menuitem>A<menuitem>B
+#errors
+43: End of file seen and there were open elements.
+42: Unclosed element “menuitem”.
+31: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <menuitem>
+|       "A"
+|       <menuitem>
+|         "B"
+
+#data
+<!DOCTYPE html><body><menuitem>A<menu>B</menu>
+#errors
+46: End of file seen and there were open elements.
+31: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <menuitem>
+|       "A"
+|       <menu>
+|         "B"
+
+#data
+<!DOCTYPE html><body><menuitem>A<hr>B
+#errors
+37: End of file seen and there were open elements.
+31: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <menuitem>
+|       "A"
+|       <hr>
+|       "B"
+
+#data
+<!DOCTYPE html><li><menuitem><li>
+#errors
+33: End tag “li” implied, but there were open elements.
+29: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <li>
+|       <menuitem>
+|     <li>
+
+#data
+<!DOCTYPE html><menuitem></menuitem>x
+#errors
+39: Stray end tag “menuitem”.
+40: End of file seen and there were open elements.
+25: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <menuitem>
+|       
+|         "x"
+
+#data
+<!DOCTYPE html><b><menuitem>
+#errors
+25: End tag “p” seen, but there were open elements.
+21: Unclosed element “b”.
+35: End of file seen and there were open elements.
+35: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <b>
+|     <b>
+|       <menuitem>
+
+#data
+<!DOCTYPE html><menuitem><asdf></menuitem>x
+#errors
+42: End tag “menuitem” seen, but there were open elements.
+31: Unclosed element “asdf”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <menuitem>
+|       <asdf>
+|     "x"
+
+#data
+<!DOCTYPE html></menuitem>
+#errors
+26: Stray end tag “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><html></menuitem>
+#errors
+26: Stray end tag “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><head></menuitem>
+#errors
+26: Stray end tag “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><select><menuitem></select>
+#errors
+33: Stray start tag “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!DOCTYPE html><option><menuitem>
+#errors
+33: End of file seen and there were open elements.
+33: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <option>
+|       <menuitem>
+
+#data
+<!DOCTYPE html><menuitem><option>
+#errors
+33: End of file seen and there were open elements.
+25: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <menuitem>
+|       <option>
+
+#data
+<!DOCTYPE html><menuitem></body>
+#errors
+32: End tag for  “body” seen, but there were unclosed elements.
+25: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <menuitem>
+
+#data
+<!DOCTYPE html><menuitem></html>
+#errors
+32: End tag for  “html” seen, but there were unclosed elements.
+25: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <menuitem>
+
+#data
+<!DOCTYPE html><menuitem>
+#errors
+28: End of file seen and there were open elements.
+25: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <menuitem>
+|       
+
+#data
+<!DOCTYPE html><menuitem><li>
+#errors
+29: End of file seen and there were open elements.
+25: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <menuitem>
+|       <li>
diff --git a/lib/html5lib/tests/testdata/tree-construction/namespace-sensitivity.dat b/lib/html5lib/tests/testdata/tree-construction/namespace-sensitivity.dat
new file mode 100644
index 00000000..ca35c0e7
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/namespace-sensitivity.dat
@@ -0,0 +1,16 @@
+#data
+<body><table><tr><td><svg><td><foreignObject><span></td>Foo
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "Foo"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|               <svg td>
+|                 <svg foreignObject>
+|                   <span>
diff --git a/lib/html5lib/tests/testdata/tree-construction/noscript01.dat b/lib/html5lib/tests/testdata/tree-construction/noscript01.dat
new file mode 100644
index 00000000..ec3496ce
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/noscript01.dat
@@ -0,0 +1,237 @@
+#data
+<head><noscript><!doctype html><!--foo--></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 31 Unexpected DOCTYPE. Ignored.
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <!-- foo -->
+|   <body>
+
+#data
+<head><noscript><html class="foo"><!--foo--></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 34 html needs to be the first start tag.
+#script-off
+#document
+| <html>
+|   class="foo"
+|   <head>
+|     <noscript>
+|       <!-- foo -->
+|   <body>
+
+#data
+<head><noscript></noscript>
+#errors
+(1,6): expected-doctype-but-got-tag
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|   <body>
+
+#data
+<head><noscript>   </noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       "   "
+|   <body>
+
+#data
+<head><noscript><!--foo--></noscript>
+#errors
+(1,6): expected-doctype-but-got-tag
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <!-- foo -->
+|   <body>
+
+#data
+<head><noscript><basefont><!--foo--></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <basefont>
+|       <!-- foo -->
+|   <body>
+
+#data
+<head><noscript><bgsound><!--foo--></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <bgsound>
+|       <!-- foo -->
+|   <body>
+
+#data
+<head><noscript><link><!--foo--></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <link>
+|       <!-- foo -->
+|   <body>
+
+#data
+<head><noscript><meta><!--foo--></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <meta>
+|       <!-- foo -->
+|   <body>
+
+#data
+<head><noscript><noframes>XXX</noscript></noframes></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <noframes>
+|         "XXX</noscript>"
+|   <body>
+
+#data
+<head><noscript><style>XXX</style></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <style>
+|         "XXX"
+|   <body>
+
+#data
+<head><noscript></br><!--foo--></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 21 Element br not allowed in a inhead-noscript context
+Line: 1 Col: 21 Unexpected end tag (br). Treated as br element.
+Line: 1 Col: 42 Unexpected end tag (noscript). Ignored.
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|   <body>
+|     
+|     <!-- foo -->
+
+#data
+<head><noscript><head class="foo"><!--foo--></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 34 Unexpected start tag (head).
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <!-- foo -->
+|   <body>
+
+#data
+<head><noscript><noscript class="foo"><!--foo--></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 34 Unexpected start tag (noscript).
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <!-- foo -->
+|   <body>
+
+#data
+<head><noscript><!--foo--></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 20 Unexpected end tag (p). Ignored.
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <!-- foo -->
+|   <body>
+
+#data
+<head><noscript><!--foo--></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 19 Element p not allowed in a inhead-noscript context
+Line: 1 Col: 40 Unexpected end tag (noscript). Ignored.
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|   <body>
+|     
+|       <!-- foo -->
+
+#data
+<head><noscript>XXX<!--foo--></noscript></head>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 19 Unexpected non-space character. Expected inhead-noscript content
+Line: 1 Col: 30 Unexpected end tag (noscript). Ignored.
+Line: 1 Col: 37 Unexpected end tag (head). Ignored.
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|   <body>
+|     "XXX"
+|     <!-- foo -->
+
+#data
+<head><noscript>
+#errors
+(1,6): expected-doctype-but-got-tag
+(1,6): eof-in-head-noscript
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|   <body>
diff --git a/lib/html5lib/tests/testdata/tree-construction/pending-spec-changes-plain-text-unsafe.dat b/lib/html5lib/tests/testdata/tree-construction/pending-spec-changes-plain-text-unsafe.dat
new file mode 100644
index 00000000..2c546d40
Binary files /dev/null and b/lib/html5lib/tests/testdata/tree-construction/pending-spec-changes-plain-text-unsafe.dat differ
diff --git a/lib/html5lib/tests/testdata/tree-construction/pending-spec-changes.dat b/lib/html5lib/tests/testdata/tree-construction/pending-spec-changes.dat
new file mode 100644
index 00000000..1647d7f2
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/pending-spec-changes.dat
@@ -0,0 +1,46 @@
+#data
+<input type="hidden"><frameset>
+#errors
+(1,21): expected-doctype-but-got-start-tag
+(1,31): unexpected-start-tag
+(1,31): eof-in-frameset
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><table><caption><svg>foo</table>bar
+#errors
+(1,47): unexpected-end-tag
+(1,47): end-table-tag-in-caption
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <svg svg>
+|           "foo"
+|     "bar"
+
+#data
+<table><tr><td><svg><desc><td></desc><circle>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,30): unexpected-cell-end-tag
+(1,37): unexpected-end-tag
+(1,45): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|               <svg desc>
+|           <td>
+|             <circle>
diff --git a/lib/html5lib/tests/testdata/tree-construction/plain-text-unsafe.dat b/lib/html5lib/tests/testdata/tree-construction/plain-text-unsafe.dat
new file mode 100644
index 00000000..dfb5cb63
Binary files /dev/null and b/lib/html5lib/tests/testdata/tree-construction/plain-text-unsafe.dat differ
diff --git a/lib/html5lib/tests/testdata/tree-construction/ruby.dat b/lib/html5lib/tests/testdata/tree-construction/ruby.dat
new file mode 100644
index 00000000..696782f0
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/ruby.dat
@@ -0,0 +1,301 @@
+#data
+<html><ruby>a<rb>b<rb></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rb>
+|         "b"
+|       <rb>
+
+#data
+<html><ruby>a<rb>b<rt></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rb>
+|         "b"
+|       <rt>
+
+#data
+<html><ruby>a<rb>b<rtc></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rb>
+|         "b"
+|       <rtc>
+
+#data
+<html><ruby>a<rb>b<rp></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rb>
+|         "b"
+|       <rp>
+
+#data
+<html><ruby>a<rb>b<span></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,31): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rb>
+|         "b"
+|         <span>
+
+#data
+<html><ruby>a<rt>b<rb></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rt>
+|         "b"
+|       <rb>
+
+#data
+<html><ruby>a<rt>b<rt></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rt>
+|         "b"
+|       <rt>
+
+#data
+<html><ruby>a<rt>b<rtc></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rt>
+|         "b"
+|       <rtc>
+
+#data
+<html><ruby>a<rt>b<rp></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rt>
+|         "b"
+|       <rp>
+
+#data
+<html><ruby>a<rt>b<span></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,31): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rt>
+|         "b"
+|         <span>
+
+#data
+<html><ruby>a<rtc>b<rb></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rtc>
+|         "b"
+|       <rb>
+
+#data
+<html><ruby>a<rtc>b<rt>c<rt>d</ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rtc>
+|         "b"
+|         <rt>
+|           "c"
+|         <rt>
+|           "d"
+
+#data
+<html><ruby>a<rtc>b<rtc></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rtc>
+|         "b"
+|       <rtc>
+
+#data
+<html><ruby>a<rtc>b<rp></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rtc>
+|         "b"
+|         <rp>
+
+#data
+<html><ruby>a<rtc>b<span></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rtc>
+|         "b"
+|         <span>
+
+#data
+<html><ruby>a<rp>b<rb></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rp>
+|         "b"
+|       <rb>
+
+#data
+<html><ruby>a<rp>b<rt></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rp>
+|         "b"
+|       <rt>
+
+#data
+<html><ruby>a<rp>b<rtc></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rp>
+|         "b"
+|       <rtc>
+
+#data
+<html><ruby>a<rp>b<rp></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rp>
+|         "b"
+|       <rp>
+
+#data
+<html><ruby>a<rp>b<span></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,31): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rp>
+|         "b"
+|         <span>
+
+#data
+<html><ruby><rtc><ruby>a<rb>b<rt></ruby></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <rtc>
+|         <ruby>
+|           "a"
+|           <rb>
+|             "b"
+|           <rt>
diff --git a/lib/html5lib/tests/testdata/tree-construction/scriptdata01.dat b/lib/html5lib/tests/testdata/tree-construction/scriptdata01.dat
new file mode 100644
index 00000000..e5708589
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/scriptdata01.dat
@@ -0,0 +1,385 @@
+#data
+FOO<script>'Hello'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'Hello'"
+|     "BAR"
+
+#data
+FOO<script></script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|     "BAR"
+
+#data
+FOO<script></script >BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|     "BAR"
+
+#data
+FOO<script></script/>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,21): self-closing-flag-on-end-tag
+#new-errors
+(1:21) end-tag-with-trailing-solidus
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|     "BAR"
+
+#data
+FOO<script></script/ >BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,20): unexpected-character-after-solidus-in-tag
+#new-errors
+(1:21) unexpected-solidus-in-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|     "BAR"
+
+#data
+FOO<script type="text/plain"></scriptx>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,42): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "</scriptx>BAR"
+
+#data
+FOO<script></script foo=">" dd>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,31): attributes-in-end-tag
+#new-errors
+(1:31) end-tag-with-attributes
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|     "BAR"
+
+#data
+FOO<script>'<'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<'"
+|     "BAR"
+
+#data
+FOO<script>'<!'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!'"
+|     "BAR"
+
+#data
+FOO<script>'<!-'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!-'"
+|     "BAR"
+
+#data
+FOO<script>'<!--'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!--'"
+|     "BAR"
+
+#data
+FOO<script>'<!---'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!---'"
+|     "BAR"
+
+#data
+FOO<script>'<!-->'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!-->'"
+|     "BAR"
+
+#data
+FOO<script>'<!-->'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!-->'"
+|     "BAR"
+
+#data
+FOO<script>'<!-- potato'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!-- potato'"
+|     "BAR"
+
+#data
+FOO<script>'<!-- <sCrIpt'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!-- <sCrIpt'"
+|     "BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt>'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,56): expected-script-data-but-got-eof
+(1,56): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:57) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt>'</script>BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt> -'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,58): expected-script-data-but-got-eof
+(1,58): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:59) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt> -'</script>BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt> --'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,59): expected-script-data-but-got-eof
+(1,59): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:60) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt> --'</script>BAR"
+
+#data
+FOO<script>'<!-- <sCrIpt> -->'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!-- <sCrIpt> -->'"
+|     "BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt> --!>'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,61): expected-script-data-but-got-eof
+(1,61): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:62) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt> --!>'</script>BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt> -- >'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,61): expected-script-data-but-got-eof
+(1,61): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:62) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt> -- >'</script>BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt '</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,56): expected-script-data-but-got-eof
+(1,56): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:57) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt '</script>BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,56): expected-script-data-but-got-eof
+(1,56): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:57) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt/'</script>BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt\'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt\'"
+|     "BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR</script>QUX
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt/'</script>BAR"
+|     "QUX"
+
+#data
+FOO<script><!--<script>-></script>--></script>QUX
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "<!--<script>-></script>-->"
+|     "QUX"
diff --git a/lib/html5lib/tests/testdata/tree-construction/scripted/adoption01.dat b/lib/html5lib/tests/testdata/tree-construction/scripted/adoption01.dat
new file mode 100644
index 00000000..5cc0f074
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/scripted/adoption01.dat
@@ -0,0 +1,16 @@
+#data
+<b id="A"><script>document.getElementById("A").id = "B"</script>TEXT</b>
+#errors
+#script-on
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <b>
+|         id="B"
+|         <script>
+|           "document.getElementById("A").id = "B""
+|     <b>
+|       id="A"
+|       "TEXT"
diff --git a/lib/html5lib/tests/testdata/tree-construction/scripted/ark.dat b/lib/html5lib/tests/testdata/tree-construction/scripted/ark.dat
new file mode 100644
index 00000000..feebeadb
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/scripted/ark.dat
@@ -0,0 +1,27 @@
+#data
+<font size=4><font size=4><font size=4><script>document.getElementsByTagName("font")[2].setAttribute("size", "5");</script><font size=4>X
+#errors
+#script-on
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <font>
+|         size="4"
+|         <font>
+|           size="4"
+|           <font>
+|             size="5"
+|             <script>
+|               "document.getElementsByTagName("font")[2].setAttribute("size", "5");"
+|             <font>
+|               size="4"
+|     
+|       <font>
+|         size="4"
+|         <font>
+|           size="4"
+|           <font>
+|             size="4"
+|             "X"
diff --git a/lib/html5lib/tests/testdata/tree-construction/scripted/webkit01.dat b/lib/html5lib/tests/testdata/tree-construction/scripted/webkit01.dat
new file mode 100644
index 00000000..3e71c1ba
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/scripted/webkit01.dat
@@ -0,0 +1,30 @@
+#data
+1<script>document.write("2")</script>3
+#errors
+#script-on
+#document
+| <html>
+|   <head>
+|   <body>
+|     "1"
+|     <script>
+|       "document.write("2")"
+|     "23"
+
+#data
+1<script>document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")</script>4
+#errors
+#script-on
+#document
+| <html>
+|   <head>
+|   <body>
+|     "1"
+|     <script>
+|       "document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")"
+|     <script>
+|       "document.write('2')"
+|     "2"
+|     <script>
+|       "document.write('3')"
+|     "34"
diff --git a/lib/html5lib/tests/testdata/tree-construction/tables01.dat b/lib/html5lib/tests/testdata/tree-construction/tables01.dat
new file mode 100644
index 00000000..f0caaa3c
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tables01.dat
@@ -0,0 +1,286 @@
+#data
+<table><th>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,11): unexpected-cell-in-table-body
+(1,11): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <th>
+
+#data
+<table><td>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,11): unexpected-cell-in-table-body
+(1,11): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<table><col foo='bar'>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,22): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <colgroup>
+|         <col>
+|           foo="bar"
+
+#data
+<table><colgroup></html>foo
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,24): unexpected-end-tag
+(1,27): foster-parenting-character-in-table
+(1,27): foster-parenting-character-in-table
+(1,27): foster-parenting-character-in-table
+(1,27): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     "foo"
+|     <table>
+|       <colgroup>
+
+#data
+<table></table>foo
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|     
+|       "foo"
+
+#data
+<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr><td>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,14): unexpected-end-tag
+(1,24): unexpected-end-tag
+(1,30): unexpected-end-tag
+(1,41): unexpected-end-tag
+(1,48): unexpected-end-tag
+(1,56): unexpected-end-tag
+(1,61): unexpected-end-tag
+(1,69): unexpected-end-tag
+(1,74): unexpected-end-tag
+(1,82): unexpected-end-tag
+(1,87): unexpected-end-tag
+(1,91): unexpected-cell-in-table-body
+(1,91): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<table><select><option>3</select></table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,15): unexpected-start-tag-implies-table-voodoo
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|         "3"
+|     <table>
+
+#data
+<table><select><table></table></select></table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,15): unexpected-start-tag-implies-table-voodoo
+(1,22): unexpected-table-element-start-tag-in-select-in-table
+(1,22): unexpected-start-tag-implies-end-tag
+(1,39): unexpected-end-tag
+(1,47): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     <table>
+|     <table>
+
+#data
+<table><select></table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,15): unexpected-start-tag-implies-table-voodoo
+(1,23): unexpected-table-element-end-tag-in-select-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     <table>
+
+#data
+<table><select><option>A<tr><td>B</td></tr></table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,15): unexpected-start-tag-implies-table-voodoo
+(1,28): unexpected-table-element-start-tag-in-select-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|         "A"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "B"
+
+#data
+<table><td></body></caption></col></colgroup></html>foo
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,11): unexpected-cell-in-table-body
+(1,18): unexpected-end-tag
+(1,28): unexpected-end-tag
+(1,34): unexpected-end-tag
+(1,45): unexpected-end-tag
+(1,52): unexpected-end-tag
+(1,55): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "foo"
+
+#data
+<table><td>A</table>B
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,11): unexpected-cell-in-table-body
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "A"
+|     "B"
+
+#data
+<table><tr><caption>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,20): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|       <caption>
+
+#data
+<table><tr></body></caption></col></colgroup></html></td></th><td>foo
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,18): unexpected-end-tag-in-table-row
+(1,28): unexpected-end-tag-in-table-row
+(1,34): unexpected-end-tag-in-table-row
+(1,45): unexpected-end-tag-in-table-row
+(1,52): unexpected-end-tag-in-table-row
+(1,57): unexpected-end-tag-in-table-row
+(1,62): unexpected-end-tag-in-table-row
+(1,69): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "foo"
+
+#data
+<table><td><tr>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,11): unexpected-cell-in-table-body
+(1,15): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|         <tr>
+
+#data
+<table><td><button><td>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,11): unexpected-cell-in-table-body
+(1,23): unexpected-cell-end-tag
+(1,23): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <button>
+|           <td>
+
+#data
+<table><tr><td><svg><desc><td>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,30): unexpected-cell-end-tag
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|               <svg desc>
+|           <td>
diff --git a/lib/html5lib/tests/testdata/tree-construction/template.dat b/lib/html5lib/tests/testdata/tree-construction/template.dat
new file mode 100644
index 00000000..b38d4f58
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/template.dat
@@ -0,0 +1,1604 @@
+#data
+<body><template>Hello</template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         "Hello"
+
+#data
+<template>Hello</template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         "Hello"
+|   <body>
+
+#data
+<template></template><div></div>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|   <body>
+|     <div>
+
+#data
+<html><template>Hello</template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         "Hello"
+|   <body>
+
+#data
+<head><template><div></div></template></head>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <div>
+|   <body>
+
+#data
+<div><template><div><span></template><b>
+#errors
+ * (1,6) missing DOCTYPE
+ * (1,38) mismatched template end tag
+ * (1,41) unexpected end of file
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <template>
+|         content
+|           <div>
+|             <span>
+|       <b>
+
+#data
+<div><template></div>Hello
+#errors
+ * (1,6) missing DOCTYPE
+ * (1,22) unexpected token in template
+ * (1,27) unexpected end of file in template
+ * (1,27) unexpected end of file
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <template>
+|         content
+|           "Hello"
+
+#data
+<div></template></div>
+#errors
+ * (1,6) missing DOCTYPE
+ * (1,17) unexpected template end tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+
+#data
+<table><template></template></table>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <template>
+|         content
+
+#data
+<table><template></template></div>
+#errors
+ * (1,8) missing DOCTYPE
+ * (1,35) unexpected token in table - foster parenting
+ * (1,35) unexpected end tag
+ * (1,35) unexpected end of file
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <template>
+|         content
+
+#data
+<table><div><template></template></div>
+#errors
+ * (1,8) missing DOCTYPE
+ * (1,13) unexpected token in table - foster parenting
+ * (1,40) unexpected token in table - foster parenting
+ * (1,40) unexpected end of file
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <template>
+|         content
+|     <table>
+
+#data
+<table><template></template><div></div>
+#errors
+no doctype
+bad div in table
+bad /div in table
+eof in table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|     <table>
+|       <template>
+|         content
+
+#data
+<table>   <template></template></table>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       "   "
+|       <template>
+|         content
+
+#data
+<table><tbody><template></template></tbody>
+#errors
+no doctype
+eof in table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <template>
+|           content
+
+#data
+<table><tbody><template></tbody></template>
+#errors
+no doctype
+bad /tbody
+eof in table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <template>
+|           content
+
+#data
+<table><tbody><template></template></tbody></table>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <template>
+|           content
+
+#data
+<table><thead><template></template></thead>
+#errors
+no doctype
+eof in table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <thead>
+|         <template>
+|           content
+
+#data
+<table><tfoot><template></template></tfoot>
+#errors
+no doctype
+eof in table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tfoot>
+|         <template>
+|           content
+
+#data
+<select><template></template></select>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <template>
+|         content
+
+#data
+<select><template><option></option></template></select>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <template>
+|         content
+|           <option>
+
+#data
+<template><option></option></select><option></option></template>
+#errors
+no doctype
+bad /select
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <option>
+|         <option>
+|   <body>
+
+#data
+<select><template></template><option></select>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <template>
+|         content
+|       <option>
+
+#data
+<select><option><template></template></select>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|         <template>
+|           content
+
+#data
+<select><template>
+#errors
+no doctype
+eof in template
+eof in select
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <template>
+|         content
+
+#data
+<select><option></option><template>
+#errors
+no doctype
+eof in template
+eof in select
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|       <template>
+|         content
+
+#data
+<select><option></option><template><option>
+#errors
+no doctype
+eof in template
+eof in select
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|       <template>
+|         content
+|           <option>
+
+#data
+<table><thead><template><td></template></table>
+#errors
+ * (1,8) missing DOCTYPE
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <thead>
+|         <template>
+|           content
+|             <td>
+
+#data
+<table><template><thead></template></table>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <template>
+|         content
+|           <thead>
+
+#data
+<body><table><template><td></tr><div></template></table>
+#errors
+no doctype
+bad </tr>
+missing </div>
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <template>
+|         content
+|           <td>
+|             <div>
+
+#data
+<table><template><thead></template></thead></table>
+#errors
+no doctype
+bad /thead after /template
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <template>
+|         content
+|           <thead>
+
+#data
+<table><thead><template><tr></template></table>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <thead>
+|         <template>
+|           content
+|             <tr>
+
+#data
+<table><template><tr></template></table>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <template>
+|         content
+|           <tr>
+
+#data
+<table><tr><template><td>
+#errors
+no doctype
+eof in template
+eof in table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <template>
+|             content
+|               <td>
+
+#data
+<table><template><tr><template><td></template></tr></template></table>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <template>
+|         content
+|           <tr>
+|             <template>
+|               content
+|                 <td>
+
+#data
+<table><template><tr><template><td></td></template></tr></template></table>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <template>
+|         content
+|           <tr>
+|             <template>
+|               content
+|                 <td>
+
+#data
+<table><template><td></template>
+#errors
+no doctype
+eof in table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <template>
+|         content
+|           <td>
+
+#data
+<body><template><td></td></template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <td>
+
+#data
+<body><template><template><tr></tr></template><td></td></template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <tr>
+|         <td>
+
+#data
+<table><colgroup><template><col>
+#errors
+no doctype
+eof in template
+eof in table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <colgroup>
+|         <template>
+|           content
+|             <col>
+
+#data
+<frameset><template><frame></frame></template></frameset>
+#errors
+ * (1,11) missing DOCTYPE
+ * (1,21) unexpected start tag token
+ * (1,36) unexpected end tag token
+ * (1,47) unexpected end tag token
+#document
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<template><frame></frame></frameset><frame></frame></template>
+#errors
+ * (1,11) missing DOCTYPE
+ * (1,18) unexpected start tag
+ * (1,26) unexpected end tag
+ * (1,37) unexpected end tag
+ * (1,44) unexpected start tag
+ * (1,52) unexpected end tag
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|   <body>
+
+#data
+<template><div><frameset><span></span></div><span></span></template>
+#errors
+no doctype
+bad frameset
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <div>
+|           <span>
+|         <span>
+|   <body>
+
+#data
+<body><template><div><frameset><span></span></div><span></span></template></body>
+#errors
+no doctype
+bad frameset
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <div>
+|           <span>
+|         <span>
+
+#data
+<body><template><script>var i = 1;</script><td></td></template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <script>
+|           "var i = 1;"
+|         <td>
+
+#data
+<body><template><tr><div></div></tr></template>
+#errors
+no doctype
+foster-parented div
+foster-parented /div
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <tr>
+|         <div>
+
+#data
+<body><template><tr></tr><td></td></template>
+#errors
+no doctype
+unexpected <td>
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <tr>
+|         <tr>
+|           <td>
+
+#data
+<body><template><td></td></tr><td></td></template>
+#errors
+no doctype
+bad </tr>
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <td>
+|         <td>
+
+#data
+<body><template><td></td><tbody><td></td></template>
+#errors
+no doctype
+bad <tbody>
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <td>
+|         <td>
+
+#data
+<body><template><td></td><caption></caption><td></td></template>
+#errors
+ * (1,7) missing DOCTYPE
+ * (1,35) unexpected start tag in table row
+ * (1,45) unexpected end tag in table row
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <td>
+|         <td>
+
+#data
+<body><template><td></td><colgroup></caption><td></td></template>
+#errors
+ * (1,7) missing DOCTYPE
+ * (1,36) unexpected start tag in table row
+ * (1,46) unexpected end tag in table row
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <td>
+|         <td>
+
+#data
+<body><template><td></td></table><td></td></template>
+#errors
+no doctype
+bad </table>
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <td>
+|         <td>
+
+#data
+<body><template><tr></tr><tbody><tr></tr></template>
+#errors
+no doctype
+bad <tbody>
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <tr>
+|         <tr>
+
+#data
+<body><template><tr></tr><caption><tr></tr></template>
+#errors
+no doctype
+bad <caption>
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <tr>
+|         <tr>
+
+#data
+<body><template><tr></tr></table><tr></tr></template>
+#errors
+no doctype
+bad </table>
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <tr>
+|         <tr>
+
+#data
+<body><template><thead></thead><caption></caption><tbody></tbody></template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <thead>
+|         <caption>
+|         <tbody>
+
+#data
+<body><template><thead></thead></table><tbody></tbody></template></body>
+#errors
+no doctype
+bad </table>
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <thead>
+|         <tbody>
+
+#data
+<body><template><div><tr></tr></div></template>
+#errors
+no doctype
+bad tr
+bad /tr
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <div>
+
+#data
+<body><template><em>Hello</em></template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <em>
+|           "Hello"
+
+#data
+<body><template><!--comment--></template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <!-- comment -->
+
+#data
+<body><template><style></style><td></td></template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <style>
+|         <td>
+
+#data
+<body><template><meta><td></td></template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <meta>
+|         <td>
+
+#data
+<body><template><link><td></td></template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <link>
+|         <td>
+
+#data
+<body><template><template><tr></tr></template><td></td></template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <tr>
+|         <td>
+
+#data
+<body><table><colgroup><template><col></col></template></colgroup></table></body>
+#errors
+no doctype
+bad /col
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <colgroup>
+|         <template>
+|           content
+|             <col>
+
+#data
+<body a=b><template><div></div><body c=d><div></div></body></template></body>
+#errors
+no doctype
+bad <body>
+bad </body>
+#document
+| <html>
+|   <head>
+|   <body>
+|     a="b"
+|     <template>
+|       content
+|         <div>
+|         <div>
+
+#data
+<html a=b><template><div><html b=c><span></template>
+#errors
+no doctype
+bad <html>
+missing end tags in template
+#document
+| <html>
+|   a="b"
+|   <head>
+|     <template>
+|       content
+|         <div>
+|           <span>
+|   <body>
+
+#data
+<html a=b><template><col></col><html b=c><col></col></template>
+#errors
+no doctype
+bad /col
+bad html
+bad /col
+#document
+| <html>
+|   a="b"
+|   <head>
+|     <template>
+|       content
+|         <col>
+|         <col>
+|   <body>
+
+#data
+<html a=b><template><frame></frame><html b=c><frame></frame></template>
+#errors
+no doctype
+bad frame
+bad /frame
+bad html
+bad frame
+bad /frame
+#document
+| <html>
+|   a="b"
+|   <head>
+|     <template>
+|       content
+|   <body>
+
+#data
+<body><template><tr></tr><template></template><td></td></template>
+#errors
+no doctype
+unexpected <td>
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <tr>
+|         <template>
+|           content
+|         <tr>
+|           <td>
+
+#data
+<body><template><thead></thead><template><tr></tr></template><tr></tr><tfoot></tfoot></template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <thead>
+|         <template>
+|           content
+|             <tr>
+|         <tbody>
+|           <tr>
+|         <tfoot>
+
+#data
+<body><template><template><b><template></template></template>text</template>
+#errors
+no doctype
+missing </b>
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <b>
+|               <template>
+|                 content
+|         "text"
+
+#data
+<body><template><col><colgroup>
+#errors
+no doctype
+bad colgroup
+eof in template
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <col>
+
+#data
+<body><template><col></colgroup>
+#errors
+no doctype
+bogus /colgroup
+eof in template
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <col>
+
+#data
+<body><template><col><colgroup></template></body>
+#errors
+no doctype
+bad colgroup
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <col>
+
+#data
+<body><template><col><div>
+#errors
+ * (1,7) missing DOCTYPE
+ * (1,27) unexpected token
+ * (1,27) unexpected end of file in template
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <col>
+
+#data
+<body><template><col></div>
+#errors
+no doctype
+bad /div
+eof in template
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <col>
+
+#data
+<body><template><col>Hello
+#errors
+no doctype
+unexpected text
+eof in template
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <col>
+
+#data
+<body><template><i><menu>Foo</i>
+#errors
+no doctype
+mising /menu
+eof in template
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <i>
+|         <menu>
+|           <i>
+|             "Foo"
+
+#data
+<body><template></div><div>Foo</div><template></template><tr></tr>
+#errors
+no doctype
+bogus /div
+bogus tr
+bogus /tr
+eof in template
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <div>
+|           "Foo"
+|         <template>
+|           content
+
+#data
+<body><div><template></div><tr><td>Foo</td></tr></template>
+#errors
+ * (1,7) missing DOCTYPE
+ * (1,28) unexpected token in template
+ * (1,60) unexpected end of file
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <template>
+|         content
+|           <tr>
+|             <td>
+|               "Foo"
+
+#data
+<template></figcaption><sub><table></table>
+#errors
+no doctype
+bad /figcaption
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <sub>
+|           <table>
+|   <body>
+
+#data
+<template><template>
+#errors
+no doctype
+eof in template
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|   <body>
+
+#data
+<template><div>
+#errors
+no doctype
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <div>
+|   <body>
+
+#data
+<template><template><div>
+#errors
+no doctype
+eof in template
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <div>
+|   <body>
+
+#data
+<template><template><table>
+#errors
+no doctype
+eof in template
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <table>
+|   <body>
+
+#data
+<template><template><tbody>
+#errors
+no doctype
+eof in template
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <tbody>
+|   <body>
+
+#data
+<template><template><tr>
+#errors
+no doctype
+eof in template
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <tr>
+|   <body>
+
+#data
+<template><template><td>
+#errors
+no doctype
+eof in template
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <td>
+|   <body>
+
+#data
+<template><template><caption>
+#errors
+no doctype
+eof in template
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <caption>
+|   <body>
+
+#data
+<template><template><colgroup>
+#errors
+no doctype
+eof in template
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <colgroup>
+|   <body>
+
+#data
+<template><template><col>
+#errors
+no doctype
+eof in template
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <col>
+|   <body>
+
+#data
+<template><template><tbody><select>
+#errors
+ * (1,11) missing DOCTYPE
+ * (1,36) unexpected token in table - foster parenting
+ * (1,36) unexpected end of file in template
+ * (1,36) unexpected end of file in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <tbody>
+|             <select>
+|   <body>
+
+#data
+<template><template><table>Foo
+#errors
+no doctype
+foster-parenting text F
+foster-parenting text o
+foster-parenting text o
+eof
+eof
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|             "Foo"
+|             <table>
+|   <body>
+
+#data
+<template><template><frame>
+#errors
+no doctype
+bad tag
+eof
+eof
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|   <body>
+
+#data
+<template><template><script>var i
+#errors
+no doctype
+eof in script
+eof in template
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <script>
+|               "var i"
+|   <body>
+
+#data
+<template><template><style>var i
+#errors
+no doctype
+eof in style
+eof in template
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <style>
+|               "var i"
+|   <body>
+
+#data
+<template><table></template><body><span>Foo
+#errors
+no doctype
+missing /table
+bad eof
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <table>
+|   <body>
+|     <span>
+|       "Foo"
+
+#data
+<template><td></template><body><span>Foo
+#errors
+no doctype
+bad eof
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <td>
+|   <body>
+|     <span>
+|       "Foo"
+
+#data
+<template><object></template><body><span>Foo
+#errors
+no doctype
+missing /object
+bad eof
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <object>
+|   <body>
+|     <span>
+|       "Foo"
+
+#data
+<template><svg><template>
+#errors
+no doctype
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <svg svg>
+|           <svg template>
+|   <body>
+
+#data
+<template><svg><foo><template><foreignObject><div></template><div>
+#errors
+no doctype
+ugly template closure
+bad eof
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <svg svg>
+|           <svg foo>
+|             <svg template>
+|               <svg foreignObject>
+|                 <div>
+|   <body>
+|     <div>
+
+#data
+<dummy><template><span></dummy>
+#errors
+no doctype
+bad end tag </dummy>
+eof in template
+eof in dummy
+#document
+| <html>
+|   <head>
+|   <body>
+|     <dummy>
+|       <template>
+|         content
+|           <span>
+
+#data
+<body><table><tr><td><select><template>Foo</template><caption>A</table>
+#errors
+no doctype
+(1,62): unexpected-caption-in-select-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <select>
+|               <template>
+|                 content
+|                   "Foo"
+|       <caption>
+|         "A"
+
+#data
+<body></body><template>
+#errors
+no doctype
+(1,23): template-after-body
+(1,24): eof-in-template
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+
+#data
+<head></head><template>
+#errors
+no doctype
+(1,23): template-after-head
+(1,24): eof-in-template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|   <body>
+
+#data
+<head></head><template>Foo</template>
+#errors
+no doctype
+(1,23): template-after-head
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         "Foo"
+|   <body>
+
+#data
+<!DOCTYPE HTML><dummy><table><template><table><template><table><script>
+#errors
+eof script
+eof template
+eof template
+eof table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <dummy>
+|       <table>
+|         <template>
+|           content
+|             <table>
+|               <template>
+|                 content
+|                   <table>
+|                     <script>
+
+#data
+<template><a><table><a>
+#errors
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <a>
+|           <a>
+|           <table>
+|   <body>
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests1.dat b/lib/html5lib/tests/testdata/tree-construction/tests1.dat
new file mode 100644
index 00000000..1c36c1b8
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests1.dat
@@ -0,0 +1,1988 @@
+#data
+Test
+#errors
+(1,0): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "Test"
+
+#data
+OneTwo
+#errors
+(1,3): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       "One"
+|     
+|       "Two"
+
+#data
+Line1Line2Line3Line4
+#errors
+(1,0): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "Line1"
+|     
+|     "Line2"
+|     
+|     "Line3"
+|     
+|     "Line4"
+
+#data
+<html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<head>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<body>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head></head>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head></head><body>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head></head><body></body>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head><body></body></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head></body></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head><body></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><body></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<body></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<head></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+</head>
+#errors
+(1,7): expected-doctype-but-got-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+</body>
+#errors
+(1,7): expected-doctype-but-got-end-tag element.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+</html>
+#errors
+(1,7): expected-doctype-but-got-end-tag element.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<b><table><td><i></table>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,14): unexpected-cell-in-table-body
+(1,25): unexpected-cell-end-tag
+(1,25): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <i>
+
+#data
+<b><table><td></b><i></table>X
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,14): unexpected-cell-in-table-body
+(1,18): unexpected-end-tag
+(1,29): unexpected-cell-end-tag
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <i>
+|       "X"
+
+#data
+<h1>Hello<h2>World
+#errors
+(1,4): expected-doctype-but-got-start-tag
+(1,13): unexpected-start-tag
+(1,18): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <h1>
+|       "Hello"
+|     <h2>
+|       "World"
+
+#data
+<a>X<a>Y</a>Z</a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,10): unexpected-start-tag-implies-end-tag
+(1,10): adoption-agency-1.3
+(1,24): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     
+|       <a>
+|         "X"
+|       <a>
+|         "Y"
+|       "Z"
+
+#data
+<b><button>foo</b>bar
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,18): adoption-agency-1.3
+(1,21): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|     <button>
+|       <b>
+|         "foo"
+|       "bar"
+
+#data
+<!DOCTYPE html><span><button>foo</span>bar
+#errors
+(1,39): unexpected-end-tag
+(1,42): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <span>
+|       <button>
+|         "foobar"
+
+#data
+<b><div><marquee></b></div>X
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,11): unexpected-end-tag
+(1,24): unexpected-end-tag
+(1,28): unexpected-end-tag
+(1,34): end-tag-too-early
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <b>
+|     <div>
+|       <b>
+|         <marquee>
+|           
+|           "X"
+
+#data
+<script><div></script></div><title></title>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,28): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<div>"
+|     <title>
+|       ""
+|   <body>
+|     
+|     
+
+#data
+<!--><div>--<!-->
+#errors
+(1,5): incorrect-comment
+(1,10): expected-doctype-but-got-start-tag
+(1,17): incorrect-comment
+(1,17): expected-closing-tag-but-got-eof
+#new-errors
+(1:5) abrupt-closing-of-empty-comment
+(1:17) abrupt-closing-of-empty-comment
+#document
+| <!--  -->
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "--"
+|       <!--  -->
+
+#data
+<hr>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,11): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|     <hr>
+|     
+
+#data
+<select><b><option><select><option></b></select>X
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,11): unexpected-start-tag-in-select
+(1,27): unexpected-select-in-select
+(1,39): unexpected-end-tag
+(1,48): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|     <option>
+|       "X"
+
+#data
+<a><table><td><a><table></table><a></tr><a></table><b>X</b>C<a>Y
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,14): unexpected-cell-in-table-body
+(1,35): unexpected-start-tag-implies-end-tag
+(1,40): unexpected-cell-end-tag
+(1,43): unexpected-start-tag-implies-table-voodoo
+(1,43): unexpected-start-tag-implies-end-tag
+(1,43): unexpected-end-tag
+(1,63): unexpected-start-tag-implies-end-tag
+(1,64): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <a>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <a>
+|                 <table>
+|               <a>
+|     <a>
+|       <b>
+|         "X"
+|       "C"
+|     <a>
+|       "Y"
+
+#data
+<a X>0<b>1<a Y>2
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,15): unexpected-start-tag-implies-end-tag
+(1,15): adoption-agency-1.3
+(1,16): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       x=""
+|       "0"
+|       <b>
+|         "1"
+|     <b>
+|       <a>
+|         y=""
+|         "2"
+
+#data
+<!-----><font><div>hello<table>excite!<b>me!<th><i>please!</tr><!--X-->
+#errors
+(1,7): unexpected-dash-after-double-dash-in-comment
+(1,14): expected-doctype-but-got-start-tag
+(1,41): unexpected-start-tag-implies-table-voodoo
+(1,48): foster-parenting-character-in-table
+(1,48): foster-parenting-character-in-table
+(1,48): foster-parenting-character-in-table
+(1,48): foster-parenting-character-in-table
+(1,48): foster-parenting-character-in-table
+(1,48): foster-parenting-character-in-table
+(1,48): foster-parenting-character-in-table
+(1,48): foster-parenting-character-in-table
+(1,48): foster-parenting-character-in-table
+(1,48): foster-parenting-character-in-table
+(1,48): unexpected-cell-in-table-body
+(1,63): unexpected-cell-end-tag
+(1,71): eof-in-table
+#document
+| <!-- - -->
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|       <div>
+|         "helloexcite!"
+|         <b>
+|           "me!"
+|         <table>
+|           <tbody>
+|             <tr>
+|               <th>
+|                 <i>
+|                   "please!"
+|             <!-- X -->
+
+#data
+<!DOCTYPE html><li>hello<li>world<ul>how<li>do</ul>you</body><!--do-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <li>
+|       "hello"
+|     <li>
+|       "world"
+|       <ul>
+|         "how"
+|         <li>
+|           "do"
+|       "you"
+|   <!-- do -->
+
+#data
+<!DOCTYPE html>A<option>B<optgroup>C<select>D</option>E
+#errors
+(1,54): unexpected-end-tag-in-select
+(1,55): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "A"
+|     <option>
+|       "B"
+|     <optgroup>
+|       "C"
+|       <select>
+|         "DE"
+
+#data
+<
+#errors
+(1,1): expected-tag-name
+(1,1): expected-doctype-but-got-chars
+#new-errors
+(1:2) eof-before-tag-name
+#document
+| <html>
+|   <head>
+|   <body>
+|     "<"
+
+#data
+<#
+#errors
+(1,1): expected-tag-name
+(1,1): expected-doctype-but-got-chars
+#new-errors
+(1:2) invalid-first-character-of-tag-name
+#document
+| <html>
+|   <head>
+|   <body>
+|     "<#"
+
+#data
+</
+#errors
+(1,2): expected-closing-tag-but-got-eof
+(1,2): expected-doctype-but-got-chars
+#new-errors
+(1:3) eof-before-tag-name
+#document
+| <html>
+|   <head>
+|   <body>
+|     "</"
+
+#data
+</#
+#errors
+(1,2): expected-closing-tag-but-got-char
+(1,3): expected-doctype-but-got-eof
+#new-errors
+(1:3) invalid-first-character-of-tag-name
+#document
+| <!-- # -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<?
+#errors
+(1,1): expected-tag-name-but-got-question-mark
+(1,2): expected-doctype-but-got-eof
+#new-errors
+(1:2) unexpected-question-mark-instead-of-tag-name
+#document
+| <!-- ? -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<?#
+#errors
+(1,1): expected-tag-name-but-got-question-mark
+(1,3): expected-doctype-but-got-eof
+#new-errors
+(1:2) unexpected-question-mark-instead-of-tag-name
+#document
+| <!-- ?# -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!
+#errors
+(1,2): expected-dashes-or-doctype
+(1,2): expected-doctype-but-got-eof
+#new-errors
+(1:3) incorrectly-opened-comment
+#document
+| <!--  -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!#
+#errors
+(1,2): expected-dashes-or-doctype
+(1,3): expected-doctype-but-got-eof
+#new-errors
+(1:3) incorrectly-opened-comment
+#document
+| <!-- # -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<?COMMENT?>
+#errors
+(1,1): expected-tag-name-but-got-question-mark
+(1,11): expected-doctype-but-got-eof
+#new-errors
+(1:2) unexpected-question-mark-instead-of-tag-name
+#document
+| <!-- ?COMMENT? -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!COMMENT>
+#errors
+(1,2): expected-dashes-or-doctype
+(1,10): expected-doctype-but-got-eof
+#new-errors
+(1:3) incorrectly-opened-comment
+#document
+| <!-- COMMENT -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+</ COMMENT >
+#errors
+(1,2): expected-closing-tag-but-got-char
+(1,12): expected-doctype-but-got-eof
+#new-errors
+(1:3) invalid-first-character-of-tag-name
+#document
+| <!--  COMMENT  -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<?COM--MENT?>
+#errors
+(1,1): expected-tag-name-but-got-question-mark
+(1,13): expected-doctype-but-got-eof
+#new-errors
+(1:2) unexpected-question-mark-instead-of-tag-name
+#document
+| <!-- ?COM--MENT? -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!COM--MENT>
+#errors
+(1,2): expected-dashes-or-doctype
+(1,12): expected-doctype-but-got-eof
+#new-errors
+(1:3) incorrectly-opened-comment
+#document
+| <!-- COM--MENT -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+</ COM--MENT >
+#errors
+(1,2): expected-closing-tag-but-got-char
+(1,14): expected-doctype-but-got-eof
+#new-errors
+(1:3) invalid-first-character-of-tag-name
+#document
+| <!--  COM--MENT  -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><style> EOF
+#errors
+(1,26): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       " EOF"
+|   <body>
+
+#data
+<!DOCTYPE html><script> <!-- </script> --> </script> EOF
+#errors
+(1,52): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       " <!-- "
+|     " "
+|   <body>
+|     "-->  EOF"
+
+#data
+<b></b>TEST
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,10): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|     
+|       <b>
+|       "TEST"
+
+#data
+<b></b>TEST
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,19): unexpected-end-tag
+(1,23): adoption-agency-1.2
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       id="a"
+|       <b>
+|     
+|       id="b"
+|       "TEST"
+
+#data
+<b id=a><b id=b></b>TEST
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,23): unexpected-end-tag
+(1,27): adoption-agency-1.2
+(1,31): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       id="a"
+|       
+|         <b>
+|           id="b"
+|       "TEST"
+
+#data
+<!DOCTYPE html><title>U-test</title><body><div>Test<u></div></body>
+#errors
+(1,61): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "U-test"
+|   <body>
+|     <div>
+|       
+|         "Test"
+|         <u>
+
+#data
+<!DOCTYPE html><font><table></font></table></font>
+#errors
+(1,35): unexpected-end-tag-implies-table-voodoo
+(1,35): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|       <table>
+
+#data
+<font>hello<b>cruel</font>world
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,29): adoption-agency-1.3
+(1,29): adoption-agency-1.3
+(1,34): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|     
+|       <font>
+|         "hello"
+|         <b>
+|           "cruel"
+|       <b>
+|         "world"
+
+#data
+<b>Test</i>Test
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,11): unexpected-end-tag
+(1,15): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "TestTest"
+
+#data
+<b>A<cite>B<div>C
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,17): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "A"
+|       <cite>
+|         "B"
+|         <div>
+|           "C"
+
+#data
+<b>A<cite>B<div>C</cite>D
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,24): unexpected-end-tag
+(1,25): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "A"
+|       <cite>
+|         "B"
+|         <div>
+|           "CD"
+
+#data
+<b>A<cite>B<div>C</b>D
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,21): adoption-agency-1.3
+(1,22): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "A"
+|       <cite>
+|         "B"
+|     <div>
+|       <b>
+|         "C"
+|       "D"
+
+#data
+
+#errors
+(1,0): expected-doctype-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<DIV>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,5): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+
+#data
+<DIV> abc
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,9): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc"
+
+#data
+<DIV> abc <B>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,13): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+
+#data
+<DIV> abc <B> def
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,17): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def"
+
+#data
+<DIV> abc <B> def <I>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,21): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+
+#data
+<DIV> abc <B> def <I> ghi
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,25): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi"
+
+#data
+<DIV> abc <B> def <I> ghi 
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,29): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|           
+
+#data
+<DIV> abc <B> def <I> ghi  jkl
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,33): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|           
+|             " jkl"
+
+#data
+<DIV> abc <B> def <I> ghi  jkl </B>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,38): adoption-agency-1.3
+(1,38): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|       <i>
+|         
+|           <b>
+|             " jkl "
+
+#data
+<DIV> abc <B> def <I> ghi  jkl </B> mno
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,38): adoption-agency-1.3
+(1,42): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|       <i>
+|         
+|           <b>
+|             " jkl "
+|           " mno"
+
+#data
+<DIV> abc <B> def <I> ghi  jkl </B> mno </I>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,38): adoption-agency-1.3
+(1,47): adoption-agency-1.3
+(1,47): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|       <i>
+|       
+|         <i>
+|           <b>
+|             " jkl "
+|           " mno "
+
+#data
+<DIV> abc <B> def <I> ghi  jkl </B> mno </I> pqr
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,38): adoption-agency-1.3
+(1,47): adoption-agency-1.3
+(1,51): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|       <i>
+|       
+|         <i>
+|           <b>
+|             " jkl "
+|           " mno "
+|         " pqr"
+
+#data
+<DIV> abc <B> def <I> ghi  jkl </B> mno </I> pqr 
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,38): adoption-agency-1.3
+(1,47): adoption-agency-1.3
+(1,56): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|       <i>
+|       
+|         <i>
+|           <b>
+|             " jkl "
+|           " mno "
+|         " pqr "
+
+#data
+<DIV> abc <B> def <I> ghi  jkl </B> mno </I> pqr  stu
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,38): adoption-agency-1.3
+(1,47): adoption-agency-1.3
+(1,60): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|       <i>
+|       
+|         <i>
+|           <b>
+|             " jkl "
+|           " mno "
+|         " pqr "
+|       " stu"
+
+#data
+<test attribute---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------->
+#errors
+(1,1040): expected-doctype-but-got-start-tag
+(1,1040): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <test>
+|       attribute----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------=""
+
+#data
+<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe
+#errors
+(1,15): expected-doctype-but-got-start-tag
+(1,39): unexpected-start-tag-implies-table-voodoo
+(1,39): unexpected-start-tag-implies-end-tag
+(1,39): unexpected-end-tag
+(1,45): foster-parenting-character-in-table
+(1,45): foster-parenting-character-in-table
+(1,68): foster-parenting-character-in-table
+(1,71): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       href="blah"
+|       "aba"
+|       <a>
+|         href="foo"
+|         "br"
+|       <a>
+|         href="foo"
+|         "x"
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|     <a>
+|       href="foo"
+|       "aoe"
+
+#data
+<a href="blah">aba<table><tr><td><a href="foo">br</td></tr>x</table>aoe
+#errors
+(1,15): expected-doctype-but-got-start-tag
+(1,54): unexpected-cell-end-tag
+(1,68): unexpected text in table
+(1,71): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       href="blah"
+|       "abax"
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <a>
+|                 href="foo"
+|                 "br"
+|       "aoe"
+
+#data
+<table><a href="blah">aba<tr><td><a href="foo">br</td></tr>x</table>aoe
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,22): unexpected-start-tag-implies-table-voodoo
+(1,29): foster-parenting-character-in-table
+(1,29): foster-parenting-character-in-table
+(1,29): foster-parenting-character-in-table
+(1,54): unexpected-cell-end-tag
+(1,68): foster-parenting-character-in-table
+(1,71): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       href="blah"
+|       "aba"
+|     <a>
+|       href="blah"
+|       "x"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <a>
+|               href="foo"
+|               "br"
+|     <a>
+|       href="blah"
+|       "aoe"
+
+#data
+<a href=a>aa<marquee>aa<a href=b>bb</marquee>aa
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(1,45): end-tag-too-early
+(1,47): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       href="a"
+|       "aa"
+|       <marquee>
+|         "aa"
+|         <a>
+|           href="b"
+|           "bb"
+|       "aa"
+
+#data
+<wbr><strike><code></strike><code><strike></code>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,28): adoption-agency-1.3
+(1,49): adoption-agency-1.3
+(1,49): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <wbr>
+|     <strike>
+|       <code>
+|     <code>
+|       <code>
+|         <strike>
+
+#data
+<!DOCTYPE html><spacer>foo
+#errors
+(1,26): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <spacer>
+|       "foo"
+
+#data
+<title><meta></title><link><title><meta></title>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <title>
+|       "<meta>"
+|     <link>
+|     <title>
+|       "<meta>"
+|   <body>
+
+#data
+<style><!--</style><meta><script>--><link></script>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--"
+|     <meta>
+|     <script>
+|       "--><link>"
+|   <body>
+
+#data
+<head><meta></head><link>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,25): unexpected-start-tag-out-of-my-head
+#document
+| <html>
+|   <head>
+|     <meta>
+|     <link>
+|   <body>
+
+#data
+<table><tr><tr><td><td><span><th><span>X</table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,33): unexpected-cell-end-tag
+(1,48): unexpected-cell-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|         <tr>
+|           <td>
+|           <td>
+|             <span>
+|           <th>
+|             <span>
+|               "X"
+
+#data
+<body><body><base><link><meta><title></title><body></body>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,12): unexpected-start-tag
+(1,54): unexpected-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <base>
+|     <link>
+|     <meta>
+|     <title>
+|       ""
+|     
+
+#data
+<textarea></textarea>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       ""
+
+#data
+<image>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,10): unexpected-start-tag-treated-as
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <img>
+
+#data
+<a><table><a></table><a><div><a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,13): unexpected-start-tag-implies-table-voodoo
+(1,13): unexpected-start-tag-implies-end-tag
+(1,13): adoption-agency-1.3
+(1,27): unexpected-start-tag-implies-end-tag
+(1,27): adoption-agency-1.2
+(1,32): unexpected-end-tag
+(1,35): unexpected-start-tag-implies-end-tag
+(1,35): adoption-agency-1.2
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <a>
+|       <table>
+|     
+|       <a>
+|     <div>
+|       <a>
+
+#data
+<head><meta>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,10): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <meta>
+|   <body>
+|     
+
+#data
+<head></html><meta>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,19): expected-eof-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <meta>
+|     
+
+#data
+<b><table><td><i></table>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,14): unexpected-cell-in-table-body
+(1,25): unexpected-cell-end-tag
+(1,25): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <i>
+
+#data
+<b><table><td></b><i></table>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,14): unexpected-cell-in-table-body
+(1,18): unexpected-end-tag
+(1,29): unexpected-cell-end-tag
+(1,29): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <i>
+
+#data
+<h1><h2>
+#errors
+(1,4): expected-doctype-but-got-start-tag
+(1,8): unexpected-start-tag
+(1,8): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <h1>
+|     <h2>
+
+#data
+<a><a></a></a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,9): unexpected-start-tag-implies-end-tag
+(1,9): adoption-agency-1.3
+(1,21): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     
+|       <a>
+|       <a>
+
+#data
+<b><button></b></button></b>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,15): adoption-agency-1.3
+(1,28): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|     <button>
+|       <b>
+
+#data
+<b><div><marquee></b></div>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,11): unexpected-end-tag
+(1,24): unexpected-end-tag
+(1,28): unexpected-end-tag
+(1,34): end-tag-too-early
+(1,34): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <b>
+|     <div>
+|       <b>
+|         <marquee>
+|           
+
+#data
+<script></script></div><title></title>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,23): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|     <title>
+|   <body>
+|     
+|     
+
+#data
+<hr>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,11): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|     <hr>
+|     
+
+#data
+<select><b><option><select><option></b></select>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,11): unexpected-start-tag-in-select
+(1,27): unexpected-select-in-select
+(1,39): unexpected-end-tag
+(1,48): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|     <option>
+
+#data
+<html><head><title></title><body></body></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <title>
+|   <body>
+
+#data
+<a><table><td><a><table></table><a></tr><a></table><a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,14): unexpected-cell-in-table-body
+(1,35): unexpected-start-tag-implies-end-tag
+(1,40): unexpected-cell-end-tag
+(1,43): unexpected-start-tag-implies-table-voodoo
+(1,43): unexpected-start-tag-implies-end-tag
+(1,43): unexpected-end-tag
+(1,54): unexpected-start-tag-implies-end-tag
+(1,54): adoption-agency-1.2
+(1,54): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <a>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <a>
+|                 <table>
+|               <a>
+|     <a>
+
+#data
+<ul><li></li><div><li></div><li><li><div><li><address><li><b><em></b><li></ul>
+#errors
+(1,4): expected-doctype-but-got-start-tag
+(1,45): end-tag-too-early
+(1,58): end-tag-too-early
+(1,69): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ul>
+|       <li>
+|       <div>
+|         <li>
+|       <li>
+|       <li>
+|         <div>
+|       <li>
+|         <address>
+|       <li>
+|         <b>
+|           <em>
+|       <li>
+
+#data
+<ul><li><ul></li><li>a</li></ul></li></ul>
+#errors
+(1,4): expected-doctype-but-got-start-tag
+(1,17): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ul>
+|       <li>
+|         <ul>
+|           <li>
+|             "a"
+
+#data
+<frameset><frame><frameset><frame></frameset><noframes></noframes></frameset>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+|     <frameset>
+|       <frame>
+|     <noframes>
+
+#data
+<h1><table><td><h3></table><h3></h1>
+#errors
+(1,4): expected-doctype-but-got-start-tag
+(1,15): unexpected-cell-in-table-body
+(1,27): unexpected-cell-end-tag
+(1,31): unexpected-start-tag
+(1,36): end-tag-too-early
+#document
+| <html>
+|   <head>
+|   <body>
+|     <h1>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <h3>
+|     <h3>
+
+#data
+<table><colgroup><col><colgroup><col><col><col><colgroup><col><col><thead><tr><td></table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <colgroup>
+|         <col>
+|       <colgroup>
+|         <col>
+|         <col>
+|         <col>
+|       <colgroup>
+|         <col>
+|         <col>
+|       <thead>
+|         <tr>
+|           <td>
+
+#data
+<table><col><tbody><col><tr><col><td><col></table><col>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,37): unexpected-cell-in-table-body
+(1,55): unexpected-start-tag-ignored
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <colgroup>
+|         <col>
+|       <tbody>
+|       <colgroup>
+|         <col>
+|       <tbody>
+|         <tr>
+|       <colgroup>
+|         <col>
+|       <tbody>
+|         <tr>
+|           <td>
+|       <colgroup>
+|         <col>
+
+#data
+<table><colgroup><tbody><colgroup><tr><colgroup><td><colgroup></table><colgroup>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,52): unexpected-cell-in-table-body
+(1,80): unexpected-start-tag-ignored
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <colgroup>
+|       <tbody>
+|       <colgroup>
+|       <tbody>
+|         <tr>
+|       <colgroup>
+|       <tbody>
+|         <tr>
+|           <td>
+|       <colgroup>
+
+#data
+</strong></b></em></i></u></strike></s></blink></tt></pre></big></small></font></select></h1></h2></h3></h4></h5></h6></body></br></a></img></title></span></style></script></table></th></td></tr></frame></area></link></param></hr></input></col></base></meta></basefont></bgsound></embed></spacer></dd></dt></caption></colgroup></tbody></tfoot></thead></address></blockquote></center></dir></div></dl></fieldset></listing></menu></ol></ul></li></nobr></wbr></form></button></marquee></object></html></frameset></head></iframe></image></isindex></noembed></noframes></noscript></optgroup></option></plaintext></textarea>
+#errors
+(1,9): expected-doctype-but-got-end-tag
+(1,9): unexpected-end-tag-before-html
+(1,13): unexpected-end-tag-before-html
+(1,18): unexpected-end-tag-before-html
+(1,22): unexpected-end-tag-before-html
+(1,26): unexpected-end-tag-before-html
+(1,35): unexpected-end-tag-before-html
+(1,39): unexpected-end-tag-before-html
+(1,47): unexpected-end-tag-before-html
+(1,52): unexpected-end-tag-before-html
+(1,58): unexpected-end-tag-before-html
+(1,64): unexpected-end-tag-before-html
+(1,72): unexpected-end-tag-before-html
+(1,79): unexpected-end-tag-before-html
+(1,88): unexpected-end-tag-before-html
+(1,93): unexpected-end-tag-before-html
+(1,98): unexpected-end-tag-before-html
+(1,103): unexpected-end-tag-before-html
+(1,108): unexpected-end-tag-before-html
+(1,113): unexpected-end-tag-before-html
+(1,118): unexpected-end-tag-before-html
+(1,130): unexpected-end-tag-after-body
+(1,130): unexpected-end-tag-treated-as
+(1,134): unexpected-end-tag
+(1,140): unexpected-end-tag
+(1,148): unexpected-end-tag
+(1,155): unexpected-end-tag
+(1,163): unexpected-end-tag
+(1,172): unexpected-end-tag
+(1,180): unexpected-end-tag
+(1,185): unexpected-end-tag
+(1,190): unexpected-end-tag
+(1,195): unexpected-end-tag
+(1,203): unexpected-end-tag
+(1,210): unexpected-end-tag
+(1,217): unexpected-end-tag
+(1,225): unexpected-end-tag
+(1,230): unexpected-end-tag
+(1,238): unexpected-end-tag
+(1,244): unexpected-end-tag
+(1,251): unexpected-end-tag
+(1,258): unexpected-end-tag
+(1,269): unexpected-end-tag
+(1,279): unexpected-end-tag
+(1,287): unexpected-end-tag
+(1,296): unexpected-end-tag
+(1,300): unexpected-end-tag
+(1,305): unexpected-end-tag
+(1,310): unexpected-end-tag
+(1,320): unexpected-end-tag
+(1,331): unexpected-end-tag
+(1,339): unexpected-end-tag
+(1,347): unexpected-end-tag
+(1,355): unexpected-end-tag
+(1,365): end-tag-too-early
+(1,378): end-tag-too-early
+(1,387): end-tag-too-early
+(1,393): end-tag-too-early
+(1,399): end-tag-too-early
+(1,404): end-tag-too-early
+(1,415): end-tag-too-early
+(1,425): end-tag-too-early
+(1,432): end-tag-too-early
+(1,437): end-tag-too-early
+(1,442): end-tag-too-early
+(1,447): unexpected-end-tag
+(1,454): unexpected-end-tag
+(1,460): unexpected-end-tag
+(1,467): unexpected-end-tag
+(1,476): end-tag-too-early
+(1,486): end-tag-too-early
+(1,495): end-tag-too-early
+(1,513): expected-eof-but-got-end-tag
+(1,513): unexpected-end-tag
+(1,520): unexpected-end-tag
+(1,529): unexpected-end-tag
+(1,537): unexpected-end-tag
+(1,547): unexpected-end-tag
+(1,557): unexpected-end-tag
+(1,568): unexpected-end-tag
+(1,579): unexpected-end-tag
+(1,590): unexpected-end-tag
+(1,599): unexpected-end-tag
+(1,611): unexpected-end-tag
+(1,622): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|     
+
+#data
+<table><tr></strong></b></em></i></u></strike></s></blink></tt></pre></big></small></font></select></h1></h2></h3></h4></h5></h6></body></br></a></img></title></span></style></script></table></th></td></tr></frame></area></link></param></hr></input></col></base></meta></basefont></bgsound></embed></spacer></dd></dt></caption></colgroup></tbody></tfoot></thead></address></blockquote></center></dir></div></dl></fieldset></listing></menu></ol></ul></li></nobr></wbr></form></button></marquee></object></html></frameset></head></iframe></image></isindex></noembed></noframes></noscript></optgroup></option></plaintext></textarea>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,20): unexpected-end-tag-implies-table-voodoo
+(1,20): unexpected-end-tag
+(1,24): unexpected-end-tag-implies-table-voodoo
+(1,24): unexpected-end-tag
+(1,29): unexpected-end-tag-implies-table-voodoo
+(1,29): unexpected-end-tag
+(1,33): unexpected-end-tag-implies-table-voodoo
+(1,33): unexpected-end-tag
+(1,37): unexpected-end-tag-implies-table-voodoo
+(1,37): unexpected-end-tag
+(1,46): unexpected-end-tag-implies-table-voodoo
+(1,46): unexpected-end-tag
+(1,50): unexpected-end-tag-implies-table-voodoo
+(1,50): unexpected-end-tag
+(1,58): unexpected-end-tag-implies-table-voodoo
+(1,58): unexpected-end-tag
+(1,63): unexpected-end-tag-implies-table-voodoo
+(1,63): unexpected-end-tag
+(1,69): unexpected-end-tag-implies-table-voodoo
+(1,69): end-tag-too-early
+(1,75): unexpected-end-tag-implies-table-voodoo
+(1,75): unexpected-end-tag
+(1,83): unexpected-end-tag-implies-table-voodoo
+(1,83): unexpected-end-tag
+(1,90): unexpected-end-tag-implies-table-voodoo
+(1,90): unexpected-end-tag
+(1,99): unexpected-end-tag-implies-table-voodoo
+(1,99): unexpected-end-tag
+(1,104): unexpected-end-tag-implies-table-voodoo
+(1,104): end-tag-too-early
+(1,109): unexpected-end-tag-implies-table-voodoo
+(1,109): end-tag-too-early
+(1,114): unexpected-end-tag-implies-table-voodoo
+(1,114): end-tag-too-early
+(1,119): unexpected-end-tag-implies-table-voodoo
+(1,119): end-tag-too-early
+(1,124): unexpected-end-tag-implies-table-voodoo
+(1,124): end-tag-too-early
+(1,129): unexpected-end-tag-implies-table-voodoo
+(1,129): end-tag-too-early
+(1,136): unexpected-end-tag-in-table-row
+(1,141): unexpected-end-tag-implies-table-voodoo
+(1,141): unexpected-end-tag-treated-as
+(1,145): unexpected-end-tag-implies-table-voodoo
+(1,145): unexpected-end-tag
+(1,151): unexpected-end-tag-implies-table-voodoo
+(1,151): unexpected-end-tag
+(1,159): unexpected-end-tag-implies-table-voodoo
+(1,159): unexpected-end-tag
+(1,166): unexpected-end-tag-implies-table-voodoo
+(1,166): unexpected-end-tag
+(1,174): unexpected-end-tag-implies-table-voodoo
+(1,174): unexpected-end-tag
+(1,183): unexpected-end-tag-implies-table-voodoo
+(1,183): unexpected-end-tag
+(1,196): unexpected-end-tag
+(1,201): unexpected-end-tag
+(1,206): unexpected-end-tag
+(1,214): unexpected-end-tag
+(1,221): unexpected-end-tag
+(1,228): unexpected-end-tag
+(1,236): unexpected-end-tag
+(1,241): unexpected-end-tag
+(1,249): unexpected-end-tag
+(1,255): unexpected-end-tag
+(1,262): unexpected-end-tag
+(1,269): unexpected-end-tag
+(1,280): unexpected-end-tag
+(1,290): unexpected-end-tag
+(1,298): unexpected-end-tag
+(1,307): unexpected-end-tag
+(1,311): unexpected-end-tag
+(1,316): unexpected-end-tag
+(1,321): unexpected-end-tag
+(1,331): unexpected-end-tag
+(1,342): unexpected-end-tag
+(1,350): unexpected-end-tag
+(1,358): unexpected-end-tag
+(1,366): unexpected-end-tag
+(1,376): end-tag-too-early
+(1,389): end-tag-too-early
+(1,398): end-tag-too-early
+(1,404): end-tag-too-early
+(1,410): end-tag-too-early
+(1,415): end-tag-too-early
+(1,426): end-tag-too-early
+(1,436): end-tag-too-early
+(1,443): end-tag-too-early
+(1,448): end-tag-too-early
+(1,453): end-tag-too-early
+(1,458): unexpected-end-tag
+(1,465): unexpected-end-tag
+(1,471): unexpected-end-tag
+(1,478): unexpected-end-tag
+(1,487): end-tag-too-early
+(1,497): end-tag-too-early
+(1,506): end-tag-too-early
+(1,524): expected-eof-but-got-end-tag
+(1,524): unexpected-end-tag
+(1,531): unexpected-end-tag
+(1,540): unexpected-end-tag
+(1,548): unexpected-end-tag
+(1,558): unexpected-end-tag
+(1,568): unexpected-end-tag
+(1,579): unexpected-end-tag
+(1,590): unexpected-end-tag
+(1,601): unexpected-end-tag
+(1,610): unexpected-end-tag
+(1,622): unexpected-end-tag
+(1,633): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|     <table>
+|       <tbody>
+|         <tr>
+|     
+
+#data
+<frameset>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(1,10): eof-in-frameset
+#document
+| <html>
+|   <head>
+|   <frameset>
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests10.dat b/lib/html5lib/tests/testdata/tree-construction/tests10.dat
new file mode 100644
index 00000000..f84e2d54
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests10.dat
@@ -0,0 +1,849 @@
+#data
+<!DOCTYPE html><svg></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<!DOCTYPE html><svg></svg><![CDATA[a]]>
+#errors
+(1,28) expected-dashes-or-doctype
+#new-errors
+(1:35) cdata-in-html-content
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|     <!-- [CDATA[a]] -->
+
+#data
+<!DOCTYPE html><body><svg></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<!DOCTYPE html><body><select><svg></svg></select>
+#errors
+(1,34) unexpected-start-tag-in-select
+(1,40) unexpected-end-tag-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!DOCTYPE html><body><select><option><svg></svg></option></select>
+#errors
+(1,42) unexpected-start-tag-in-select
+(1,48) unexpected-end-tag-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+
+#data
+<!DOCTYPE html><body><table><svg></svg></table>
+#errors
+(1,33) foster-parenting-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|     <table>
+
+#data
+<!DOCTYPE html><body><table><svg><g>foo</g></svg></table>
+#errors
+(1,33) foster-parenting-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|     <table>
+
+#data
+<!DOCTYPE html><body><table><svg><g>foo</g><g>bar</g></svg></table>
+#errors
+(1,33) foster-parenting-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|       <svg g>
+|         "bar"
+|     <table>
+
+#data
+<!DOCTYPE html><body><table><tbody><svg><g>foo</g><g>bar</g></svg></tbody></table>
+#errors
+(1,40) foster-parenting-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|       <svg g>
+|         "bar"
+|     <table>
+|       <tbody>
+
+#data
+<!DOCTYPE html><body><table><tbody><tr><svg><g>foo</g><g>bar</g></svg></tr></tbody></table>
+#errors
+(1,44) foster-parenting-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|       <svg g>
+|         "bar"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg></td></tr></tbody></table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|               <svg g>
+|                 "foo"
+|               <svg g>
+|                 "bar"
+
+#data
+<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg>baz</td></tr></tbody></table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|               <svg g>
+|                 "foo"
+|               <svg g>
+|                 "bar"
+|             
+|               "baz"
+
+#data
+<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g></svg>baz</caption></table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <svg svg>
+|           <svg g>
+|             "foo"
+|           <svg g>
+|             "bar"
+|         
+|           "baz"
+
+#data
+<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g>baz</table>quux
+#errors
+(1,65) unexpected-html-element-in-foreign-content
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <svg svg>
+|           <svg g>
+|             "foo"
+|           <svg g>
+|             "bar"
+|         
+|           "baz"
+|     
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g>baz</table>quux
+#errors
+(1,73) unexpected-end-tag
+(1,73) expected-one-end-tag-but-got-another
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <svg svg>
+|           <svg g>
+|             "foo"
+|           <svg g>
+|             "bar"
+|           "baz"
+|     
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><colgroup><svg><g>foo</g><g>bar</g>baz</table>quux
+#errors
+(1,43) foster-parenting-start-tag svg
+(1,66) unexpected HTML-like start tag token in foreign content
+(1,66) foster-parenting-start-tag
+(1,67) foster-parenting-character
+(1,68) foster-parenting-character
+(1,69) foster-parenting-character
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|       <svg g>
+|         "bar"
+|     
+|       "baz"
+|     <table>
+|       <colgroup>
+|     
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><tr><td><select><svg><g>foo</g><g>bar</g>baz</table>quux
+#errors
+(1,49) unexpected-start-tag-in-select
+(1,52) unexpected-start-tag-in-select
+(1,59) unexpected-end-tag-in-select
+(1,62) unexpected-start-tag-in-select
+(1,69) unexpected-end-tag-in-select
+(1,72) unexpected-start-tag-in-select
+(1,83) unexpected-table-element-end-tag-in-select-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <select>
+|               "foobarbaz"
+|     
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><select><svg><g>foo</g><g>bar</g>baz</table>quux
+#errors
+(1,36) unexpected-start-tag-implies-table-voodoo
+(1,41) unexpected-start-tag-in-select
+(1,44) unexpected-start-tag-in-select
+(1,51) unexpected-end-tag-in-select
+(1,54) unexpected-start-tag-in-select
+(1,61) unexpected-end-tag-in-select
+(1,64) unexpected-start-tag-in-select
+(1,75) unexpected-table-element-end-tag-in-select-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "foobarbaz"
+|     <table>
+|     
+|       "quux"
+
+#data
+<!DOCTYPE html><body></body></html><svg><g>foo</g><g>bar</g>baz
+#errors
+(1,40) expected-eof-but-got-start-tag
+(1,63) unexpected-html-element-in-foreign-content
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|       <svg g>
+|         "bar"
+|     
+|       "baz"
+
+#data
+<!DOCTYPE html><body></body><svg><g>foo</g><g>bar</g>baz
+#errors
+(1,33) unexpected-start-tag-after-body
+(1,56) unexpected-html-element-in-foreign-content
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|       <svg g>
+|         "bar"
+|     
+|       "baz"
+
+#data
+<!DOCTYPE html><frameset><svg><g></g><g></g><span>
+#errors
+(1,30) unexpected-start-tag-in-frameset
+(1,33) unexpected-start-tag-in-frameset
+(1,37) unexpected-end-tag-in-frameset
+(1,40) unexpected-start-tag-in-frameset
+(1,44) unexpected-end-tag-in-frameset
+(1,47) unexpected-start-tag-in-frameset
+(1,53) unexpected-start-tag-in-frameset
+(1,53) eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><frameset></frameset><svg><g></g><g></g><span>
+#errors
+(1,41) unexpected-start-tag-after-frameset
+(1,44) unexpected-start-tag-after-frameset
+(1,48) unexpected-end-tag-after-frameset
+(1,51) unexpected-start-tag-after-frameset
+(1,55) unexpected-end-tag-after-frameset
+(1,58) unexpected-start-tag-after-frameset
+(1,64) unexpected-start-tag-after-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><body xlink:href=foo><svg xlink:href=foo></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     <svg svg>
+|       xlink href="foo"
+
+#data
+<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo></g></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     xml:lang="en"
+|     <svg svg>
+|       <svg g>
+|         xlink href="foo"
+|         xml lang="en"
+
+#data
+<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo /></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     xml:lang="en"
+|     <svg svg>
+|       <svg g>
+|         xlink href="foo"
+|         xml lang="en"
+
+#data
+<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo />bar</svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     xml:lang="en"
+|     <svg svg>
+|       <svg g>
+|         xlink href="foo"
+|         xml lang="en"
+|       "bar"
+
+#data
+<svg></path>
+#errors
+(1,5) expected-doctype-but-got-start-tag
+(1,12) unexpected-end-tag
+(1,12) unexpected-end-tag
+(1,12) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<div><svg></div>a
+#errors
+(1,5) expected-doctype-but-got-start-tag
+(1,16) unexpected-end-tag
+(1,16) end-tag-too-early
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <svg svg>
+|     "a"
+
+#data
+<div><svg><path></div>a
+#errors
+(1,5) expected-doctype-but-got-start-tag
+(1,22) unexpected-end-tag
+(1,22) end-tag-too-early
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <svg svg>
+|         <svg path>
+|     "a"
+
+#data
+<div><svg><path></svg><path>
+#errors
+(1,5) expected-doctype-but-got-start-tag
+(1,22) unexpected-end-tag
+(1,28) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <svg svg>
+|         <svg path>
+|       <path>
+
+#data
+<div><svg><path><foreignObject><math></div>a
+#errors
+(1,5) expected-doctype-but-got-start-tag
+(1,43) unexpected-end-tag
+(1,43) end-tag-too-early
+(1,44) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <svg svg>
+|         <svg path>
+|           <svg foreignObject>
+|             <math math>
+|               "a"
+
+#data
+<div><svg><path><foreignObject></div>a
+#errors
+(1,5) expected-doctype-but-got-start-tag
+(1,40) end-tag-too-early
+(1,41) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <svg svg>
+|         <svg path>
+|           <svg foreignObject>
+|             
+|               "a"
+
+#data
+<!DOCTYPE html><svg><desc><div><svg><ul>a
+#errors
+(1,40) unexpected-html-element-in-foreign-content
+(1,41) expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg desc>
+|         <div>
+|           <svg svg>
+|           <ul>
+|             "a"
+
+#data
+<!DOCTYPE html><svg><desc><svg><ul>a
+#errors
+(1,35) unexpected-html-element-in-foreign-content
+(1,36) expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg desc>
+|         <svg svg>
+|         <ul>
+|           "a"
+
+#data
+<!DOCTYPE html><svg><desc>
+#errors
+(1,32) expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <svg svg>
+|         <svg desc>
+|           
+
+#data
+<!DOCTYPE html><svg><title>
+#errors
+(1,33) expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <svg svg>
+|         <svg title>
+|           
+
+#data
+<div><svg><path><foreignObject></foreignObject>
+#errors
+(1,5) expected-doctype-but-got-start-tag
+(1,50) unexpected-end-tag
+(1,53) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <svg svg>
+|         <svg path>
+|           <svg foreignObject>
+|             
+|             
+
+#data
+<math><mi><div><object><div><span></span></div></object></div></mi><mi>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,71) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         <div>
+|           <object>
+|             <div>
+|               <span>
+|       <math mi>
+
+#data
+<math><mi><svg><foreignObject><div><div></div></div></foreignObject></svg></mi><mi>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,83) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         <svg svg>
+|           <svg foreignObject>
+|             <div>
+|               <div>
+|       <math mi>
+
+#data
+<svg><script></script><path>
+#errors
+(1,5) expected-doctype-but-got-start-tag
+(1,28) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg script>
+|       <svg path>
+
+#data
+<table><svg></svg><tr>
+#errors
+(1,7) expected-doctype-but-got-start-tag
+(1,12) unexpected-start-tag-implies-table-voodoo
+(1,22) eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<math><mi><mglyph>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,18) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         <math mglyph>
+
+#data
+<math><mi><malignmark>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,22) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         <math malignmark>
+
+#data
+<math><mo><mglyph>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,18) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mo>
+|         <math mglyph>
+
+#data
+<math><mo><malignmark>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,22) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mo>
+|         <math malignmark>
+
+#data
+<math><mn><mglyph>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,18) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mn>
+|         <math mglyph>
+
+#data
+<math><mn><malignmark>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,22) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mn>
+|         <math malignmark>
+
+#data
+<math><ms><mglyph>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,18) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math ms>
+|         <math mglyph>
+
+#data
+<math><ms><malignmark>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,22) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math ms>
+|         <math malignmark>
+
+#data
+<math><mtext><mglyph>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,21) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mtext>
+|         <math mglyph>
+
+#data
+<math><mtext><malignmark>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,25) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mtext>
+|         <math malignmark>
+
+#data
+<math><annotation-xml><svg></svg></annotation-xml><mi>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,54) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         <svg svg>
+|       <math mi>
+
+#data
+<math><annotation-xml><svg><foreignObject><div><math><mi></mi></math><span></span></div></foreignObject><path></path></svg></annotation-xml><mi>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,144) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         <svg svg>
+|           <svg foreignObject>
+|             <div>
+|               <math math>
+|                 <math mi>
+|               <span>
+|           <svg path>
+|       <math mi>
+
+#data
+<math><annotation-xml><svg><foreignObject><math><mi><svg></svg></mi><mo></mo></math><span></span></foreignObject><path></path></svg></annotation-xml><mi>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,153) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         <svg svg>
+|           <svg foreignObject>
+|             <math math>
+|               <math mi>
+|                 <svg svg>
+|               <math mo>
+|             <span>
+|           <svg path>
+|       <math mi>
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests11.dat b/lib/html5lib/tests/testdata/tree-construction/tests11.dat
new file mode 100644
index 00000000..b9901e79
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests11.dat
@@ -0,0 +1,523 @@
+#data
+<!DOCTYPE html><body><svg attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' diffuseConstant='' edgeMode='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       attributeName=""
+|       attributeType=""
+|       baseFrequency=""
+|       baseProfile=""
+|       calcMode=""
+|       clipPathUnits=""
+|       diffuseConstant=""
+|       edgeMode=""
+|       filterUnits=""
+|       glyphRef=""
+|       gradientTransform=""
+|       gradientUnits=""
+|       kernelMatrix=""
+|       kernelUnitLength=""
+|       keyPoints=""
+|       keySplines=""
+|       keyTimes=""
+|       lengthAdjust=""
+|       limitingConeAngle=""
+|       markerHeight=""
+|       markerUnits=""
+|       markerWidth=""
+|       maskContentUnits=""
+|       maskUnits=""
+|       numOctaves=""
+|       pathLength=""
+|       patternContentUnits=""
+|       patternTransform=""
+|       patternUnits=""
+|       pointsAtX=""
+|       pointsAtY=""
+|       pointsAtZ=""
+|       preserveAlpha=""
+|       preserveAspectRatio=""
+|       primitiveUnits=""
+|       refX=""
+|       refY=""
+|       repeatCount=""
+|       repeatDur=""
+|       requiredExtensions=""
+|       requiredFeatures=""
+|       specularConstant=""
+|       specularExponent=""
+|       spreadMethod=""
+|       startOffset=""
+|       stdDeviation=""
+|       stitchTiles=""
+|       surfaceScale=""
+|       systemLanguage=""
+|       tableValues=""
+|       targetX=""
+|       targetY=""
+|       textLength=""
+|       viewBox=""
+|       viewTarget=""
+|       xChannelSelector=""
+|       yChannelSelector=""
+|       zoomAndPan=""
+
+#data
+<!DOCTYPE html><BODY><SVG ATTRIBUTENAME='' ATTRIBUTETYPE='' BASEFREQUENCY='' BASEPROFILE='' CALCMODE='' CLIPPATHUNITS='' DIFFUSECONSTANT='' EDGEMODE='' FILTERUNITS='' GLYPHREF='' GRADIENTTRANSFORM='' GRADIENTUNITS='' KERNELMATRIX='' KERNELUNITLENGTH='' KEYPOINTS='' KEYSPLINES='' KEYTIMES='' LENGTHADJUST='' LIMITINGCONEANGLE='' MARKERHEIGHT='' MARKERUNITS='' MARKERWIDTH='' MASKCONTENTUNITS='' MASKUNITS='' NUMOCTAVES='' PATHLENGTH='' PATTERNCONTENTUNITS='' PATTERNTRANSFORM='' PATTERNUNITS='' POINTSATX='' POINTSATY='' POINTSATZ='' PRESERVEALPHA='' PRESERVEASPECTRATIO='' PRIMITIVEUNITS='' REFX='' REFY='' REPEATCOUNT='' REPEATDUR='' REQUIREDEXTENSIONS='' REQUIREDFEATURES='' SPECULARCONSTANT='' SPECULAREXPONENT='' SPREADMETHOD='' STARTOFFSET='' STDDEVIATION='' STITCHTILES='' SURFACESCALE='' SYSTEMLANGUAGE='' TABLEVALUES='' TARGETX='' TARGETY='' TEXTLENGTH='' VIEWBOX='' VIEWTARGET='' XCHANNELSELECTOR='' YCHANNELSELECTOR='' ZOOMANDPAN=''></SVG>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       attributeName=""
+|       attributeType=""
+|       baseFrequency=""
+|       baseProfile=""
+|       calcMode=""
+|       clipPathUnits=""
+|       diffuseConstant=""
+|       edgeMode=""
+|       filterUnits=""
+|       glyphRef=""
+|       gradientTransform=""
+|       gradientUnits=""
+|       kernelMatrix=""
+|       kernelUnitLength=""
+|       keyPoints=""
+|       keySplines=""
+|       keyTimes=""
+|       lengthAdjust=""
+|       limitingConeAngle=""
+|       markerHeight=""
+|       markerUnits=""
+|       markerWidth=""
+|       maskContentUnits=""
+|       maskUnits=""
+|       numOctaves=""
+|       pathLength=""
+|       patternContentUnits=""
+|       patternTransform=""
+|       patternUnits=""
+|       pointsAtX=""
+|       pointsAtY=""
+|       pointsAtZ=""
+|       preserveAlpha=""
+|       preserveAspectRatio=""
+|       primitiveUnits=""
+|       refX=""
+|       refY=""
+|       repeatCount=""
+|       repeatDur=""
+|       requiredExtensions=""
+|       requiredFeatures=""
+|       specularConstant=""
+|       specularExponent=""
+|       spreadMethod=""
+|       startOffset=""
+|       stdDeviation=""
+|       stitchTiles=""
+|       surfaceScale=""
+|       systemLanguage=""
+|       tableValues=""
+|       targetX=""
+|       targetY=""
+|       textLength=""
+|       viewBox=""
+|       viewTarget=""
+|       xChannelSelector=""
+|       yChannelSelector=""
+|       zoomAndPan=""
+
+#data
+<!DOCTYPE html><body><svg attributename='' attributetype='' basefrequency='' baseprofile='' calcmode='' clippathunits='' diffuseconstant='' edgemode='' filterunits='' filterres='' glyphref='' gradienttransform='' gradientunits='' kernelmatrix='' kernelunitlength='' keypoints='' keysplines='' keytimes='' lengthadjust='' limitingconeangle='' markerheight='' markerunits='' markerwidth='' maskcontentunits='' maskunits='' numoctaves='' pathlength='' patterncontentunits='' patterntransform='' patternunits='' pointsatx='' pointsaty='' pointsatz='' preservealpha='' preserveaspectratio='' primitiveunits='' refx='' refy='' repeatcount='' repeatdur='' requiredextensions='' requiredfeatures='' specularconstant='' specularexponent='' spreadmethod='' startoffset='' stddeviation='' stitchtiles='' surfacescale='' systemlanguage='' tablevalues='' targetx='' targety='' textlength='' viewbox='' viewtarget='' xchannelselector='' ychannelselector='' zoomandpan=''></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       attributeName=""
+|       attributeType=""
+|       baseFrequency=""
+|       baseProfile=""
+|       calcMode=""
+|       clipPathUnits=""
+|       diffuseConstant=""
+|       edgeMode=""
+|       filterUnits=""
+|       filterres=""
+|       glyphRef=""
+|       gradientTransform=""
+|       gradientUnits=""
+|       kernelMatrix=""
+|       kernelUnitLength=""
+|       keyPoints=""
+|       keySplines=""
+|       keyTimes=""
+|       lengthAdjust=""
+|       limitingConeAngle=""
+|       markerHeight=""
+|       markerUnits=""
+|       markerWidth=""
+|       maskContentUnits=""
+|       maskUnits=""
+|       numOctaves=""
+|       pathLength=""
+|       patternContentUnits=""
+|       patternTransform=""
+|       patternUnits=""
+|       pointsAtX=""
+|       pointsAtY=""
+|       pointsAtZ=""
+|       preserveAlpha=""
+|       preserveAspectRatio=""
+|       primitiveUnits=""
+|       refX=""
+|       refY=""
+|       repeatCount=""
+|       repeatDur=""
+|       requiredExtensions=""
+|       requiredFeatures=""
+|       specularConstant=""
+|       specularExponent=""
+|       spreadMethod=""
+|       startOffset=""
+|       stdDeviation=""
+|       stitchTiles=""
+|       surfaceScale=""
+|       systemLanguage=""
+|       tableValues=""
+|       targetX=""
+|       targetY=""
+|       textLength=""
+|       viewBox=""
+|       viewTarget=""
+|       xChannelSelector=""
+|       yChannelSelector=""
+|       zoomAndPan=""
+
+#data
+<!DOCTYPE html><body><math attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' diffuseConstant='' edgeMode='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       attributename=""
+|       attributetype=""
+|       basefrequency=""
+|       baseprofile=""
+|       calcmode=""
+|       clippathunits=""
+|       diffuseconstant=""
+|       edgemode=""
+|       filterunits=""
+|       glyphref=""
+|       gradienttransform=""
+|       gradientunits=""
+|       kernelmatrix=""
+|       kernelunitlength=""
+|       keypoints=""
+|       keysplines=""
+|       keytimes=""
+|       lengthadjust=""
+|       limitingconeangle=""
+|       markerheight=""
+|       markerunits=""
+|       markerwidth=""
+|       maskcontentunits=""
+|       maskunits=""
+|       numoctaves=""
+|       pathlength=""
+|       patterncontentunits=""
+|       patterntransform=""
+|       patternunits=""
+|       pointsatx=""
+|       pointsaty=""
+|       pointsatz=""
+|       preservealpha=""
+|       preserveaspectratio=""
+|       primitiveunits=""
+|       refx=""
+|       refy=""
+|       repeatcount=""
+|       repeatdur=""
+|       requiredextensions=""
+|       requiredfeatures=""
+|       specularconstant=""
+|       specularexponent=""
+|       spreadmethod=""
+|       startoffset=""
+|       stddeviation=""
+|       stitchtiles=""
+|       surfacescale=""
+|       systemlanguage=""
+|       tablevalues=""
+|       targetx=""
+|       targety=""
+|       textlength=""
+|       viewbox=""
+|       viewtarget=""
+|       xchannelselector=""
+|       ychannelselector=""
+|       zoomandpan=""
+
+#data
+<!DOCTYPE html><body><svg contentScriptType='' contentStyleType='' externalResourcesRequired='' filterRes=''></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       contentscripttype=""
+|       contentstyletype=""
+|       externalresourcesrequired=""
+|       filterres=""
+
+#data
+<!DOCTYPE html><body><svg CONTENTSCRIPTTYPE='' CONTENTSTYLETYPE='' EXTERNALRESOURCESREQUIRED='' FILTERRES=''></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       contentscripttype=""
+|       contentstyletype=""
+|       externalresourcesrequired=""
+|       filterres=""
+
+#data
+<!DOCTYPE html><body><svg contentscripttype='' contentstyletype='' externalresourcesrequired='' filterres=''></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       contentscripttype=""
+|       contentstyletype=""
+|       externalresourcesrequired=""
+|       filterres=""
+
+#data
+<!DOCTYPE html><body><math contentScriptType='' contentStyleType='' externalResourcesRequired='' filterRes=''></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       contentscripttype=""
+|       contentstyletype=""
+|       externalresourcesrequired=""
+|       filterres=""
+
+#data
+<!DOCTYPE html><body><svg><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg altGlyph>
+|       <svg altGlyphDef>
+|       <svg altGlyphItem>
+|       <svg animateColor>
+|       <svg animateMotion>
+|       <svg animateTransform>
+|       <svg clipPath>
+|       <svg feBlend>
+|       <svg feColorMatrix>
+|       <svg feComponentTransfer>
+|       <svg feComposite>
+|       <svg feConvolveMatrix>
+|       <svg feDiffuseLighting>
+|       <svg feDisplacementMap>
+|       <svg feDistantLight>
+|       <svg feFlood>
+|       <svg feFuncA>
+|       <svg feFuncB>
+|       <svg feFuncG>
+|       <svg feFuncR>
+|       <svg feGaussianBlur>
+|       <svg feImage>
+|       <svg feMerge>
+|       <svg feMergeNode>
+|       <svg feMorphology>
+|       <svg feOffset>
+|       <svg fePointLight>
+|       <svg feSpecularLighting>
+|       <svg feSpotLight>
+|       <svg feTile>
+|       <svg feTurbulence>
+|       <svg foreignObject>
+|       <svg glyphRef>
+|       <svg linearGradient>
+|       <svg radialGradient>
+|       <svg textPath>
+
+#data
+<!DOCTYPE html><body><svg><altglyph /><altglyphdef /><altglyphitem /><animatecolor /><animatemotion /><animatetransform /><clippath /><feblend /><fecolormatrix /><fecomponenttransfer /><fecomposite /><feconvolvematrix /><fediffuselighting /><fedisplacementmap /><fedistantlight /><feflood /><fefunca /><fefuncb /><fefuncg /><fefuncr /><fegaussianblur /><feimage /><femerge /><femergenode /><femorphology /><feoffset /><fepointlight /><fespecularlighting /><fespotlight /><fetile /><feturbulence /><foreignobject /><glyphref /><lineargradient /><radialgradient /><textpath /></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg altGlyph>
+|       <svg altGlyphDef>
+|       <svg altGlyphItem>
+|       <svg animateColor>
+|       <svg animateMotion>
+|       <svg animateTransform>
+|       <svg clipPath>
+|       <svg feBlend>
+|       <svg feColorMatrix>
+|       <svg feComponentTransfer>
+|       <svg feComposite>
+|       <svg feConvolveMatrix>
+|       <svg feDiffuseLighting>
+|       <svg feDisplacementMap>
+|       <svg feDistantLight>
+|       <svg feFlood>
+|       <svg feFuncA>
+|       <svg feFuncB>
+|       <svg feFuncG>
+|       <svg feFuncR>
+|       <svg feGaussianBlur>
+|       <svg feImage>
+|       <svg feMerge>
+|       <svg feMergeNode>
+|       <svg feMorphology>
+|       <svg feOffset>
+|       <svg fePointLight>
+|       <svg feSpecularLighting>
+|       <svg feSpotLight>
+|       <svg feTile>
+|       <svg feTurbulence>
+|       <svg foreignObject>
+|       <svg glyphRef>
+|       <svg linearGradient>
+|       <svg radialGradient>
+|       <svg textPath>
+
+#data
+<!DOCTYPE html><BODY><SVG><ALTGLYPH /><ALTGLYPHDEF /><ALTGLYPHITEM /><ANIMATECOLOR /><ANIMATEMOTION /><ANIMATETRANSFORM /><CLIPPATH /><FEBLEND /><FECOLORMATRIX /><FECOMPONENTTRANSFER /><FECOMPOSITE /><FECONVOLVEMATRIX /><FEDIFFUSELIGHTING /><FEDISPLACEMENTMAP /><FEDISTANTLIGHT /><FEFLOOD /><FEFUNCA /><FEFUNCB /><FEFUNCG /><FEFUNCR /><FEGAUSSIANBLUR /><FEIMAGE /><FEMERGE /><FEMERGENODE /><FEMORPHOLOGY /><FEOFFSET /><FEPOINTLIGHT /><FESPECULARLIGHTING /><FESPOTLIGHT /><FETILE /><FETURBULENCE /><FOREIGNOBJECT /><GLYPHREF /><LINEARGRADIENT /><RADIALGRADIENT /><TEXTPATH /></SVG>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg altGlyph>
+|       <svg altGlyphDef>
+|       <svg altGlyphItem>
+|       <svg animateColor>
+|       <svg animateMotion>
+|       <svg animateTransform>
+|       <svg clipPath>
+|       <svg feBlend>
+|       <svg feColorMatrix>
+|       <svg feComponentTransfer>
+|       <svg feComposite>
+|       <svg feConvolveMatrix>
+|       <svg feDiffuseLighting>
+|       <svg feDisplacementMap>
+|       <svg feDistantLight>
+|       <svg feFlood>
+|       <svg feFuncA>
+|       <svg feFuncB>
+|       <svg feFuncG>
+|       <svg feFuncR>
+|       <svg feGaussianBlur>
+|       <svg feImage>
+|       <svg feMerge>
+|       <svg feMergeNode>
+|       <svg feMorphology>
+|       <svg feOffset>
+|       <svg fePointLight>
+|       <svg feSpecularLighting>
+|       <svg feSpotLight>
+|       <svg feTile>
+|       <svg feTurbulence>
+|       <svg foreignObject>
+|       <svg glyphRef>
+|       <svg linearGradient>
+|       <svg radialGradient>
+|       <svg textPath>
+
+#data
+<!DOCTYPE html><body><math><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math altglyph>
+|       <math altglyphdef>
+|       <math altglyphitem>
+|       <math animatecolor>
+|       <math animatemotion>
+|       <math animatetransform>
+|       <math clippath>
+|       <math feblend>
+|       <math fecolormatrix>
+|       <math fecomponenttransfer>
+|       <math fecomposite>
+|       <math feconvolvematrix>
+|       <math fediffuselighting>
+|       <math fedisplacementmap>
+|       <math fedistantlight>
+|       <math feflood>
+|       <math fefunca>
+|       <math fefuncb>
+|       <math fefuncg>
+|       <math fefuncr>
+|       <math fegaussianblur>
+|       <math feimage>
+|       <math femerge>
+|       <math femergenode>
+|       <math femorphology>
+|       <math feoffset>
+|       <math fepointlight>
+|       <math fespecularlighting>
+|       <math fespotlight>
+|       <math fetile>
+|       <math feturbulence>
+|       <math foreignobject>
+|       <math glyphref>
+|       <math lineargradient>
+|       <math radialgradient>
+|       <math textpath>
+
+#data
+<!DOCTYPE html><body><svg><solidColor /></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg solidcolor>
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests12.dat b/lib/html5lib/tests/testdata/tree-construction/tests12.dat
new file mode 100644
index 00000000..63107d27
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests12.dat
@@ -0,0 +1,62 @@
+#data
+<!DOCTYPE html><body>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|       <math math>
+|         <math mtext>
+|           <i>
+|             "baz"
+|         <math annotation-xml>
+|           <svg svg>
+|             <svg desc>
+|               <b>
+|                 "eggs"
+|             <svg g>
+|               <svg foreignObject>
+|                 
+|                   "spam"
+|                 <table>
+|                   <tbody>
+|                     <tr>
+|                       <td>
+|                         <img>
+|             <svg g>
+|               "quux"
+|       "bar"
+
+#data
+<!DOCTYPE html><body>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "foo"
+|     <math math>
+|       <math mtext>
+|         <i>
+|           "baz"
+|       <math annotation-xml>
+|         <svg svg>
+|           <svg desc>
+|             <b>
+|               "eggs"
+|           <svg g>
+|             <svg foreignObject>
+|               
+|                 "spam"
+|               <table>
+|                 <tbody>
+|                   <tr>
+|                     <td>
+|                       <img>
+|           <svg g>
+|             "quux"
+|     "bar"
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests14.dat b/lib/html5lib/tests/testdata/tree-construction/tests14.dat
new file mode 100644
index 00000000..a08b7649
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests14.dat
@@ -0,0 +1,75 @@
+#data
+<!DOCTYPE html><html><body><xyz:abc></xyz:abc>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <xyz:abc>
+
+#data
+<!DOCTYPE html><html><body><xyz:abc></xyz:abc><span></span>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <xyz:abc>
+|     <span>
+
+#data
+<!DOCTYPE html><html><html abc:def=gh><xyz:abc></xyz:abc>
+#errors
+(1,38): non-html-root
+#document
+| <!DOCTYPE html>
+| <html>
+|   abc:def="gh"
+|   <head>
+|   <body>
+|     <xyz:abc>
+
+#data
+<!DOCTYPE html><html xml:lang=bar><html xml:lang=foo>
+#errors
+(1,53): non-html-root
+#document
+| <!DOCTYPE html>
+| <html>
+|   xml:lang="bar"
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><html 123=456>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   123="456"
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><html 123=456><html 789=012>
+#errors
+(1,43): non-html-root
+#document
+| <!DOCTYPE html>
+| <html>
+|   123="456"
+|   789="012"
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><html><body 789=012>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     789="012"
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests15.dat b/lib/html5lib/tests/testdata/tree-construction/tests15.dat
new file mode 100644
index 00000000..93d06a87
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests15.dat
@@ -0,0 +1,216 @@
+#data
+<!DOCTYPE html><b><i><u> X
+#errors
+(1,31): unexpected-end-tag
+(1,36): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <b>
+|         <i>
+|           <u>
+|     <b>
+|       <i>
+|         <u>
+|           " "
+|           
+|             "X"
+
+#data
+<b><i><u>
+X
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,16): unexpected-end-tag
+(2,4): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <b>
+|         <i>
+|           <u>
+|     <b>
+|       <i>
+|         <u>
+|           "
+"
+|           
+|             "X"
+
+#data
+<!doctype html></html> <head>
+#errors
+(1,29): expected-eof-but-got-start-tag
+(1,29): unexpected-start-tag-ignored
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     " "
+
+#data
+<!doctype html></body><meta>
+#errors
+(1,28): unexpected-start-tag-after-body
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <meta>
+
+#data
+<html></html><!-- foo -->
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+| <!--  foo  -->
+
+#data
+<!doctype html></body><title>X</title>
+#errors
+(1,29): unexpected-start-tag-after-body
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <title>
+|       "X"
+
+#data
+<!doctype html><table> X<meta></table>
+#errors
+(1,23): foster-parenting-character
+(1,24): foster-parenting-character
+(1,30): foster-parenting-start-character
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     " X"
+|     <meta>
+|     <table>
+
+#data
+<!doctype html><table> x</table>
+#errors
+(1,23): foster-parenting-character
+(1,24): foster-parenting-character
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     " x"
+|     <table>
+
+#data
+<!doctype html><table> x </table>
+#errors
+(1,23): foster-parenting-character
+(1,24): foster-parenting-character
+(1,25): foster-parenting-character
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     " x "
+|     <table>
+
+#data
+<!doctype html><table><tr> x</table>
+#errors
+(1,27): foster-parenting-character
+(1,28): foster-parenting-character
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     " x"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><table>X<style> <tr>x </style> </table>
+#errors
+(1,23): foster-parenting-character
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X"
+|     <table>
+|       <style>
+|         " <tr>x "
+|       " "
+
+#data
+<!doctype html><div><table><a>foo</a> <tr><td>bar</td> </tr></table></div>
+#errors
+(1,30): foster-parenting-start-tag
+(1,31): foster-parenting-character
+(1,32): foster-parenting-character
+(1,33): foster-parenting-character
+(1,37): foster-parenting-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <a>
+|         "foo"
+|       <table>
+|         " "
+|         <tbody>
+|           <tr>
+|             <td>
+|               "bar"
+|             " "
+
+#data
+<frame></frame></frame><frameset><frame><frameset><frame></frameset><noframes></frameset><noframes>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,7): unexpected-start-tag-ignored
+(1,15): unexpected-end-tag
+(1,23): unexpected-end-tag
+(1,33): unexpected-start-tag
+(1,99): expected-named-closing-tag-but-got-eof
+(1,99): eof-in-frameset
+#document
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+|     <frameset>
+|       <frame>
+|     <noframes>
+|       "</frameset><noframes>"
+
+#data
+<!DOCTYPE html><object></html>
+#errors
+(1,30): expected-body-in-scope
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <object>
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests16.dat b/lib/html5lib/tests/testdata/tree-construction/tests16.dat
new file mode 100644
index 00000000..cea7340a
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests16.dat
@@ -0,0 +1,2604 @@
+#data
+<!doctype html><script>
+#errors
+(1,23): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<!doctype html><script>a
+#errors
+(1,24): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "a"
+|   <body>
+
+#data
+<!doctype html><script><
+#errors
+(1,24): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<"
+|   <body>
+
+#data
+<!doctype html><script></
+#errors
+(1,25): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</"
+|   <body>
+
+#data
+<!doctype html><script></S
+#errors
+(1,26): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</S"
+|   <body>
+
+#data
+<!doctype html><script></SC
+#errors
+(1,27): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</SC"
+|   <body>
+
+#data
+<!doctype html><script></SCR
+#errors
+(1,28): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</SCR"
+|   <body>
+
+#data
+<!doctype html><script></SCRI
+#errors
+(1,29): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</SCRI"
+|   <body>
+
+#data
+<!doctype html><script></SCRIP
+#errors
+(1,30): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</SCRIP"
+|   <body>
+
+#data
+<!doctype html><script></SCRIPT
+#errors
+(1,31): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</SCRIPT"
+|   <body>
+
+#data
+<!doctype html><script></SCRIPT 
+#errors
+(1,32): expected-attribute-name-but-got-eof
+(1,32): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:33) eof-in-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<!doctype html><script></s
+#errors
+(1,26): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</s"
+|   <body>
+
+#data
+<!doctype html><script></sc
+#errors
+(1,27): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</sc"
+|   <body>
+
+#data
+<!doctype html><script></scr
+#errors
+(1,28): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</scr"
+|   <body>
+
+#data
+<!doctype html><script></scri
+#errors
+(1,29): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</scri"
+|   <body>
+
+#data
+<!doctype html><script></scrip
+#errors
+(1,30): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</scrip"
+|   <body>
+
+#data
+<!doctype html><script></script
+#errors
+(1,31): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</script"
+|   <body>
+
+#data
+<!doctype html><script></script 
+#errors
+(1,32): expected-attribute-name-but-got-eof
+(1,32): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:33) eof-in-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<!doctype html><script><!
+#errors
+(1,25): expected-script-data-but-got-eof
+(1,25): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!"
+|   <body>
+
+#data
+<!doctype html><script><!a
+#errors
+(1,26): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!a"
+|   <body>
+
+#data
+<!doctype html><script><!-
+#errors
+(1,26): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!-"
+|   <body>
+
+#data
+<!doctype html><script><!-a
+#errors
+(1,27): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!-a"
+|   <body>
+
+#data
+<!doctype html><script><!--
+#errors
+(1,27): expected-named-closing-tag-but-got-eof
+(1,27): unexpected-eof-in-text-mode
+#new-errors
+(1:28) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--"
+|   <body>
+
+#data
+<!doctype html><script><!--a
+#errors
+(1,28): expected-named-closing-tag-but-got-eof
+(1,28): unexpected-eof-in-text-mode
+#new-errors
+(1:29) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--a"
+|   <body>
+
+#data
+<!doctype html><script><!--<
+#errors
+(1,28): expected-named-closing-tag-but-got-eof
+(1,28): unexpected-eof-in-text-mode
+#new-errors
+(1:29) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<"
+|   <body>
+
+#data
+<!doctype html><script><!--<a
+#errors
+(1,29): expected-named-closing-tag-but-got-eof
+(1,29): unexpected-eof-in-text-mode
+#new-errors
+(1:30) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<a"
+|   <body>
+
+#data
+<!doctype html><script><!--</
+#errors
+(1,29): expected-named-closing-tag-but-got-eof
+(1,29): unexpected-eof-in-text-mode
+#new-errors
+(1:30) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--</"
+|   <body>
+
+#data
+<!doctype html><script><!--</script
+#errors
+(1,35): expected-named-closing-tag-but-got-eof
+(1,35): unexpected-eof-in-text-mode
+#new-errors
+(1:36) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--</script"
+|   <body>
+
+#data
+<!doctype html><script><!--</script 
+#errors
+(1,36): expected-attribute-name-but-got-eof
+(1,36): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:37) eof-in-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--"
+|   <body>
+
+#data
+<!doctype html><script><!--<s
+#errors
+(1,29): expected-named-closing-tag-but-got-eof
+(1,29): unexpected-eof-in-text-mode
+#new-errors
+(1:30) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<s"
+|   <body>
+
+#data
+<!doctype html><script><!--<script
+#errors
+(1,34): expected-named-closing-tag-but-got-eof
+(1,34): unexpected-eof-in-text-mode
+#new-errors
+(1:35) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script"
+|   <body>
+
+#data
+<!doctype html><script><!--<script 
+#errors
+(1,35): eof-in-script-in-script
+(1,35): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:36) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script "
+|   <body>
+
+#data
+<!doctype html><script><!--<script <
+#errors
+(1,36): eof-in-script-in-script
+(1,36): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:37) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script <"
+|   <body>
+
+#data
+<!doctype html><script><!--<script <a
+#errors
+(1,37): eof-in-script-in-script
+(1,37): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:38) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script <a"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </
+#errors
+(1,37): eof-in-script-in-script
+(1,37): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:38) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </s
+#errors
+(1,38): eof-in-script-in-script
+(1,38): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:39) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </s"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script
+#errors
+(1,43): eof-in-script-in-script
+(1,43): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:44) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </scripta
+#errors
+(1,44): eof-in-script-in-script
+(1,44): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:45) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </scripta"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script 
+#errors
+(1,44): expected-named-closing-tag-but-got-eof
+(1,44): unexpected-eof-in-text-mode
+#new-errors
+(1:45) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script>
+#errors
+(1,44): expected-named-closing-tag-but-got-eof
+(1,44): unexpected-eof-in-text-mode
+#new-errors
+(1:45) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script>"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script/
+#errors
+(1,44): expected-named-closing-tag-but-got-eof
+(1,44): unexpected-eof-in-text-mode
+#new-errors
+(1:45) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script/"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script <
+#errors
+(1,45): expected-named-closing-tag-but-got-eof
+(1,45): unexpected-eof-in-text-mode
+#new-errors
+(1:46) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script <"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script <a
+#errors
+(1,46): expected-named-closing-tag-but-got-eof
+(1,46): unexpected-eof-in-text-mode
+#new-errors
+(1:47) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script <a"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script </
+#errors
+(1,46): expected-named-closing-tag-but-got-eof
+(1,46): unexpected-eof-in-text-mode
+#new-errors
+(1:47) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script </"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script </script
+#errors
+(1,52): expected-named-closing-tag-but-got-eof
+(1,52): unexpected-eof-in-text-mode
+#new-errors
+(1:53) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script </script"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script </script 
+#errors
+(1,53): expected-attribute-name-but-got-eof
+(1,53): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:54) eof-in-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script </script/
+#errors
+(1,53): unexpected-EOF-after-solidus-in-tag
+(1,53): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:54) eof-in-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script </script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<!doctype html><script><!--<script -
+#errors
+(1,36): eof-in-script-in-script
+(1,36): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:37) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -"
+|   <body>
+
+#data
+<!doctype html><script><!--<script -a
+#errors
+(1,37): eof-in-script-in-script
+(1,37): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:38) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -a"
+|   <body>
+
+#data
+<!doctype html><script><!--<script -<
+#errors
+(1,37): eof-in-script-in-script
+(1,37): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:38) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -<"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --
+#errors
+(1,37): eof-in-script-in-script
+(1,37): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:38) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --a
+#errors
+(1,38): eof-in-script-in-script
+(1,38): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:39) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --a"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --<
+#errors
+(1,38): eof-in-script-in-script
+(1,38): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:39) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --<"
+|   <body>
+
+#data
+<!doctype html><script><!--<script -->
+#errors
+(1,38): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --><
+#errors
+(1,39): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --><"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --></
+#errors
+(1,40): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --></"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --></script
+#errors
+(1,46): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --></script"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --></script 
+#errors
+(1,47): expected-attribute-name-but-got-eof
+(1,47): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:48) eof-in-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --></script/
+#errors
+(1,47): unexpected-EOF-after-solidus-in-tag
+(1,47): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:48) eof-in-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script><\/script>--></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script><\/script>-->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></scr'+'ipt>--></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></scr'+'ipt>-->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></script><script></script></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></script><script></script>--><!--</script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>--><!--"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></script><script></script>-- ></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>-- >"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></script><script></script>- -></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>- ->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></script><script></script>- - ></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>- - >"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></script><script></script>-></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script>--!></script>X
+#errors
+(1,49): expected-named-closing-tag-but-got-eof
+(1,49): unexpected-EOF-in-text-mode
+#new-errors
+(1:50) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script>--!></script>X"
+|   <body>
+
+#data
+<!doctype html><script><!--<scr'+'ipt></script>--></script>
+#errors
+(1,59): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<scr'+'ipt>"
+|   <body>
+|     "-->"
+
+#data
+<!doctype html><script><!--<script></scr'+'ipt></script>X
+#errors
+(1,57): expected-named-closing-tag-but-got-eof
+(1,57): unexpected-eof-in-text-mode
+#new-errors
+(1:58) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></scr'+'ipt></script>X"
+|   <body>
+
+#data
+<!doctype html><style><!--<style></style>--></style>
+#errors
+(1,52): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "<!--<style>"
+|   <body>
+|     "-->"
+
+#data
+<!doctype html><style><!--</style>X
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "<!--"
+|   <body>
+|     "X"
+
+#data
+<!doctype html><style><!--...</style>...--></style>
+#errors
+(1,51): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "<!--..."
+|   <body>
+|     "...-->"
+
+#data
+<!doctype html><style><!--<html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style></style>X
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "<!--<html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style>"
+|   <body>
+|     "X"
+
+#data
+<!doctype html><style><!--...<style><!--...--!></style>--></style>
+#errors
+(1,66): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "<!--...<style><!--...--!>"
+|   <body>
+|     "-->"
+
+#data
+<!doctype html><style><!--...</style><!-- --><style>@import ...</style>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "<!--..."
+|     <!--   -->
+|     <style>
+|       "@import ..."
+|   <body>
+
+#data
+<!doctype html><style>...<style><!--...</style><!-- --></style>
+#errors
+(1,63): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "...<style><!--..."
+|     <!--   -->
+|   <body>
+
+#data
+<!doctype html><style>...<!--[if IE]><style>...</style>X
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "...<!--[if IE]><style>..."
+|   <body>
+|     "X"
+
+#data
+<!doctype html><title><!--<title></title>--></title>
+#errors
+(1,52): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "<!--<title>"
+|   <body>
+|     "-->"
+
+#data
+<!doctype html><title>&lt;/title></title>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "</title>"
+|   <body>
+
+#data
+<!doctype html><title>foo/title><link></head><body>X
+#errors
+(1,52): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "foo/title><link></head><body>X"
+|   <body>
+
+#data
+<!doctype html><noscript><!--<noscript></noscript>--></noscript>
+#errors
+(1,64): unexpected-end-tag
+#script-on
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noscript>
+|       "<!--<noscript>"
+|   <body>
+|     "-->"
+
+#data
+<!doctype html><noscript><!--<noscript></noscript>--></noscript>
+#errors
+#script-off
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noscript>
+|       <!-- <noscript></noscript> -->
+|   <body>
+
+#data
+<!doctype html><noscript><!--</noscript>X<noscript>--></noscript>
+#errors
+#script-on
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noscript>
+|       "<!--"
+|   <body>
+|     "X"
+|     <noscript>
+|       "-->"
+
+#data
+<!doctype html><noscript><!--</noscript>X<noscript>--></noscript>
+#errors
+#script-off
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noscript>
+|       <!-- </noscript>X<noscript> -->
+|   <body>
+
+#data
+<!doctype html><noscript><iframe></noscript>X
+#errors
+#script-on
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noscript>
+|       "<iframe>"
+|   <body>
+|     "X"
+
+#data
+<!doctype html><noscript><iframe></noscript>X
+#errors
+ * (1,34) unexpected token in head noscript
+ * (1,46) unexpected EOF
+#script-off
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noscript>
+|   <body>
+|     <iframe>
+|       "</noscript>X"
+
+#data
+<!doctype html><noframes><!--<noframes></noframes>--></noframes>
+#errors
+(1,64): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noframes>
+|       "<!--<noframes>"
+|   <body>
+|     "-->"
+
+#data
+<!doctype html><noframes><body><script><!--...</script></body></noframes></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noframes>
+|       "<body><script><!--...</script></body>"
+|   <body>
+
+#data
+<!doctype html><textarea><!--<textarea></textarea>--></textarea>
+#errors
+(1,64): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "<!--<textarea>"
+|     "-->"
+
+#data
+<!doctype html><textarea>&lt;/textarea></textarea>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "</textarea>"
+
+#data
+<!doctype html><textarea>&lt;</textarea>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "<"
+
+#data
+<!doctype html><textarea>a&lt;b</textarea>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "a<b"
+
+#data
+<!doctype html><iframe><!--<iframe></iframe>--></iframe>
+#errors
+(1,56): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+|       "<!--<iframe>"
+|     "-->"
+
+#data
+<!doctype html><iframe>...<!--X->...<!--/X->...</iframe>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+|       "...<!--X->...<!--/X->..."
+
+#data
+<!doctype html><xmp><!--<xmp></xmp>--></xmp>
+#errors
+(1,44): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <xmp>
+|       "<!--<xmp>"
+|     "-->"
+
+#data
+<!doctype html><noembed><!--<noembed></noembed>--></noembed>
+#errors
+(1,60): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <noembed>
+|       "<!--<noembed>"
+|     "-->"
+
+#data
+<script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,8): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<script>a
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,9): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "a"
+|   <body>
+
+#data
+<script><
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,9): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<"
+|   <body>
+
+#data
+<script></
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,10): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</"
+|   <body>
+
+#data
+<script></S
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,11): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</S"
+|   <body>
+
+#data
+<script></SC
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,12): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</SC"
+|   <body>
+
+#data
+<script></SCR
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,13): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</SCR"
+|   <body>
+
+#data
+<script></SCRI
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,14): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</SCRI"
+|   <body>
+
+#data
+<script></SCRIP
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,15): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</SCRIP"
+|   <body>
+
+#data
+<script></SCRIPT
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,16): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</SCRIPT"
+|   <body>
+
+#data
+<script></SCRIPT 
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,17): expected-attribute-name-but-got-eof
+(1,17): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:18) eof-in-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<script></s
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,11): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</s"
+|   <body>
+
+#data
+<script></sc
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,12): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</sc"
+|   <body>
+
+#data
+<script></scr
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,13): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</scr"
+|   <body>
+
+#data
+<script></scri
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,14): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</scri"
+|   <body>
+
+#data
+<script></scrip
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,15): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</scrip"
+|   <body>
+
+#data
+<script></script
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,16): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</script"
+|   <body>
+
+#data
+<script></script 
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,17): expected-attribute-name-but-got-eof
+(1,17): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:18) eof-in-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<script><!
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,10): expected-script-data-but-got-eof
+(1,10): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!"
+|   <body>
+
+#data
+<script><!a
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,11): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!a"
+|   <body>
+
+#data
+<script><!-
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,11): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!-"
+|   <body>
+
+#data
+<script><!-a
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,12): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!-a"
+|   <body>
+
+#data
+<script><!--
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,12): expected-named-closing-tag-but-got-eof
+(1,12): unexpected-eof-in-text-mode
+#new-errors
+(1:13) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--"
+|   <body>
+
+#data
+<script><!--a
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,13): expected-named-closing-tag-but-got-eof
+(1,13): unexpected-eof-in-text-mode
+#new-errors
+(1:14) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--a"
+|   <body>
+
+#data
+<script><!--<
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,13): expected-named-closing-tag-but-got-eof
+(1,13): unexpected-eof-in-text-mode
+#new-errors
+(1:14) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<"
+|   <body>
+
+#data
+<script><!--<a
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,14): expected-named-closing-tag-but-got-eof
+(1,14): unexpected-eof-in-text-mode
+#new-errors
+(1:15) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<a"
+|   <body>
+
+#data
+<script><!--</
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,14): expected-named-closing-tag-but-got-eof
+(1,14): unexpected-eof-in-text-mode
+#new-errors
+(1:15) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--</"
+|   <body>
+
+#data
+<script><!--</script
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,20): expected-named-closing-tag-but-got-eof
+(1,20): unexpected-eof-in-text-mode
+#new-errors
+(1:21) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--</script"
+|   <body>
+
+#data
+<script><!--</script 
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,21): expected-attribute-name-but-got-eof
+(1,21): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:22) eof-in-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--"
+|   <body>
+
+#data
+<script><!--<s
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,14): expected-named-closing-tag-but-got-eof
+(1,14): unexpected-eof-in-text-mode
+#new-errors
+(1:15) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<s"
+|   <body>
+
+#data
+<script><!--<script
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,19): expected-named-closing-tag-but-got-eof
+(1,19): unexpected-eof-in-text-mode
+#new-errors
+(1:20) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script"
+|   <body>
+
+#data
+<script><!--<script 
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,20): eof-in-script-in-script
+(1,20): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:21) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script "
+|   <body>
+
+#data
+<script><!--<script <
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,21): eof-in-script-in-script
+(1,21): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:22) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script <"
+|   <body>
+
+#data
+<script><!--<script <a
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,22): eof-in-script-in-script
+(1,22): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:23) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script <a"
+|   <body>
+
+#data
+<script><!--<script </
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,22): eof-in-script-in-script
+(1,22): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:23) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </"
+|   <body>
+
+#data
+<script><!--<script </s
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,23): eof-in-script-in-script
+(1,23): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:24) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </s"
+|   <body>
+
+#data
+<script><!--<script </script
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,28): eof-in-script-in-script
+(1,28): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:29) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script"
+|   <body>
+
+#data
+<script><!--<script </scripta
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,29): eof-in-script-in-script
+(1,29): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:30) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </scripta"
+|   <body>
+
+#data
+<script><!--<script </script 
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,29): expected-named-closing-tag-but-got-eof
+(1,29): unexpected-eof-in-text-mode
+#new-errors
+(1:30) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<script><!--<script </script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,29): expected-named-closing-tag-but-got-eof
+(1,29): unexpected-eof-in-text-mode
+#new-errors
+(1:30) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script>"
+|   <body>
+
+#data
+<script><!--<script </script/
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,29): expected-named-closing-tag-but-got-eof
+(1,29): unexpected-eof-in-text-mode
+#new-errors
+(1:30) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script/"
+|   <body>
+
+#data
+<script><!--<script </script <
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,30): expected-named-closing-tag-but-got-eof
+(1,30): unexpected-eof-in-text-mode
+#new-errors
+(1:31) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script <"
+|   <body>
+
+#data
+<script><!--<script </script <a
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,31): expected-named-closing-tag-but-got-eof
+(1,31): unexpected-eof-in-text-mode
+#new-errors
+(1:32) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script <a"
+|   <body>
+
+#data
+<script><!--<script </script </
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,31): expected-named-closing-tag-but-got-eof
+(1,31): unexpected-eof-in-text-mode
+#new-errors
+(1:32) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script </"
+|   <body>
+
+#data
+<script><!--<script </script </script
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,37): expected-named-closing-tag-but-got-eof
+(1,37): unexpected-eof-in-text-mode
+#new-errors
+(1:38) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script </script"
+|   <body>
+
+#data
+<script><!--<script </script </script 
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,38): expected-attribute-name-but-got-eof
+(1,38): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:39) eof-in-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<script><!--<script </script </script/
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,38): unexpected-EOF-after-solidus-in-tag
+(1,38): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:39) eof-in-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<script><!--<script </script </script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<script><!--<script -
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,21): eof-in-script-in-script
+(1,21): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:22) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -"
+|   <body>
+
+#data
+<script><!--<script -a
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,22): eof-in-script-in-script
+(1,22): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:23) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -a"
+|   <body>
+
+#data
+<script><!--<script --
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,22): eof-in-script-in-script
+(1,22): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:23) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --"
+|   <body>
+
+#data
+<script><!--<script --a
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,23): eof-in-script-in-script
+(1,23): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:24) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --a"
+|   <body>
+
+#data
+<script><!--<script -->
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,23): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<script><!--<script --><
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,24): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --><"
+|   <body>
+
+#data
+<script><!--<script --></
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,25): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --></"
+|   <body>
+
+#data
+<script><!--<script --></script
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,31): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --></script"
+|   <body>
+
+#data
+<script><!--<script --></script 
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,32): expected-attribute-name-but-got-eof
+(1,32): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:33) eof-in-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<script><!--<script --></script/
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,32): unexpected-EOF-after-solidus-in-tag
+(1,32): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:33) eof-in-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<script><!--<script --></script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<script><!--<script><\/script>--></script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script><\/script>-->"
+|   <body>
+
+#data
+<script><!--<script></scr'+'ipt>--></script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></scr'+'ipt>-->"
+|   <body>
+
+#data
+<script><!--<script></script><script></script></script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>"
+|   <body>
+
+#data
+<script><!--<script></script><script></script>--><!--</script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>--><!--"
+|   <body>
+
+#data
+<script><!--<script></script><script></script>-- ></script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>-- >"
+|   <body>
+
+#data
+<script><!--<script></script><script></script>- -></script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>- ->"
+|   <body>
+
+#data
+<script><!--<script></script><script></script>- - ></script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>- - >"
+|   <body>
+
+#data
+<script><!--<script></script><script></script>-></script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>->"
+|   <body>
+
+#data
+<script><!--<script>--!></script>X
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,34): expected-named-closing-tag-but-got-eof
+(1,34): unexpected-eof-in-text-mode
+#new-errors
+(1:35) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script>--!></script>X"
+|   <body>
+
+#data
+<script><!--<scr'+'ipt></script>--></script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,44): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<scr'+'ipt>"
+|   <body>
+|     "-->"
+
+#data
+<script><!--<script></scr'+'ipt></script>X
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,42): expected-named-closing-tag-but-got-eof
+(1,42): unexpected-eof-in-text-mode
+#new-errors
+(1:43) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></scr'+'ipt></script>X"
+|   <body>
+
+#data
+<style><!--<style></style>--></style>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,37): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--<style>"
+|   <body>
+|     "-->"
+
+#data
+<style><!--</style>X
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--"
+|   <body>
+|     "X"
+
+#data
+<style><!--...</style>...--></style>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,36): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--..."
+|   <body>
+|     "...-->"
+
+#data
+<style><!--<html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style></style>X
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--<html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style>"
+|   <body>
+|     "X"
+
+#data
+<style><!--...<style><!--...--!></style>--></style>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,51): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--...<style><!--...--!>"
+|   <body>
+|     "-->"
+
+#data
+<style><!--...</style><!-- --><style>@import ...</style>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--..."
+|     <!--   -->
+|     <style>
+|       "@import ..."
+|   <body>
+
+#data
+<style>...<style><!--...</style><!-- --></style>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,48): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       "...<style><!--..."
+|     <!--   -->
+|   <body>
+
+#data
+<style>...<!--[if IE]><style>...</style>X
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       "...<!--[if IE]><style>..."
+|   <body>
+|     "X"
+
+#data
+<title><!--<title></title>--></title>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,37): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <title>
+|       "<!--<title>"
+|   <body>
+|     "-->"
+
+#data
+<title>&lt;/title></title>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <title>
+|       "</title>"
+|   <body>
+
+#data
+<title>foo/title><link></head><body>X
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,37): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <title>
+|       "foo/title><link></head><body>X"
+|   <body>
+
+#data
+<noscript><!--<noscript></noscript>--></noscript>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(1,49): unexpected-end-tag
+#script-on
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       "<!--<noscript>"
+|   <body>
+|     "-->"
+
+#data
+<noscript><!--<noscript></noscript>--></noscript>
+#errors
+ * (1,11) missing DOCTYPE
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <!-- <noscript></noscript> -->
+|   <body>
+
+#data
+<noscript><!--</noscript>X<noscript>--></noscript>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+#script-on
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       "<!--"
+|   <body>
+|     "X"
+|     <noscript>
+|       "-->"
+
+#data
+<noscript><!--</noscript>X<noscript>--></noscript>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <!-- </noscript>X<noscript> -->
+|   <body>
+
+#data
+<noscript><iframe></noscript>X
+#errors
+(1,10): expected-doctype-but-got-start-tag
+#script-on
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       "<iframe>"
+|   <body>
+|     "X"
+
+#data
+<noscript><iframe></noscript>X
+#errors
+ * (1,11) missing DOCTYPE
+ * (1,19) unexpected token in head noscript
+ * (1,31) unexpected EOF
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|   <body>
+|     <iframe>
+|       "</noscript>X"
+
+#data
+<noframes><!--<noframes></noframes>--></noframes>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(1,49): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <noframes>
+|       "<!--<noframes>"
+|   <body>
+|     "-->"
+
+#data
+<noframes><body><script><!--...</script></body></noframes></html>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <noframes>
+|       "<body><script><!--...</script></body>"
+|   <body>
+
+#data
+<textarea><!--<textarea></textarea>--></textarea>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(1,49): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "<!--<textarea>"
+|     "-->"
+
+#data
+<textarea>&lt;/textarea></textarea>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "</textarea>"
+
+#data
+<iframe><!--<iframe></iframe>--></iframe>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,41): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+|       "<!--<iframe>"
+|     "-->"
+
+#data
+<iframe>...<!--X->...<!--/X->...</iframe>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+|       "...<!--X->...<!--/X->..."
+
+#data
+<xmp><!--<xmp></xmp>--></xmp>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,29): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <xmp>
+|       "<!--<xmp>"
+|     "-->"
+
+#data
+<noembed><!--<noembed></noembed>--></noembed>
+#errors
+(1,9): expected-doctype-but-got-start-tag
+(1,45): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <noembed>
+|       "<!--<noembed>"
+|     "-->"
+
+#data
+<!doctype html><table>
+
+#errors
+(2,0): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       "
+"
+
+#data
+<!doctype html><table><td><span><font></span><span>
+#errors
+(1,26): unexpected-cell-in-table-body
+(1,45): unexpected-end-tag
+(1,51): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <span>
+|               <font>
+|             <font>
+|               <span>
+
+#data
+<!doctype html><form><table></form><form></table></form>
+#errors
+(1,35): unexpected-end-tag-implies-table-voodoo
+(1,35): unexpected-end-tag
+(1,41): unexpected-form-in-table
+(1,56): unexpected-end-tag
+(1,56): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <table>
+|         <form>
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests17.dat b/lib/html5lib/tests/testdata/tree-construction/tests17.dat
new file mode 100644
index 00000000..e49bcf03
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests17.dat
@@ -0,0 +1,179 @@
+#data
+<!doctype html><table><tbody><select><tr>
+#errors
+(1,37): unexpected-start-tag-implies-table-voodoo
+(1,41): unexpected-table-element-start-tag-in-select-in-table
+(1,41): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><table><tr><select><td>
+#errors
+(1,34): unexpected-start-tag-implies-table-voodoo
+(1,38): unexpected-table-element-start-tag-in-select-in-table
+(1,38): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<!doctype html><table><tr><td><select><td>
+#errors
+(1,42): unexpected-table-element-start-tag-in-select-in-table
+(1,42): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <select>
+|           <td>
+
+#data
+<!doctype html><table><tr><th><select><td>
+#errors
+(1,42): unexpected-table-element-start-tag-in-select-in-table
+(1,42): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <th>
+|             <select>
+|           <td>
+
+#data
+<!doctype html><table><caption><select><tr>
+#errors
+(1,43): unexpected-table-element-start-tag-in-select-in-table
+(1,43): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <select>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><select><tr>
+#errors
+(1,27): unexpected-start-tag-in-select
+(1,27): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><select><td>
+#errors
+(1,27): unexpected-start-tag-in-select
+(1,27): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><select><th>
+#errors
+(1,27): unexpected-start-tag-in-select
+(1,27): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><select><tbody>
+#errors
+(1,30): unexpected-start-tag-in-select
+(1,30): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><select><thead>
+#errors
+(1,30): unexpected-start-tag-in-select
+(1,30): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><select><tfoot>
+#errors
+(1,30): unexpected-start-tag-in-select
+(1,30): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><select><caption>
+#errors
+(1,32): unexpected-start-tag-in-select
+(1,32): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><table><tr></table>a
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|     "a"
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests18.dat b/lib/html5lib/tests/testdata/tree-construction/tests18.dat
new file mode 100644
index 00000000..05363b39
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests18.dat
@@ -0,0 +1,534 @@
+#data
+<plaintext></plaintext>
+#errors
+11: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+23: End of file seen and there were open elements.
+11: Unclosed element “plaintext”.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html><plaintext></plaintext>
+#errors
+(1,38): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html><html><plaintext></plaintext>
+#errors
+44: End of file seen and there were open elements.
+32: Unclosed element “plaintext”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html><head><plaintext></plaintext>
+#errors
+44: End of file seen and there were open elements.
+32: Unclosed element “plaintext”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html><html><noscript><plaintext></plaintext>
+#errors
+42: Bad start tag in “plaintext” in “head”.
+54: End of file seen and there were open elements.
+42: Unclosed element “plaintext”.
+#script-off
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noscript>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html></head><plaintext></plaintext>
+#errors
+45: End of file seen and there were open elements.
+33: Unclosed element “plaintext”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html><body><plaintext></plaintext>
+#errors
+44: End of file seen and there were open elements.
+32: Unclosed element “plaintext”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html><table><plaintext></plaintext>
+#errors
+(1,33): foster-parenting-start-tag
+(1,45): foster-parenting-character
+(1,45): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+|     <table>
+
+#data
+<!doctype html><table><tbody><plaintext></plaintext>
+#errors
+(1,40): foster-parenting-start-tag
+(1,41): foster-parenting-character
+(1,52): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+|     <table>
+|       <tbody>
+
+#data
+<!doctype html><table><tbody><tr><plaintext></plaintext>
+#errors
+(1,44): foster-parenting-start-tag
+(1,56): foster-parenting-character
+(1,56): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><table><td><plaintext></plaintext>
+#errors
+(1,26): unexpected-cell-in-table-body
+(1,49): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <plaintext>
+|               "</plaintext>"
+
+#data
+<!doctype html><table><caption><plaintext></plaintext>
+#errors
+(1,54): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <plaintext>
+|           "</plaintext>"
+
+#data
+<!doctype html><table><colgroup><plaintext></plaintext>
+#errors
+43: Start tag “plaintext” seen in “table”.
+55: Misplaced non-space characters inside a table.
+55: End of file seen and there were open elements.
+43: Unclosed element “plaintext”.
+22: Unclosed element “table”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+|     <table>
+|       <colgroup>
+
+#data
+<!doctype html><select><plaintext></plaintext>X
+#errors
+34: Stray start tag “plaintext”.
+46: Stray end tag “plaintext”.
+47: End of file seen and there were open elements.
+23: Unclosed element “select”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "X"
+
+#data
+<!doctype html><table><select><plaintext>a<caption>b
+#errors
+30: Start tag “select” seen in “table”.
+41: Stray start tag “plaintext”.
+51: “caption” start tag with “select” open.
+52: End of file seen and there were open elements.
+51: Unclosed element “caption”.
+22: Unclosed element “table”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "a"
+|     <table>
+|       <caption>
+|         "b"
+
+#data
+<!doctype html><template><plaintext>a</template>b
+#errors
+49: End of file seen and there were open elements.
+36: Unclosed element “plaintext”.
+25: Unclosed element “template”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <plaintext>
+|           "a</template>b"
+|   <body>
+
+#data
+<!doctype html><body></body><plaintext></plaintext>
+#errors
+39: Stray start tag “plaintext”.
+51: End of file seen and there were open elements.
+39: Unclosed element “plaintext”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html><frameset><plaintext></plaintext>
+#errors
+36: Stray start tag “plaintext”.
+48: Stray end tag “plaintext”.
+48: End of file seen and there were open elements.
+25: Unclosed element “frameset”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><frameset></frameset><plaintext></plaintext>
+#errors
+47: Stray start tag “plaintext”.
+59: Stray end tag “plaintext”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><body></body></html><plaintext></plaintext>
+#errors
+46: Stray start tag “plaintext”.
+58: End of file seen and there were open elements.
+46: Unclosed element “plaintext”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html><frameset></frameset></html><plaintext></plaintext>
+#errors
+54: Stray start tag “plaintext”.
+66: Stray end tag “plaintext”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><svg><plaintext>a</plaintext>b
+#errors
+45: End of file seen and there were open elements.
+20: Unclosed element “svg”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg plaintext>
+|         "a"
+|       "b"
+
+#data
+<!doctype html><svg><title><plaintext>a</plaintext>b
+#errors
+52: End of file seen and there were open elements.
+38: Unclosed element “plaintext”.
+27: Unclosed element “title”.
+20: Unclosed element “svg”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg title>
+|         <plaintext>
+|           "a</plaintext>b"
+
+#data
+<!doctype html><table><tr><style></script></style>abc
+#errors
+(1,51): foster-parenting-character
+(1,52): foster-parenting-character
+(1,53): foster-parenting-character
+(1,53): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "abc"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <style>
+|             "</script>"
+
+#data
+<!doctype html><table><tr><script></style></script>abc
+#errors
+(1,52): foster-parenting-character
+(1,53): foster-parenting-character
+(1,54): foster-parenting-character
+(1,54): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "abc"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <script>
+|             "</style>"
+
+#data
+<!doctype html><table><caption><style></script></style>abc
+#errors
+(1,58): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <style>
+|           "</script>"
+|         "abc"
+
+#data
+<!doctype html><table><td><style></script></style>abc
+#errors
+(1,26): unexpected-cell-in-table-body
+(1,53): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <style>
+|               "</script>"
+|             "abc"
+
+#data
+<!doctype html><select><script></style></script>abc
+#errors
+(1,51): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <script>
+|         "</style>"
+|       "abc"
+
+#data
+<!doctype html><table><select><script></style></script>abc
+#errors
+(1,30): unexpected-start-tag-implies-table-voodoo
+(1,58): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <script>
+|         "</style>"
+|       "abc"
+|     <table>
+
+#data
+<!doctype html><table><tr><select><script></style></script>abc
+#errors
+(1,34): unexpected-start-tag-implies-table-voodoo
+(1,62): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <script>
+|         "</style>"
+|       "abc"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><frameset></frameset><noframes>abc
+#errors
+(1,49): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|   <noframes>
+|     "abc"
+
+#data
+<!doctype html><frameset></frameset><noframes>abc</noframes><!--abc-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|   <noframes>
+|     "abc"
+|   <!-- abc -->
+
+#data
+<!doctype html><frameset></frameset></html><noframes>abc
+#errors
+(1,56): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|   <noframes>
+|     "abc"
+
+#data
+<!doctype html><frameset></frameset></html><noframes>abc</noframes><!--abc-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|   <noframes>
+|     "abc"
+| <!-- abc -->
+
+#data
+<!doctype html><table><tr></tbody><tfoot>
+#errors
+(1,41): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|       <tfoot>
+
+#data
+<!doctype html><table><td><svg></svg>abc<td>
+#errors
+(1,26): unexpected-cell-in-table-body
+(1,44): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|             "abc"
+|           <td>
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests19.dat b/lib/html5lib/tests/testdata/tree-construction/tests19.dat
new file mode 100644
index 00000000..a1897774
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests19.dat
@@ -0,0 +1,1454 @@
+#data
+<!doctype html><math><mn DefinitionUrl="foo">
+#errors
+(1,45): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mn>
+|         definitionURL="foo"
+
+#data
+<!doctype html><html><!--foo-->
+#errors
+(1,25): end-tag-after-implied-root
+#document
+| <!DOCTYPE html>
+| <html>
+|   <!-- foo -->
+|   <head>
+|   <body>
+
+#data
+<!doctype html><head></head><!--foo-->
+#errors
+(1,32): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <!-- foo -->
+|   <body>
+
+#data
+<!doctype html><body><pre>
+#errors
+(1,29): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     <pre>
+
+#data
+<!doctype html><body><listing>
+#errors
+(1,33): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     <listing>
+
+#data
+<!doctype html><plaintext>
+#errors
+(1,29): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     <plaintext>
+
+#data
+<!doctype html><h1>
+#errors
+(1,22): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     <h1>
+
+#data
+<!doctype html><isindex type="hidden">
+#errors
+(1,38): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <isindex>
+|       type="hidden"
+
+#data
+<!doctype html><ruby><rp>
+#errors
+(1,28): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       
+|       <rp>
+
+#data
+<!doctype html><ruby><div><span><rp>
+#errors
+(1,36): XXX-undefined-error
+(1,36): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <div>
+|         <span>
+|           <rp>
+
+#data
+<!doctype html><ruby><div><rp>
+#errors
+(1,33): XXX-undefined-error
+(1,33): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <div>
+|         
+|         <rp>
+
+#data
+<!doctype html><ruby><rt>
+#errors
+(1,28): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       
+|       <rt>
+
+#data
+<!doctype html><ruby><div><span><rt>
+#errors
+(1,36): XXX-undefined-error
+(1,36): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <div>
+|         <span>
+|           <rt>
+
+#data
+<!doctype html><ruby><div><rt>
+#errors
+(1,33): XXX-undefined-error
+(1,33): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <div>
+|         
+|         <rt>
+
+#data
+<html><ruby>a<rb>b<rt></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rb>
+|         "b"
+|       <rt>
+
+#data
+<html><ruby>a<rp>b<rt></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rp>
+|         "b"
+|       <rt>
+
+#data
+<html><ruby>a<rt>b<rt></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rt>
+|         "b"
+|       <rt>
+
+#data
+<html><ruby>a<rtc>b<rt>c<rb>d</ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rtc>
+|         "b"
+|         <rt>
+|           "c"
+|       <rb>
+|         "d"
+
+#data
+<!doctype html><math/><foo>
+#errors
+(1,27): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|     <foo>
+
+#data
+<!doctype html><svg/><foo>
+#errors
+(1,26): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|     <foo>
+
+#data
+<!doctype html><div></body><!--foo-->
+#errors
+(1,27): expected-one-end-tag-but-got-another
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|   <!-- foo -->
+
+#data
+<!doctype html><h1><div><h3><span></h1>foo
+#errors
+(1,39): end-tag-too-early
+(1,42): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <h1>
+|       <div>
+|         <h3>
+|           <span>
+|         "foo"
+
+#data
+<!doctype html></h3>foo
+#errors
+(1,23): end-tag-too-early
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       "foo"
+
+#data
+<!doctype html><h3><li>abc</h2>foo
+#errors
+(1,31): end-tag-too-early
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <h3>
+|       <li>
+|         "abc"
+|     "foo"
+
+#data
+<!doctype html><table>abc<!--foo-->
+#errors
+(1,23): foster-parenting-character
+(1,24): foster-parenting-character
+(1,25): foster-parenting-character
+(1,35): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "abc"
+|     <table>
+|       <!-- foo -->
+
+#data
+<!doctype html><table>  <!--foo-->
+#errors
+(1,34): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       "  "
+|       <!-- foo -->
+
+#data
+<!doctype html><table> b <!--foo-->
+#errors
+(1,23): foster-parenting-character
+(1,24): foster-parenting-character
+(1,25): foster-parenting-character
+(1,35): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     " b "
+|     <table>
+|       <!-- foo -->
+
+#data
+<!doctype html><select><option><option>
+#errors
+(1,39): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|       <option>
+
+#data
+<!doctype html><select><option></optgroup>
+#errors
+(1,42): unexpected-end-tag-in-select
+(1,42): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+
+#data
+<!doctype html><select><option></optgroup>
+#errors
+(1,42): unexpected-end-tag-in-select
+(1,42): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+
+#data
+<!doctype html><dd><optgroup><dd>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <dd>
+|       <optgroup>
+|     <dd>
+
+#data
+<!doctype html><math><mi><h1>
+#errors
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <math math>
+|         <math mi>
+|           
+|           <h1>
+
+#data
+<!doctype html><math><mo><h1>
+#errors
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <math math>
+|         <math mo>
+|           
+|           <h1>
+
+#data
+<!doctype html><math><mn><h1>
+#errors
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <math math>
+|         <math mn>
+|           
+|           <h1>
+
+#data
+<!doctype html><math><ms><h1>
+#errors
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <math math>
+|         <math ms>
+|           
+|           <h1>
+
+#data
+<!doctype html><math><mtext><h1>
+#errors
+(1,38): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <math math>
+|         <math mtext>
+|           
+|           <h1>
+
+#data
+<!doctype html><frameset></noframes>
+#errors
+(1,36): unexpected-end-tag-in-frameset
+(1,36): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><html c=d><body></html><html a=b>
+#errors
+(1,48): non-html-root
+#document
+| <!DOCTYPE html>
+| <html>
+|   a="b"
+|   c="d"
+|   <head>
+|   <body>
+
+#data
+<!doctype html><html c=d><frameset></frameset></html><html a=b>
+#errors
+(1,63): non-html-root
+#document
+| <!DOCTYPE html>
+| <html>
+|   a="b"
+|   c="d"
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><html><frameset></frameset></html><!--foo-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+| <!-- foo -->
+
+#data
+<!doctype html><html><frameset></frameset></html>  
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|   "  "
+
+#data
+<!doctype html><html><frameset></frameset></html>abc
+#errors
+(1,50): expected-eof-but-got-char
+(1,51): expected-eof-but-got-char
+(1,52): expected-eof-but-got-char
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><html><frameset></frameset></html>
+#errors
+(1,52): expected-eof-but-got-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><html><frameset></frameset></html>
+#errors
+(1,53): expected-eof-but-got-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<html><frameset></frameset></html><!doctype html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,49): unexpected-doctype
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><body><frameset>
+#errors
+(1,31): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!doctype html><frameset><frame>
+#errors
+(1,28): unexpected-start-tag
+(1,35): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<!doctype html>a<frameset>
+#errors
+(1,29): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       "a"
+
+#data
+<!doctype html> <frameset><frame>
+#errors
+(1,29): unexpected-start-tag
+(1,36): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<!doctype html><pre><frameset>
+#errors
+(1,30): unexpected-start-tag
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+
+#data
+<!doctype html><listing><frameset>
+#errors
+(1,34): unexpected-start-tag
+(1,34): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <listing>
+
+#data
+<!doctype html><li><frameset>
+#errors
+(1,29): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <li>
+
+#data
+<!doctype html><dd><frameset>
+#errors
+(1,29): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <dd>
+
+#data
+<!doctype html><dt><frameset>
+#errors
+(1,29): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <dt>
+
+#data
+<!doctype html><button><frameset>
+#errors
+(1,33): unexpected-start-tag
+(1,33): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <button>
+
+#data
+<!doctype html><applet><frameset>
+#errors
+(1,33): unexpected-start-tag
+(1,33): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <applet>
+
+#data
+<!doctype html><marquee><frameset>
+#errors
+(1,34): unexpected-start-tag
+(1,34): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <marquee>
+
+#data
+<!doctype html><object><frameset>
+#errors
+(1,33): unexpected-start-tag
+(1,33): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <object>
+
+#data
+<!doctype html><table><frameset>
+#errors
+(1,32): unexpected-start-tag-implies-table-voodoo
+(1,32): unexpected-start-tag
+(1,32): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+
+#data
+<!doctype html><area><frameset>
+#errors
+(1,31): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <area>
+
+#data
+<!doctype html><basefont><frameset>
+#errors
+(1,35): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <basefont>
+|   <frameset>
+
+#data
+<!doctype html><bgsound><frameset>
+#errors
+(1,34): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <bgsound>
+|   <frameset>
+
+#data
+<!doctype html><frameset>
+#errors
+(1,29): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+
+#data
+<!doctype html><embed><frameset>
+#errors
+(1,32): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <embed>
+
+#data
+<!doctype html><img><frameset>
+#errors
+(1,30): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <img>
+
+#data
+<!doctype html><input><frameset>
+#errors
+(1,32): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <input>
+
+#data
+<!doctype html><keygen><frameset>
+#errors
+(1,33): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <keygen>
+
+#data
+<!doctype html><wbr><frameset>
+#errors
+(1,30): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <wbr>
+
+#data
+<!doctype html><hr><frameset>
+#errors
+(1,29): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <hr>
+
+#data
+<!doctype html><textarea></textarea><frameset>
+#errors
+(1,46): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+
+#data
+<!doctype html><xmp></xmp><frameset>
+#errors
+(1,36): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <xmp>
+
+#data
+<!doctype html><iframe></iframe><frameset>
+#errors
+(1,42): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+
+#data
+<!doctype html><select></select><frameset>
+#errors
+(1,42): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><svg></svg><frameset><frame>
+#errors
+(1,36): unexpected-start-tag
+(1,43): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<!doctype html><math></math><frameset><frame>
+#errors
+(1,38): unexpected-start-tag
+(1,45): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<!doctype html><svg><foreignObject><div> <frameset><frame>
+#errors
+(1,51): unexpected-start-tag
+(1,58): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<!doctype html><svg>a</svg><frameset><frame>
+#errors
+(1,37): unexpected-start-tag
+(1,44): unexpected-start-tag-ignored
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "a"
+
+#data
+<!doctype html><svg> </svg><frameset><frame>
+#errors
+(1,37): unexpected-start-tag
+(1,44): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<html>aaa<frameset></frameset>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,19): unexpected-start-tag
+(1,30): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     "aaa"
+
+#data
+<html> a <frameset></frameset>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,19): unexpected-start-tag
+(1,30): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     "a "
+
+#data
+<!doctype html><div><frameset>
+#errors
+(1,30): unexpected-start-tag
+(1,30): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><div><body><frameset>
+#errors
+(1,26): unexpected-start-tag
+(1,36): unexpected-start-tag
+(1,36): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+
+#data
+<!doctype html><p><math>a
+#errors
+(1,28): unexpected-end-tag
+(1,28): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <math math>
+|     "a"
+
+#data
+<!doctype html><math><mn><span>a
+#errors
+(1,38): unexpected-end-tag
+(1,39): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <math math>
+|         <math mn>
+|           <span>
+|             
+|             "a"
+
+#data
+<!doctype html><math></html>
+#errors
+(1,28): unexpected-end-tag
+(1,28): expected-one-end-tag-but-got-another
+(1,28): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+
+#data
+<!doctype html><meta charset="ascii">
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <meta>
+|       charset="ascii"
+|   <body>
+
+#data
+<!doctype html><meta http-equiv="content-type" content="text/html;charset=ascii">
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <meta>
+|       content="text/html;charset=ascii"
+|       http-equiv="content-type"
+|   <body>
+
+#data
+<!doctype html><head><!--aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa--><meta charset="utf8">
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <!-- aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -->
+|     <meta>
+|       charset="utf8"
+|   <body>
+
+#data
+<!doctype html><html a=b><head></head><html c=d>
+#errors
+(1,48): non-html-root
+#document
+| <!DOCTYPE html>
+| <html>
+|   a="b"
+|   c="d"
+|   <head>
+|   <body>
+
+#data
+<!doctype html><image/>
+#errors
+(1,23): image-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <img>
+
+#data
+<!doctype html>a<i>b<table>c<b>d</i>e</b>f
+#errors
+(1,28): foster-parenting-character
+(1,31): foster-parenting-start-tag
+(1,32): foster-parenting-character
+(1,36): foster-parenting-end-tag
+(1,36): adoption-agency-1.3
+(1,37): foster-parenting-character
+(1,41): foster-parenting-end-tag
+(1,42): foster-parenting-character
+(1,42): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "a"
+|     <i>
+|       "bc"
+|       <b>
+|         "de"
+|       "f"
+|       <table>
+
+#data
+<!doctype html><table><i>a<b>b<div>c<a>d</i>e</b>f
+#errors
+(1,25): foster-parenting-start-tag
+(1,26): foster-parenting-character
+(1,29): foster-parenting-start-tag
+(1,30): foster-parenting-character
+(1,35): foster-parenting-start-tag
+(1,36): foster-parenting-character
+(1,39): foster-parenting-start-tag
+(1,40): foster-parenting-character
+(1,44): foster-parenting-end-tag
+(1,44): adoption-agency-1.3
+(1,44): adoption-agency-1.3
+(1,45): foster-parenting-character
+(1,49): foster-parenting-end-tag
+(1,49): adoption-agency-1.3
+(1,49): adoption-agency-1.3
+(1,50): foster-parenting-character
+(1,50): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <i>
+|       "a"
+|       <b>
+|         "b"
+|     <b>
+|     <div>
+|       <b>
+|         <i>
+|           "c"
+|           <a>
+|             "d"
+|         <a>
+|           "e"
+|       <a>
+|         "f"
+|     <table>
+
+#data
+<!doctype html><i>a<b>b<div>c<a>d</i>e</b>f
+#errors
+(1,37): adoption-agency-1.3
+(1,37): adoption-agency-1.3
+(1,42): adoption-agency-1.3
+(1,42): adoption-agency-1.3
+(1,43): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <i>
+|       "a"
+|       <b>
+|         "b"
+|     <b>
+|     <div>
+|       <b>
+|         <i>
+|           "c"
+|           <a>
+|             "d"
+|         <a>
+|           "e"
+|       <a>
+|         "f"
+
+#data
+<!doctype html><table><i>a<b>b<div>c</i>
+#errors
+(1,25): foster-parenting-start-tag
+(1,26): foster-parenting-character
+(1,29): foster-parenting-start-tag
+(1,30): foster-parenting-character
+(1,35): foster-parenting-start-tag
+(1,36): foster-parenting-character
+(1,40): foster-parenting-end-tag
+(1,40): adoption-agency-1.3
+(1,40): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <i>
+|       "a"
+|       <b>
+|         "b"
+|     <b>
+|       <div>
+|         <i>
+|           "c"
+|     <table>
+
+#data
+<!doctype html><table><i>a<b>b<div>c<a>d</i>e</b>f
+#errors
+(1,25): foster-parenting-start-tag
+(1,26): foster-parenting-character
+(1,29): foster-parenting-start-tag
+(1,30): foster-parenting-character
+(1,35): foster-parenting-start-tag
+(1,36): foster-parenting-character
+(1,39): foster-parenting-start-tag
+(1,40): foster-parenting-character
+(1,44): foster-parenting-end-tag
+(1,44): adoption-agency-1.3
+(1,44): adoption-agency-1.3
+(1,45): foster-parenting-character
+(1,49): foster-parenting-end-tag
+(1,44): adoption-agency-1.3
+(1,44): adoption-agency-1.3
+(1,50): foster-parenting-character
+(1,50): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <i>
+|       "a"
+|       <b>
+|         "b"
+|     <b>
+|     <div>
+|       <b>
+|         <i>
+|           "c"
+|           <a>
+|             "d"
+|         <a>
+|           "e"
+|       <a>
+|         "f"
+|     <table>
+
+#data
+<!doctype html><table><i>a<div>b<tr>c<b>d</i>e
+#errors
+(1,25): foster-parenting-start-tag
+(1,26): foster-parenting-character
+(1,31): foster-parenting-start-tag
+(1,32): foster-parenting-character
+(1,37): foster-parenting-character
+(1,40): foster-parenting-start-tag
+(1,41): foster-parenting-character
+(1,45): foster-parenting-end-tag
+(1,45): adoption-agency-1.3
+(1,46): foster-parenting-character
+(1,46): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <i>
+|       "a"
+|       <div>
+|         "b"
+|     <i>
+|       "c"
+|       <b>
+|         "d"
+|     <b>
+|       "e"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><table><td><table><i>a<div>b<b>c</i>d
+#errors
+(1,26): unexpected-cell-in-table-body
+(1,36): foster-parenting-start-tag
+(1,37): foster-parenting-character
+(1,42): foster-parenting-start-tag
+(1,43): foster-parenting-character
+(1,46): foster-parenting-start-tag
+(1,47): foster-parenting-character
+(1,51): foster-parenting-end-tag
+(1,51): adoption-agency-1.3
+(1,51): adoption-agency-1.3
+(1,52): foster-parenting-character
+(1,52): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <i>
+|               "a"
+|             <div>
+|               <i>
+|                 "b"
+|                 <b>
+|                   "c"
+|               <b>
+|                 "d"
+|             <table>
+
+#data
+<!doctype html><body><bgsound>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <bgsound>
+
+#data
+<!doctype html><body><basefont>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <basefont>
+
+#data
+<!doctype html><a><b></a><basefont>
+#errors
+(1,25): adoption-agency-1.3
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|     <basefont>
+
+#data
+<!doctype html><a><b></a><bgsound>
+#errors
+(1,25): adoption-agency-1.3
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|     <bgsound>
+
+#data
+<!doctype html><figcaption><article></figcaption>a
+#errors
+(1,49): end-tag-too-early
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <figcaption>
+|       <article>
+|     "a"
+
+#data
+<!doctype html><summary><article></summary>a
+#errors
+(1,43): end-tag-too-early
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <summary>
+|       <article>
+|     "a"
+
+#data
+<!doctype html><a><plaintext>b
+#errors
+(1,32): unexpected-end-tag
+(1,33): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <a>
+|     <plaintext>
+|       <a>
+|         "b"
+
+#data
+<!DOCTYPE html><div>a<a></div>bcd
+#errors
+(1,30): end-tag-too-early
+(1,40): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "a"
+|       <a>
+|     <a>
+|       "b"
+|       
+|         "c"
+|       "d"
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests2.dat b/lib/html5lib/tests/testdata/tree-construction/tests2.dat
new file mode 100644
index 00000000..b44fec4d
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests2.dat
@@ -0,0 +1,821 @@
+#data
+<!DOCTYPE html>Test
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "Test"
+
+#data
+<textarea>test</div>test
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(1,24): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "test</div>test"
+
+#data
+<table><td>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,11): unexpected-cell-in-table-body
+(1,11): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<table><td>test</tbody></table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,11): unexpected-cell-in-table-body
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "test"
+
+#data
+<frame>test
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,7): unexpected-start-tag-ignored
+#document
+| <html>
+|   <head>
+|   <body>
+|     "test"
+
+#data
+<!DOCTYPE html><frameset>test
+#errors
+(1,29): unexpected-char-in-frameset
+(1,29): unexpected-char-in-frameset
+(1,29): unexpected-char-in-frameset
+(1,29): unexpected-char-in-frameset
+(1,29): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><frameset> te st
+#errors
+(1,29): unexpected-char-in-frameset
+(1,29): unexpected-char-in-frameset
+(1,29): unexpected-char-in-frameset
+(1,29): unexpected-char-in-frameset
+(1,29): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     "  "
+
+#data
+<!DOCTYPE html><frameset></frameset> te st
+#errors
+(1,29): unexpected-char-after-frameset
+(1,29): unexpected-char-after-frameset
+(1,29): unexpected-char-after-frameset
+(1,29): unexpected-char-after-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|   "  "
+
+#data
+<!DOCTYPE html><frameset><!DOCTYPE html>
+#errors
+(1,40): unexpected-doctype
+(1,40): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><font><b>test</font>
+#errors
+(1,38): adoption-agency-1.3
+(1,38): adoption-agency-1.3
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|     
+|       <font>
+|         <b>
+|           "test"
+
+#data
+<!DOCTYPE html><dt><div><dd>
+#errors
+(1,28): end-tag-too-early
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <dt>
+|       <div>
+|     <dd>
+
+#data
+<script></x
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,11): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</x"
+|   <body>
+
+#data
+<table><plaintext><td>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,18): unexpected-start-tag-implies-table-voodoo
+(1,22): foster-parenting-character-in-table
+(1,22): foster-parenting-character-in-table
+(1,22): foster-parenting-character-in-table
+(1,22): foster-parenting-character-in-table
+(1,22): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "<td>"
+|     <table>
+
+#data
+<plaintext></plaintext>
+#errors
+(1,11): expected-doctype-but-got-start-tag
+(1,23): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!DOCTYPE html><table><tr>TEST
+#errors
+(1,30): foster-parenting-character-in-table
+(1,30): foster-parenting-character-in-table
+(1,30): foster-parenting-character-in-table
+(1,30): foster-parenting-character-in-table
+(1,30): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "TEST"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!DOCTYPE html><body t1=1><body t2=2><body t3=3 t4=4>
+#errors
+(1,37): unexpected-start-tag
+(1,53): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     t1="1"
+|     t2="2"
+|     t3="3"
+|     t4="4"
+
+#data
+</b test
+#errors
+(1,8): eof-in-attribute-name
+(1,8): expected-doctype-but-got-eof
+#new-errors
+(1:9) eof-in-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html></b test<b &=&amp>X
+#errors
+(1,24): invalid-character-in-attribute-name
+(1,32): named-entity-without-semicolon
+(1,33): attributes-in-end-tag
+(1,33): unexpected-end-tag-before-html
+#new-errors
+(1:24) unexpected-character-in-attribute-name
+(1:33) missing-semicolon-after-character-reference
+(1:33) end-tag-with-attributes
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X"
+
+#data
+<!doctypehtml><scrIPt type=text/x-foobar;baz>X</SCRipt
+#errors
+(1,9): need-space-after-doctype
+(1,54): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:10) missing-whitespace-before-doctype-name
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       type="text/x-foobar;baz"
+|       "X</SCRipt"
+|   <body>
+
+#data
+&
+#errors
+(1,1): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&"
+
+#data
+&#
+#errors
+(1,2): expected-numeric-entity
+(1,2): expected-doctype-but-got-chars
+#new-errors
+(1:3) absence-of-digits-in-numeric-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&#"
+
+#data
+&#X
+#errors
+(1,3): expected-numeric-entity
+(1,3): expected-doctype-but-got-chars
+#new-errors
+(1:4) absence-of-digits-in-numeric-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&#X"
+
+#data
+&#x
+#errors
+(1,3): expected-numeric-entity
+(1,3): expected-doctype-but-got-chars
+#new-errors
+(1:4) absence-of-digits-in-numeric-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&#x"
+
+#data
+&#45
+#errors
+(1,4): numeric-entity-without-semicolon
+(1,4): expected-doctype-but-got-chars
+#new-errors
+(1:5) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "-"
+
+#data
+&x-test
+#errors
+(1,2): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&x-test"
+
+#data
+<!doctypehtml><li>
+#errors
+(1,9): need-space-after-doctype
+#new-errors
+(1:10) missing-whitespace-before-doctype-name
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     <li>
+
+#data
+<!doctypehtml><dt>
+#errors
+(1,9): need-space-after-doctype
+#new-errors
+(1:10) missing-whitespace-before-doctype-name
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     <dt>
+
+#data
+<!doctypehtml><dd>
+#errors
+(1,9): need-space-after-doctype
+#new-errors
+(1:10) missing-whitespace-before-doctype-name
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     <dd>
+
+#data
+<!doctypehtml><form>
+#errors
+(1,9): need-space-after-doctype
+(1,23): expected-closing-tag-but-got-eof
+#new-errors
+(1:10) missing-whitespace-before-doctype-name
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     <form>
+
+#data
+<!DOCTYPE html>X
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     "X"
+
+#data
+&AMP
+#errors
+(1,4): named-entity-without-semicolon
+(1,4): expected-doctype-but-got-chars
+#new-errors
+(1:5) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&"
+
+#data
+&AMp;
+#errors
+(1,3): expected-named-entity
+(1,3): expected-doctype-but-got-chars
+#new-errors
+(1:5) unknown-named-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&AMp;"
+
+#data
+<!DOCTYPE html><html><head></head><body><thisISasillyTESTelementNameToMakeSureCrazyTagNamesArePARSEDcorrectLY>
+#errors
+(1,110): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <thisisasillytestelementnametomakesurecrazytagnamesareparsedcorrectly>
+
+#data
+<!DOCTYPE html>X</body>X
+#errors
+(1,24): unexpected-char-after-body
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "XX"
+
+#data
+<!DOCTYPE html><!-- X
+#errors
+(1,21): eof-in-comment
+#new-errors
+(1:22) eof-in-comment
+#document
+| <!DOCTYPE html>
+| <!--  X -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><table><caption>test TEST</caption><td>test
+#errors
+(1,54): unexpected-cell-in-table-body
+(1,58): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         "test TEST"
+|       <tbody>
+|         <tr>
+|           <td>
+|             "test"
+
+#data
+<!DOCTYPE html><select><option><optgroup>
+#errors
+(1,41): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|       <optgroup>
+
+#data
+<!DOCTYPE html><select><optgroup><option></optgroup><option><select><option>
+#errors
+(1,68): unexpected-select-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <optgroup>
+|         <option>
+|       <option>
+|     <option>
+
+#data
+<!DOCTYPE html><select><optgroup><option><optgroup>
+#errors
+(1,51): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <optgroup>
+|         <option>
+|       <optgroup>
+
+#data
+<!DOCTYPE html><datalist><option>foo</datalist>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <datalist>
+|       <option>
+|         "foo"
+|     "bar"
+
+#data
+<!DOCTYPE html><font><input><input></font>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|       <input>
+|       <input>
+
+#data
+<!DOCTYPE html><!-- XXX - XXX -->
+#errors
+#document
+| <!DOCTYPE html>
+| <!--  XXX - XXX  -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><!-- XXX - XXX
+#errors
+(1,29): eof-in-comment
+#new-errors
+(1:30) eof-in-comment
+#document
+| <!DOCTYPE html>
+| <!--  XXX - XXX -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><!-- XXX - XXX - XXX -->
+#errors
+#document
+| <!DOCTYPE html>
+| <!--  XXX - XXX - XXX  -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+test
+test
+#errors
+(2,4): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "test
+test"
+
+#data
+<!DOCTYPE html><body><title>test</body></title>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <title>
+|       "test</body>"
+
+#data
+<!DOCTYPE html><body><title>X</title><meta name=z><link rel=foo><style>
+x { content:"</style" } </style>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <title>
+|       "X"
+|     <meta>
+|       name="z"
+|     <link>
+|       rel="foo"
+|     <style>
+|       "
+x { content:"</style" } "
+
+#data
+<!DOCTYPE html><select><optgroup></optgroup></select>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <optgroup>
+
+#data
+ 
+ 
+#errors
+(2,1): expected-doctype-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html>  <html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><script>
+</script>  <title>x</title>  </head>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "
+"
+|     "  "
+|     <title>
+|       "x"
+|     "  "
+|   <body>
+
+#data
+<!DOCTYPE html><html><body><html id=x>
+#errors
+(1,38): non-html-root
+#document
+| <!DOCTYPE html>
+| <html>
+|   id="x"
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html>X</body><html id="x">
+#errors
+(1,36): non-html-root
+#document
+| <!DOCTYPE html>
+| <html>
+|   id="x"
+|   <head>
+|   <body>
+|     "X"
+
+#data
+<!DOCTYPE html><head><html id=x>
+#errors
+(1,32): non-html-root
+#document
+| <!DOCTYPE html>
+| <html>
+|   id="x"
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html>X</html>X
+#errors
+(1,24): expected-eof-but-got-char
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "XX"
+
+#data
+<!DOCTYPE html>X</html> 
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X "
+
+#data
+<!DOCTYPE html>X</html>X
+#errors
+(1,26): expected-eof-but-got-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X"
+|     
+|       "X"
+
+#data
+<!DOCTYPE html>X
+#errors
+(1,19): unexpected-character-after-solidus-in-tag
+(1,21): unexpected-character-after-solidus-in-tag
+(1,23): unexpected-character-after-solidus-in-tag
+#new-errors
+(1:20) unexpected-solidus-in-tag
+(1:22) unexpected-solidus-in-tag
+(1:24) unexpected-solidus-in-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X"
+|     
+|       x=""
+|       y=""
+|       z=""
+
+#data
+<!DOCTYPE html><!--x--
+#errors
+(1,22): eof-in-comment-double-dash
+#new-errors
+(1:23) eof-in-comment
+#document
+| <!DOCTYPE html>
+| <!-- x -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><table><tr><td></table>
+#errors
+(1,34): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             
+
+#data
+<!DOCTYPE <!DOCTYPE HTML>><!--<!--x-->-->
+#errors
+(1,20): expected-space-or-right-bracket-in-doctype
+(1,25): unknown-doctype
+(1,35): unexpected-char-in-comment
+#new-errors
+(1:21) invalid-character-sequence-after-doctype-name
+(1:35) nested-comment
+#document
+| <!DOCTYPE <!doctype>
+| <html>
+|   <head>
+|   <body>
+|     ">"
+|     <!-- <!--x -->
+|     "-->"
+
+#data
+<!doctype html><div><form></form><div></div></div>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <form>
+|       <div>
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests20.dat b/lib/html5lib/tests/testdata/tree-construction/tests20.dat
new file mode 100644
index 00000000..afdae743
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests20.dat
@@ -0,0 +1,582 @@
+#data
+<!doctype html><button><button>
+#errors
+(1,34): unexpected-start-tag-implies-end-tag
+(1,34): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|       <button>
+
+#data
+<!doctype html><button><address>
+#errors
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <address>
+
+#data
+<!doctype html><button><blockquote>
+#errors
+(1,38): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <blockquote>
+
+#data
+<!doctype html><button><menu>
+#errors
+(1,32): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <menu>
+
+#data
+<!doctype html><button>
+#errors
+(1,29): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         
+
+#data
+<!doctype html><button><ul>
+#errors
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <ul>
+
+#data
+<!doctype html><button><h1>
+#errors
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <h1>
+
+#data
+<!doctype html><button><h6>
+#errors
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <h6>
+
+#data
+<!doctype html><button><listing>
+#errors
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <listing>
+
+#data
+<!doctype html><button><pre>
+#errors
+(1,31): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <pre>
+
+#data
+<!doctype html><button><form>
+#errors
+(1,32): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <form>
+
+#data
+<!doctype html><button><li>
+#errors
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <li>
+
+#data
+<!doctype html><button><dd>
+#errors
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <dd>
+
+#data
+<!doctype html><button><dt>
+#errors
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <dt>
+
+#data
+<!doctype html><button><plaintext>
+#errors
+(1,37): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <plaintext>
+
+#data
+<!doctype html><button><table>
+#errors
+(1,33): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <table>
+
+#data
+<!doctype html><button><hr>
+#errors
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <hr>
+
+#data
+<!doctype html><button><xmp>
+#errors
+(1,31): expected-named-closing-tag-but-got-eof
+(1,31): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <xmp>
+
+#data
+<!doctype html><button>
+#errors
+(1,30): unexpected-end-tag
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         
+
+#data
+<!doctype html><address><button></address>a
+#errors
+(1,42): end-tag-too-early
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <address>
+|       <button>
+|     "a"
+
+#data
+<!doctype html><address><button></address>a
+#errors
+(1,42): end-tag-too-early
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <address>
+|       <button>
+|     "a"
+
+#data
+<table>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,14): unexpected-end-tag-implies-table-voodoo
+(1,14): unexpected-end-tag
+(1,14): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       
+|       <table>
+
+#data
+<!doctype html><svg>
+#errors
+(1,20): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<!doctype html><figcaption>
+#errors
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     <figcaption>
+
+#data
+<!doctype html><summary>
+#errors
+(1,27): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     <summary>
+
+#data
+<!doctype html><form><table><form>
+#errors
+(1,34): unexpected-form-in-table
+(1,34): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <table>
+
+#data
+<!doctype html><table><form><form>
+#errors
+(1,28): unexpected-form-in-table
+(1,34): unexpected-form-in-table
+(1,34): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <form>
+
+#data
+<!doctype html><table><form></table><form>
+#errors
+(1,28): unexpected-form-in-table
+(1,42): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <form>
+
+#data
+<!doctype html><svg><foreignObject>
+#errors
+(1,38): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg foreignObject>
+|         
+
+#data
+<!doctype html><svg><title>abc
+#errors
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg title>
+|         "abc"
+
+#data
+<option><span><option>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,22): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <option>
+|       <span>
+|         <option>
+
+#data
+<option><option>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <option>
+|     <option>
+
+#data
+<math><annotation-xml><div>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,27): unexpected-html-element-in-foreign-content
+(1,27): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|     <div>
+
+#data
+<math><annotation-xml encoding="application/svg+xml"><div>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,58): unexpected-html-element-in-foreign-content
+(1,58): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         encoding="application/svg+xml"
+|     <div>
+
+#data
+<math><annotation-xml encoding="application/xhtml+xml"><div>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,60): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         encoding="application/xhtml+xml"
+|         <div>
+
+#data
+<math><annotation-xml encoding="aPPlication/xhtmL+xMl"><div>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,60): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         encoding="aPPlication/xhtmL+xMl"
+|         <div>
+
+#data
+<math><annotation-xml encoding="text/html"><div>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,48): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         encoding="text/html"
+|         <div>
+
+#data
+<math><annotation-xml encoding="Text/htmL"><div>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,48): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         encoding="Text/htmL"
+|         <div>
+
+#data
+<math><annotation-xml encoding=" text/html "><div>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,50): unexpected-html-element-in-foreign-content
+(1,50): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         encoding=" text/html "
+|     <div>
+
+#data
+<math><annotation-xml> </annotation-xml>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,40): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         " "
+
+#data
+<math><annotation-xml>c</annotation-xml>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,40): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         "c"
+
+#data
+<math><annotation-xml><!--foo-->
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,32): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         <!-- foo -->
+
+#data
+<math><annotation-xml></svg>x
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,28): unexpected-end-tag
+(1,29): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         "x"
+
+#data
+<math><annotation-xml><svg>x
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,28): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         <svg svg>
+|           "x"
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests21.dat b/lib/html5lib/tests/testdata/tree-construction/tests21.dat
new file mode 100644
index 00000000..1e2af7c1
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests21.dat
@@ -0,0 +1,325 @@
+#data
+<svg><![CDATA[foo]]>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,20): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "foo"
+
+#data
+<math><![CDATA[foo]]>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,21): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       "foo"
+
+#data
+<div><![CDATA[foo]]>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,7): expected-dashes-or-doctype
+(1,20): expected-closing-tag-but-got-eof
+#new-errors
+(1:14) cdata-in-html-content
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <!-- [CDATA[foo]] -->
+
+#data
+<svg><![CDATA[foo
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,17): expected-closing-tag-but-got-eof
+#new-errors
+(1:18) eof-in-cdata
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "foo"
+
+#data
+<svg><![CDATA[foo
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,17): expected-closing-tag-but-got-eof
+#new-errors
+(1:18) eof-in-cdata
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "foo"
+
+#data
+<svg><![CDATA[
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,14): expected-closing-tag-but-got-eof
+#new-errors
+(1:15) eof-in-cdata
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<svg><![CDATA[]]>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,17): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<svg><![CDATA[]] >]]>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,21): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "]] >"
+
+#data
+<svg><![CDATA[]] >]]>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,21): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "]] >"
+
+#data
+<svg><![CDATA[]]
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,16): expected-closing-tag-but-got-eof
+#new-errors
+(1:17) eof-in-cdata
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "]]"
+
+#data
+<svg><![CDATA[]
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,15): expected-closing-tag-but-got-eof
+#new-errors
+(1:16) eof-in-cdata
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "]"
+
+#data
+<svg><![CDATA[]>a
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,17): expected-closing-tag-but-got-eof
+#new-errors
+(1:18) eof-in-cdata
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "]>a"
+
+#data
+<!DOCTYPE html><svg><![CDATA[foo]]]>
+#errors
+(1,36): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "foo]"
+
+#data
+<!DOCTYPE html><svg><![CDATA[foo]]]]>
+#errors
+(1,37): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "foo]]"
+
+#data
+<!DOCTYPE html><svg><![CDATA[foo]]]]]>
+#errors
+(1,38): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "foo]]]"
+
+#data
+<svg><foreignObject><div><![CDATA[foo]]>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,27): expected-dashes-or-doctype
+(1,40): expected-closing-tag-but-got-eof
+#new-errors
+(1:34) cdata-in-html-content
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg foreignObject>
+|         <div>
+|           <!-- [CDATA[foo]] -->
+
+#data
+<svg><![CDATA[<svg>]]>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,22): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<svg>"
+
+#data
+<svg><![CDATA[</svg>a]]>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,24): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "</svg>a"
+
+#data
+<svg><![CDATA[<svg>a
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,20): expected-closing-tag-but-got-eof
+#new-errors
+(1:21) eof-in-cdata
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<svg>a"
+
+#data
+<svg><![CDATA[</svg>a
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,21): expected-closing-tag-but-got-eof
+#new-errors
+(1:22) eof-in-cdata
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "</svg>a"
+
+#data
+<svg><![CDATA[<svg>]]><path>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,28): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<svg>"
+|       <svg path>
+
+#data
+<svg><![CDATA[<svg>]]></path>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,29): unexpected-end-tag
+(1,29): unexpected-end-tag
+(1,29): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<svg>"
+
+#data
+<svg><![CDATA[<svg>]]><!--path-->
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,33): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<svg>"
+|       <!-- path -->
+
+#data
+<svg><![CDATA[<svg>]]>path
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,26): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<svg>path"
+
+#data
+<svg><![CDATA[<!--svg-->]]>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,27): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<!--svg-->"
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests22.dat b/lib/html5lib/tests/testdata/tree-construction/tests22.dat
new file mode 100644
index 00000000..31e6d9e3
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests22.dat
@@ -0,0 +1,190 @@
+#data
+<a><b><big><em><strong><div>X</a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,33): adoption-agency-1.3
+(1,33): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|         <big>
+|           <em>
+|             <strong>
+|     <big>
+|       <em>
+|         <strong>
+|           <div>
+|             <a>
+|               "X"
+
+#data
+<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8>A</a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,91): adoption-agency-1.3
+(1,91): adoption-agency-1.3
+(1,91): adoption-agency-1.3
+(1,91): adoption-agency-1.3
+(1,91): adoption-agency-1.3
+(1,91): adoption-agency-1.3
+(1,91): adoption-agency-1.3
+(1,91): adoption-agency-1.3
+(1,91): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|     <b>
+|       <div>
+|         id="1"
+|         <a>
+|         <div>
+|           id="2"
+|           <a>
+|           <div>
+|             id="3"
+|             <a>
+|             <div>
+|               id="4"
+|               <a>
+|               <div>
+|                 id="5"
+|                 <a>
+|                 <div>
+|                   id="6"
+|                   <a>
+|                   <div>
+|                     id="7"
+|                     <a>
+|                     <div>
+|                       id="8"
+|                       <a>
+|                         "A"
+
+#data
+<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8><div id=9>A</a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,101): adoption-agency-1.3
+(1,101): adoption-agency-1.3
+(1,101): adoption-agency-1.3
+(1,101): adoption-agency-1.3
+(1,101): adoption-agency-1.3
+(1,101): adoption-agency-1.3
+(1,101): adoption-agency-1.3
+(1,101): adoption-agency-1.3
+(1,101): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|     <b>
+|       <div>
+|         id="1"
+|         <a>
+|         <div>
+|           id="2"
+|           <a>
+|           <div>
+|             id="3"
+|             <a>
+|             <div>
+|               id="4"
+|               <a>
+|               <div>
+|                 id="5"
+|                 <a>
+|                 <div>
+|                   id="6"
+|                   <a>
+|                   <div>
+|                     id="7"
+|                     <a>
+|                     <div>
+|                       id="8"
+|                       <a>
+|                         <div>
+|                           id="9"
+|                           "A"
+
+#data
+<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8><div id=9><div id=10>A</a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,112): adoption-agency-1.3
+(1,112): adoption-agency-1.3
+(1,112): adoption-agency-1.3
+(1,112): adoption-agency-1.3
+(1,112): adoption-agency-1.3
+(1,112): adoption-agency-1.3
+(1,112): adoption-agency-1.3
+(1,112): adoption-agency-1.3
+(1,112): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|     <b>
+|       <div>
+|         id="1"
+|         <a>
+|         <div>
+|           id="2"
+|           <a>
+|           <div>
+|             id="3"
+|             <a>
+|             <div>
+|               id="4"
+|               <a>
+|               <div>
+|                 id="5"
+|                 <a>
+|                 <div>
+|                   id="6"
+|                   <a>
+|                   <div>
+|                     id="7"
+|                     <a>
+|                     <div>
+|                       id="8"
+|                       <a>
+|                         <div>
+|                           id="9"
+|                           <div>
+|                             id="10"
+|                             "A"
+
+#data
+<cite><b><cite><i><cite><i><cite><i><div>X</b>TEST
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,46): adoption-agency-1.3
+(1,50): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <cite>
+|       <b>
+|         <cite>
+|           <i>
+|             <cite>
+|               <i>
+|                 <cite>
+|                   <i>
+|       <i>
+|         <i>
+|           <div>
+|             <b>
+|               "X"
+|             "TEST"
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests23.dat b/lib/html5lib/tests/testdata/tree-construction/tests23.dat
new file mode 100644
index 00000000..49e4a4ac
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests23.dat
@@ -0,0 +1,168 @@
+#data
+<font size=4><font color=red><font size=4><font size=4><font size=4><font size=4><font size=4><font color=red>X
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,116): unexpected-end-tag
+(1,117): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <font>
+|         size="4"
+|         <font>
+|           color="red"
+|           <font>
+|             size="4"
+|             <font>
+|               size="4"
+|               <font>
+|                 size="4"
+|                 <font>
+|                   size="4"
+|                   <font>
+|                     size="4"
+|                     <font>
+|                       color="red"
+|     
+|       <font>
+|         color="red"
+|         <font>
+|           size="4"
+|           <font>
+|             size="4"
+|             <font>
+|               size="4"
+|               <font>
+|                 color="red"
+|                 "X"
+
+#data
+<font size=4><font size=4><font size=4><font size=4>X
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,58): unexpected-end-tag
+(1,59): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <font>
+|         size="4"
+|         <font>
+|           size="4"
+|           <font>
+|             size="4"
+|             <font>
+|               size="4"
+|     
+|       <font>
+|         size="4"
+|         <font>
+|           size="4"
+|           <font>
+|             size="4"
+|             "X"
+
+#data
+<font size=4><font size=4><font size=4><font size="5"><font size=4>X
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,73): unexpected-end-tag
+(1,74): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <font>
+|         size="4"
+|         <font>
+|           size="4"
+|           <font>
+|             size="4"
+|             <font>
+|               size="5"
+|               <font>
+|                 size="4"
+|     
+|       <font>
+|         size="4"
+|         <font>
+|           size="4"
+|           <font>
+|             size="5"
+|             <font>
+|               size="4"
+|               "X"
+
+#data
+<font size=4 id=a><font size=4 id=b><font size=4><font size=4>X
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,68): unexpected-end-tag
+(1,69): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <font>
+|         id="a"
+|         size="4"
+|         <font>
+|           id="b"
+|           size="4"
+|           <font>
+|             size="4"
+|             <font>
+|               size="4"
+|     
+|       <font>
+|         id="a"
+|         size="4"
+|         <font>
+|           id="b"
+|           size="4"
+|           <font>
+|             size="4"
+|             <font>
+|               size="4"
+|               "X"
+
+#data
+<b id=a><b id=a><b id=a><b><object><b id=a><b id=a>X</object>Y
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,64): end-tag-too-early
+(1,67): unexpected-end-tag
+(1,68): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <b>
+|         id="a"
+|         <b>
+|           id="a"
+|           <b>
+|             id="a"
+|             <b>
+|               <object>
+|                 <b>
+|                   id="a"
+|                   <b>
+|                     id="a"
+|                     "X"
+|     
+|       <b>
+|         id="a"
+|         <b>
+|           id="a"
+|           <b>
+|             id="a"
+|             <b>
+|               "Y"
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests24.dat b/lib/html5lib/tests/testdata/tree-construction/tests24.dat
new file mode 100644
index 00000000..f6dc7eb4
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests24.dat
@@ -0,0 +1,79 @@
+#data
+<!DOCTYPE html>&NotEqualTilde;
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "≂̸"
+
+#data
+<!DOCTYPE html>&NotEqualTilde;A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "≂̸A"
+
+#data
+<!DOCTYPE html>&ThickSpace;
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "  "
+
+#data
+<!DOCTYPE html>&ThickSpace;A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "  A"
+
+#data
+<!DOCTYPE html>&NotSubset;
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "⊂⃒"
+
+#data
+<!DOCTYPE html>&NotSubset;A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "⊂⃒A"
+
+#data
+<!DOCTYPE html>&Gopf;
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "𝔾"
+
+#data
+<!DOCTYPE html>&Gopf;A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "𝔾A"
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests25.dat b/lib/html5lib/tests/testdata/tree-construction/tests25.dat
new file mode 100644
index 00000000..5bb369b2
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests25.dat
@@ -0,0 +1,288 @@
+#data
+<!DOCTYPE html><body><foo>A
+#errors
+(1,27): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|       "A"
+
+#data
+<!DOCTYPE html><body><area>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <area>
+|     "A"
+
+#data
+<!DOCTYPE html><body><base>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <base>
+|     "A"
+
+#data
+<!DOCTYPE html><body><basefont>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <basefont>
+|     "A"
+
+#data
+<!DOCTYPE html><body><bgsound>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <bgsound>
+|     "A"
+
+#data
+<!DOCTYPE html><body>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     "A"
+
+#data
+<!DOCTYPE html><body><col>A
+#errors
+(1,26): unexpected-start-tag-ignored
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "A"
+
+#data
+<!DOCTYPE html><body><command>A
+#errors
+eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <command>
+|       "A"
+
+#data
+<!DOCTYPE html><body><embed>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <embed>
+|     "A"
+
+#data
+<!DOCTYPE html><body><frame>A
+#errors
+(1,28): unexpected-start-tag-ignored
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "A"
+
+#data
+<!DOCTYPE html><body><hr>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <hr>
+|     "A"
+
+#data
+<!DOCTYPE html><body><img>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <img>
+|     "A"
+
+#data
+<!DOCTYPE html><body><input>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <input>
+|     "A"
+
+#data
+<!DOCTYPE html><body><keygen>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <keygen>
+|     "A"
+
+#data
+<!DOCTYPE html><keygen>A</keygen>B
+#errors
+33: Stray end tag “keygen”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <keygen>
+|     "AB"
+
+#data
+</keygen>A
+#errors
+9: End tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+9: Stray end tag “keygen”.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "A"
+
+#data
+<!DOCTYPE html></keygen>A
+#errors
+24: Stray end tag “keygen”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "A"
+
+#data
+<!DOCTYPE html><head></keygen>A
+#errors
+30: Stray end tag “keygen”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "A"
+
+#data
+<!DOCTYPE html><head></head></keygen>A
+#errors
+30: Stray end tag “keygen”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "A"
+
+#data
+<!DOCTYPE html><body></keygen>A
+#errors
+30: Stray end tag “keygen”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "A"
+
+#data
+<!DOCTYPE html><body><link>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <link>
+|     "A"
+
+#data
+<!DOCTYPE html><body><meta>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <meta>
+|     "A"
+
+#data
+<!DOCTYPE html><body><param>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <param>
+|     "A"
+
+#data
+<!DOCTYPE html><body><source>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <source>
+|     "A"
+
+#data
+<!DOCTYPE html><body><track>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <track>
+|     "A"
+
+#data
+<!DOCTYPE html><body><wbr>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <wbr>
+|     "A"
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests26.dat b/lib/html5lib/tests/testdata/tree-construction/tests26.dat
new file mode 100644
index 00000000..de453b9c
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests26.dat
@@ -0,0 +1,393 @@
+#data
+<!DOCTYPE html><body><a href='#1'><nobr>1<nobr></a><a href='#2'><nobr>2<nobr></a><a href='#3'><nobr>3<nobr></a>
+#errors
+(1,47): unexpected-start-tag-implies-end-tag
+(1,51): adoption-agency-1.3
+(1,74): unexpected-start-tag-implies-end-tag
+(1,74): adoption-agency-1.3
+(1,81): unexpected-start-tag-implies-end-tag
+(1,85): adoption-agency-1.3
+(1,108): unexpected-start-tag-implies-end-tag
+(1,108): adoption-agency-1.3
+(1,115): unexpected-start-tag-implies-end-tag
+(1,119): adoption-agency-1.3
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       href="#1"
+|       <nobr>
+|         "1"
+|       <nobr>
+|     <nobr>
+|       
+|       <a>
+|         href="#2"
+|     <a>
+|       href="#2"
+|       <nobr>
+|         "2"
+|       <nobr>
+|     <nobr>
+|       
+|       <a>
+|         href="#3"
+|     <a>
+|       href="#3"
+|       <nobr>
+|         "3"
+|       <nobr>
+
+#data
+<!DOCTYPE html><body><b><nobr>1<nobr></b><i><nobr>2<nobr></i>3
+#errors
+(1,37): unexpected-start-tag-implies-end-tag
+(1,41): adoption-agency-1.3
+(1,50): unexpected-start-tag-implies-end-tag
+(1,50): adoption-agency-1.3
+(1,57): unexpected-start-tag-implies-end-tag
+(1,61): adoption-agency-1.3
+(1,62): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|       <nobr>
+|     <nobr>
+|       <i>
+|     <i>
+|       <nobr>
+|         "2"
+|       <nobr>
+|     <nobr>
+|       "3"
+
+#data
+<!DOCTYPE html><body><b><nobr>1<table><nobr></b><i><nobr>2<nobr></i>3
+#errors
+(1,44): foster-parenting-start-tag
+(1,48): foster-parenting-end-tag
+(1,48): adoption-agency-1.3
+(1,51): foster-parenting-start-tag
+(1,57): foster-parenting-start-tag
+(1,57): nobr-already-in-scope
+(1,57): adoption-agency-1.2
+(1,58): foster-parenting-character
+(1,64): foster-parenting-start-tag
+(1,64): nobr-already-in-scope
+(1,68): foster-parenting-end-tag
+(1,68): adoption-agency-1.2
+(1,69): foster-parenting-character
+(1,69): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|         <nobr>
+|           <i>
+|         <i>
+|           <nobr>
+|             "2"
+|           <nobr>
+|         <nobr>
+|           "3"
+|         <table>
+
+#data
+<!DOCTYPE html><body><b><nobr>1<table><tr><td><nobr></b><i><nobr>2<nobr></i>3
+#errors
+(1,56): unexpected-end-tag
+(1,65): unexpected-start-tag-implies-end-tag
+(1,65): adoption-agency-1.3
+(1,72): unexpected-start-tag-implies-end-tag
+(1,76): adoption-agency-1.3
+(1,77): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|         <table>
+|           <tbody>
+|             <tr>
+|               <td>
+|                 <nobr>
+|                   <i>
+|                 <i>
+|                   <nobr>
+|                     "2"
+|                   <nobr>
+|                 <nobr>
+|                   "3"
+
+#data
+<!DOCTYPE html><body><b><nobr>1<div><nobr></b><i><nobr>2<nobr></i>3
+#errors
+(1,42): unexpected-start-tag-implies-end-tag
+(1,42): adoption-agency-1.3
+(1,46): adoption-agency-1.3
+(1,46): adoption-agency-1.3
+(1,55): unexpected-start-tag-implies-end-tag
+(1,55): adoption-agency-1.3
+(1,62): unexpected-start-tag-implies-end-tag
+(1,66): adoption-agency-1.3
+(1,67): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|     <div>
+|       <b>
+|         <nobr>
+|         <nobr>
+|       <nobr>
+|         <i>
+|       <i>
+|         <nobr>
+|           "2"
+|         <nobr>
+|       <nobr>
+|         "3"
+
+#data
+<!DOCTYPE html><body><b><nobr>1<nobr></b><div><i><nobr>2<nobr></i>3
+#errors
+(1,37): unexpected-start-tag-implies-end-tag
+(1,41): adoption-agency-1.3
+(1,55): unexpected-start-tag-implies-end-tag
+(1,55): adoption-agency-1.3
+(1,62): unexpected-start-tag-implies-end-tag
+(1,66): adoption-agency-1.3
+(1,67): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|       <nobr>
+|     <div>
+|       <nobr>
+|         <i>
+|       <i>
+|         <nobr>
+|           "2"
+|         <nobr>
+|       <nobr>
+|         "3"
+
+#data
+<!DOCTYPE html><body><b><nobr>1<nobr><ins></b><i><nobr>
+#errors
+(1,37): unexpected-start-tag-implies-end-tag
+(1,46): adoption-agency-1.3
+(1,55): unexpected-start-tag-implies-end-tag
+(1,55): adoption-agency-1.3
+(1,55): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|       <nobr>
+|         <ins>
+|     <nobr>
+|       <i>
+|     <i>
+|       <nobr>
+
+#data
+<!DOCTYPE html><body><b><nobr>1<ins><nobr></b><i>2
+#errors
+(1,42): unexpected-start-tag-implies-end-tag
+(1,42): adoption-agency-1.3
+(1,46): adoption-agency-1.3
+(1,50): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|         <ins>
+|       <nobr>
+|     <nobr>
+|       <i>
+|         "2"
+
+#data
+<!DOCTYPE html><body><b>1<nobr></b><i><nobr>2</i>
+#errors
+(1,35): adoption-agency-1.3
+(1,44): unexpected-start-tag-implies-end-tag
+(1,44): adoption-agency-1.3
+(1,49): adoption-agency-1.3
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "1"
+|       <nobr>
+|     <nobr>
+|       <i>
+|     <i>
+|       <nobr>
+|         "2"
+
+#data
+<code x</code>
+
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,11): invalid-character-in-attribute-name
+(1,12): unexpected-character-after-solidus-in-tag
+(1,21): unexpected-end-tag
+(2,0): expected-closing-tag-but-got-eof
+#new-errors
+(1:11) unexpected-character-in-attribute-name
+(1:13) unexpected-solidus-in-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <code>
+|         code=""
+|         x<=""
+|     <code>
+|       code=""
+|       x<=""
+|       "
+"
+
+#data
+<!DOCTYPE html><svg><foreignObject><i>a
+#errors
+(1,45): unexpected-end-tag
+(1,46): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg foreignObject>
+|         
+|           <i>
+|         <i>
+|           "a"
+
+#data
+<!DOCTYPE html><table><tr><td><svg><foreignObject><i>a
+#errors
+(1,60): unexpected-end-tag
+(1,61): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|               <svg foreignObject>
+|                 
+|                   <i>
+|                 <i>
+|                   "a"
+
+#data
+<!DOCTYPE html><math><mtext><i>a
+#errors
+(1,38): unexpected-end-tag
+(1,39): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mtext>
+|         
+|           <i>
+|         <i>
+|           "a"
+
+#data
+<!DOCTYPE html><table><tr><td><math><mtext><i>a
+#errors
+(1,53): unexpected-end-tag
+(1,54): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <math math>
+|               <math mtext>
+|                 
+|                   <i>
+|                 <i>
+|                   "a"
+
+#data
+<!DOCTYPE html><body><div><!/div>a
+#errors
+(1,28): expected-dashes-or-doctype
+(1,34): expected-closing-tag-but-got-eof
+#new-errors
+(1:29) incorrectly-opened-comment
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <!-- /div -->
+|       "a"
+
+#data
+<button><button>
+#errors
+Line 1 Col 8 Unexpected start tag (button). Expected DOCTYPE.
+Line 1 Col 19 Unexpected start tag (button) implies end tag (button).
+Line 1 Col 19 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <button>
+|       
+|     <button>
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests3.dat b/lib/html5lib/tests/testdata/tree-construction/tests3.dat
new file mode 100644
index 00000000..c7583d99
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests3.dat
@@ -0,0 +1,305 @@
+#data
+<head></head><style></style>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,20): unexpected-start-tag-out-of-my-head
+#document
+| <html>
+|   <head>
+|     <style>
+|   <body>
+
+#data
+<head></head><script></script>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,21): unexpected-start-tag-out-of-my-head
+#document
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<head></head><!-- --><style></style><!-- --><script></script>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,28): unexpected-start-tag-out-of-my-head
+(1,52): unexpected-start-tag-out-of-my-head
+#document
+| <html>
+|   <head>
+|     <style>
+|     <script>
+|   <!--   -->
+|   <!--   -->
+|   <body>
+
+#data
+<head></head><!-- -->x<style></style><!-- --><script></script>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <!--   -->
+|   <body>
+|     "x"
+|     <style>
+|     <!--   -->
+|     <script>
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>
+</pre></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>
+foo</pre></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "foo"
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>
+
+foo</pre></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "
+foo"
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>
+foo
+</pre></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "foo
+"
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>x</pre><span>
+</span></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "x"
+|     <span>
+|       "
+"
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>x
+y</pre></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "x
+y"
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>x<div>
+y</pre></body></html>
+#errors
+(2,7): end-tag-too-early
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "x"
+|       <div>
+|         "
+y"
+
+#data
+<!DOCTYPE html><pre>&#x0a;&#x0a;A</pre>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "
+A"
+
+#data
+<!DOCTYPE html><HTML><META><HEAD></HEAD></HTML>
+#errors
+(1,33): two-heads-are-not-better-than-one
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <meta>
+|   <body>
+
+#data
+<!DOCTYPE html><HTML><HEAD><head></HEAD></HTML>
+#errors
+(1,33): two-heads-are-not-better-than-one
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<textarea>foo<span>bar</span><i>baz
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "foo<span>bar</span><i>baz"
+
+#data
+<title>foo<span>bar</em><i>baz
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,30): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <title>
+|       "foo<span>bar</em><i>baz"
+|   <body>
+
+#data
+<!DOCTYPE html><textarea>
+</textarea>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+
+#data
+<!DOCTYPE html><textarea>
+foo</textarea>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "foo"
+
+#data
+<!DOCTYPE html><textarea>
+
+foo</textarea>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "
+foo"
+
+#data
+<!DOCTYPE html><html><head></head><body><ul><li><div><li></ul></body></html>
+#errors
+(1,60): end-tag-too-early
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ul>
+|       <li>
+|         <div>
+|           
+|       <li>
+
+#data
+<!doctype html><nobr><nobr><nobr>
+#errors
+(1,27): unexpected-start-tag-implies-end-tag
+(1,33): unexpected-start-tag-implies-end-tag
+(1,33): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <nobr>
+|     <nobr>
+|     <nobr>
+
+#data
+<!doctype html><nobr><nobr></nobr><nobr>
+#errors
+(1,27): unexpected-start-tag-implies-end-tag
+(1,40): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <nobr>
+|     <nobr>
+|     <nobr>
+
+#data
+<!doctype html><html><body><table></table></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     <table>
+
+#data
+<table></table>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <table>
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests4.dat b/lib/html5lib/tests/testdata/tree-construction/tests4.dat
new file mode 100644
index 00000000..0a6174c3
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests4.dat
@@ -0,0 +1,58 @@
+#data
+direct div content
+#errors
+#document-fragment
+div
+#document
+| "direct div content"
+
+#data
+direct textarea content
+#errors
+#document-fragment
+textarea
+#document
+| "direct textarea content"
+
+#data
+textarea content with <em>pseudo</em> <foo>markup
+#errors
+#document-fragment
+textarea
+#document
+| "textarea content with <em>pseudo</em> <foo>markup"
+
+#data
+this is &#x0043;DATA inside a <style> element
+#errors
+#document-fragment
+style
+#document
+| "this is &#x0043;DATA inside a <style> element"
+
+#data
+</plaintext>
+#errors
+#document-fragment
+plaintext
+#document
+| "</plaintext>"
+
+#data
+setting html's innerHTML
+#errors
+#document-fragment
+html
+#document
+| <head>
+| <body>
+|   "setting html's innerHTML"
+
+#data
+<title>setting head's innerHTML</title>
+#errors
+#document-fragment
+head
+#document
+| <title>
+|   "setting head's innerHTML"
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests5.dat b/lib/html5lib/tests/testdata/tree-construction/tests5.dat
new file mode 100644
index 00000000..1ef8cae4
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests5.dat
@@ -0,0 +1,210 @@
+#data
+<style> <!-- </style>x
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       " <!-- "
+|   <body>
+|     "x"
+
+#data
+<style> <!-- </style> --> </style>x
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,34): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       " <!-- "
+|     " "
+|   <body>
+|     "--> x"
+
+#data
+<style> <!--> </style>x
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       " <!--> "
+|   <body>
+|     "x"
+
+#data
+<style> <!---> </style>x
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       " <!---> "
+|   <body>
+|     "x"
+
+#data
+<iframe> <!---> </iframe>x
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+|       " <!---> "
+|     "x"
+
+#data
+<iframe> <!--- </iframe>->x</iframe> --> </iframe>x
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,36): unexpected-end-tag
+(1,50): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+|       " <!--- "
+|     "->x --> x"
+
+#data
+<script> <!-- </script> --> </script>x
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,37): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       " <!-- "
+|     " "
+|   <body>
+|     "--> x"
+
+#data
+<title> <!-- </title> --> </title>x
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,34): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <title>
+|       " <!-- "
+|     " "
+|   <body>
+|     "--> x"
+
+#data
+<textarea> <!--- </textarea>->x</textarea> --> </textarea>x
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(1,42): unexpected-end-tag
+(1,58): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       " <!--- "
+|     "->x --> x"
+
+#data
+<style> <!</-- </style>x
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       " <!</-- "
+|   <body>
+|     "x"
+
+#data
+<xmp></xmp>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|     <xmp>
+
+#data
+<xmp> <!-- > --> </xmp>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <xmp>
+|       " <!-- > --> "
+
+#data
+<title>&amp;</title>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <title>
+|       "&"
+|   <body>
+
+#data
+<title><!--&amp;--></title>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <title>
+|       "<!--&-->"
+|   <body>
+
+#data
+<title><!--</title>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <title>
+|       "<!--"
+|   <body>
+
+#data
+<noscript><!--</noscript>--></noscript>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(1,39): unexpected-end-tag
+#script-on
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       "<!--"
+|   <body>
+|     "-->"
+
+#data
+<noscript><!--</noscript>--></noscript>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <!-- </noscript> -->
+|   <body>
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests6.dat b/lib/html5lib/tests/testdata/tree-construction/tests6.dat
new file mode 100644
index 00000000..f3991232
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests6.dat
@@ -0,0 +1,663 @@
+#data
+<!doctype html></head> <head>
+#errors
+(1,29): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   " "
+|   <body>
+
+#data
+<!doctype html><form><div></form><div>
+#errors
+(1,33): end-tag-too-early-ignored
+(1,38): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <div>
+|         <div>
+
+#data
+<!doctype html><title>&amp;</title>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "&"
+|   <body>
+
+#data
+<!doctype html><title><!--&amp;--></title>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "<!--&-->"
+|   <body>
+
+#data
+<!doctype>
+#errors
+(1,9): need-space-after-doctype
+(1,10): expected-doctype-name-but-got-right-bracket
+(1,10): unknown-doctype
+#new-errors
+(1:10) missing-doctype-name
+#document
+| <!DOCTYPE >
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!---x
+#errors
+(1,6): eof-in-comment
+(1,6): expected-doctype-but-got-eof
+#new-errors
+(1:7) eof-in-comment
+#document
+| <!-- -x -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<body>
+<div>
+#errors
+(1,6): unexpected-start-tag
+(2,5): expected-closing-tag-but-got-eof
+#document-fragment
+div
+#document
+| "
+"
+| <div>
+
+#data
+<frameset></frameset>
+foo
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(2,1): unexpected-char-after-frameset
+(2,2): unexpected-char-after-frameset
+(2,3): unexpected-char-after-frameset
+#document
+| <html>
+|   <head>
+|   <frameset>
+|   "
+"
+
+#data
+<frameset></frameset>
+<noframes>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(2,10): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <frameset>
+|   "
+"
+|   <noframes>
+
+#data
+<frameset></frameset>
+<div>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(2,5): unexpected-start-tag-after-frameset
+#document
+| <html>
+|   <head>
+|   <frameset>
+|   "
+"
+
+#data
+<frameset></frameset>
+</html>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <frameset>
+|   "
+"
+
+#data
+<frameset></frameset>
+</div>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(2,6): unexpected-end-tag-after-frameset
+#document
+| <html>
+|   <head>
+|   <frameset>
+|   "
+"
+
+#data
+<form><form>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,12): unexpected-start-tag
+(1,12): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <form>
+
+#data
+<button><button>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,16): unexpected-start-tag-implies-end-tag
+(1,16): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <button>
+|     <button>
+
+#data
+<table><tr><td></th>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,20): unexpected-end-tag
+(1,20): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<table><caption><td>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,20): unexpected-cell-in-table-body
+(1,20): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<table><caption><div>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,21): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <div>
+
+#data
+</caption><div>
+#errors
+(1,10): XXX-undefined-error
+(1,15): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <div>
+
+#data
+<table><caption><div></caption>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,31): expected-one-end-tag-but-got-another
+(1,31): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <div>
+
+#data
+<table><caption></table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+
+#data
+</table><div>
+#errors
+(1,8): unexpected-end-tag
+(1,13): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <div>
+
+#data
+<table><caption></body></col></colgroup></html></tbody></td></tfoot></th></thead></tr>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,23): unexpected-end-tag
+(1,29): unexpected-end-tag
+(1,40): unexpected-end-tag
+(1,47): unexpected-end-tag
+(1,55): unexpected-end-tag
+(1,60): unexpected-end-tag
+(1,68): unexpected-end-tag
+(1,73): unexpected-end-tag
+(1,81): unexpected-end-tag
+(1,86): unexpected-end-tag
+(1,86): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+
+#data
+<table><caption><div></div>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,27): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <div>
+
+#data
+<table><tr><td></body></caption></col></colgroup></html>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,22): unexpected-end-tag
+(1,32): unexpected-end-tag
+(1,38): unexpected-end-tag
+(1,49): unexpected-end-tag
+(1,56): unexpected-end-tag
+(1,56): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+</table></tbody></tfoot></thead></tr><div>
+#errors
+(1,8): unexpected-end-tag
+(1,16): unexpected-end-tag
+(1,24): unexpected-end-tag
+(1,32): unexpected-end-tag
+(1,37): unexpected-end-tag
+(1,42): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <div>
+
+#data
+<table><colgroup>foo
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,18): foster-parenting-character-in-table
+(1,19): foster-parenting-character-in-table
+(1,20): foster-parenting-character-in-table
+(1,20): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     "foo"
+|     <table>
+|       <colgroup>
+
+#data
+foo<col>
+#errors
+(1,1): unexpected-character-in-colgroup
+(1,2): unexpected-character-in-colgroup
+(1,3): unexpected-character-in-colgroup
+#document-fragment
+colgroup
+#document
+| <col>
+
+#data
+<table><colgroup></col>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,23): no-end-tag
+(1,23): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <colgroup>
+
+#data
+<frameset><div>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(1,15): unexpected-start-tag-in-frameset
+(1,15): eof-in-frameset
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+</frameset><frame>
+#errors
+(1,11): unexpected-frameset-in-frameset-innerhtml
+#document-fragment
+frameset
+#document
+| <frame>
+
+#data
+<frameset></div>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(1,16): unexpected-end-tag-in-frameset
+(1,16): eof-in-frameset
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+</body><div>
+#errors
+(1,7): unexpected-close-tag
+(1,12): expected-closing-tag-but-got-eof
+#document-fragment
+body
+#document
+| <div>
+
+#data
+<table><tr><div>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,16): unexpected-start-tag-implies-table-voodoo
+(1,16): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+</tr><td>
+#errors
+(1,5): unexpected-end-tag
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+</tbody></tfoot></thead><td>
+#errors
+(1,8): unexpected-end-tag
+(1,16): unexpected-end-tag
+(1,24): unexpected-end-tag
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<table><tr><div><td>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,16): foster-parenting-start-tag
+(1,20): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<caption><col><colgroup><tbody><tfoot><thead><tr>
+#errors
+(1,9): unexpected-start-tag
+(1,14): unexpected-start-tag
+(1,24): unexpected-start-tag
+(1,31): unexpected-start-tag
+(1,38): unexpected-start-tag
+(1,45): unexpected-start-tag
+#document-fragment
+tbody
+#document
+| <tr>
+
+#data
+<table><tbody></thead>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,22): unexpected-end-tag-in-table-body
+(1,22): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+
+#data
+</table><tr>
+#errors
+(1,8): unexpected-end-tag
+#document-fragment
+tbody
+#document
+| <tr>
+
+#data
+<table><tbody></body></caption></col></colgroup></html></td></th></tr>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,21): unexpected-end-tag-in-table-body
+(1,31): unexpected-end-tag-in-table-body
+(1,37): unexpected-end-tag-in-table-body
+(1,48): unexpected-end-tag-in-table-body
+(1,55): unexpected-end-tag-in-table-body
+(1,60): unexpected-end-tag-in-table-body
+(1,65): unexpected-end-tag-in-table-body
+(1,70): unexpected-end-tag-in-table-body
+(1,70): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+
+#data
+<table><tbody></div>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,20): unexpected-end-tag-implies-table-voodoo
+(1,20): end-tag-too-early
+(1,20): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+
+#data
+<table><table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,14): unexpected-start-tag-implies-end-tag
+(1,14): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|     <table>
+
+#data
+<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,14): unexpected-end-tag
+(1,24): unexpected-end-tag
+(1,30): unexpected-end-tag
+(1,41): unexpected-end-tag
+(1,48): unexpected-end-tag
+(1,56): unexpected-end-tag
+(1,61): unexpected-end-tag
+(1,69): unexpected-end-tag
+(1,74): unexpected-end-tag
+(1,82): unexpected-end-tag
+(1,87): unexpected-end-tag
+(1,87): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+
+#data
+</table><tr>
+#errors
+(1,8): unexpected-end-tag
+#document-fragment
+table
+#document
+| <tbody>
+|   <tr>
+
+#data
+<body></body></html>
+#errors
+(1,20): unexpected-end-tag-after-body-innerhtml
+#document-fragment
+html
+#document
+| <head>
+| <body>
+
+#data
+<html><frameset></frameset></html> 
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <frameset>
+|   " "
+
+#data
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html></html>
+#errors
+#document
+| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<param><frameset></frameset>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,17): unexpected-start-tag
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<source><frameset></frameset>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,18): unexpected-start-tag
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<track><frameset></frameset>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,17): unexpected-start-tag
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+</html><frameset></frameset>
+#errors
+(1,7): expected-doctype-but-got-end-tag
+(1,17): expected-eof-but-got-start-tag
+(1,17): unexpected-start-tag
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+</body><frameset></frameset>
+#errors
+(1,7): expected-doctype-but-got-end-tag
+(1,17): unexpected-start-tag-after-body
+(1,17): unexpected-start-tag
+#document
+| <html>
+|   <head>
+|   <frameset>
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests7.dat b/lib/html5lib/tests/testdata/tree-construction/tests7.dat
new file mode 100644
index 00000000..395dc72b
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests7.dat
@@ -0,0 +1,418 @@
+#data
+<!doctype html><body><title>X</title>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <title>
+|       "X"
+
+#data
+<!doctype html><table><title>X</title></table>
+#errors
+(1,29): unexpected-start-tag-implies-table-voodoo
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <title>
+|       "X"
+|     <table>
+
+#data
+<!doctype html><head></head><title>X</title>
+#errors
+(1,35): unexpected-start-tag-out-of-my-head
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "X"
+|   <body>
+
+#data
+<!doctype html></head><title>X</title>
+#errors
+(1,29): unexpected-start-tag-out-of-my-head
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "X"
+|   <body>
+
+#data
+<!doctype html><table><meta></table>
+#errors
+(1,28): unexpected-start-tag-implies-table-voodoo
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <meta>
+|     <table>
+
+#data
+<!doctype html><table>X<tr><td><table> <meta></table></table>
+#errors
+unexpected text in table
+(1,45): unexpected-start-tag-implies-table-voodoo
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <meta>
+|             <table>
+|               " "
+
+#data
+<!doctype html><html> <head>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!doctype html> <head>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!doctype html><table><style> <tr>x </style> </table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <style>
+|         " <tr>x "
+|       " "
+
+#data
+<!doctype html><table><TBODY><script> <tr>x </script> </table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <script>
+|           " <tr>x "
+|         " "
+
+#data
+<!doctype html><applet>X</applet>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <applet>
+|         
+|           "X"
+
+#data
+<!doctype html><object type="application/x-non-existant-plugin">X</object>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <object>
+|         type="application/x-non-existant-plugin"
+|         
+|           "X"
+
+#data
+<!doctype html><listing>
+X</listing>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <listing>
+|       "X"
+
+#data
+<!doctype html><select><input>X
+#errors
+(1,30): unexpected-input-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     <input>
+|     "X"
+
+#data
+<!doctype html><select><select>X
+#errors
+(1,31): unexpected-select-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     "X"
+
+#data
+<!doctype html><table><input type=hidDEN></table>
+#errors
+(1,41): unexpected-hidden-input-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <input>
+|         type="hidDEN"
+
+#data
+<!doctype html><table>X<input type=hidDEN></table>
+#errors
+(1,23): foster-parenting-character
+(1,42): unexpected-hidden-input-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X"
+|     <table>
+|       <input>
+|         type="hidDEN"
+
+#data
+<!doctype html><table>  <input type=hidDEN></table>
+#errors
+(1,43): unexpected-hidden-input-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       "  "
+|       <input>
+|         type="hidDEN"
+
+#data
+<!doctype html><table>  <input type='hidDEN'></table>
+#errors
+(1,45): unexpected-hidden-input-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       "  "
+|       <input>
+|         type="hidDEN"
+
+#data
+<!doctype html><table><input type=" hidden"><input type=hidDEN></table>
+#errors
+(1,44): unexpected-start-tag-implies-table-voodoo
+(1,63): unexpected-hidden-input-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <input>
+|       type=" hidden"
+|     <table>
+|       <input>
+|         type="hidDEN"
+
+#data
+<!doctype html><table><select>X<tr>
+#errors
+(1,30): unexpected-start-tag-implies-table-voodoo
+(1,35): unexpected-table-element-start-tag-in-select-in-table
+(1,35): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "X"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><select>X</select>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "X"
+
+#data
+<!DOCTYPE hTmL><html></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE HTML><html></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<body>X</body></body>
+#errors
+(1,21): unexpected-end-tag-after-body
+#document-fragment
+html
+#document
+| <head>
+| <body>
+|   "X"
+
+#data
+<div>a</x> b
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,13): unexpected-end-tag
+(1,15): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       
+|         "a b"
+
+#data
+<table><tr><td><code></code> </table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <code>
+|             " "
+
+#data
+<table><b><tr><td>aaa</td></tr>bbb</table>ccc
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,10): foster-parenting-start-tag
+(1,32): foster-parenting-character
+(1,33): foster-parenting-character
+(1,34): foster-parenting-character
+(1,45): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|     <b>
+|       "bbb"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "aaa"
+|     <b>
+|       "ccc"
+
+#data
+A<table><tr> B</tr> B</table>
+#errors
+(1,1): expected-doctype-but-got-chars
+(1,13): foster-parenting-character
+(1,14): foster-parenting-character
+(1,20): foster-parenting-character
+(1,21): foster-parenting-character
+#document
+| <html>
+|   <head>
+|   <body>
+|     "A B B"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+A<table><tr> B</tr> </em>C</table>
+#errors
+(1,1): expected-doctype-but-got-chars
+(1,13): foster-parenting-character
+(1,14): foster-parenting-character
+(1,20): foster-parenting-character
+(1,25): unexpected-end-tag
+(1,25): unexpected-end-tag-in-special-element
+(1,26): foster-parenting-character
+#document
+| <html>
+|   <head>
+|   <body>
+|     "A BC"
+|     <table>
+|       <tbody>
+|         <tr>
+|         " "
+
+#data
+<select><keygen>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,16): unexpected-input-in-select
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     <keygen>
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests8.dat b/lib/html5lib/tests/testdata/tree-construction/tests8.dat
new file mode 100644
index 00000000..ba2e63dd
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests8.dat
@@ -0,0 +1,162 @@
+#data
+<div>
+<div></div>
+</span>x
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(3,7): unexpected-end-tag
+(3,8): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "
+"
+|       <div>
+|       "
+x"
+
+#data
+<div>x<div></div>
+</span>x
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(2,7): unexpected-end-tag
+(2,8): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "x"
+|       <div>
+|       "
+x"
+
+#data
+<div>x<div></div>x</span>x
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,25): unexpected-end-tag
+(1,26): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "x"
+|       <div>
+|       "xx"
+
+#data
+<div>x<div></div>y</span>z
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,25): unexpected-end-tag
+(1,26): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "x"
+|       <div>
+|       "yz"
+
+#data
+<table><div>x<div></div>x</span>x
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,12): foster-parenting-start-tag
+(1,13): foster-parenting-character
+(1,18): foster-parenting-start-tag
+(1,24): foster-parenting-end-tag
+(1,25): foster-parenting-start-tag
+(1,32): foster-parenting-end-tag
+(1,32): unexpected-end-tag
+(1,33): foster-parenting-character
+(1,33): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "x"
+|       <div>
+|       "xx"
+|     <table>
+
+#data
+<table><li><li></table>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <li>
+|     <li>
+|     <table>
+
+#data
+x<table>x
+#errors
+(1,1): expected-doctype-but-got-chars
+(1,9): foster-parenting-character
+(1,9): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     "xx"
+|     <table>
+
+#data
+x<table><table>x
+#errors
+(1,1): expected-doctype-but-got-chars
+(1,15): unexpected-start-tag-implies-end-tag
+(1,16): foster-parenting-character
+(1,16): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     "x"
+|     <table>
+|     "x"
+|     <table>
+
+#data
+<b>a<div></div><div></b>y
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,24): adoption-agency-1.3
+(1,25): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "a"
+|       <div>
+|     <div>
+|       <b>
+|       "y"
+
+#data
+<a><div></a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,15): adoption-agency-1.3
+(1,15): adoption-agency-1.3
+(1,15): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <div>
+|       <a>
+|       
+|         <a>
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests9.dat b/lib/html5lib/tests/testdata/tree-construction/tests9.dat
new file mode 100644
index 00000000..f8d04b23
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests9.dat
@@ -0,0 +1,472 @@
+#data
+<!DOCTYPE html><math></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+
+#data
+<!DOCTYPE html><body><math></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+
+#data
+<!DOCTYPE html><math><mi>
+#errors
+(1,25) expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+
+#data
+<!DOCTYPE html><math><annotation-xml><svg><u>
+#errors
+(1,45) unexpected-html-element-in-foreign-content
+(1,45) expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         <svg svg>
+|     <u>
+
+#data
+<!DOCTYPE html><body><select><math></math></select>
+#errors
+(1,35) unexpected-start-tag-in-select
+(1,42) unexpected-end-tag-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!DOCTYPE html><body><select><option><math></math></option></select>
+#errors
+(1,43) unexpected-start-tag-in-select
+(1,50) unexpected-end-tag-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+
+#data
+<!DOCTYPE html><body><table><math></math></table>
+#errors
+(1,34) unexpected-start-tag-implies-table-voodoo
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|     <table>
+
+#data
+<!DOCTYPE html><body><table><math><mi>foo</mi></math></table>
+#errors
+(1,34) foster-parenting-start-token
+(1,39) foster-parenting-character
+(1,40) foster-parenting-character
+(1,41) foster-parenting-character
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|     <table>
+
+#data
+<!DOCTYPE html><body><table><math><mi>foo</mi><mi>bar</mi></math></table>
+#errors
+(1,34) foster-parenting-start-tag
+(1,39) foster-parenting-character
+(1,40) foster-parenting-character
+(1,41) foster-parenting-character
+(1,51) foster-parenting-character
+(1,52) foster-parenting-character
+(1,53) foster-parenting-character
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|       <math mi>
+|         "bar"
+|     <table>
+
+#data
+<!DOCTYPE html><body><table><tbody><math><mi>foo</mi><mi>bar</mi></math></tbody></table>
+#errors
+(1,41) foster-parenting-start-tag
+(1,46) foster-parenting-character
+(1,47) foster-parenting-character
+(1,48) foster-parenting-character
+(1,58) foster-parenting-character
+(1,59) foster-parenting-character
+(1,60) foster-parenting-character
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|       <math mi>
+|         "bar"
+|     <table>
+|       <tbody>
+
+#data
+<!DOCTYPE html><body><table><tbody><tr><math><mi>foo</mi><mi>bar</mi></math></tr></tbody></table>
+#errors
+(1,45) foster-parenting-start-tag
+(1,50) foster-parenting-character
+(1,51) foster-parenting-character
+(1,52) foster-parenting-character
+(1,62) foster-parenting-character
+(1,63) foster-parenting-character
+(1,64) foster-parenting-character
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|       <math mi>
+|         "bar"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!DOCTYPE html><body><table><tbody><tr><td><math><mi>foo</mi><mi>bar</mi></math></td></tr></tbody></table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <math math>
+|               <math mi>
+|                 "foo"
+|               <math mi>
+|                 "bar"
+
+#data
+<!DOCTYPE html><body><table><tbody><tr><td><math><mi>foo</mi><mi>bar</mi></math><p>baz</td></tr></tbody></table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <math math>
+|               <math mi>
+|                 "foo"
+|               <math mi>
+|                 "bar"
+|             
+|               "baz"
+
+#data
+<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi></math><p>baz</caption></table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <math math>
+|           <math mi>
+|             "foo"
+|           <math mi>
+|             "bar"
+|         
+|           "baz"
+
+#data
+<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi>baz</table>quux
+#errors
+(1,70) unexpected-html-element-in-foreign-content
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <math math>
+|           <math mi>
+|             "foo"
+|           <math mi>
+|             "bar"
+|         
+|           "baz"
+|     
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi>baz</table>quux
+#errors
+(1,78) unexpected-end-tag
+(1,78) expected-one-end-tag-but-got-another
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <math math>
+|           <math mi>
+|             "foo"
+|           <math mi>
+|             "bar"
+|           "baz"
+|     
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><colgroup><math><mi>foo</mi><mi>bar</mi>baz</table>quux
+#errors
+(1,44) foster-parenting-start-tag
+(1,49) foster-parenting-character
+(1,50) foster-parenting-character
+(1,51) foster-parenting-character
+(1,61) foster-parenting-character
+(1,62) foster-parenting-character
+(1,63) foster-parenting-character
+(1,71) unexpected-html-element-in-foreign-content
+(1,71) foster-parenting-start-tag
+(1,63) foster-parenting-character
+(1,63) foster-parenting-character
+(1,63) foster-parenting-character
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|       <math mi>
+|         "bar"
+|     
+|       "baz"
+|     <table>
+|       <colgroup>
+|     
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><tr><td><select><math><mi>foo</mi><mi>bar</mi>baz</table>quux
+#errors
+(1,50) unexpected-start-tag-in-select
+(1,54) unexpected-start-tag-in-select
+(1,62) unexpected-end-tag-in-select
+(1,66) unexpected-start-tag-in-select
+(1,74) unexpected-end-tag-in-select
+(1,77) unexpected-start-tag-in-select
+(1,88) unexpected-table-element-end-tag-in-select-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <select>
+|               "foobarbaz"
+|     
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><select><math><mi>foo</mi><mi>bar</mi>baz</table>quux
+#errors
+(1,36) unexpected-start-tag-implies-table-voodoo
+(1,42) unexpected-start-tag-in-select
+(1,46) unexpected-start-tag-in-select
+(1,54) unexpected-end-tag-in-select
+(1,58) unexpected-start-tag-in-select
+(1,66) unexpected-end-tag-in-select
+(1,69) unexpected-start-tag-in-select
+(1,80) unexpected-table-element-end-tag-in-select-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "foobarbaz"
+|     <table>
+|     
+|       "quux"
+
+#data
+<!DOCTYPE html><body></body></html><math><mi>foo</mi><mi>bar</mi>baz
+#errors
+(1,41) expected-eof-but-got-start-tag
+(1,68) unexpected-html-element-in-foreign-content
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|       <math mi>
+|         "bar"
+|     
+|       "baz"
+
+#data
+<!DOCTYPE html><body></body><math><mi>foo</mi><mi>bar</mi>baz
+#errors
+(1,34) unexpected-start-tag-after-body
+(1,61) unexpected-html-element-in-foreign-content
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|       <math mi>
+|         "bar"
+|     
+|       "baz"
+
+#data
+<!DOCTYPE html><frameset><math><mi></mi><mi></mi><span>
+#errors
+(1,31) unexpected-start-tag-in-frameset
+(1,35) unexpected-start-tag-in-frameset
+(1,40) unexpected-end-tag-in-frameset
+(1,44) unexpected-start-tag-in-frameset
+(1,49) unexpected-end-tag-in-frameset
+(1,52) unexpected-start-tag-in-frameset
+(1,58) unexpected-start-tag-in-frameset
+(1,58) eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><frameset></frameset><math><mi></mi><mi></mi><span>
+#errors
+(1,42) unexpected-start-tag-after-frameset
+(1,46) unexpected-start-tag-after-frameset
+(1,51) unexpected-end-tag-after-frameset
+(1,55) unexpected-start-tag-after-frameset
+(1,60) unexpected-end-tag-after-frameset
+(1,63) unexpected-start-tag-after-frameset
+(1,69) unexpected-start-tag-after-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><body xlink:href=foo><math xlink:href=foo></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     <math math>
+|       xlink href="foo"
+
+#data
+<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo></mi></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     xml:lang="en"
+|     <math math>
+|       <math mi>
+|         xlink href="foo"
+|         xml lang="en"
+
+#data
+<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo /></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     xml:lang="en"
+|     <math math>
+|       <math mi>
+|         xlink href="foo"
+|         xml lang="en"
+
+#data
+<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo />bar</math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     xml:lang="en"
+|     <math math>
+|       <math mi>
+|         xlink href="foo"
+|         xml lang="en"
+|       "bar"
diff --git a/lib/html5lib/tests/testdata/tree-construction/tests_innerHTML_1.dat b/lib/html5lib/tests/testdata/tree-construction/tests_innerHTML_1.dat
new file mode 100644
index 00000000..54f43684
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tests_innerHTML_1.dat
@@ -0,0 +1,887 @@
+#data
+<body><span>
+#errors
+(1,6): unexpected-start-tag
+(1,12): expected-closing-tag-but-got-eof
+#document-fragment
+body
+#document
+| <span>
+
+#data
+<span><body>
+#errors
+(1,12): unexpected-start-tag
+(1,12): expected-closing-tag-but-got-eof
+#document-fragment
+body
+#document
+| <span>
+
+#data
+<span><body>
+#errors
+(1,12): unexpected-start-tag
+(1,12): expected-closing-tag-but-got-eof
+#document-fragment
+div
+#document
+| <span>
+
+#data
+<body><span>
+#errors
+(1,12): expected-closing-tag-but-got-eof
+#document-fragment
+html
+#document
+| <head>
+| <body>
+|   <span>
+
+#data
+<frameset><span>
+#errors
+(1,10): unexpected-start-tag
+(1,16): expected-closing-tag-but-got-eof
+#document-fragment
+body
+#document
+| <span>
+
+#data
+<span><frameset>
+#errors
+(1,16): unexpected-start-tag
+(1,16): expected-closing-tag-but-got-eof
+#document-fragment
+body
+#document
+| <span>
+
+#data
+<span><frameset>
+#errors
+(1,16): unexpected-start-tag
+(1,16): expected-closing-tag-but-got-eof
+#document-fragment
+div
+#document
+| <span>
+
+#data
+<frameset><span>
+#errors
+(1,16): unexpected-start-tag-in-frameset
+(1,16): eof-in-frameset
+#document-fragment
+html
+#document
+| <head>
+| <frameset>
+
+#data
+<table><tr>
+#errors
+(1,7): unexpected-start-tag
+#document-fragment
+table
+#document
+| <tbody>
+|   <tr>
+
+#data
+</table><tr>
+#errors
+(1,8): unexpected-end-tag
+#document-fragment
+table
+#document
+| <tbody>
+|   <tr>
+
+#data
+<a>
+#errors
+(1,3): unexpected-start-tag-implies-table-voodoo
+(1,3): eof-in-table
+#document-fragment
+table
+#document
+| <a>
+
+#data
+<a>
+#errors
+(1,3): unexpected-start-tag-implies-table-voodoo
+(1,3): eof-in-table
+#document-fragment
+table
+#document
+| <a>
+
+#data
+<a><caption>a
+#errors
+(1,3): unexpected-start-tag-implies-table-voodoo
+(1,13): expected-closing-tag-but-got-eof
+#document-fragment
+table
+#document
+| <a>
+| <caption>
+|   "a"
+
+#data
+<a><colgroup><col>
+#errors
+(1,3): foster-parenting-start-token
+(1,18): expected-closing-tag-but-got-eof
+#document-fragment
+table
+#document
+| <a>
+| <colgroup>
+|   <col>
+
+#data
+<a><tbody><tr>
+#errors
+(1,3): foster-parenting-start-tag
+#document-fragment
+table
+#document
+| <a>
+| <tbody>
+|   <tr>
+
+#data
+<a><tfoot><tr>
+#errors
+(1,3): foster-parenting-start-tag
+#document-fragment
+table
+#document
+| <a>
+| <tfoot>
+|   <tr>
+
+#data
+<a><thead><tr>
+#errors
+(1,3): foster-parenting-start-tag
+#document-fragment
+table
+#document
+| <a>
+| <thead>
+|   <tr>
+
+#data
+<a><tr>
+#errors
+(1,3): foster-parenting-start-tag
+#document-fragment
+table
+#document
+| <a>
+| <tbody>
+|   <tr>
+
+#data
+<a><th>
+#errors
+(1,3): unexpected-start-tag-implies-table-voodoo
+(1,7): unexpected-cell-in-table-body
+#document-fragment
+table
+#document
+| <a>
+| <tbody>
+|   <tr>
+|     <th>
+
+#data
+<a><td>
+#errors
+(1,3): unexpected-start-tag-implies-table-voodoo
+(1,7): unexpected-cell-in-table-body
+#document-fragment
+table
+#document
+| <a>
+| <tbody>
+|   <tr>
+|     <td>
+
+#data
+<table></table><tbody>
+#errors
+(1,22): unexpected-start-tag
+#document-fragment
+caption
+#document
+| <table>
+
+#data
+</table><span>
+#errors
+(1,8): unexpected-end-tag
+(1,14): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+
+#data
+<span></table>
+#errors
+(1,14): unexpected-end-tag
+(1,14): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+
+#data
+</caption><span>
+#errors
+(1,10): XXX-undefined-error
+(1,16): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+
+#data
+<span></caption><span>
+#errors
+(1,16): XXX-undefined-error
+(1,22): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><caption><span>
+#errors
+(1,15): unexpected-start-tag
+(1,21): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><col><span>
+#errors
+(1,11): unexpected-start-tag
+(1,17): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><colgroup><span>
+#errors
+(1,16): unexpected-start-tag
+(1,22): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><html><span>
+#errors
+(1,12): non-html-root
+(1,18): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><tbody><span>
+#errors
+(1,13): unexpected-start-tag
+(1,19): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><td><span>
+#errors
+(1,10): unexpected-start-tag
+(1,16): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><tfoot><span>
+#errors
+(1,13): unexpected-start-tag
+(1,19): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><thead><span>
+#errors
+(1,13): unexpected-start-tag
+(1,19): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><th><span>
+#errors
+(1,10): unexpected-start-tag
+(1,16): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><tr><span>
+#errors
+(1,10): unexpected-start-tag
+(1,16): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span></table><span>
+#errors
+(1,14): unexpected-end-tag
+(1,20): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+</colgroup><col>
+#errors
+(1,11): XXX-undefined-error
+#document-fragment
+colgroup
+#document
+| <col>
+
+#data
+<a><col>
+#errors
+(1,3): XXX-undefined-error
+#document-fragment
+colgroup
+#document
+| <col>
+
+#data
+<caption><a>
+#errors
+(1,9): XXX-undefined-error
+(1,12): unexpected-start-tag-implies-table-voodoo
+(1,12): eof-in-table
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+<col><a>
+#errors
+(1,5): XXX-undefined-error
+(1,8): unexpected-start-tag-implies-table-voodoo
+(1,8): eof-in-table
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+<colgroup><a>
+#errors
+(1,10): XXX-undefined-error
+(1,13): unexpected-start-tag-implies-table-voodoo
+(1,13): eof-in-table
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+<tbody><a>
+#errors
+(1,7): XXX-undefined-error
+(1,10): unexpected-start-tag-implies-table-voodoo
+(1,10): eof-in-table
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+<tfoot><a>
+#errors
+(1,7): XXX-undefined-error
+(1,10): unexpected-start-tag-implies-table-voodoo
+(1,10): eof-in-table
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+<thead><a>
+#errors
+(1,7): XXX-undefined-error
+(1,10): unexpected-start-tag-implies-table-voodoo
+(1,10): eof-in-table
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+</table><a>
+#errors
+(1,8): XXX-undefined-error
+(1,11): unexpected-start-tag-implies-table-voodoo
+(1,11): eof-in-table
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+<a><tr>
+#errors
+(1,3): unexpected-start-tag-implies-table-voodoo
+#document-fragment
+tbody
+#document
+| <a>
+| <tr>
+
+#data
+<a><td>
+#errors
+(1,3): unexpected-start-tag-implies-table-voodoo
+(1,7): unexpected-cell-in-table-body
+#document-fragment
+tbody
+#document
+| <a>
+| <tr>
+|   <td>
+
+#data
+<a><td>
+#errors
+(1,3): unexpected-start-tag-implies-table-voodoo
+(1,7): unexpected-cell-in-table-body
+#document-fragment
+tbody
+#document
+| <a>
+| <tr>
+|   <td>
+
+#data
+<a><td>
+#errors
+(1,3): unexpected-start-tag-implies-table-voodoo
+(1,7): unexpected-cell-in-table-body
+#document-fragment
+tbody
+#document
+| <a>
+| <tr>
+|   <td>
+
+#data
+<td><table><tbody><a><tr>
+#errors
+(1,4): unexpected-cell-in-table-body
+(1,21): unexpected-start-tag-implies-table-voodoo
+(1,25): eof-in-table
+#document-fragment
+tbody
+#document
+| <tr>
+|   <td>
+|     <a>
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+</tr><td>
+#errors
+(1,5): XXX-undefined-error
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<td><table><a><tr></tr><tr>
+#errors
+(1,14): unexpected-start-tag-implies-table-voodoo
+(1,27): eof-in-table
+#document-fragment
+tr
+#document
+| <td>
+|   <a>
+|   <table>
+|     <tbody>
+|       <tr>
+|       <tr>
+
+#data
+<caption><td>
+#errors
+(1,9): XXX-undefined-error
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<col><td>
+#errors
+(1,5): XXX-undefined-error
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<colgroup><td>
+#errors
+(1,10): XXX-undefined-error
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<tbody><td>
+#errors
+(1,7): XXX-undefined-error
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<tfoot><td>
+#errors
+(1,7): XXX-undefined-error
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<thead><td>
+#errors
+(1,7): XXX-undefined-error
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<tr><td>
+#errors
+(1,4): XXX-undefined-error
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+</table><td>
+#errors
+(1,8): XXX-undefined-error
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<td><table></table><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+|   <table>
+| <td>
+
+#data
+<td><table></table><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+|   <table>
+| <td>
+
+#data
+<caption><a>
+#errors
+(1,9): XXX-undefined-error
+(1,12): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<col><a>
+#errors
+(1,5): XXX-undefined-error
+(1,8): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<colgroup><a>
+#errors
+(1,10): XXX-undefined-error
+(1,13): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<tbody><a>
+#errors
+(1,7): XXX-undefined-error
+(1,10): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<tfoot><a>
+#errors
+(1,7): XXX-undefined-error
+(1,10): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<th><a>
+#errors
+(1,4): XXX-undefined-error
+(1,7): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<thead><a>
+#errors
+(1,7): XXX-undefined-error
+(1,10): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<tr><a>
+#errors
+(1,4): XXX-undefined-error
+(1,7): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</table><a>
+#errors
+(1,8): XXX-undefined-error
+(1,11): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</tbody><a>
+#errors
+(1,8): XXX-undefined-error
+(1,11): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</td><a>
+#errors
+(1,5): unexpected-end-tag
+(1,8): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</tfoot><a>
+#errors
+(1,8): XXX-undefined-error
+(1,11): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</thead><a>
+#errors
+(1,8): XXX-undefined-error
+(1,11): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</th><a>
+#errors
+(1,5): unexpected-end-tag
+(1,8): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</tr><a>
+#errors
+(1,5): XXX-undefined-error
+(1,8): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<table><td><td>
+#errors
+(1,11): unexpected-cell-in-table-body
+(1,15): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <table>
+|   <tbody>
+|     <tr>
+|       <td>
+|       <td>
+
+#data
+</select><option>
+#errors
+(1,9): XXX-undefined-error
+#document-fragment
+select
+#document
+| <option>
+
+#data
+<input><option>
+#errors
+(1,7): unexpected-input-in-select
+#document-fragment
+select
+#document
+| <option>
+
+#data
+<keygen><option>
+#errors
+(1,8): unexpected-input-in-select
+#document-fragment
+select
+#document
+| <option>
+
+#data
+<textarea><option>
+#errors
+(1,10): unexpected-input-in-select
+#document-fragment
+select
+#document
+| <option>
+
+#data
+</html><!--abc-->
+#errors
+(1,7): unexpected-end-tag-after-body-innerhtml
+#document-fragment
+html
+#document
+| <head>
+| <body>
+| <!-- abc -->
+
+#data
+</frameset><frame>
+#errors
+(1,11): unexpected-frameset-in-frameset-innerhtml
+#document-fragment
+frameset
+#document
+| <frame>
+
+#data
+#errors
+#document-fragment
+html
+#document
+| <head>
+| <body>
diff --git a/lib/html5lib/tests/testdata/tree-construction/tricky01.dat b/lib/html5lib/tests/testdata/tree-construction/tricky01.dat
new file mode 100644
index 00000000..753502a2
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/tricky01.dat
@@ -0,0 +1,336 @@
+#data
+<b>Bold </b> Not bold
+Also not bold.
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,15): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|     
+|       <b>
+|         "Bold "
+|       " Not bold"
+|     "
+Also not bold."
+
+#data
+<html>
+<font color=red><i>Italic and RedItalic and Red </font> Just italic. Italic only.</i> Plain
+I should not be red. <font color=red>Red. <i>Italic and red.
+Italic and red. </i> Red.</font> I should not be red.
+<b>Bold <i>Bold and italic</b> Only Italic </i> Plain
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(2,58): adoption-agency-1.3
+(3,67): unexpected-end-tag
+(4,23): adoption-agency-1.3
+(4,35): adoption-agency-1.3
+(5,30): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|       color="red"
+|       <i>
+|         "Italic and Red"
+|     <i>
+|       
+|         <font>
+|           color="red"
+|           "Italic and Red "
+|         " Just italic."
+|       " Italic only."
+|     " Plain
+"
+|     
+|       "I should not be red. "
+|       <font>
+|         color="red"
+|         "Red. "
+|         <i>
+|           "Italic and red."
+|     <font>
+|       color="red"
+|       <i>
+|         "
+"
+|     
+|       <font>
+|         color="red"
+|         <i>
+|           "Italic and red. "
+|         " Red."
+|       " I should not be red."
+|     "
+"
+|     <b>
+|       "Bold "
+|       <i>
+|         "Bold and italic"
+|     <i>
+|       " Only Italic "
+|     " Plain"
+
+#data
+<html><body>
+<font size="7">First paragraph.
+Second paragraph.</font>
+<b><i>Bold and Italic</b> Italic
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(2,38): unexpected-end-tag
+(4,28): adoption-agency-1.3
+(4,28): adoption-agency-1.3
+(4,39): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     "
+"
+|     
+|       <font>
+|         size="7"
+|         "First paragraph."
+|     <font>
+|       size="7"
+|       "
+"
+|       
+|         "Second paragraph."
+|     "
+"
+|     <b>
+|     
+|       <b>
+|         <i>
+|           "Bold and Italic"
+|       <i>
+|         " Italic"
+
+#data
+<html>
+<dl>
+<dt><b>Boo
+<dd>Goo?
+</dl>
+</html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(4,4): end-tag-too-early
+(5,5): end-tag-too-early
+(6,7): expected-one-end-tag-but-got-another
+#document
+| <html>
+|   <head>
+|   <body>
+|     <dl>
+|       "
+"
+|       <dt>
+|         <b>
+|           "Boo
+"
+|       <dd>
+|         <b>
+|           "Goo?
+"
+|     <b>
+|       "
+"
+
+#data
+<html><body>
+<label><a><div>Hello<div>World</div></a></label>  
+</body></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(2,40): adoption-agency-1.3
+(2,48): unexpected-end-tag
+(3,7): expected-one-end-tag-but-got-another
+#document
+| <html>
+|   <head>
+|   <body>
+|     "
+"
+|     <label>
+|       <a>
+|       <div>
+|         <a>
+|           "Hello"
+|           <div>
+|             "World"
+|         "  
+"
+
+#data
+<table><center> <font>a</center> <img> <tr><td> </td> </tr> </table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,15): foster-parenting-start-tag
+(1,16): foster-parenting-character
+(1,22): foster-parenting-start-tag
+(1,23): foster-parenting-character
+(1,32): foster-parenting-end-tag
+(1,32): end-tag-too-early
+(1,33): foster-parenting-character
+(1,38): foster-parenting-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <center>
+|       " "
+|       <font>
+|         "a"
+|     <font>
+|       <img>
+|       " "
+|     <table>
+|       " "
+|       <tbody>
+|         <tr>
+|           <td>
+|             " "
+|           " "
+|         " "
+
+#data
+<table><tr><a>You should see this text.
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,14): unexpected-start-tag-implies-table-voodoo
+(1,17): unexpected-start-tag-implies-table-voodoo
+(1,20): unexpected-start-tag-implies-table-voodoo
+(1,20): closing-non-current-p-element
+(1,21): foster-parenting-character
+(1,22): foster-parenting-character
+(1,23): foster-parenting-character
+(1,24): foster-parenting-character
+(1,25): foster-parenting-character
+(1,26): foster-parenting-character
+(1,27): foster-parenting-character
+(1,28): foster-parenting-character
+(1,29): foster-parenting-character
+(1,30): foster-parenting-character
+(1,31): foster-parenting-character
+(1,32): foster-parenting-character
+(1,33): foster-parenting-character
+(1,34): foster-parenting-character
+(1,35): foster-parenting-character
+(1,36): foster-parenting-character
+(1,37): foster-parenting-character
+(1,38): foster-parenting-character
+(1,39): foster-parenting-character
+(1,40): foster-parenting-character
+(1,41): foster-parenting-character
+(1,42): foster-parenting-character
+(1,43): foster-parenting-character
+(1,44): foster-parenting-character
+(1,45): foster-parenting-character
+(1,45): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <a>
+|     
+|       <a>
+|         "You should see this text."
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<TABLE>
+<TR>
+<CENTER><CENTER><TD></TD></TR><TR>
+<FONT>
+<TABLE><tr></tr></TABLE>
+
+<a></font><font></a>
+This page contains an insanely badly-nested tag sequence.
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(3,8): unexpected-start-tag-implies-table-voodoo
+(3,16): unexpected-start-tag-implies-table-voodoo
+(4,6): unexpected-start-tag-implies-table-voodoo
+(4,6): unexpected character token in table (the newline)
+(5,7): unexpected-start-tag-implies-end-tag
+(6,4): unexpected p end tag
+(7,10): adoption-agency-1.3
+(7,20): adoption-agency-1.3
+(8,57): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <center>
+|       <center>
+|     <font>
+|       "
+"
+|     <table>
+|       "
+"
+|       <tbody>
+|         <tr>
+|           "
+"
+|           <td>
+|         <tr>
+|           "
+"
+|     <table>
+|       <tbody>
+|         <tr>
+|     <font>
+|       "
+"
+|       
+|       "
+"
+|       <a>
+|     <a>
+|       <font>
+|     <font>
+|       "
+This page contains an insanely badly-nested tag sequence."
+
+#data
+<html>
+<body>
+<b><nobr><div>This text is in a div inside a nobr</nobr>More text that should not be in the nobr, i.e., the
+nobr should have closed the div inside it implicitly. </b><pre>A pre tag outside everything else.</pre>
+</body>
+</html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(3,56): adoption-agency-1.3
+(4,58): adoption-agency-1.3
+(5,7): expected-one-end-tag-but-got-another
+#document
+| <html>
+|   <head>
+|   <body>
+|     "
+"
+|     <b>
+|       <nobr>
+|     <div>
+|       <b>
+|         <nobr>
+|           "This text is in a div inside a nobr"
+|         "More text that should not be in the nobr, i.e., the
+nobr should have closed the div inside it implicitly. "
+|       <pre>
+|         "A pre tag outside everything else."
+|       "
+
+"
diff --git a/lib/html5lib/tests/testdata/tree-construction/webkit01.dat b/lib/html5lib/tests/testdata/tree-construction/webkit01.dat
new file mode 100644
index 00000000..2127cfe1
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/webkit01.dat
@@ -0,0 +1,755 @@
+#data
+Test
+#errors
+(1,4): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "Test"
+
+#data
+<div></div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+
+#data
+<div>Test</div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "Test"
+
+#data
+<di
+#errors
+(1,3): eof-in-tag-name
+(1,3): expected-doctype-but-got-eof
+#new-errors
+(1:4) eof-in-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<div>Hello</div>
+<script>
+console.log("PASS");
+</script>
+<div>Bye</div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "Hello"
+|     "
+"
+|     <script>
+|       "
+console.log("PASS");
+"
+|     "
+"
+|     <div>
+|       "Bye"
+
+#data
+<div foo="bar">Hello</div>
+#errors
+(1,15): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       foo="bar"
+|       "Hello"
+
+#data
+<div>Hello</div>
+<script>
+console.log("FOO<span>BAR</span>BAZ");
+</script>
+<div>Bye</div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "Hello"
+|     "
+"
+|     <script>
+|       "
+console.log("FOO<span>BAR</span>BAZ");
+"
+|     "
+"
+|     <div>
+|       "Bye"
+
+#data
+<foo bar="baz"></foo><potato quack="duck"></potato>
+#errors
+(1,15): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|       bar="baz"
+|     <potato>
+|       quack="duck"
+
+#data
+<foo bar="baz"><potato quack="duck"></potato></foo>
+#errors
+(1,15): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|       bar="baz"
+|       <potato>
+|         quack="duck"
+
+#data
+<foo></foo bar="baz"><potato></potato quack="duck">
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,21): attributes-in-end-tag
+(1,51): attributes-in-end-tag
+#new-errors
+(1:21) end-tag-with-attributes
+(1:51) end-tag-with-attributes
+#document
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|     <potato>
+
+#data
+</ tttt>
+#errors
+(1,2): expected-closing-tag-but-got-char
+(1,8): expected-doctype-but-got-eof
+#new-errors
+(1:3) invalid-first-character-of-tag-name
+#document
+| <!--  tttt -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<div FOO ><img><img></div>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       foo=""
+|       <img>
+|       <img>
+
+#data
+TestTest2
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,13): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       "TestTest2"
+
+#data
+<rdar://problem/6869687>
+#errors
+(1,7): unexpected-character-after-solidus-in-tag
+(1,8): unexpected-character-after-solidus-in-tag
+(1,16): unexpected-character-after-solidus-in-tag
+(1,24): expected-doctype-but-got-start-tag
+(1,24): expected-closing-tag-but-got-eof
+#new-errors
+(1:8) unexpected-solidus-in-tag
+(1:9) unexpected-solidus-in-tag
+(1:17) unexpected-solidus-in-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <rdar:>
+|       6869687=""
+|       problem=""
+
+#data
+<A>test< /A>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,8): expected-tag-name
+(1,12): expected-closing-tag-but-got-eof
+#new-errors
+(1:9) invalid-first-character-of-tag-name
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "test< /A>"
+
+#data
+&lt;
+#errors
+(1,4): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "<"
+
+#data
+<body foo='bar'><body foo='baz' yo='mama'>
+#errors
+(1,16): expected-doctype-but-got-start-tag
+(1,42): unexpected-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     foo="bar"
+|     yo="mama"
+
+#data
+<body></br foo="bar"></body>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,21): attributes-in-end-tag
+(1,21): unexpected-end-tag-treated-as
+#new-errors
+(1:21) end-tag-with-attributes
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+
+#data
+<bdy></body>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,26): expected-one-end-tag-but-got-another
+#document
+| <html>
+|   <head>
+|   <body>
+|     <bdy>
+|       
+|         foo="bar"
+
+#data
+<body></body></br foo="bar">
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,28): attributes-in-end-tag
+(1,28): unexpected-end-tag-after-body
+(1,28): unexpected-end-tag-treated-as
+#new-errors
+(1:28) end-tag-with-attributes
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+
+#data
+<bdy></body>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,12): expected-one-end-tag-but-got-another
+(1,26): unexpected-start-tag-after-body
+(1,26): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <bdy>
+|       
+|         foo="bar"
+
+#data
+<html><body></body></html><!-- Hi there -->
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+| <!--  Hi there  -->
+
+#data
+<html><body></body></html><!-- Comment A --><!-- Comment B --><!-- Comment C --><!-- Comment D --><!-- Comment E -->
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+| <!--  Comment A  -->
+| <!--  Comment B  -->
+| <!--  Comment C  -->
+| <!--  Comment D  -->
+| <!--  Comment E  -->
+
+#data
+<html><body></body></html>x<!-- Hi there -->
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,27): expected-eof-but-got-char
+#document
+| <html>
+|   <head>
+|   <body>
+|     "x"
+|     <!--  Hi there  -->
+
+#data
+<html><body></body></html>x<!-- Hi there --></html><!-- Again -->
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,27): expected-eof-but-got-char
+#document
+| <html>
+|   <head>
+|   <body>
+|     "x"
+|     <!--  Hi there  -->
+| <!--  Again  -->
+
+#data
+<html><body></body></html>x<!-- Hi there --></body></html><!-- Again -->
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,27): expected-eof-but-got-char
+#document
+| <html>
+|   <head>
+|   <body>
+|     "x"
+|     <!--  Hi there  -->
+| <!--  Again  -->
+
+#data
+<html><body><ruby><div><rp>xx</rp></div></ruby></body></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,27): XXX-undefined-error
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <div>
+|         <rp>
+|           "xx"
+
+#data
+<html><body><ruby><div><rt>xx</rt></div></ruby></body></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,27): XXX-undefined-error
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <div>
+|         <rt>
+|           "xx"
+
+#data
+<html><frameset><!--1--><noframes>A</noframes><!--2--></frameset><!--3--><noframes>B</noframes><!--4--></html><!--5--><noframes>C</noframes><!--6-->
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <frameset>
+|     <!-- 1 -->
+|     <noframes>
+|       "A"
+|     <!-- 2 -->
+|   <!-- 3 -->
+|   <noframes>
+|     "B"
+|   <!-- 4 -->
+|   <noframes>
+|     "C"
+| <!-- 5 -->
+| <!-- 6 -->
+
+#data
+<select><option>A<select><option>B<select><option>C<select><option>D<select><option>E<select><option>F<select><option>G<select>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,25): unexpected-select-in-select
+(1,59): unexpected-select-in-select
+(1,93): unexpected-select-in-select
+(1,127): unexpected-select-in-select
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|         "A"
+|     <option>
+|       "B"
+|       <select>
+|         <option>
+|           "C"
+|     <option>
+|       "D"
+|       <select>
+|         <option>
+|           "E"
+|     <option>
+|       "F"
+|       <select>
+|         <option>
+|           "G"
+
+#data
+<dd><dd><dt><dt><dd><li><li>
+#errors
+(1,4): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <dd>
+|     <dd>
+|     <dt>
+|     <dt>
+|     <dd>
+|       <li>
+|       <li>
+
+#data
+<div><b></div><div><nobr>a<nobr>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,14): end-tag-too-early
+(1,32): unexpected-start-tag-implies-end-tag
+(1,32): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <b>
+|     <div>
+|       <b>
+|         <nobr>
+|           "a"
+|         <nobr>
+
+#data
+<head></head>
+<body></body>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   "
+"
+|   <body>
+
+#data
+<head></head> <style></style>ddd
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,21): unexpected-start-tag-out-of-my-head
+#document
+| <html>
+|   <head>
+|     <style>
+|   " "
+|   <body>
+|     "ddd"
+
+#data
+<kbd><table></kbd><col><select><tr>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,18): unexpected-end-tag-implies-table-voodoo
+(1,18): unexpected-end-tag
+(1,31): unexpected-start-tag-implies-table-voodoo
+(1,35): unexpected-table-element-start-tag-in-select-in-table
+(1,35): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <kbd>
+|       <select>
+|       <table>
+|         <colgroup>
+|           <col>
+|         <tbody>
+|           <tr>
+
+#data
+<kbd><table></kbd><col><select><tr></table><div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,18): unexpected-end-tag-implies-table-voodoo
+(1,18): unexpected-end-tag
+(1,31): unexpected-start-tag-implies-table-voodoo
+(1,35): unexpected-table-element-start-tag-in-select-in-table
+(1,48): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <kbd>
+|       <select>
+|       <table>
+|         <colgroup>
+|           <col>
+|         <tbody>
+|           <tr>
+|       <div>
+
+#data
+<a><li><style></style><title></title></a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,41): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <li>
+|       <a>
+|         <style>
+|         <title>
+
+#data
+<font><meta><title></title></font>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,10): unexpected-end-tag
+(1,41): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|       
+|     
+|       <font>
+|         <meta>
+|         <title>
+
+#data
+<a><center><title></title><a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,29): unexpected-start-tag-implies-end-tag
+(1,29): adoption-agency-1.3
+(1,29): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <center>
+|       <a>
+|         <title>
+|       <a>
+
+#data
+<svg><title><div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,17): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg title>
+|         <div>
+
+#data
+<svg><title><rect><div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,23): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg title>
+|         <rect>
+|           <div>
+
+#data
+<svg><title><svg><div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,22): unexpected-html-element-in-foreign-content
+(1,22): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg title>
+|         <svg svg>
+|         <div>
+
+#data
+<img <="" FAIL>
+#errors
+(1,6): invalid-character-in-attribute-name
+(1,15): expected-doctype-but-got-start-tag
+#new-errors
+(1:6) unexpected-character-in-attribute-name
+#document
+| <html>
+|   <head>
+|   <body>
+|     <img>
+|       <=""
+|       fail=""
+
+#data
+<ul><li><div id='foo'/>A</li><li>B<div>C</div></li></ul>
+#errors
+(1,4): expected-doctype-but-got-start-tag
+(1,23): non-void-element-with-trailing-solidus
+(1,29): end-tag-too-early
+#new-errors
+(1:9-1:24) non-void-html-element-start-tag-with-trailing-solidus
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ul>
+|       <li>
+|         <div>
+|           id="foo"
+|           "A"
+|       <li>
+|         "B"
+|         <div>
+|           "C"
+
+#data
+<svg><em><desc></em>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,9): unexpected-html-element-in-foreign-content
+(1,20): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|     <em>
+|       <desc>
+
+#data
+<table><tr><td><svg><desc><td></desc><circle>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|               <svg desc>
+|           <td>
+|             <circle>
+
+#data
+<svg><tfoot></mi><td>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,17): unexpected-end-tag
+(1,17): unexpected-end-tag
+(1,21): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg tfoot>
+|         <svg td>
+
+#data
+<math><mrow><mrow><mn>1</mn></mrow><mi>a</mi></mrow></math>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mrow>
+|         <math mrow>
+|           <math mn>
+|             "1"
+|         <math mi>
+|           "a"
+
+#data
+<!doctype html><input type="hidden"><frameset>
+#errors
+(1,46): unexpected-start-tag
+(1,46): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><input type="button"><frameset>
+#errors
+(1,46): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <input>
+|       type="button"
diff --git a/lib/html5lib/tests/testdata/tree-construction/webkit02.dat b/lib/html5lib/tests/testdata/tree-construction/webkit02.dat
new file mode 100644
index 00000000..791991d2
--- /dev/null
+++ b/lib/html5lib/tests/testdata/tree-construction/webkit02.dat
@@ -0,0 +1,303 @@
+#data
+<foo bar=qux/>
+#errors
+(1,14): expected-doctype-but-got-start-tag
+(1,14): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|       bar="qux/"
+
+#data
+<p id="status"><noscript><strong>A</strong></noscript><span>B</span></p>
+#errors
+(1,15): expected-doctype-but-got-start-tag
+#script-on
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       id="status"
+|       <noscript>
+|         "<strong>A</strong>"
+|       <span>
+|         "B"
+
+#data
+<p id="status"><noscript><strong>A</strong></noscript><span>B</span></p>
+#errors
+(1,15): expected-doctype-but-got-start-tag
+#script-off
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       id="status"
+|       <noscript>
+|         <strong>
+|           "A"
+|       <span>
+|         "B"
+
+#data
+<div><sarcasm><div></div></sarcasm></div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <sarcasm>
+|         <div>
+
+#data
+<html><body><img src="" border="0" alt="><div>A</div></body></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,67): eof-in-attribute-value-double-quote
+#new-errors
+(1:68) eof-in-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<table><td></tbody>A
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,11): unexpected-cell-in-table-body
+(1,20): foster-parenting-character
+(1,20): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     "A"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<table><td></thead>A
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,11): unexpected-cell-in-table-body
+(1,19): XXX-undefined-error
+(1,20): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "A"
+
+#data
+<table><td></tfoot>A
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,11): unexpected-cell-in-table-body
+(1,19): XXX-undefined-error
+(1,20): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "A"
+
+#data
+<table><thead><td></tbody>A
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,18): unexpected-cell-in-table-body
+(1,26): XXX-undefined-error
+(1,27): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <thead>
+|         <tr>
+|           <td>
+|             "A"
+
+#data
+<legend>test</legend>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <legend>
+|       "test"
+
+#data
+<table><input>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <input>
+|     <table>
+
+#data
+<b><em><foo><foo><aside></b>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <em>
+|         <foo>
+|           <foo>
+|     <em>
+|       <aside>
+|         <b>
+
+#data
+<b><em><foo><foo><aside></b></em>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <em>
+|         <foo>
+|           <foo>
+|     <em>
+|     <aside>
+|       <em>
+|         <b>
+
+#data
+<b><em><foo><foo><foo><aside></b>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <em>
+|         <foo>
+|           <foo>
+|             <foo>
+|     <aside>
+|       <b>
+
+#data
+<b><em><foo><foo><foo><aside></b></em>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <em>
+|         <foo>
+|           <foo>
+|             <foo>
+|     <aside>
+|       <b>
+
+#data
+<b><em><foo><foo><foo><foo><foo><foo><foo><foo><foo><foo><aside></b></em>
+#errors
+#document-fragment
+div
+#document
+| <b>
+|   <em>
+|     <foo>
+|       <foo>
+|         <foo>
+|           <foo>
+|             <foo>
+|               <foo>
+|                 <foo>
+|                   <foo>
+|                     <foo>
+|                       <foo>
+| <aside>
+|   <b>
+
+#data
+<b><em><foo><foob><foob><foob><foob><fooc><fooc><fooc><fooc><food><aside></b></em>
+#errors
+#document-fragment
+div
+#document
+| <b>
+|   <em>
+|     <foo>
+|       <foob>
+|         <foob>
+|           <foob>
+|             <foob>
+|               <fooc>
+|                 <fooc>
+|                   <fooc>
+|                     <fooc>
+|                       <food>
+| <aside>
+|   <b>
+
+#data
+<option><XH<optgroup></optgroup>
+#errors
+#document-fragment
+select
+#document
+| <option>
+
+#data
+<svg><foreignObject><div>foo</div><plaintext></foreignObject></svg><div>bar</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg foreignObject>
+|         <div>
+|           "foo"
+|         <plaintext>
+|           "</foreignObject></svg><div>bar</div>"
+
+#data
+<svg><foreignObject></foreignObject><title></svg>foo
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg foreignObject>
+|       <svg title>
+|     "foo"
+
+#data
+</foreignObject><plaintext><div>foo</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "<div>foo</div>"
diff --git a/lib/html5lib/tests/tokenizer.py b/lib/html5lib/tests/tokenizer.py
new file mode 100644
index 00000000..47264cc3
--- /dev/null
+++ b/lib/html5lib/tests/tokenizer.py
@@ -0,0 +1,253 @@
+from __future__ import absolute_import, division, unicode_literals
+
+import codecs
+import json
+import warnings
+import re
+
+import pytest
+from six import unichr
+
+from html5lib._tokenizer import HTMLTokenizer
+from html5lib import constants, _utils
+
+
+class TokenizerTestParser(object):  # runs HTMLTokenizer and records its tokens as lists
+    def __init__(self, initialState, lastStartTag=None):
+        self.tokenizer = HTMLTokenizer
+        self._state = initialState  # attribute name of the tokenizer state to start in
+        self._lastStartTag = lastStartTag  # if set, seeded as the tokenizer's current start tag
+
+    def parse(self, stream, encoding=None, innerHTML=False):
+        # pylint:disable=unused-argument
+        tokenizer = self.tokenizer(stream, encoding)
+        self.outputTokens = []  # reset on every call; this list is also the return value
+
+        tokenizer.state = getattr(tokenizer, self._state)
+        if self._lastStartTag is not None:
+            tokenizer.currentToken = {"type": "startTag",
+                                      "name": self._lastStartTag}
+
+        types = {v: k for k, v in constants.tokenTypes.items()}  # token type value -> type name
+        for token in tokenizer:
+            getattr(self, 'process%s' % types[token["type"]])(token)  # dispatch to process<TypeName>
+
+        return self.outputTokens
+
+    def processDoctype(self, token):
+        self.outputTokens.append(["DOCTYPE", token["name"], token["publicId"],
+                                  token["systemId"], token["correct"]])
+
+    def processStartTag(self, token):
+        self.outputTokens.append(["StartTag", token["name"],
+                                  token["data"], token["selfClosing"]])
+
+    def processEmptyTag(self, token):  # records a 3-item StartTag (no selfClosing entry)
+        if token["name"] not in constants.voidElements:
+            self.outputTokens.append("ParseError")  # non-void element: error recorded first
+        self.outputTokens.append(["StartTag", token["name"], dict(token["data"][::-1])])
+
+    def processEndTag(self, token):
+        self.outputTokens.append(["EndTag", token["name"],
+                                  token["selfClosing"]])
+
+    def processComment(self, token):
+        self.outputTokens.append(["Comment", token["data"]])
+
+    def processSpaceCharacters(self, token):
+        self.outputTokens.append(["Character", token["data"]])
+        self.processSpaceCharacters = self.processCharacters  # later calls go to processCharacters
+
+    def processCharacters(self, token):
+        self.outputTokens.append(["Character", token["data"]])
+
+    def processEOF(self, token):
+        pass  # EOF adds nothing to outputTokens
+
+    def processParseError(self, token):
+        self.outputTokens.append(["ParseError", token["data"]])
+
+
+def concatenateCharacterTokens(tokens):
+    """Merge each run of adjacent "Character" tokens into the run's first token."""
+    merged = []
+    for tok in tokens:
+        # Anything containing "ParseError" (bare string or list) is never merged.
+        isChars = "ParseError" not in tok and tok[0] == "Character"
+        if (isChars and merged and "ParseError" not in merged[-1] and
+                merged[-1][0] == "Character"):
+            merged[-1][1] += tok[1]  # grows the earlier token list in place
+        else:
+            merged.append(tok)
+    return merged
+
+
+def normalizeTokens(tokens):
+    """Replace each ["ParseError", ...] entry of tokens by "ParseError", in place."""
+    # TODO: convert tests to reflect arrays
+    tokens[:] = [tok[0] if tok[0] == 'ParseError' else tok
+                 for tok in tokens]
+    return tokens
+
+
+def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder,
+                ignoreErrors=False):
+    """Return whether receivedTokens matches expectedTokens.
+
+    With ignoreErrorOrder set, parse errors are compared apart from the other
+    tokens, so their relative positions are not tested; with ignoreErrors
+    set they are left out of the comparison altogether.
+    """
+    # Unless an expected tag token carries a selfClosing entry, the last
+    # element is popped off every received StartTag/EndTag token (in place).
+    checkSelfClosing = any(
+        (tok[0] == "StartTag" and len(tok) == 4) or
+        (tok[0] == "EndTag" and len(tok) == 3)
+        for tok in expectedTokens)
+
+    if not checkSelfClosing:
+        for tok in receivedTokens:
+            if tok[0] in ("StartTag", "EndTag"):
+                tok.pop()
+
+    if not (ignoreErrorOrder or ignoreErrors):
+        return concatenateCharacterTokens(expectedTokens) == receivedTokens
+
+    def split(tokenList):
+        # Partition into [non-error tokens, parse errors] in one pass.
+        groups = [[], []]
+        for tok in tokenList:
+            if tok != "ParseError":
+                groups[0].append(tok)
+            elif not ignoreErrors:
+                groups[1].append(tok)
+        groups[0] = concatenateCharacterTokens(groups[0])
+        return groups
+
+    return split(expectedTokens) == split(receivedTokens)
+
+
+_surrogateRe = re.compile(r"\\u([0-9A-Fa-f]{4})(?:\\u([0-9A-Fa-f]{4}))?")
+
+
+def unescape(test):
+    """Decode the \\uXXXX escapes of a tokenizer test dict, in place.
+
+    Covers test["input"] and each output token's data and attribute dict; returns test.
+    """
+    def decode(inp):
+        """Decode \\uXXXX escapes
+
+        This decodes \\uXXXX escapes, possibly into non-BMP characters when
+        two surrogate character escapes are adjacent to each other.
+        """
+        # This cannot be implemented using the unicode_escape codec
+        # because that requires its input be ISO-8859-1, and we need
+        # arbitrary unicode as input.
+        def repl(m):
+            if m.group(2) is None:
+                return unichr(int(m.group(1), 16))
+            high = int(m.group(1), 16)
+            low = int(m.group(2), 16)
+            if 0xD800 <= high <= 0xDBFF and 0xDC00 <= low <= 0xDFFF:
+                return unichr(((high - 0xD800) << 10) + (low - 0xDC00) + 0x10000)
+            return unichr(high) + unichr(low)
+        try:
+            return _surrogateRe.sub(repl, inp)
+        except ValueError:
+            # This occurs when unichr throws ValueError, which should
+            # only be for a lone-surrogate.
+            if _utils.supports_lone_surrogates:
+                raise
+            return None
+
+    test["input"] = decode(test["input"])
+    for token in test["output"]:
+        if token == "ParseError":
+            continue
+        token[1] = decode(token[1])
+        if len(token) > 2 and isinstance(token[2], dict):
+            # Rebuild the attributes; a dict must not change size mid-iteration.
+            token[2] = {decode(key): decode(value)
+                        for key, value in token[2].items()}
+    return test
+
+
+def _doCapitalize(match):
+    """Return the word character captured by ``match``, upper-cased."""
+    return match.group(1).upper()
+
+
+_capitalizeRe = re.compile(r"\W+(\w)").sub  # bound sub() of the compiled pattern
+
+
+def capitalize(s):
+    """Lower-case ``s`` and camel-case it, e.g. "Data state" -> "dataState"."""
+    return _capitalizeRe(_doCapitalize, s.lower())
+
+
+class TokenizerFile(pytest.File):
+    """Collects the tests of one JSON tokenizer test file."""
+    def collect(self):
+        with codecs.open(str(self.fspath), "r", encoding="utf-8") as fp:
+            data = json.load(fp)
+        for index, testdata in enumerate(data['tests'] if 'tests' in data else ()):
+            yield TokenizerTestCollector(str(index), self, testdata=testdata)
+
+
+class TokenizerTestCollector(pytest.Collector):
+    """Collector for one test entry; yields a TokenizerTest per initial state."""
+    def __init__(self, name, parent=None, config=None, session=None, testdata=None):
+        super(TokenizerTestCollector, self).__init__(name, parent, config, session)
+        # An entry that lists no initial states is run in the data state only.
+        testdata.setdefault("initialStates", ["Data state"])
+        self.testdata = unescape(testdata) if 'doubleEscaped' in testdata else testdata
+
+    def collect(self):
+        for stateName in self.testdata["initialStates"]:
+            stateName = capitalize(stateName)
+            item = TokenizerTest(stateName, self,
+                                 self.testdata, stateName)
+            # unescape() leaves the input as None when decoding it would
+            # require lone surrogates and those are not supported, so such
+            # items are marked to be skipped.
+            if self.testdata["input"] is None:
+                item.add_marker(pytest.mark.skipif(True, reason="Relies on lone surrogates"))
+            yield item
+
+
+class TokenizerTest(pytest.Item):
+    """Runs the tokenizer over one test's input, starting in one initial state."""
+    def __init__(self, name, parent, test, initialState):
+        super(TokenizerTest, self).__init__(name, parent)
+        self.obj = lambda: 1  # this is to hack around skipif needing a function!
+        self.test = test
+        self.initialState = initialState
+
+    def runtest(self):
+        """Tokenize the input and assert the tokens match the expected output."""
+        expected = self.test['output']
+        ignoreErrorOrder = self.test.get('ignoreErrorOrder', False)
+        # Warnings are errors while the test runs; catch_warnings() restores
+        # the previous filters afterwards so the setting cannot leak into
+        # tests that run later in the same session.
+        with warnings.catch_warnings():
+            warnings.resetwarnings()
+            warnings.simplefilter("error")
+            parser = TokenizerTestParser(self.initialState,
+                                         self.test.get('lastStartTag'))
+            tokens = parser.parse(self.test['input'])
+            received = normalizeTokens(tokens)
+            errorMsg = "\n".join(["\n\nInitial state:",
+                                  self.initialState,
+                                  "\nInput:", self.test['input'],
+                                  "\nExpected:", repr(expected),
+                                  "\nreceived:", repr(tokens)])
+            assert tokensMatch(expected, received, ignoreErrorOrder, True), errorMsg
+
+    def repr_failure(self, excinfo):
+        """Build the failure report from a cut and filtered short traceback."""
+        traceback = excinfo.traceback.cut(path=__file__)
+        excinfo.traceback = traceback.filter()
+        return excinfo.getrepr(funcargs=True, showlocals=False,
+                               style="short", tbfilter=False)
diff --git a/lib/html5lib/tests/tokenizertotree.py b/lib/html5lib/tests/tokenizertotree.py
new file mode 100644
index 00000000..8528e876
--- /dev/null
+++ b/lib/html5lib/tests/tokenizertotree.py
@@ -0,0 +1,69 @@
+from __future__ import absolute_import, division, unicode_literals
+
+import sys
+import os
+import json
+import re
+
+import html5lib
+from . import support
+from . import test_tokenizer
+
+p = html5lib.HTMLParser()  # module-level parser, reused by every make_test() call
+
+unnamespaceExpected = re.compile(r"^(\|\s*)<html ([^>]+)>", re.M).sub  # bound sub(); see make_test()
+
+
+def main(out_path):
+    """Run run_file() on every tokenizer ``*.test`` data file; exit 1 if out_path is missing."""
+    if not os.path.exists(out_path):
+        sys.stderr.write("Path %s does not exist\n" % out_path)
+        sys.exit(1)
+    for filename in support.get_data_files('tokenizer', '*.test'):
+        run_file(filename, out_path)
+
+
+def run_file(filename, out_path):
+    """Convert one JSON tokenizer test file into ``tokenizer_<name>.dat``.
+
+    A file that cannot be parsed is reported on stderr and skipped. Only the
+    "Data state" initial state is converted; other states are ignored.
+    """
+    try:
+        with open(filename, "r") as input_file:
+            tests_data = json.load(input_file)
+    except ValueError:
+        sys.stderr.write("Failed to load %s\n" % filename)
+        return
+    name = os.path.splitext(os.path.split(filename)[1])[0]
+    # Both files are opened with ``with`` so they are closed even when
+    # parsing or make_test() raises.
+    with open(os.path.join(out_path, "tokenizer_%s.dat" % name), "w") as output_file:
+        if 'tests' in tests_data:
+            for test_data in tests_data['tests']:
+                for initial_state in test_data.get("initialStates", ["Data state"]):
+                    # don't support other initial states yet
+                    if initial_state == "Data state":
+                        output_file.write(make_test(test_data))
+
+
+def make_test(test_data):
+    """Return the text of one tree-construction test built from test_data.
+
+    It holds the input under "#data" and the parser's tree dump after "#errors".
+    """
+    if 'doubleEscaped' in test_data:
+        # NOTE(review): the ``unescape`` helper visible elsewhere in this patch
+        # has no ``_test`` suffix; confirm ``test_tokenizer.unescape_test`` exists.
+        test_data = test_tokenizer.unescape_test(test_data)
+    tree = p.parse(test_data["input"])
+    output = p.tree.testSerializer(tree)
+    output = "\n".join(("| " + line[3:]) if line.startswith("|  ") else line
+                       for line in output.split("\n"))
+    output = unnamespaceExpected(r"\1<\2>", output)
+    # Keep text throughout: joining utf8-encoded bytes with a text separator fails on Python 3.
+    return "\n".join(["#data", test_data["input"], "#errors", output, ""])
+
+
+if __name__ == "__main__":
+    main(sys.argv[1])  # argv[1]: the existing directory the .dat files are written to
diff --git a/lib/html5lib/tests/tree_construction.py b/lib/html5lib/tests/tree_construction.py
new file mode 100644
index 00000000..1ef6e725
--- /dev/null
+++ b/lib/html5lib/tests/tree_construction.py
@@ -0,0 +1,205 @@
+from __future__ import absolute_import, division, unicode_literals
+
+import itertools
+import re
+import warnings
+from difflib import unified_diff
+
+import pytest
+
+from .support import TestData, convert, convertExpected, treeTypes
+from html5lib import html5parser, constants, treewalkers
+from html5lib.filters.lint import Filter as Lint
+
+_attrlist_re = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+", re.M)
+
+
+def sortattrs(s):
+    """Return s with each run of consecutive, equally indented ``name=...`` lines sorted."""
+    def replace(m):
+        # The whole match is one run of such lines; put them in plain string order.
+        return "\n".join(sorted(m.group(0).split("\n")))
+    return _attrlist_re.sub(replace, s)
+
+
+class TreeConstructionFile(pytest.File):
+    def collect(self):
+        # One TreeConstructionTest collector per test read from the data file.
+        for index, testdata in enumerate(TestData(str(self.fspath), "data")):
+            yield TreeConstructionTest(str(index), self, testdata=testdata)
+
+
+class TreeConstructionTest(pytest.Collector):
+    """Collector for one tree-construction test.
+
+    Yields, for every tree type in treeTypes, its parser items (if any) and its tree walker item.
+    """
+    def __init__(self, name, parent=None, config=None, session=None, testdata=None):
+        super(TreeConstructionTest, self).__init__(name, parent, config, session)
+        self.testdata = testdata
+
+    def collect(self):
+        # Tree types are visited in sorted order; for each one the parser
+        # items are yielded before the tree walker item.
+        for treeName, treeAPIs in sorted(treeTypes.items()):
+            for item in self._getParserTests(treeName, treeAPIs):
+                yield item
+            for item in self._getTreeWalkerTests(treeName, treeAPIs):
+                yield item
+
+    def _getParserTests(self, treeName, treeAPIs):
+        """Yield a namespaced and a void-namespace ParserTest (none if an adapter is used)."""
+        if treeAPIs is not None and "adapter" in treeAPIs:
+            return
+        builder = None if treeAPIs is None else treeAPIs["builder"]
+        nodeids = ((True, "%s::parser::namespaced" % treeName),
+                   (False, "%s::parser::void-namespace" % treeName))
+        for namespaced, nodeid in nodeids:
+            item = ParserTest(nodeid, self, self.testdata, builder, namespaced)
+            item.add_marker(getattr(pytest.mark, treeName))
+            item.add_marker(pytest.mark.parser)
+            if namespaced:
+                item.add_marker(pytest.mark.namespaced)
+            yield item
+
+    def _getTreeWalkerTests(self, treeName, treeAPIs):
+        """Yield the single TreeWalkerTest for this tree type."""
+        item = TreeWalkerTest("%s::treewalker" % treeName, self, self.testdata, treeAPIs)
+        item.add_marker(getattr(pytest.mark, treeName))
+        item.add_marker(pytest.mark.treewalker)
+        yield item
+
+
+def convertTreeDump(data):
+    return convert(3)(data).partition("\n")[2]  # everything after the first line
+
+
+namespaceExpected = re.compile(r"^(\s*)<(\S+)>", re.M).sub  # bound sub(); used to turn "<name>" into "<html name>"
+
+
+class ParserTest(pytest.Item):
+    """Parses one test input with one treebuilder and checks the resulting tree dump."""
+    def __init__(self, name, parent, test, treeClass, namespaceHTMLElements):
+        super(ParserTest, self).__init__(name, parent)
+        self.test = test
+        self.treeClass = treeClass
+        self.namespaceHTMLElements = namespaceHTMLElements
+
+    def runtest(self):
+        """Parse the test data and compare the tree dump with the expected one.
+
+        The test is skipped when no treebuilder was given or when parsing
+        raises DataLossWarning.
+        """
+        if self.treeClass is None:
+            pytest.skip("Treebuilder not loaded")
+        p = html5parser.HTMLParser(tree=self.treeClass,
+                                   namespaceHTMLElements=self.namespaceHTMLElements)
+
+        input = self.test['data']
+        fragmentContainer = self.test['document-fragment']
+        expected = convertExpected(self.test['document'])
+        expectedErrors = self.test['errors'].split("\n") if self.test['errors'] else []
+        scripting = 'script-on' in self.test
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("error")
+            try:
+                if fragmentContainer:
+                    document = p.parseFragment(input, fragmentContainer, scripting=scripting)
+                else:
+                    document = p.parse(input, scripting=scripting)
+            except constants.DataLossWarning:
+                pytest.skip("data loss warning")
+
+        output = convertTreeDump(p.tree.testSerializer(document))
+        if self.namespaceHTMLElements:
+            expected = namespaceExpected(r"\1<html \2>", expected)
+
+        errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,
+                              "\nReceived:", output])
+        assert expected == output, errorMsg
+
+        # The reported errors are still formatted (which checks their shape),
+        # even though their number is not compared with the expected errors.
+        errStr = []
+        for (line, col), errorcode, datavars in p.errors:
+            assert isinstance(datavars, dict), "%s, %s" % (errorcode, repr(datavars))
+            errStr.append("Line: %i Col: %i %s" % (line, col,
+                                                   constants.E[errorcode] % datavars))
+
+        errorMsg2 = "\n".join(["\n\nInput:", input,
+                               "\nExpected errors (" + str(len(expectedErrors)) + "):\n" + "\n".join(expectedErrors),
+                               "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)])
+        if False:  # we're currently not testing parse errors
+            assert len(p.errors) == len(expectedErrors), errorMsg2
+
+    def repr_failure(self, excinfo):
+        """Build the failure report from a cut and filtered short traceback."""
+        traceback = excinfo.traceback.cut(path=__file__)
+        excinfo.traceback = traceback.filter()
+        return excinfo.getrepr(funcargs=True, showlocals=False,
+                               style="short", tbfilter=False)
+
+
+class TreeWalkerTest(pytest.Item):
+    """Parses one test input, walks the tree and checks the walker's output."""
+    def __init__(self, name, parent, test, treeAPIs):
+        super(TreeWalkerTest, self).__init__(name, parent)
+        self.test = test
+        self.treeAPIs = treeAPIs
+
+    def runtest(self):
+        """Compare the linted, pretty-printed tree walk with the expected tree.
+
+        The test is skipped when no tree APIs were given, when parsing raises
+        DataLossWarning, when the parser's own dump is already wrong, or when
+        the tree walker raises NotImplementedError.
+        """
+        if self.treeAPIs is None:
+            pytest.skip("Treebuilder not loaded")
+        p = html5parser.HTMLParser(tree=self.treeAPIs["builder"])
+        input = self.test['data']
+        fragmentContainer = self.test['document-fragment']
+        expected = convertExpected(self.test['document'])
+        scripting = 'script-on' in self.test
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("error")
+            try:
+                if fragmentContainer:
+                    document = p.parseFragment(input, fragmentContainer, scripting=scripting)
+                else:
+                    document = p.parse(input, scripting=scripting)
+            except constants.DataLossWarning:
+                pytest.skip("data loss warning")
+
+        # A tree the parser got wrong says nothing about the walker.
+        poutput = convertTreeDump(p.tree.testSerializer(document))
+        if poutput != namespaceExpected(r"\1<html \2>", expected):
+            pytest.skip("parser output incorrect")
+
+        if "adapter" in self.treeAPIs:
+            document = self.treeAPIs["adapter"](document)
+
+        try:
+            output = sortattrs(treewalkers.pprint(Lint(self.treeAPIs["walker"](document))))
+            expected = sortattrs(expected)
+            diff = "".join(unified_diff([line + "\n" for line in expected.splitlines()],
+                                        [line + "\n" for line in output.splitlines()],
+                                        "Expected", "Received"))
+            assert expected == output, "\n".join([
+                "", "Input:", input,
+                "", "Expected:", expected,
+                "", "Received:", output,
+                "", "Diff:", diff,
+            ])
+        except NotImplementedError:
+            pytest.skip("tree walker NotImplementedError")
+
+    def repr_failure(self, excinfo):
+        """Build the failure report from a cut and filtered short traceback."""
+        traceback = excinfo.traceback.cut(path=__file__)
+        excinfo.traceback = traceback.filter()
+        return excinfo.getrepr(funcargs=True, showlocals=False,
+                               style="short", tbfilter=False)
diff --git a/lib/html5lib/treeadapters/__init__.py b/lib/html5lib/treeadapters/__init__.py
index e69de29b..dfeb0ba5 100644
--- a/lib/html5lib/treeadapters/__init__.py
+++ b/lib/html5lib/treeadapters/__init__.py
@@ -0,0 +1,30 @@
+"""Tree adapters let you convert from one tree structure to another
+
+Example:
+
+.. code-block:: python
+
+   import html5lib
+   from html5lib.treeadapters import genshi
+
+   doc = '<html><body>Hi!</body></html>'
+   treebuilder = html5lib.getTreeBuilder('etree')
+   parser = html5lib.HTMLParser(tree=treebuilder)
+   tree = parser.parse(doc)
+   TreeWalker = html5lib.getTreeWalker('etree')
+
+   genshi_tree = genshi.to_genshi(TreeWalker(tree))
+
+"""
+from __future__ import absolute_import, division, unicode_literals
+
+from . import sax
+
+__all__ = ["sax"]
+
+try:
+    from . import genshi  # noqa
+except ImportError:
+    pass
+else:
+    __all__.append("genshi")
diff --git a/lib/html5lib/treeadapters/genshi.py b/lib/html5lib/treeadapters/genshi.py
new file mode 100644
index 00000000..61d5fb6a
--- /dev/null
+++ b/lib/html5lib/treeadapters/genshi.py
@@ -0,0 +1,54 @@
+from __future__ import absolute_import, division, unicode_literals
+
+from genshi.core import QName, Attrs
+from genshi.core import START, END, TEXT, COMMENT, DOCTYPE
+
+
+def to_genshi(walker):
+    """Convert a tree to a genshi tree
+
+    Runs of character tokens are merged into one TEXT event each.
+
+    :arg walker: the treewalker to use to walk the tree to convert it
+
+    :returns: generator of genshi nodes
+
+    """
+    pos = (None, -1, -1)  # the same placeholder position goes on every event
+
+    def qualified(token):
+        # "{namespace}name" when the token has a namespace, otherwise the bare name.
+        if token["namespace"]:
+            return "{%s}%s" % (token["namespace"], token["name"])
+        return token["name"]
+
+    pending = []  # character data collected since the last TEXT event
+    for token in walker:
+        kind = token["type"]
+        if kind in ("Characters", "SpaceCharacters"):
+            pending.append(token["data"])
+        elif pending:
+            # Any other token flushes the collected text as one TEXT event.
+            yield TEXT, "".join(pending), pos
+            pending = []
+
+        if kind in ("StartTag", "EmptyTag"):
+            name = qualified(token)
+            attrs = Attrs([(QName("{%s}%s" % attr if attr[0] is not None else attr[1]), value)
+                           for attr, value in token["data"].items()])
+            yield (START, (QName(name), attrs), pos)
+            if kind == "EmptyTag":
+                kind = "EndTag"  # an empty tag is closed right after it is opened
+
+        if kind == "EndTag":
+            yield END, QName(qualified(token)), pos
+        elif kind == "Comment":
+            yield COMMENT, token["data"], pos
+        elif kind == "Doctype":
+            yield DOCTYPE, (token["name"], token["publicId"],
+                            token["systemId"]), pos
+        else:
+            pass  # FIXME: What to do?
+
+    if pending:
+        yield TEXT, "".join(pending), pos
diff --git a/lib/html5lib/treeadapters/sax.py b/lib/html5lib/treeadapters/sax.py
index ad47df95..f4ccea5a 100644
--- a/lib/html5lib/treeadapters/sax.py
+++ b/lib/html5lib/treeadapters/sax.py
@@ -11,7 +11,13 @@ for prefix, localName, namespace in adjustForeignAttributes.values():
 
 
 def to_sax(walker, handler):
-    """Call SAX-like content handler based on treewalker walker"""
+    """Call SAX-like content handler based on treewalker walker
+
+    :arg walker: the treewalker to use to walk the tree to convert it
+
+    :arg handler: SAX handler to use
+
+    """
     handler.startDocument()
     for prefix, namespace in prefix_mapping.items():
         handler.startPrefixMapping(prefix, namespace)
diff --git a/lib/html5lib/treebuilders/__init__.py b/lib/html5lib/treebuilders/__init__.py
index 6a6b2a4c..d44447ea 100644
--- a/lib/html5lib/treebuilders/__init__.py
+++ b/lib/html5lib/treebuilders/__init__.py
@@ -1,56 +1,68 @@
-"""A collection of modules for building different kinds of tree from
-HTML documents.
+"""A collection of modules for building different kinds of trees from HTML
+documents.
 
 To create a treebuilder for a new type of tree, you need to do
 implement several things:
 
-1) A set of classes for various types of elements: Document, Doctype,
-Comment, Element. These must implement the interface of
-_base.treebuilders.Node (although comment nodes have a different
-signature for their constructor, see treebuilders.etree.Comment)
-Textual content may also be implemented as another node type, or not, as
-your tree implementation requires.
+1. A set of classes for various types of elements: Document, Doctype, Comment,
+   Element. These must implement the interface of ``treebuilders.base.Node``
+   (although comment nodes have a different signature for their constructor,
+   see ``treebuilders.etree.Comment``). Textual content may also be implemented
+   as another node type, or not, as your tree implementation requires.
 
-2) A treebuilder object (called TreeBuilder by convention) that
-inherits from treebuilders._base.TreeBuilder. This has 4 required attributes:
-documentClass - the class to use for the bottommost node of a document
-elementClass - the class to use for HTML Elements
-commentClass - the class to use for comments
-doctypeClass - the class to use for doctypes
-It also has one required method:
-getDocument - Returns the root node of the complete document tree
+2. A treebuilder object (called ``TreeBuilder`` by convention) that inherits
+   from ``treebuilders.base.TreeBuilder``. This has 4 required attributes:
+
+   * ``documentClass`` - the class to use for the bottommost node of a document
+   * ``elementClass`` - the class to use for HTML Elements
+   * ``commentClass`` - the class to use for comments
+   * ``doctypeClass`` - the class to use for doctypes
+
+   It also has one required method:
+
+   * ``getDocument`` - Returns the root node of the complete document tree
+
+3. If you wish to run the unit tests, you must also create a ``testSerializer``
+   method on your treebuilder which accepts a node and returns a string
+   containing Node and its children serialized according to the format used in
+   the unittests
 
-3) If you wish to run the unit tests, you must also create a
-testSerializer method on your treebuilder which accepts a node and
-returns a string containing Node and its children serialized according
-to the format used in the unittests
 """
 
 from __future__ import absolute_import, division, unicode_literals
 
-from ..utils import default_etree
+from .._utils import default_etree
 
 treeBuilderCache = {}
 
 
 def getTreeBuilder(treeType, implementation=None, **kwargs):
-    """Get a TreeBuilder class for various types of tree with built-in support
+    """Get a TreeBuilder class for various types of trees with built-in support
 
-    treeType - the name of the tree type required (case-insensitive). Supported
-               values are:
+    :arg treeType: the name of the tree type required (case-insensitive). Supported
+        values are:
 
-               "dom" - A generic builder for DOM implementations, defaulting to
-                       a xml.dom.minidom based implementation.
-               "etree" - A generic builder for tree implementations exposing an
-                         ElementTree-like interface, defaulting to
-                         xml.etree.cElementTree if available and
-                         xml.etree.ElementTree if not.
-               "lxml" - A etree-based builder for lxml.etree, handling
-                        limitations of lxml's implementation.
+        * "dom" - A generic builder for DOM implementations, defaulting to a
+          xml.dom.minidom based implementation.
+        * "etree" - A generic builder for tree implementations exposing an
+          ElementTree-like interface, defaulting to xml.etree.cElementTree if
+          available and xml.etree.ElementTree if not.
+        * "lxml" - An etree-based builder for lxml.etree, handling limitations
+          of lxml's implementation.
 
-    implementation - (Currently applies to the "etree" and "dom" tree types). A
-                      module implementing the tree type e.g.
-                      xml.etree.ElementTree or xml.etree.cElementTree."""
+    :arg implementation: (Currently applies to the "etree" and "dom" tree
+        types). A module implementing the tree type e.g. xml.etree.ElementTree
+        or xml.etree.cElementTree.
+
+    :arg kwargs: Any additional options to pass to the TreeBuilder when
+        creating it.
+
+    Example:
+
+    >>> from html5lib.treebuilders import getTreeBuilder
+    >>> builder = getTreeBuilder('etree')
+
+    """
 
     treeType = treeType.lower()
     if treeType not in treeBuilderCache:
diff --git a/lib/html5lib/treebuilders/_base.py b/lib/html5lib/treebuilders/base.py
similarity index 84%
rename from lib/html5lib/treebuilders/_base.py
rename to lib/html5lib/treebuilders/base.py
index 8b97cc11..e4a3d710 100644
--- a/lib/html5lib/treebuilders/_base.py
+++ b/lib/html5lib/treebuilders/base.py
@@ -10,9 +10,9 @@ Marker = None
 
 listElementsMap = {
     None: (frozenset(scopingElements), False),
-    "button": (frozenset(scopingElements | set([(namespaces["html"], "button")])), False),
-    "list": (frozenset(scopingElements | set([(namespaces["html"], "ol"),
-                                              (namespaces["html"], "ul")])), False),
+    "button": (frozenset(scopingElements | {(namespaces["html"], "button")}), False),
+    "list": (frozenset(scopingElements | {(namespaces["html"], "ol"),
+                                          (namespaces["html"], "ul")}), False),
     "table": (frozenset([(namespaces["html"], "html"),
                          (namespaces["html"], "table")]), False),
     "select": (frozenset([(namespaces["html"], "optgroup"),
@@ -21,22 +21,25 @@ listElementsMap = {
 
 
 class Node(object):
+    """Represents an item in the tree"""
     def __init__(self, name):
-        """Node representing an item in the tree.
-        name - The tag name associated with the node
-        parent - The parent of the current node (or None for the document node)
-        value - The value of the current node (applies to text nodes and
-        comments
-        attributes - a dict holding name, value pairs for attributes of the node
-        childNodes - a list of child nodes of the current node. This must
-        include all elements but not necessarily other node types
-        _flags - A list of miscellaneous flags that can be set on the node
+        """Creates a Node
+
+        :arg name: The tag name associated with the node
+
         """
+        # The tag name associated with the node
         self.name = name
+        # The parent of the current node (or None for the document node)
         self.parent = None
+        # The value of the current node (applies to text nodes and comments)
         self.value = None
+        # A dict holding name -> value pairs for attributes of the node
         self.attributes = {}
+        # A list of child nodes of the current node. This must include all
+        # elements but not necessarily other node types.
         self.childNodes = []
+        # A list of miscellaneous flags that can be set on the node.
         self._flags = []
 
     def __str__(self):
@@ -53,23 +56,41 @@ class Node(object):
 
     def appendChild(self, node):
         """Insert node as a child of the current node
+
+        :arg node: the node to insert
+
         """
         raise NotImplementedError
 
     def insertText(self, data, insertBefore=None):
         """Insert data as text in the current node, positioned before the
         start of node insertBefore or to the end of the node's text.
+
+        :arg data: the data to insert
+
+        :arg insertBefore: the node to insert the text before; if None, the
+            text is added to the end of the current node's text
+
         """
         raise NotImplementedError
 
     def insertBefore(self, node, refNode):
         """Insert node as a child of the current node, before refNode in the
         list of child nodes. Raises ValueError if refNode is not a child of
-        the current node"""
+        the current node
+
+        :arg node: the node to insert
+
+        :arg refNode: the child node to insert the node before
+
+        """
         raise NotImplementedError
 
     def removeChild(self, node):
         """Remove node from the children of the current node
+
+        :arg node: the child node to remove
+
         """
         raise NotImplementedError
 
@@ -77,6 +98,9 @@ class Node(object):
         """Move all the children of the current node to newParent.
         This is needed so that trees that don't store text as nodes move the
         text in the correct way
+
+        :arg newParent: the node to move all this node's children to
+
         """
         # XXX - should this method be made more general?
         for child in self.childNodes:
@@ -121,11 +145,14 @@ class ActiveFormattingElements(list):
 
 class TreeBuilder(object):
     """Base treebuilder implementation
-    documentClass - the class to use for the bottommost node of a document
-    elementClass - the class to use for HTML Elements
-    commentClass - the class to use for comments
-    doctypeClass - the class to use for doctypes
+
+    * documentClass - the class to use for the bottommost node of a document
+    * elementClass - the class to use for HTML Elements
+    * commentClass - the class to use for comments
+    * doctypeClass - the class to use for doctypes
+
     """
+    # pylint:disable=not-callable
 
     # Document class
     documentClass = None
@@ -143,6 +170,11 @@ class TreeBuilder(object):
     fragmentClass = None
 
     def __init__(self, namespaceHTMLElements):
+        """Create a TreeBuilder
+
+        :arg namespaceHTMLElements: whether or not to namespace HTML elements
+
+        """
         if namespaceHTMLElements:
             self.defaultNamespace = "http://www.w3.org/1999/xhtml"
         else:
@@ -166,12 +198,17 @@ class TreeBuilder(object):
         # If we pass a node in we match that. if we pass a string
         # match any node with that name
         exactNode = hasattr(target, "nameTuple")
+        if not exactNode:
+            if isinstance(target, text_type):
+                target = (namespaces["html"], target)
+            assert isinstance(target, tuple)
 
         listElements, invert = listElementsMap[variant]
 
         for node in reversed(self.openElements):
-            if (node.name == target and not exactNode or
-                    node == target and exactNode):
+            if exactNode and node == target:
+                return True
+            elif not exactNode and node.nameTuple == target:
                 return True
             elif (invert ^ (node.nameTuple in listElements)):
                 return False
@@ -353,19 +390,19 @@ class TreeBuilder(object):
     def generateImpliedEndTags(self, exclude=None):
         name = self.openElements[-1].name
         # XXX td, th and tr are not actually needed
-        if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"))
-                and name != exclude):
+        if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt")) and
+                name != exclude):
             self.openElements.pop()
             # XXX This is not entirely what the specification says. We should
             # investigate it more closely.
             self.generateImpliedEndTags(exclude)
 
     def getDocument(self):
-        "Return the final tree"
+        """Return the final tree"""
         return self.document
 
     def getFragment(self):
-        "Return the final fragment"
+        """Return the final fragment"""
         # assert self.innerHTML
         fragment = self.fragmentClass()
         self.openElements[0].reparentChildren(fragment)
@@ -373,5 +410,8 @@ class TreeBuilder(object):
 
     def testSerializer(self, node):
         """Serialize the subtree of node in the format required by unit tests
-        node - the node from which to start serializing"""
+
+        :arg node: the node from which to start serializing
+
+        """
         raise NotImplementedError
diff --git a/lib/html5lib/treebuilders/dom.py b/lib/html5lib/treebuilders/dom.py
index 61e5ed79..d8b53004 100644
--- a/lib/html5lib/treebuilders/dom.py
+++ b/lib/html5lib/treebuilders/dom.py
@@ -1,54 +1,65 @@
 from __future__ import absolute_import, division, unicode_literals
 
 
+try:
+    from collections.abc import MutableMapping
+except ImportError:  # Python 2.7
+    from collections import MutableMapping
 from xml.dom import minidom, Node
 import weakref
 
-from . import _base
+from . import base
 from .. import constants
 from ..constants import namespaces
-from ..utils import moduleFactoryFactory
+from .._utils import moduleFactoryFactory
 
 
 def getDomBuilder(DomImplementation):
     Dom = DomImplementation
 
-    class AttrList(object):
+    class AttrList(MutableMapping):
         def __init__(self, element):
             self.element = element
 
         def __iter__(self):
-            return list(self.element.attributes.items()).__iter__()
+            return iter(self.element.attributes.keys())
 
         def __setitem__(self, name, value):
-            self.element.setAttribute(name, value)
-
-        def __len__(self):
-            return len(list(self.element.attributes.items()))
-
-        def items(self):
-            return [(item[0], item[1]) for item in
-                    list(self.element.attributes.items())]
-
-        def keys(self):
-            return list(self.element.attributes.keys())
-
-        def __getitem__(self, name):
-            return self.element.getAttribute(name)
-
-        def __contains__(self, name):
             if isinstance(name, tuple):
                 raise NotImplementedError
             else:
-                return self.element.hasAttribute(name)
+                attr = self.element.ownerDocument.createAttribute(name)
+                attr.value = value
+                self.element.attributes[name] = attr
 
-    class NodeBuilder(_base.Node):
+        def __len__(self):
+            return len(self.element.attributes)
+
+        def items(self):
+            return list(self.element.attributes.items())
+
+        def values(self):
+            return list(self.element.attributes.values())
+
+        def __getitem__(self, name):
+            if isinstance(name, tuple):
+                raise NotImplementedError
+            else:
+                return self.element.attributes[name].value
+
+        def __delitem__(self, name):
+            if isinstance(name, tuple):
+                raise NotImplementedError
+            else:
+                del self.element.attributes[name]
+
+    class NodeBuilder(base.Node):
         def __init__(self, element):
-            _base.Node.__init__(self, element.nodeName)
+            base.Node.__init__(self, element.nodeName)
             self.element = element
 
-        namespace = property(lambda self: hasattr(self.element, "namespaceURI")
-                             and self.element.namespaceURI or None)
+        namespace = property(lambda self: hasattr(self.element, "namespaceURI") and
+                             self.element.namespaceURI or None)
 
         def appendChild(self, node):
             node.parent = self
@@ -109,7 +120,7 @@ def getDomBuilder(DomImplementation):
 
         nameTuple = property(getNameTuple)
 
-    class TreeBuilder(_base.TreeBuilder):
+    class TreeBuilder(base.TreeBuilder):  # pylint:disable=unused-variable
         def documentClass(self):
             self.dom = Dom.getDOMImplementation().createDocument(None, None, None)
             return weakref.proxy(self)
@@ -149,16 +160,17 @@ def getDomBuilder(DomImplementation):
             return self.dom
 
         def getFragment(self):
-            return _base.TreeBuilder.getFragment(self).element
+            return base.TreeBuilder.getFragment(self).element
 
         def insertText(self, data, parent=None):
             data = data
             if parent != self:
-                _base.TreeBuilder.insertText(self, data, parent)
+                base.TreeBuilder.insertText(self, data, parent)
             else:
                 # HACK: allow text nodes as children of the document node
                 if hasattr(self.dom, '_child_node_types'):
-                    if not Node.TEXT_NODE in self.dom._child_node_types:
+                    # pylint:disable=protected-access
+                    if Node.TEXT_NODE not in self.dom._child_node_types:
                         self.dom._child_node_types = list(self.dom._child_node_types)
                         self.dom._child_node_types.append(Node.TEXT_NODE)
                 self.dom.appendChild(self.dom.createTextNode(data))
diff --git a/lib/html5lib/treebuilders/etree.py b/lib/html5lib/treebuilders/etree.py
index 2c8ed19f..086bed4e 100644
--- a/lib/html5lib/treebuilders/etree.py
+++ b/lib/html5lib/treebuilders/etree.py
@@ -1,13 +1,17 @@
 from __future__ import absolute_import, division, unicode_literals
+# pylint:disable=protected-access
+
 from six import text_type
 
 import re
 
-from . import _base
-from .. import ihatexml
+from copy import copy
+
+from . import base
+from .. import _ihatexml
 from .. import constants
 from ..constants import namespaces
-from ..utils import moduleFactoryFactory
+from .._utils import moduleFactoryFactory
 
 tag_regexp = re.compile("{([^}]*)}(.*)")
 
@@ -16,7 +20,7 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
     ElementTree = ElementTreeImplementation
     ElementTreeCommentType = ElementTree.Comment("asd").tag
 
-    class Element(_base.Node):
+    class Element(base.Node):
         def __init__(self, name, namespace=None):
             self._name = name
             self._namespace = namespace
@@ -59,16 +63,17 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
             return self._element.attrib
 
         def _setAttributes(self, attributes):
-            # Delete existing attributes first
-            # XXX - there may be a better way to do this...
-            for key in list(self._element.attrib.keys()):
-                del self._element.attrib[key]
-            for key, value in attributes.items():
-                if isinstance(key, tuple):
-                    name = "{%s}%s" % (key[2], key[1])
-                else:
-                    name = key
-                self._element.set(name, value)
+            el_attrib = self._element.attrib
+            el_attrib.clear()
+            if attributes:
+                # calling .items _always_ allocates, and the above truthy check is cheaper than the
+                # allocation on average
+                for key, value in attributes.items():
+                    if isinstance(key, tuple):
+                        name = "{%s}%s" % (key[2], key[1])
+                    else:
+                        name = key
+                    el_attrib[name] = value
 
         attributes = property(_getAttributes, _setAttributes)
 
@@ -98,6 +103,7 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
             node.parent = self
 
         def removeChild(self, node):
+            self._childNodes.remove(node)
             self._element.remove(node._element)
             node.parent = None
 
@@ -126,8 +132,8 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
 
         def cloneNode(self):
             element = type(self)(self.name, self.namespace)
-            for name, value in self.attributes.items():
-                element.attributes[name] = value
+            if self._element.attrib:
+                element._element.attrib = copy(self._element.attrib)
             return element
 
         def reparentChildren(self, newParent):
@@ -139,7 +145,7 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
                 if self._element.text is not None:
                     newParent._element.text += self._element.text
             self._element.text = ""
-            _base.Node.reparentChildren(self, newParent)
+            base.Node.reparentChildren(self, newParent)
 
     class Comment(Element):
         def __init__(self, data):
@@ -253,10 +259,10 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
 
         return "\n".join(rv)
 
-    def tostring(element):
+    def tostring(element):  # pylint:disable=unused-variable
         """Serialize an element and its child nodes to a string"""
         rv = []
-        filter = ihatexml.InfosetFilter()
+        filter = _ihatexml.InfosetFilter()
 
         def serializeElement(element):
             if isinstance(element, ElementTree.ElementTree):
@@ -307,7 +313,7 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
 
         return "".join(rv)
 
-    class TreeBuilder(_base.TreeBuilder):
+    class TreeBuilder(base.TreeBuilder):  # pylint:disable=unused-variable
         documentClass = Document
         doctypeClass = DocumentType
         elementClass = Element
@@ -329,7 +335,7 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
                     return self.document._element.find("html")
 
         def getFragment(self):
-            return _base.TreeBuilder.getFragment(self)._element
+            return base.TreeBuilder.getFragment(self)._element
 
     return locals()
 
diff --git a/lib/html5lib/treebuilders/etree_lxml.py b/lib/html5lib/treebuilders/etree_lxml.py
index 35d08efa..e73de61a 100644
--- a/lib/html5lib/treebuilders/etree_lxml.py
+++ b/lib/html5lib/treebuilders/etree_lxml.py
@@ -10,18 +10,25 @@ When any of these things occur, we emit a DataLossWarning
 """
 
 from __future__ import absolute_import, division, unicode_literals
+# pylint:disable=protected-access
 
 import warnings
 import re
 import sys
 
-from . import _base
+try:
+    from collections.abc import MutableMapping
+except ImportError:
+    from collections import MutableMapping
+
+from . import base
 from ..constants import DataLossWarning
 from .. import constants
 from . import etree as etree_builders
-from .. import ihatexml
+from .. import _ihatexml
 
 import lxml.etree as etree
+from six import PY3, binary_type
 
 
 fullTree = True
@@ -43,7 +50,11 @@ class Document(object):
         self._childNodes = []
 
     def appendChild(self, element):
-        self._elementTree.getroot().addnext(element._element)
+        last = self._elementTree.getroot()
+        for last in self._elementTree.getroot().itersiblings():
+            pass
+
+        last.addnext(element._element)
 
     def _getChildNodes(self):
         return self._childNodes
@@ -53,8 +64,7 @@ class Document(object):
 
 def testSerializer(element):
     rv = []
-    finalText = None
-    infosetFilter = ihatexml.InfosetFilter()
+    infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
 
     def serializeElement(element, indent=0):
         if not hasattr(element, "tag"):
@@ -79,7 +89,7 @@ def testSerializer(element):
                     next_element = next_element.getnext()
             elif isinstance(element, str) or isinstance(element, bytes):
                 # Text in a fragment
-                assert isinstance(element, str) or sys.version_info.major == 2
+                assert isinstance(element, str) or sys.version_info[0] == 2
                 rv.append("|%s\"%s\"" % (' ' * indent, element))
             else:
                 # Fragment case
@@ -128,16 +138,12 @@ def testSerializer(element):
                 rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
     serializeElement(element, 0)
 
-    if finalText is not None:
-        rv.append("|%s\"%s\"" % (' ' * 2, finalText))
-
     return "\n".join(rv)
 
 
 def tostring(element):
     """Serialize an element and its child nodes to a string"""
     rv = []
-    finalText = None
 
     def serializeElement(element):
         if not hasattr(element, "tag"):
@@ -173,13 +179,10 @@ def tostring(element):
 
     serializeElement(element)
 
-    if finalText is not None:
-        rv.append("%s\"" % (' ' * 2, finalText))
-
     return "".join(rv)
 
 
-class TreeBuilder(_base.TreeBuilder):
+class TreeBuilder(base.TreeBuilder):
     documentClass = Document
     doctypeClass = DocumentType
     elementClass = None
@@ -189,27 +192,40 @@ class TreeBuilder(_base.TreeBuilder):
 
     def __init__(self, namespaceHTMLElements, fullTree=False):
         builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
-        infosetFilter = self.infosetFilter = ihatexml.InfosetFilter()
+        infosetFilter = self.infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
         self.namespaceHTMLElements = namespaceHTMLElements
 
-        class Attributes(dict):
-            def __init__(self, element, value={}):
+        class Attributes(MutableMapping):
+            def __init__(self, element):
                 self._element = element
-                dict.__init__(self, value)
-                for key, value in self.items():
-                    if isinstance(key, tuple):
-                        name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
-                    else:
-                        name = infosetFilter.coerceAttribute(key)
-                    self._element._element.attrib[name] = value
 
-            def __setitem__(self, key, value):
-                dict.__setitem__(self, key, value)
+            def _coerceKey(self, key):
                 if isinstance(key, tuple):
                     name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
                 else:
                     name = infosetFilter.coerceAttribute(key)
-                self._element._element.attrib[name] = value
+                return name
+
+            def __getitem__(self, key):
+                value = self._element._element.attrib[self._coerceKey(key)]
+                if not PY3 and isinstance(value, binary_type):
+                    value = value.decode("ascii")
+                return value
+
+            def __setitem__(self, key, value):
+                self._element._element.attrib[self._coerceKey(key)] = value
+
+            def __delitem__(self, key):
+                del self._element._element.attrib[self._coerceKey(key)]
+
+            def __iter__(self):
+                return iter(self._element._element.attrib)
+
+            def __len__(self):
+                return len(self._element._element.attrib)
+
+            def clear(self):
+                return self._element._element.attrib.clear()
 
         class Element(builder.Element):
             def __init__(self, name, namespace):
@@ -230,8 +246,10 @@ class TreeBuilder(_base.TreeBuilder):
             def _getAttributes(self):
                 return self._attributes
 
-            def _setAttributes(self, attributes):
-                self._attributes = Attributes(self, attributes)
+            def _setAttributes(self, value):
+                attributes = self.attributes
+                attributes.clear()
+                attributes.update(value)
 
             attributes = property(_getAttributes, _setAttributes)
 
@@ -239,8 +257,11 @@ class TreeBuilder(_base.TreeBuilder):
                 data = infosetFilter.coerceCharacters(data)
                 builder.Element.insertText(self, data, insertBefore)
 
-            def appendChild(self, child):
-                builder.Element.appendChild(self, child)
+            def cloneNode(self):
+                element = type(self)(self.name, self.namespace)
+                if self._element.attrib:
+                    element._element.attrib.update(self._element.attrib)
+                return element
 
         class Comment(builder.Comment):
             def __init__(self, data):
@@ -257,12 +278,12 @@ class TreeBuilder(_base.TreeBuilder):
             data = property(_getData, _setData)
 
         self.elementClass = Element
-        self.commentClass = builder.Comment
+        self.commentClass = Comment
         # self.fragmentClass = builder.DocumentFragment
-        _base.TreeBuilder.__init__(self, namespaceHTMLElements)
+        base.TreeBuilder.__init__(self, namespaceHTMLElements)
 
     def reset(self):
-        _base.TreeBuilder.reset(self)
+        base.TreeBuilder.reset(self)
         self.insertComment = self.insertCommentInitial
         self.initial_comments = []
         self.doctype = None
@@ -303,19 +324,20 @@ class TreeBuilder(_base.TreeBuilder):
             self.doctype = doctype
 
     def insertCommentInitial(self, data, parent=None):
+        assert parent is None or parent is self.document
+        assert self.document._elementTree is None
         self.initial_comments.append(data)
 
     def insertCommentMain(self, data, parent=None):
         if (parent == self.document and
                 self.document._elementTree.getroot()[-1].tag == comment_type):
-                warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
+            warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
         super(TreeBuilder, self).insertComment(data, parent)
 
     def insertRoot(self, token):
-        """Create the document root"""
         # Because of the way libxml2 works, it doesn't seem to be possible to
         # alter information like the doctype after the tree has been parsed.
-        # Therefore we need to use the built-in parser to create our iniial
+        # Therefore we need to use the built-in parser to create our initial
         # tree, after which we can add elements like normal
         docStr = ""
         if self.doctype:
@@ -344,7 +366,8 @@ class TreeBuilder(_base.TreeBuilder):
 
         # Append the initial comments:
         for comment_token in self.initial_comments:
-            root.addprevious(etree.Comment(comment_token["data"]))
+            comment = self.commentClass(comment_token["data"])
+            root.addprevious(comment._element)
 
         # Create the root document and add the ElementTree to it
         self.document = self.documentClass()
diff --git a/lib/html5lib/treewalkers/__init__.py b/lib/html5lib/treewalkers/__init__.py
index 18124e75..b2d3aac3 100644
--- a/lib/html5lib/treewalkers/__init__.py
+++ b/lib/html5lib/treewalkers/__init__.py
@@ -2,17 +2,18 @@
 tree, generating tokens identical to those produced by the tokenizer
 module.
 
-To create a tree walker for a new type of tree, you need to do
+To create a tree walker for a new type of tree, you need to
 implement a tree walker object (called TreeWalker by convention) that
-implements a 'serialize' method taking a tree as sole argument and
-returning an iterator generating tokens.
+implements a 'serialize' method which takes a tree as sole argument and
+returns an iterator which generates tokens.
 """
 
 from __future__ import absolute_import, division, unicode_literals
 
-import sys
+from .. import constants
+from .._utils import default_etree
 
-from ..utils import default_etree
+__all__ = ["getTreeWalker", "pprint"]
 
 treeWalkerCache = {}
 
@@ -20,34 +21,38 @@ treeWalkerCache = {}
 def getTreeWalker(treeType, implementation=None, **kwargs):
     """Get a TreeWalker class for various types of tree with built-in support
 
-    treeType - the name of the tree type required (case-insensitive). Supported
-               values are:
+    :arg str treeType: the name of the tree type required (case-insensitive).
+        Supported values are:
 
-                "dom" - The xml.dom.minidom DOM implementation
-                "pulldom" - The xml.dom.pulldom event stream
-                "etree" - A generic walker for tree implementations exposing an
-                          elementtree-like interface (known to work with
-                          ElementTree, cElementTree and lxml.etree).
-                "lxml" - Optimized walker for lxml.etree
-                "genshi" - a Genshi stream
+        * "dom": The xml.dom.minidom DOM implementation
+        * "etree": A generic walker for tree implementations exposing an
+          elementtree-like interface (known to work with ElementTree,
+          cElementTree and lxml.etree).
+        * "lxml": Optimized walker for lxml.etree
+        * "genshi": a Genshi stream
 
-    implementation - (Currently applies to the "etree" tree type only). A module
-                      implementing the tree type e.g. xml.etree.ElementTree or
-                      cElementTree."""
+    :arg implementation: A module implementing the tree type e.g.
+        xml.etree.ElementTree or cElementTree (Currently applies to the "etree"
+        tree type only).
+
+    :arg kwargs: keyword arguments passed to the etree walker--for other
+        walkers, this has no effect
+
+    :returns: a TreeWalker class
+
+    """
 
     treeType = treeType.lower()
     if treeType not in treeWalkerCache:
-        if treeType in ("dom", "pulldom"):
-            name = "%s.%s" % (__name__, treeType)
-            __import__(name)
-            mod = sys.modules[name]
-            treeWalkerCache[treeType] = mod.TreeWalker
+        if treeType == "dom":
+            from . import dom
+            treeWalkerCache[treeType] = dom.TreeWalker
         elif treeType == "genshi":
-            from . import genshistream
-            treeWalkerCache[treeType] = genshistream.TreeWalker
+            from . import genshi
+            treeWalkerCache[treeType] = genshi.TreeWalker
         elif treeType == "lxml":
-            from . import lxmletree
-            treeWalkerCache[treeType] = lxmletree.TreeWalker
+            from . import etree_lxml
+            treeWalkerCache[treeType] = etree_lxml.TreeWalker
         elif treeType == "etree":
             from . import etree
             if implementation is None:
@@ -55,3 +60,95 @@ def getTreeWalker(treeType, implementation=None, **kwargs):
             # XXX: NEVER cache here, caching is done in the etree submodule
             return etree.getETreeModule(implementation, **kwargs).TreeWalker
     return treeWalkerCache.get(treeType)
+
+
+def concatenateCharacterTokens(tokens):
+    """Merge each run of adjacent character tokens into one Characters token."""
+    pending = []
+    for token in tokens:
+        if token["type"] in ("Characters", "SpaceCharacters"):
+            pending.append(token["data"])
+            continue
+        if pending:
+            yield {"type": "Characters", "data": "".join(pending)}
+            pending = []
+        yield token
+    if pending:
+        yield {"type": "Characters", "data": "".join(pending)}
+
+
+def pprint(walker):
+    """Pretty printer for tree walkers
+
+    Renders the tokens produced by walking the tree as an indented listing,
+    using two spaces of indentation for every element that is still open.
+
+    :arg walker: a TreeWalker instance
+
+    :returns: the listing as a single newline-joined string
+
+    """
+    lines = []
+    depth = 0
+    for token in concatenateCharacterTokens(walker):
+        kind = token["type"]
+        pad = " " * depth
+
+        if kind in ("StartTag", "EmptyTag"):
+            # Element name, prefixed for anything outside the HTML namespace.
+            known = constants.prefixes
+            tag_ns = token["namespace"]
+            if tag_ns and tag_ns != constants.namespaces["html"]:
+                prefix = known[tag_ns] if tag_ns in known else tag_ns
+                label = "%s %s" % (prefix, token["name"])
+            else:
+                label = token["name"]
+            lines.append("%s<%s>" % (pad, label))
+
+            # Attributes sit one level deeper, sorted for a stable ordering.
+            depth += 2
+            pad = " " * depth
+            for (attr_ns, localname), value in sorted(token["data"].items()):
+                if attr_ns:
+                    prefix = known[attr_ns] if attr_ns in known else attr_ns
+                    label = "%s %s" % (prefix, localname)
+                else:
+                    label = localname
+                lines.append("%s%s=\"%s\"" % (pad, label, value))
+
+            # An EmptyTag is self-closing, so drop back to the previous level.
+            if kind == "EmptyTag":
+                depth -= 2
+
+        elif kind == "EndTag":
+            depth -= 2
+
+        elif kind == "Comment":
+            lines.append("%s<!-- %s -->" % (pad, token["data"]))
+
+        elif kind == "Doctype":
+            # With a name, the quoted IDs are printed only when one is set.
+            name = token["name"]
+            if not name:
+                lines.append("%s<!DOCTYPE >" % (pad,))
+            elif token["publicId"]:
+                system_id = token["systemId"] if token["systemId"] else ""
+                lines.append("""%s<!DOCTYPE %s "%s" "%s">""" %
+                             (pad, name, token["publicId"], system_id))
+            elif token["systemId"]:
+                lines.append("""%s<!DOCTYPE %s "" "%s">""" %
+                             (pad, name, token["systemId"]))
+            else:
+                lines.append("%s<!DOCTYPE %s>" % (pad, name))
+
+        elif kind == "Characters":
+            lines.append("%s\"%s\"" % (pad, token["data"]))
+
+        elif kind == "SpaceCharacters":
+            # Only reachable if the concatenation step let one slip through.
+            assert False, "concatenateCharacterTokens should have got rid of all Space tokens"
+
+        else:
+            raise ValueError("Unknown token type, %s" % kind)
+
+    return "\n".join(lines)
diff --git a/lib/html5lib/treewalkers/_base.py b/lib/html5lib/treewalkers/base.py
similarity index 52%
rename from lib/html5lib/treewalkers/_base.py
rename to lib/html5lib/treewalkers/base.py
index 34252e50..80c474c4 100644
--- a/lib/html5lib/treewalkers/_base.py
+++ b/lib/html5lib/treewalkers/base.py
@@ -1,10 +1,10 @@
 from __future__ import absolute_import, division, unicode_literals
-from six import text_type, string_types
-
-import gettext
-_ = gettext.gettext
 
 from xml.dom import Node
+from ..constants import namespaces, voidElements, spaceCharacters
+
+__all__ = ["DOCUMENT", "DOCTYPE", "TEXT", "ELEMENT", "COMMENT", "ENTITY", "UNKNOWN",
+           "TreeWalker", "NonRecursiveTreeWalker"]
 
 DOCUMENT = Node.DOCUMENT_NODE
 DOCTYPE = Node.DOCUMENT_TYPE_NODE
@@ -14,80 +14,115 @@ COMMENT = Node.COMMENT_NODE
 ENTITY = Node.ENTITY_NODE
 UNKNOWN = "<#UNKNOWN#>"
 
-from ..constants import voidElements, spaceCharacters
 spaceCharacters = "".join(spaceCharacters)
 
 
-def to_text(s, blank_if_none=True):
-    """Wrapper around six.text_type to convert None to empty string"""
-    if s is None:
-        if blank_if_none:
-            return ""
-        else:
-            return None
-    elif isinstance(s, text_type):
-        return s
-    else:
-        return text_type(s)
-
-
-def is_text_or_none(string):
-    """Wrapper around isinstance(string_types) or is None"""
-    return string is None or isinstance(string, string_types)
-
-
 class TreeWalker(object):
+    """Walks a tree yielding tokens
+
+    Tokens are dicts that all have a ``type`` field specifying the type of the
+    token.
+
+    """
     def __init__(self, tree):
+        """Creates a TreeWalker
+
+        :arg tree: the tree to walk
+
+        """
         self.tree = tree
 
     def __iter__(self):
         raise NotImplementedError
 
     def error(self, msg):
+        """Generates an error token with the given message
+
+        :arg msg: the error message
+
+        :returns: SerializeError token
+
+        """
         return {"type": "SerializeError", "data": msg}
 
     def emptyTag(self, namespace, name, attrs, hasChildren=False):
-        assert namespace is None or isinstance(namespace, string_types), type(namespace)
-        assert isinstance(name, string_types), type(name)
-        assert all((namespace is None or isinstance(namespace, string_types)) and
-                   isinstance(name, string_types) and
-                   isinstance(value, string_types)
-                   for (namespace, name), value in attrs.items())
+        """Generates an EmptyTag token
 
-        yield {"type": "EmptyTag", "name": to_text(name, False),
-               "namespace": to_text(namespace),
+        :arg namespace: the namespace of the token--can be ``None``
+
+        :arg name: the name of the element
+
+        :arg attrs: the attributes of the element as a dict
+
+        :arg hasChildren: whether to also yield a SerializeError token because
+            this void element shouldn't have children
+
+        :returns: generator yielding the EmptyTag token, then any error token
+
+        """
+        yield {"type": "EmptyTag", "name": name,
+               "namespace": namespace,
                "data": attrs}
         if hasChildren:
-            yield self.error(_("Void element has children"))
+            yield self.error("Void element has children")
 
     def startTag(self, namespace, name, attrs):
-        assert namespace is None or isinstance(namespace, string_types), type(namespace)
-        assert isinstance(name, string_types), type(name)
-        assert all((namespace is None or isinstance(namespace, string_types)) and
-                   isinstance(name, string_types) and
-                   isinstance(value, string_types)
-                   for (namespace, name), value in attrs.items())
+        """Generates a StartTag token
 
+        :arg namespace: the namespace of the token--can be ``None``
+
+        :arg name: the name of the element
+
+        :arg attrs: the attributes of the element as a dict
+
+        :returns: StartTag token
+
+        """
         return {"type": "StartTag",
-                "name": text_type(name),
-                "namespace": to_text(namespace),
-                "data": dict(((to_text(namespace, False), to_text(name)),
-                              to_text(value, False))
-                             for (namespace, name), value in attrs.items())}
+                "name": name,
+                "namespace": namespace,
+                "data": attrs}
 
     def endTag(self, namespace, name):
-        assert namespace is None or isinstance(namespace, string_types), type(namespace)
-        assert isinstance(name, string_types), type(namespace)
+        """Generates an EndTag token
 
+        :arg namespace: the namespace of the token--can be ``None``
+
+        :arg name: the name of the element
+
+        :returns: EndTag token
+
+        """
         return {"type": "EndTag",
-                "name": to_text(name, False),
-                "namespace": to_text(namespace),
-                "data": {}}
+                "name": name,
+                "namespace": namespace}
 
     def text(self, data):
-        assert isinstance(data, string_types), type(data)
+        """Generates SpaceCharacters and Characters tokens
 
-        data = to_text(data)
+        Depending on what's in the data, this generates one or more
+        ``SpaceCharacters`` and ``Characters`` tokens.
+
+        For example:
+
+            >>> from html5lib.treewalkers.base import TreeWalker
+            >>> # Give it an empty tree just so it instantiates
+            >>> walker = TreeWalker([])
+            >>> list(walker.text(''))
+            []
+            >>> list(walker.text('  '))
+            [{u'data': '  ', u'type': u'SpaceCharacters'}]
+            >>> list(walker.text(' abc '))  # doctest: +NORMALIZE_WHITESPACE
+            [{u'data': ' ', u'type': u'SpaceCharacters'},
+            {u'data': u'abc', u'type': u'Characters'},
+            {u'data': u' ', u'type': u'SpaceCharacters'}]
+
+        :arg data: the text data
+
+        :returns: one or more ``SpaceCharacters`` and ``Characters`` tokens
+
+        """
+        data = data
         middle = data.lstrip(spaceCharacters)
         left = data[:len(data) - len(middle)]
         if left:
@@ -101,28 +136,45 @@ class TreeWalker(object):
             yield {"type": "SpaceCharacters", "data": right}
 
     def comment(self, data):
-        assert isinstance(data, string_types), type(data)
+        """Generates a Comment token
 
-        return {"type": "Comment", "data": text_type(data)}
+        :arg data: the comment
 
-    def doctype(self, name, publicId=None, systemId=None, correct=True):
-        assert is_text_or_none(name), type(name)
-        assert is_text_or_none(publicId), type(publicId)
-        assert is_text_or_none(systemId), type(systemId)
+        :returns: Comment token
 
+        """
+        return {"type": "Comment", "data": data}
+
+    def doctype(self, name, publicId=None, systemId=None):
+        """Generates a Doctype token
+
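+        :arg name: the name of the doctype
+
+        :arg publicId: the public identifier of the doctype--can be ``None``
+
+        :arg systemId: the system identifier of the doctype--can be ``None``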
+
+        :returns: the Doctype token
+
+        """
         return {"type": "Doctype",
-                "name": to_text(name),
-                "publicId": to_text(publicId),
-                "systemId": to_text(systemId),
-                "correct": to_text(correct)}
+                "name": name,
+                "publicId": publicId,
+                "systemId": systemId}
 
     def entity(self, name):
-        assert isinstance(name, string_types), type(name)
+        """Generates an Entity token
 
-        return {"type": "Entity", "name": text_type(name)}
+        :arg name: the entity name
+
+        :returns: an Entity token
+
+        """
+        return {"type": "Entity", "name": name}
 
     def unknown(self, nodeType):
-        return self.error(_("Unknown node type: ") + nodeType)
+        """Handles unknown node types"""
+        return self.error("Unknown node type: " + nodeType)
 
 
 class NonRecursiveTreeWalker(TreeWalker):
@@ -154,7 +206,7 @@ class NonRecursiveTreeWalker(TreeWalker):
 
             elif type == ELEMENT:
                 namespace, name, attributes, hasChildren = details
-                if name in voidElements:
+                if (not namespace or namespace == namespaces["html"]) and name in voidElements:
                     for token in self.emptyTag(namespace, name, attributes,
                                                hasChildren):
                         yield token
@@ -187,7 +239,7 @@ class NonRecursiveTreeWalker(TreeWalker):
                     type, details = details[0], details[1:]
                     if type == ELEMENT:
                         namespace, name, attributes, hasChildren = details
-                        if name not in voidElements:
+                        if (namespace and namespace != namespaces["html"]) or name not in voidElements:
                             yield self.endTag(namespace, name)
                     if self.tree is currentNode:
                         currentNode = None
diff --git a/lib/html5lib/treewalkers/dom.py b/lib/html5lib/treewalkers/dom.py
index a01287a9..b0c89b00 100644
--- a/lib/html5lib/treewalkers/dom.py
+++ b/lib/html5lib/treewalkers/dom.py
@@ -2,19 +2,16 @@ from __future__ import absolute_import, division, unicode_literals
 
 from xml.dom import Node
 
-import gettext
-_ = gettext.gettext
-
-from . import _base
+from . import base
 
 
-class TreeWalker(_base.NonRecursiveTreeWalker):
+class TreeWalker(base.NonRecursiveTreeWalker):
     def getNodeDetails(self, node):
         if node.nodeType == Node.DOCUMENT_TYPE_NODE:
-            return _base.DOCTYPE, node.name, node.publicId, node.systemId
+            return base.DOCTYPE, node.name, node.publicId, node.systemId
 
         elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
-            return _base.TEXT, node.nodeValue
+            return base.TEXT, node.nodeValue
 
         elif node.nodeType == Node.ELEMENT_NODE:
             attrs = {}
@@ -24,17 +21,17 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
                     attrs[(attr.namespaceURI, attr.localName)] = attr.value
                 else:
                     attrs[(None, attr.name)] = attr.value
-            return (_base.ELEMENT, node.namespaceURI, node.nodeName,
+            return (base.ELEMENT, node.namespaceURI, node.nodeName,
                     attrs, node.hasChildNodes())
 
         elif node.nodeType == Node.COMMENT_NODE:
-            return _base.COMMENT, node.nodeValue
+            return base.COMMENT, node.nodeValue
 
         elif node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
-            return (_base.DOCUMENT,)
+            return (base.DOCUMENT,)
 
         else:
-            return _base.UNKNOWN, node.nodeType
+            return base.UNKNOWN, node.nodeType
 
     def getFirstChild(self, node):
         return node.firstChild
diff --git a/lib/html5lib/treewalkers/etree.py b/lib/html5lib/treewalkers/etree.py
index fd8a9cc9..44653372 100644
--- a/lib/html5lib/treewalkers/etree.py
+++ b/lib/html5lib/treewalkers/etree.py
@@ -1,21 +1,12 @@
 from __future__ import absolute_import, division, unicode_literals
 
-try:
-    from collections import OrderedDict
-except ImportError:
-    try:
-        from ordereddict import OrderedDict
-    except ImportError:
-        OrderedDict = dict
-import gettext
-_ = gettext.gettext
-
+from collections import OrderedDict
 import re
 
-from six import text_type
+from six import string_types
 
-from . import _base
-from ..utils import moduleFactoryFactory
+from . import base
+from .._utils import moduleFactoryFactory
 
 tag_regexp = re.compile("{([^}]*)}(.*)")
 
@@ -24,7 +15,7 @@ def getETreeBuilder(ElementTreeImplementation):
     ElementTree = ElementTreeImplementation
     ElementTreeCommentType = ElementTree.Comment("asd").tag
 
-    class TreeWalker(_base.NonRecursiveTreeWalker):
+    class TreeWalker(base.NonRecursiveTreeWalker):  # pylint:disable=unused-variable
         """Given the particular ElementTree representation, this implementation,
         to avoid using recursion, returns "nodes" as tuples with the following
         content:
@@ -40,9 +31,9 @@ def getETreeBuilder(ElementTreeImplementation):
         """
         def getNodeDetails(self, node):
             if isinstance(node, tuple):  # It might be the root Element
-                elt, key, parents, flag = node
+                elt, _, _, flag = node
                 if flag in ("text", "tail"):
-                    return _base.TEXT, getattr(elt, flag)
+                    return base.TEXT, getattr(elt, flag)
                 else:
                     node = elt
 
@@ -50,17 +41,17 @@ def getETreeBuilder(ElementTreeImplementation):
                 node = node.getroot()
 
             if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"):
-                return (_base.DOCUMENT,)
+                return (base.DOCUMENT,)
 
             elif node.tag == "<!DOCTYPE>":
-                return (_base.DOCTYPE, node.text,
+                return (base.DOCTYPE, node.text,
                         node.get("publicId"), node.get("systemId"))
 
             elif node.tag == ElementTreeCommentType:
-                return _base.COMMENT, node.text
+                return base.COMMENT, node.text
 
             else:
-                assert type(node.tag) == text_type, type(node.tag)
+                assert isinstance(node.tag, string_types), type(node.tag)
                 # This is assumed to be an ordinary element
                 match = tag_regexp.match(node.tag)
                 if match:
@@ -75,7 +66,7 @@ def getETreeBuilder(ElementTreeImplementation):
                         attrs[(match.group(1), match.group(2))] = value
                     else:
                         attrs[(None, name)] = value
-                return (_base.ELEMENT, namespace, tag,
+                return (base.ELEMENT, namespace, tag,
                         attrs, len(node) or node.text)
 
         def getFirstChild(self, node):
@@ -131,8 +122,10 @@ def getETreeBuilder(ElementTreeImplementation):
                 if not parents:
                     return parent
                 else:
+                    assert list(parents[-1]).count(parent) == 1
                     return parent, list(parents[-1]).index(parent), parents, None
 
     return locals()
 
+
 getETreeModule = moduleFactoryFactory(getETreeBuilder)
diff --git a/lib/html5lib/treewalkers/lxmletree.py b/lib/html5lib/treewalkers/etree_lxml.py
similarity index 71%
rename from lib/html5lib/treewalkers/lxmletree.py
rename to lib/html5lib/treewalkers/etree_lxml.py
index bc934ac0..a614ac5b 100644
--- a/lib/html5lib/treewalkers/lxmletree.py
+++ b/lib/html5lib/treewalkers/etree_lxml.py
@@ -1,15 +1,14 @@
 from __future__ import absolute_import, division, unicode_literals
 from six import text_type
 
+from collections import OrderedDict
+
 from lxml import etree
 from ..treebuilders.etree import tag_regexp
 
-from gettext import gettext
-_ = gettext
+from . import base
 
-from . import _base
-
-from .. import ihatexml
+from .. import _ihatexml
 
 
 def ensure_str(s):
@@ -18,20 +17,27 @@ def ensure_str(s):
     elif isinstance(s, text_type):
         return s
     else:
-        return s.decode("utf-8", "strict")
+        return s.decode("ascii", "strict")
 
 
 class Root(object):
     def __init__(self, et):
         self.elementtree = et
         self.children = []
-        if et.docinfo.internalDTD:
-            self.children.append(Doctype(self,
-                                         ensure_str(et.docinfo.root_name),
-                                         ensure_str(et.docinfo.public_id),
-                                         ensure_str(et.docinfo.system_url)))
-        root = et.getroot()
-        node = root
+
+        try:
+            if et.docinfo.internalDTD:
+                self.children.append(Doctype(self,
+                                             ensure_str(et.docinfo.root_name),
+                                             ensure_str(et.docinfo.public_id),
+                                             ensure_str(et.docinfo.system_url)))
+        except AttributeError:
+            pass
+
+        try:
+            node = et.getroot()
+        except AttributeError:
+            node = et
 
         while node.getprevious() is not None:
             node = node.getprevious()
@@ -118,35 +124,38 @@ class FragmentWrapper(object):
         return len(self.obj)
 
 
-class TreeWalker(_base.NonRecursiveTreeWalker):
+class TreeWalker(base.NonRecursiveTreeWalker):
     def __init__(self, tree):
-        if hasattr(tree, "getroot"):
-            tree = Root(tree)
-        elif isinstance(tree, list):
+        # pylint:disable=redefined-variable-type
+        if isinstance(tree, list):
+            self.fragmentChildren = set(tree)
             tree = FragmentRoot(tree)
-        _base.NonRecursiveTreeWalker.__init__(self, tree)
-        self.filter = ihatexml.InfosetFilter()
+        else:
+            self.fragmentChildren = set()
+            tree = Root(tree)
+        base.NonRecursiveTreeWalker.__init__(self, tree)
+        self.filter = _ihatexml.InfosetFilter()
 
     def getNodeDetails(self, node):
         if isinstance(node, tuple):  # Text node
             node, key = node
-            assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
-            return _base.TEXT, ensure_str(getattr(node, key))
+            assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
+            return base.TEXT, ensure_str(getattr(node, key))
 
         elif isinstance(node, Root):
-            return (_base.DOCUMENT,)
+            return (base.DOCUMENT,)
 
         elif isinstance(node, Doctype):
-            return _base.DOCTYPE, node.name, node.public_id, node.system_id
+            return base.DOCTYPE, node.name, node.public_id, node.system_id
 
         elif isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
-            return _base.TEXT, node.obj
+            return base.TEXT, ensure_str(node.obj)
 
         elif node.tag == etree.Comment:
-            return _base.COMMENT, ensure_str(node.text)
+            return base.COMMENT, ensure_str(node.text)
 
         elif node.tag == etree.Entity:
-            return _base.ENTITY, ensure_str(node.text)[1:-1]  # strip &;
+            return base.ENTITY, ensure_str(node.text)[1:-1]  # strip &;
 
         else:
             # This is assumed to be an ordinary element
@@ -156,7 +165,7 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
             else:
                 namespace = None
                 tag = ensure_str(node.tag)
-            attrs = {}
+            attrs = OrderedDict()
             for name, value in list(node.attrib.items()):
                 name = ensure_str(name)
                 value = ensure_str(value)
@@ -165,11 +174,11 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
                     attrs[(match.group(1), match.group(2))] = value
                 else:
                     attrs[(None, name)] = value
-            return (_base.ELEMENT, namespace, self.filter.fromXmlName(tag),
+            return (base.ELEMENT, namespace, self.filter.fromXmlName(tag),
                     attrs, len(node) > 0 or node.text)
 
     def getFirstChild(self, node):
-        assert not isinstance(node, tuple), _("Text nodes have no children")
+        assert not isinstance(node, tuple), "Text nodes have no children"
 
         assert len(node) or node.text, "Node has no children"
         if node.text:
@@ -180,7 +189,7 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
     def getNextSibling(self, node):
         if isinstance(node, tuple):  # Text node
             node, key = node
-            assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
+            assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
             if key == "text":
                 # XXX: we cannot use a "bool(node) and node[0] or None" construct here
                 # because node[0] might evaluate to False if it has no child element
@@ -196,9 +205,11 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
     def getParentNode(self, node):
         if isinstance(node, tuple):  # Text node
             node, key = node
-            assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
+            assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
             if key == "text":
                 return node
             # else: fallback to "normal" processing
+        elif node in self.fragmentChildren:
+            return None
 
         return node.getparent()
diff --git a/lib/html5lib/treewalkers/genshistream.py b/lib/html5lib/treewalkers/genshi.py
similarity index 90%
rename from lib/html5lib/treewalkers/genshistream.py
rename to lib/html5lib/treewalkers/genshi.py
index f559c45d..7483be27 100644
--- a/lib/html5lib/treewalkers/genshistream.py
+++ b/lib/html5lib/treewalkers/genshi.py
@@ -4,12 +4,12 @@ from genshi.core import QName
 from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT
 from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT
 
-from . import _base
+from . import base
 
 from ..constants import voidElements, namespaces
 
 
-class TreeWalker(_base.TreeWalker):
+class TreeWalker(base.TreeWalker):
     def __iter__(self):
         # Buffer the events so we can pass in the following one
         previous = None
@@ -25,7 +25,7 @@ class TreeWalker(_base.TreeWalker):
                 yield token
 
     def tokens(self, event, next):
-        kind, data, pos = event
+        kind, data, _ = event
         if kind == START:
             tag, attribs = data
             name = tag.localname
@@ -39,8 +39,8 @@ class TreeWalker(_base.TreeWalker):
 
             if namespace == namespaces["html"] and name in voidElements:
                 for token in self.emptyTag(namespace, name, converted_attribs,
-                                           not next or next[0] != END
-                                           or next[1] != tag):
+                                           not next or next[0] != END or
+                                           next[1] != tag):
                     yield token
             else:
                 yield self.startTag(namespace, name, converted_attribs)
@@ -48,7 +48,7 @@ class TreeWalker(_base.TreeWalker):
         elif kind == END:
             name = data.localname
             namespace = data.namespace
-            if name not in voidElements:
+            if namespace != namespaces["html"] or name not in voidElements:
                 yield self.endTag(namespace, name)
 
         elif kind == COMMENT:
diff --git a/lib/html5lib/treewalkers/pulldom.py b/lib/html5lib/treewalkers/pulldom.py
deleted file mode 100644
index 0b0f515f..00000000
--- a/lib/html5lib/treewalkers/pulldom.py
+++ /dev/null
@@ -1,63 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, \
-    COMMENT, IGNORABLE_WHITESPACE, CHARACTERS
-
-from . import _base
-
-from ..constants import voidElements
-
-
-class TreeWalker(_base.TreeWalker):
-    def __iter__(self):
-        ignore_until = None
-        previous = None
-        for event in self.tree:
-            if previous is not None and \
-                    (ignore_until is None or previous[1] is ignore_until):
-                if previous[1] is ignore_until:
-                    ignore_until = None
-                for token in self.tokens(previous, event):
-                    yield token
-                    if token["type"] == "EmptyTag":
-                        ignore_until = previous[1]
-            previous = event
-        if ignore_until is None or previous[1] is ignore_until:
-            for token in self.tokens(previous, None):
-                yield token
-        elif ignore_until is not None:
-            raise ValueError("Illformed DOM event stream: void element without END_ELEMENT")
-
-    def tokens(self, event, next):
-        type, node = event
-        if type == START_ELEMENT:
-            name = node.nodeName
-            namespace = node.namespaceURI
-            attrs = {}
-            for attr in list(node.attributes.keys()):
-                attr = node.getAttributeNode(attr)
-                attrs[(attr.namespaceURI, attr.localName)] = attr.value
-            if name in voidElements:
-                for token in self.emptyTag(namespace,
-                                           name,
-                                           attrs,
-                                           not next or next[1] is not node):
-                    yield token
-            else:
-                yield self.startTag(namespace, name, attrs)
-
-        elif type == END_ELEMENT:
-            name = node.nodeName
-            namespace = node.namespaceURI
-            if name not in voidElements:
-                yield self.endTag(namespace, name)
-
-        elif type == COMMENT:
-            yield self.comment(node.nodeValue)
-
-        elif type in (IGNORABLE_WHITESPACE, CHARACTERS):
-            for token in self.text(node.nodeValue):
-                yield token
-
-        else:
-            yield self.unknown(type)
diff --git a/lib/html5lib/trie/__init__.py b/lib/html5lib/trie/__init__.py
deleted file mode 100644
index a8cca8a9..00000000
--- a/lib/html5lib/trie/__init__.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from .py import Trie as PyTrie
-
-Trie = PyTrie
-
-try:
-    from .datrie import Trie as DATrie
-except ImportError:
-    pass
-else:
-    Trie = DATrie
diff --git a/lib/html5lib/trie/datrie.py b/lib/html5lib/trie/datrie.py
deleted file mode 100644
index 51f3d046..00000000
--- a/lib/html5lib/trie/datrie.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from datrie import Trie as DATrie
-from six import text_type
-
-from ._base import Trie as ABCTrie
-
-
-class Trie(ABCTrie):
-    def __init__(self, data):
-        chars = set()
-        for key in data.keys():
-            if not isinstance(key, text_type):
-                raise TypeError("All keys must be strings")
-            for char in key:
-                chars.add(char)
-
-        self._data = DATrie("".join(chars))
-        for key, value in data.items():
-            self._data[key] = value
-
-    def __contains__(self, key):
-        return key in self._data
-
-    def __len__(self):
-        return len(self._data)
-
-    def __iter__(self):
-        raise NotImplementedError()
-
-    def __getitem__(self, key):
-        return self._data[key]
-
-    def keys(self, prefix=None):
-        return self._data.keys(prefix)
-
-    def has_keys_with_prefix(self, prefix):
-        return self._data.has_keys_with_prefix(prefix)
-
-    def longest_prefix(self, prefix):
-        return self._data.longest_prefix(prefix)
-
-    def longest_prefix_item(self, prefix):
-        return self._data.longest_prefix_item(prefix)
diff --git a/lib/html5lib/utils.py b/lib/html5lib/utils.py
deleted file mode 100644
index 2f41f4df..00000000
--- a/lib/html5lib/utils.py
+++ /dev/null
@@ -1,82 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from types import ModuleType
-
-try:
-    import xml.etree.cElementTree as default_etree
-except ImportError:
-    import xml.etree.ElementTree as default_etree
-
-
-__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
-           "surrogatePairToCodepoint", "moduleFactoryFactory"]
-
-
-class MethodDispatcher(dict):
-    """Dict with 2 special properties:
-
-    On initiation, keys that are lists, sets or tuples are converted to
-    multiple keys so accessing any one of the items in the original
-    list-like object returns the matching value
-
-    md = MethodDispatcher({("foo", "bar"):"baz"})
-    md["foo"] == "baz"
-
-    A default value which can be set through the default attribute.
-    """
-
-    def __init__(self, items=()):
-        # Using _dictEntries instead of directly assigning to self is about
-        # twice as fast. Please do careful performance testing before changing
-        # anything here.
-        _dictEntries = []
-        for name, value in items:
-            if type(name) in (list, tuple, frozenset, set):
-                for item in name:
-                    _dictEntries.append((item, value))
-            else:
-                _dictEntries.append((name, value))
-        dict.__init__(self, _dictEntries)
-        self.default = None
-
-    def __getitem__(self, key):
-        return dict.get(self, key, self.default)
-
-
-# Some utility functions to dal with weirdness around UCS2 vs UCS4
-# python builds
-
-def isSurrogatePair(data):
-    return (len(data) == 2 and
-            ord(data[0]) >= 0xD800 and ord(data[0]) <= 0xDBFF and
-            ord(data[1]) >= 0xDC00 and ord(data[1]) <= 0xDFFF)
-
-
-def surrogatePairToCodepoint(data):
-    char_val = (0x10000 + (ord(data[0]) - 0xD800) * 0x400 +
-                (ord(data[1]) - 0xDC00))
-    return char_val
-
-# Module Factory Factory (no, this isn't Java, I know)
-# Here to stop this being duplicated all over the place.
-
-
-def moduleFactoryFactory(factory):
-    moduleCache = {}
-
-    def moduleFactory(baseModule, *args, **kwargs):
-        if isinstance(ModuleType.__name__, type("")):
-            name = "_%s_factory" % baseModule.__name__
-        else:
-            name = b"_%s_factory" % baseModule.__name__
-
-        if name in moduleCache:
-            return moduleCache[name]
-        else:
-            mod = ModuleType(name)
-            objs = factory(baseModule, *args, **kwargs)
-            mod.__dict__.update(objs)
-            moduleCache[name] = mod
-            return mod
-
-    return moduleFactory