Remove unnecessary lib files

JonnyWong16 2021-10-15 01:51:46 -07:00
parent afbfebbe59
commit 53369cd8a6
No known key found for this signature in database
GPG key ID: B1F1F9807184697A
135 changed files with 0 additions and 107828 deletions


@@ -1 +0,0 @@
"The beautifulsoup tests."


@@ -1,147 +0,0 @@
"""Tests of the builder registry."""
import unittest
import warnings
from bs4 import BeautifulSoup
from bs4.builder import (
builder_registry as registry,
HTMLParserTreeBuilder,
TreeBuilderRegistry,
)
try:
from bs4.builder import HTML5TreeBuilder
HTML5LIB_PRESENT = True
except ImportError:
HTML5LIB_PRESENT = False
try:
from bs4.builder import (
LXMLTreeBuilderForXML,
LXMLTreeBuilder,
)
LXML_PRESENT = True
except ImportError:
LXML_PRESENT = False
class BuiltInRegistryTest(unittest.TestCase):
"""Test the built-in registry with the default builders registered."""
def test_combination(self):
if LXML_PRESENT:
self.assertEqual(registry.lookup('fast', 'html'),
LXMLTreeBuilder)
if LXML_PRESENT:
self.assertEqual(registry.lookup('permissive', 'xml'),
LXMLTreeBuilderForXML)
self.assertEqual(registry.lookup('strict', 'html'),
HTMLParserTreeBuilder)
if HTML5LIB_PRESENT:
self.assertEqual(registry.lookup('html5lib', 'html'),
HTML5TreeBuilder)
def test_lookup_by_markup_type(self):
if LXML_PRESENT:
self.assertEqual(registry.lookup('html'), LXMLTreeBuilder)
self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML)
else:
self.assertEqual(registry.lookup('xml'), None)
if HTML5LIB_PRESENT:
self.assertEqual(registry.lookup('html'), HTML5TreeBuilder)
else:
self.assertEqual(registry.lookup('html'), HTMLParserTreeBuilder)
def test_named_library(self):
if LXML_PRESENT:
self.assertEqual(registry.lookup('lxml', 'xml'),
LXMLTreeBuilderForXML)
self.assertEqual(registry.lookup('lxml', 'html'),
LXMLTreeBuilder)
if HTML5LIB_PRESENT:
self.assertEqual(registry.lookup('html5lib'),
HTML5TreeBuilder)
self.assertEqual(registry.lookup('html.parser'),
HTMLParserTreeBuilder)
def test_beautifulsoup_constructor_does_lookup(self):
with warnings.catch_warnings(record=True) as w:
# This will create a warning about not explicitly
# specifying a parser, but we'll ignore it.
# You can pass in a string.
BeautifulSoup("", features="html")
# Or a list of strings.
BeautifulSoup("", features=["html", "fast"])
# You'll get an exception if BS can't find an appropriate
# builder.
self.assertRaises(ValueError, BeautifulSoup,
"", features="no-such-feature")
class RegistryTest(unittest.TestCase):
"""Test the TreeBuilderRegistry class in general."""
def setUp(self):
self.registry = TreeBuilderRegistry()
def builder_for_features(self, *feature_list):
cls = type('Builder_' + '_'.join(feature_list),
(object,), {'features' : feature_list})
self.registry.register(cls)
return cls
def test_register_with_no_features(self):
builder = self.builder_for_features()
# Since the builder advertises no features, you can't find it
# by looking up features.
self.assertEqual(self.registry.lookup('foo'), None)
# But you can find it by doing a lookup with no features, if
# this happens to be the only registered builder.
self.assertEqual(self.registry.lookup(), builder)
def test_register_with_features_makes_lookup_succeed(self):
builder = self.builder_for_features('foo', 'bar')
self.assertEqual(self.registry.lookup('foo'), builder)
self.assertEqual(self.registry.lookup('bar'), builder)
def test_lookup_fails_when_no_builder_implements_feature(self):
builder = self.builder_for_features('foo', 'bar')
self.assertEqual(self.registry.lookup('baz'), None)
def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
builder1 = self.builder_for_features('foo')
builder2 = self.builder_for_features('bar')
self.assertEqual(self.registry.lookup(), builder2)
def test_lookup_fails_when_no_tree_builders_registered(self):
self.assertEqual(self.registry.lookup(), None)
def test_lookup_gets_most_recent_builder_supporting_all_features(self):
has_one = self.builder_for_features('foo')
has_the_other = self.builder_for_features('bar')
has_both_early = self.builder_for_features('foo', 'bar', 'baz')
has_both_late = self.builder_for_features('foo', 'bar', 'quux')
lacks_one = self.builder_for_features('bar')
has_the_other = self.builder_for_features('foo')
# There are two builders featuring 'foo' and 'bar', but
# the one that also features 'quux' was registered later.
self.assertEqual(self.registry.lookup('foo', 'bar'),
has_both_late)
# There is only one builder featuring 'foo', 'bar', and 'baz'.
self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'),
has_both_early)
def test_lookup_fails_when_cannot_reconcile_requested_features(self):
builder1 = self.builder_for_features('foo', 'bar')
builder2 = self.builder_for_features('foo', 'baz')
self.assertEqual(self.registry.lookup('bar', 'baz'), None)
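
For orientation, here is a minimal sketch (not part of the deleted file, and assuming bs4 with html.parser is installed) of the registry lookup behaviour these tests covered:

from bs4 import BeautifulSoup
from bs4.builder import builder_registry

# Look up a tree builder class by feature name; returns None when nothing matches.
builder_cls = builder_registry.lookup('html.parser')
print(builder_cls)  # <class 'bs4.builder._htmlparser.HTMLParserTreeBuilder'>

# The BeautifulSoup constructor performs the same lookup via its `features` argument.
soup = BeautifulSoup("<p>hi</p>", features="html.parser")
print(soup.p.string)  # hi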


@@ -1,36 +0,0 @@
"Test harness for doctests."
# pylint: disable-msg=E0611,W0142
__metaclass__ = type
__all__ = [
'additional_tests',
]
import atexit
import doctest
import os
#from pkg_resources import (
# resource_filename, resource_exists, resource_listdir, cleanup_resources)
import unittest
DOCTEST_FLAGS = (
doctest.ELLIPSIS |
doctest.NORMALIZE_WHITESPACE |
doctest.REPORT_NDIFF)
# def additional_tests():
# "Run the doc tests (README.txt and docs/*, if any exist)"
# doctest_files = [
# os.path.abspath(resource_filename('bs4', 'README.txt'))]
# if resource_exists('bs4', 'docs'):
# for name in resource_listdir('bs4', 'docs'):
# if name.endswith('.txt'):
# doctest_files.append(
# os.path.abspath(
# resource_filename('bs4', 'docs/%s' % name)))
# kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS)
# atexit.register(cleanup_resources)
# return unittest.TestSuite((
# doctest.DocFileSuite(*doctest_files, **kwargs)))
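
A minimal sketch (illustrative, not from the deleted harness) of how these doctest flags would be applied if the commented-out code above were active; "README.txt" is a placeholder path:

import doctest

flags = doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF
# module_relative=False mirrors the commented-out DocFileSuite call above.
doctest.testfile("README.txt", module_relative=False, optionflags=flags)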


@@ -1,226 +0,0 @@
"""Tests to ensure that the html5lib tree builder generates good trees."""
import warnings
try:
from bs4.builder import HTML5TreeBuilder
HTML5LIB_PRESENT = True
except ImportError as e:
HTML5LIB_PRESENT = False
from bs4.element import SoupStrainer
from bs4.testing import (
HTML5TreeBuilderSmokeTest,
SoupTest,
skipIf,
)
@skipIf(
not HTML5LIB_PRESENT,
"html5lib seems not to be present, not testing its tree builder.")
class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
"""See ``HTML5TreeBuilderSmokeTest``."""
@property
def default_builder(self):
return HTML5TreeBuilder
def test_soupstrainer(self):
# The html5lib tree builder does not support SoupStrainers.
strainer = SoupStrainer("b")
markup = "<p>A <b>bold</b> statement.</p>"
with warnings.catch_warnings(record=True) as w:
soup = self.soup(markup, parse_only=strainer)
self.assertEqual(
soup.decode(), self.document_for(markup))
self.assertTrue(
"the html5lib tree builder doesn't support parse_only" in
str(w[0].message))
def test_correctly_nested_tables(self):
"""html5lib inserts <tbody> tags where other parsers don't."""
markup = ('<table id="1">'
'<tr>'
"<td>Here's another table:"
'<table id="2">'
'<tr><td>foo</td></tr>'
'</table></td>')
self.assertSoupEquals(
markup,
'<table id="1"><tbody><tr><td>Here\'s another table:'
'<table id="2"><tbody><tr><td>foo</td></tr></tbody></table>'
'</td></tr></tbody></table>')
self.assertSoupEquals(
"<table><thead><tr><td>Foo</td></tr></thead>"
"<tbody><tr><td>Bar</td></tr></tbody>"
"<tfoot><tr><td>Baz</td></tr></tfoot></table>")
def test_xml_declaration_followed_by_doctype(self):
markup = '''<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html>
<html>
<head>
</head>
<body>
<p>foo</p>
</body>
</html>'''
soup = self.soup(markup)
# Verify that we can reach the <p> tag; this means the tree is connected.
self.assertEqual(b"<p>foo</p>", soup.p.encode())
def test_reparented_markup(self):
markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>'
soup = self.soup(markup)
self.assertEqual("<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode())
self.assertEqual(2, len(soup.find_all('p')))
def test_reparented_markup_ends_with_whitespace(self):
markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>\n'
soup = self.soup(markup)
self.assertEqual("<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
self.assertEqual(2, len(soup.find_all('p')))
def test_reparented_markup_containing_identical_whitespace_nodes(self):
"""Verify that we keep the two whitespace nodes in this
document distinct when reparenting the adjacent <tbody> tags.
"""
markup = '<table> <tbody><tbody><ims></tbody> </table>'
soup = self.soup(markup)
space1, space2 = soup.find_all(string=' ')
tbody1, tbody2 = soup.find_all('tbody')
assert space1.next_element is tbody1
assert tbody2.next_element is space2
def test_reparented_markup_containing_children(self):
markup = '<div><a>aftermath<p><noscript>target</noscript>aftermath</a></p></div>'
soup = self.soup(markup)
noscript = soup.noscript
self.assertEqual("target", noscript.next_element)
target = soup.find(string='target')
# The 'aftermath' string was duplicated; we want the second one.
final_aftermath = soup.find_all(string='aftermath')[-1]
# The <noscript> tag was moved beneath a copy of the <a> tag,
# but the 'target' string within is still connected to the
# (second) 'aftermath' string.
self.assertEqual(final_aftermath, target.next_element)
self.assertEqual(target, final_aftermath.previous_element)
def test_processing_instruction(self):
"""Processing instructions become comments."""
markup = b"""<?PITarget PIContent?>"""
soup = self.soup(markup)
assert str(soup).startswith("<!--?PITarget PIContent?-->")
def test_cloned_multivalue_node(self):
markup = b"""<a class="my_class"><p></a>"""
soup = self.soup(markup)
a1, a2 = soup.find_all('a')
self.assertEqual(a1, a2)
assert a1 is not a2
def test_foster_parenting(self):
markup = b"""<table><td></tbody>A"""
soup = self.soup(markup)
self.assertEqual("<body>A<table><tbody><tr><td></td></tr></tbody></table></body>", soup.body.decode())
def test_extraction(self):
"""
Test that extraction does not destroy the tree.
https://bugs.launchpad.net/beautifulsoup/+bug/1782928
"""
markup = """
<html><head></head>
<style>
</style><script></script><body><p>hello</p></body></html>
"""
soup = self.soup(markup)
[s.extract() for s in soup('script')]
[s.extract() for s in soup('style')]
self.assertEqual(len(soup.find_all("p")), 1)
def test_empty_comment(self):
"""
Test that empty comment does not break structure.
https://bugs.launchpad.net/beautifulsoup/+bug/1806598
"""
markup = """
<html>
<body>
<form>
<!----><input type="text">
</form>
</body>
</html>
"""
soup = self.soup(markup)
inputs = []
for form in soup.find_all('form'):
inputs.extend(form.find_all('input'))
self.assertEqual(len(inputs), 1)
def test_tracking_line_numbers(self):
# The html.parser TreeBuilder keeps track of line number and
# position of each element.
markup = "\n <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>"
soup = self.soup(markup)
self.assertEqual(2, soup.p.sourceline)
self.assertEqual(5, soup.p.sourcepos)
self.assertEqual("sourceline", soup.p.find('sourceline').name)
# You can deactivate this behavior.
soup = self.soup(markup, store_line_numbers=False)
self.assertEqual("sourceline", soup.p.sourceline.name)
self.assertEqual("sourcepos", soup.p.sourcepos.name)
def test_special_string_containers(self):
# The html5lib tree builder doesn't support this standard feature,
# because there's no way of knowing, when a string is created,
# where in the tree it will eventually end up.
pass
def test_html5_attributes(self):
# The html5lib TreeBuilder can convert any entity named in
# the HTML5 spec to a sequence of Unicode characters, and
# convert those Unicode characters to a (potentially
# different) named entity on the way out.
#
# This is a copy of the same test from
# HTMLParserTreeBuilderSmokeTest. It's not in the superclass
# because the lxml HTML TreeBuilder _doesn't_ work this way.
for input_element, output_unicode, output_element in (
("&RightArrowLeftArrow;", '\u21c4', b'&rlarr;'),
('&models;', '\u22a7', b'&models;'),
('&Nfr;', '\U0001d511', b'&Nfr;'),
('&ngeqq;', '\u2267\u0338', b'&ngeqq;'),
('&not;', '\xac', b'&not;'),
('&Not;', '\u2aec', b'&Not;'),
('&quot;', '"', b'"'),
('&there4;', '\u2234', b'&there4;'),
('&Therefore;', '\u2234', b'&there4;'),
('&therefore;', '\u2234', b'&there4;'),
("&fjlig;", 'fj', b'fj'),
("&sqcup;", '\u2294', b'&sqcup;'),
("&sqcups;", '\u2294\ufe00', b'&sqcups;'),
("&apos;", "'", b"'"),
("&verbar;", "|", b"|"),
):
markup = '<div>%s</div>' % input_element
div = self.soup(markup).div
without_element = div.encode()
expect = b"<div>%s</div>" % output_unicode.encode("utf8")
self.assertEqual(without_element, expect)
with_element = div.encode(formatter="html")
expect = b"<div>%s</div>" % output_element
self.assertEqual(with_element, expect)
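
As a quick illustration (not from the deleted file; requires the optional html5lib package), this is the browser-style tree repair the tests above depend on:

from bs4 import BeautifulSoup

# html5lib builds the tree a browser would, e.g. wrapping table rows in <tbody>.
soup = BeautifulSoup('<table><tr><td>foo</td></tr></table>', 'html5lib')
print(soup.table.tbody.td.string)  # foo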


@@ -1,134 +0,0 @@
"""Tests to ensure that the html.parser tree builder generates good
trees."""
from pdb import set_trace
import pickle
import warnings
from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
from bs4.builder import HTMLParserTreeBuilder
from bs4.builder._htmlparser import BeautifulSoupHTMLParser
class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
default_builder = HTMLParserTreeBuilder
def test_namespaced_system_doctype(self):
# html.parser can't handle namespaced doctypes, so skip this one.
pass
def test_namespaced_public_doctype(self):
# html.parser can't handle namespaced doctypes, so skip this one.
pass
def test_builder_is_pickled(self):
"""Unlike most tree builders, HTMLParserTreeBuilder and will
be restored after pickling.
"""
tree = self.soup("<a><b>foo</a>")
dumped = pickle.dumps(tree, 2)
loaded = pickle.loads(dumped)
self.assertTrue(isinstance(loaded.builder, type(tree.builder)))
def test_redundant_empty_element_closing_tags(self):
self.assertSoupEquals('<br></br><br></br><br></br>', "<br/><br/><br/>")
self.assertSoupEquals('</br></br></br>', "")
def test_empty_element(self):
# This verifies that any buffered data present when the parser
# finishes working is handled.
self.assertSoupEquals("foo &# bar", "foo &amp;# bar")
def test_tracking_line_numbers(self):
# The html.parser TreeBuilder keeps track of line number and
# position of each element.
markup = "\n <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>"
soup = self.soup(markup)
self.assertEqual(2, soup.p.sourceline)
self.assertEqual(3, soup.p.sourcepos)
self.assertEqual("sourceline", soup.p.find('sourceline').name)
# You can deactivate this behavior.
soup = self.soup(markup, store_line_numbers=False)
self.assertEqual("sourceline", soup.p.sourceline.name)
self.assertEqual("sourcepos", soup.p.sourcepos.name)
def test_on_duplicate_attribute(self):
# The html.parser tree builder has a variety of ways of
# handling a tag that contains the same attribute multiple times.
markup = '<a class="cls" href="url1" href="url2" href="url3" id="id">'
# If you don't provide any particular value for
# on_duplicate_attribute, later values replace earlier values.
soup = self.soup(markup)
self.assertEqual("url3", soup.a['href'])
self.assertEqual(["cls"], soup.a['class'])
self.assertEqual("id", soup.a['id'])
# You can also get this behavior explicitly.
def assert_attribute(on_duplicate_attribute, expected):
soup = self.soup(
markup, on_duplicate_attribute=on_duplicate_attribute
)
self.assertEqual(expected, soup.a['href'])
# Verify that non-duplicate attributes are treated normally.
self.assertEqual(["cls"], soup.a['class'])
self.assertEqual("id", soup.a['id'])
assert_attribute(None, "url3")
assert_attribute(BeautifulSoupHTMLParser.REPLACE, "url3")
# You can ignore subsequent values in favor of the first.
assert_attribute(BeautifulSoupHTMLParser.IGNORE, "url1")
# And you can pass in a callable that does whatever you want.
def accumulate(attrs, key, value):
if not isinstance(attrs[key], list):
attrs[key] = [attrs[key]]
attrs[key].append(value)
assert_attribute(accumulate, ["url1", "url2", "url3"])
def test_html5_attributes(self):
# The html.parser TreeBuilder can convert any entity named in
# the HTML5 spec to a sequence of Unicode characters, and
# convert those Unicode characters to a (potentially
# different) named entity on the way out.
for input_element, output_unicode, output_element in (
("&RightArrowLeftArrow;", '\u21c4', b'&rlarr;'),
('&models;', '\u22a7', b'&models;'),
('&Nfr;', '\U0001d511', b'&Nfr;'),
('&ngeqq;', '\u2267\u0338', b'&ngeqq;'),
('&not;', '\xac', b'&not;'),
('&Not;', '\u2aec', b'&Not;'),
('&quot;', '"', b'"'),
('&there4;', '\u2234', b'&there4;'),
('&Therefore;', '\u2234', b'&there4;'),
('&therefore;', '\u2234', b'&there4;'),
("&fjlig;", 'fj', b'fj'),
("&sqcup;", '\u2294', b'&sqcup;'),
("&sqcups;", '\u2294\ufe00', b'&sqcups;'),
("&apos;", "'", b"'"),
("&verbar;", "|", b"|"),
):
markup = '<div>%s</div>' % input_element
div = self.soup(markup).div
without_element = div.encode()
expect = b"<div>%s</div>" % output_unicode.encode("utf8")
self.assertEqual(without_element, expect)
with_element = div.encode(formatter="html")
expect = b"<div>%s</div>" % output_element
self.assertEqual(with_element, expect)
class TestHTMLParserSubclass(SoupTest):
def test_error(self):
"""Verify that our HTMLParser subclass implements error() in a way
that doesn't cause a crash.
"""
parser = BeautifulSoupHTMLParser()
with warnings.catch_warnings(record=True) as warns:
parser.error("don't crash")
[warning] = warns
assert "don't crash" == str(warning.message)


@@ -1,115 +0,0 @@
"""Tests to ensure that the lxml tree builder generates good trees."""
import re
import warnings
try:
import lxml.etree
LXML_PRESENT = True
LXML_VERSION = lxml.etree.LXML_VERSION
except ImportError as e:
LXML_PRESENT = False
LXML_VERSION = (0,)
if LXML_PRESENT:
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
from bs4 import (
BeautifulSoup,
BeautifulStoneSoup,
)
from bs4.element import Comment, Doctype, SoupStrainer
from bs4.testing import skipIf
from bs4.tests import test_htmlparser
from bs4.testing import (
HTMLTreeBuilderSmokeTest,
XMLTreeBuilderSmokeTest,
SoupTest,
skipIf,
)
@skipIf(
not LXML_PRESENT,
"lxml seems not to be present, not testing its tree builder.")
class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
"""See ``HTMLTreeBuilderSmokeTest``."""
@property
def default_builder(self):
return LXMLTreeBuilder
def test_out_of_range_entity(self):
self.assertSoupEquals(
"<p>foo&#10000000000000;bar</p>", "<p>foobar</p>")
self.assertSoupEquals(
"<p>foo&#x10000000000000;bar</p>", "<p>foobar</p>")
self.assertSoupEquals(
"<p>foo&#1000000000;bar</p>", "<p>foobar</p>")
def test_entities_in_foreign_document_encoding(self):
# We can't implement this case correctly because by the time we
# hear about markup like "&#147;", it's been (incorrectly) converted into
# a string like u'\x93'
pass
# In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
# test if an old version of lxml is installed.
@skipIf(
not LXML_PRESENT or LXML_VERSION < (2,3,5,0),
"Skipping doctype test for old version of lxml to avoid segfault.")
def test_empty_doctype(self):
soup = self.soup("<!DOCTYPE>")
doctype = soup.contents[0]
self.assertEqual("", doctype.strip())
def test_beautifulstonesoup_is_xml_parser(self):
# Make sure that the deprecated BSS class uses an xml builder
# if one is installed.
with warnings.catch_warnings(record=True) as w:
soup = BeautifulStoneSoup("<b />")
self.assertEqual("<b/>", str(soup.b))
self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message))
def test_tracking_line_numbers(self):
# The lxml TreeBuilder cannot keep track of line numbers from
# the original markup. Even if you ask for line numbers, we
# don't have 'em.
#
# This means that if you have a tag like <sourceline> or
# <sourcepos>, attribute access will find it rather than
# giving you a numeric answer.
soup = self.soup(
"\n <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>",
store_line_numbers=True
)
self.assertEqual("sourceline", soup.p.sourceline.name)
self.assertEqual("sourcepos", soup.p.sourcepos.name)
@skipIf(
not LXML_PRESENT,
"lxml seems not to be present, not testing its XML tree builder.")
class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
"""See ``HTMLTreeBuilderSmokeTest``."""
@property
def default_builder(self):
return LXMLTreeBuilderForXML
def test_namespace_indexing(self):
# We should not track un-prefixed namespaces as we can only hold one
# and it will be recognized as the default namespace by soupsieve,
# which may be confusing in some situations. When no namespace is provided
# for a selector, the default namespace (if defined) is assumed.
soup = self.soup(
'<?xml version="1.1"?>\n'
'<root>'
'<tag xmlns="http://unprefixed-namespace.com">content</tag>'
'<prefix:tag xmlns:prefix="http://prefixed-namespace.com">content</prefix:tag>'
'</root>'
)
self.assertEqual(
soup._namespaces,
{'xml': 'http://www.w3.org/XML/1998/namespace', 'prefix': 'http://prefixed-namespace.com'}
)
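
For reference, a minimal sketch (not from the deleted file; requires lxml) of the XML parsing path these tests cover:

from bs4 import BeautifulSoup

# features="xml" selects LXMLTreeBuilderForXML: tags keep their case and no
# HTML-specific fixups are applied.
soup = BeautifulSoup('<root><Tag>content</Tag></root>', 'xml')
print(soup.Tag.string)   # content
print(soup.find('tag'))  # None - XML parsing is case-sensitive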


@@ -1,579 +0,0 @@
# -*- coding: utf-8 -*-
"""Tests of Beautiful Soup as a whole."""
from pdb import set_trace
import logging
import os
import unittest
import sys
import tempfile
from bs4 import (
BeautifulSoup,
BeautifulStoneSoup,
GuessedAtParserWarning,
MarkupResemblesLocatorWarning,
)
from bs4.builder import (
TreeBuilder,
ParserRejectedMarkup,
)
from bs4.element import (
CharsetMetaAttributeValue,
Comment,
ContentMetaAttributeValue,
SoupStrainer,
NamespacedAttribute,
Tag,
NavigableString,
)
import bs4.dammit
from bs4.dammit import (
EntitySubstitution,
UnicodeDammit,
)
from bs4.testing import (
default_builder,
SoupTest,
skipIf,
)
import warnings
try:
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
LXML_PRESENT = True
except ImportError as e:
LXML_PRESENT = False
PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))
class TestConstructor(SoupTest):
def test_short_unicode_input(self):
data = "<h1>éé</h1>"
soup = self.soup(data)
self.assertEqual("éé", soup.h1.string)
def test_embedded_null(self):
data = "<h1>foo\0bar</h1>"
soup = self.soup(data)
self.assertEqual("foo\0bar", soup.h1.string)
def test_exclude_encodings(self):
utf8_data = "Räksmörgås".encode("utf-8")
soup = self.soup(utf8_data, exclude_encodings=["utf-8"])
self.assertEqual("windows-1252", soup.original_encoding)
def test_custom_builder_class(self):
# Verify that you can pass in a custom Builder class and
# it'll be instantiated with the appropriate keyword arguments.
class Mock(object):
def __init__(self, **kwargs):
self.called_with = kwargs
self.is_xml = True
self.store_line_numbers = False
self.cdata_list_attributes = []
self.preserve_whitespace_tags = []
self.string_containers = {}
def initialize_soup(self, soup):
pass
def feed(self, markup):
self.fed = markup
def reset(self):
pass
def ignore(self, ignore):
pass
set_up_substitutions = can_be_empty_element = ignore
def prepare_markup(self, *args, **kwargs):
yield "prepared markup", "original encoding", "declared encoding", "contains replacement characters"
kwargs = dict(
var="value",
# This is a deprecated BS3-era keyword argument, which
# will be stripped out.
convertEntities=True,
)
with warnings.catch_warnings(record=True):
soup = BeautifulSoup('', builder=Mock, **kwargs)
assert isinstance(soup.builder, Mock)
self.assertEqual(dict(var="value"), soup.builder.called_with)
self.assertEqual("prepared markup", soup.builder.fed)
# You can also instantiate the TreeBuilder yourself. In this
# case, that specific object is used and any keyword arguments
# to the BeautifulSoup constructor are ignored.
builder = Mock(**kwargs)
with warnings.catch_warnings(record=True) as w:
soup = BeautifulSoup(
'', builder=builder, ignored_value=True,
)
msg = str(w[0].message)
assert msg.startswith("Keyword arguments to the BeautifulSoup constructor will be ignored.")
self.assertEqual(builder, soup.builder)
self.assertEqual(kwargs, builder.called_with)
def test_parser_markup_rejection(self):
# If markup is completely rejected by the parser, an
# explanatory ParserRejectedMarkup exception is raised.
class Mock(TreeBuilder):
def feed(self, *args, **kwargs):
raise ParserRejectedMarkup("Nope.")
def prepare_markup(self, *args, **kwargs):
# We're going to try two different ways of preparing this markup,
# but feed() will reject both of them.
yield markup, None, None, False
yield markup, None, None, False
import re
self.assertRaisesRegex(
ParserRejectedMarkup,
"The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.",
BeautifulSoup, '', builder=Mock,
)
def test_cdata_list_attributes(self):
# Most attribute values are represented as scalars, but the
# HTML standard says that some attributes, like 'class' have
# space-separated lists as values.
markup = '<a id=" an id " class=" a class "></a>'
soup = self.soup(markup)
# Note that the spaces are stripped for 'class' but not for 'id'.
a = soup.a
self.assertEqual(" an id ", a['id'])
self.assertEqual(["a", "class"], a['class'])
# TreeBuilder takes an argument called 'multi_valued_attributes' which lets
# you customize or disable this. As always, you can customize the TreeBuilder
# by passing in a keyword argument to the BeautifulSoup constructor.
soup = self.soup(markup, builder=default_builder, multi_valued_attributes=None)
self.assertEqual(" a class ", soup.a['class'])
# Here are two ways of saying that `id` is a multi-valued
# attribute in this context, but 'class' is not.
for switcheroo in ({'*': 'id'}, {'a': 'id'}):
with warnings.catch_warnings(record=True) as w:
# This will create a warning about not explicitly
# specifying a parser, but we'll ignore it.
soup = self.soup(markup, builder=None, multi_valued_attributes=switcheroo)
a = soup.a
self.assertEqual(["an", "id"], a['id'])
self.assertEqual(" a class ", a['class'])
def test_replacement_classes(self):
# Test the ability to pass in replacements for element classes
# which will be used when building the tree.
class TagPlus(Tag):
pass
class StringPlus(NavigableString):
pass
class CommentPlus(Comment):
pass
soup = self.soup(
"<a><b>foo</b>bar</a><!--whee-->",
element_classes = {
Tag: TagPlus,
NavigableString: StringPlus,
Comment: CommentPlus,
}
)
# The tree was built with TagPlus, StringPlus, and CommentPlus objects,
# rather than Tag, String, and Comment objects.
assert all(
isinstance(x, (TagPlus, StringPlus, CommentPlus))
for x in soup.recursiveChildGenerator()
)
def test_alternate_string_containers(self):
# Test the ability to customize the string containers for
# different types of tags.
class PString(NavigableString):
pass
class BString(NavigableString):
pass
soup = self.soup(
"<div>Hello.<p>Here is <b>some <i>bolded</i></b> text",
string_containers = {
'b': BString,
'p': PString,
}
)
# The string before the <p> tag is a regular NavigableString.
assert isinstance(soup.div.contents[0], NavigableString)
# The string inside the <p> tag, but not inside the <i> tag,
# is a PString.
assert isinstance(soup.p.contents[0], PString)
# Every string inside the <b> tag is a BString, even the one that
# was also inside an <i> tag.
for s in soup.b.strings:
assert isinstance(s, BString)
# Now that parsing was complete, the string_container_stack
# (where this information was kept) has been cleared out.
self.assertEqual([], soup.string_container_stack)
class TestWarnings(SoupTest):
def _assert_warning(self, warnings, cls):
for w in warnings:
if isinstance(w.message, cls):
return w
raise Exception("%s warning not found in %r" % cls, warnings)
def _assert_no_parser_specified(self, w):
warning = self._assert_warning(w, GuessedAtParserWarning)
message = str(warning.message)
self.assertTrue(
message.startswith(BeautifulSoup.NO_PARSER_SPECIFIED_WARNING[:60])
)
def test_warning_if_no_parser_specified(self):
with warnings.catch_warnings(record=True) as w:
soup = BeautifulSoup("<a><b></b></a>")
self._assert_no_parser_specified(w)
def test_warning_if_parser_specified_too_vague(self):
with warnings.catch_warnings(record=True) as w:
soup = BeautifulSoup("<a><b></b></a>", "html")
self._assert_no_parser_specified(w)
def test_no_warning_if_explicit_parser_specified(self):
with warnings.catch_warnings(record=True) as w:
soup = BeautifulSoup("<a><b></b></a>", "html.parser")
self.assertEqual([], w)
def test_parseOnlyThese_renamed_to_parse_only(self):
with warnings.catch_warnings(record=True) as w:
soup = self.soup("<a><b></b></a>", parseOnlyThese=SoupStrainer("b"))
msg = str(w[0].message)
self.assertTrue("parseOnlyThese" in msg)
self.assertTrue("parse_only" in msg)
self.assertEqual(b"<b></b>", soup.encode())
def test_fromEncoding_renamed_to_from_encoding(self):
with warnings.catch_warnings(record=True) as w:
utf8 = b"\xc3\xa9"
soup = self.soup(utf8, fromEncoding="utf8")
msg = str(w[0].message)
self.assertTrue("fromEncoding" in msg)
self.assertTrue("from_encoding" in msg)
self.assertEqual("utf8", soup.original_encoding)
def test_unrecognized_keyword_argument(self):
self.assertRaises(
TypeError, self.soup, "<a>", no_such_argument=True)
def test_disk_file_warning(self):
filehandle = tempfile.NamedTemporaryFile()
filename = filehandle.name
try:
with warnings.catch_warnings(record=True) as w:
soup = self.soup(filename)
warning = self._assert_warning(w, MarkupResemblesLocatorWarning)
self.assertTrue("looks like a filename" in str(warning.message))
finally:
filehandle.close()
# The file no longer exists, so Beautiful Soup will no longer issue the warning.
with warnings.catch_warnings(record=True) as w:
soup = self.soup(filename)
self.assertEqual([], w)
def test_directory_warning(self):
try:
filename = tempfile.mkdtemp()
with warnings.catch_warnings(record=True) as w:
soup = self.soup(filename)
warning = self._assert_warning(w, MarkupResemblesLocatorWarning)
self.assertTrue("looks like a directory" in str(warning.message))
finally:
os.rmdir(filename)
# The directory no longer exists, so Beautiful Soup will no longer issue the warning.
with warnings.catch_warnings(record=True) as w:
soup = self.soup(filename)
self.assertEqual([], w)
def test_url_warning_with_bytes_url(self):
with warnings.catch_warnings(record=True) as warning_list:
soup = self.soup(b"http://www.crummybytes.com/")
warning = self._assert_warning(
warning_list, MarkupResemblesLocatorWarning
)
self.assertTrue("looks like a URL" in str(warning.message))
def test_url_warning_with_unicode_url(self):
with warnings.catch_warnings(record=True) as warning_list:
# note - this url must differ from the bytes one otherwise
# python's warnings system swallows the second warning
soup = self.soup("http://www.crummyunicode.com/")
warning = self._assert_warning(
warning_list, MarkupResemblesLocatorWarning
)
self.assertTrue("looks like a URL" in str(warning.message))
def test_url_warning_with_bytes_and_space(self):
# Here the markup contains something besides a URL, so no warning
# is issued.
with warnings.catch_warnings(record=True) as warning_list:
soup = self.soup(b"http://www.crummybytes.com/ is great")
self.assertFalse(any("looks like a URL" in str(w.message)
for w in warning_list))
def test_url_warning_with_unicode_and_space(self):
with warnings.catch_warnings(record=True) as warning_list:
soup = self.soup("http://www.crummyuncode.com/ is great")
self.assertFalse(any("looks like a URL" in str(w.message)
for w in warning_list))
class TestSelectiveParsing(SoupTest):
def test_parse_with_soupstrainer(self):
markup = "No<b>Yes</b><a>No<b>Yes <c>Yes</c></b>"
strainer = SoupStrainer("b")
soup = self.soup(markup, parse_only=strainer)
self.assertEqual(soup.encode(), b"<b>Yes</b><b>Yes <c>Yes</c></b>")
class TestEntitySubstitution(unittest.TestCase):
"""Standalone tests of the EntitySubstitution class."""
def setUp(self):
self.sub = EntitySubstitution
def test_simple_html_substitution(self):
# Unicode characters corresponding to named HTML entities
# are substituted, and no others.
s = "foo\u2200\N{SNOWMAN}\u00f5bar"
self.assertEqual(self.sub.substitute_html(s),
"foo&forall;\N{SNOWMAN}&otilde;bar")
def test_smart_quote_substitution(self):
# MS smart quotes are a common source of frustration, so we
# give them a special test.
quotes = b"\x91\x92foo\x93\x94"
dammit = UnicodeDammit(quotes)
self.assertEqual(self.sub.substitute_html(dammit.markup),
"&lsquo;&rsquo;foo&ldquo;&rdquo;")
def test_html5_entity(self):
# Some HTML5 entities correspond to single- or multi-character
# Unicode sequences.
for entity, u in (
# A few spot checks of our ability to recognize
# special character sequences and convert them
# to named entities.
('&models;', '\u22a7'),
('&Nfr;', '\U0001d511'),
('&ngeqq;', '\u2267\u0338'),
('&not;', '\xac'),
('&Not;', '\u2aec'),
# We _could_ convert | to &verbar;, but we don't, because
# | is an ASCII character.
('|', '|'),
# Similarly for the fj ligature, which we could convert to
# &fjlig;, but we don't.
("fj", "fj"),
# We do convert _these_ ASCII characters to HTML entities,
# because that's required to generate valid HTML.
('&gt;', '>'),
('&lt;', '<'),
('&amp;', '&'),
):
template = '3 %s 4'
raw = template % u
with_entities = template % entity
self.assertEqual(self.sub.substitute_html(raw), with_entities)
def test_html5_entity_with_variation_selector(self):
# Some HTML5 entities correspond either to a single-character
# Unicode sequence _or_ to the same character plus U+FE00,
# VARIATION SELECTOR 1. We can handle this.
data = "fjords \u2294 penguins"
markup = "fjords &sqcup; penguins"
self.assertEqual(self.sub.substitute_html(data), markup)
data = "fjords \u2294\ufe00 penguins"
markup = "fjords &sqcups; penguins"
self.assertEqual(self.sub.substitute_html(data), markup)
def test_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_false(self):
s = 'Welcome to "my bar"'
self.assertEqual(self.sub.substitute_xml(s, False), s)
def test_xml_attribute_quoting_normally_uses_double_quotes(self):
self.assertEqual(self.sub.substitute_xml("Welcome", True),
'"Welcome"')
self.assertEqual(self.sub.substitute_xml("Bob's Bar", True),
'"Bob\'s Bar"')
def test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotes(self):
s = 'Welcome to "my bar"'
self.assertEqual(self.sub.substitute_xml(s, True),
"'Welcome to \"my bar\"'")
def test_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotes(self):
s = 'Welcome to "Bob\'s Bar"'
self.assertEqual(
self.sub.substitute_xml(s, True),
'"Welcome to &quot;Bob\'s Bar&quot;"')
def test_xml_quotes_arent_escaped_when_value_is_not_being_quoted(self):
quoted = 'Welcome to "Bob\'s Bar"'
self.assertEqual(self.sub.substitute_xml(quoted), quoted)
def test_xml_quoting_handles_angle_brackets(self):
self.assertEqual(
self.sub.substitute_xml("foo<bar>"),
"foo&lt;bar&gt;")
def test_xml_quoting_handles_ampersands(self):
self.assertEqual(self.sub.substitute_xml("AT&T"), "AT&amp;T")
def test_xml_quoting_including_ampersands_when_they_are_part_of_an_entity(self):
self.assertEqual(
self.sub.substitute_xml("&Aacute;T&T"),
"&amp;Aacute;T&amp;T")
def test_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entity(self):
self.assertEqual(
self.sub.substitute_xml_containing_entities("&Aacute;T&T"),
"&Aacute;T&amp;T")
def test_quotes_not_html_substituted(self):
"""There's no need to do this except inside attribute values."""
text = 'Bob\'s "bar"'
self.assertEqual(self.sub.substitute_html(text), text)
class TestEncodingConversion(SoupTest):
# Test Beautiful Soup's ability to decode and encode from various
# encodings.
def setUp(self):
super(TestEncodingConversion, self).setUp()
self.unicode_data = '<html><head><meta charset="utf-8"/></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>'
self.utf8_data = self.unicode_data.encode("utf-8")
# Just so you know what it looks like.
self.assertEqual(
self.utf8_data,
b'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\xc3\xa9 bleu!</foo></body></html>')
def test_ascii_in_unicode_out(self):
# ASCII input is converted to Unicode. The original_encoding
# attribute is set to 'utf-8', a superset of ASCII.
chardet = bs4.dammit.chardet_dammit
logging.disable(logging.WARNING)
try:
def noop(str):
return None
# Disable chardet, which will realize that the ASCII is ASCII.
bs4.dammit.chardet_dammit = noop
ascii = b"<foo>a</foo>"
soup_from_ascii = self.soup(ascii)
unicode_output = soup_from_ascii.decode()
self.assertTrue(isinstance(unicode_output, str))
self.assertEqual(unicode_output, self.document_for(ascii.decode()))
self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8")
finally:
logging.disable(logging.NOTSET)
bs4.dammit.chardet_dammit = chardet
def test_unicode_in_unicode_out(self):
# Unicode input is left alone. The original_encoding attribute
# is not set.
soup_from_unicode = self.soup(self.unicode_data)
self.assertEqual(soup_from_unicode.decode(), self.unicode_data)
self.assertEqual(soup_from_unicode.foo.string, 'Sacr\xe9 bleu!')
self.assertEqual(soup_from_unicode.original_encoding, None)
def test_utf8_in_unicode_out(self):
# UTF-8 input is converted to Unicode. The original_encoding
# attribute is set.
soup_from_utf8 = self.soup(self.utf8_data)
self.assertEqual(soup_from_utf8.decode(), self.unicode_data)
self.assertEqual(soup_from_utf8.foo.string, 'Sacr\xe9 bleu!')
def test_utf8_out(self):
# The internal data structures can be encoded as UTF-8.
soup_from_unicode = self.soup(self.unicode_data)
self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data)
@skipIf(
PYTHON_3_PRE_3_2,
"Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")
def test_attribute_name_containing_unicode_characters(self):
markup = '<div><a \N{SNOWMAN}="snowman"></a></div>'
self.assertEqual(self.soup(markup).div.encode("utf8"), markup.encode("utf8"))
class TestNamedspacedAttribute(SoupTest):
def test_name_may_be_none_or_missing(self):
a = NamespacedAttribute("xmlns", None)
self.assertEqual(a, "xmlns")
a = NamespacedAttribute("xmlns", "")
self.assertEqual(a, "xmlns")
a = NamespacedAttribute("xmlns")
self.assertEqual(a, "xmlns")
def test_namespace_may_be_none_or_missing(self):
a = NamespacedAttribute(None, "tag")
self.assertEqual(a, "tag")
a = NamespacedAttribute("", "tag")
self.assertEqual(a, "tag")
def test_attribute_is_equivalent_to_colon_separated_string(self):
a = NamespacedAttribute("a", "b")
self.assertEqual("a:b", a)
def test_attributes_are_equivalent_if_prefix_and_name_identical(self):
a = NamespacedAttribute("a", "b", "c")
b = NamespacedAttribute("a", "b", "c")
self.assertEqual(a, b)
# The actual namespace is not considered.
c = NamespacedAttribute("a", "b", None)
self.assertEqual(a, c)
# But name and prefix are important.
d = NamespacedAttribute("a", "z", "c")
self.assertNotEqual(a, d)
e = NamespacedAttribute("z", "b", "c")
self.assertNotEqual(a, e)
class TestAttributeValueWithCharsetSubstitution(unittest.TestCase):
def test_content_meta_attribute_value(self):
value = CharsetMetaAttributeValue("euc-jp")
self.assertEqual("euc-jp", value)
self.assertEqual("euc-jp", value.original_value)
self.assertEqual("utf8", value.encode("utf8"))
def test_content_meta_attribute_value(self):
value = ContentMetaAttributeValue("text/html; charset=euc-jp")
self.assertEqual("text/html; charset=euc-jp", value)
self.assertEqual("text/html; charset=euc-jp", value.original_value)
self.assertEqual("text/html; charset=utf8", value.encode("utf8"))

File diff suppressed because it is too large


@@ -1,233 +0,0 @@
from __future__ import unicode_literals
import os
import time
import subprocess
import warnings
import tempfile
import pickle
import pytest
class PicklableMixin(object):
def _get_nobj_bytes(self, obj, dump_kwargs, load_kwargs):
"""
Pickle and unpickle an object using ``pickle.dumps`` / ``pickle.loads``
"""
pkl = pickle.dumps(obj, **dump_kwargs)
return pickle.loads(pkl, **load_kwargs)
def _get_nobj_file(self, obj, dump_kwargs, load_kwargs):
"""
Pickle and unpickle an object using ``pickle.dump`` / ``pickle.load`` on
a temporary file.
"""
with tempfile.TemporaryFile('w+b') as pkl:
pickle.dump(obj, pkl, **dump_kwargs)
pkl.seek(0) # Reset the file to the beginning to read it
nobj = pickle.load(pkl, **load_kwargs)
return nobj
def assertPicklable(self, obj, singleton=False, asfile=False,
dump_kwargs=None, load_kwargs=None):
"""
Assert that an object can be pickled and unpickled. This assertion
assumes that the desired behavior is that the unpickled object compares
equal to the original object, but is not the same object.
"""
get_nobj = self._get_nobj_file if asfile else self._get_nobj_bytes
dump_kwargs = dump_kwargs or {}
load_kwargs = load_kwargs or {}
nobj = get_nobj(obj, dump_kwargs, load_kwargs)
if not singleton:
self.assertIsNot(obj, nobj)
self.assertEqual(obj, nobj)
class TZContextBase(object):
"""
Base class for a context manager which allows changing of time zones.
Subclasses may define a guard variable to either block or allow time
zone changes by redefining ``_guard_var_name`` and ``_guard_allows_change``.
The default is that the guard variable must be affirmatively set.
Subclasses must define ``get_current_tz`` and ``set_current_tz``.
"""
_guard_var_name = "DATEUTIL_MAY_CHANGE_TZ"
_guard_allows_change = True
def __init__(self, tzval):
self.tzval = tzval
self._old_tz = None
@classmethod
def tz_change_allowed(cls):
"""
Class method used to query whether or not this class allows time zone
changes.
"""
guard = bool(os.environ.get(cls._guard_var_name, False))
# _guard_allows_change gives the "default" behavior - if True, the
# guard is overcoming a block. If false, the guard is causing a block.
# Whether tz_change is allowed is therefore the XNOR of the two.
return guard == cls._guard_allows_change
@classmethod
def tz_change_disallowed_message(cls):
""" Generate instructions on how to allow tz changes """
msg = ('Changing time zone not allowed. Set {envar} to {gval} '
'if you would like to allow this behavior')
return msg.format(envar=cls._guard_var_name,
gval=cls._guard_allows_change)
def __enter__(self):
if not self.tz_change_allowed():
msg = self.tz_change_disallowed_message()
pytest.skip(msg)
# If this is used outside of a test suite, we still want an error.
raise ValueError(msg) # pragma: no cover
self._old_tz = self.get_current_tz()
self.set_current_tz(self.tzval)
def __exit__(self, type, value, traceback):
if self._old_tz is not None:
self.set_current_tz(self._old_tz)
self._old_tz = None
def get_current_tz(self):
raise NotImplementedError
def set_current_tz(self):
raise NotImplementedError
class TZEnvContext(TZContextBase):
"""
Context manager that temporarily sets the `TZ` variable (for use on
*nix-like systems). Because the effect is local to the shell anyway, this
will apply *unless* a guard is set.
If you do not want the TZ environment variable set, you may set the
``DATEUTIL_MAY_NOT_CHANGE_TZ_VAR`` variable to a truthy value.
"""
_guard_var_name = "DATEUTIL_MAY_NOT_CHANGE_TZ_VAR"
_guard_allows_change = False
def get_current_tz(self):
return os.environ.get('TZ', UnsetTz)
def set_current_tz(self, tzval):
if tzval is UnsetTz and 'TZ' in os.environ:
del os.environ['TZ']
else:
os.environ['TZ'] = tzval
time.tzset()
class TZWinContext(TZContextBase):
"""
Context manager for changing local time zone on Windows.
Because the effect of this is system-wide and global, it may have
unintended side effect. Set the ``DATEUTIL_MAY_CHANGE_TZ`` environment
variable to a truthy value before using this context manager.
"""
def get_current_tz(self):
p = subprocess.Popen(['tzutil', '/g'], stdout=subprocess.PIPE)
ctzname, err = p.communicate()
ctzname = ctzname.decode() # Popen returns bytes
if p.returncode:
raise OSError('Failed to get current time zone: ' + err)
return ctzname
def set_current_tz(self, tzname):
p = subprocess.Popen('tzutil /s "' + tzname + '"')
out, err = p.communicate()
if p.returncode:
raise OSError('Failed to set current time zone: ' +
(err or 'Unknown error.'))
###
# Utility classes
class NotAValueClass(object):
"""
A class analogous to NaN that has operations defined for any type.
"""
def _op(self, other):
return self # Operation with NotAValue returns NotAValue
def _cmp(self, other):
return False
__add__ = __radd__ = _op
__sub__ = __rsub__ = _op
__mul__ = __rmul__ = _op
__div__ = __rdiv__ = _op
__truediv__ = __rtruediv__ = _op
__floordiv__ = __rfloordiv__ = _op
__lt__ = __rlt__ = _op
__gt__ = __rgt__ = _op
__eq__ = __req__ = _op
__le__ = __rle__ = _op
__ge__ = __rge__ = _op
NotAValue = NotAValueClass()
class ComparesEqualClass(object):
"""
A class that is always equal to whatever you compare it to.
"""
def __eq__(self, other):
return True
def __ne__(self, other):
return False
def __le__(self, other):
return True
def __ge__(self, other):
return True
def __lt__(self, other):
return False
def __gt__(self, other):
return False
__req__ = __eq__
__rne__ = __ne__
__rle__ = __le__
__rge__ = __ge__
__rlt__ = __lt__
__rgt__ = __gt__
ComparesEqual = ComparesEqualClass()
class UnsetTzClass(object):
""" Sentinel class for unset time zone variable """
pass
UnsetTz = UnsetTzClass()
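
Hypothetical usage of the TZEnvContext helper defined above (POSIX only, and subject to the DATEUTIL_MAY_NOT_CHANGE_TZ_VAR guard):

import time

# Temporarily export TZ=UTC for the duration of the block; the previous
# value (or its absence) is restored on exit.
with TZEnvContext('UTC'):
    print(time.tzname)  # ('UTC', 'UTC')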


@@ -1,41 +0,0 @@
import os
import pytest
# Configure pytest to ignore xfailing tests
# See: https://stackoverflow.com/a/53198349/467366
def pytest_collection_modifyitems(items):
for item in items:
marker_getter = getattr(item, 'get_closest_marker', None)
# Python 3.3 support
if marker_getter is None:
marker_getter = item.get_marker
marker = marker_getter('xfail')
# Need to query the args because conditional xfail tests still have
# the xfail mark even if they are not expected to fail
if marker and (not marker.args or marker.args[0]):
item.add_marker(pytest.mark.no_cover)
def set_tzpath():
"""
Sets the TZPATH variable if it's specified in an environment variable.
"""
tzpath = os.environ.get('DATEUTIL_TZPATH', None)
if tzpath is None:
return
path_components = tzpath.split(':')
print("Setting TZPATH to {}".format(path_components))
from dateutil import tz
tz.TZPATHS.clear()
tz.TZPATHS.extend(path_components)
set_tzpath()


@@ -1,27 +0,0 @@
from hypothesis import given, assume
from hypothesis import strategies as st
from dateutil import tz
from dateutil.parser import isoparse
import pytest
# Strategies
TIME_ZONE_STRATEGY = st.sampled_from([None, tz.UTC] +
[tz.gettz(zname) for zname in ('US/Eastern', 'US/Pacific',
'Australia/Sydney', 'Europe/London')])
ASCII_STRATEGY = st.characters(max_codepoint=127)
@pytest.mark.isoparser
@given(dt=st.datetimes(timezones=TIME_ZONE_STRATEGY), sep=ASCII_STRATEGY)
def test_timespec_auto(dt, sep):
if dt.tzinfo is not None:
# Assume offset has no sub-second components
assume(dt.utcoffset().total_seconds() % 60 == 0)
sep = str(sep) # Python 2.7 requires bytes
dtstr = dt.isoformat(sep=sep)
dt_rt = isoparse(dtstr)
assert dt_rt == dt
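
The round-trip property being tested, in concrete form (illustrative, not part of the deleted file):

from datetime import datetime
from dateutil import tz
from dateutil.parser import isoparse

dt = datetime(2021, 10, 15, 1, 51, 46, tzinfo=tz.UTC)
# ISO 8601 text produced by isoformat() parses back to the same instant.
assert isoparse(dt.isoformat()) == dt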


@@ -1,22 +0,0 @@
from hypothesis.strategies import integers
from hypothesis import given
import pytest
from dateutil.parser import parserinfo
@pytest.mark.parserinfo
@given(integers(min_value=100, max_value=9999))
def test_convertyear(n):
assert n == parserinfo().convertyear(n)
@pytest.mark.parserinfo
@given(integers(min_value=-50,
max_value=49))
def test_convertyear_no_specified_century(n):
p = parserinfo()
new_year = p._year + n
result = p.convertyear(new_year % 100, century_specified=False)
assert result == new_year
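
For concreteness, a sketch (not from the deleted file) of the two-digit-year behaviour the property test above checks:

from dateutil.parser import parserinfo

p = parserinfo()
print(p.convertyear(2021))  # 2021 - four-digit years pass through unchanged
# Two-digit years resolve to within +/-50 years of the current year,
# so around the time of this commit 99 -> 1999.
print(p.convertyear(99, century_specified=False))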


@@ -1,35 +0,0 @@
from datetime import datetime, timedelta
import pytest
import six
from hypothesis import assume, given
from hypothesis import strategies as st
from dateutil import tz as tz
EPOCHALYPSE = datetime.fromtimestamp(2147483647)
NEGATIVE_EPOCHALYPSE = datetime.fromtimestamp(0) - timedelta(seconds=2147483648)
@pytest.mark.gettz
@pytest.mark.parametrize("gettz_arg", [None, ""])
# TODO: Remove bounds when GH #590 is resolved
@given(
dt=st.datetimes(
min_value=NEGATIVE_EPOCHALYPSE, max_value=EPOCHALYPSE, timezones=st.just(tz.UTC),
)
)
def test_gettz_returns_local(gettz_arg, dt):
act_tz = tz.gettz(gettz_arg)
if isinstance(act_tz, tz.tzlocal):
return
dt_act = dt.astimezone(tz.gettz(gettz_arg))
if six.PY2:
dt_exp = dt.astimezone(tz.tzlocal())
else:
dt_exp = dt.astimezone()
assert dt_act == dt_exp
assert dt_act.tzname() == dt_exp.tzname()
assert dt_act.utcoffset() == dt_exp.utcoffset()


@@ -1,93 +0,0 @@
from dateutil.easter import easter
from dateutil.easter import EASTER_WESTERN, EASTER_ORTHODOX, EASTER_JULIAN
from datetime import date
import pytest
# List of easters between 1990 and 2050
western_easter_dates = [
date(1990, 4, 15), date(1991, 3, 31), date(1992, 4, 19), date(1993, 4, 11),
date(1994, 4, 3), date(1995, 4, 16), date(1996, 4, 7), date(1997, 3, 30),
date(1998, 4, 12), date(1999, 4, 4),
date(2000, 4, 23), date(2001, 4, 15), date(2002, 3, 31), date(2003, 4, 20),
date(2004, 4, 11), date(2005, 3, 27), date(2006, 4, 16), date(2007, 4, 8),
date(2008, 3, 23), date(2009, 4, 12),
date(2010, 4, 4), date(2011, 4, 24), date(2012, 4, 8), date(2013, 3, 31),
date(2014, 4, 20), date(2015, 4, 5), date(2016, 3, 27), date(2017, 4, 16),
date(2018, 4, 1), date(2019, 4, 21),
date(2020, 4, 12), date(2021, 4, 4), date(2022, 4, 17), date(2023, 4, 9),
date(2024, 3, 31), date(2025, 4, 20), date(2026, 4, 5), date(2027, 3, 28),
date(2028, 4, 16), date(2029, 4, 1),
date(2030, 4, 21), date(2031, 4, 13), date(2032, 3, 28), date(2033, 4, 17),
date(2034, 4, 9), date(2035, 3, 25), date(2036, 4, 13), date(2037, 4, 5),
date(2038, 4, 25), date(2039, 4, 10),
date(2040, 4, 1), date(2041, 4, 21), date(2042, 4, 6), date(2043, 3, 29),
date(2044, 4, 17), date(2045, 4, 9), date(2046, 3, 25), date(2047, 4, 14),
date(2048, 4, 5), date(2049, 4, 18), date(2050, 4, 10)
]
orthodox_easter_dates = [
date(1990, 4, 15), date(1991, 4, 7), date(1992, 4, 26), date(1993, 4, 18),
date(1994, 5, 1), date(1995, 4, 23), date(1996, 4, 14), date(1997, 4, 27),
date(1998, 4, 19), date(1999, 4, 11),
date(2000, 4, 30), date(2001, 4, 15), date(2002, 5, 5), date(2003, 4, 27),
date(2004, 4, 11), date(2005, 5, 1), date(2006, 4, 23), date(2007, 4, 8),
date(2008, 4, 27), date(2009, 4, 19),
date(2010, 4, 4), date(2011, 4, 24), date(2012, 4, 15), date(2013, 5, 5),
date(2014, 4, 20), date(2015, 4, 12), date(2016, 5, 1), date(2017, 4, 16),
date(2018, 4, 8), date(2019, 4, 28),
date(2020, 4, 19), date(2021, 5, 2), date(2022, 4, 24), date(2023, 4, 16),
date(2024, 5, 5), date(2025, 4, 20), date(2026, 4, 12), date(2027, 5, 2),
date(2028, 4, 16), date(2029, 4, 8),
date(2030, 4, 28), date(2031, 4, 13), date(2032, 5, 2), date(2033, 4, 24),
date(2034, 4, 9), date(2035, 4, 29), date(2036, 4, 20), date(2037, 4, 5),
date(2038, 4, 25), date(2039, 4, 17),
date(2040, 5, 6), date(2041, 4, 21), date(2042, 4, 13), date(2043, 5, 3),
date(2044, 4, 24), date(2045, 4, 9), date(2046, 4, 29), date(2047, 4, 21),
date(2048, 4, 5), date(2049, 4, 25), date(2050, 4, 17)
]
# A random smattering of Julian dates.
# Pulled values from http://www.kevinlaughery.com/east4099.html
julian_easter_dates = [
date( 326, 4, 3), date( 375, 4, 5), date( 492, 4, 5), date( 552, 3, 31),
date( 562, 4, 9), date( 569, 4, 21), date( 597, 4, 14), date( 621, 4, 19),
date( 636, 3, 31), date( 655, 3, 29), date( 700, 4, 11), date( 725, 4, 8),
date( 750, 3, 29), date( 782, 4, 7), date( 835, 4, 18), date( 849, 4, 14),
date( 867, 3, 30), date( 890, 4, 12), date( 922, 4, 21), date( 934, 4, 6),
date(1049, 3, 26), date(1058, 4, 19), date(1113, 4, 6), date(1119, 3, 30),
date(1242, 4, 20), date(1255, 3, 28), date(1257, 4, 8), date(1258, 3, 24),
date(1261, 4, 24), date(1278, 4, 17), date(1333, 4, 4), date(1351, 4, 17),
date(1371, 4, 6), date(1391, 3, 26), date(1402, 3, 26), date(1412, 4, 3),
date(1439, 4, 5), date(1445, 3, 28), date(1531, 4, 9), date(1555, 4, 14)
]
@pytest.mark.parametrize("easter_date", western_easter_dates)
def test_easter_western(easter_date):
assert easter_date == easter(easter_date.year, EASTER_WESTERN)
@pytest.mark.parametrize("easter_date", orthodox_easter_dates)
def test_easter_orthodox(easter_date):
assert easter_date == easter(easter_date.year, EASTER_ORTHODOX)
@pytest.mark.parametrize("easter_date", julian_easter_dates)
def test_easter_julian(easter_date):
assert easter_date == easter(easter_date.year, EASTER_JULIAN)
def test_easter_bad_method():
with pytest.raises(ValueError):
easter(1975, 4)
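
A quick spot check (not from the deleted file) of the three calculation methods, using dates already listed above:

from dateutil.easter import easter, EASTER_WESTERN, EASTER_ORTHODOX, EASTER_JULIAN

print(easter(2021, EASTER_WESTERN))   # 2021-04-04 (EASTER_WESTERN is the default)
print(easter(2021, EASTER_ORTHODOX))  # 2021-05-02
print(easter(1555, EASTER_JULIAN))    # 1555-04-14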


@@ -1,33 +0,0 @@
"""Test for the "import *" functionality.
As import * can only be done at module level, it has been added in a separate file
"""
import pytest
prev_locals = list(locals())
from dateutil import *
new_locals = {name:value for name,value in locals().items()
if name not in prev_locals}
new_locals.pop('prev_locals')
@pytest.mark.import_star
def test_imported_modules():
""" Test that `from dateutil import *` adds modules in __all__ locally """
import dateutil.easter
import dateutil.parser
import dateutil.relativedelta
import dateutil.rrule
import dateutil.tz
import dateutil.utils
import dateutil.zoneinfo
assert dateutil.easter == new_locals.pop("easter")
assert dateutil.parser == new_locals.pop("parser")
assert dateutil.relativedelta == new_locals.pop("relativedelta")
assert dateutil.rrule == new_locals.pop("rrule")
assert dateutil.tz == new_locals.pop("tz")
assert dateutil.utils == new_locals.pop("utils")
assert dateutil.zoneinfo == new_locals.pop("zoneinfo")
assert not new_locals


@@ -1,176 +0,0 @@
import sys
import pytest
HOST_IS_WINDOWS = sys.platform.startswith('win')
def test_import_version_str():
""" Test that dateutil.__version__ can be imported"""
from dateutil import __version__
def test_import_version_root():
import dateutil
assert hasattr(dateutil, '__version__')
# Test that dateutil.easter-related imports work properly
def test_import_easter_direct():
import dateutil.easter
def test_import_easter_from():
from dateutil import easter
def test_import_easter_start():
from dateutil.easter import easter
# Test that dateutil.parser-related imports work properly
def test_import_parser_direct():
import dateutil.parser
def test_import_parser_from():
from dateutil import parser
def test_import_parser_all():
# All interface
from dateutil.parser import parse
from dateutil.parser import parserinfo
# Other public classes
from dateutil.parser import parser
for var in (parse, parserinfo, parser):
assert var is not None
# Test that dateutil.relativedelta-related imports work properly
def test_import_relative_delta_direct():
import dateutil.relativedelta
def test_import_relative_delta_from():
from dateutil import relativedelta
def test_import_relative_delta_all():
from dateutil.relativedelta import relativedelta
from dateutil.relativedelta import MO, TU, WE, TH, FR, SA, SU
for var in (relativedelta, MO, TU, WE, TH, FR, SA, SU):
assert var is not None
# In the public interface but not in all
from dateutil.relativedelta import weekday
assert weekday is not None
# Test that dateutil.rrule related imports work properly
def test_import_rrule_direct():
import dateutil.rrule
def test_import_rrule_from():
from dateutil import rrule
def test_import_rrule_all():
from dateutil.rrule import rrule
from dateutil.rrule import rruleset
from dateutil.rrule import rrulestr
from dateutil.rrule import YEARLY, MONTHLY, WEEKLY, DAILY
from dateutil.rrule import HOURLY, MINUTELY, SECONDLY
from dateutil.rrule import MO, TU, WE, TH, FR, SA, SU
rr_all = (rrule, rruleset, rrulestr,
YEARLY, MONTHLY, WEEKLY, DAILY,
HOURLY, MINUTELY, SECONDLY,
MO, TU, WE, TH, FR, SA, SU)
for var in rr_all:
assert var is not None
# In the public interface but not in all
from dateutil.rrule import weekday
assert weekday is not None
# Test that dateutil.tz related imports work properly
def test_import_tztest_direct():
import dateutil.tz
def test_import_tz_from():
from dateutil import tz
def test_import_tz_all():
from dateutil.tz import tzutc
from dateutil.tz import tzoffset
from dateutil.tz import tzlocal
from dateutil.tz import tzfile
from dateutil.tz import tzrange
from dateutil.tz import tzstr
from dateutil.tz import tzical
from dateutil.tz import gettz
from dateutil.tz import tzwin
from dateutil.tz import tzwinlocal
from dateutil.tz import UTC
from dateutil.tz import datetime_ambiguous
from dateutil.tz import datetime_exists
from dateutil.tz import resolve_imaginary
tz_all = ["tzutc", "tzoffset", "tzlocal", "tzfile", "tzrange",
"tzstr", "tzical", "gettz", "datetime_ambiguous",
"datetime_exists", "resolve_imaginary", "UTC"]
tz_all += ["tzwin", "tzwinlocal"] if sys.platform.startswith("win") else []
lvars = locals()
for var in tz_all:
assert lvars[var] is not None
# Test that dateutil.tzwin related imports work properly
@pytest.mark.skipif(not HOST_IS_WINDOWS, reason="Requires Windows")
def test_import_tz_windows_direct():
import dateutil.tzwin
@pytest.mark.skipif(not HOST_IS_WINDOWS, reason="Requires Windows")
def test_import_tz_windows_from():
from dateutil import tzwin
@pytest.mark.skipif(not HOST_IS_WINDOWS, reason="Requires Windows")
def test_import_tz_windows_star():
from dateutil.tzwin import tzwin
from dateutil.tzwin import tzwinlocal
tzwin_all = [tzwin, tzwinlocal]
for var in tzwin_all:
assert var is not None
# Test imports of Zone Info
def test_import_zone_info_direct():
import dateutil.zoneinfo
def test_import_zone_info_from():
from dateutil import zoneinfo
def test_import_zone_info_star():
from dateutil.zoneinfo import gettz
from dateutil.zoneinfo import gettz_db_metadata
from dateutil.zoneinfo import rebuild
zi_all = (gettz, gettz_db_metadata, rebuild)
for var in zi_all:
assert var is not None
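# Editor's note: the import checks above all follow one pattern -- import a
# module and make sure the names its public API advertises are actually
# present. A minimal, hedged sketch of that pattern (the helper and test below
# are illustrative additions, not part of the original dateutil test suite):
def _missing_public_names(module, names):
    # Return the advertised names that the module does not actually expose.
    return [name for name in names if not hasattr(module, name)]
def test_rrule_public_surface_sketch():
    from dateutil import rrule
    assert _missing_public_names(rrule, ["rrule", "rruleset", "rrulestr"]) == []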

View file

@@ -1,91 +0,0 @@
# -*- coding: utf-8 -*-
"""
Tests for implementation details, not necessarily part of the user-facing
API.
The motivating case for these tests is #483, where we want to smoke-test
code that may be difficult to reach through the standard API calls.
"""
import sys
import pytest
from dateutil.parser._parser import _ymd
from dateutil import tz
IS_PY32 = sys.version_info[0:2] == (3, 2)
@pytest.mark.smoke
def test_YMD_could_be_day():
ymd = _ymd('foo bar 124 baz')
ymd.append(2, 'M')
assert ymd.has_month
assert not ymd.has_year
assert ymd.could_be_day(4)
assert not ymd.could_be_day(-6)
assert not ymd.could_be_day(32)
# Assumes leap year
assert ymd.could_be_day(29)
ymd.append(1999)
assert ymd.has_year
assert not ymd.could_be_day(29)
ymd.append(16, 'D')
assert ymd.has_day
assert not ymd.could_be_day(1)
ymd = _ymd('foo bar 124 baz')
ymd.append(1999)
assert ymd.could_be_day(31)
###
# Test that private interfaces in _parser are deprecated properly
@pytest.mark.skipif(IS_PY32, reason='pytest.warns not supported on Python 3.2')
def test_parser_private_warns():
from dateutil.parser import _timelex, _tzparser
from dateutil.parser import _parsetz
with pytest.warns(DeprecationWarning):
_tzparser()
with pytest.warns(DeprecationWarning):
_timelex('2014-03-03')
with pytest.warns(DeprecationWarning):
_parsetz('+05:00')
@pytest.mark.skipif(IS_PY32, reason='pytest.warns not supported on Python 3.2')
def test_parser_parser_private_not_warns():
from dateutil.parser._parser import _timelex, _tzparser
from dateutil.parser._parser import _parsetz
with pytest.warns(None) as recorder:
_tzparser()
assert len(recorder) == 0
with pytest.warns(None) as recorder:
_timelex('2014-03-03')
assert len(recorder) == 0
with pytest.warns(None) as recorder:
_parsetz('+05:00')
assert len(recorder) == 0
@pytest.mark.tzstr
def test_tzstr_internal_timedeltas():
with pytest.warns(tz.DeprecatedTzFormatWarning):
tz1 = tz.tzstr("EST5EDT,5,4,0,7200,11,-3,0,7200")
with pytest.warns(tz.DeprecatedTzFormatWarning):
tz2 = tz.tzstr("EST5EDT,4,1,0,7200,10,-1,0,7200")
assert tz1._start_delta != tz2._start_delta
assert tz1._end_delta != tz2._end_delta

View file

@@ -1,509 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from datetime import datetime, timedelta, date, time
import itertools as it
from dateutil import tz
from dateutil.tz import UTC
from dateutil.parser import isoparser, isoparse
import pytest
import six
def _generate_tzoffsets(limited):
def _mkoffset(hmtuple, fmt):
h, m = hmtuple
m_td = (-1 if h < 0 else 1) * m
tzo = tz.tzoffset(None, timedelta(hours=h, minutes=m_td))
return tzo, fmt.format(h, m)
out = []
if not limited:
# The subset that's just hours
hm_out_h = [(h, 0) for h in (-23, -5, 0, 5, 23)]
out.extend([_mkoffset(hm, '{:+03d}') for hm in hm_out_h])
# Ones that have hours and minutes
hm_out = [] + hm_out_h
hm_out += [(-12, 15), (11, 30), (10, 2), (5, 15), (-5, 30)]
else:
hm_out = [(-5, -0)]
fmts = ['{:+03d}:{:02d}', '{:+03d}{:02d}']
out += [_mkoffset(hm, fmt) for hm in hm_out for fmt in fmts]
# Also add in UTC and naive
out.append((UTC, 'Z'))
out.append((None, ''))
return out
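# Editor's hedged illustration (not part of the original suite): each generated
# entry pairs a tzinfo with the offset string isoparse should map back to it,
# e.g. '-05:30' corresponds to tz.tzoffset(None, timedelta(hours=-5, minutes=-30)).
def test_tzoffset_string_maps_to_tzinfo_sketch():
    parsed = isoparse('2014-02-04T12:30-05:30')
    assert parsed.tzinfo == tz.tzoffset(None, timedelta(hours=-5, minutes=-30))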
FULL_TZOFFSETS = _generate_tzoffsets(False)
FULL_TZOFFSETS_AWARE = [x for x in FULL_TZOFFSETS if x[1]]
TZOFFSETS = _generate_tzoffsets(True)
DATES = [datetime(1996, 1, 1), datetime(2017, 1, 1)]
@pytest.mark.parametrize('dt', tuple(DATES))
def test_year_only(dt):
dtstr = dt.strftime('%Y')
assert isoparse(dtstr) == dt
DATES += [datetime(2000, 2, 1), datetime(2017, 4, 1)]
@pytest.mark.parametrize('dt', tuple(DATES))
def test_year_month(dt):
fmt = '%Y-%m'
dtstr = dt.strftime(fmt)
assert isoparse(dtstr) == dt
DATES += [datetime(2016, 2, 29), datetime(2018, 3, 15)]
YMD_FMTS = ('%Y%m%d', '%Y-%m-%d')
@pytest.mark.parametrize('dt', tuple(DATES))
@pytest.mark.parametrize('fmt', YMD_FMTS)
def test_year_month_day(dt, fmt):
dtstr = dt.strftime(fmt)
assert isoparse(dtstr) == dt
def _isoparse_date_and_time(dt, date_fmt, time_fmt, tzoffset,
microsecond_precision=None):
tzi, offset_str = tzoffset
fmt = date_fmt + 'T' + time_fmt
dt = dt.replace(tzinfo=tzi)
dtstr = dt.strftime(fmt)
if microsecond_precision is not None:
if not fmt.endswith('%f'): # pragma: nocover
raise ValueError('Time format has no microseconds!')
if microsecond_precision > 6: # pragma: nocover
raise ValueError('Precision must be 1-6')
if microsecond_precision != 6:
dtstr = dtstr[:-(6 - microsecond_precision)]
dtstr += offset_str
assert isoparse(dtstr) == dt
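# Editor's hedged example of the round trip the helper above performs, written
# out for one concrete case (a sketch, not part of the original suite):
def test_isoparse_roundtrip_sketch():
    expected = datetime(2003, 9, 25, 10, 49, 41, tzinfo=tz.tzoffset(None, 10800))
    assert isoparse('2003-09-25T10:49:41+03:00') == expected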
DATETIMES = [datetime(1998, 4, 16, 12),
datetime(2019, 11, 18, 23),
datetime(2014, 12, 16, 4)]
@pytest.mark.parametrize('dt', tuple(DATETIMES))
@pytest.mark.parametrize('date_fmt', YMD_FMTS)
@pytest.mark.parametrize('tzoffset', TZOFFSETS)
def test_ymd_h(dt, date_fmt, tzoffset):
_isoparse_date_and_time(dt, date_fmt, '%H', tzoffset)
DATETIMES = [datetime(2012, 1, 6, 9, 37)]
@pytest.mark.parametrize('dt', tuple(DATETIMES))
@pytest.mark.parametrize('date_fmt', YMD_FMTS)
@pytest.mark.parametrize('time_fmt', ('%H%M', '%H:%M'))
@pytest.mark.parametrize('tzoffset', TZOFFSETS)
def test_ymd_hm(dt, date_fmt, time_fmt, tzoffset):
_isoparse_date_and_time(dt, date_fmt, time_fmt, tzoffset)
DATETIMES = [datetime(2003, 9, 2, 22, 14, 2),
datetime(2003, 8, 8, 14, 9, 14),
datetime(2003, 4, 7, 6, 14, 59)]
HMS_FMTS = ('%H%M%S', '%H:%M:%S')
@pytest.mark.parametrize('dt', tuple(DATETIMES))
@pytest.mark.parametrize('date_fmt', YMD_FMTS)
@pytest.mark.parametrize('time_fmt', HMS_FMTS)
@pytest.mark.parametrize('tzoffset', TZOFFSETS)
def test_ymd_hms(dt, date_fmt, time_fmt, tzoffset):
_isoparse_date_and_time(dt, date_fmt, time_fmt, tzoffset)
DATETIMES = [datetime(2017, 11, 27, 6, 14, 30, 123456)]
@pytest.mark.parametrize('dt', tuple(DATETIMES))
@pytest.mark.parametrize('date_fmt', YMD_FMTS)
@pytest.mark.parametrize('time_fmt', (x + sep + '%f' for x in HMS_FMTS
for sep in '.,'))
@pytest.mark.parametrize('tzoffset', TZOFFSETS)
@pytest.mark.parametrize('precision', list(range(3, 7)))
def test_ymd_hms_micro(dt, date_fmt, time_fmt, tzoffset, precision):
# Truncate the microseconds to the desired precision for the representation
dt = dt.replace(microsecond=int(round(dt.microsecond, precision-6)))
_isoparse_date_and_time(dt, date_fmt, time_fmt, tzoffset, precision)
###
# Truncation of extra digits beyond microsecond precision
@pytest.mark.parametrize('dt_str', [
'2018-07-03T14:07:00.123456000001',
'2018-07-03T14:07:00.123456999999',
])
def test_extra_subsecond_digits(dt_str):
assert isoparse(dt_str) == datetime(2018, 7, 3, 14, 7, 0, 123456)
@pytest.mark.parametrize('tzoffset', FULL_TZOFFSETS)
def test_full_tzoffsets(tzoffset):
dt = datetime(2017, 11, 27, 6, 14, 30, 123456)
date_fmt = '%Y-%m-%d'
time_fmt = '%H:%M:%S.%f'
_isoparse_date_and_time(dt, date_fmt, time_fmt, tzoffset)
@pytest.mark.parametrize('dt_str', [
'2014-04-11T00',
'2014-04-10T24',
'2014-04-11T00:00',
'2014-04-10T24:00',
'2014-04-11T00:00:00',
'2014-04-10T24:00:00',
'2014-04-11T00:00:00.000',
'2014-04-10T24:00:00.000',
'2014-04-11T00:00:00.000000',
'2014-04-10T24:00:00.000000']
)
def test_datetime_midnight(dt_str):
assert isoparse(dt_str) == datetime(2014, 4, 11, 0, 0, 0, 0)
@pytest.mark.parametrize('datestr', [
'2014-01-01',
'20140101',
])
@pytest.mark.parametrize('sep', [' ', 'a', 'T', '_', '-'])
def test_isoparse_sep_none(datestr, sep):
isostr = datestr + sep + '14:33:09'
assert isoparse(isostr) == datetime(2014, 1, 1, 14, 33, 9)
##
# Uncommon date formats
TIME_ARGS = ('time_args',
((None, time(0), None), ) + tuple(('%H:%M:%S.%f', _t, _tz)
for _t, _tz in it.product([time(0), time(9, 30), time(14, 47)],
TZOFFSETS)))
@pytest.mark.parametrize('isocal,dt_expected',[
((2017, 10), datetime(2017, 3, 6)),
((2020, 1), datetime(2019, 12, 30)), # ISO year != Cal year
((2004, 53), datetime(2004, 12, 27)), # Only half the week is in 2004
])
def test_isoweek(isocal, dt_expected):
# TODO: Figure out how to parametrize this on formats, too
for fmt in ('{:04d}-W{:02d}', '{:04d}W{:02d}'):
dtstr = fmt.format(*isocal)
assert isoparse(dtstr) == dt_expected
@pytest.mark.parametrize('isocal,dt_expected',[
((2016, 13, 7), datetime(2016, 4, 3)),
((2004, 53, 7), datetime(2005, 1, 2)), # ISO year != Cal year
((2009, 1, 2), datetime(2008, 12, 30)), # ISO year < Cal year
((2009, 53, 6), datetime(2010, 1, 2)) # ISO year > Cal year
])
def test_isoweek_day(isocal, dt_expected):
# TODO: Figure out how to parametrize this on formats, too
for fmt in ('{:04d}-W{:02d}-{:d}', '{:04d}W{:02d}{:d}'):
dtstr = fmt.format(*isocal)
assert isoparse(dtstr) == dt_expected
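# Editor's hedged cross-check (not part of the original suite): the expected
# values above agree with the standard library's own ISO calendar arithmetic.
def test_isoweek_day_stdlib_crosscheck_sketch():
    assert datetime(2005, 1, 2).isocalendar()[:3] == (2004, 53, 7)
    assert datetime(2008, 12, 30).isocalendar()[:3] == (2009, 1, 2)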
@pytest.mark.parametrize('isoord,dt_expected', [
((2004, 1), datetime(2004, 1, 1)),
((2016, 60), datetime(2016, 2, 29)),
((2017, 60), datetime(2017, 3, 1)),
((2016, 366), datetime(2016, 12, 31)),
((2017, 365), datetime(2017, 12, 31))
])
def test_iso_ordinal(isoord, dt_expected):
for fmt in ('{:04d}-{:03d}', '{:04d}{:03d}'):
dtstr = fmt.format(*isoord)
assert isoparse(dtstr) == dt_expected
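# Editor's hedged cross-check (not part of the original suite): ordinal days
# line up with the standard library's timetuple().tm_yday.
def test_iso_ordinal_stdlib_crosscheck_sketch():
    assert datetime(2016, 2, 29).timetuple().tm_yday == 60
    assert datetime(2017, 3, 1).timetuple().tm_yday == 60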
###
# Acceptance of bytes
@pytest.mark.parametrize('isostr,dt', [
(b'2014', datetime(2014, 1, 1)),
(b'20140204', datetime(2014, 2, 4)),
(b'2014-02-04', datetime(2014, 2, 4)),
(b'2014-02-04T12', datetime(2014, 2, 4, 12)),
(b'2014-02-04T12:30', datetime(2014, 2, 4, 12, 30)),
(b'2014-02-04T12:30:15', datetime(2014, 2, 4, 12, 30, 15)),
(b'2014-02-04T12:30:15.224', datetime(2014, 2, 4, 12, 30, 15, 224000)),
(b'20140204T123015.224', datetime(2014, 2, 4, 12, 30, 15, 224000)),
(b'2014-02-04T12:30:15.224Z', datetime(2014, 2, 4, 12, 30, 15, 224000,
UTC)),
(b'2014-02-04T12:30:15.224z', datetime(2014, 2, 4, 12, 30, 15, 224000,
UTC)),
(b'2014-02-04T12:30:15.224+05:00',
datetime(2014, 2, 4, 12, 30, 15, 224000,
tzinfo=tz.tzoffset(None, timedelta(hours=5))))])
def test_bytes(isostr, dt):
assert isoparse(isostr) == dt
###
# Invalid ISO strings
@pytest.mark.parametrize('isostr,exception', [
('201', ValueError), # ISO string too short
('2012-0425', ValueError), # Inconsistent date separators
('201204-25', ValueError), # Inconsistent date separators
('20120425T0120:00', ValueError), # Inconsistent time separators
('20120425T01:2000', ValueError), # Inconsistent time separators
('14:3015', ValueError), # Inconsistent time separator
('20120425T012500-334', ValueError), # Wrong microsecond separator
('2001-1', ValueError), # YYYY-M not valid
('2012-04-9', ValueError), # YYYY-MM-D not valid
('201204', ValueError), # YYYYMM not valid
('20120411T03:30+', ValueError), # Time zone too short
('20120411T03:30+1234567', ValueError), # Time zone too long
('20120411T03:30-25:40', ValueError), # Time zone invalid
('2012-1a', ValueError), # Invalid month
('20120411T03:30+00:60', ValueError), # Time zone invalid minutes
('20120411T03:30+00:61', ValueError), # Time zone invalid minutes
('20120411T033030.123456012:00', # No sign in time zone
ValueError),
('2012-W00', ValueError), # Invalid ISO week
('2012-W55', ValueError), # Invalid ISO week
('2012-W01-0', ValueError), # Invalid ISO week day
('2012-W01-8', ValueError), # Invalid ISO week day
('2013-000', ValueError), # Invalid ordinal day
('2013-366', ValueError), # Invalid ordinal day
('2013366', ValueError), # Invalid ordinal day
('2014-03-12Т12:30:14', ValueError), # Cyrillic T
('2014-04-21T24:00:01', ValueError), # Invalid use of 24 for midnight
('2014_W01-1', ValueError), # Invalid separator
('2014W01-1', ValueError), # Inconsistent use of dashes
('2014-W011', ValueError), # Inconsistent use of dashes
])
def test_iso_raises(isostr, exception):
with pytest.raises(exception):
isoparse(isostr)
@pytest.mark.parametrize('sep_act, valid_sep, exception', [
('T', 'C', ValueError),
('C', 'T', ValueError),
])
def test_iso_with_sep_raises(sep_act, valid_sep, exception):
parser = isoparser(sep=valid_sep)
isostr = '2012-04-25' + sep_act + '01:25:00'
with pytest.raises(exception):
parser.isoparse(isostr)
###
# Test ISOParser constructor
@pytest.mark.parametrize('sep', [' ', '9', '🍛'])
def test_isoparser_invalid_sep(sep):
with pytest.raises(ValueError):
isoparser(sep=sep)
# This only fails on Python 3
@pytest.mark.xfail(not six.PY2, reason="Fails on Python 3 only")
def test_isoparser_byte_sep():
dt = datetime(2017, 12, 6, 12, 30, 45)
dt_str = dt.isoformat(sep=str('T'))
dt_rt = isoparser(sep=b'T').isoparse(dt_str)
assert dt == dt_rt
###
# Test parse_tzstr
@pytest.mark.parametrize('tzoffset', FULL_TZOFFSETS)
def test_parse_tzstr(tzoffset):
dt = datetime(2017, 11, 27, 6, 14, 30, 123456)
date_fmt = '%Y-%m-%d'
time_fmt = '%H:%M:%S.%f'
_isoparse_date_and_time(dt, date_fmt, time_fmt, tzoffset)
@pytest.mark.parametrize('tzstr', [
'-00:00', '+00:00', '+00', '-00', '+0000', '-0000'
])
@pytest.mark.parametrize('zero_as_utc', [True, False])
def test_parse_tzstr_zero_as_utc(tzstr, zero_as_utc):
tzi = isoparser().parse_tzstr(tzstr, zero_as_utc=zero_as_utc)
assert tzi == UTC
assert (type(tzi) == tz.tzutc) == zero_as_utc
@pytest.mark.parametrize('tzstr,exception', [
('00:00', ValueError), # No sign
('05:00', ValueError), # No sign
('_00:00', ValueError), # Invalid sign
('+25:00', ValueError), # Offset too large
('00:0000', ValueError), # String too long
])
def test_parse_tzstr_fails(tzstr, exception):
with pytest.raises(exception):
isoparser().parse_tzstr(tzstr)
###
# Test parse_isodate
def __make_date_examples():
dates_no_day = [
date(1999, 12, 1),
date(2016, 2, 1)
]
if not six.PY2:
# strftime does not support dates before 1900 in Python 2
dates_no_day.append(date(1000, 11, 1))
# Only one supported format for dates with no day
o = zip(dates_no_day, it.repeat('%Y-%m'))
dates_w_day = [
date(1969, 12, 31),
date(1900, 1, 1),
date(2016, 2, 29),
date(2017, 11, 14)
]
dates_w_day_fmts = ('%Y%m%d', '%Y-%m-%d')
o = it.chain(o, it.product(dates_w_day, dates_w_day_fmts))
return list(o)
@pytest.mark.parametrize('d,dt_fmt', __make_date_examples())
@pytest.mark.parametrize('as_bytes', [True, False])
def test_parse_isodate(d, dt_fmt, as_bytes):
d_str = d.strftime(dt_fmt)
if isinstance(d_str, six.text_type) and as_bytes:
d_str = d_str.encode('ascii')
elif isinstance(d_str, bytes) and not as_bytes:
d_str = d_str.decode('ascii')
iparser = isoparser()
assert iparser.parse_isodate(d_str) == d
@pytest.mark.parametrize('isostr,exception', [
('243', ValueError), # ISO string too short
('2014-0423', ValueError), # Inconsistent date separators
('201404-23', ValueError), # Inconsistent date separators
('2014日03月14', ValueError), # Not ASCII
('2013-02-29', ValueError), # Not a leap year
('2014/12/03', ValueError), # Wrong separators
('2014-04-19T', ValueError), # Unknown components
('201202', ValueError), # Invalid format
])
def test_isodate_raises(isostr, exception):
with pytest.raises(exception):
isoparser().parse_isodate(isostr)
def test_parse_isodate_error_text():
with pytest.raises(ValueError) as excinfo:
isoparser().parse_isodate('2014-0423')
# ensure the error message does not contain b' prefixes
if six.PY2:
expected_error = "String contains unknown ISO components: u'2014-0423'"
else:
expected_error = "String contains unknown ISO components: '2014-0423'"
assert expected_error == str(excinfo.value)
###
# Test parse_isotime
def __make_time_examples():
outputs = []
# HH
time_h = [time(0), time(8), time(22)]
time_h_fmts = ['%H']
outputs.append(it.product(time_h, time_h_fmts))
# HHMM / HH:MM
time_hm = [time(0, 0), time(0, 30), time(8, 47), time(16, 1)]
time_hm_fmts = ['%H%M', '%H:%M']
outputs.append(it.product(time_hm, time_hm_fmts))
# HHMMSS / HH:MM:SS
time_hms = [time(0, 0, 0), time(0, 15, 30),
time(8, 2, 16), time(12, 0), time(16, 2), time(20, 45)]
time_hms_fmts = ['%H%M%S', '%H:%M:%S']
outputs.append(it.product(time_hms, time_hms_fmts))
# HHMMSS.ffffff / HH:MM:SS.ffffff
time_hmsu = [time(0, 0, 0, 0), time(4, 15, 3, 247993),
time(14, 21, 59, 948730),
time(23, 59, 59, 999999)]
time_hmsu_fmts = ['%H%M%S.%f', '%H:%M:%S.%f']
outputs.append(it.product(time_hmsu, time_hmsu_fmts))
outputs = list(map(list, outputs))
# Time zones
ex_naive = list(it.chain.from_iterable(x[0:2] for x in outputs))
o = it.product(ex_naive, TZOFFSETS) # ((time, fmt), (tzinfo, offsetstr))
o = ((t.replace(tzinfo=tzi), fmt + off_str)
for (t, fmt), (tzi, off_str) in o)
outputs.append(o)
return list(it.chain.from_iterable(outputs))
@pytest.mark.parametrize('time_val,time_fmt', __make_time_examples())
@pytest.mark.parametrize('as_bytes', [True, False])
def test_isotime(time_val, time_fmt, as_bytes):
tstr = time_val.strftime(time_fmt)
if isinstance(tstr, six.text_type) and as_bytes:
tstr = tstr.encode('ascii')
elif isinstance(tstr, bytes) and not as_bytes:
tstr = tstr.decode('ascii')
iparser = isoparser()
assert iparser.parse_isotime(tstr) == time_val
@pytest.mark.parametrize('isostr', [
'24:00',
'2400',
'24:00:00',
'240000',
'24:00:00.000',
'24:00:00,000',
'24:00:00.000000',
'24:00:00,000000',
])
def test_isotime_midnight(isostr):
iparser = isoparser()
assert iparser.parse_isotime(isostr) == time(0, 0, 0, 0)
@pytest.mark.parametrize('isostr,exception', [
('3', ValueError), # ISO string too short
('14時30分15秒', ValueError), # Not ASCII
('14_30_15', ValueError), # Invalid separators
('1430:15', ValueError), # Inconsistent separator use
('25', ValueError), # Invalid hours
('25:15', ValueError), # Invalid hours
('14:60', ValueError), # Invalid minutes
('14:59:61', ValueError), # Invalid seconds
('14:30:15.34468305:00', ValueError), # No sign in time zone
('14:30:15+', ValueError), # Time zone too short
('14:30:15+1234567', ValueError), # Time zone invalid
('14:59:59+25:00', ValueError), # Invalid tz hours
('14:59:59+12:62', ValueError), # Invalid tz minutes
('14:59:30_344583', ValueError), # Invalid microsecond separator
('24:01', ValueError), # 24 used for non-midnight time
('24:00:01', ValueError), # 24 used for non-midnight time
('24:00:00.001', ValueError), # 24 used for non-midnight time
('24:00:00.000001', ValueError), # 24 used for non-midnight time
])
def test_isotime_raises(isostr, exception):
iparser = isoparser()
with pytest.raises(exception):
iparser.parse_isotime(isostr)

View file

@@ -1,964 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import itertools
from datetime import datetime, timedelta
import unittest
import sys
from dateutil import tz
from dateutil.tz import tzoffset
from dateutil.parser import parse, parserinfo
from dateutil.parser import ParserError
from dateutil.parser import UnknownTimezoneWarning
from ._common import TZEnvContext
from six import assertRaisesRegex, PY2
from io import StringIO
import pytest
# Platform info
IS_WIN = sys.platform.startswith('win')
PLATFORM_HAS_DASH_D = False
try:
if datetime.now().strftime('%-d'):
PLATFORM_HAS_DASH_D = True
except ValueError:
pass
@pytest.fixture(params=[True, False])
def fuzzy(request):
"""Fixture to pass fuzzy=True or fuzzy=False to parse"""
return request.param
# Parser test cases using no keyword arguments. Format: (parsable_text, expected_datetime, assertion_message)
PARSER_TEST_CASES = [
("Thu Sep 25 10:36:28 2003", datetime(2003, 9, 25, 10, 36, 28), "date command format strip"),
("Thu Sep 25 2003", datetime(2003, 9, 25), "date command format strip"),
("2003-09-25T10:49:41", datetime(2003, 9, 25, 10, 49, 41), "iso format strip"),
("2003-09-25T10:49", datetime(2003, 9, 25, 10, 49), "iso format strip"),
("2003-09-25T10", datetime(2003, 9, 25, 10), "iso format strip"),
("2003-09-25", datetime(2003, 9, 25), "iso format strip"),
("20030925T104941", datetime(2003, 9, 25, 10, 49, 41), "iso stripped format strip"),
("20030925T1049", datetime(2003, 9, 25, 10, 49, 0), "iso stripped format strip"),
("20030925T10", datetime(2003, 9, 25, 10), "iso stripped format strip"),
("20030925", datetime(2003, 9, 25), "iso stripped format strip"),
("2003-09-25 10:49:41,502", datetime(2003, 9, 25, 10, 49, 41, 502000), "python logger format"),
("199709020908", datetime(1997, 9, 2, 9, 8), "no separator"),
("19970902090807", datetime(1997, 9, 2, 9, 8, 7), "no separator"),
("09-25-2003", datetime(2003, 9, 25), "date with dash"),
("25-09-2003", datetime(2003, 9, 25), "date with dash"),
("10-09-2003", datetime(2003, 10, 9), "date with dash"),
("10-09-03", datetime(2003, 10, 9), "date with dash"),
("2003.09.25", datetime(2003, 9, 25), "date with dot"),
("09.25.2003", datetime(2003, 9, 25), "date with dot"),
("25.09.2003", datetime(2003, 9, 25), "date with dot"),
("10.09.2003", datetime(2003, 10, 9), "date with dot"),
("10.09.03", datetime(2003, 10, 9), "date with dot"),
("2003/09/25", datetime(2003, 9, 25), "date with slash"),
("09/25/2003", datetime(2003, 9, 25), "date with slash"),
("25/09/2003", datetime(2003, 9, 25), "date with slash"),
("10/09/2003", datetime(2003, 10, 9), "date with slash"),
("10/09/03", datetime(2003, 10, 9), "date with slash"),
("2003 09 25", datetime(2003, 9, 25), "date with space"),
("09 25 2003", datetime(2003, 9, 25), "date with space"),
("25 09 2003", datetime(2003, 9, 25), "date with space"),
("10 09 2003", datetime(2003, 10, 9), "date with space"),
("10 09 03", datetime(2003, 10, 9), "date with space"),
("25 09 03", datetime(2003, 9, 25), "date with space"),
("03 25 Sep", datetime(2003, 9, 25), "strangely ordered date"),
("25 03 Sep", datetime(2025, 9, 3), "strangely ordered date"),
(" July 4 , 1976 12:01:02 am ", datetime(1976, 7, 4, 0, 1, 2), "extra space"),
("Wed, July 10, '96", datetime(1996, 7, 10, 0, 0), "random format"),
("1996.July.10 AD 12:08 PM", datetime(1996, 7, 10, 12, 8), "random format"),
("July 4, 1976", datetime(1976, 7, 4), "random format"),
("7 4 1976", datetime(1976, 7, 4), "random format"),
("4 jul 1976", datetime(1976, 7, 4), "random format"),
("4 Jul 1976", datetime(1976, 7, 4), "'%-d %b %Y' format"),
("7-4-76", datetime(1976, 7, 4), "random format"),
("19760704", datetime(1976, 7, 4), "random format"),
("0:01:02 on July 4, 1976", datetime(1976, 7, 4, 0, 1, 2), "random format"),
("July 4, 1976 12:01:02 am", datetime(1976, 7, 4, 0, 1, 2), "random format"),
("Mon Jan 2 04:24:27 1995", datetime(1995, 1, 2, 4, 24, 27), "random format"),
("04.04.95 00:22", datetime(1995, 4, 4, 0, 22), "random format"),
("Jan 1 1999 11:23:34.578", datetime(1999, 1, 1, 11, 23, 34, 578000), "random format"),
("950404 122212", datetime(1995, 4, 4, 12, 22, 12), "random format"),
("3rd of May 2001", datetime(2001, 5, 3), "random format"),
("5th of March 2001", datetime(2001, 3, 5), "random format"),
("1st of May 2003", datetime(2003, 5, 1), "random format"),
('0099-01-01T00:00:00', datetime(99, 1, 1, 0, 0), "99 ad"),
('0031-01-01T00:00:00', datetime(31, 1, 1, 0, 0), "31 ad"),
("20080227T21:26:01.123456789", datetime(2008, 2, 27, 21, 26, 1, 123456), "high precision seconds"),
('13NOV2017', datetime(2017, 11, 13), "dBY (See GH360)"),
('0003-03-04', datetime(3, 3, 4), "pre 12 year same month (See GH PR #293)"),
('December.0031.30', datetime(31, 12, 30), "BYd corner case (GH#687)"),
# Cases with legacy h/m/s format, candidates for deprecation (GH#886)
("2016-12-21 04.2h", datetime(2016, 12, 21, 4, 12), "Fractional Hours"),
]
# Check that we don't have any duplicates
assert len(set([x[0] for x in PARSER_TEST_CASES])) == len(PARSER_TEST_CASES)
@pytest.mark.parametrize("parsable_text,expected_datetime,assertion_message", PARSER_TEST_CASES)
def test_parser(parsable_text, expected_datetime, assertion_message):
assert parse(parsable_text) == expected_datetime, assertion_message
# Parser test cases using datetime(2003, 9, 25) as a default.
# Format: (parsable_text, expected_datetime, assertion_message)
PARSER_DEFAULT_TEST_CASES = [
("Thu Sep 25 10:36:28", datetime(2003, 9, 25, 10, 36, 28), "date command format strip"),
("Thu Sep 10:36:28", datetime(2003, 9, 25, 10, 36, 28), "date command format strip"),
("Thu 10:36:28", datetime(2003, 9, 25, 10, 36, 28), "date command format strip"),
("Sep 10:36:28", datetime(2003, 9, 25, 10, 36, 28), "date command format strip"),
("10:36:28", datetime(2003, 9, 25, 10, 36, 28), "date command format strip"),
("10:36", datetime(2003, 9, 25, 10, 36), "date command format strip"),
("Sep 2003", datetime(2003, 9, 25), "date command format strip"),
("Sep", datetime(2003, 9, 25), "date command format strip"),
("2003", datetime(2003, 9, 25), "date command format strip"),
("10h36m28.5s", datetime(2003, 9, 25, 10, 36, 28, 500000), "hour with letters"),
("10h36m28s", datetime(2003, 9, 25, 10, 36, 28), "hour with letters strip"),
("10h36m", datetime(2003, 9, 25, 10, 36), "hour with letters strip"),
("10h", datetime(2003, 9, 25, 10), "hour with letters strip"),
("10 h 36", datetime(2003, 9, 25, 10, 36), "hour with letters strip"),
("10 h 36.5", datetime(2003, 9, 25, 10, 36, 30), "hour with letter strip"),
("36 m 5", datetime(2003, 9, 25, 0, 36, 5), "hour with letters spaces"),
("36 m 5 s", datetime(2003, 9, 25, 0, 36, 5), "minute with letters spaces"),
("36 m 05", datetime(2003, 9, 25, 0, 36, 5), "minute with letters spaces"),
("36 m 05 s", datetime(2003, 9, 25, 0, 36, 5), "minutes with letters spaces"),
("10h am", datetime(2003, 9, 25, 10), "hour am pm"),
("10h pm", datetime(2003, 9, 25, 22), "hour am pm"),
("10am", datetime(2003, 9, 25, 10), "hour am pm"),
("10pm", datetime(2003, 9, 25, 22), "hour am pm"),
("10:00 am", datetime(2003, 9, 25, 10), "hour am pm"),
("10:00 pm", datetime(2003, 9, 25, 22), "hour am pm"),
("10:00am", datetime(2003, 9, 25, 10), "hour am pm"),
("10:00pm", datetime(2003, 9, 25, 22), "hour am pm"),
("10:00a.m", datetime(2003, 9, 25, 10), "hour am pm"),
("10:00p.m", datetime(2003, 9, 25, 22), "hour am pm"),
("10:00a.m.", datetime(2003, 9, 25, 10), "hour am pm"),
("10:00p.m.", datetime(2003, 9, 25, 22), "hour am pm"),
("Wed", datetime(2003, 10, 1), "weekday alone"),
("Wednesday", datetime(2003, 10, 1), "long weekday"),
("October", datetime(2003, 10, 25), "long month"),
("31-Dec-00", datetime(2000, 12, 31), "zero year"),
("0:01:02", datetime(2003, 9, 25, 0, 1, 2), "random format"),
("12h 01m02s am", datetime(2003, 9, 25, 0, 1, 2), "random format"),
("12:08 PM", datetime(2003, 9, 25, 12, 8), "random format"),
("01h02m03", datetime(2003, 9, 25, 1, 2, 3), "random format"),
("01h02", datetime(2003, 9, 25, 1, 2), "random format"),
("01h02s", datetime(2003, 9, 25, 1, 0, 2), "random format"),
("01m02", datetime(2003, 9, 25, 0, 1, 2), "random format"),
("01m02h", datetime(2003, 9, 25, 2, 1), "random format"),
("2004 10 Apr 11h30m", datetime(2004, 4, 10, 11, 30), "random format")
]
# Check that we don't have any duplicates
assert len(set([x[0] for x in PARSER_DEFAULT_TEST_CASES])) == len(PARSER_DEFAULT_TEST_CASES)
@pytest.mark.parametrize("parsable_text,expected_datetime,assertion_message", PARSER_DEFAULT_TEST_CASES)
def test_parser_default(parsable_text, expected_datetime, assertion_message):
assert parse(parsable_text, default=datetime(2003, 9, 25)) == expected_datetime, assertion_message
@pytest.mark.parametrize('sep', ['-', '.', '/', ' '])
def test_parse_dayfirst(sep):
expected = datetime(2003, 9, 10)
fmt = sep.join(['%d', '%m', '%Y'])
dstr = expected.strftime(fmt)
result = parse(dstr, dayfirst=True)
assert result == expected
@pytest.mark.parametrize('sep', ['-', '.', '/', ' '])
def test_parse_yearfirst(sep):
expected = datetime(2010, 9, 3)
fmt = sep.join(['%Y', '%m', '%d'])
dstr = expected.strftime(fmt)
result = parse(dstr, yearfirst=True)
assert result == expected
@pytest.mark.parametrize('dstr,expected', [
("Thu Sep 25 10:36:28 BRST 2003", datetime(2003, 9, 25, 10, 36, 28)),
("1996.07.10 AD at 15:08:56 PDT", datetime(1996, 7, 10, 15, 8, 56)),
("Tuesday, April 12, 1952 AD 3:30:42pm PST",
datetime(1952, 4, 12, 15, 30, 42)),
("November 5, 1994, 8:15:30 am EST", datetime(1994, 11, 5, 8, 15, 30)),
("1994-11-05T08:15:30-05:00", datetime(1994, 11, 5, 8, 15, 30)),
("1994-11-05T08:15:30Z", datetime(1994, 11, 5, 8, 15, 30)),
("1976-07-04T00:01:02Z", datetime(1976, 7, 4, 0, 1, 2)),
("1986-07-05T08:15:30z", datetime(1986, 7, 5, 8, 15, 30)),
("Tue Apr 4 00:22:12 PDT 1995", datetime(1995, 4, 4, 0, 22, 12)),
])
def test_parse_ignoretz(dstr, expected):
result = parse(dstr, ignoretz=True)
assert result == expected
_brsttz = tzoffset("BRST", -10800)
@pytest.mark.parametrize('dstr,expected', [
("20030925T104941-0300",
datetime(2003, 9, 25, 10, 49, 41, tzinfo=_brsttz)),
("Thu, 25 Sep 2003 10:49:41 -0300",
datetime(2003, 9, 25, 10, 49, 41, tzinfo=_brsttz)),
("2003-09-25T10:49:41.5-03:00",
datetime(2003, 9, 25, 10, 49, 41, 500000, tzinfo=_brsttz)),
("2003-09-25T10:49:41-03:00",
datetime(2003, 9, 25, 10, 49, 41, tzinfo=_brsttz)),
("20030925T104941.5-0300",
datetime(2003, 9, 25, 10, 49, 41, 500000, tzinfo=_brsttz)),
])
def test_parse_with_tzoffset(dstr, expected):
# In these cases, we are _not_ passing a tzinfos arg
result = parse(dstr)
assert result == expected
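# Editor's hedged contrast to the cases above (not part of the original suite):
# when the string carries a named zone instead of a numeric offset, a tzinfos
# mapping supplies the offset to use for that name.
def test_parse_named_zone_with_tzinfos_sketch():
    result = parse("Thu, 25 Sep 2003 10:49:41 BRST", tzinfos={"BRST": -10800})
    assert result == datetime(2003, 9, 25, 10, 49, 41, tzinfo=_brsttz)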
class TestFormat(object):
def test_ybd(self):
# If we have a 4-digit year, a non-numeric month (abbreviated or not),
# and a day (1 or 2 digits), then there is no ambiguity as to which
# token is a year/month/day. This holds regardless of what order the
# terms are in and for each of the separators below.
seps = ['-', ' ', '/', '.']
year_tokens = ['%Y']
month_tokens = ['%b', '%B']
day_tokens = ['%d']
if PLATFORM_HAS_DASH_D:
day_tokens.append('%-d')
prods = itertools.product(year_tokens, month_tokens, day_tokens)
perms = [y for x in prods for y in itertools.permutations(x)]
unambig_fmts = [sep.join(perm) for sep in seps for perm in perms]
actual = datetime(2003, 9, 25)
for fmt in unambig_fmts:
dstr = actual.strftime(fmt)
res = parse(dstr)
assert res == actual
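    # Editor's hedged illustration of the reasoning above (not part of the
    # original suite): once the month is spelled out, the same date comes back
    # no matter how the tokens are ordered.
    def test_ybd_order_independent_sketch(self):
        for dstr in ('2003-Sep-25', '25-Sep-2003', 'Sep-25-2003'):
            assert parse(dstr) == datetime(2003, 9, 25)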
# TODO: some redundancy with PARSER_TEST_CASES cases
@pytest.mark.parametrize("fmt,dstr", [
("%a %b %d %Y", "Thu Sep 25 2003"),
("%b %d %Y", "Sep 25 2003"),
("%Y-%m-%d", "2003-09-25"),
("%Y%m%d", "20030925"),
("%Y-%b-%d", "2003-Sep-25"),
("%d-%b-%Y", "25-Sep-2003"),
("%b-%d-%Y", "Sep-25-2003"),
("%m-%d-%Y", "09-25-2003"),
("%d-%m-%Y", "25-09-2003"),
("%Y.%m.%d", "2003.09.25"),
("%Y.%b.%d", "2003.Sep.25"),
("%d.%b.%Y", "25.Sep.2003"),
("%b.%d.%Y", "Sep.25.2003"),
("%m.%d.%Y", "09.25.2003"),
("%d.%m.%Y", "25.09.2003"),
("%Y/%m/%d", "2003/09/25"),
("%Y/%b/%d", "2003/Sep/25"),
("%d/%b/%Y", "25/Sep/2003"),
("%b/%d/%Y", "Sep/25/2003"),
("%m/%d/%Y", "09/25/2003"),
("%d/%m/%Y", "25/09/2003"),
("%Y %m %d", "2003 09 25"),
("%Y %b %d", "2003 Sep 25"),
("%d %b %Y", "25 Sep 2003"),
("%m %d %Y", "09 25 2003"),
("%d %m %Y", "25 09 2003"),
("%y %d %b", "03 25 Sep",),
])
def test_strftime_formats_2003Sep25(self, fmt, dstr):
expected = datetime(2003, 9, 25)
# First check that the format strings behave as expected
# (not strictly necessary, but nice to have)
assert expected.strftime(fmt) == dstr
res = parse(dstr)
assert res == expected
class TestInputTypes(object):
def test_empty_string_invalid(self):
with pytest.raises(ParserError):
parse('')
def test_none_invalid(self):
with pytest.raises(TypeError):
parse(None)
def test_int_invalid(self):
with pytest.raises(TypeError):
parse(13)
def test_duck_typing(self):
# We want to support arbitrary classes that implement the stream
# interface.
class StringPassThrough(object):
def __init__(self, stream):
self.stream = stream
def read(self, *args, **kwargs):
return self.stream.read(*args, **kwargs)
dstr = StringPassThrough(StringIO('2014 January 19'))
res = parse(dstr)
expected = datetime(2014, 1, 19)
assert res == expected
def test_parse_stream(self):
dstr = StringIO('2014 January 19')
res = parse(dstr)
expected = datetime(2014, 1, 19)
assert res == expected
def test_parse_str(self):
# Parser should be able to handle bytestring and unicode
uni_str = '2014-05-01 08:00:00'
bytes_str = uni_str.encode()
res = parse(bytes_str)
expected = parse(uni_str)
assert res == expected
def test_parse_bytes(self):
res = parse(b'2014 January 19')
expected = datetime(2014, 1, 19)
assert res == expected
def test_parse_bytearray(self):
# GH#417
res = parse(bytearray(b'2014 January 19'))
expected = datetime(2014, 1, 19)
assert res == expected
class TestTzinfoInputTypes(object):
def assert_equal_same_tz(self, dt1, dt2):
assert dt1 == dt2
assert dt1.tzinfo is dt2.tzinfo
def test_tzinfo_dict_could_return_none(self):
dstr = "2017-02-03 12:40 BRST"
result = parse(dstr, tzinfos={"BRST": None})
expected = datetime(2017, 2, 3, 12, 40)
self.assert_equal_same_tz(result, expected)
def test_tzinfos_callable_could_return_none(self):
dstr = "2017-02-03 12:40 BRST"
result = parse(dstr, tzinfos=lambda *args: None)
expected = datetime(2017, 2, 3, 12, 40)
self.assert_equal_same_tz(result, expected)
def test_invalid_tzinfo_input(self):
dstr = "2014 January 19 09:00 UTC"
# Pass an absurd tzinfos object
tzinfos = {"UTC": ValueError}
with pytest.raises(TypeError):
parse(dstr, tzinfos=tzinfos)
def test_valid_tzinfo_tzinfo_input(self):
dstr = "2014 January 19 09:00 UTC"
tzinfos = {"UTC": tz.UTC}
expected = datetime(2014, 1, 19, 9, tzinfo=tz.UTC)
res = parse(dstr, tzinfos=tzinfos)
self.assert_equal_same_tz(res, expected)
def test_valid_tzinfo_unicode_input(self):
dstr = "2014 January 19 09:00 UTC"
tzinfos = {u"UTC": u"UTC+0"}
expected = datetime(2014, 1, 19, 9, tzinfo=tz.tzstr("UTC+0"))
res = parse(dstr, tzinfos=tzinfos)
self.assert_equal_same_tz(res, expected)
def test_valid_tzinfo_callable_input(self):
dstr = "2014 January 19 09:00 UTC"
def tzinfos(*args, **kwargs):
return u"UTC+0"
expected = datetime(2014, 1, 19, 9, tzinfo=tz.tzstr("UTC+0"))
res = parse(dstr, tzinfos=tzinfos)
self.assert_equal_same_tz(res, expected)
def test_valid_tzinfo_int_input(self):
dstr = "2014 January 19 09:00 UTC"
tzinfos = {u"UTC": -28800}
expected = datetime(2014, 1, 19, 9, tzinfo=tz.tzoffset(u"UTC", -28800))
res = parse(dstr, tzinfos=tzinfos)
self.assert_equal_same_tz(res, expected)
class ParserTest(unittest.TestCase):
@classmethod
def setup_class(cls):
cls.tzinfos = {"BRST": -10800}
cls.brsttz = tzoffset("BRST", -10800)
cls.default = datetime(2003, 9, 25)
# Parser should be able to handle bytestring and unicode
cls.uni_str = '2014-05-01 08:00:00'
cls.str_str = cls.uni_str.encode()
def testParserParseStr(self):
from dateutil.parser import parser
assert parser().parse(self.str_str) == parser().parse(self.uni_str)
def testParseUnicodeWords(self):
class rus_parserinfo(parserinfo):
MONTHS = [("янв", "Январь"),
("фев", "Февраль"),
("мар", "Март"),
("апр", "Апрель"),
("май", "Май"),
("июн", "Июнь"),
("июл", "Июль"),
("авг", "Август"),
("сен", "Сентябрь"),
("окт", "Октябрь"),
("ноя", "Ноябрь"),
("дек", "Декабрь")]
expected = datetime(2015, 9, 10, 10, 20)
res = parse('10 Сентябрь 2015 10:20', parserinfo=rus_parserinfo())
assert res == expected
def testParseWithNulls(self):
# This relies on the from __future__ import unicode_literals, because
# explicitly specifying a unicode literal is a syntax error in Py 3.2
# May want to switch to u'...' if we ever drop Python 3.2 support.
pstring = '\x00\x00August 29, 1924'
assert parse(pstring) == datetime(1924, 8, 29)
def testDateCommandFormat(self):
self.assertEqual(parse("Thu Sep 25 10:36:28 BRST 2003",
tzinfos=self.tzinfos),
datetime(2003, 9, 25, 10, 36, 28,
tzinfo=self.brsttz))
def testDateCommandFormatReversed(self):
self.assertEqual(parse("2003 10:36:28 BRST 25 Sep Thu",
tzinfos=self.tzinfos),
datetime(2003, 9, 25, 10, 36, 28,
tzinfo=self.brsttz))
def testDateCommandFormatWithLong(self):
if PY2:
self.assertEqual(parse("Thu Sep 25 10:36:28 BRST 2003",
tzinfos={"BRST": long(-10800)}),
datetime(2003, 9, 25, 10, 36, 28,
tzinfo=self.brsttz))
def testISOFormatStrip2(self):
self.assertEqual(parse("2003-09-25T10:49:41+03:00"),
datetime(2003, 9, 25, 10, 49, 41,
tzinfo=tzoffset(None, 10800)))
def testISOStrippedFormatStrip2(self):
self.assertEqual(parse("20030925T104941+0300"),
datetime(2003, 9, 25, 10, 49, 41,
tzinfo=tzoffset(None, 10800)))
def testAMPMNoHour(self):
with pytest.raises(ParserError):
parse("AM")
with pytest.raises(ParserError):
parse("Jan 20, 2015 PM")
def testAMPMRange(self):
with pytest.raises(ParserError):
parse("13:44 AM")
with pytest.raises(ParserError):
parse("January 25, 1921 23:13 PM")
def testPertain(self):
self.assertEqual(parse("Sep 03", default=self.default),
datetime(2003, 9, 3))
self.assertEqual(parse("Sep of 03", default=self.default),
datetime(2003, 9, 25))
def testFuzzy(self):
s = "Today is 25 of September of 2003, exactly " \
"at 10:49:41 with timezone -03:00."
self.assertEqual(parse(s, fuzzy=True),
datetime(2003, 9, 25, 10, 49, 41,
tzinfo=self.brsttz))
def testFuzzyWithTokens(self):
s1 = "Today is 25 of September of 2003, exactly " \
"at 10:49:41 with timezone -03:00."
self.assertEqual(parse(s1, fuzzy_with_tokens=True),
(datetime(2003, 9, 25, 10, 49, 41,
tzinfo=self.brsttz),
('Today is ', 'of ', ', exactly at ',
' with timezone ', '.')))
s2 = "http://biz.yahoo.com/ipo/p/600221.html"
self.assertEqual(parse(s2, fuzzy_with_tokens=True),
(datetime(2060, 2, 21, 0, 0, 0),
('http://biz.yahoo.com/ipo/p/', '.html')))
def testFuzzyAMPMProblem(self):
# Sometimes fuzzy parsing results in AM/PM flag being set without
# hours - if it's fuzzy it should ignore that.
s1 = "I have a meeting on March 1, 1974."
s2 = "On June 8th, 2020, I am going to be the first man on Mars"
# Also don't want any erroneous AM or PMs changing the parsed time
s3 = "Meet me at the AM/PM on Sunset at 3:00 AM on December 3rd, 2003"
s4 = "Meet me at 3:00AM on December 3rd, 2003 at the AM/PM on Sunset"
self.assertEqual(parse(s1, fuzzy=True), datetime(1974, 3, 1))
self.assertEqual(parse(s2, fuzzy=True), datetime(2020, 6, 8))
self.assertEqual(parse(s3, fuzzy=True), datetime(2003, 12, 3, 3))
self.assertEqual(parse(s4, fuzzy=True), datetime(2003, 12, 3, 3))
def testFuzzyIgnoreAMPM(self):
s1 = "Jan 29, 1945 14:45 AM I going to see you there?"
with pytest.warns(UnknownTimezoneWarning):
res = parse(s1, fuzzy=True)
self.assertEqual(res, datetime(1945, 1, 29, 14, 45))
def testRandomFormat24(self):
self.assertEqual(parse("0:00 PM, PST", default=self.default,
ignoretz=True),
datetime(2003, 9, 25, 12, 0))
def testRandomFormat26(self):
with pytest.warns(UnknownTimezoneWarning):
res = parse("5:50 A.M. on June 13, 1990")
self.assertEqual(res, datetime(1990, 6, 13, 5, 50))
def testUnspecifiedDayFallback(self):
# Test that for an unspecified day, the fallback behavior is correct.
self.assertEqual(parse("April 2009", default=datetime(2010, 1, 31)),
datetime(2009, 4, 30))
def testUnspecifiedDayFallbackFebNoLeapYear(self):
self.assertEqual(parse("Feb 2007", default=datetime(2010, 1, 31)),
datetime(2007, 2, 28))
def testUnspecifiedDayFallbackFebLeapYear(self):
self.assertEqual(parse("Feb 2008", default=datetime(2010, 1, 31)),
datetime(2008, 2, 29))
def testErrorType01(self):
with pytest.raises(ParserError):
parse('shouldfail')
def testCorrectErrorOnFuzzyWithTokens(self):
assertRaisesRegex(self, ParserError, 'Unknown string format',
parse, '04/04/32/423', fuzzy_with_tokens=True)
assertRaisesRegex(self, ParserError, 'Unknown string format',
parse, '04/04/04 +32423', fuzzy_with_tokens=True)
assertRaisesRegex(self, ParserError, 'Unknown string format',
parse, '04/04/0d4', fuzzy_with_tokens=True)
def testIncreasingCTime(self):
# This test will check 200 different years, every month, every day,
# every hour, every minute, every second, and every weekday, using
# a delta of more or less 1 year, 1 month, 1 day, 1 hour, 1 minute and
# 1 second.
delta = timedelta(days=365+31+1, seconds=1+60+60*60)
dt = datetime(1900, 1, 1, 0, 0, 0, 0)
for i in range(200):
assert parse(dt.ctime()) == dt
dt += delta
def testIncreasingISOFormat(self):
delta = timedelta(days=365+31+1, seconds=1+60+60*60)
dt = datetime(1900, 1, 1, 0, 0, 0, 0)
for i in range(200):
assert parse(dt.isoformat()) == dt
dt += delta
def testMicrosecondsPrecisionError(self):
# Skip found out about that sad precision problem. :-(
dt1 = parse("00:11:25.01")
dt2 = parse("00:12:10.01")
assert dt1.microsecond == 10000
assert dt2.microsecond == 10000
def testMicrosecondPrecisionErrorReturns(self):
# One more precision issue, discovered by Eric Brown. This should
# be the last one, as we're no longer using floating points.
for ms in [100001, 100000, 99999, 99998,
10001, 10000, 9999, 9998,
1001, 1000, 999, 998,
101, 100, 99, 98]:
dt = datetime(2008, 2, 27, 21, 26, 1, ms)
assert parse(dt.isoformat()) == dt
def testCustomParserInfo(self):
# Custom parser info wasn't working, as Michael Elsdörfer discovered.
from dateutil.parser import parserinfo, parser
class myparserinfo(parserinfo):
MONTHS = parserinfo.MONTHS[:]
MONTHS[0] = ("Foo", "Foo")
myparser = parser(myparserinfo())
dt = myparser.parse("01/Foo/2007")
assert dt == datetime(2007, 1, 1)
def testCustomParserShortDaynames(self):
# Horacio Hoyos discovered that day names shorter than 3 characters,
# for example two letter German day name abbreviations, don't work:
# https://github.com/dateutil/dateutil/issues/343
from dateutil.parser import parserinfo, parser
class GermanParserInfo(parserinfo):
WEEKDAYS = [("Mo", "Montag"),
("Di", "Dienstag"),
("Mi", "Mittwoch"),
("Do", "Donnerstag"),
("Fr", "Freitag"),
("Sa", "Samstag"),
("So", "Sonntag")]
myparser = parser(GermanParserInfo())
dt = myparser.parse("Sa 21. Jan 2017")
self.assertEqual(dt, datetime(2017, 1, 21))
def testNoYearFirstNoDayFirst(self):
dtstr = '090107'
# Should be MMDDYY
self.assertEqual(parse(dtstr),
datetime(2007, 9, 1))
self.assertEqual(parse(dtstr, yearfirst=False, dayfirst=False),
datetime(2007, 9, 1))
def testYearFirst(self):
dtstr = '090107'
# Should be MMDDYY
self.assertEqual(parse(dtstr, yearfirst=True),
datetime(2009, 1, 7))
self.assertEqual(parse(dtstr, yearfirst=True, dayfirst=False),
datetime(2009, 1, 7))
def testDayFirst(self):
dtstr = '090107'
# Should be DDMMYY
self.assertEqual(parse(dtstr, dayfirst=True),
datetime(2007, 1, 9))
self.assertEqual(parse(dtstr, yearfirst=False, dayfirst=True),
datetime(2007, 1, 9))
def testDayFirstYearFirst(self):
dtstr = '090107'
# Should be YYDDMM
self.assertEqual(parse(dtstr, yearfirst=True, dayfirst=True),
datetime(2009, 7, 1))
def testUnambiguousYearFirst(self):
dtstr = '2015 09 25'
self.assertEqual(parse(dtstr, yearfirst=True),
datetime(2015, 9, 25))
def testUnambiguousDayFirst(self):
dtstr = '2015 09 25'
self.assertEqual(parse(dtstr, dayfirst=True),
datetime(2015, 9, 25))
def testUnambiguousDayFirstYearFirst(self):
dtstr = '2015 09 25'
self.assertEqual(parse(dtstr, dayfirst=True, yearfirst=True),
datetime(2015, 9, 25))
def test_mstridx(self):
# See GH408
dtstr = '2015-15-May'
self.assertEqual(parse(dtstr),
datetime(2015, 5, 15))
def test_idx_check(self):
dtstr = '2017-07-17 06:15:'
# Before the fix, the trailing colon would cause an IndexError at 824-825
# when checking `i < len_l` and then accessing `l[i+1]`
res = parse(dtstr, fuzzy=True)
assert res == datetime(2017, 7, 17, 6, 15)
def test_hmBY(self):
# See GH#483
dtstr = '02:17NOV2017'
res = parse(dtstr, default=self.default)
assert res == datetime(2017, 11, self.default.day, 2, 17)
def test_validate_hour(self):
# See GH353
invalid = "201A-01-01T23:58:39.239769+03:00"
with pytest.raises(ParserError):
parse(invalid)
def test_era_trailing_year(self):
dstr = 'AD2001'
res = parse(dstr)
assert res.year == 2001, res
def test_includes_timestr(self):
timestr = "2020-13-97T44:61:83"
try:
parse(timestr)
except ParserError as e:
assert e.args[1] == timestr
else:
pytest.fail("Failed to raise ParserError")
class TestOutOfBounds(object):
def test_no_year_zero(self):
with pytest.raises(ParserError):
parse("0000 Jun 20")
def test_out_of_bound_day(self):
with pytest.raises(ParserError):
parse("Feb 30, 2007")
def test_illegal_month_error(self):
with pytest.raises(ParserError):
parse("0-100")
def test_day_sanity(self, fuzzy):
dstr = "2014-15-25"
with pytest.raises(ParserError):
parse(dstr, fuzzy=fuzzy)
def test_minute_sanity(self, fuzzy):
dstr = "2014-02-28 22:64"
with pytest.raises(ParserError):
parse(dstr, fuzzy=fuzzy)
def test_hour_sanity(self, fuzzy):
dstr = "2014-02-28 25:16 PM"
with pytest.raises(ParserError):
parse(dstr, fuzzy=fuzzy)
def test_second_sanity(self, fuzzy):
dstr = "2014-02-28 22:14:64"
with pytest.raises(ParserError):
parse(dstr, fuzzy=fuzzy)
class TestParseUnimplementedCases(object):
@pytest.mark.xfail
def test_somewhat_ambiguous_string(self):
# Ref: github issue #487
# The parser is choosing the wrong part for hour
# causing datetime to raise an exception.
dtstr = '1237 PM BRST Mon Oct 30 2017'
res = parse(dtstr, tzinfo=self.tzinfos)
assert res == datetime(2017, 10, 30, 12, 37, tzinfo=self.tzinfos)
@pytest.mark.xfail
def test_YmdH_M_S(self):
# found in nasdaq's ftp data
dstr = '1991041310:19:24'
expected = datetime(1991, 4, 13, 10, 19, 24)
res = parse(dstr)
assert res == expected, (res, expected)
@pytest.mark.xfail
def test_first_century(self):
dstr = '0031 Nov 03'
expected = datetime(31, 11, 3)
res = parse(dstr)
assert res == expected, res
@pytest.mark.xfail
def test_era_trailing_year_with_dots(self):
dstr = 'A.D.2001'
res = parse(dstr)
assert res.year == 2001, res
@pytest.mark.xfail
def test_ad_nospace(self):
expected = datetime(6, 5, 19)
for dstr in [' 6AD May 19', ' 06AD May 19',
' 006AD May 19', ' 0006AD May 19']:
res = parse(dstr)
assert res == expected, (dstr, res)
@pytest.mark.xfail
def test_four_letter_day(self):
dstr = 'Frid Dec 30, 2016'
expected = datetime(2016, 12, 30)
res = parse(dstr)
assert res == expected
@pytest.mark.xfail
def test_non_date_number(self):
dstr = '1,700'
with pytest.raises(ParserError):
parse(dstr)
@pytest.mark.xfail
def test_on_era(self):
# This could be classified as an "eras" test, but the relevant part
# here is the ` on `
dstr = '2:15 PM on January 2nd 1973 A.D.'
expected = datetime(1973, 1, 2, 14, 15)
res = parse(dstr)
assert res == expected
@pytest.mark.xfail
def test_extraneous_year(self):
# This was found in the wild at insidertrading.org
dstr = "2011 MARTIN CHILDREN'S IRREVOCABLE TRUST u/a/d NOVEMBER 7, 2012"
res, _ = parse(dstr, fuzzy_with_tokens=True)
expected = datetime(2012, 11, 7)
assert res == expected
@pytest.mark.xfail
def test_extraneous_year_tokens(self):
# This was found in the wild at insidertrading.org
# Unlike in the case above, identifying the first "2012" as the year
# would not be a problem, but inferring that the latter 2012 is hhmm
# is a problem.
dstr = "2012 MARTIN CHILDREN'S IRREVOCABLE TRUST u/a/d NOVEMBER 7, 2012"
expected = datetime(2012, 11, 7)
(res, tokens) = parse(dstr, fuzzy_with_tokens=True)
assert res == expected
assert tokens == ("2012 MARTIN CHILDREN'S IRREVOCABLE TRUST u/a/d ",)
@pytest.mark.xfail
def test_extraneous_year2(self):
# This was found in the wild at insidertrading.org
dstr = ("Berylson Amy Smith 1998 Grantor Retained Annuity Trust "
"u/d/t November 2, 1998 f/b/o Jennifer L Berylson")
res, _ = parse(dstr, fuzzy_with_tokens=True)
expected = datetime(1998, 11, 2)
assert res == expected
@pytest.mark.xfail
def test_extraneous_year3(self):
# This was found in the wild at insidertrading.org
dstr = "SMITH R & WEISS D 94 CHILD TR FBO M W SMITH UDT 12/1/1994"
res, _ = parse(dstr, fuzzy_with_tokens=True)
expected = datetime(1994, 12, 1)
assert res == expected
@pytest.mark.xfail
def test_unambiguous_YYYYMM(self):
# 171206 can be parsed as YYMMDD. However, 201712 cannot be parsed
# as an instance of YYMMDD, so the parser could fall back to the YYYYMM format.
dstr = "201712"
res = parse(dstr)
expected = datetime(2017, 12, 1)
assert res == expected
@pytest.mark.xfail
def test_extraneous_numerical_content(self):
# ref: https://github.com/dateutil/dateutil/issues/1029
# parser interprets price and percentage as parts of the date
dstr = "£14.99 (25% off, until April 20)"
res = parse(dstr, fuzzy=True, default=datetime(2000, 1, 1))
expected = datetime(2000, 4, 20)
assert res == expected
@pytest.mark.skipif(IS_WIN, reason="Windows does not use TZ var")
class TestTZVar(object):
def test_parse_unambiguous_nonexistent_local(self):
# When a date is specified as "EST" even though it should be "EDT" in the
# local time zone, we should still assign the local time zone
with TZEnvContext('EST+5EDT,M3.2.0/2,M11.1.0/2'):
dt_exp = datetime(2011, 8, 1, 12, 30, tzinfo=tz.tzlocal())
dt = parse('2011-08-01T12:30 EST')
assert dt.tzname() == 'EDT'
assert dt == dt_exp
def test_tzlocal_in_gmt(self):
# GH #318
with TZEnvContext('GMT0BST,M3.5.0,M10.5.0'):
# This is an imaginary datetime in tz.tzlocal() but should still
# parse using the GMT-as-alias-for-UTC rule
dt = parse('2004-05-01T12:00 GMT')
dt_exp = datetime(2004, 5, 1, 12, tzinfo=tz.UTC)
assert dt == dt_exp
def test_tzlocal_parse_fold(self):
# One manifestation of GH #318
with TZEnvContext('EST+5EDT,M3.2.0/2,M11.1.0/2'):
dt_exp = datetime(2011, 11, 6, 1, 30, tzinfo=tz.tzlocal())
dt_exp = tz.enfold(dt_exp, fold=1)
dt = parse('2011-11-06T01:30 EST')
# Because this is ambiguous, until `tz.tzlocal() is tz.tzlocal()`
# we'll just check the attributes we care about rather than
# dt == dt_exp
assert dt.tzname() == dt_exp.tzname()
assert dt.replace(tzinfo=None) == dt_exp.replace(tzinfo=None)
assert getattr(dt, 'fold') == getattr(dt_exp, 'fold')
assert dt.astimezone(tz.UTC) == dt_exp.astimezone(tz.UTC)
def test_parse_tzinfos_fold():
NYC = tz.gettz('America/New_York')
tzinfos = {'EST': NYC, 'EDT': NYC}
dt_exp = tz.enfold(datetime(2011, 11, 6, 1, 30, tzinfo=NYC), fold=1)
dt = parse('2011-11-06T01:30 EST', tzinfos=tzinfos)
assert dt == dt_exp
assert dt.tzinfo is dt_exp.tzinfo
assert getattr(dt, 'fold') == getattr(dt_exp, 'fold')
assert dt.astimezone(tz.UTC) == dt_exp.astimezone(tz.UTC)
@pytest.mark.parametrize('dtstr,dt', [
('5.6h', datetime(2003, 9, 25, 5, 36)),
('5.6m', datetime(2003, 9, 25, 0, 5, 36)),
# '5.6s' never had a rounding problem, test added for completeness
('5.6s', datetime(2003, 9, 25, 0, 0, 5, 600000))
])
def test_rounding_floatlike_strings(dtstr, dt):
assert parse(dtstr, default=datetime(2003, 9, 25)) == dt
@pytest.mark.parametrize('value', ['1: test', 'Nan'])
def test_decimal_error(value):
# GH 632, GH 662 - decimal.Decimal raises some non-ParserError exception
# when constructed with an invalid value
with pytest.raises(ParserError):
parse(value)
def test_parsererror_repr():
# GH 991 — the __repr__ was not properly indented and so was never defined.
# This tests the current behavior of the ParserError __repr__, but the
# precise format is not guaranteed to be stable and may change even in
# minor versions. This test exists to avoid regressions.
s = repr(ParserError("Problem with string: %s", "2019-01-01"))
assert s == "ParserError('Problem with string: %s', '2019-01-01')"

View file

@@ -1,706 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from ._common import NotAValue
import calendar
from datetime import datetime, date, timedelta
import unittest
import pytest
from dateutil.relativedelta import relativedelta, MO, TU, WE, FR, SU
class RelativeDeltaTest(unittest.TestCase):
now = datetime(2003, 9, 17, 20, 54, 47, 282310)
today = date(2003, 9, 17)
def testInheritance(self):
# Ensure that relativedelta is inheritance-friendly.
class rdChildClass(relativedelta):
pass
ccRD = rdChildClass(years=1, months=1, days=1, leapdays=1, weeks=1,
hours=1, minutes=1, seconds=1, microseconds=1)
rd = relativedelta(years=1, months=1, days=1, leapdays=1, weeks=1,
hours=1, minutes=1, seconds=1, microseconds=1)
self.assertEqual(type(ccRD + rd), type(ccRD),
msg='Addition does not inherit type.')
self.assertEqual(type(ccRD - rd), type(ccRD),
msg='Subtraction does not inherit type.')
self.assertEqual(type(-ccRD), type(ccRD),
msg='Negation does not inherit type.')
self.assertEqual(type(ccRD * 5.0), type(ccRD),
msg='Multiplication does not inherit type.')
self.assertEqual(type(ccRD / 5.0), type(ccRD),
msg='Division does not inherit type.')
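    # Editor's hedged follow-up (not part of the original suite): the subclass
    # also combines with datetimes exactly as relativedelta itself does.
    def testInheritanceWithDatetimeSketch(self):
        class rdGrandChildClass(relativedelta):
            pass
        result = datetime(2000, 1, 31) + rdGrandChildClass(months=1)
        assert result == datetime(2000, 2, 29)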
def testMonthEndMonthBeginning(self):
self.assertEqual(relativedelta(datetime(2003, 1, 31, 23, 59, 59),
datetime(2003, 3, 1, 0, 0, 0)),
relativedelta(months=-1, seconds=-1))
self.assertEqual(relativedelta(datetime(2003, 3, 1, 0, 0, 0),
datetime(2003, 1, 31, 23, 59, 59)),
relativedelta(months=1, seconds=1))
def testMonthEndMonthBeginningLeapYear(self):
self.assertEqual(relativedelta(datetime(2012, 1, 31, 23, 59, 59),
datetime(2012, 3, 1, 0, 0, 0)),
relativedelta(months=-1, seconds=-1))
self.assertEqual(relativedelta(datetime(2003, 3, 1, 0, 0, 0),
datetime(2003, 1, 31, 23, 59, 59)),
relativedelta(months=1, seconds=1))
def testNextMonth(self):
self.assertEqual(self.now+relativedelta(months=+1),
datetime(2003, 10, 17, 20, 54, 47, 282310))
def testNextMonthPlusOneWeek(self):
self.assertEqual(self.now+relativedelta(months=+1, weeks=+1),
datetime(2003, 10, 24, 20, 54, 47, 282310))
def testNextMonthPlusOneWeek10am(self):
self.assertEqual(self.today +
relativedelta(months=+1, weeks=+1, hour=10),
datetime(2003, 10, 24, 10, 0))
def testNextMonthPlusOneWeek10amDiff(self):
self.assertEqual(relativedelta(datetime(2003, 10, 24, 10, 0),
self.today),
relativedelta(months=+1, days=+7, hours=+10))
def testOneMonthBeforeOneYear(self):
self.assertEqual(self.now+relativedelta(years=+1, months=-1),
datetime(2004, 8, 17, 20, 54, 47, 282310))
def testMonthsOfDiffNumOfDays(self):
self.assertEqual(date(2003, 1, 27)+relativedelta(months=+1),
date(2003, 2, 27))
self.assertEqual(date(2003, 1, 31)+relativedelta(months=+1),
date(2003, 2, 28))
self.assertEqual(date(2003, 1, 31)+relativedelta(months=+2),
date(2003, 3, 31))
def testMonthsOfDiffNumOfDaysWithYears(self):
self.assertEqual(date(2000, 2, 28)+relativedelta(years=+1),
date(2001, 2, 28))
self.assertEqual(date(2000, 2, 29)+relativedelta(years=+1),
date(2001, 2, 28))
self.assertEqual(date(1999, 2, 28)+relativedelta(years=+1),
date(2000, 2, 28))
self.assertEqual(date(1999, 3, 1)+relativedelta(years=+1),
date(2000, 3, 1))
self.assertEqual(date(1999, 3, 1)+relativedelta(years=+1),
date(2000, 3, 1))
self.assertEqual(date(2001, 2, 28)+relativedelta(years=-1),
date(2000, 2, 28))
self.assertEqual(date(2001, 3, 1)+relativedelta(years=-1),
date(2000, 3, 1))
def testNextFriday(self):
self.assertEqual(self.today+relativedelta(weekday=FR),
date(2003, 9, 19))
def testNextFridayInt(self):
self.assertEqual(self.today+relativedelta(weekday=calendar.FRIDAY),
date(2003, 9, 19))
def testLastFridayInThisMonth(self):
self.assertEqual(self.today+relativedelta(day=31, weekday=FR(-1)),
date(2003, 9, 26))
def testLastDayOfFebruary(self):
self.assertEqual(date(2021, 2, 1) + relativedelta(day=31),
date(2021, 2, 28))
def testLastDayOfFebruaryLeapYear(self):
self.assertEqual(date(2020, 2, 1) + relativedelta(day=31),
date(2020, 2, 29))
def testNextWednesdayIsToday(self):
self.assertEqual(self.today+relativedelta(weekday=WE),
date(2003, 9, 17))
def testNextWednesdayNotToday(self):
self.assertEqual(self.today+relativedelta(days=+1, weekday=WE),
date(2003, 9, 24))
def testAddMoreThan12Months(self):
self.assertEqual(date(2003, 12, 1) + relativedelta(months=+13),
date(2005, 1, 1))
def testAddNegativeMonths(self):
self.assertEqual(date(2003, 1, 1) + relativedelta(months=-2),
date(2002, 11, 1))
def test15thISOYearWeek(self):
self.assertEqual(date(2003, 1, 1) +
relativedelta(day=4, weeks=+14, weekday=MO(-1)),
date(2003, 4, 7))
def testMillenniumAge(self):
self.assertEqual(relativedelta(self.now, date(2001, 1, 1)),
relativedelta(years=+2, months=+8, days=+16,
hours=+20, minutes=+54, seconds=+47,
microseconds=+282310))
def testJohnAge(self):
self.assertEqual(relativedelta(self.now,
datetime(1978, 4, 5, 12, 0)),
relativedelta(years=+25, months=+5, days=+12,
hours=+8, minutes=+54, seconds=+47,
microseconds=+282310))
def testJohnAgeWithDate(self):
self.assertEqual(relativedelta(self.today,
datetime(1978, 4, 5, 12, 0)),
relativedelta(years=+25, months=+5, days=+11,
hours=+12))
def testYearDay(self):
self.assertEqual(date(2003, 1, 1)+relativedelta(yearday=260),
date(2003, 9, 17))
self.assertEqual(date(2002, 1, 1)+relativedelta(yearday=260),
date(2002, 9, 17))
self.assertEqual(date(2000, 1, 1)+relativedelta(yearday=260),
date(2000, 9, 16))
self.assertEqual(self.today+relativedelta(yearday=261),
date(2003, 9, 18))
def testYearDayBug(self):
# Tests a problem reported by Adam Ryan.
self.assertEqual(date(2010, 1, 1)+relativedelta(yearday=15),
date(2010, 1, 15))
def testNonLeapYearDay(self):
self.assertEqual(date(2003, 1, 1)+relativedelta(nlyearday=260),
date(2003, 9, 17))
self.assertEqual(date(2002, 1, 1)+relativedelta(nlyearday=260),
date(2002, 9, 17))
self.assertEqual(date(2000, 1, 1)+relativedelta(nlyearday=260),
date(2000, 9, 17))
self.assertEqual(self.today+relativedelta(yearday=261),
date(2003, 9, 18))
def testAddition(self):
self.assertEqual(relativedelta(days=10) +
relativedelta(years=1, months=2, days=3, hours=4,
minutes=5, microseconds=6),
relativedelta(years=1, months=2, days=13, hours=4,
minutes=5, microseconds=6))
def testAbsoluteAddition(self):
self.assertEqual(relativedelta() + relativedelta(day=0, hour=0),
relativedelta(day=0, hour=0))
self.assertEqual(relativedelta(day=0, hour=0) + relativedelta(),
relativedelta(day=0, hour=0))
def testAdditionToDatetime(self):
self.assertEqual(datetime(2000, 1, 1) + relativedelta(days=1),
datetime(2000, 1, 2))
def testRightAdditionToDatetime(self):
self.assertEqual(relativedelta(days=1) + datetime(2000, 1, 1),
datetime(2000, 1, 2))
def testAdditionInvalidType(self):
with self.assertRaises(TypeError):
relativedelta(days=3) + 9
def testAdditionUnsupportedType(self):
# For unsupported types that define their own comparators, etc.
self.assertIs(relativedelta(days=1) + NotAValue, NotAValue)
def testAdditionFloatValue(self):
self.assertEqual(datetime(2000, 1, 1) + relativedelta(days=float(1)),
datetime(2000, 1, 2))
self.assertEqual(datetime(2000, 1, 1) + relativedelta(months=float(1)),
datetime(2000, 2, 1))
self.assertEqual(datetime(2000, 1, 1) + relativedelta(years=float(1)),
datetime(2001, 1, 1))
def testAdditionFloatFractionals(self):
self.assertEqual(datetime(2000, 1, 1, 0) +
relativedelta(days=float(0.5)),
datetime(2000, 1, 1, 12))
self.assertEqual(datetime(2000, 1, 1, 0, 0) +
relativedelta(hours=float(0.5)),
datetime(2000, 1, 1, 0, 30))
self.assertEqual(datetime(2000, 1, 1, 0, 0, 0) +
relativedelta(minutes=float(0.5)),
datetime(2000, 1, 1, 0, 0, 30))
self.assertEqual(datetime(2000, 1, 1, 0, 0, 0, 0) +
relativedelta(seconds=float(0.5)),
datetime(2000, 1, 1, 0, 0, 0, 500000))
self.assertEqual(datetime(2000, 1, 1, 0, 0, 0, 0) +
relativedelta(microseconds=float(500000.25)),
datetime(2000, 1, 1, 0, 0, 0, 500000))
def testSubtraction(self):
self.assertEqual(relativedelta(days=10) -
relativedelta(years=1, months=2, days=3, hours=4,
minutes=5, microseconds=6),
relativedelta(years=-1, months=-2, days=7, hours=-4,
minutes=-5, microseconds=-6))
def testRightSubtractionFromDatetime(self):
self.assertEqual(datetime(2000, 1, 2) - relativedelta(days=1),
datetime(2000, 1, 1))
def testSubtractionWithDatetime(self):
self.assertRaises(TypeError, lambda x, y: x - y,
relativedelta(days=1), datetime(2000, 1, 1))
def testSubtractionInvalidType(self):
with self.assertRaises(TypeError):
relativedelta(hours=12) - 14
def testSubtractionUnsupportedType(self):
self.assertIs(relativedelta(days=1) - NotAValue, NotAValue)
def testMultiplication(self):
self.assertEqual(datetime(2000, 1, 1) + relativedelta(days=1) * 28,
datetime(2000, 1, 29))
self.assertEqual(datetime(2000, 1, 1) + 28 * relativedelta(days=1),
datetime(2000, 1, 29))
def testMultiplicationUnsupportedType(self):
self.assertIs(relativedelta(days=1) * NotAValue, NotAValue)
def testDivision(self):
self.assertEqual(datetime(2000, 1, 1) + relativedelta(days=28) / 28,
datetime(2000, 1, 2))
def testDivisionUnsupportedType(self):
self.assertIs(relativedelta(days=1) / NotAValue, NotAValue)
def testBoolean(self):
self.assertFalse(relativedelta(days=0))
self.assertTrue(relativedelta(days=1))
def testAbsoluteValueNegative(self):
rd_base = relativedelta(years=-1, months=-5, days=-2, hours=-3,
minutes=-5, seconds=-2, microseconds=-12)
rd_expected = relativedelta(years=1, months=5, days=2, hours=3,
minutes=5, seconds=2, microseconds=12)
self.assertEqual(abs(rd_base), rd_expected)
def testAbsoluteValuePositive(self):
rd_base = relativedelta(years=1, months=5, days=2, hours=3,
minutes=5, seconds=2, microseconds=12)
rd_expected = rd_base
self.assertEqual(abs(rd_base), rd_expected)
def testComparison(self):
d1 = relativedelta(years=1, months=1, days=1, leapdays=0, hours=1,
minutes=1, seconds=1, microseconds=1)
d2 = relativedelta(years=1, months=1, days=1, leapdays=0, hours=1,
minutes=1, seconds=1, microseconds=1)
d3 = relativedelta(years=1, months=1, days=1, leapdays=0, hours=1,
minutes=1, seconds=1, microseconds=2)
self.assertEqual(d1, d2)
self.assertNotEqual(d1, d3)
def testInequalityTypeMismatch(self):
# Different type
self.assertFalse(relativedelta(year=1) == 19)
def testInequalityUnsupportedType(self):
self.assertIs(relativedelta(hours=3) == NotAValue, NotAValue)
def testInequalityWeekdays(self):
# Different weekdays
no_wday = relativedelta(year=1997, month=4)
wday_mo_1 = relativedelta(year=1997, month=4, weekday=MO(+1))
wday_mo_2 = relativedelta(year=1997, month=4, weekday=MO(+2))
wday_tu = relativedelta(year=1997, month=4, weekday=TU)
self.assertTrue(wday_mo_1 == wday_mo_1)
self.assertFalse(no_wday == wday_mo_1)
self.assertFalse(wday_mo_1 == no_wday)
self.assertFalse(wday_mo_1 == wday_mo_2)
self.assertFalse(wday_mo_2 == wday_mo_1)
self.assertFalse(wday_mo_1 == wday_tu)
self.assertFalse(wday_tu == wday_mo_1)
def testMonthOverflow(self):
self.assertEqual(relativedelta(months=273),
relativedelta(years=22, months=9))
def testWeeks(self):
# Test that the weeks property is working properly.
rd = relativedelta(years=4, months=2, weeks=8, days=6)
self.assertEqual((rd.weeks, rd.days), (8, 8 * 7 + 6))
rd.weeks = 3
self.assertEqual((rd.weeks, rd.days), (3, 3 * 7 + 6))
def testRelativeDeltaRepr(self):
self.assertEqual(repr(relativedelta(years=1, months=-1, days=15)),
'relativedelta(years=+1, months=-1, days=+15)')
self.assertEqual(repr(relativedelta(months=14, seconds=-25)),
'relativedelta(years=+1, months=+2, seconds=-25)')
self.assertEqual(repr(relativedelta(month=3, hour=3, weekday=SU(3))),
'relativedelta(month=3, weekday=SU(+3), hour=3)')
def testRelativeDeltaFractionalYear(self):
with self.assertRaises(ValueError):
relativedelta(years=1.5)
def testRelativeDeltaFractionalMonth(self):
with self.assertRaises(ValueError):
relativedelta(months=1.5)
def testRelativeDeltaInvalidDatetimeObject(self):
with self.assertRaises(TypeError):
relativedelta(dt1='2018-01-01', dt2='2018-01-02')
with self.assertRaises(TypeError):
relativedelta(dt1=datetime(2018, 1, 1), dt2='2018-01-02')
with self.assertRaises(TypeError):
relativedelta(dt1='2018-01-01', dt2=datetime(2018, 1, 2))
def testRelativeDeltaFractionalAbsolutes(self):
# Fractional absolute values will soon be unsupported,
# check for the deprecation warning.
with pytest.warns(DeprecationWarning):
relativedelta(year=2.86)
with pytest.warns(DeprecationWarning):
relativedelta(month=1.29)
with pytest.warns(DeprecationWarning):
relativedelta(day=0.44)
with pytest.warns(DeprecationWarning):
relativedelta(hour=23.98)
with pytest.warns(DeprecationWarning):
relativedelta(minute=45.21)
with pytest.warns(DeprecationWarning):
relativedelta(second=13.2)
with pytest.warns(DeprecationWarning):
relativedelta(microsecond=157221.93)
def testRelativeDeltaFractionalRepr(self):
rd = relativedelta(years=3, months=-2, days=1.25)
self.assertEqual(repr(rd),
'relativedelta(years=+3, months=-2, days=+1.25)')
rd = relativedelta(hours=0.5, seconds=9.22)
self.assertEqual(repr(rd),
'relativedelta(hours=+0.5, seconds=+9.22)')
def testRelativeDeltaFractionalWeeks(self):
# Equivalent to days=8, hours=18
rd = relativedelta(weeks=1.25)
d1 = datetime(2009, 9, 3, 0, 0)
self.assertEqual(d1 + rd,
datetime(2009, 9, 11, 18))
def testRelativeDeltaFractionalDays(self):
rd1 = relativedelta(days=1.48)
d1 = datetime(2009, 9, 3, 0, 0)
self.assertEqual(d1 + rd1,
datetime(2009, 9, 4, 11, 31, 12))
rd2 = relativedelta(days=1.5)
self.assertEqual(d1 + rd2,
datetime(2009, 9, 4, 12, 0, 0))
def testRelativeDeltaFractionalHours(self):
rd = relativedelta(days=1, hours=12.5)
d1 = datetime(2009, 9, 3, 0, 0)
self.assertEqual(d1 + rd,
datetime(2009, 9, 4, 12, 30, 0))
def testRelativeDeltaFractionalMinutes(self):
rd = relativedelta(hours=1, minutes=30.5)
d1 = datetime(2009, 9, 3, 0, 0)
self.assertEqual(d1 + rd,
datetime(2009, 9, 3, 1, 30, 30))
def testRelativeDeltaFractionalSeconds(self):
rd = relativedelta(hours=5, minutes=30, seconds=30.5)
d1 = datetime(2009, 9, 3, 0, 0)
self.assertEqual(d1 + rd,
datetime(2009, 9, 3, 5, 30, 30, 500000))
def testRelativeDeltaFractionalPositiveOverflow(self):
# Equivalent to (days=1, hours=14)
rd1 = relativedelta(days=1.5, hours=2)
d1 = datetime(2009, 9, 3, 0, 0)
self.assertEqual(d1 + rd1,
datetime(2009, 9, 4, 14, 0, 0))
# Equivalent to (days=1, hours=14, minutes=45)
rd2 = relativedelta(days=1.5, hours=2.5, minutes=15)
d1 = datetime(2009, 9, 3, 0, 0)
self.assertEqual(d1 + rd2,
datetime(2009, 9, 4, 14, 45))
# Carry back up - equivalent to (days=2, hours=2, minutes=0, seconds=1)
rd3 = relativedelta(days=1.5, hours=13, minutes=59.5, seconds=31)
self.assertEqual(d1 + rd3,
datetime(2009, 9, 5, 2, 0, 1))
def testRelativeDeltaFractionalNegativeDays(self):
# Equivalent to (days=-1, hours=-1)
rd1 = relativedelta(days=-1.5, hours=11)
d1 = datetime(2009, 9, 3, 12, 0)
self.assertEqual(d1 + rd1,
datetime(2009, 9, 2, 11, 0, 0))
# Equivalent to (days=-1, hours=-9)
rd2 = relativedelta(days=-1.25, hours=-3)
self.assertEqual(d1 + rd2,
datetime(2009, 9, 2, 3))
def testRelativeDeltaNormalizeFractionalDays(self):
# Equivalent to (days=2, hours=18)
rd1 = relativedelta(days=2.75)
self.assertEqual(rd1.normalized(), relativedelta(days=2, hours=18))
# Equivalent to (days=1, hours=11, minutes=31, seconds=12)
rd2 = relativedelta(days=1.48)
self.assertEqual(rd2.normalized(),
relativedelta(days=1, hours=11, minutes=31, seconds=12))
def testRelativeDeltaNormalizeFractionalDays2(self):
# Equivalent to (hours=1, minutes=30)
rd1 = relativedelta(hours=1.5)
self.assertEqual(rd1.normalized(), relativedelta(hours=1, minutes=30))
# Equivalent to (hours=3, minutes=17, seconds=5, microseconds=100)
rd2 = relativedelta(hours=3.28472225)
self.assertEqual(rd2.normalized(),
relativedelta(hours=3, minutes=17, seconds=5, microseconds=100))
def testRelativeDeltaNormalizeFractionalMinutes(self):
# Equivalent to (minutes=15, seconds=36)
rd1 = relativedelta(minutes=15.6)
self.assertEqual(rd1.normalized(),
relativedelta(minutes=15, seconds=36))
# Equivalent to (minutes=25, seconds=20, microseconds=25000)
rd2 = relativedelta(minutes=25.33375)
self.assertEqual(rd2.normalized(),
relativedelta(minutes=25, seconds=20, microseconds=25000))
def testRelativeDeltaNormalizeFractionalSeconds(self):
# Equivalent to (seconds=45, microseconds=25000)
rd1 = relativedelta(seconds=45.025)
self.assertEqual(rd1.normalized(),
relativedelta(seconds=45, microseconds=25000))
def testRelativeDeltaFractionalPositiveOverflow2(self):
# Equivalent to (days=1, hours=14)
rd1 = relativedelta(days=1.5, hours=2)
self.assertEqual(rd1.normalized(),
relativedelta(days=1, hours=14))
# Equivalent to (days=1, hours=14, minutes=45)
rd2 = relativedelta(days=1.5, hours=2.5, minutes=15)
self.assertEqual(rd2.normalized(),
relativedelta(days=1, hours=14, minutes=45))
# Carry back up - equivalent to:
# (days=2, hours=2, minutes=0, seconds=2, microseconds=3)
rd3 = relativedelta(days=1.5, hours=13, minutes=59.50045,
seconds=31.473, microseconds=500003)
self.assertEqual(rd3.normalized(),
relativedelta(days=2, hours=2, minutes=0,
seconds=2, microseconds=3))
def testRelativeDeltaFractionalNegativeOverflow(self):
# Equivalent to (days=-1)
rd1 = relativedelta(days=-0.5, hours=-12)
self.assertEqual(rd1.normalized(),
relativedelta(days=-1))
# Equivalent to (days=-1)
rd2 = relativedelta(days=-1.5, hours=12)
self.assertEqual(rd2.normalized(),
relativedelta(days=-1))
# Equivalent to (days=-1, hours=-14, minutes=-45)
rd3 = relativedelta(days=-1.5, hours=-2.5, minutes=-15)
self.assertEqual(rd3.normalized(),
relativedelta(days=-1, hours=-14, minutes=-45))
# Equivalent to (days=-1, hours=-14, minutes=+15)
rd4 = relativedelta(days=-1.5, hours=-2.5, minutes=45)
self.assertEqual(rd4.normalized(),
relativedelta(days=-1, hours=-14, minutes=+15))
# Carry back up - equivalent to:
# (days=-2, hours=-2, minutes=0, seconds=-2, microseconds=-3)
rd3 = relativedelta(days=-1.5, hours=-13, minutes=-59.50045,
seconds=-31.473, microseconds=-500003)
self.assertEqual(rd3.normalized(),
relativedelta(days=-2, hours=-2, minutes=0,
seconds=-2, microseconds=-3))
def testInvalidYearDay(self):
with self.assertRaises(ValueError):
relativedelta(yearday=367)
def testAddTimedeltaToUnpopulatedRelativedelta(self):
td = timedelta(
days=1,
seconds=1,
microseconds=1,
milliseconds=1,
minutes=1,
hours=1,
weeks=1
)
expected = relativedelta(
weeks=1,
days=1,
hours=1,
minutes=1,
seconds=1,
microseconds=1001
)
self.assertEqual(expected, relativedelta() + td)
def testAddTimedeltaToPopulatedRelativeDelta(self):
td = timedelta(
days=1,
seconds=1,
microseconds=1,
milliseconds=1,
minutes=1,
hours=1,
weeks=1
)
rd = relativedelta(
year=1,
month=1,
day=1,
hour=1,
minute=1,
second=1,
microsecond=1,
years=1,
months=1,
days=1,
weeks=1,
hours=1,
minutes=1,
seconds=1,
microseconds=1
)
expected = relativedelta(
year=1,
month=1,
day=1,
hour=1,
minute=1,
second=1,
microsecond=1,
years=1,
months=1,
weeks=2,
days=2,
hours=2,
minutes=2,
seconds=2,
microseconds=1002,
)
self.assertEqual(expected, rd + td)
def testHashable(self):
try:
{relativedelta(minute=1): 'test'}
except:
self.fail("relativedelta() failed to hash!")
class RelativeDeltaWeeksPropertyGetterTest(unittest.TestCase):
"""Test the weeks property getter"""
def test_one_day(self):
rd = relativedelta(days=1)
self.assertEqual(rd.days, 1)
self.assertEqual(rd.weeks, 0)
def test_minus_one_day(self):
rd = relativedelta(days=-1)
self.assertEqual(rd.days, -1)
self.assertEqual(rd.weeks, 0)
def test_eight_days(self):
rd = relativedelta(days=8)
self.assertEqual(rd.days, 8)
self.assertEqual(rd.weeks, 1)
def test_minus_eight_days(self):
rd = relativedelta(days=-8)
self.assertEqual(rd.days, -8)
self.assertEqual(rd.weeks, -1)
class RelativeDeltaWeeksPropertySetterTest(unittest.TestCase):
"""Test the weeks setter which makes a "smart" update of the days attribute"""
def test_one_day_set_one_week(self):
rd = relativedelta(days=1)
rd.weeks = 1 # add 7 days
self.assertEqual(rd.days, 8)
self.assertEqual(rd.weeks, 1)
def test_minus_one_day_set_one_week(self):
rd = relativedelta(days=-1)
rd.weeks = 1 # add 7 days
self.assertEqual(rd.days, 6)
self.assertEqual(rd.weeks, 0)
def test_eight_days_set_minus_one_week(self):
rd = relativedelta(days=8)
rd.weeks = -1 # change from 1 week, 1 day to -1 week, 1 day
self.assertEqual(rd.days, -6)
self.assertEqual(rd.weeks, 0)
def test_minus_eight_days_set_minus_one_week(self):
rd = relativedelta(days=-8)
rd.weeks = -1 # does not change anything
self.assertEqual(rd.days, -8)
self.assertEqual(rd.weeks, -1)
# vim:ts=4:sw=4:et
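The fractional-normalization tests above all exercise the same rule: normalized() pushes the fractional part of a larger unit down into the next smaller unit until only integer components remain. A minimal sketch of that behaviour, assuming dateutil is importable; the assertions simply restate two of the expectations from the tests above:

from dateutil.relativedelta import relativedelta

# 2.75 days -> 2 days plus 0.75 * 24 = 18 hours.
assert relativedelta(days=2.75).normalized() == relativedelta(days=2, hours=18)
# The same cascading applies through hours, minutes, seconds and microseconds.
assert relativedelta(hours=1.5).normalized() == relativedelta(hours=1, minutes=30)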

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@ -1,52 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from datetime import timedelta, datetime
from dateutil import tz
from dateutil import utils
from dateutil.tz import UTC
from dateutil.utils import within_delta
from freezegun import freeze_time
NYC = tz.gettz("America/New_York")
@freeze_time(datetime(2014, 12, 15, 1, 21, 33, 4003))
def test_utils_today():
assert utils.today() == datetime(2014, 12, 15, 0, 0, 0)
@freeze_time(datetime(2014, 12, 15, 12), tz_offset=5)
def test_utils_today_tz_info():
assert utils.today(NYC) == datetime(2014, 12, 15, 0, 0, 0, tzinfo=NYC)
@freeze_time(datetime(2014, 12, 15, 23), tz_offset=5)
def test_utils_today_tz_info_different_day():
assert utils.today(UTC) == datetime(2014, 12, 16, 0, 0, 0, tzinfo=UTC)
def test_utils_default_tz_info_naive():
dt = datetime(2014, 9, 14, 9, 30)
assert utils.default_tzinfo(dt, NYC).tzinfo is NYC
def test_utils_default_tz_info_aware():
dt = datetime(2014, 9, 14, 9, 30, tzinfo=UTC)
assert utils.default_tzinfo(dt, NYC).tzinfo is UTC
def test_utils_within_delta():
d1 = datetime(2016, 1, 1, 12, 14, 1, 9)
d2 = d1.replace(microsecond=15)
assert within_delta(d1, d2, timedelta(seconds=1))
assert not within_delta(d1, d2, timedelta(microseconds=1))
def test_utils_within_delta_with_negative_delta():
d1 = datetime(2016, 1, 1)
d2 = datetime(2015, 12, 31)
assert within_delta(d2, d1, timedelta(days=-1))
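The negative-delta case above passes because within_delta treats the tolerance symmetrically, so the sign of the timedelta is irrelevant. A minimal sketch of that check, written as an assumption about the behaviour rather than a copy of dateutil's implementation:

from datetime import datetime, timedelta

def within_delta_sketch(dt1, dt2, delta):
    # Assumed behaviour: the tolerance is taken as an absolute value, so
    # timedelta(days=-1) and timedelta(days=1) are interchangeable here.
    delta = abs(delta)
    return -delta <= (dt1 - dt2) <= delta

assert within_delta_sketch(datetime(2015, 12, 31), datetime(2016, 1, 1),
                           timedelta(days=-1))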

View file

@ -1 +0,0 @@
from __future__ import absolute_import, division, unicode_literals

View file

@ -1,108 +0,0 @@
from __future__ import print_function
import os.path
import sys
import pkg_resources
import pytest
from .tree_construction import TreeConstructionFile
from .tokenizer import TokenizerFile
from .sanitizer import SanitizerFile
_dir = os.path.abspath(os.path.dirname(__file__))
_root = os.path.join(_dir, "..", "..")
_testdata = os.path.join(_dir, "testdata")
_tree_construction = os.path.join(_testdata, "tree-construction")
_tokenizer = os.path.join(_testdata, "tokenizer")
_sanitizer_testdata = os.path.join(_dir, "sanitizer-testdata")
def fail_if_missing_pytest_expect():
"""Throws an exception halting pytest if pytest-expect isn't working"""
try:
from pytest_expect import expect # noqa
except ImportError:
header = '*' * 78
print(
'\n' +
header + '\n' +
'ERROR: Either pytest-expect or its dependency u-msgpack-python is not\n' +
'installed. Please install them both before running pytest.\n' +
header + '\n',
file=sys.stderr
)
raise
fail_if_missing_pytest_expect()
def pytest_configure(config):
msgs = []
if not os.path.exists(_testdata):
msg = "testdata not available! "
if os.path.exists(os.path.join(_root, ".git")):
msg += ("Please run git submodule update --init --recursive " +
"and then run tests again.")
else:
msg += ("The testdata doesn't appear to be included with this package, " +
"so finding the right version will be hard. :(")
msgs.append(msg)
if config.option.update_xfail:
# Check for optional requirements
req_file = os.path.join(_root, "requirements-optional.txt")
if os.path.exists(req_file):
with open(req_file, "r") as fp:
for line in fp:
if (line.strip() and
not (line.startswith("-r") or
line.startswith("#"))):
if ";" in line:
spec, marker = line.strip().split(";", 1)
else:
spec, marker = line.strip(), None
req = pkg_resources.Requirement.parse(spec)
if marker and not pkg_resources.evaluate_marker(marker):
msgs.append("%s not available in this environment" % spec)
else:
try:
installed = pkg_resources.working_set.find(req)
except pkg_resources.VersionConflict:
msgs.append("Outdated version of %s installed, need %s" % (req.name, spec))
else:
if not installed:
msgs.append("Need %s" % spec)
# Check cElementTree
import xml.etree.ElementTree as ElementTree
try:
import xml.etree.cElementTree as cElementTree
except ImportError:
msgs.append("cElementTree unable to be imported")
else:
if cElementTree.Element is ElementTree.Element:
msgs.append("cElementTree is just an alias for ElementTree")
if msgs:
pytest.exit("\n".join(msgs))
def pytest_collect_file(path, parent):
dir = os.path.abspath(path.dirname)
dir_and_parents = set()
while dir not in dir_and_parents:
dir_and_parents.add(dir)
dir = os.path.dirname(dir)
if _tree_construction in dir_and_parents:
if path.ext == ".dat":
return TreeConstructionFile(path, parent)
elif _tokenizer in dir_and_parents:
if path.ext == ".test":
return TokenizerFile(path, parent)
elif _sanitizer_testdata in dir_and_parents:
if path.ext == ".dat":
return SanitizerFile(path, parent)

View file

@ -1,51 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
import codecs
import json
import pytest
from html5lib import parseFragment, serialize
class SanitizerFile(pytest.File):
def collect(self):
with codecs.open(str(self.fspath), "r", encoding="utf-8") as fp:
tests = json.load(fp)
for i, test in enumerate(tests):
yield SanitizerTest(str(i), self, test=test)
class SanitizerTest(pytest.Item):
def __init__(self, name, parent, test):
super(SanitizerTest, self).__init__(name, parent)
self.obj = lambda: 1 # this is to hack around skipif needing a function!
self.test = test
def runtest(self):
input = self.test["input"]
expected = self.test["output"]
parsed = parseFragment(input)
with pytest.deprecated_call():
serialized = serialize(parsed,
sanitize=True,
omit_optional_tags=False,
use_trailing_solidus=True,
space_before_trailing_solidus=False,
quote_attr_values="always",
quote_char="'",
alphabetical_attributes=True)
errorMsg = "\n".join(["\n\nInput:", input,
"\nExpected:", expected,
"\nReceived:", serialized])
assert expected == serialized, errorMsg
def repr_failure(self, excinfo):
traceback = excinfo.traceback
ntraceback = traceback.cut(path=__file__)
excinfo.traceback = ntraceback.filter()
return excinfo.getrepr(funcargs=True,
showlocals=False,
style="short", tbfilter=False)

View file

@ -1,199 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
# pylint:disable=wrong-import-position
import os
import sys
import codecs
import glob
import xml.sax.handler
base_path = os.path.split(__file__)[0]
test_dir = os.path.join(base_path, 'testdata')
sys.path.insert(0, os.path.abspath(os.path.join(base_path,
os.path.pardir,
os.path.pardir)))
from html5lib import treebuilders, treewalkers, treeadapters # noqa
del base_path
# Build a dict of available trees
treeTypes = {}
# DOM impls
treeTypes["DOM"] = {
"builder": treebuilders.getTreeBuilder("dom"),
"walker": treewalkers.getTreeWalker("dom")
}
# ElementTree impls
import xml.etree.ElementTree as ElementTree # noqa
treeTypes['ElementTree'] = {
"builder": treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True),
"walker": treewalkers.getTreeWalker("etree", ElementTree)
}
try:
import xml.etree.cElementTree as cElementTree # noqa
except ImportError:
treeTypes['cElementTree'] = None
else:
# On Python 3.3 and above cElementTree is an alias, don't run them twice.
if cElementTree.Element is ElementTree.Element:
treeTypes['cElementTree'] = None
else:
treeTypes['cElementTree'] = {
"builder": treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True),
"walker": treewalkers.getTreeWalker("etree", cElementTree)
}
try:
import lxml.etree as lxml # noqa
except ImportError:
treeTypes['lxml'] = None
else:
treeTypes['lxml'] = {
"builder": treebuilders.getTreeBuilder("lxml"),
"walker": treewalkers.getTreeWalker("lxml")
}
# Genshi impls
try:
import genshi # noqa
except ImportError:
treeTypes["genshi"] = None
else:
treeTypes["genshi"] = {
"builder": treebuilders.getTreeBuilder("dom"),
"adapter": lambda tree: treeadapters.genshi.to_genshi(treewalkers.getTreeWalker("dom")(tree)),
"walker": treewalkers.getTreeWalker("genshi")
}
# pylint:enable=wrong-import-position
def get_data_files(subdirectory, files='*.dat', search_dir=test_dir):
return sorted(glob.glob(os.path.join(search_dir, subdirectory, files)))
class DefaultDict(dict):
def __init__(self, default, *args, **kwargs):
self.default = default
dict.__init__(self, *args, **kwargs)
def __getitem__(self, key):
return dict.get(self, key, self.default)
class TestData(object):
def __init__(self, filename, newTestHeading="data", encoding="utf8"):
if encoding is None:
self.f = open(filename, mode="rb")
else:
self.f = codecs.open(filename, encoding=encoding)
self.encoding = encoding
self.newTestHeading = newTestHeading
def __iter__(self):
data = DefaultDict(None)
key = None
for line in self.f:
heading = self.isSectionHeading(line)
if heading:
if data and heading == self.newTestHeading:
# Remove trailing newline
data[key] = data[key][:-1]
yield self.normaliseOutput(data)
data = DefaultDict(None)
key = heading
data[key] = "" if self.encoding else b""
elif key is not None:
data[key] += line
if data:
yield self.normaliseOutput(data)
def isSectionHeading(self, line):
"""If the current heading is a test section heading return the heading,
otherwise return False"""
# print(line)
if line.startswith("#" if self.encoding else b"#"):
return line[1:].strip()
else:
return False
def normaliseOutput(self, data):
# Remove trailing newlines
for key, value in data.items():
if value.endswith("\n" if self.encoding else b"\n"):
data[key] = value[:-1]
return data
def convert(stripChars):
def convertData(data):
"""convert the output of str(document) to the format used in the testcases"""
data = data.split("\n")
rv = []
for line in data:
if line.startswith("|"):
rv.append(line[stripChars:])
else:
rv.append(line)
return "\n".join(rv)
return convertData
convertExpected = convert(2)
def errorMessage(input, expected, actual):
msg = ("Input:\n%s\nExpected:\n%s\nReceived\n%s\n" %
(repr(input), repr(expected), repr(actual)))
if sys.version_info[0] == 2:
msg = msg.encode("ascii", "backslashreplace")
return msg
class TracingSaxHandler(xml.sax.handler.ContentHandler):
def __init__(self):
xml.sax.handler.ContentHandler.__init__(self)
self.visited = []
def startDocument(self):
self.visited.append('startDocument')
def endDocument(self):
self.visited.append('endDocument')
def startPrefixMapping(self, prefix, uri):
# These are ignored as their order is not guaranteed
pass
def endPrefixMapping(self, prefix):
# These are ignored as their order is not guaranteed
pass
def startElement(self, name, attrs):
self.visited.append(('startElement', name, attrs))
def endElement(self, name):
self.visited.append(('endElement', name))
def startElementNS(self, name, qname, attrs):
self.visited.append(('startElementNS', name, qname, dict(attrs)))
def endElementNS(self, name, qname):
self.visited.append(('endElementNS', name, qname))
def characters(self, content):
self.visited.append(('characters', content))
def ignorableWhitespace(self, whitespace):
self.visited.append(('ignorableWhitespace', whitespace))
def processingInstruction(self, target, data):
self.visited.append(('processingInstruction', target, data))
def skippedEntity(self, name):
self.visited.append(('skippedEntity', name))
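TestData above splits a .dat file into sections keyed by their "#heading" lines, starting a new test whenever the configured newTestHeading reappears, and convertExpected strips the leading "| " columns from serialized trees. A short self-contained sketch of that parsing, using a made-up sample rather than a real html5lib-tests file, and relying on the TestData and convertExpected helpers defined above:

import tempfile

sample = "#data\n<p>hello\n#errors\n#document\n| <html>\n"
with tempfile.NamedTemporaryFile("w", suffix=".dat", delete=False) as fp:
    fp.write(sample)
for test in TestData(fp.name, newTestHeading="data"):
    assert test["data"] == "<p>hello"                     # text under "#data"
    assert convertExpected(test["document"]) == "<html>"  # "| " prefix stripped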

View file

@ -1,78 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
from collections import OrderedDict
import pytest
import html5lib
from html5lib.filters.alphabeticalattributes import Filter
from html5lib.serializer import HTMLSerializer
@pytest.mark.parametrize('msg, attrs, expected_attrs', [
(
'no attrs',
{},
{}
),
(
'one attr',
{(None, 'alt'): 'image'},
OrderedDict([((None, 'alt'), 'image')])
),
(
'multiple attrs',
{
(None, 'src'): 'foo',
(None, 'alt'): 'image',
(None, 'style'): 'border: 1px solid black;'
},
OrderedDict([
((None, 'alt'), 'image'),
((None, 'src'), 'foo'),
((None, 'style'), 'border: 1px solid black;')
])
),
])
def test_alphabetizing(msg, attrs, expected_attrs):
tokens = [{'type': 'StartTag', 'name': 'img', 'data': attrs}]
output_tokens = list(Filter(tokens))
attrs = output_tokens[0]['data']
assert attrs == expected_attrs
def test_with_different_namespaces():
tokens = [{
'type': 'StartTag',
'name': 'pattern',
'data': {
(None, 'id'): 'patt1',
('http://www.w3.org/1999/xlink', 'href'): '#patt2'
}
}]
output_tokens = list(Filter(tokens))
attrs = output_tokens[0]['data']
assert attrs == OrderedDict([
((None, 'id'), 'patt1'),
(('http://www.w3.org/1999/xlink', 'href'), '#patt2')
])
def test_with_serializer():
"""Verify filter works in the context of everything else"""
parser = html5lib.HTMLParser()
dom = parser.parseFragment('<svg><pattern xlink:href="#patt2" id="patt1"></svg>')
walker = html5lib.getTreeWalker('etree')
ser = HTMLSerializer(
alphabetical_attributes=True,
quote_attr_values='always'
)
# FIXME(willkg): The "xlink" namespace gets dropped by the serializer. When
# that gets fixed, we can fix this expected result.
assert (
ser.render(walker(dom)) ==
'<svg><pattern id="patt1" href="#patt2"></pattern></svg>'
)

View file

@ -1,117 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
import os
import pytest
from .support import get_data_files, test_dir, errorMessage, TestData as _TestData
from html5lib import HTMLParser, _inputstream
def test_basic_prescan_length():
data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8')
pad = 1024 - len(data) + 1
data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
assert len(data) == 1024 # Sanity
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
assert 'utf-8' == stream.charEncoding[0].name
def test_parser_reparse():
data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8')
pad = 10240 - len(data) + 1
data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
assert len(data) == 10240 # Sanity
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
assert 'windows-1252' == stream.charEncoding[0].name
p = HTMLParser(namespaceHTMLElements=False)
doc = p.parse(data, useChardet=False)
assert 'utf-8' == p.documentEncoding
assert doc.find(".//title").text == "Caf\u00E9"
@pytest.mark.parametrize("expected,data,kwargs", [
("utf-16le", b"\xFF\xFE", {"override_encoding": "iso-8859-2"}),
("utf-16be", b"\xFE\xFF", {"override_encoding": "iso-8859-2"}),
("utf-8", b"\xEF\xBB\xBF", {"override_encoding": "iso-8859-2"}),
("iso-8859-2", b"", {"override_encoding": "iso-8859-2", "transport_encoding": "iso-8859-3"}),
("iso-8859-2", b"<meta charset=iso-8859-3>", {"transport_encoding": "iso-8859-2"}),
("iso-8859-2", b"<meta charset=iso-8859-2>", {"same_origin_parent_encoding": "iso-8859-3"}),
("iso-8859-2", b"", {"same_origin_parent_encoding": "iso-8859-2", "likely_encoding": "iso-8859-3"}),
("iso-8859-2", b"", {"same_origin_parent_encoding": "utf-16", "likely_encoding": "iso-8859-2"}),
("iso-8859-2", b"", {"same_origin_parent_encoding": "utf-16be", "likely_encoding": "iso-8859-2"}),
("iso-8859-2", b"", {"same_origin_parent_encoding": "utf-16le", "likely_encoding": "iso-8859-2"}),
("iso-8859-2", b"", {"likely_encoding": "iso-8859-2", "default_encoding": "iso-8859-3"}),
("iso-8859-2", b"", {"default_encoding": "iso-8859-2"}),
("windows-1252", b"", {"default_encoding": "totally-bogus-string"}),
("windows-1252", b"", {}),
])
def test_parser_args(expected, data, kwargs):
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False, **kwargs)
assert expected == stream.charEncoding[0].name
p = HTMLParser()
p.parse(data, useChardet=False, **kwargs)
assert expected == p.documentEncoding
@pytest.mark.parametrize("kwargs", [
{"override_encoding": "iso-8859-2"},
{"override_encoding": None},
{"transport_encoding": "iso-8859-2"},
{"transport_encoding": None},
{"same_origin_parent_encoding": "iso-8859-2"},
{"same_origin_parent_encoding": None},
{"likely_encoding": "iso-8859-2"},
{"likely_encoding": None},
{"default_encoding": "iso-8859-2"},
{"default_encoding": None},
{"foo_encoding": "iso-8859-2"},
{"foo_encoding": None},
])
def test_parser_args_raises(kwargs):
with pytest.raises(TypeError) as exc_info:
p = HTMLParser()
p.parse("", useChardet=False, **kwargs)
assert exc_info.value.args[0].startswith("Cannot set an encoding with a unicode input")
def param_encoding():
for filename in get_data_files("encoding"):
tests = _TestData(filename, b"data", encoding=None)
for test in tests:
yield test[b'data'], test[b'encoding']
@pytest.mark.parametrize("data, encoding", param_encoding())
def test_parser_encoding(data, encoding):
p = HTMLParser()
assert p.documentEncoding is None
p.parse(data, useChardet=False)
encoding = encoding.lower().decode("ascii")
assert encoding == p.documentEncoding, errorMessage(data, encoding, p.documentEncoding)
@pytest.mark.parametrize("data, encoding", param_encoding())
def test_prescan_encoding(data, encoding):
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
encoding = encoding.lower().decode("ascii")
# Very crude way to ignore irrelevant tests
if len(data) > stream.numBytesMeta:
return
assert encoding == stream.charEncoding[0].name, errorMessage(data, encoding, stream.charEncoding[0].name)
# pylint:disable=wrong-import-position
try:
import chardet # noqa
except ImportError:
print("chardet not found, skipping chardet tests")
else:
def test_chardet():
with open(os.path.join(test_dir, "encoding", "chardet", "test_big5.txt"), "rb") as fp:
encoding = _inputstream.HTMLInputStream(fp.read()).charEncoding
assert encoding[0].name == "big5"
# pylint:enable=wrong-import-position
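The parametrized cases above encode a precedence order for choosing the document encoding: a byte-order mark wins, then the override and transport encodings, then an in-page meta charset, then the same-origin parent encoding (unless it is a utf-16 variant), then the likely and default encodings, with windows-1252 as the final fallback. A small sketch of one of those cases, assuming html5lib is importable; it mirrors the row where a transport-level encoding beats the meta declaration:

from html5lib import _inputstream

stream = _inputstream.HTMLBinaryInputStream(
    b"<meta charset=iso-8859-3>",
    useChardet=False,
    transport_encoding="iso-8859-2",
)
# The transport encoding takes precedence over the prescanned <meta charset>.
assert stream.charEncoding[0].name == "iso-8859-2"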

View file

@ -1,41 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
import six
from mock import Mock
from . import support
def _createReprMock(r):
"""Creates a mock with a __repr__ returning r
Also provides __str__ mock with default mock behaviour"""
mock = Mock()
mock.__repr__ = Mock()
mock.__repr__.return_value = r
mock.__str__ = Mock(wraps=mock.__str__)
return mock
def test_errorMessage():
# Create mock objects to take repr of
input = _createReprMock("1")
expected = _createReprMock("2")
actual = _createReprMock("3")
# Run the actual test
r = support.errorMessage(input, expected, actual)
# Assertions!
if six.PY2:
assert b"Input:\n1\nExpected:\n2\nReceived\n3\n" == r
else:
assert six.PY3
assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r
assert input.__repr__.call_count == 1
assert expected.__repr__.call_count == 1
assert actual.__repr__.call_count == 1
assert not input.__str__.called
assert not expected.__str__.called
assert not actual.__str__.called

View file

@ -1,7 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
from html5lib.filters.optionaltags import Filter
def test_empty():
assert list(Filter([])) == []

View file

@ -1,94 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
from six import PY2, text_type
import io
from . import support # noqa
from html5lib.constants import namespaces
from html5lib import parse, parseFragment, HTMLParser
# tests that aren't autogenerated from text files
def test_assertDoctypeCloneable():
doc = parse('<!DOCTYPE HTML>', treebuilder="dom")
assert doc.cloneNode(True) is not None
def test_line_counter():
# http://groups.google.com/group/html5lib-discuss/browse_frm/thread/f4f00e4a2f26d5c0
assert parse("<pre>\nx\n&gt;\n</pre>") is not None
def test_namespace_html_elements_0_dom():
doc = parse("<html></html>",
treebuilder="dom",
namespaceHTMLElements=True)
assert doc.childNodes[0].namespaceURI == namespaces["html"]
def test_namespace_html_elements_1_dom():
doc = parse("<html></html>",
treebuilder="dom",
namespaceHTMLElements=False)
assert doc.childNodes[0].namespaceURI is None
def test_namespace_html_elements_0_etree():
doc = parse("<html></html>",
treebuilder="etree",
namespaceHTMLElements=True)
assert doc.tag == "{%s}html" % (namespaces["html"],)
def test_namespace_html_elements_1_etree():
doc = parse("<html></html>",
treebuilder="etree",
namespaceHTMLElements=False)
assert doc.tag == "html"
def test_unicode_file():
assert parse(io.StringIO("a")) is not None
def test_debug_log():
parser = HTMLParser(debug=True)
parser.parse("<!doctype html><title>a</title><p>b<script>c</script>d</p>e")
expected = [('dataState', 'InitialPhase', 'InitialPhase', 'processDoctype', {'type': 'Doctype'}),
('dataState', 'BeforeHtmlPhase', 'BeforeHtmlPhase', 'processStartTag', {'name': 'title', 'type': 'StartTag'}),
('dataState', 'BeforeHeadPhase', 'BeforeHeadPhase', 'processStartTag', {'name': 'title', 'type': 'StartTag'}),
('dataState', 'InHeadPhase', 'InHeadPhase', 'processStartTag', {'name': 'title', 'type': 'StartTag'}),
('rcdataState', 'TextPhase', 'TextPhase', 'processCharacters', {'type': 'Characters'}),
('dataState', 'TextPhase', 'TextPhase', 'processEndTag', {'name': 'title', 'type': 'EndTag'}),
('dataState', 'InHeadPhase', 'InHeadPhase', 'processStartTag', {'name': 'p', 'type': 'StartTag'}),
('dataState', 'AfterHeadPhase', 'AfterHeadPhase', 'processStartTag', {'name': 'p', 'type': 'StartTag'}),
('dataState', 'InBodyPhase', 'InBodyPhase', 'processStartTag', {'name': 'p', 'type': 'StartTag'}),
('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'}),
('dataState', 'InBodyPhase', 'InBodyPhase', 'processStartTag', {'name': 'script', 'type': 'StartTag'}),
('dataState', 'InBodyPhase', 'InHeadPhase', 'processStartTag', {'name': 'script', 'type': 'StartTag'}),
('scriptDataState', 'TextPhase', 'TextPhase', 'processCharacters', {'type': 'Characters'}),
('dataState', 'TextPhase', 'TextPhase', 'processEndTag', {'name': 'script', 'type': 'EndTag'}),
('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'}),
('dataState', 'InBodyPhase', 'InBodyPhase', 'processEndTag', {'name': 'p', 'type': 'EndTag'}),
('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'})]
if PY2:
for i, log in enumerate(expected):
log = [x.encode("ascii") if isinstance(x, text_type) else x for x in log]
expected[i] = tuple(log)
assert parser.log == expected
def test_no_duplicate_clone():
frag = parseFragment("<b><em><foo><foob><fooc><aside></b></em>")
assert len(frag) == 2
def test_self_closing_col():
parser = HTMLParser()
parser.parseFragment('<table><colgroup><col /></colgroup></table>')
assert not parser.errors

View file

@ -1,133 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
import pytest
from html5lib import constants, parseFragment, serialize
from html5lib.filters import sanitizer
def sanitize_html(stream):
parsed = parseFragment(stream)
with pytest.deprecated_call():
serialized = serialize(parsed,
sanitize=True,
omit_optional_tags=False,
use_trailing_solidus=True,
space_before_trailing_solidus=False,
quote_attr_values="always",
quote_char='"',
alphabetical_attributes=True)
return serialized
def test_should_handle_astral_plane_characters():
sanitized = sanitize_html("<p>&#x1d4b5; &#x1d538;</p>")
expected = '<p>\U0001d4b5 \U0001d538</p>'
assert expected == sanitized
def test_should_allow_relative_uris():
sanitized = sanitize_html('<p><a href="/example.com"></a></p>')
expected = '<p><a href="/example.com"></a></p>'
assert expected == sanitized
def test_invalid_data_uri():
sanitized = sanitize_html('<audio controls="" src="data:foobar"></audio>')
expected = '<audio controls></audio>'
assert expected == sanitized
def test_invalid_ipv6_url():
sanitized = sanitize_html('<a href="h://]">')
expected = "<a></a>"
assert expected == sanitized
def test_data_uri_disallowed_type():
sanitized = sanitize_html('<audio controls="" src="data:text/html,<html>"></audio>')
expected = "<audio controls></audio>"
assert expected == sanitized
def param_sanitizer():
for ns, tag_name in sanitizer.allowed_elements:
if ns != constants.namespaces["html"]:
continue
if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td',
'tfoot', 'th', 'thead', 'tr', 'select']:
continue # TODO
if tag_name == 'image':
yield ("test_should_allow_%s_tag" % tag_name,
"<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
elif tag_name == 'br':
yield ("test_should_allow_%s_tag" % tag_name,
"<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
elif tag_name in constants.voidElements:
yield ("test_should_allow_%s_tag" % tag_name,
"<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz" % tag_name,
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
else:
yield ("test_should_allow_%s_tag" % tag_name,
"<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>" % (tag_name, tag_name),
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
for ns, attribute_name in sanitizer.allowed_attributes:
if ns is not None:
continue
if attribute_name != attribute_name.lower():
continue # TODO
if attribute_name == 'style':
continue
attribute_value = 'foo'
if attribute_name in sanitizer.attr_val_is_uri:
attribute_value = '%s://sub.domain.tld/path/object.ext' % sanitizer.allowed_protocols[0]
yield ("test_should_allow_%s_attribute" % attribute_name,
"<p %s=\"%s\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>" % (attribute_name, attribute_value),
"<p %s='%s'>foo <bad>bar</bad> baz</p>" % (attribute_name, attribute_value))
for protocol in sanitizer.allowed_protocols:
rest_of_uri = '//sub.domain.tld/path/object.ext'
if protocol == 'data':
rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
yield ("test_should_allow_uppercase_%s_uris" % protocol,
"<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
"""<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))
for protocol in sanitizer.allowed_protocols:
rest_of_uri = '//sub.domain.tld/path/object.ext'
if protocol == 'data':
rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
protocol = protocol.upper()
yield ("test_should_allow_uppercase_%s_uris" % protocol,
"<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
"""<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))
@pytest.mark.parametrize("expected, input",
(pytest.param(expected, input, id=id)
for id, expected, input in param_sanitizer()))
def test_sanitizer(expected, input):
parsed = parseFragment(expected)
expected = serialize(parsed,
omit_optional_tags=False,
use_trailing_solidus=True,
space_before_trailing_solidus=False,
quote_attr_values="always",
quote_char='"',
alphabetical_attributes=True)
assert expected == sanitize_html(input)
def test_lowercase_color_codes_in_style():
sanitized = sanitize_html("<p style=\"border: 1px solid #a2a2a2;\"></p>")
expected = '<p style=\"border: 1px solid #a2a2a2;\"></p>'
assert expected == sanitized
def test_uppercase_color_codes_in_style():
sanitized = sanitize_html("<p style=\"border: 1px solid #A2A2A2;\"></p>")
expected = '<p style=\"border: 1px solid #A2A2A2;\"></p>'
assert expected == sanitized

View file

@ -1,226 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
import os
import json
import pytest
from .support import get_data_files
from html5lib import constants
from html5lib.filters.lint import Filter as Lint
from html5lib.serializer import HTMLSerializer, serialize
from html5lib.treewalkers.base import TreeWalker
# pylint:disable=wrong-import-position
optionals_loaded = []
try:
from lxml import etree
optionals_loaded.append("lxml")
except ImportError:
pass
# pylint:enable=wrong-import-position
default_namespace = constants.namespaces["html"]
class JsonWalker(TreeWalker):
def __iter__(self):
for token in self.tree:
type = token[0]
if type == "StartTag":
if len(token) == 4:
namespace, name, attrib = token[1:4]
else:
namespace = default_namespace
name, attrib = token[1:3]
yield self.startTag(namespace, name, self._convertAttrib(attrib))
elif type == "EndTag":
if len(token) == 3:
namespace, name = token[1:3]
else:
namespace = default_namespace
name = token[1]
yield self.endTag(namespace, name)
elif type == "EmptyTag":
if len(token) == 4:
namespace, name, attrib = token[1:]
else:
namespace = default_namespace
name, attrib = token[1:]
for token in self.emptyTag(namespace, name, self._convertAttrib(attrib)):
yield token
elif type == "Comment":
yield self.comment(token[1])
elif type in ("Characters", "SpaceCharacters"):
for token in self.text(token[1]):
yield token
elif type == "Doctype":
if len(token) == 4:
yield self.doctype(token[1], token[2], token[3])
elif len(token) == 3:
yield self.doctype(token[1], token[2])
else:
yield self.doctype(token[1])
else:
raise ValueError("Unknown token type: " + type)
def _convertAttrib(self, attribs):
"""html5lib tree-walkers use a dict of (namespace, name): value for
attributes, but JSON cannot represent this. Convert from the format
in the serializer tests (a list of dicts with "namespace", "name",
and "value" as keys) to html5lib's tree-walker format."""
attrs = {}
for attrib in attribs:
name = (attrib["namespace"], attrib["name"])
assert(name not in attrs)
attrs[name] = attrib["value"]
return attrs
def serialize_html(input, options):
options = {str(k): v for k, v in options.items()}
encoding = options.get("encoding", None)
if "encoding" in options:
del options["encoding"]
stream = Lint(JsonWalker(input), False)
serializer = HTMLSerializer(alphabetical_attributes=True, **options)
return serializer.render(stream, encoding)
def throwsWithLatin1(input):
with pytest.raises(UnicodeEncodeError):
serialize_html(input, {"encoding": "iso-8859-1"})
def testDoctypeName():
throwsWithLatin1([["Doctype", "\u0101"]])
def testDoctypePublicId():
throwsWithLatin1([["Doctype", "potato", "\u0101"]])
def testDoctypeSystemId():
throwsWithLatin1([["Doctype", "potato", "potato", "\u0101"]])
def testCdataCharacters():
test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]],
["<style>&amacr;"], {"encoding": "iso-8859-1"})
def testCharacters():
test_serializer([["Characters", "\u0101"]],
["&amacr;"], {"encoding": "iso-8859-1"})
def testStartTagName():
throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "\u0101", []]])
def testAttributeName():
throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": None, "name": "\u0101", "value": "potato"}]]])
def testAttributeValue():
test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "span",
[{"namespace": None, "name": "potato", "value": "\u0101"}]]],
["<span potato=&amacr;>"], {"encoding": "iso-8859-1"})
def testEndTagName():
throwsWithLatin1([["EndTag", "http://www.w3.org/1999/xhtml", "\u0101"]])
def testComment():
throwsWithLatin1([["Comment", "\u0101"]])
def testThrowsUnknownOption():
with pytest.raises(TypeError):
HTMLSerializer(foobar=None)
@pytest.mark.parametrize("c", list("\t\n\u000C\x20\r\"'=<>`"))
def testSpecQuoteAttribute(c):
input_ = [["StartTag", "http://www.w3.org/1999/xhtml", "span",
[{"namespace": None, "name": "foo", "value": c}]]]
if c == '"':
output_ = ["<span foo='%s'>" % c]
else:
output_ = ['<span foo="%s">' % c]
options_ = {"quote_attr_values": "spec"}
test_serializer(input_, output_, options_)
@pytest.mark.parametrize("c", list("\t\n\u000C\x20\r\"'=<>`"
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
"\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15"
"\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
"\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000"
"\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
"\u2008\u2009\u200a\u2028\u2029\u202f\u205f"
"\u3000"))
def testLegacyQuoteAttribute(c):
input_ = [["StartTag", "http://www.w3.org/1999/xhtml", "span",
[{"namespace": None, "name": "foo", "value": c}]]]
if c == '"':
output_ = ["<span foo='%s'>" % c]
else:
output_ = ['<span foo="%s">' % c]
options_ = {"quote_attr_values": "legacy"}
test_serializer(input_, output_, options_)
@pytest.fixture
def lxml_parser():
return etree.XMLParser(resolve_entities=False)
@pytest.mark.skipif("lxml" not in optionals_loaded, reason="lxml not importable")
def testEntityReplacement(lxml_parser):
doc = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>'
tree = etree.fromstring(doc, parser=lxml_parser).getroottree()
result = serialize(tree, tree="lxml", omit_optional_tags=False)
assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>\u03B2</html>'
@pytest.mark.skipif("lxml" not in optionals_loaded, reason="lxml not importable")
def testEntityXML(lxml_parser):
doc = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>'
tree = etree.fromstring(doc, parser=lxml_parser).getroottree()
result = serialize(tree, tree="lxml", omit_optional_tags=False)
assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>'
@pytest.mark.skipif("lxml" not in optionals_loaded, reason="lxml not importable")
def testEntityNoResolve(lxml_parser):
doc = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>'
tree = etree.fromstring(doc, parser=lxml_parser).getroottree()
result = serialize(tree, tree="lxml", omit_optional_tags=False,
resolve_entities=False)
assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>'
def param_serializer():
for filename in get_data_files('serializer-testdata', '*.test', os.path.dirname(__file__)):
with open(filename) as fp:
tests = json.load(fp)
for test in tests['tests']:
yield test["input"], test["expected"], test.get("options", {})
@pytest.mark.parametrize("input, expected, options", param_serializer())
def test_serializer(input, expected, options):
encoding = options.get("encoding", None)
if encoding:
expected = list(map(lambda x: x.encode(encoding), expected))
result = serialize_html(input, options)
if len(expected) == 1:
assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
elif result not in expected:
assert False, "Expected: %s, Received: %s" % (expected, result)

View file

@ -1,325 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
from . import support # noqa
import codecs
import sys
from io import BytesIO, StringIO
import pytest
import six
from six.moves import http_client, urllib
from html5lib._inputstream import (BufferedStream, HTMLInputStream,
HTMLUnicodeInputStream, HTMLBinaryInputStream)
from html5lib._utils import supports_lone_surrogates
def test_basic():
s = b"abc"
fp = BufferedStream(BytesIO(s))
read = fp.read(10)
assert read == s
def test_read_length():
fp = BufferedStream(BytesIO(b"abcdef"))
read1 = fp.read(1)
assert read1 == b"a"
read2 = fp.read(2)
assert read2 == b"bc"
read3 = fp.read(3)
assert read3 == b"def"
read4 = fp.read(4)
assert read4 == b""
def test_tell():
fp = BufferedStream(BytesIO(b"abcdef"))
read1 = fp.read(1)
assert read1 == b"a"
assert fp.tell() == 1
read2 = fp.read(2)
assert read2 == b"bc"
assert fp.tell() == 3
read3 = fp.read(3)
assert read3 == b"def"
assert fp.tell() == 6
read4 = fp.read(4)
assert read4 == b""
assert fp.tell() == 6
def test_seek():
fp = BufferedStream(BytesIO(b"abcdef"))
read1 = fp.read(1)
assert read1 == b"a"
fp.seek(0)
read2 = fp.read(1)
assert read2 == b"a"
read3 = fp.read(2)
assert read3 == b"bc"
fp.seek(2)
read4 = fp.read(2)
assert read4 == b"cd"
fp.seek(4)
read5 = fp.read(2)
assert read5 == b"ef"
def test_seek_tell():
fp = BufferedStream(BytesIO(b"abcdef"))
read1 = fp.read(1)
assert read1 == b"a"
assert fp.tell() == 1
fp.seek(0)
read2 = fp.read(1)
assert read2 == b"a"
assert fp.tell() == 1
read3 = fp.read(2)
assert read3 == b"bc"
assert fp.tell() == 3
fp.seek(2)
read4 = fp.read(2)
assert read4 == b"cd"
assert fp.tell() == 4
fp.seek(4)
read5 = fp.read(2)
assert read5 == b"ef"
assert fp.tell() == 6
class HTMLUnicodeInputStreamShortChunk(HTMLUnicodeInputStream):
_defaultChunkSize = 2
class HTMLBinaryInputStreamShortChunk(HTMLBinaryInputStream):
_defaultChunkSize = 2
def test_char_ascii():
stream = HTMLInputStream(b"'", override_encoding='ascii')
assert stream.charEncoding[0].name == 'windows-1252'
assert stream.char() == "'"
def test_char_utf8():
stream = HTMLInputStream('\u2018'.encode('utf-8'), override_encoding='utf-8')
assert stream.charEncoding[0].name == 'utf-8'
assert stream.char() == '\u2018'
def test_char_win1252():
stream = HTMLInputStream("\xa9\xf1\u2019".encode('windows-1252'))
assert stream.charEncoding[0].name == 'windows-1252'
assert stream.char() == "\xa9"
assert stream.char() == "\xf1"
assert stream.char() == "\u2019"
def test_bom():
stream = HTMLInputStream(codecs.BOM_UTF8 + b"'")
assert stream.charEncoding[0].name == 'utf-8'
assert stream.char() == "'"
def test_utf_16():
stream = HTMLInputStream((' ' * 1025).encode('utf-16'))
assert stream.charEncoding[0].name in ['utf-16le', 'utf-16be']
assert len(stream.charsUntil(' ', True)) == 1025
def test_newlines():
stream = HTMLBinaryInputStreamShortChunk(codecs.BOM_UTF8 + b"a\nbb\r\nccc\rddddxe")
assert stream.position() == (1, 0)
assert stream.charsUntil('c') == "a\nbb\n"
assert stream.position() == (3, 0)
assert stream.charsUntil('x') == "ccc\ndddd"
assert stream.position() == (4, 4)
assert stream.charsUntil('e') == "x"
assert stream.position() == (4, 5)
def test_newlines2():
size = HTMLUnicodeInputStream._defaultChunkSize
stream = HTMLInputStream("\r" * size + "\n")
assert stream.charsUntil('x') == "\n" * size
def test_position():
stream = HTMLBinaryInputStreamShortChunk(codecs.BOM_UTF8 + b"a\nbb\nccc\nddde\nf\ngh")
assert stream.position() == (1, 0)
assert stream.charsUntil('c') == "a\nbb\n"
assert stream.position() == (3, 0)
stream.unget("\n")
assert stream.position() == (2, 2)
assert stream.charsUntil('c') == "\n"
assert stream.position() == (3, 0)
stream.unget("\n")
assert stream.position() == (2, 2)
assert stream.char() == "\n"
assert stream.position() == (3, 0)
assert stream.charsUntil('e') == "ccc\nddd"
assert stream.position() == (4, 3)
assert stream.charsUntil('h') == "e\nf\ng"
assert stream.position() == (6, 1)
def test_position2():
stream = HTMLUnicodeInputStreamShortChunk("abc\nd")
assert stream.position() == (1, 0)
assert stream.char() == "a"
assert stream.position() == (1, 1)
assert stream.char() == "b"
assert stream.position() == (1, 2)
assert stream.char() == "c"
assert stream.position() == (1, 3)
assert stream.char() == "\n"
assert stream.position() == (2, 0)
assert stream.char() == "d"
assert stream.position() == (2, 1)
def test_python_issue_20007():
"""
Make sure we have a work-around for Python bug #20007
http://bugs.python.org/issue20007
"""
class FakeSocket(object):
def makefile(self, _mode, _bufsize=None):
# pylint:disable=unused-argument
return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText")
source = http_client.HTTPResponse(FakeSocket())
source.begin()
stream = HTMLInputStream(source)
assert stream.charsUntil(" ") == "Text"
def test_python_issue_20007_b():
"""
Make sure we have a work-around for Python bug #20007
http://bugs.python.org/issue20007
"""
if six.PY2:
return
class FakeSocket(object):
def makefile(self, _mode, _bufsize=None):
# pylint:disable=unused-argument
return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText")
source = http_client.HTTPResponse(FakeSocket())
source.begin()
wrapped = urllib.response.addinfourl(source, source.msg, "http://example.com")
stream = HTMLInputStream(wrapped)
assert stream.charsUntil(" ") == "Text"
@pytest.mark.parametrize("inp,num",
[("\u0000", 0),
("\u0001", 1),
("\u0008", 1),
("\u0009", 0),
("\u000A", 0),
("\u000B", 1),
("\u000C", 0),
("\u000D", 0),
("\u000E", 1),
("\u001F", 1),
("\u0020", 0),
("\u007E", 0),
("\u007F", 1),
("\u009F", 1),
("\u00A0", 0),
("\uFDCF", 0),
("\uFDD0", 1),
("\uFDEF", 1),
("\uFDF0", 0),
("\uFFFD", 0),
("\uFFFE", 1),
("\uFFFF", 1),
("\U0001FFFD", 0),
("\U0001FFFE", 1),
("\U0001FFFF", 1),
("\U0002FFFD", 0),
("\U0002FFFE", 1),
("\U0002FFFF", 1),
("\U0003FFFD", 0),
("\U0003FFFE", 1),
("\U0003FFFF", 1),
("\U0004FFFD", 0),
("\U0004FFFE", 1),
("\U0004FFFF", 1),
("\U0005FFFD", 0),
("\U0005FFFE", 1),
("\U0005FFFF", 1),
("\U0006FFFD", 0),
("\U0006FFFE", 1),
("\U0006FFFF", 1),
("\U0007FFFD", 0),
("\U0007FFFE", 1),
("\U0007FFFF", 1),
("\U0008FFFD", 0),
("\U0008FFFE", 1),
("\U0008FFFF", 1),
("\U0009FFFD", 0),
("\U0009FFFE", 1),
("\U0009FFFF", 1),
("\U000AFFFD", 0),
("\U000AFFFE", 1),
("\U000AFFFF", 1),
("\U000BFFFD", 0),
("\U000BFFFE", 1),
("\U000BFFFF", 1),
("\U000CFFFD", 0),
("\U000CFFFE", 1),
("\U000CFFFF", 1),
("\U000DFFFD", 0),
("\U000DFFFE", 1),
("\U000DFFFF", 1),
("\U000EFFFD", 0),
("\U000EFFFE", 1),
("\U000EFFFF", 1),
("\U000FFFFD", 0),
("\U000FFFFE", 1),
("\U000FFFFF", 1),
("\U0010FFFD", 0),
("\U0010FFFE", 1),
("\U0010FFFF", 1),
("\x01\x01\x01", 3),
("a\x01a\x01a\x01a", 3)])
def test_invalid_codepoints(inp, num):
stream = HTMLUnicodeInputStream(StringIO(inp))
for _i in range(len(inp)):
stream.char()
assert len(stream.errors) == num
@pytest.mark.skipif(not supports_lone_surrogates, reason="doesn't support lone surrogates")
@pytest.mark.parametrize("inp,num",
[("'\\uD7FF'", 0),
("'\\uD800'", 1),
("'\\uDBFF'", 1),
("'\\uDC00'", 1),
("'\\uDFFF'", 1),
("'\\uE000'", 0),
("'\\uD800\\uD800\\uD800'", 3),
("'a\\uD800a\\uD800a\\uD800a'", 3),
("'\\uDFFF\\uDBFF'", 2),
pytest.param(
"'\\uDBFF\\uDFFF'", 2,
marks=pytest.mark.skipif(
sys.maxunicode == 0xFFFF,
reason="narrow Python"))])
def test_invalid_codepoints_surrogates(inp, num):
inp = eval(inp) # pylint:disable=eval-used
fp = StringIO(inp)
if ord(max(fp.read())) > 0xFFFF:
pytest.skip("StringIO altered string")
fp.seek(0)
stream = HTMLUnicodeInputStream(fp)
for _i in range(len(inp)):
stream.char()
assert len(stream.errors) == num

View file

@ -1,66 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
import io
from six import unichr, text_type
from html5lib._tokenizer import HTMLTokenizer
from html5lib.constants import tokenTypes
def ignore_parse_errors(toks):
for tok in toks:
if tok['type'] != tokenTypes['ParseError']:
yield tok
def test_maintain_attribute_order():
# generate loads to maximize the chance a hash-based mutation will occur
attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
stream = io.StringIO("<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + ">")
toks = HTMLTokenizer(stream)
out = list(ignore_parse_errors(toks))
assert len(out) == 1
assert out[0]['type'] == tokenTypes['StartTag']
attrs_tok = out[0]['data']
assert len(attrs_tok) == len(attrs)
for (in_name, in_value), (out_name, out_value) in zip(attrs, attrs_tok.items()):
assert in_name == out_name
assert in_value == out_value
def test_duplicate_attribute():
stream = io.StringIO("<span a=1 a=2 a=3>")
toks = HTMLTokenizer(stream)
out = list(ignore_parse_errors(toks))
assert len(out) == 1
assert out[0]['type'] == tokenTypes['StartTag']
attrs_tok = out[0]['data']
assert len(attrs_tok) == 1
assert list(attrs_tok.items()) == [('a', '1')]
def test_maintain_duplicate_attribute_order():
# generate loads to maximize the chance a hash-based mutation will occur
attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
stream = io.StringIO("<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + " a=100>")
toks = HTMLTokenizer(stream)
out = list(ignore_parse_errors(toks))
assert len(out) == 1
assert out[0]['type'] == tokenTypes['StartTag']
attrs_tok = out[0]['data']
assert len(attrs_tok) == len(attrs)
for (in_name, in_value), (out_name, out_value) in zip(attrs, attrs_tok.items()):
assert in_name == out_name
assert in_value == out_value

View file

@ -1,40 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
from . import support # noqa
import html5lib
from html5lib.treeadapters import sax
from html5lib.treewalkers import getTreeWalker
def test_to_sax():
handler = support.TracingSaxHandler()
tree = html5lib.parse("""<html xml:lang="en">
<title>Directory Listing</title>
<a href="/"><b/></p>
""", treebuilder="etree")
walker = getTreeWalker("etree")
sax.to_sax(walker(tree), handler)
expected = [
'startDocument',
('startElementNS', ('http://www.w3.org/1999/xhtml', 'html'),
'html', {(None, 'xml:lang'): 'en'}),
('startElementNS', ('http://www.w3.org/1999/xhtml', 'head'), 'head', {}),
('startElementNS', ('http://www.w3.org/1999/xhtml', 'title'), 'title', {}),
('characters', 'Directory Listing'),
('endElementNS', ('http://www.w3.org/1999/xhtml', 'title'), 'title'),
('characters', '\n '),
('endElementNS', ('http://www.w3.org/1999/xhtml', 'head'), 'head'),
('startElementNS', ('http://www.w3.org/1999/xhtml', 'body'), 'body', {}),
('startElementNS', ('http://www.w3.org/1999/xhtml', 'a'), 'a', {(None, 'href'): '/'}),
('startElementNS', ('http://www.w3.org/1999/xhtml', 'b'), 'b', {}),
('startElementNS', ('http://www.w3.org/1999/xhtml', 'p'), 'p', {}),
('endElementNS', ('http://www.w3.org/1999/xhtml', 'p'), 'p'),
('characters', '\n '),
('endElementNS', ('http://www.w3.org/1999/xhtml', 'b'), 'b'),
('endElementNS', ('http://www.w3.org/1999/xhtml', 'a'), 'a'),
('endElementNS', ('http://www.w3.org/1999/xhtml', 'body'), 'body'),
('endElementNS', ('http://www.w3.org/1999/xhtml', 'html'), 'html'),
'endDocument',
]
assert expected == handler.visited
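
A rough sketch of wiring the adapter to a standard xml.sax ContentHandler; the Printer class is an illustrative stand-in for the tracing handler used above.

import html5lib
from html5lib.treeadapters import sax
from html5lib.treewalkers import getTreeWalker
from xml.sax.handler import ContentHandler

class Printer(ContentHandler):
    def startElementNS(self, name, qname, attrs):  # name is (namespace, localname)
        print("start", name)

tree = html5lib.parse("<p>hi</p>", treebuilder="etree")
sax.to_sax(getTreeWalker("etree")(tree), Printer())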

View file

@@ -1,205 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
import itertools
import sys
from six import unichr, text_type
import pytest
try:
import lxml.etree
except ImportError:
pass
from .support import treeTypes
from html5lib import html5parser, treewalkers
from html5lib.filters.lint import Filter as Lint
import re
attrlist = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+", re.M)
def sortattrs(x):
lines = x.group(0).split("\n")
lines.sort()
return "\n".join(lines)
def test_all_tokens():
expected = [
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'},
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
{'data': 'a', 'type': 'Characters'},
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
{'data': 'b', 'type': 'Characters'},
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
{'data': 'c', 'type': 'Characters'},
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'}
]
for _, treeCls in sorted(treeTypes.items()):
if treeCls is None:
continue
p = html5parser.HTMLParser(tree=treeCls["builder"])
document = p.parse("<html><head></head><body>a<div>b</div>c</body></html>")
document = treeCls.get("adapter", lambda x: x)(document)
output = Lint(treeCls["walker"](document))
for expectedToken, outputToken in zip(expected, output):
assert expectedToken == outputToken
def set_attribute_on_first_child(docfrag, name, value, treeName):
"""naively sets an attribute on the first child of the document
fragment passed in"""
setter = {'ElementTree': lambda d: d[0].set,
'DOM': lambda d: d.firstChild.setAttribute}
setter['cElementTree'] = setter['ElementTree']
try:
setter.get(treeName, setter['DOM'])(docfrag)(name, value)
except AttributeError:
setter['ElementTree'](docfrag)(name, value)
def param_treewalker_six_mix():
"""Str/Unicode mix. If str attrs added to tree"""
# On Python 2.x string literals are of type str. Unless, like this
# file, the programmer imports unicode_literals from __future__.
# In that case, string literals become objects of type unicode.
# This test simulates a Py2 user, modifying attributes on a document
# fragment but not using the u'' syntax nor importing unicode_literals
sm_tests = [
('<a href="http://example.com">Example</a>',
[(str('class'), str('test123'))],
'<a>\n class="test123"\n href="http://example.com"\n "Example"'),
('<link href="http://example.com/cow">',
[(str('rel'), str('alternate'))],
'<link>\n href="http://example.com/cow"\n rel="alternate"\n "Example"')
]
for tree in sorted(treeTypes.items()):
for intext, attrs, expected in sm_tests:
yield intext, expected, attrs, tree
@pytest.mark.parametrize("intext, expected, attrs_to_add, tree", param_treewalker_six_mix())
def test_treewalker_six_mix(intext, expected, attrs_to_add, tree):
"""tests what happens when we add attributes to the intext"""
treeName, treeClass = tree
if treeClass is None:
pytest.skip("Treebuilder not loaded")
parser = html5parser.HTMLParser(tree=treeClass["builder"])
document = parser.parseFragment(intext)
for nom, val in attrs_to_add:
set_attribute_on_first_child(document, nom, val, treeName)
document = treeClass.get("adapter", lambda x: x)(document)
output = treewalkers.pprint(treeClass["walker"](document))
output = attrlist.sub(sortattrs, output)
if output not in expected:
raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))
@pytest.mark.parametrize("tree,char", itertools.product(sorted(treeTypes.items()), ["x", "\u1234"]))
def test_fragment_single_char(tree, char):
expected = [
{'data': char, 'type': 'Characters'}
]
treeName, treeClass = tree
if treeClass is None:
pytest.skip("Treebuilder not loaded")
parser = html5parser.HTMLParser(tree=treeClass["builder"])
document = parser.parseFragment(char)
document = treeClass.get("adapter", lambda x: x)(document)
output = Lint(treeClass["walker"](document))
assert list(output) == expected
@pytest.mark.skipif(treeTypes["lxml"] is None, reason="lxml not importable")
def test_lxml_xml():
expected = [
{'data': {}, 'name': 'div', 'namespace': None, 'type': 'StartTag'},
{'data': {}, 'name': 'div', 'namespace': None, 'type': 'StartTag'},
{'name': 'div', 'namespace': None, 'type': 'EndTag'},
{'name': 'div', 'namespace': None, 'type': 'EndTag'}
]
lxmltree = lxml.etree.fromstring('<div><div></div></div>')
walker = treewalkers.getTreeWalker('lxml')
output = Lint(walker(lxmltree))
assert list(output) == expected
@pytest.mark.parametrize("treeName",
[pytest.param(treeName, marks=[getattr(pytest.mark, treeName),
pytest.mark.skipif(
treeName != "lxml" or
sys.version_info < (3, 7), reason="dict order undef")])
for treeName in sorted(treeTypes.keys())])
def test_maintain_attribute_order(treeName):
treeAPIs = treeTypes[treeName]
if treeAPIs is None:
pytest.skip("Treebuilder not loaded")
# generate loads to maximize the chance a hash-based mutation will occur
attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
data = "<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + ">"
parser = html5parser.HTMLParser(tree=treeAPIs["builder"])
document = parser.parseFragment(data)
document = treeAPIs.get("adapter", lambda x: x)(document)
output = list(Lint(treeAPIs["walker"](document)))
assert len(output) == 2
assert output[0]['type'] == 'StartTag'
assert output[1]['type'] == "EndTag"
attrs_out = output[0]['data']
assert len(attrs) == len(attrs_out)
for (in_name, in_value), (out_name, out_value) in zip(attrs, attrs_out.items()):
assert (None, in_name) == out_name
assert in_value == out_value
@pytest.mark.parametrize("treeName",
[pytest.param(treeName, marks=[getattr(pytest.mark, treeName),
pytest.mark.skipif(
treeName != "lxml" or
sys.version_info < (3, 7), reason="dict order undef")])
for treeName in sorted(treeTypes.keys())])
def test_maintain_attribute_order_adjusted(treeName):
treeAPIs = treeTypes[treeName]
if treeAPIs is None:
pytest.skip("Treebuilder not loaded")
# generate loads to maximize the chance a hash-based mutation will occur
data = "<svg a=1 refx=2 b=3 xml:lang=4 c=5>"
parser = html5parser.HTMLParser(tree=treeAPIs["builder"])
document = parser.parseFragment(data)
document = treeAPIs.get("adapter", lambda x: x)(document)
output = list(Lint(treeAPIs["walker"](document)))
assert len(output) == 2
assert output[0]['type'] == 'StartTag'
assert output[1]['type'] == "EndTag"
attrs_out = output[0]['data']
assert list(attrs_out.items()) == [((None, 'a'), '1'),
((None, 'refX'), '2'),
((None, 'b'), '3'),
(('http://www.w3.org/XML/1998/namespace', 'lang'), '4'),
((None, 'c'), '5')]
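
A short end-to-end sketch of the parse-then-walk pipeline these tests cover (assuming the default etree treebuilder is available):

import html5lib
from html5lib import treewalkers
from html5lib.filters.lint import Filter as Lint

fragment = html5lib.parseFragment("<div id=x>hi</div>", treebuilder="etree")
walker = treewalkers.getTreeWalker("etree")
for token in Lint(walker(fragment)):
    # e.g. StartTag div, Characters hi, EndTag div
    print(token["type"], token.get("name", token.get("data")))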

View file

@@ -1,125 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
from html5lib.filters.whitespace import Filter
from html5lib.constants import spaceCharacters
spaceCharacters = "".join(spaceCharacters)
def runTest(input, expected):
output = list(Filter(input))
errorMsg = "\n".join(["\n\nInput:", str(input),
"\nExpected:", str(expected),
"\nReceived:", str(output)])
assert expected == output, errorMsg
def runTestUnmodifiedOutput(input):
runTest(input, input)
def testPhrasingElements():
runTestUnmodifiedOutput(
[{"type": "Characters", "data": "This is a "},
{"type": "StartTag", "name": "span", "data": []},
{"type": "Characters", "data": "phrase"},
{"type": "EndTag", "name": "span", "data": []},
{"type": "SpaceCharacters", "data": " "},
{"type": "Characters", "data": "with"},
{"type": "SpaceCharacters", "data": " "},
{"type": "StartTag", "name": "em", "data": []},
{"type": "Characters", "data": "emphasised text"},
{"type": "EndTag", "name": "em", "data": []},
{"type": "Characters", "data": " and an "},
{"type": "StartTag", "name": "img", "data": [["alt", "image"]]},
{"type": "Characters", "data": "."}])
def testLeadingWhitespace():
runTest(
[{"type": "StartTag", "name": "p", "data": []},
{"type": "SpaceCharacters", "data": spaceCharacters},
{"type": "Characters", "data": "foo"},
{"type": "EndTag", "name": "p", "data": []}],
[{"type": "StartTag", "name": "p", "data": []},
{"type": "SpaceCharacters", "data": " "},
{"type": "Characters", "data": "foo"},
{"type": "EndTag", "name": "p", "data": []}])
def testLeadingWhitespaceAsCharacters():
runTest(
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": spaceCharacters + "foo"},
{"type": "EndTag", "name": "p", "data": []}],
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": " foo"},
{"type": "EndTag", "name": "p", "data": []}])
def testTrailingWhitespace():
runTest(
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": "foo"},
{"type": "SpaceCharacters", "data": spaceCharacters},
{"type": "EndTag", "name": "p", "data": []}],
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": "foo"},
{"type": "SpaceCharacters", "data": " "},
{"type": "EndTag", "name": "p", "data": []}])
def testTrailingWhitespaceAsCharacters():
runTest(
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": "foo" + spaceCharacters},
{"type": "EndTag", "name": "p", "data": []}],
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": "foo "},
{"type": "EndTag", "name": "p", "data": []}])
def testWhitespace():
runTest(
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": "foo" + spaceCharacters + "bar"},
{"type": "EndTag", "name": "p", "data": []}],
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": "foo bar"},
{"type": "EndTag", "name": "p", "data": []}])
def testLeadingWhitespaceInPre():
runTestUnmodifiedOutput(
[{"type": "StartTag", "name": "pre", "data": []},
{"type": "SpaceCharacters", "data": spaceCharacters},
{"type": "Characters", "data": "foo"},
{"type": "EndTag", "name": "pre", "data": []}])
def testLeadingWhitespaceAsCharactersInPre():
runTestUnmodifiedOutput(
[{"type": "StartTag", "name": "pre", "data": []},
{"type": "Characters", "data": spaceCharacters + "foo"},
{"type": "EndTag", "name": "pre", "data": []}])
def testTrailingWhitespaceInPre():
runTestUnmodifiedOutput(
[{"type": "StartTag", "name": "pre", "data": []},
{"type": "Characters", "data": "foo"},
{"type": "SpaceCharacters", "data": spaceCharacters},
{"type": "EndTag", "name": "pre", "data": []}])
def testTrailingWhitespaceAsCharactersInPre():
runTestUnmodifiedOutput(
[{"type": "StartTag", "name": "pre", "data": []},
{"type": "Characters", "data": "foo" + spaceCharacters},
{"type": "EndTag", "name": "pre", "data": []}])
def testWhitespaceInPre():
runTestUnmodifiedOutput(
[{"type": "StartTag", "name": "pre", "data": []},
{"type": "Characters", "data": "foo" + spaceCharacters + "bar"},
{"type": "EndTag", "name": "pre", "data": []}])

View file

@@ -1,2 +0,0 @@
*.dat -text diff
*.test -text diff

View file

@@ -1,34 +0,0 @@
Credits
=======
The ``html5lib`` test data is maintained by:
- James Graham
- Geoffrey Sneddon
Contributors
------------
- Adam Barth
- Andi Sidwell
- Anne van Kesteren
- David Flanagan
- Edward Z. Yang
- Geoffrey Sneddon
- Henri Sivonen
- Ian Hickson
- Jacques Distler
- James Graham
- Lachlan Hunt
- lantis63
- Mark Pilgrim
- Mats Palmgren
- Ms2ger
- Nolan Waite
- Philip Taylor
- Rafael Weinstein
- Ryan King
- Sam Ruby
- Simon Pieters
- Thomas Broyer

View file

@@ -1,21 +0,0 @@
Copyright (c) 2006-2013 James Graham, Geoffrey Sneddon, and
other contributors
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View file

@@ -1,51 +0,0 @@
老子《道德經》 第一~四十章
老子道經
第一章
道可道,非常道。名可名,非常名。無,名天地之始﹔有,名萬物之母。
故常無,欲以觀其妙;常有,欲以觀其徼。此兩者,同出而異名,同謂之
玄。玄之又玄,眾妙之門。
第二章
天下皆知美之為美,斯惡矣﹔皆知善之為善,斯不善矣。故有無相生,難
易相成,長短相形,高下相傾,音聲相和,前後相隨。是以聖人處「無為
」之事,行「不言」之教。萬物作焉而不辭,生而不有,為而不恃,功成
而弗居。夫唯弗居,是以不去。
第三章
不尚賢,使民不爭﹔不貴難得之貨,使民不為盜﹔不見可欲,使民心不亂
。是以「聖人」之治,虛其心,實其腹,弱其志,強其骨。常使民無知無
欲。使夫智者不敢為也。為「無為」,則無不治。
第四章
「道」沖,而用之或不盈。淵兮,似萬物之宗﹔挫其銳,解其紛,和其光
,同其塵﹔湛兮似或存。吾不知誰之子?象帝之先。
第五章
天地不仁,以萬物為芻狗﹔聖人不仁,以百姓為芻狗。天地之間,其猶橐
蘥乎?虛而不屈,動而愈出。多言數窮,不如守中。
第六章
谷神不死,是謂玄牝。玄牝之門,是謂天地根。綿綿若存,用之不勤。
第七章
天長地久。天地所以能長且久者,以其不自生,故能長久。是以聖人後其
身而身先,外其身而身存。非以其無私邪?故能成其私。
第八章
上善若水。水善利萬物而不爭。處眾人之所惡,故幾於道。居善地,心善
淵,與善仁,言善信,政善治,事善能,動善時。夫唯不爭,故無尤。
第九章
持而盈之,不如其已﹔揣而銳之,不可長保。金玉滿堂,莫之能守﹔富貴
而驕,自遺其咎。功遂身退,天之道。

View file

@@ -1,10 +0,0 @@
#data
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=euc-jp">
<!--京-->
<title>Yahoo! JAPAN</title>
<meta name="description" content="日本最大級のポータルサイト。検索、オークション、ニュース、メール、コミュニティ、ショッピング、など80以上のサービスを展開。あなたの生活をより豊かにする「ライフ・エンジン」を目指していきます。">
<style type="text/css" media="all">
#encoding
euc-jp

File diff suppressed because one or more lines are too long

View file

@@ -1,115 +0,0 @@
#data
<meta
#encoding
windows-1252
#data
<
#encoding
windows-1252
#data
<!
#encoding
windows-1252
#data
<meta charset = "
#encoding
windows-1252
#data
<meta charset=euc-jp
#encoding
windows-1252
#data
<meta <meta charset='euc-jp'>
#encoding
euc-jp
#data
<meta charset = 'euc-jp'>
#encoding
euc-jp
#data
<!-- -->
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
#encoding
utf-8
#data
<!-- -->
<meta http-equiv="Content-Type" content="text/html; charset=utf
#encoding
windows-1252
#data
<meta http-equiv="Content-Type<meta charset="utf-8">
#encoding
windows-1252
#data
<meta http-equiv="Content-Type" content="text/html; charset='utf-8'">
#encoding
utf-8
#data
<meta http-equiv="Content-Type" content="text/html; charset='utf-8">
#encoding
windows-1252
#data
<meta
#encoding
windows-1252
#data
<meta charset =
#encoding
windows-1252
#data
<meta charset= utf-8
>
#encoding
utf-8
#data
<meta content = "text/html;
#encoding
windows-1252
#data
<meta charset="UTF-16">
#encoding
utf-8
#data
<meta charset="UTF-16LE">
#encoding
utf-8
#data
<meta charset="UTF-16BE">
#encoding
utf-8
#data
<html a=ñ>
<meta charset="utf-8">
#encoding
utf-8
#data
<html ñ>
<meta charset="utf-8">
#encoding
utf-8
#data
<html>ñ
<meta charset="utf-8">
#encoding
utf-8

View file

@@ -1,125 +0,0 @@
{"tests": [
{"description": "proper attribute value escaping",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "test \"with\" &quot;"}]]],
"expected": ["<span title='test \"with\" &amp;quot;'>"]
},
{"description": "proper attribute value non-quoting",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo"}]]],
"expected": ["<span title=foo>"],
"xhtml": ["<span title=\"foo\">"]
},
{"description": "proper attribute value non-quoting (with <)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo<bar"}]]],
"expected": ["<span title=foo<bar>"],
"xhtml": ["<span title=\"foo&lt;bar\">"]
},
{"description": "proper attribute value quoting (with =)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo=bar"}]]],
"expected": ["<span title=\"foo=bar\">"]
},
{"description": "proper attribute value quoting (with >)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo>bar"}]]],
"expected": ["<span title=\"foo>bar\">"]
},
{"description": "proper attribute value quoting (with \")",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\"bar"}]]],
"expected": ["<span title='foo\"bar'>"]
},
{"description": "proper attribute value quoting (with ')",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo'bar"}]]],
"expected": ["<span title=\"foo'bar\">"]
},
{"description": "proper attribute value quoting (with both \" and ')",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo'bar\"baz"}]]],
"expected": ["<span title=\"foo'bar&quot;baz\">"]
},
{"description": "proper attribute value quoting (with space)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo bar"}]]],
"expected": ["<span title=\"foo bar\">"]
},
{"description": "proper attribute value quoting (with tab)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\tbar"}]]],
"expected": ["<span title=\"foo\tbar\">"]
},
{"description": "proper attribute value quoting (with LF)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\nbar"}]]],
"expected": ["<span title=\"foo\nbar\">"]
},
{"description": "proper attribute value quoting (with CR)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\rbar"}]]],
"expected": ["<span title=\"foo\rbar\">"]
},
{"description": "proper attribute value non-quoting (with linetab)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\u000Bbar"}]]],
"expected": ["<span title=foo\u000Bbar>"],
"xhtml": ["<span title=\"foo\u000Bbar\">"]
},
{"description": "proper attribute value quoting (with form feed)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\u000Cbar"}]]],
"expected": ["<span title=\"foo\u000Cbar\">"]
},
{"description": "void element (as EmptyTag token)",
"input": [["EmptyTag", "img", {}]],
"expected": ["<img>"],
"xhtml": ["<img />"]
},
{"description": "void element (as StartTag token)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "img", {}]],
"expected": ["<img>"],
"xhtml": ["<img />"]
},
{"description": "doctype in error",
"input": [["Doctype", "foo"]],
"expected": ["<!DOCTYPE foo>"]
},
{"description": "character data",
"options": {"encoding":"utf-8"},
"input": [["Characters", "a<b>c&d"]],
"expected": ["a&lt;b&gt;c&amp;d"]
},
{"description": "rcdata",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "a<b>c&d"]],
"expected": ["<script>a<b>c&d"],
"xhtml": ["<script>a&lt;b&gt;c&amp;d"]
},
{"description": "doctype",
"input": [["Doctype", "HTML"]],
"expected": ["<!DOCTYPE HTML>"]
},
{"description": "HTML 4.01 DOCTYPE",
"input": [["Doctype", "HTML", "-//W3C//DTD HTML 4.01//EN", "http://www.w3.org/TR/html4/strict.dtd"]],
"expected": ["<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"]
},
{"description": "HTML 4.01 DOCTYPE without system identifer",
"input": [["Doctype", "HTML", "-//W3C//DTD HTML 4.01//EN"]],
"expected": ["<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\">"]
},
{"description": "IBM DOCTYPE without public identifer",
"input": [["Doctype", "html", "", "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"]],
"expected": ["<!DOCTYPE html SYSTEM \"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd\">"]
}
]}
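
For reference, a rough sketch of how fixtures like these map onto the serializer API (option names as in html5lib 1.x, with the etree tree walker):

import html5lib
from html5lib import serializer, treewalkers

fragment = html5lib.parseFragment("<span title=foo=bar>x</span>")
walker = treewalkers.getTreeWalker("etree")
ser = serializer.HTMLSerializer(quote_attr_values="always")
# quote_attr_values="always" forces quoted output: <span title="foo=bar">x</span>
print("".join(ser.serialize(walker(fragment))))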

View file

@@ -1,66 +0,0 @@
{"tests": [
{"description": "no encoding",
"options": {"inject_meta_charset": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": [""],
"xhtml": ["<head></head>"]
},
{"description": "empytag head",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta charset=utf-8>"],
"xhtml": ["<head><meta charset=\"utf-8\" /></head>"]
},
{"description": "head w/title",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["StartTag", "http://www.w3.org/1999/xhtml","title",{}], ["Characters", "foo"],["EndTag", "http://www.w3.org/1999/xhtml", "title"], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta charset=utf-8><title>foo</title>"],
"xhtml": ["<head><meta charset=\"utf-8\" /><title>foo</title></head>"]
},
{"description": "head w/meta-charset",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta charset=utf-8>"],
"xhtml": ["<head><meta charset=\"utf-8\" /></head>"]
},
{"description": "head w/ two meta-charset",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta charset=utf-8><meta charset=utf-8>", "<head><meta charset=utf-8><meta charset=ascii>"],
"xhtml": ["<head><meta charset=\"utf-8\" /><meta charset=\"utf-8\" /></head>", "<head><meta charset=\"utf-8\" /><meta charset=\"ascii\" /></head>"]
},
{"description": "head w/robots",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta charset=utf-8><meta content=noindex name=robots>"],
"xhtml": ["<head><meta charset=\"utf-8\" /><meta content=\"noindex\" name=\"robots\" /></head>"]
},
{"description": "head w/robots & charset",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta content=noindex name=robots><meta charset=utf-8>"],
"xhtml": ["<head><meta content=\"noindex\" name=\"robots\" /><meta charset=\"utf-8\" /></head>"]
},
{"description": "head w/ charset in http-equiv content-type",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "http-equiv", "value": "content-type"}, {"namespace": null, "name": "content", "value": "text/html; charset=ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
"xhtml": ["<head><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
},
{"description": "head w/robots & charset in http-equiv content-type",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EmptyTag","meta",[{"namespace": null, "name": "http-equiv", "value": "content-type"}, {"namespace": null, "name": "content", "value": "text/html; charset=ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta content=noindex name=robots><meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
"xhtml": ["<head><meta content=\"noindex\" name=\"robots\" /><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
}
]}

View file

@@ -1,965 +0,0 @@
{"tests": [
{"description": "html start-tag followed by text, with attributes",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", [{"namespace": null, "name": "lang", "value": "en"}]], ["Characters", "foo"]],
"expected": ["<html lang=en>foo"]
},
{"description": "html start-tag followed by comment",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Comment", "foo"]],
"expected": ["<html><!--foo-->"]
},
{"description": "html start-tag followed by space character",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Characters", " foo"]],
"expected": ["<html> foo"]
},
{"description": "html start-tag followed by text",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "html start-tag followed by start-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "html start-tag followed by end-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "html start-tag at EOF (shouldn't ever happen?!)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}]],
"expected": [""]
},
{"description": "html end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Comment", "foo"]],
"expected": ["</html><!--foo-->"]
},
{"description": "html end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Characters", " foo"]],
"expected": ["</html> foo"]
},
{"description": "html end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "html end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "html end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "html end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"]],
"expected": [""]
},
{"description": "head start-tag followed by comment",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Comment", "foo"]],
"expected": ["<head><!--foo-->"]
},
{"description": "head start-tag followed by space character",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Characters", " foo"]],
"expected": ["<head> foo"]
},
{"description": "head start-tag followed by text",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Characters", "foo"]],
"expected": ["<head>foo"]
},
{"description": "head start-tag followed by start-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "head start-tag followed by end-tag (shouldn't ever happen?!)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["<head></foo>", "</foo>"]
},
{"description": "empty head element",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": [""]
},
{"description": "head start-tag followed by empty-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "head start-tag at EOF (shouldn't ever happen?!)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}]],
"expected": ["<head>", ""]
},
{"description": "head end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Comment", "foo"]],
"expected": ["</head><!--foo-->"]
},
{"description": "head end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Characters", " foo"]],
"expected": ["</head> foo"]
},
{"description": "head end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "head end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "head end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "head end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": [""]
},
{"description": "body start-tag followed by comment",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Comment", "foo"]],
"expected": ["<body><!--foo-->"]
},
{"description": "body start-tag followed by space character",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Characters", " foo"]],
"expected": ["<body> foo"]
},
{"description": "body start-tag followed by text",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "body start-tag followed by start-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "body start-tag followed by end-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "body start-tag at EOF (shouldn't ever happen?!)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}]],
"expected": [""]
},
{"description": "body end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Comment", "foo"]],
"expected": ["</body><!--foo-->"]
},
{"description": "body end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Characters", " foo"]],
"expected": ["</body> foo"]
},
{"description": "body end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "body end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "body end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "body end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"]],
"expected": [""]
},
{"description": "li end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Comment", "foo"]],
"expected": ["</li><!--foo-->"]
},
{"description": "li end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Characters", " foo"]],
"expected": ["</li> foo"]
},
{"description": "li end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Characters", "foo"]],
"expected": ["</li>foo"]
},
{"description": "li end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</li><foo>"]
},
{"description": "li end-tag followed by li start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["StartTag", "http://www.w3.org/1999/xhtml", "li", {}]],
"expected": ["<li>"]
},
{"description": "li end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "li end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"]],
"expected": [""]
},
{"description": "dt end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Comment", "foo"]],
"expected": ["</dt><!--foo-->"]
},
{"description": "dt end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Characters", " foo"]],
"expected": ["</dt> foo"]
},
{"description": "dt end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Characters", "foo"]],
"expected": ["</dt>foo"]
},
{"description": "dt end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</dt><foo>"]
},
{"description": "dt end-tag followed by dt start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "dt", {}]],
"expected": ["<dt>"]
},
{"description": "dt end-tag followed by dd start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "dd", {}]],
"expected": ["<dd>"]
},
{"description": "dt end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</dt></foo>"]
},
{"description": "dt end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"]],
"expected": ["</dt>"]
},
{"description": "dd end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Comment", "foo"]],
"expected": ["</dd><!--foo-->"]
},
{"description": "dd end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Characters", " foo"]],
"expected": ["</dd> foo"]
},
{"description": "dd end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Characters", "foo"]],
"expected": ["</dd>foo"]
},
{"description": "dd end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</dd><foo>"]
},
{"description": "dd end-tag followed by dd start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "dd", {}]],
"expected": ["<dd>"]
},
{"description": "dd end-tag followed by dt start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "dt", {}]],
"expected": ["<dt>"]
},
{"description": "dd end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "dd end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"]],
"expected": [""]
},
{"description": "p end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Comment", "foo"]],
"expected": ["</p><!--foo-->"]
},
{"description": "p end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Characters", " foo"]],
"expected": ["</p> foo"]
},
{"description": "p end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Characters", "foo"]],
"expected": ["</p>foo"]
},
{"description": "p end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</p><foo>"]
},
{"description": "p end-tag followed by address start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "address", {}]],
"expected": ["<address>"]
},
{"description": "p end-tag followed by article start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "article", {}]],
"expected": ["<article>"]
},
{"description": "p end-tag followed by aside start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "aside", {}]],
"expected": ["<aside>"]
},
{"description": "p end-tag followed by blockquote start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "blockquote", {}]],
"expected": ["<blockquote>"]
},
{"description": "p end-tag followed by datagrid start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "datagrid", {}]],
"expected": ["<datagrid>"]
},
{"description": "p end-tag followed by dialog start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dialog", {}]],
"expected": ["<dialog>"]
},
{"description": "p end-tag followed by dir start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dir", {}]],
"expected": ["<dir>"]
},
{"description": "p end-tag followed by div start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "div", {}]],
"expected": ["<div>"]
},
{"description": "p end-tag followed by dl start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dl", {}]],
"expected": ["<dl>"]
},
{"description": "p end-tag followed by fieldset start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "fieldset", {}]],
"expected": ["<fieldset>"]
},
{"description": "p end-tag followed by footer start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "footer", {}]],
"expected": ["<footer>"]
},
{"description": "p end-tag followed by form start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "form", {}]],
"expected": ["<form>"]
},
{"description": "p end-tag followed by h1 start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h1", {}]],
"expected": ["<h1>"]
},
{"description": "p end-tag followed by h2 start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h2", {}]],
"expected": ["<h2>"]
},
{"description": "p end-tag followed by h3 start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h3", {}]],
"expected": ["<h3>"]
},
{"description": "p end-tag followed by h4 start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h4", {}]],
"expected": ["<h4>"]
},
{"description": "p end-tag followed by h5 start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h5", {}]],
"expected": ["<h5>"]
},
{"description": "p end-tag followed by h6 start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h6", {}]],
"expected": ["<h6>"]
},
{"description": "p end-tag followed by header start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "header", {}]],
"expected": ["<header>"]
},
{"description": "p end-tag followed by hr empty-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["EmptyTag", "hr", {}]],
"expected": ["<hr>"]
},
{"description": "p end-tag followed by menu start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "menu", {}]],
"expected": ["<menu>"]
},
{"description": "p end-tag followed by nav start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "nav", {}]],
"expected": ["<nav>"]
},
{"description": "p end-tag followed by ol start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "ol", {}]],
"expected": ["<ol>"]
},
{"description": "p end-tag followed by p start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "p", {}]],
"expected": ["<p>"]
},
{"description": "p end-tag followed by pre start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}]],
"expected": ["<pre>"]
},
{"description": "p end-tag followed by section start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "section", {}]],
"expected": ["<section>"]
},
{"description": "p end-tag followed by table start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "table", {}]],
"expected": ["<table>"]
},
{"description": "p end-tag followed by ul start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "ul", {}]],
"expected": ["<ul>"]
},
{"description": "p end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "p end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"]],
"expected": [""]
},
{"description": "optgroup end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Comment", "foo"]],
"expected": ["</optgroup><!--foo-->"]
},
{"description": "optgroup end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Characters", " foo"]],
"expected": ["</optgroup> foo"]
},
{"description": "optgroup end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Characters", "foo"]],
"expected": ["</optgroup>foo"]
},
{"description": "optgroup end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</optgroup><foo>"]
},
{"description": "optgroup end-tag followed by optgroup start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "optgroup", {}]],
"expected": ["<optgroup>"]
},
{"description": "optgroup end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "optgroup end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"]],
"expected": [""]
},
{"description": "option end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Comment", "foo"]],
"expected": ["</option><!--foo-->"]
},
{"description": "option end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Characters", " foo"]],
"expected": ["</option> foo"]
},
{"description": "option end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Characters", "foo"]],
"expected": ["</option>foo"]
},
{"description": "option end-tag followed by optgroup start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "optgroup", {}]],
"expected": ["<optgroup>"]
},
{"description": "option end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</option><foo>"]
},
{"description": "option end-tag followed by option start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "option", {}]],
"expected": ["<option>"]
},
{"description": "option end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "option end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"]],
"expected": [""]
},
{"description": "colgroup start-tag followed by comment",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Comment", "foo"]],
"expected": ["<colgroup><!--foo-->"]
},
{"description": "colgroup start-tag followed by space character",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Characters", " foo"]],
"expected": ["<colgroup> foo"]
},
{"description": "colgroup start-tag followed by text",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Characters", "foo"]],
"expected": ["<colgroup>foo"]
},
{"description": "colgroup start-tag followed by start-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<colgroup><foo>"]
},
{"description": "first colgroup in a table with a col child",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "table", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["EmptyTag", "col", {}]],
"expected": ["<table><col>"]
},
{"description": "colgroup with a col child, following another colgroup",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "col", {}]],
"expected": ["</colgroup><col>", "<colgroup><col>"]
},
{"description": "colgroup start-tag followed by end-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["<colgroup></foo>"]
},
{"description": "colgroup start-tag at EOF",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}]],
"expected": ["<colgroup>"]
},
{"description": "colgroup end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Comment", "foo"]],
"expected": ["</colgroup><!--foo-->"]
},
{"description": "colgroup end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Characters", " foo"]],
"expected": ["</colgroup> foo"]
},
{"description": "colgroup end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "colgroup end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "colgroup end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "colgroup end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"]],
"expected": [""]
},
{"description": "thead end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Comment", "foo"]],
"expected": ["</thead><!--foo-->"]
},
{"description": "thead end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Characters", " foo"]],
"expected": ["</thead> foo"]
},
{"description": "thead end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Characters", "foo"]],
"expected": ["</thead>foo"]
},
{"description": "thead end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</thead><foo>"]
},
{"description": "thead end-tag followed by tbody start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
"expected": ["<tbody>"]
},
{"description": "thead end-tag followed by tfoot start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tfoot", {}]],
"expected": ["<tfoot>"]
},
{"description": "thead end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</thead></foo>"]
},
{"description": "thead end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"]],
"expected": ["</thead>"]
},
{"description": "tbody start-tag followed by comment",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Comment", "foo"]],
"expected": ["<tbody><!--foo-->"]
},
{"description": "tbody start-tag followed by space character",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Characters", " foo"]],
"expected": ["<tbody> foo"]
},
{"description": "tbody start-tag followed by text",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Characters", "foo"]],
"expected": ["<tbody>foo"]
},
{"description": "tbody start-tag followed by start-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<tbody><foo>"]
},
{"description": "first tbody in a table with a tr child",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "table", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
"expected": ["<table><tr>"]
},
{"description": "tbody with a tr child, following another tbody",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
"expected": ["<tbody><tr>", "</tbody><tr>"]
},
{"description": "tbody with a tr child, following a thead",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
"expected": ["<tbody><tr>", "</thead><tr>"]
},
{"description": "tbody with a tr child, following a tfoot",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
"expected": ["<tbody><tr>", "</tfoot><tr>"]
},
{"description": "tbody start-tag followed by end-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["<tbody></foo>"]
},
{"description": "tbody start-tag at EOF",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
"expected": ["<tbody>"]
},
{"description": "tbody end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Comment", "foo"]],
"expected": ["</tbody><!--foo-->"]
},
{"description": "tbody end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Characters", " foo"]],
"expected": ["</tbody> foo"]
},
{"description": "tbody end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Characters", "foo"]],
"expected": ["</tbody>foo"]
},
{"description": "tbody end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</tbody><foo>"]
},
{"description": "tbody end-tag followed by tbody start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
"expected": ["<tbody>", "</tbody>"]
},
{"description": "tbody end-tag followed by tfoot start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tfoot", {}]],
"expected": ["<tfoot>"]
},
{"description": "tbody end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "tbody end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"]],
"expected": [""]
},
{"description": "tfoot end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Comment", "foo"]],
"expected": ["</tfoot><!--foo-->"]
},
{"description": "tfoot end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Characters", " foo"]],
"expected": ["</tfoot> foo"]
},
{"description": "tfoot end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Characters", "foo"]],
"expected": ["</tfoot>foo"]
},
{"description": "tfoot end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</tfoot><foo>"]
},
{"description": "tfoot end-tag followed by tbody start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
"expected": ["<tbody>", "</tfoot>"]
},
{"description": "tfoot end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "tfoot end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"]],
"expected": [""]
},
{"description": "tr end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Comment", "foo"]],
"expected": ["</tr><!--foo-->"]
},
{"description": "tr end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Characters", " foo"]],
"expected": ["</tr> foo"]
},
{"description": "tr end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Characters", "foo"]],
"expected": ["</tr>foo"]
},
{"description": "tr end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</tr><foo>"]
},
{"description": "tr end-tag followed by tr start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
"expected": ["<tr>", "</tr>"]
},
{"description": "tr end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "tr end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"]],
"expected": [""]
},
{"description": "td end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Comment", "foo"]],
"expected": ["</td><!--foo-->"]
},
{"description": "td end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Characters", " foo"]],
"expected": ["</td> foo"]
},
{"description": "td end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Characters", "foo"]],
"expected": ["</td>foo"]
},
{"description": "td end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</td><foo>"]
},
{"description": "td end-tag followed by td start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "td", {}]],
"expected": ["<td>", "</td>"]
},
{"description": "td end-tag followed by th start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "th", {}]],
"expected": ["<th>", "</td>"]
},
{"description": "td end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "td end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"]],
"expected": [""]
},
{"description": "th end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Comment", "foo"]],
"expected": ["</th><!--foo-->"]
},
{"description": "th end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Characters", " foo"]],
"expected": ["</th> foo"]
},
{"description": "th end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Characters", "foo"]],
"expected": ["</th>foo"]
},
{"description": "th end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</th><foo>"]
},
{"description": "th end-tag followed by th start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "th", {}]],
"expected": ["<th>", "</th>"]
},
{"description": "th end-tag followed by td start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "td", {}]],
"expected": ["<td>", "</th>"]
},
{"description": "th end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "th end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml" , "th"]],
"expected": [""]
}
]}

View file

@ -1,60 +0,0 @@
{"tests":[
{"description": "quote_char=\"'\"",
"options": {"quote_char": "'"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "test 'with' quote_char"}]]],
"expected": ["<span title='test &#39;with&#39; quote_char'>"]
},
{"description": "quote_attr_values=true",
"options": {"quote_attr_values": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "button", [{"namespace": null, "name": "disabled", "value" :"disabled"}]]],
"expected": ["<button disabled>"],
"xhtml": ["<button disabled=\"disabled\">"]
},
{"description": "quote_attr_values=true with irrelevant",
"options": {"quote_attr_values": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :"irrelevant"}]]],
"expected": ["<div irrelevant>"],
"xhtml": ["<div irrelevant=\"irrelevant\">"]
},
{"description": "use_trailing_solidus=true with void element",
"options": {"use_trailing_solidus": true},
"input": [["EmptyTag", "img", {}]],
"expected": ["<img />"]
},
{"description": "use_trailing_solidus=true with non-void element",
"options": {"use_trailing_solidus": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", {}]],
"expected": ["<div>"]
},
{"description": "minimize_boolean_attributes=false",
"options": {"minimize_boolean_attributes": false},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :"irrelevant"}]]],
"expected": ["<div irrelevant=irrelevant>"],
"xhtml": ["<div irrelevant=\"irrelevant\">"]
},
{"description": "minimize_boolean_attributes=false with empty value",
"options": {"minimize_boolean_attributes": false},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :""}]]],
"expected": ["<div irrelevant=\"\">"]
},
{"description": "escape less than signs in attribute values",
"options": {"escape_lt_in_attrs": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "a", [{"namespace": null, "name": "title", "value": "a<b>c&d"}]]],
"expected": ["<a title=\"a&lt;b>c&amp;d\">"]
},
{"description": "rcdata",
"options": {"escape_rcdata": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "a<b>c&d"]],
"expected": ["<script>a&lt;b&gt;c&amp;d"]
}
]}

View file

@ -1,51 +0,0 @@
{"tests": [
{"description": "bare text with leading spaces",
"options": {"strip_whitespace": true},
"input": [["Characters", "\t\r\n\u000C foo"]],
"expected": [" foo"]
},
{"description": "bare text with trailing spaces",
"options": {"strip_whitespace": true},
"input": [["Characters", "foo \t\r\n\u000C"]],
"expected": ["foo "]
},
{"description": "bare text with inner spaces",
"options": {"strip_whitespace": true},
"input": [["Characters", "foo \t\r\n\u000C bar"]],
"expected": ["foo bar"]
},
{"description": "text within <pre>",
"options": {"strip_whitespace": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "pre"]],
"expected": ["<pre>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</pre>"]
},
{"description": "text within <pre>, with inner markup",
"options": {"strip_whitespace": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}], ["Characters", "\t\r\n\u000C fo"], ["StartTag", "http://www.w3.org/1999/xhtml", "span", {}], ["Characters", "o \t\r\n\u000C b"], ["EndTag", "http://www.w3.org/1999/xhtml", "span"], ["Characters", "ar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "pre"]],
"expected": ["<pre>\t\r\n\u000C fo<span>o \t\r\n\u000C b</span>ar \t\r\n\u000C</pre>"]
},
{"description": "text within <textarea>",
"options": {"strip_whitespace": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "textarea", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "textarea"]],
"expected": ["<textarea>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</textarea>"]
},
{"description": "text within <script>",
"options": {"strip_whitespace": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "script"]],
"expected": ["<script>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</script>"]
},
{"description": "text within <style>",
"options": {"strip_whitespace": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "style"]],
"expected": ["<style>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</style>"]
}
]}

View file

@ -1,107 +0,0 @@
Tokenizer tests
===============
The test format is [JSON](http://www.json.org/). This has the advantage
that the syntax allows backward-compatible extensions to the tests and
the disadvantage that it is relatively verbose.

Basic Structure
---------------

    {"tests": [
        {"description": "Test description",
        "input": "input_string",
        "output": [expected_output_tokens],
        "initialStates": [initial_states],
        "lastStartTag": last_start_tag,
        "errors": [parse_errors]
        }
    ]}

Multiple tests per file are allowed simply by adding more objects to the
"tests" list.
Each parse error is an object that contains an error `code` and one-based
error location indices: `line` and `col`.
`description`, `input` and `output` are always present. The other values
are optional.
### Test set-up
`test.input` is a string containing the characters to pass to the
tokenizer. Specifically, it represents the characters of the **input
stream**, and so implementations are expected to perform the processing
described in the spec's **Preprocessing the input stream** section
before feeding the result to the tokenizer.
If `test.doubleEscaped` is present and `true`, then `test.input` is not
quite as described above. Instead, it must first be subjected to another
round of unescaping (i.e., in addition to any unescaping involved in the
JSON import), and the result of *that* represents the characters of the
input stream. Currently, the only unescaping required by this option is
to convert each sequence of the form \\uHHHH (where H is a hex digit)
into the corresponding Unicode code point. (Note that this option also
affects the interpretation of `test.output`.)
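
For a Python implementation, this extra unescaping pass might look like the
following sketch (the helper name `double_unescape` is illustrative and not
part of the test format):

    import re

    def double_unescape(s):
        # Replace each literal \uHHHH sequence left over after JSON decoding
        # with the corresponding code point (lone surrogates included).
        return re.sub(r"\\u([0-9A-Fa-f]{4})",
                      lambda m: chr(int(m.group(1), 16)),
                      s)
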
`test.initialStates` is a list of strings, each being the name of a
tokenizer state which can be one of the following:
- `Data state`
- `PLAINTEXT state`
- `RCDATA state`
- `RAWTEXT state`
- `Script data state`
- `CDATA section state`
The test should be run once for each string, using it
to set the tokenizer's initial state for that run. If
`test.initialStates` is omitted, it defaults to `["Data state"]`.
`test.lastStartTag` is a lowercase string that should be used as "the
tag name of the last start tag to have been emitted from this
tokenizer", referenced in the spec's definition of **appropriate end tag
token**. If it is omitted, it is treated as if "no start tag has been
emitted from this tokenizer".
### Test results
`test.output` is a list of tokens, ordered so that the first token produced
by the tokenizer is the first (leftmost) in the list. The list must match the
**complete** list of tokens that the tokenizer should produce. Valid
tokens are:
["DOCTYPE", name, public_id, system_id, correctness]
["StartTag", name, {attributes}*, true*]
["StartTag", name, {attributes}]
["EndTag", name]
["Comment", data]
["Character", data]
`public_id` and `system_id` are either strings or `null`. `correctness`
is either `true` or `false`; `true` corresponds to the force-quirks flag
being false, and vice-versa.
When the self-closing flag is set, the `StartTag` array has `true` as
its fourth entry. When the flag is not set, the array has only three
entries for backwards compatibility.
All adjacent character tokens are coalesced into a single
`["Character", data]` token.
If `test.doubleEscaped` is present and `true`, then every string within
`test.output` must be further unescaped (as described above) before
comparing with the tokenizer's output.
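
As a rough illustration (not part of the test format), a Python harness might
drive these tests as sketched below; `tokenize` stands in for the
implementation under test, and the function and helper names are hypothetical.
Handling of `doubleEscaped` is omitted for brevity (see the unescaping sketch
above).

    import json

    def coalesce_characters(tokens):
        # Merge adjacent "Character" tokens before comparing, mirroring the
        # coalescing described above for test.output.
        out = []
        for tok in tokens:
            if out and tok[0] == "Character" and out[-1][0] == "Character":
                out[-1] = ["Character", out[-1][1] + tok[1]]
            else:
                out.append(list(tok))
        return out

    def run_tokenizer_tests(path, tokenize):
        # tokenize(input_string, initial_state, last_start_tag) -> token list
        with open(path, encoding="utf-8") as f:
            tests = json.load(f)["tests"]
        for test in tests:
            for state in test.get("initialStates", ["Data state"]):
                tokens = tokenize(test["input"], state,
                                  test.get("lastStartTag"))
                assert coalesce_characters(tokens) == test["output"], \
                    test["description"]
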
xmlViolation tests
------------------
`tokenizer/xmlViolation.test` differs from the above in a couple of
ways:
- The name of the single member of the top-level JSON object is
"xmlViolationTests" instead of "tests".
- Each test's expected output assumes that the implementation is applying
  the tweaks given in the spec's "Coercing an HTML DOM into an
  infoset" section.

View file

@ -1,93 +0,0 @@
{"tests": [
{"description":"PLAINTEXT content model flag",
"initialStates":["PLAINTEXT state"],
"lastStartTag":"plaintext",
"input":"<head>&body;",
"output":[["Character", "<head>&body;"]]},
{"description":"PLAINTEXT with seeming close tag",
"initialStates":["PLAINTEXT state"],
"lastStartTag":"plaintext",
"input":"</plaintext>&body;",
"output":[["Character", "</plaintext>&body;"]]},
{"description":"End tag closing RCDATA or RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp>",
"output":[["Character", "foo"], ["EndTag", "xmp"]]},
{"description":"End tag closing RCDATA or RAWTEXT (case-insensitivity)",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xMp>",
"output":[["Character", "foo"], ["EndTag", "xmp"]]},
{"description":"End tag closing RCDATA or RAWTEXT (ending with space)",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp ",
"output":[["Character", "foo"]],
"errors":[
{ "code": "eof-in-tag", "line": 1, "col": 10 }
]},
{"description":"End tag closing RCDATA or RAWTEXT (ending with EOF)",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp",
"output":[["Character", "foo</xmp"]]},
{"description":"End tag closing RCDATA or RAWTEXT (ending with slash)",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp/",
"output":[["Character", "foo"]],
"errors":[
{ "code": "eof-in-tag", "line": 1, "col": 10 }
]},
{"description":"End tag not closing RCDATA or RAWTEXT (ending with left-angle-bracket)",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp<",
"output":[["Character", "foo</xmp<"]]},
{"description":"End tag with incorrect name in RCDATA or RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"</foo>bar</xmp>",
"output":[["Character", "</foo>bar"], ["EndTag", "xmp"]]},
{"description":"Partial end tags leading straight into partial end tags",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"</xmp</xmp</xmp>",
"output":[["Character", "</xmp</xmp"], ["EndTag", "xmp"]]},
{"description":"End tag with incorrect name in RCDATA or RAWTEXT (starting like correct name)",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"</foo>bar</xmpaar>",
"output":[["Character", "</foo>bar</xmpaar>"]]},
{"description":"End tag closing RCDATA or RAWTEXT, switching back to PCDATA",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp></baz>",
"output":[["Character", "foo"], ["EndTag", "xmp"], ["EndTag", "baz"]]},
{"description":"RAWTEXT w/ something looking like an entity",
"initialStates":["RAWTEXT state"],
"lastStartTag":"xmp",
"input":"&foo;",
"output":[["Character", "&foo;"]]},
{"description":"RCDATA w/ an entity",
"initialStates":["RCDATA state"],
"lastStartTag":"textarea",
"input":"&lt;",
"output":[["Character", "<"]]}
]}

View file

@ -1,330 +0,0 @@
{
"tests": [
{
"description":"CR in bogus comment state",
"input":"<?\u000d",
"output":[["Comment", "?\u000a"]],
"errors":[
{ "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
]
},
{
"description":"CRLF in bogus comment state",
"input":"<?\u000d\u000a",
"output":[["Comment", "?\u000a"]],
"errors":[
{ "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
]
},
{
"description":"CRLFLF in bogus comment state",
"input":"<?\u000d\u000a\u000a",
"output":[["Comment", "?\u000a\u000a"]],
"errors":[
{ "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
]
},
{
"description":"Raw NUL replacement",
"doubleEscaped":true,
"initialStates":["RCDATA state", "RAWTEXT state", "PLAINTEXT state", "Script data state"],
"input":"\\u0000",
"output":[["Character", "\\uFFFD"]],
"errors":[
{ "code": "unexpected-null-character", "line": 1, "col": 1 }
]
},
{
"description":"NUL in CDATA section",
"doubleEscaped":true,
"initialStates":["CDATA section state"],
"input":"\\u0000]]>",
"output":[["Character", "\\u0000"]]
},
{
"description":"NUL in script HTML comment",
"doubleEscaped":true,
"initialStates":["Script data state"],
"input":"<!--test\\u0000--><!--test-\\u0000--><!--test--\\u0000-->",
"output":[["Character", "<!--test\\uFFFD--><!--test-\\uFFFD--><!--test--\\uFFFD-->"]],
"errors":[
{ "code": "unexpected-null-character", "line": 1, "col": 9 },
{ "code": "unexpected-null-character", "line": 1, "col": 22 },
{ "code": "unexpected-null-character", "line": 1, "col": 36 }
]
},
{
"description":"NUL in script HTML comment - double escaped",
"doubleEscaped":true,
"initialStates":["Script data state"],
"input":"<!--<script>\\u0000--><!--<script>-\\u0000--><!--<script>--\\u0000-->",
"output":[["Character", "<!--<script>\\uFFFD--><!--<script>-\\uFFFD--><!--<script>--\\uFFFD-->"]],
"errors":[
{ "code": "unexpected-null-character", "line": 1, "col": 13 },
{ "code": "unexpected-null-character", "line": 1, "col": 30 },
{ "code": "unexpected-null-character", "line": 1, "col": 48 }
]
},
{
"description":"EOF in script HTML comment",
"initialStates":["Script data state"],
"input":"<!--test",
"output":[["Character", "<!--test"]],
"errors":[
{ "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 9 }
]
},
{
"description":"EOF in script HTML comment after dash",
"initialStates":["Script data state"],
"input":"<!--test-",
"output":[["Character", "<!--test-"]],
"errors":[
{ "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 10 }
]
},
{
"description":"EOF in script HTML comment after dash dash",
"initialStates":["Script data state"],
"input":"<!--test--",
"output":[["Character", "<!--test--"]],
"errors":[
{ "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 11 }
]
},
{
"description":"EOF in script HTML comment double escaped after dash",
"initialStates":["Script data state"],
"input":"<!--<script>-",
"output":[["Character", "<!--<script>-"]],
"errors":[
{ "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 14 }
]
},
{
"description":"EOF in script HTML comment double escaped after dash dash",
"initialStates":["Script data state"],
"input":"<!--<script>--",
"output":[["Character", "<!--<script>--"]],
"errors":[
{ "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 15 }
]
},
{
"description":"EOF in script HTML comment - double escaped",
"initialStates":["Script data state"],
"input":"<!--<script>",
"output":[["Character", "<!--<script>"]],
"errors":[
{ "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 13 }
]
},
{
"description":"Dash in script HTML comment",
"initialStates":["Script data state"],
"input":"<!-- - -->",
"output":[["Character", "<!-- - -->"]]
},
{
"description":"Dash less-than in script HTML comment",
"initialStates":["Script data state"],
"input":"<!-- -< -->",
"output":[["Character", "<!-- -< -->"]]
},
{
"description":"Dash at end of script HTML comment",
"initialStates":["Script data state"],
"input":"<!--test--->",
"output":[["Character", "<!--test--->"]]
},
{
"description":"</script> in script HTML comment",
"initialStates":["Script data state"],
"lastStartTag":"script",
"input":"<!-- </script> --></script>",
"output":[["Character", "<!-- "], ["EndTag", "script"], ["Character", " -->"], ["EndTag", "script"]]
},
{
"description":"</script> in script HTML comment - double escaped",
"initialStates":["Script data state"],
"lastStartTag":"script",
"input":"<!-- <script></script> --></script>",
"output":[["Character", "<!-- <script></script> -->"], ["EndTag", "script"]]
},
{
"description":"</script> in script HTML comment - double escaped with nested <script>",
"initialStates":["Script data state"],
"lastStartTag":"script",
"input":"<!-- <script><script></script></script> --></script>",
"output":[["Character", "<!-- <script><script></script>"], ["EndTag", "script"], ["Character", " -->"], ["EndTag", "script"]]
},
{
"description":"</script> in script HTML comment - double escaped with abrupt end",
"initialStates":["Script data state"],
"lastStartTag":"script",
"input":"<!-- <script>--></script> --></script>",
"output":[["Character", "<!-- <script>-->"], ["EndTag", "script"], ["Character", " -->"], ["EndTag", "script"]]
},
{
"description":"Incomplete start tag in script HTML comment double escaped",
"initialStates":["Script data state"],
"lastStartTag":"script",
"input":"<!--<scrip></script>-->",
"output":[["Character", "<!--<scrip>"], ["EndTag", "script"], ["Character", "-->"]]
},
{
"description":"Unclosed start tag in script HTML comment double escaped",
"initialStates":["Script data state"],
"lastStartTag":"script",
"input":"<!--<script</script>-->",
"output":[["Character", "<!--<script"], ["EndTag", "script"], ["Character", "-->"]]
},
{
"description":"Incomplete end tag in script HTML comment double escaped",
"initialStates":["Script data state"],
"lastStartTag":"script",
"input":"<!--<script></scrip>-->",
"output":[["Character", "<!--<script></scrip>-->"]]
},
{
"description":"Unclosed end tag in script HTML comment double escaped",
"initialStates":["Script data state"],
"lastStartTag":"script",
"input":"<!--<script></script-->",
"output":[["Character", "<!--<script></script-->"]]
},
{
"description":"leading U+FEFF must pass through",
"initialStates":["Data state", "RCDATA state", "RAWTEXT state", "Script data state"],
"doubleEscaped":true,
"input":"\\uFEFFfoo\\uFEFFbar",
"output":[["Character", "\\uFEFFfoo\\uFEFFbar"]]
},
{
"description":"Non BMP-charref in RCDATA",
"initialStates":["RCDATA state"],
"input":"&NotEqualTilde;",
"output":[["Character", "\u2242\u0338"]]
},
{
"description":"Bad charref in RCDATA",
"initialStates":["RCDATA state"],
"input":"&NotEqualTild;",
"output":[["Character", "&NotEqualTild;"]],
"errors":[
{ "code": "unknown-named-character-reference", "line": 1, "col": 14 }
]
},
{
"description":"lowercase endtags",
"initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
"lastStartTag":"xmp",
"input":"</XMP>",
"output":[["EndTag","xmp"]]
},
{
"description":"bad endtag (space before name)",
"initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
"lastStartTag":"xmp",
"input":"</ XMP>",
"output":[["Character","</ XMP>"]]
},
{
"description":"bad endtag (not matching last start tag)",
"initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
"lastStartTag":"xmp",
"input":"</xm>",
"output":[["Character","</xm>"]]
},
{
"description":"bad endtag (without close bracket)",
"initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
"lastStartTag":"xmp",
"input":"</xm ",
"output":[["Character","</xm "]]
},
{
"description":"bad endtag (trailing solidus)",
"initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
"lastStartTag":"xmp",
"input":"</xm/",
"output":[["Character","</xm/"]]
},
{
"description":"Non BMP-charref in attribute",
"input":"<p id=\"&NotEqualTilde;\">",
"output":[["StartTag", "p", {"id":"\u2242\u0338"}]]
},
{
"description":"--!NUL in comment ",
"doubleEscaped":true,
"input":"<!----!\\u0000-->",
"output":[["Comment", "--!\\uFFFD"]],
"errors":[
{ "code": "unexpected-null-character", "line": 1, "col": 8 }
]
},
{
"description":"space EOF after doctype ",
"input":"<!DOCTYPE html ",
"output":[["DOCTYPE", "html", null, null , false]],
"errors":[
{ "code": "eof-in-doctype", "line": 1, "col": 16 }
]
},
{
"description":"CDATA in HTML content",
"input":"<![CDATA[foo]]>",
"output":[["Comment", "[CDATA[foo]]"]],
"errors":[
{ "code": "cdata-in-html-content", "line": 1, "col": 9 }
]
},
{
"description":"CDATA content",
"input":"foo&#32;]]>",
"initialStates":["CDATA section state"],
"output":[["Character", "foo&#32;"]]
},
{
"description":"CDATA followed by HTML content",
"input":"foo&#32;]]>&#32;",
"initialStates":["CDATA section state"],
"output":[["Character", "foo&#32; "]]
},
{
"description":"CDATA with extra bracket",
"input":"foo]]]>",
"initialStates":["CDATA section state"],
"output":[["Character", "foo]"]]
},
{
"description":"CDATA without end marker",
"input":"foo",
"initialStates":["CDATA section state"],
"output":[["Character", "foo"]],
"errors":[
{ "code": "eof-in-cdata", "line": 1, "col": 4 }
]
},
{
"description":"CDATA with single bracket ending",
"input":"foo]",
"initialStates":["CDATA section state"],
"output":[["Character", "foo]"]],
"errors":[
{ "code": "eof-in-cdata", "line": 1, "col": 5 }
]
},
{
"description":"CDATA with two brackets ending",
"input":"foo]]",
"initialStates":["CDATA section state"],
"output":[["Character", "foo]]"]],
"errors":[
{ "code": "eof-in-cdata", "line": 1, "col": 6 }
]
}
]
}

View file

@ -1,542 +0,0 @@
{"tests": [
{"description": "Undefined named entity in a double-quoted attribute value ending in semicolon and whose name starts with a known entity name.",
"input":"<h a=\"&noti;\">",
"output": [["StartTag", "h", {"a": "&noti;"}]]},
{"description": "Entity name requiring semicolon instead followed by the equals sign in a double-quoted attribute value.",
"input":"<h a=\"&lang=\">",
"output": [["StartTag", "h", {"a": "&lang="}]]},
{"description": "Valid entity name followed by the equals sign in a double-quoted attribute value.",
"input":"<h a=\"&not=\">",
"output": [["StartTag", "h", {"a": "&not="}]]},
{"description": "Undefined named entity in a single-quoted attribute value ending in semicolon and whose name starts with a known entity name.",
"input":"<h a='&noti;'>",
"output": [["StartTag", "h", {"a": "&noti;"}]]},
{"description": "Entity name requiring semicolon instead followed by the equals sign in a single-quoted attribute value.",
"input":"<h a='&lang='>",
"output": [["StartTag", "h", {"a": "&lang="}]]},
{"description": "Valid entity name followed by the equals sign in a single-quoted attribute value.",
"input":"<h a='&not='>",
"output": [["StartTag", "h", {"a": "&not="}]]},
{"description": "Undefined named entity in an unquoted attribute value ending in semicolon and whose name starts with a known entity name.",
"input":"<h a=&noti;>",
"output": [["StartTag", "h", {"a": "&noti;"}]]},
{"description": "Entity name requiring semicolon instead followed by the equals sign in an unquoted attribute value.",
"input":"<h a=&lang=>",
"output": [["StartTag", "h", {"a": "&lang="}]],
"errors":[
{ "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 11 }
]},
{"description": "Valid entity name followed by the equals sign in an unquoted attribute value.",
"input":"<h a=&not=>",
"output": [["StartTag", "h", {"a": "&not="}]],
"errors":[
{ "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 10 }
]},
{"description": "Ambiguous ampersand.",
"input":"&rrrraannddom;",
"output": [["Character", "&rrrraannddom;"]],
"errors":[
{ "code": "unknown-named-character-reference", "line": 1, "col": 14 }
]},
{"description": "Semicolonless named entity 'not' followed by 'i;' in body",
"input":"&noti;",
"output": [["Character", "\u00ACi;"]],
"errors":[
{ "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 }
]},
{"description": "Very long undefined named entity in body",
"input":"&ammmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmp;",
"output": [["Character", "&ammmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmp;"]],
"errors":[
{ "code": "unknown-named-character-reference", "line": 1, "col": 950 }
]},
{"description": "CR as numeric entity",
"input":"&#013;",
"output": [["Character", "\r"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 7 }
]},
{"description": "CR as hexadecimal numeric entity",
"input":"&#x00D;",
"output": [["Character", "\r"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 EURO SIGN numeric entity.",
"input":"&#0128;",
"output": [["Character", "\u20AC"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0129;",
"output": [["Character", "\u0081"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK numeric entity.",
"input":"&#0130;",
"output": [["Character", "\u201A"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK numeric entity.",
"input":"&#0131;",
"output": [["Character", "\u0192"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK numeric entity.",
"input":"&#0132;",
"output": [["Character", "\u201E"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 HORIZONTAL ELLIPSIS numeric entity.",
"input":"&#0133;",
"output": [["Character", "\u2026"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 DAGGER numeric entity.",
"input":"&#0134;",
"output": [["Character", "\u2020"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 DOUBLE DAGGER numeric entity.",
"input":"&#0135;",
"output": [["Character", "\u2021"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT numeric entity.",
"input":"&#0136;",
"output": [["Character", "\u02C6"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 PER MILLE SIGN numeric entity.",
"input":"&#0137;",
"output": [["Character", "\u2030"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON numeric entity.",
"input":"&#0138;",
"output": [["Character", "\u0160"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK numeric entity.",
"input":"&#0139;",
"output": [["Character", "\u2039"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE numeric entity.",
"input":"&#0140;",
"output": [["Character", "\u0152"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0141;",
"output": [["Character", "\u008D"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON numeric entity.",
"input":"&#0142;",
"output": [["Character", "\u017D"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0143;",
"output": [["Character", "\u008F"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0144;",
"output": [["Character", "\u0090"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK numeric entity.",
"input":"&#0145;",
"output": [["Character", "\u2018"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK numeric entity.",
"input":"&#0146;",
"output": [["Character", "\u2019"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK numeric entity.",
"input":"&#0147;",
"output": [["Character", "\u201C"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK numeric entity.",
"input":"&#0148;",
"output": [["Character", "\u201D"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 BULLET numeric entity.",
"input":"&#0149;",
"output": [["Character", "\u2022"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 EN DASH numeric entity.",
"input":"&#0150;",
"output": [["Character", "\u2013"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 EM DASH numeric entity.",
"input":"&#0151;",
"output": [["Character", "\u2014"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 SMALL TILDE numeric entity.",
"input":"&#0152;",
"output": [["Character", "\u02DC"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 TRADE MARK SIGN numeric entity.",
"input":"&#0153;",
"output": [["Character", "\u2122"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON numeric entity.",
"input":"&#0154;",
"output": [["Character", "\u0161"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK numeric entity.",
"input":"&#0155;",
"output": [["Character", "\u203A"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN SMALL LIGATURE OE numeric entity.",
"input":"&#0156;",
"output": [["Character", "\u0153"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0157;",
"output": [["Character", "\u009D"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 EURO SIGN hexadecimal numeric entity.",
"input":"&#x080;",
"output": [["Character", "\u20AC"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x081;",
"output": [["Character", "\u0081"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x082;",
"output": [["Character", "\u201A"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK hexadecimal numeric entity.",
"input":"&#x083;",
"output": [["Character", "\u0192"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x084;",
"output": [["Character", "\u201E"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 HORIZONTAL ELLIPSIS hexadecimal numeric entity.",
"input":"&#x085;",
"output": [["Character", "\u2026"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 DAGGER hexadecimal numeric entity.",
"input":"&#x086;",
"output": [["Character", "\u2020"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 DOUBLE DAGGER hexadecimal numeric entity.",
"input":"&#x087;",
"output": [["Character", "\u2021"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT hexadecimal numeric entity.",
"input":"&#x088;",
"output": [["Character", "\u02C6"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 PER MILLE SIGN hexadecimal numeric entity.",
"input":"&#x089;",
"output": [["Character", "\u2030"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON hexadecimal numeric entity.",
"input":"&#x08A;",
"output": [["Character", "\u0160"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x08B;",
"output": [["Character", "\u2039"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE hexadecimal numeric entity.",
"input":"&#x08C;",
"output": [["Character", "\u0152"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x08D;",
"output": [["Character", "\u008D"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON hexadecimal numeric entity.",
"input":"&#x08E;",
"output": [["Character", "\u017D"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x08F;",
"output": [["Character", "\u008F"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x090;",
"output": [["Character", "\u0090"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x091;",
"output": [["Character", "\u2018"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x092;",
"output": [["Character", "\u2019"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x093;",
"output": [["Character", "\u201C"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x094;",
"output": [["Character", "\u201D"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 BULLET hexadecimal numeric entity.",
"input":"&#x095;",
"output": [["Character", "\u2022"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 EN DASH hexadecimal numeric entity.",
"input":"&#x096;",
"output": [["Character", "\u2013"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 EM DASH hexadecimal numeric entity.",
"input":"&#x097;",
"output": [["Character", "\u2014"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 SMALL TILDE hexadecimal numeric entity.",
"input":"&#x098;",
"output": [["Character", "\u02DC"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 TRADE MARK SIGN hexadecimal numeric entity.",
"input":"&#x099;",
"output": [["Character", "\u2122"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON hexadecimal numeric entity.",
"input":"&#x09A;",
"output": [["Character", "\u0161"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x09B;",
"output": [["Character", "\u203A"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN SMALL LIGATURE OE hexadecimal numeric entity.",
"input":"&#x09C;",
"output": [["Character", "\u0153"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x09D;",
"output": [["Character", "\u009D"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN SMALL LETTER Z WITH CARON hexadecimal numeric entity.",
"input":"&#x09E;",
"output": [["Character", "\u017E"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN CAPITAL LETTER Y WITH DIAERESIS hexadecimal numeric entity.",
"input":"&#x09F;",
"output": [["Character", "\u0178"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Decimal numeric entity followed by hex character a.",
"input":"&#97a",
"output": [["Character", "aa"]],
"errors":[
{ "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 }
]},
{"description": "Decimal numeric entity followed by hex character A.",
"input":"&#97A",
"output": [["Character", "aA"]],
"errors":[
{ "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 }
]},
{"description": "Decimal numeric entity followed by hex character f.",
"input":"&#97f",
"output": [["Character", "af"]],
"errors":[
{ "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 }
]},
{"description": "Decimal numeric entity followed by hex character A.",
"input":"&#97F",
"output": [["Character", "aF"]],
"errors":[
{ "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 }
]}
]}

View file

@ -1,36 +0,0 @@
{"tests": [
{"description":"Commented close tag in RCDATA or RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo<!--</xmp>--></xmp>",
"output":[["Character", "foo<!--"], ["EndTag", "xmp"], ["Character", "-->"], ["EndTag", "xmp"]]},
{"description":"Bogus comment in RCDATA or RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo<!-->baz</xmp>",
"output":[["Character", "foo<!-->baz"], ["EndTag", "xmp"]]},
{"description":"End tag surrounded by bogus comment in RCDATA or RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo<!--></xmp><!-->baz</xmp>",
"output":[["Character", "foo<!-->"], ["EndTag", "xmp"], ["Comment", ""], ["Character", "baz"], ["EndTag", "xmp"]],
"errors":[
{ "code": "abrupt-closing-of-empty-comment", "line": 1, "col": 19 }
]},
{"description":"Commented entities in RCDATA",
"initialStates":["RCDATA state"],
"lastStartTag":"xmp",
"input":" &amp; <!-- &amp; --> &amp; </xmp>",
"output":[["Character", " & <!-- & --> & "], ["EndTag", "xmp"]]},
{"description":"Incorrect comment ending sequences in RCDATA or RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo<!-- x --x>x-- >x--!>x--<></xmp>",
"output":[["Character", "foo<!-- x --x>x-- >x--!>x--<>"], ["EndTag", "xmp"]]}
]}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,9 +0,0 @@
{"tests": [
{"description":"<!---- >",
"input":"<!---- >",
"output":[["Comment","-- >"]],
"errors":[
{ "code": "eof-in-comment", "line": 1, "col": 9 }
]}
]}

View file

@ -1,349 +0,0 @@
{"tests": [
{"description":"Correct Doctype lowercase",
"input":"<!DOCTYPE html>",
"output":[["DOCTYPE", "html", null, null, true]]},
{"description":"Correct Doctype uppercase",
"input":"<!DOCTYPE HTML>",
"output":[["DOCTYPE", "html", null, null, true]]},
{"description":"Correct Doctype mixed case",
"input":"<!DOCTYPE HtMl>",
"output":[["DOCTYPE", "html", null, null, true]]},
{"description":"Correct Doctype case with EOF",
"input":"<!DOCTYPE HtMl",
"output":[["DOCTYPE", "html", null, null, false]],
"errors":[
{ "code": "eof-in-doctype", "line": 1, "col": 15 }
]},
{"description":"Truncated doctype start",
"input":"<!DOC>",
"output":[["Comment", "DOC"]],
"errors":[
{ "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
]},
{"description":"Doctype in error",
"input":"<!DOCTYPE foo>",
"output":[["DOCTYPE", "foo", null, null, true]]},
{"description":"Single Start Tag",
"input":"<h>",
"output":[["StartTag", "h", {}]]},
{"description":"Empty end tag",
"input":"</>",
"output":[],
"errors":[
{ "code": "missing-end-tag-name", "line": 1, "col": 3 }
]},
{"description":"Empty start tag",
"input":"<>",
"output":[["Character", "<>"]],
"errors":[
{ "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 }
]},
{"description":"Start Tag w/attribute",
"input":"<h a='b'>",
"output":[["StartTag", "h", {"a":"b"}]]},
{"description":"Start Tag w/attribute no quotes",
"input":"<h a=b>",
"output":[["StartTag", "h", {"a":"b"}]]},
{"description":"Start/End Tag",
"input":"<h></h>",
"output":[["StartTag", "h", {}], ["EndTag", "h"]]},
{"description":"Two unclosed start tags",
"input":"<p>One<p>Two",
"output":[["StartTag", "p", {}], ["Character", "One"], ["StartTag", "p", {}], ["Character", "Two"]]},
{"description":"End Tag w/attribute",
"input":"<h></h a='b'>",
"output":[["StartTag", "h", {}], ["EndTag", "h"]],
"errors":[
{ "code": "end-tag-with-attributes", "line": 1, "col": 13 }
]},
{"description":"Multiple atts",
"input":"<h a='b' c='d'>",
"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
{"description":"Multiple atts no space",
"input":"<h a='b'c='d'>",
"output":[["StartTag", "h", {"a":"b", "c":"d"}]],
"errors":[
{ "code": "missing-whitespace-between-attributes", "line": 1, "col": 9 }
]},
{"description":"Repeated attr",
"input":"<h a='b' a='d'>",
"output":[["StartTag", "h", {"a":"b"}]],
"errors":[
{ "code": "duplicate-attribute", "line": 1, "col": 11 }
]},
{"description":"Simple comment",
"input":"<!--comment-->",
"output":[["Comment", "comment"]]},
{"description":"Comment, Central dash no space",
"input":"<!----->",
"output":[["Comment", "-"]]},
{"description":"Comment, two central dashes",
"input":"<!-- --comment -->",
"output":[["Comment", " --comment "]]},
{"description":"Comment, central less-than bang",
"input":"<!--<!-->",
"output":[["Comment", "<!"]]},
{"description":"Unfinished comment",
"input":"<!--comment",
"output":[["Comment", "comment"]],
"errors":[
{ "code": "eof-in-comment", "line": 1, "col": 12 }
]},
{"description":"Unfinished comment after start of nested comment",
"input":"<!-- <!--",
"output":[["Comment", " <!"]],
"errors":[
{ "code": "eof-in-comment", "line": 1, "col": 10 }
]},
{"description":"Start of a comment",
"input":"<!-",
"output":[["Comment", "-"]],
"errors":[
{ "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
]},
{"description":"Short comment",
"input":"<!-->",
"output":[["Comment", ""]],
"errors":[
{ "code": "abrupt-closing-of-empty-comment", "line": 1, "col": 5 }
]},
{"description":"Short comment two",
"input":"<!--->",
"output":[["Comment", ""]],
"errors":[
{ "code": "abrupt-closing-of-empty-comment", "line": 1, "col": 6 }
]},
{"description":"Short comment three",
"input":"<!---->",
"output":[["Comment", ""]]},
{"description":"< in comment",
"input":"<!-- <test-->",
"output":[["Comment", " <test"]]},
{"description":"<! in comment",
"input":"<!-- <!test-->",
"output":[["Comment", " <!test"]]},
{"description":"<!- in comment",
"input":"<!-- <!-test-->",
"output":[["Comment", " <!-test"]]},
{"description":"Nested comment",
"input":"<!-- <!--test-->",
"output":[["Comment", " <!--test"]],
"errors":[
{ "code": "nested-comment", "line": 1, "col": 10 }
]},
{"description":"Nested comment with extra <",
"input":"<!-- <<!--test-->",
"output":[["Comment", " <<!--test"]],
"errors":[
{ "code": "nested-comment", "line": 1, "col": 11 }
]},
{"description":"< in script data",
"initialStates":["Script data state"],
"input":"<test-->",
"output":[["Character", "<test-->"]]},
{"description":"<! in script data",
"initialStates":["Script data state"],
"input":"<!test-->",
"output":[["Character", "<!test-->"]]},
{"description":"<!- in script data",
"initialStates":["Script data state"],
"input":"<!-test-->",
"output":[["Character", "<!-test-->"]]},
{"description":"Escaped script data",
"initialStates":["Script data state"],
"input":"<!--test-->",
"output":[["Character", "<!--test-->"]]},
{"description":"< in script HTML comment",
"initialStates":["Script data state"],
"input":"<!-- < test -->",
"output":[["Character", "<!-- < test -->"]]},
{"description":"</ in script HTML comment",
"initialStates":["Script data state"],
"input":"<!-- </ test -->",
"output":[["Character", "<!-- </ test -->"]]},
{"description":"Start tag in script HTML comment",
"initialStates":["Script data state"],
"input":"<!-- <test> -->",
"output":[["Character", "<!-- <test> -->"]]},
{"description":"End tag in script HTML comment",
"initialStates":["Script data state"],
"input":"<!-- </test> -->",
"output":[["Character", "<!-- </test> -->"]]},
{"description":"- in script HTML comment double escaped",
"initialStates":["Script data state"],
"input":"<!--<script>-</script>-->",
"output":[["Character", "<!--<script>-</script>-->"]]},
{"description":"-- in script HTML comment double escaped",
"initialStates":["Script data state"],
"input":"<!--<script>--</script>-->",
"output":[["Character", "<!--<script>--</script>-->"]]},
{"description":"--- in script HTML comment double escaped",
"initialStates":["Script data state"],
"input":"<!--<script>---</script>-->",
"output":[["Character", "<!--<script>---</script>-->"]]},
{"description":"- spaced in script HTML comment double escaped",
"initialStates":["Script data state"],
"input":"<!--<script> - </script>-->",
"output":[["Character", "<!--<script> - </script>-->"]]},
{"description":"-- spaced in script HTML comment double escaped",
"initialStates":["Script data state"],
"input":"<!--<script> -- </script>-->",
"output":[["Character", "<!--<script> -- </script>-->"]]},
{"description":"Ampersand EOF",
"input":"&",
"output":[["Character", "&"]]},
{"description":"Ampersand ampersand EOF",
"input":"&&",
"output":[["Character", "&&"]]},
{"description":"Ampersand space EOF",
"input":"& ",
"output":[["Character", "& "]]},
{"description":"Unfinished entity",
"input":"&f",
"output":[["Character", "&f"]]},
{"description":"Ampersand, number sign",
"input":"&#",
"output":[["Character", "&#"]],
"errors":[
{ "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 3 }
]},
{"description":"Unfinished numeric entity",
"input":"&#x",
"output":[["Character", "&#x"]],
"errors":[
{ "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 4 }
]},
{"description":"Entity with trailing semicolon (1)",
"input":"I'm &not;it",
"output":[["Character","I'm \u00ACit"]]},
{"description":"Entity with trailing semicolon (2)",
"input":"I'm &notin;",
"output":[["Character","I'm \u2209"]]},
{"description":"Entity without trailing semicolon (1)",
"input":"I'm &notit",
"output":[["Character","I'm \u00ACit"]],
"errors": [
{"code" : "missing-semicolon-after-character-reference", "line": 1, "col": 9 }
]},
{"description":"Entity without trailing semicolon (2)",
"input":"I'm &notin",
"output":[["Character","I'm \u00ACin"]],
"errors": [
{"code" : "missing-semicolon-after-character-reference", "line": 1, "col": 9 }
]},
{"description":"Partial entity match at end of file",
"input":"I'm &no",
"output":[["Character","I'm &no"]]},
{"description":"Non-ASCII character reference name",
"input":"&\u00AC;",
"output":[["Character", "&\u00AC;"]]},
{"description":"ASCII decimal entity",
"input":"&#0036;",
"output":[["Character","$"]]},
{"description":"ASCII hexadecimal entity",
"input":"&#x3f;",
"output":[["Character","?"]]},
{"description":"Hexadecimal entity in attribute",
"input":"<h a='&#x3f;'></h>",
"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]},
{"description":"Entity in attribute without semicolon ending in x",
"input":"<h a='&notx'>",
"output":[["StartTag", "h", {"a":"&notx"}]]},
{"description":"Entity in attribute without semicolon ending in 1",
"input":"<h a='&not1'>",
"output":[["StartTag", "h", {"a":"&not1"}]]},
{"description":"Entity in attribute without semicolon ending in i",
"input":"<h a='&noti'>",
"output":[["StartTag", "h", {"a":"&noti"}]]},
{"description":"Entity in attribute without semicolon",
"input":"<h a='&COPY'>",
"output":[["StartTag", "h", {"a":"\u00A9"}]],
"errors": [
{"code" : "missing-semicolon-after-character-reference", "line": 1, "col": 12 }
]},
{"description":"Unquoted attribute ending in ampersand",
"input":"<s o=& t>",
"output":[["StartTag","s",{"o":"&","t":""}]]},
{"description":"Unquoted attribute at end of tag with final character of &, with tag followed by characters",
"input":"<a a=a&>foo",
"output":[["StartTag", "a", {"a":"a&"}], ["Character", "foo"]]},
{"description":"plaintext element",
"input":"<plaintext>foobar",
"output":[["StartTag","plaintext",{}], ["Character","foobar"]]},
{"description":"Open angled bracket in unquoted attribute value state",
"input":"<a a=f<>",
"output":[["StartTag", "a", {"a":"f<"}]],
"errors":[
{ "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 }
]}
]}

View file

@ -1,275 +0,0 @@
{"tests": [
{"description":"DOCTYPE without name",
"input":"<!DOCTYPE>",
"output":[["DOCTYPE", null, null, null, false]],
"errors":[
{ "code": "missing-doctype-name", "line": 1, "col": 10 }
]},
{"description":"DOCTYPE without space before name",
"input":"<!DOCTYPEhtml>",
"output":[["DOCTYPE", "html", null, null, true]],
"errors":[
{ "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 }
]},
{"description":"Incorrect DOCTYPE without a space before name",
"input":"<!DOCTYPEfoo>",
"output":[["DOCTYPE", "foo", null, null, true]],
"errors":[
{ "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 }
]},
{"description":"DOCTYPE with publicId",
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">",
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]},
{"description":"DOCTYPE with EOF after PUBLIC",
"input":"<!DOCTYPE html PUBLIC",
"output":[["DOCTYPE", "html", null, null, false]],
"errors": [
{ "code": "eof-in-doctype", "col": 22, "line": 1 }
]},
{"description":"DOCTYPE with EOF after PUBLIC '",
"input":"<!DOCTYPE html PUBLIC '",
"output":[["DOCTYPE", "html", "", null, false]],
"errors": [
{ "code": "eof-in-doctype", "col": 24, "line": 1 }
]},
{"description":"DOCTYPE with EOF after PUBLIC 'x",
"input":"<!DOCTYPE html PUBLIC 'x",
"output":[["DOCTYPE", "html", "x", null, false]],
"errors": [
{ "code": "eof-in-doctype", "col": 25, "line": 1 }
]},
{"description":"DOCTYPE with systemId",
"input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">",
"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
{"description":"DOCTYPE with single-quoted systemId",
"input":"<!DOCTYPE html SYSTEM '-//W3C//DTD HTML Transitional 4.01//EN'>",
"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
{"description":"DOCTYPE with publicId and systemId",
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">",
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
{"description":"DOCTYPE with > in double-quoted publicId",
"input":"<!DOCTYPE html PUBLIC \">x",
"output":[["DOCTYPE", "html", "", null, false], ["Character", "x"]],
"errors": [
{ "code": "abrupt-doctype-public-identifier", "col": 24, "line": 1 }
]},
{"description":"DOCTYPE with > in single-quoted publicId",
"input":"<!DOCTYPE html PUBLIC '>x",
"output":[["DOCTYPE", "html", "", null, false], ["Character", "x"]],
"errors": [
{ "code": "abrupt-doctype-public-identifier", "col": 24, "line": 1 }
]},
{"description":"DOCTYPE with > in double-quoted systemId",
"input":"<!DOCTYPE html PUBLIC \"foo\" \">x",
"output":[["DOCTYPE", "html", "foo", "", false], ["Character", "x"]],
"errors": [
{ "code": "abrupt-doctype-system-identifier", "col": 30, "line": 1 }
]},
{"description":"DOCTYPE with > in single-quoted systemId",
"input":"<!DOCTYPE html PUBLIC 'foo' '>x",
"output":[["DOCTYPE", "html", "foo", "", false], ["Character", "x"]],
"errors": [
{ "code": "abrupt-doctype-system-identifier", "col": 30, "line": 1 }
]},
{"description":"Incomplete doctype",
"input":"<!DOCTYPE html ",
"output":[["DOCTYPE", "html", null, null, false]],
"errors":[
{ "code": "eof-in-doctype", "line": 1, "col": 16 }
]},
{"description":"Numeric entity representing the NUL character",
"input":"&#0000;",
"output":[["Character", "\uFFFD"]],
"errors":[
{ "code": "null-character-reference", "line": 1, "col": 8 }
]},
{"description":"Hexadecimal entity representing the NUL character",
"input":"&#x0000;",
"output":[["Character", "\uFFFD"]],
"errors":[
{ "code": "null-character-reference", "line": 1, "col": 9 }
]},
{"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
"input":"&#2225222;",
"output":[["Character", "\uFFFD"]],
"errors":[
{ "code": "character-reference-outside-unicode-range", "line": 1, "col": 11 }
]},
{"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)",
"input":"&#x1010FFFF;",
"output":[["Character", "\uFFFD"]],
"errors":[
{ "code": "character-reference-outside-unicode-range", "line": 1, "col": 13 }
]},
{"description":"Hexadecimal entity pair representing a surrogate pair",
"input":"&#xD869;&#xDED6;",
"output":[["Character", "\uFFFD\uFFFD"]],
"errors":[
{ "code": "surrogate-character-reference", "line": 1, "col": 9 },
{ "code": "surrogate-character-reference", "line": 1, "col": 17 }
]},
{"description":"Hexadecimal entity with mixed uppercase and lowercase",
"input":"&#xaBcD;",
"output":[["Character", "\uABCD"]]},
{"description":"Entity without a name",
"input":"&;",
"output":[["Character", "&;"]]},
{"description":"Unescaped ampersand in attribute value",
"input":"<h a='&'>",
"output":[["StartTag", "h", { "a":"&" }]]},
{"description":"StartTag containing <",
"input":"<a<b>",
"output":[["StartTag", "a<b", { }]]},
{"description":"Non-void element containing trailing /",
"input":"<h/>",
"output":[["StartTag","h",{},true]]},
{"description":"Void element with permitted slash",
"input":"<br/>",
"output":[["StartTag","br",{},true]]},
{"description":"Void element with permitted slash (with attribute)",
"input":"<br foo='bar'/>",
"output":[["StartTag","br",{"foo":"bar"},true]]},
{"description":"StartTag containing /",
"input":"<h/a='b'>",
"output":[["StartTag", "h", { "a":"b" }]],
"errors":[
{ "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
]},
{"description":"Double-quoted attribute value",
"input":"<h a=\"b\">",
"output":[["StartTag", "h", { "a":"b" }]]},
{"description":"Unescaped </",
"input":"</",
"output":[["Character", "</"]],
"errors":[
{ "code": "eof-before-tag-name", "line": 1, "col": 3 }
]},
{"description":"Illegal end tag name",
"input":"</1>",
"output":[["Comment", "1"]],
"errors":[
{ "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
]},
{"description":"Simili processing instruction",
"input":"<?namespace>",
"output":[["Comment", "?namespace"]],
"errors":[
{ "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
]},
{"description":"A bogus comment stops at >, even if preceeded by two dashes",
"input":"<?foo-->",
"output":[["Comment", "?foo--"]],
"errors":[
{ "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
]},
{"description":"Unescaped <",
"input":"foo < bar",
"output":[["Character", "foo < bar"]],
"errors":[
{ "code": "invalid-first-character-of-tag-name", "line": 1, "col": 6 }
]},
{"description":"Null Byte Replacement",
"input":"\u0000",
"output":[["Character", "\u0000"]],
"errors":[
{ "code": "unexpected-null-character", "line": 1, "col": 1 }
]},
{"description":"Comment with dash",
"input":"<!---x",
"output":[["Comment", "-x"]],
"errors":[
{ "code": "eof-in-comment", "line": 1, "col": 7 }
]},
{"description":"Entity + newline",
"input":"\nx\n&gt;\n",
"output":[["Character","\nx\n>\n"]]},
{"description":"Start tag with no attributes but space before the greater-than sign",
"input":"<h >",
"output":[["StartTag", "h", {}]]},
{"description":"Empty attribute followed by uppercase attribute",
"input":"<h a B=''>",
"output":[["StartTag", "h", {"a":"", "b":""}]]},
{"description":"Double-quote after attribute name",
"input":"<h a \">",
"output":[["StartTag", "h", {"a":"", "\"":""}]],
"errors":[
{ "code": "unexpected-character-in-attribute-name", "line": 1, "col": 6 }
]},
{"description":"Single-quote after attribute name",
"input":"<h a '>",
"output":[["StartTag", "h", {"a":"", "'":""}]],
"errors":[
{ "code": "unexpected-character-in-attribute-name", "line": 1, "col": 6 }
]},
{"description":"Empty end tag with following characters",
"input":"a</>bc",
"output":[["Character", "abc"]],
"errors":[
{ "code": "missing-end-tag-name", "line": 1, "col": 4 }
]},
{"description":"Empty end tag with following tag",
"input":"a</><b>c",
"output":[["Character", "a"], ["StartTag", "b", {}], ["Character", "c"]],
"errors":[
{ "code": "missing-end-tag-name", "line": 1, "col": 4 }
]},
{"description":"Empty end tag with following comment",
"input":"a</><!--b-->c",
"output":[["Character", "a"], ["Comment", "b"], ["Character", "c"]],
"errors":[
{ "code": "missing-end-tag-name", "line": 1, "col": 4 }
]},
{"description":"Empty end tag with following end tag",
"input":"a</></b>c",
"output":[["Character", "a"], ["EndTag", "b"], ["Character", "c"]],
"errors":[
{ "code": "missing-end-tag-name", "line": 1, "col": 4 }
]}
]}
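
For reference, each record in the tokenizer fixtures deleted above pairs an
"input" string with the token "output" a conforming tokenizer should emit,
plus any expected parse "errors". The sketch below is only an illustration of
how such a file could be consumed: `tokenize` is a hypothetical stand-in for
whatever tokenizer is under test, and entries flagged "doubleEscaped" (which
need an extra unicode-escape decoding pass) are simply skipped here.

    import json

    def load_tokenizer_tests(path):
        # Each .test file is a JSON object with a top-level "tests" list.
        with open(path, encoding="utf-8") as fh:
            return json.load(fh).get("tests", [])

    def failing_tests(path, tokenize):
        # `tokenize(text)` is a hypothetical callable returning tokens in the
        # same [type, ...] list form used by the fixtures above.
        failures = []
        for test in load_tokenizer_tests(path):
            if test.get("doubleEscaped"):
                continue  # skipped: these need a second unescaping pass
            if tokenize(test["input"]) != test["output"]:
                failures.append(test["description"])
        return failures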

File diff suppressed because it is too large

View file

@@ -1,532 +0,0 @@
{"tests": [
{"description":"< in attribute name",
"input":"<z/0 <>",
"output":[["StartTag", "z", {"0": "", "<": ""}]],
"errors":[
{ "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 },
{ "code": "unexpected-character-in-attribute-name", "line": 1, "col": 7 }
]},
{"description":"< in unquoted attribute value",
"input":"<z x=<>",
"output":[["StartTag", "z", {"x": "<"}]],
"errors":[
{ "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 }
]},
{"description":"= in unquoted attribute value",
"input":"<z z=z=z>",
"output":[["StartTag", "z", {"z": "z=z"}]],
"errors":[
{ "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 }
]},
{"description":"= attribute",
"input":"<z =>",
"output":[["StartTag", "z", {"=": ""}]],
"errors":[
{ "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 }
]},
{"description":"== attribute",
"input":"<z ==>",
"output":[["StartTag", "z", {"=": ""}]],
"errors":[
{ "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 },
{ "code": "missing-attribute-value", "line": 1, "col": 6 }
]},
{"description":"=== attribute",
"input":"<z ===>",
"output":[["StartTag", "z", {"=": "="}]],
"errors":[
{ "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 },
{ "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 }
]},
{"description":"==== attribute",
"input":"<z ====>",
"output":[["StartTag", "z", {"=": "=="}]],
"errors":[
{ "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 },
{ "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 },
{ "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 }
]},
{"description":"\" after ampersand in double-quoted attribute value",
"input":"<z z=\"&\">",
"output":[["StartTag", "z", {"z": "&"}]]},
{"description":"' after ampersand in double-quoted attribute value",
"input":"<z z=\"&'\">",
"output":[["StartTag", "z", {"z": "&'"}]]},
{"description":"' after ampersand in single-quoted attribute value",
"input":"<z z='&'>",
"output":[["StartTag", "z", {"z": "&"}]]},
{"description":"\" after ampersand in single-quoted attribute value",
"input":"<z z='&\"'>",
"output":[["StartTag", "z", {"z": "&\""}]]},
{"description":"Text after bogus character reference",
"input":"<z z='&xlink_xmlns;'>bar<z>",
"output":[["StartTag","z",{"z":"&xlink_xmlns;"}],["Character","bar"],["StartTag","z",{}]]},
{"description":"Text after hex character reference",
"input":"<z z='&#x0020; foo'>bar<z>",
"output":[["StartTag","z",{"z":" foo"}],["Character","bar"],["StartTag","z",{}]]},
{"description":"Attribute name starting with \"",
"input":"<foo \"='bar'>",
"output":[["StartTag", "foo", {"\"": "bar"}]],
"errors":[
{ "code": "unexpected-character-in-attribute-name", "line": 1, "col": 6 }
]},
{"description":"Attribute name starting with '",
"input":"<foo '='bar'>",
"output":[["StartTag", "foo", {"'": "bar"}]],
"errors":[
{ "code": "unexpected-character-in-attribute-name", "line": 1, "col": 6 }
]},
{"description":"Attribute name containing \"",
"input":"<foo a\"b='bar'>",
"output":[["StartTag", "foo", {"a\"b": "bar"}]],
"errors":[
{ "code": "unexpected-character-in-attribute-name", "line": 1, "col": 7 }
]},
{"description":"Attribute name containing '",
"input":"<foo a'b='bar'>",
"output":[["StartTag", "foo", {"a'b": "bar"}]],
"errors":[
{ "code": "unexpected-character-in-attribute-name", "line": 1, "col": 7 }
]},
{"description":"Unquoted attribute value containing '",
"input":"<foo a=b'c>",
"output":[["StartTag", "foo", {"a": "b'c"}]],
"errors":[
{ "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 9 }
]},
{"description":"Unquoted attribute value containing \"",
"input":"<foo a=b\"c>",
"output":[["StartTag", "foo", {"a": "b\"c"}]],
"errors":[
{ "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 9 }
]},
{"description":"Double-quoted attribute value not followed by whitespace",
"input":"<foo a=\"b\"c>",
"output":[["StartTag", "foo", {"a": "b", "c": ""}]],
"errors":[
{ "code": "missing-whitespace-between-attributes", "line": 1, "col": 11 }
]},
{"description":"Single-quoted attribute value not followed by whitespace",
"input":"<foo a='b'c>",
"output":[["StartTag", "foo", {"a": "b", "c": ""}]],
"errors":[
{ "code": "missing-whitespace-between-attributes", "line": 1, "col": 11 }
]},
{"description":"Quoted attribute followed by permitted /",
"input":"<br a='b'/>",
"output":[["StartTag","br",{"a":"b"},true]]},
{"description":"Quoted attribute followed by non-permitted /",
"input":"<bar a='b'/>",
"output":[["StartTag","bar",{"a":"b"},true]]},
{"description":"CR EOF after doctype name",
"input":"<!doctype html \r",
"output":[["DOCTYPE", "html", null, null, false]],
"errors":[
{ "code": "eof-in-doctype", "line": 2, "col": 1 }
]},
{"description":"CR EOF in tag name",
"input":"<z\r",
"output":[],
"errors":[
{ "code": "eof-in-tag", "line": 2, "col": 1 }
]},
{"description":"Slash EOF in tag name",
"input":"<z/",
"output":[],
"errors":[
{ "code": "eof-in-tag", "line": 1, "col": 4 }
]},
{"description":"Zero hex numeric entity",
"input":"&#x0",
"output":[["Character", "\uFFFD"]],
"errors":[
{ "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 },
{ "code": "null-character-reference", "line": 1, "col": 5 }
]},
{"description":"Zero decimal numeric entity",
"input":"&#0",
"output":[["Character", "\uFFFD"]],
"errors":[
{ "code": "missing-semicolon-after-character-reference", "line": 1, "col": 4 },
{ "code": "null-character-reference", "line": 1, "col": 4 }
]},
{"description":"Zero-prefixed hex numeric entity",
"input":"&#x000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000041;",
"output":[["Character", "A"]]},
{"description":"Zero-prefixed decimal numeric entity",
"input":"&#000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000065;",
"output":[["Character", "A"]]},
{"description":"Empty hex numeric entities",
"input":"&#x &#X ",
"output":[["Character", "&#x &#X "]],
"errors":[
{ "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 4 },
{ "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 8 }
]},
{"description":"Invalid digit in hex numeric entity",
"input":"&#xZ",
"output":[["Character", "&#xZ"]],
"errors":[
{ "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 4 }
]},
{"description":"Empty decimal numeric entities",
"input":"&# &#; ",
"output":[["Character", "&# &#; "]],
"errors":[
{ "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 3 },
{ "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 6 }
]},
{"description":"Invalid digit in decimal numeric entity",
"input":"&#A",
"output":[["Character", "&#A"]],
"errors":[
{ "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 3 }
]},
{"description":"Non-BMP numeric entity",
"input":"&#x10000;",
"output":[["Character", "\uD800\uDC00"]]},
{"description":"Maximum non-BMP numeric entity",
"input":"&#X10FFFF;",
"output":[["Character", "\uDBFF\uDFFF"]],
"errors":[
{ "code": "noncharacter-character-reference", "line": 1, "col": 11 }
]},
{"description":"Above maximum numeric entity",
"input":"&#x110000;",
"output":[["Character", "\uFFFD"]],
"errors":[
{ "code": "character-reference-outside-unicode-range", "line": 1, "col": 11 }
]},
{"description":"32-bit hex numeric entity",
"input":"&#x80000041;",
"output":[["Character", "\uFFFD"]],
"errors":[
{ "code": "character-reference-outside-unicode-range", "line": 1, "col": 13 }
]},
{"description":"33-bit hex numeric entity",
"input":"&#x100000041;",
"output":[["Character", "\uFFFD"]],
"errors":[
{ "code": "character-reference-outside-unicode-range", "line": 1, "col": 14 }
]},
{"description":"33-bit decimal numeric entity",
"input":"&#4294967361;",
"output":[["Character", "\uFFFD"]],
"errors":[
{ "code": "character-reference-outside-unicode-range", "line": 1, "col": 14 }
]},
{"description":"65-bit hex numeric entity",
"input":"&#x10000000000000041;",
"output":[["Character", "\uFFFD"]],
"errors":[
{ "code": "character-reference-outside-unicode-range", "line": 1, "col": 22 }
]},
{"description":"65-bit decimal numeric entity",
"input":"&#18446744073709551681;",
"output":[["Character", "\uFFFD"]],
"errors":[
{ "code": "character-reference-outside-unicode-range", "line": 1, "col": 24 }
]},
{"description":"Surrogate code point edge cases",
"input":"&#xD7FF;&#xD800;&#xD801;&#xDFFE;&#xDFFF;&#xE000;",
"output":[["Character", "\uD7FF\uFFFD\uFFFD\uFFFD\uFFFD\uE000"]],
"errors":[
{ "code": "surrogate-character-reference", "line": 1, "col": 17 },
{ "code": "surrogate-character-reference", "line": 1, "col": 25 },
{ "code": "surrogate-character-reference", "line": 1, "col": 33 },
{ "code": "surrogate-character-reference", "line": 1, "col": 41 }
]},
{"description":"Uppercase start tag name",
"input":"<X>",
"output":[["StartTag", "x", {}]]},
{"description":"Uppercase end tag name",
"input":"</X>",
"output":[["EndTag", "x"]]},
{"description":"Uppercase attribute name",
"input":"<x X>",
"output":[["StartTag", "x", { "x":"" }]]},
{"description":"Tag/attribute name case edge values",
"input":"<x@AZ[`az{ @AZ[`az{>",
"output":[["StartTag", "x@az[`az{", { "@az[`az{":"" }]]},
{"description":"Duplicate different-case attributes",
"input":"<x x=1 x=2 X=3>",
"output":[["StartTag", "x", { "x":"1" }]],
"errors":[
{ "code": "duplicate-attribute", "line": 1, "col": 9 },
{ "code": "duplicate-attribute", "line": 1, "col": 13 }
]},
{"description":"Uppercase close tag attributes",
"input":"</x X>",
"output":[["EndTag", "x"]],
"errors":[
{ "code": "end-tag-with-attributes", "line": 1, "col": 6 }
]},
{"description":"Duplicate close tag attributes",
"input":"</x x x>",
"output":[["EndTag", "x"]],
"errors":[
{ "code": "duplicate-attribute", "line": 1, "col": 8 },
{ "code": "end-tag-with-attributes", "line": 1, "col": 8 }
]},
{"description":"Permitted slash",
"input":"<br/>",
"output":[["StartTag","br",{},true]]},
{"description":"Non-permitted slash",
"input":"<xr/>",
"output":[["StartTag","xr",{},true]]},
{"description":"Permitted slash but in close tag",
"input":"</br/>",
"output":[["EndTag", "br"]],
"errors":[
{ "code": "end-tag-with-trailing-solidus", "line": 1, "col": 6 }
]},
{"description":"Doctype public case-sensitivity (1)",
"input":"<!DoCtYpE HtMl PuBlIc \"AbC\" \"XyZ\">",
"output":[["DOCTYPE", "html", "AbC", "XyZ", true]]},
{"description":"Doctype public case-sensitivity (2)",
"input":"<!dOcTyPe hTmL pUbLiC \"aBc\" \"xYz\">",
"output":[["DOCTYPE", "html", "aBc", "xYz", true]]},
{"description":"Doctype system case-sensitivity (1)",
"input":"<!DoCtYpE HtMl SyStEm \"XyZ\">",
"output":[["DOCTYPE", "html", null, "XyZ", true]]},
{"description":"Doctype system case-sensitivity (2)",
"input":"<!dOcTyPe hTmL sYsTeM \"xYz\">",
"output":[["DOCTYPE", "html", null, "xYz", true]]},
{"description":"U+0000 in lookahead region after non-matching character",
"input":"<!doc>\u0000",
"output":[["Comment", "doc"], ["Character", "\u0000"]],
"errors":[
{ "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
{ "code": "unexpected-null-character", "line": 1, "col": 7 }
]},
{"description":"U+0000 in lookahead region",
"input":"<!doc\u0000",
"output":[["Comment", "doc\uFFFD"]],
"errors":[
{ "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
{ "code": "unexpected-null-character", "line": 1, "col": 6 }
]},
{"description":"U+0080 in lookahead region",
"input":"<!doc\u0080",
"output":[["Comment", "doc\u0080"]],
"errors":[
{ "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
{ "code": "control-character-in-input-stream", "line": 1, "col": 6 }
]},
{"description":"U+FDD1 in lookahead region",
"input":"<!doc\uFDD1",
"output":[["Comment", "doc\uFDD1"]],
"errors":[
{ "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
{ "code": "noncharacter-in-input-stream", "line": 1, "col": 6 }
]},
{"description":"U+1FFFF in lookahead region",
"input":"<!doc\uD83F\uDFFF",
"output":[["Comment", "doc\uD83F\uDFFF"]],
"errors":[
{ "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
{ "code": "noncharacter-in-input-stream", "line": 1, "col": 6 }
]},
{"description":"CR followed by non-LF",
"input":"\r?",
"output":[["Character", "\n?"]]},
{"description":"CR at EOF",
"input":"\r",
"output":[["Character", "\n"]]},
{"description":"LF at EOF",
"input":"\n",
"output":[["Character", "\n"]]},
{"description":"CR LF",
"input":"\r\n",
"output":[["Character", "\n"]]},
{"description":"CR CR",
"input":"\r\r",
"output":[["Character", "\n\n"]]},
{"description":"LF LF",
"input":"\n\n",
"output":[["Character", "\n\n"]]},
{"description":"LF CR",
"input":"\n\r",
"output":[["Character", "\n\n"]]},
{"description":"text CR CR CR text",
"input":"text\r\r\rtext",
"output":[["Character", "text\n\n\ntext"]]},
{"description":"Doctype publik",
"input":"<!DOCTYPE html PUBLIK \"AbC\" \"XyZ\">",
"output":[["DOCTYPE", "html", null, null, false]],
"errors":[
{ "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 }
]},
{"description":"Doctype publi",
"input":"<!DOCTYPE html PUBLI",
"output":[["DOCTYPE", "html", null, null, false]],
"errors":[
{ "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 }
]},
{"description":"Doctype sistem",
"input":"<!DOCTYPE html SISTEM \"AbC\">",
"output":[["DOCTYPE", "html", null, null, false]],
"errors":[
{ "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 }
]},
{"description":"Doctype sys",
"input":"<!DOCTYPE html SYS",
"output":[["DOCTYPE", "html", null, null, false]],
"errors":[
{ "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 }
]},
{"description":"Doctype html x>text",
"input":"<!DOCTYPE html x>text",
"output":[["DOCTYPE", "html", null, null, false], ["Character", "text"]],
"errors":[
{ "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 }
]},
{"description":"Grave accent in unquoted attribute",
"input":"<a a=aa`>",
"output":[["StartTag", "a", {"a":"aa`"}]],
"errors":[
{ "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 8 }
]},
{"description":"EOF in tag name state ",
"input":"<a",
"output":[],
"errors": [
{ "code": "eof-in-tag", "line": 1, "col": 3 }
]},
{"description":"EOF in before attribute name state",
"input":"<a ",
"output":[],
"errors":[
{ "code": "eof-in-tag", "line": 1, "col": 4 }
]},
{"description":"EOF in attribute name state",
"input":"<a a",
"output":[],
"errors":[
{ "code": "eof-in-tag", "line": 1, "col": 5 }
]},
{"description":"EOF in after attribute name state",
"input":"<a a ",
"output":[],
"errors":[
{ "code": "eof-in-tag", "line": 1, "col": 6 }
]},
{"description":"EOF in before attribute value state",
"input":"<a a =",
"output":[],
"errors":[
{ "code": "eof-in-tag", "line": 1, "col": 7 }
]},
{"description":"EOF in attribute value (double quoted) state",
"input":"<a a =\"a",
"output":[],
"errors":[
{ "code": "eof-in-tag", "line": 1, "col": 9 }
]},
{"description":"EOF in attribute value (single quoted) state",
"input":"<a a ='a",
"output":[],
"errors":[
{ "code": "eof-in-tag", "line": 1, "col": 9 }
]},
{"description":"EOF in attribute value (unquoted) state",
"input":"<a a =a",
"output":[],
"errors":[
{ "code": "eof-in-tag", "line": 1, "col": 8 }
]},
{"description":"EOF in after attribute value state",
"input":"<a a ='a'",
"output":[],
"errors":[
{ "code": "eof-in-tag", "line": 1, "col": 10 }
]}
]}

File diff suppressed because it is too large

View file

@@ -1,41 +0,0 @@
{"tests" : [
{"description": "Invalid Unicode character U+DFFF",
"doubleEscaped":true,
"input": "\\uDFFF",
"output":[["Character", "\\uDFFF"]],
"errors":[
{ "code": "surrogate-in-input-stream", "line": 1, "col": 1 }
]},
{"description": "Invalid Unicode character U+D800",
"doubleEscaped":true,
"input": "\\uD800",
"output":[["Character", "\\uD800"]],
"errors":[
{ "code": "surrogate-in-input-stream", "line": 1, "col": 1 }
]},
{"description": "Invalid Unicode character U+DFFF with valid preceding character",
"doubleEscaped":true,
"input": "a\\uDFFF",
"output":[["Character", "a\\uDFFF"]],
"errors":[
{ "code": "surrogate-in-input-stream", "line": 1, "col": 2 }
]},
{"description": "Invalid Unicode character U+D800 with valid following character",
"doubleEscaped":true,
"input": "\\uD800a",
"output":[["Character", "\\uD800a"]],
"errors":[
{ "code": "surrogate-in-input-stream", "line": 1, "col": 1 }
]},
{"description":"CR followed by U+0000",
"input":"\r\u0000",
"output":[["Character", "\n\u0000"]],
"errors":[
{ "code": "unexpected-null-character", "line": 2, "col": 1 }
]}
]
}

View file

@@ -1,20 +0,0 @@
{"xmlViolationTests": [
{"description":"Non-XML character",
"input":"a\uFFFFb",
"output":[["Character","a\uFFFDb"]]},
{"description":"Non-XML space",
"input":"a\u000Cb",
"output":[["Character","a b"]]},
{"description":"Double hyphen in comment",
"input":"<!-- foo -- bar -->",
"output":[["Comment"," foo - - bar "]]},
{"description":"FF between attributes",
"input":"<a b=''\u000Cc=''>",
"output":[["StartTag","a",{"b":"","c":""}]]}
]}

View file

@@ -1,108 +0,0 @@
Tree Construction Tests
=======================
Each file containing tree construction tests consists of any number of
tests separated by two newlines (LF) and a single newline before the end
of the file. For instance:

    [TEST]LF
    LF
    [TEST]LF
    LF
    [TEST]LF

Where [TEST] is the following format:
Each test must begin with a string "\#data" followed by a newline (LF).
All subsequent lines until a line that says "\#errors" are the test data
and must be passed to the system being tested unchanged, except with the
final newline (on the last line) removed.
Then there must be a line that says "\#errors". It must be followed by
one line per parse error that a conformant checker would return. It
doesn't matter what those lines are, although they can't be
"\#new-errors", "\#document-fragment", "\#document", "\#script-off",
"\#script-on", or empty, the only thing that matters is that there be
the right number of parse errors.
Then there \*may\* be a line that says "\#new-errors", which works like
the "\#errors" section adding more errors to the expected number of
errors.
Then there \*may\* be a line that says "\#document-fragment", which must
be followed by a newline (LF), followed by a string of characters that
indicates the context element, followed by a newline (LF). If the string
of characters starts with "svg ", the context element is in the SVG
namespace and the substring after "svg " is the local name. If the
string of characters starts with "math ", the context element is in the
MathML namespace and the substring after "math " is the local name.
Otherwise, the context element is in the HTML namespace and the string
is the local name. If this line is present the "\#data" must be parsed
using the HTML fragment parsing algorithm with the context element as
context.
Then there \*may\* be a line that says "\#script-off" or
"\#script-on". If a line that says "\#script-off" is present, the
parser must set the scripting flag to disabled. If a line that says
"\#script-on" is present, it must set it to enabled. Otherwise, the
test should be run in both modes.
Then there must be a line that says "\#document", which must be followed
by a dump of the tree of the parsed DOM. Each node must be represented
by a single line. Each line must start with "| ", followed by two spaces
per parent node that the node has before the root document node (a
short sketch of this indentation rule follows the list below).
- Element nodes must be represented by a "`<`", then the *tag name
  string*, then "`>`", and all the attributes must be given, sorted
lexicographically by UTF-16 code unit according to their *attribute
name string*, on subsequent lines, as if they were children of the
element node.
- Attribute nodes must have the *attribute name string*, then an "="
sign, then the attribute value in double quotes (").
- Text nodes must be the string, in double quotes. Newlines aren't
escaped.
- Comments must be "`<`" then "`!-- `" then the data then "` -->`".
- DOCTYPEs must be "`<!DOCTYPE `", then the name, then, if either the
  public id or system id is non-empty, a space, the public id in
  double-quotes, another space, the system id in double-quotes, and
  then in any case "`>`".
- Processing instructions must be "`<?`", then the target, then a
space, then the data and then "`>`". (The HTML parser cannot emit
processing instructions, but scripts can, and the WebVTT to DOM
rules can emit them.)
- Template contents are represented by the string "content" with the
children below it.
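
As a minimal sketch of the indentation rule above (assuming `depth` counts
the node's ancestors below the document node and `rendering` is the node's
textual form from the list above):

    def dump_line(depth, rendering):
        # "| " plus two spaces per ancestor, e.g. '<p>', 'id="A"' or '"One"'.
        return "| " + "  " * depth + rendering

    # dump_line(0, '<html>')  ->  '| <html>'
    # dump_line(2, '<p>')     ->  '|     <p>'
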
The *tag name string* is the local name prefixed by a namespace
designator. For the HTML namespace, the namespace designator is the
empty string, i.e. there's no prefix. For the SVG namespace, the
namespace designator is "svg ". For the MathML namespace, the namespace
designator is "math ".
The *attribute name string* is the local name prefixed by a namespace
designator. For no namespace, the namespace designator is the empty
string, i.e. there's no prefix. For the XLink namespace, the namespace
designator is "xlink ". For the XML namespace, the namespace designator
is "xml ". For the XMLNS namespace, the namespace designator is "xmlns
". Note the difference between "xlink:href" which is an attribute in no
namespace with the local name "xlink:href" and "xlink href" which is an
attribute in the xlink namespace with the local name "href".
If there is also a "\#document-fragment", the bit following "\#document"
must be a representation of the HTML fragment serialization for the
context element given by "\#document-fragment".
For example:
    #data
    <p>One<p>Two
    #errors
    3: Missing document type declaration
    #document
    | <html>
    |   <head>
    |   <body>
    |     <p>
    |       "One"
    |     <p>
    |       "Two"

View file

@@ -1,354 +0,0 @@
#data
<a><p></a></p>
#errors
(1,3): expected-doctype-but-got-start-tag
(1,10): adoption-agency-1.3
#document
| <html>
| <head>
| <body>
| <a>
| <p>
| <a>
#data
<a>1<p>2</a>3</p>
#errors
(1,3): expected-doctype-but-got-start-tag
(1,12): adoption-agency-1.3
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <p>
| <a>
| "2"
| "3"
#data
<a>1<button>2</a>3</button>
#errors
(1,3): expected-doctype-but-got-start-tag
(1,17): adoption-agency-1.3
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <button>
| <a>
| "2"
| "3"
#data
<a>1<b>2</a>3</b>
#errors
(1,3): expected-doctype-but-got-start-tag
(1,12): adoption-agency-1.3
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <b>
| "2"
| <b>
| "3"
#data
<a>1<div>2<div>3</a>4</div>5</div>
#errors
(1,3): expected-doctype-but-got-start-tag
(1,20): adoption-agency-1.3
(1,20): adoption-agency-1.3
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <div>
| <a>
| "2"
| <div>
| <a>
| "3"
| "4"
| "5"
#data
<table><a>1<p>2</a>3</p>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,10): unexpected-start-tag-implies-table-voodoo
(1,11): unexpected-character-implies-table-voodoo
(1,14): unexpected-start-tag-implies-table-voodoo
(1,15): unexpected-character-implies-table-voodoo
(1,19): unexpected-end-tag-implies-table-voodoo
(1,19): adoption-agency-1.3
(1,20): unexpected-character-implies-table-voodoo
(1,24): unexpected-end-tag-implies-table-voodoo
(1,24): eof-in-table
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <p>
| <a>
| "2"
| "3"
| <table>
#data
<b><b><a><p></a>
#errors
(1,3): expected-doctype-but-got-start-tag
(1,16): adoption-agency-1.3
(1,16): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <b>
| <b>
| <a>
| <p>
| <a>
#data
<b><a><b><p></a>
#errors
(1,3): expected-doctype-but-got-start-tag
(1,16): adoption-agency-1.3
(1,16): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <b>
| <a>
| <b>
| <b>
| <p>
| <a>
#data
<a><b><b><p></a>
#errors
(1,3): expected-doctype-but-got-start-tag
(1,16): adoption-agency-1.3
(1,16): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <a>
| <b>
| <b>
| <b>
| <b>
| <p>
| <a>
#data
<p>1<s id="A">2<b id="B">3</p>4</s>5</b>
#errors
(1,3): expected-doctype-but-got-start-tag
(1,30): unexpected-end-tag
(1,35): adoption-agency-1.3
#document
| <html>
| <head>
| <body>
| <p>
| "1"
| <s>
| id="A"
| "2"
| <b>
| id="B"
| "3"
| <s>
| id="A"
| <b>
| id="B"
| "4"
| <b>
| id="B"
| "5"
#data
<table><a>1<td>2</td>3</table>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,10): unexpected-start-tag-implies-table-voodoo
(1,11): unexpected-character-implies-table-voodoo
(1,15): unexpected-cell-in-table-body
(1,30): unexpected-implied-end-tag-in-table-view
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <a>
| "3"
| <table>
| <tbody>
| <tr>
| <td>
| "2"
#data
<table>A<td>B</td>C</table>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,8): unexpected-character-implies-table-voodoo
(1,12): unexpected-cell-in-table-body
(1,22): unexpected-character-implies-table-voodoo
#document
| <html>
| <head>
| <body>
| "AC"
| <table>
| <tbody>
| <tr>
| <td>
| "B"
#data
<a><svg><tr><input></a>
#errors
(1,3): expected-doctype-but-got-start-tag
(1,23): unexpected-end-tag
(1,23): adoption-agency-1.3
#document
| <html>
| <head>
| <body>
| <a>
| <svg svg>
| <svg tr>
| <svg input>
#data
<div><a><b><div><div><div><div><div><div><div><div><div><div></a>
#errors
(1,5): expected-doctype-but-got-start-tag
(1,65): adoption-agency-1.3
(1,65): adoption-agency-1.3
(1,65): adoption-agency-1.3
(1,65): adoption-agency-1.3
(1,65): adoption-agency-1.3
(1,65): adoption-agency-1.3
(1,65): adoption-agency-1.3
(1,65): adoption-agency-1.3
(1,65): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <div>
| <a>
| <b>
| <b>
| <div>
| <a>
| <div>
| <a>
| <div>
| <a>
| <div>
| <a>
| <div>
| <a>
| <div>
| <a>
| <div>
| <a>
| <div>
| <a>
| <div>
| <div>
#data
<div><a><b><u><i><code><div></a>
#errors
(1,5): expected-doctype-but-got-start-tag
(1,32): adoption-agency-1.3
(1,32): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <div>
| <a>
| <b>
| <u>
| <i>
| <code>
| <u>
| <i>
| <code>
| <div>
| <a>
#data
<b><b><b><b>x</b></b></b></b>y
#errors
(1,3): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <b>
| <b>
| <b>
| <b>
| "x"
| "y"
#data
<p><b><b><b><b><p>x
#errors
(1,3): expected-doctype-but-got-start-tag
(1,18): unexpected-end-tag
(1,19): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <p>
| <b>
| <b>
| <b>
| <b>
| <p>
| <b>
| <b>
| <b>
| "x"
#data
<b><em><foo><foob><fooc><aside></b></em>
#errors
(1,35): adoption-agency-1.3
(1,40): adoption-agency-1.3
(1,40): expected-closing-tag-but-got-eof
#document-fragment
div
#document
| <b>
| <em>
| <foo>
| <foob>
| <fooc>
| <aside>
| <b>

View file

@@ -1,39 +0,0 @@
#data
<b>1<i>2<p>3</b>4
#errors
(1,3): expected-doctype-but-got-start-tag
(1,16): adoption-agency-1.3
(1,17): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <b>
| "1"
| <i>
| "2"
| <i>
| <p>
| <b>
| "3"
| "4"
#data
<a><div><style></style><address><a>
#errors
(1,3): expected-doctype-but-got-start-tag
(1,35): unexpected-start-tag-implies-end-tag
(1,35): adoption-agency-1.3
(1,35): adoption-agency-1.3
(1,35): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <a>
| <div>
| <a>
| <style>
| <address>
| <a>
| <a>

View file

@@ -1,719 +0,0 @@
#data
<!doctype html><p>foo<address>bar<p>baz
#errors
(1,39): expected-closing-tag-but-got-eof
30: Unclosed element “address”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <address>
| "bar"
| <p>
| "baz"
#data
<!doctype html><address><p>foo</address>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <address>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<article>bar<p>baz
#errors
(1,39): expected-closing-tag-but-got-eof
30: Unclosed element “article”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <article>
| "bar"
| <p>
| "baz"
#data
<!doctype html><article><p>foo</article>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <article>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<aside>bar<p>baz
#errors
(1,37): expected-closing-tag-but-got-eof
28: Unclosed element “aside”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <aside>
| "bar"
| <p>
| "baz"
#data
<!doctype html><aside><p>foo</aside>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <aside>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<blockquote>bar<p>baz
#errors
(1,42): expected-closing-tag-but-got-eof
33: Unclosed element “blockquote”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <blockquote>
| "bar"
| <p>
| "baz"
#data
<!doctype html><blockquote><p>foo</blockquote>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <blockquote>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<center>bar<p>baz
#errors
(1,38): expected-closing-tag-but-got-eof
29: Unclosed element “center”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <center>
| "bar"
| <p>
| "baz"
#data
<!doctype html><center><p>foo</center>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <center>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<details>bar<p>baz
#errors
(1,39): expected-closing-tag-but-got-eof
30: Unclosed element “details”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <details>
| "bar"
| <p>
| "baz"
#data
<!doctype html><details><p>foo</details>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <details>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<dialog>bar<p>baz
#errors
(1,38): expected-closing-tag-but-got-eof
29: Unclosed element “dialog”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <dialog>
| "bar"
| <p>
| "baz"
#data
<!doctype html><dialog><p>foo</dialog>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <dialog>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<dir>bar<p>baz
#errors
(1,35): expected-closing-tag-but-got-eof
26: Unclosed element “dir”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <dir>
| "bar"
| <p>
| "baz"
#data
<!doctype html><dir><p>foo</dir>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <dir>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<div>bar<p>baz
#errors
(1,35): expected-closing-tag-but-got-eof
26: Unclosed element “div”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <div>
| "bar"
| <p>
| "baz"
#data
<!doctype html><div><p>foo</div>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <div>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<dl>bar<p>baz
#errors
(1,34): expected-closing-tag-but-got-eof
25: Unclosed element “dl”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <dl>
| "bar"
| <p>
| "baz"
#data
<!doctype html><dl><p>foo</dl>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <dl>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<fieldset>bar<p>baz
#errors
(1,40): expected-closing-tag-but-got-eof
31: Unclosed element “fieldset”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <fieldset>
| "bar"
| <p>
| "baz"
#data
<!doctype html><fieldset><p>foo</fieldset>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <fieldset>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<figcaption>bar<p>baz
#errors
(1,42): expected-closing-tag-but-got-eof
33: Unclosed element “figcaption”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <figcaption>
| "bar"
| <p>
| "baz"
#data
<!doctype html><figcaption><p>foo</figcaption>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <figcaption>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<figure>bar<p>baz
#errors
(1,38): expected-closing-tag-but-got-eof
29: Unclosed element “figure”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <figure>
| "bar"
| <p>
| "baz"
#data
<!doctype html><figure><p>foo</figure>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <figure>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<footer>bar<p>baz
#errors
(1,38): expected-closing-tag-but-got-eof
29: Unclosed element “footer”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <footer>
| "bar"
| <p>
| "baz"
#data
<!doctype html><footer><p>foo</footer>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <footer>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<header>bar<p>baz
#errors
(1,38): expected-closing-tag-but-got-eof
29: Unclosed element “header”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <header>
| "bar"
| <p>
| "baz"
#data
<!doctype html><header><p>foo</header>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <header>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<hgroup>bar<p>baz
#errors
(1,38): expected-closing-tag-but-got-eof
29: Unclosed element “hgroup”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <hgroup>
| "bar"
| <p>
| "baz"
#data
<!doctype html><hgroup><p>foo</hgroup>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <hgroup>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<listing>bar<p>baz
#errors
(1,39): expected-closing-tag-but-got-eof
30: Unclosed element “listing”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <listing>
| "bar"
| <p>
| "baz"
#data
<!doctype html><listing><p>foo</listing>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <listing>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<menu>bar<p>baz
#errors
(1,36): expected-closing-tag-but-got-eof
27: Unclosed element “menu”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <menu>
| "bar"
| <p>
| "baz"
#data
<!doctype html><menu><p>foo</menu>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <menu>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<nav>bar<p>baz
#errors
(1,35): expected-closing-tag-but-got-eof
26: Unclosed element “nav”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <nav>
| "bar"
| <p>
| "baz"
#data
<!doctype html><nav><p>foo</nav>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <nav>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<ol>bar<p>baz
#errors
(1,34): expected-closing-tag-but-got-eof
25: Unclosed element “ol”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <ol>
| "bar"
| <p>
| "baz"
#data
<!doctype html><ol><p>foo</ol>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <ol>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<pre>bar<p>baz
#errors
(1,35): expected-closing-tag-but-got-eof
26: Unclosed element “pre”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <pre>
| "bar"
| <p>
| "baz"
#data
<!doctype html><pre><p>foo</pre>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<section>bar<p>baz
#errors
(1,39): expected-closing-tag-but-got-eof
30: Unclosed element “section”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <section>
| "bar"
| <p>
| "baz"
#data
<!doctype html><section><p>foo</section>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <section>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<summary>bar<p>baz
#errors
(1,39): expected-closing-tag-but-got-eof
30: Unclosed element “summary”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <summary>
| "bar"
| <p>
| "baz"
#data
<!doctype html><summary><p>foo</summary>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <summary>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<ul>bar<p>baz
#errors
(1,34): expected-closing-tag-but-got-eof
25: Unclosed element “ul”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <ul>
| "bar"
| <p>
| "baz"
#data
<!doctype html><ul><p>foo</ul>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <ul>
| <p>
| "foo"
| "bar"

View file

@@ -1,224 +0,0 @@
#data
FOO<!-- BAR -->BAZ
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -->
| "BAZ"
#data
FOO<!-- BAR --!>BAZ
#errors
(1,3): expected-doctype-but-got-chars
(1,15): unexpected-bang-after-double-dash-in-comment
#new-errors
(1:16) incorrectly-closed-comment
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -->
| "BAZ"
#data
FOO<!-- BAR --! >BAZ
#errors
(1,3): expected-doctype-but-got-chars
#new-errors
(1:20) eof-in-comment
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR --! >BAZ -->
#data
FOO<!-- BAR --!
>BAZ
#errors
(1,3): expected-doctype-but-got-chars
#new-errors
(1:20) eof-in-comment
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR --!
>BAZ -->
#data
FOO<!-- BAR -- >BAZ
#errors
(1,3): expected-doctype-but-got-chars
(1,15): unexpected-char-in-comment
(1,21): eof-in-comment
#new-errors
(1:22) eof-in-comment
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- >BAZ -->
#data
FOO<!-- BAR -- <QUX> -- MUX -->BAZ
#errors
(1,3): expected-doctype-but-got-chars
(1,15): unexpected-char-in-comment
(1,24): unexpected-char-in-comment
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- <QUX> -- MUX -->
| "BAZ"
#data
FOO<!-- BAR -- <QUX> -- MUX --!>BAZ
#errors
(1,3): expected-doctype-but-got-chars
(1,15): unexpected-char-in-comment
(1,24): unexpected-char-in-comment
(1,31): unexpected-bang-after-double-dash-in-comment
#new-errors
(1:32) incorrectly-closed-comment
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- <QUX> -- MUX -->
| "BAZ"
#data
FOO<!-- BAR -- <QUX> -- MUX -- >BAZ
#errors
(1,3): expected-doctype-but-got-chars
(1,15): unexpected-char-in-comment
(1,24): unexpected-char-in-comment
(1,31): unexpected-char-in-comment
(1,35): eof-in-comment
#new-errors
(1:36) eof-in-comment
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- <QUX> -- MUX -- >BAZ -->
#data
FOO<!---->BAZ
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- -->
| "BAZ"
#data
FOO<!--->BAZ
#errors
(1,3): expected-doctype-but-got-chars
(1,9): incorrect-comment
#new-errors
(1:9) abrupt-closing-of-empty-comment
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- -->
| "BAZ"
#data
FOO<!-->BAZ
#errors
(1,3): expected-doctype-but-got-chars
(1,8): incorrect-comment
#new-errors
(1:8) abrupt-closing-of-empty-comment
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- -->
| "BAZ"
#data
<?xml version="1.0">Hi
#errors
(1,1): expected-tag-name-but-got-question-mark
(1,22): expected-doctype-but-got-chars
#new-errors
(1:2) unexpected-question-mark-instead-of-tag-name
#document
| <!-- ?xml version="1.0" -->
| <html>
| <head>
| <body>
| "Hi"
#data
<?xml version="1.0">
#errors
(1,1): expected-tag-name-but-got-question-mark
(1,20): expected-doctype-but-got-eof
#new-errors
(1:2) unexpected-question-mark-instead-of-tag-name
#document
| <!-- ?xml version="1.0" -->
| <html>
| <head>
| <body>
#data
<?xml version
#errors
(1,1): expected-tag-name-but-got-question-mark
(1,13): expected-doctype-but-got-eof
#new-errors
(1:2) unexpected-question-mark-instead-of-tag-name
#document
| <!-- ?xml version -->
| <html>
| <head>
| <body>
#data
FOO<!----->BAZ
#errors
(1,3): expected-doctype-but-got-chars
(1,10): unexpected-dash-after-double-dash-in-comment
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- - -->
| "BAZ"
#data
<html><!-- comment --><title>Comment before head</title>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <!-- comment -->
| <head>
| <title>
| "Comment before head"
| <body>

View file

@@ -1,470 +0,0 @@
#data
<!DOCTYPE html>Hello
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "Hello"
#data
<!dOctYpE HtMl>Hello
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPEhtml>Hello
#errors
(1,9): need-space-after-doctype
#new-errors
(1:10) missing-whitespace-before-doctype-name
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE>Hello
#errors
(1,9): need-space-after-doctype
(1,10): expected-doctype-name-but-got-right-bracket
(1,10): unknown-doctype
#new-errors
(1:10) missing-doctype-name
#document
| <!DOCTYPE >
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE >Hello
#errors
(1,11): expected-doctype-name-but-got-right-bracket
(1,11): unknown-doctype
#new-errors
(1:11) missing-doctype-name
#document
| <!DOCTYPE >
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato>Hello
#errors
(1,17): unknown-doctype
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato >Hello
#errors
(1,18): unknown-doctype
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato taco>Hello
#errors
(1,17): expected-space-or-right-bracket-in-doctype
(1,22): unknown-doctype
#new-errors
(1:18) invalid-character-sequence-after-doctype-name
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato taco "ddd>Hello
#errors
(1,17): expected-space-or-right-bracket-in-doctype
(1,27): unknown-doctype
#new-errors
(1:18) invalid-character-sequence-after-doctype-name
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato sYstEM>Hello
#errors
(1,24): unexpected-char-in-doctype
(1,24): unknown-doctype
#new-errors
(1:24) missing-doctype-system-identifier
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato sYstEM >Hello
#errors
(1,28): unexpected-char-in-doctype
(1,28): unknown-doctype
#new-errors
(1:28) missing-doctype-system-identifier
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato sYstEM ggg>Hello
#errors
(1,34): unexpected-char-in-doctype
(1,37): unknown-doctype
#new-errors
(1:34) missing-quote-before-doctype-system-identifier
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM taco >Hello
#errors
(1,25): unexpected-char-in-doctype
(1,31): unknown-doctype
#new-errors
(1:25) missing-quote-before-doctype-system-identifier
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM 'taco"'>Hello
#errors
(1,32): unknown-doctype
#document
| <!DOCTYPE potato "" "taco"">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM "taco">Hello
#errors
(1,31): unknown-doctype
#document
| <!DOCTYPE potato "" "taco">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM "tai'co">Hello
#errors
(1,33): unknown-doctype
#document
| <!DOCTYPE potato "" "tai'co">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEMtaco "ddd">Hello
#errors
(1,24): unexpected-char-in-doctype
(1,34): unknown-doctype
#new-errors
(1:24) missing-quote-before-doctype-system-identifier
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato grass SYSTEM taco>Hello
#errors
(1,17): expected-space-or-right-bracket-in-doctype
(1,35): unknown-doctype
#new-errors
(1:18) invalid-character-sequence-after-doctype-name
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato pUbLIc>Hello
#errors
(1,24): unexpected-end-of-doctype
(1,24): unknown-doctype
#new-errors
(1:24) missing-doctype-public-identifier
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato pUbLIc >Hello
#errors
(1,25): unexpected-end-of-doctype
(1,25): unknown-doctype
#new-errors
(1:25) missing-doctype-public-identifier
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato pUbLIcgoof>Hello
#errors
(1,24): unexpected-char-in-doctype
(1,28): unknown-doctype
#new-errors
(1:24) missing-quote-before-doctype-public-identifier
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC goof>Hello
#errors
(1,25): unexpected-char-in-doctype
(1,29): unknown-doctype
#new-errors
(1:25) missing-quote-before-doctype-public-identifier
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC "go'of">Hello
#errors
(1,32): unknown-doctype
#document
| <!DOCTYPE potato "go'of" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC 'go'of'>Hello
#errors
(1,29): unexpected-char-in-doctype
(1,32): unknown-doctype
#new-errors
(1:29) missing-quote-before-doctype-system-identifier
#document
| <!DOCTYPE potato "go" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC 'go:hh of' >Hello
#errors
(1,38): unknown-doctype
#document
| <!DOCTYPE potato "go:hh of" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC "W3C-//dfdf" SYSTEM ggg>Hello
#errors
(1,38): unexpected-char-in-doctype
(1,48): unknown-doctype
#new-errors
(1:38) missing-quote-before-doctype-system-identifier
#document
| <!DOCTYPE potato "W3C-//dfdf" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">Hello
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE ...>Hello
#errors
(1,14): unknown-doctype
#document
| <!DOCTYPE ...>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
#errors
(2,58): unknown-doctype
#document
| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
#errors
(2,54): unknown-doctype
#document
| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE root-element [SYSTEM OR PUBLIC FPI] "uri" [
<!-- internal declarations -->
]>
#errors
(1,23): expected-space-or-right-bracket-in-doctype
(2,30): unknown-doctype
#new-errors
(1:24) invalid-character-sequence-after-doctype-name
#document
| <!DOCTYPE root-element>
| <html>
| <head>
| <body>
| "]>"
#data
<!DOCTYPE html PUBLIC
"-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
"http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
#errors
(3,53): unknown-doctype
#document
| <!DOCTYPE html "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML SYSTEM "http://www.w3.org/DTD/HTML4-strict.dtd"><body><b>Mine!</b></body>
#errors
(1,63): unknown-doctype
#document
| <!DOCTYPE html "" "http://www.w3.org/DTD/HTML4-strict.dtd">
| <html>
| <head>
| <body>
| <b>
| "Mine!"
#data
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN""http://www.w3.org/TR/html4/strict.dtd">
#errors
(1,50): unexpected-char-in-doctype
#new-errors
(1:50) missing-whitespace-between-doctype-public-and-system-identifiers
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
#errors
(1,50): unexpected-char-in-doctype
#new-errors
(1:50) missing-whitespace-between-doctype-public-and-system-identifiers
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML PUBLIC"-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
#errors
(1,21): unexpected-char-in-doctype
(1,49): unexpected-char-in-doctype
#new-errors
(1:22) missing-whitespace-after-doctype-public-keyword
(1:49) missing-whitespace-between-doctype-public-and-system-identifiers
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML PUBLIC'-//W3C//DTD HTML 4.01//EN''http://www.w3.org/TR/html4/strict.dtd'>
#errors
(1,21): unexpected-char-in-doctype
(1,49): unexpected-char-in-doctype
#new-errors
(1:22) missing-whitespace-after-doctype-public-keyword
(1:49) missing-whitespace-between-doctype-public-and-system-identifiers
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>

View file

@@ -1,943 +0,0 @@
#data
FOO&gt;BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO>BAR"
#data
FOO&gtBAR
#errors
(1,3): expected-doctype-but-got-chars
(1,6): named-entity-without-semicolon
#new-errors
(1:7) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| "FOO>BAR"
#data
FOO&gt BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,6): named-entity-without-semicolon
#new-errors
(1:7) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| "FOO> BAR"
#data
FOO&gt;;;BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO>;;BAR"
#data
I'm &notit; I tell you
#errors
(1,4): expected-doctype-but-got-chars
(1,9): named-entity-without-semicolon
#new-errors
(1:9) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| "I'm ¬it; I tell you"
#data
I'm &notin; I tell you
#errors
(1,4): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "I'm ∉ I tell you"
#data
&ammmp;
#errors
(1,1): expected-doctype-but-got-chars
(1,7): unknown-named-character-reference
#new-errors
(1:7) unknown-named-character-reference
#document
| <html>
| <head>
| <body>
| "&ammmp;"
#data
&ammmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmp;
#errors
(1,1): expected-doctype-but-got-chars
(1,950): unknown-named-character-reference
#new-errors
(1:950) unknown-named-character-reference
#document
| <html>
| <head>
| <body>
| "&ammmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmp;"
#data
FOO& BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO& BAR"
#data
FOO&<BAR>
#errors
(1,3): expected-doctype-but-got-chars
(1,9): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| "FOO&"
| <bar>
#data
FOO&&&&gt;BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO&&&>BAR"
#data
FOO&#41;BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO)BAR"
#data
FOO&#x41;BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOOABAR"
#data
FOO&#X41;BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOOABAR"
#data
FOO&#BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,5): expected-numeric-entity
#new-errors
(1:6) absence-of-digits-in-numeric-character-reference
#document
| <html>
| <head>
| <body>
| "FOO&#BAR"
#data
FOO&#ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,5): expected-numeric-entity
#new-errors
(1:6) absence-of-digits-in-numeric-character-reference
#document
| <html>
| <head>
| <body>
| "FOO&#ZOO"
#data
FOO&#xBAR
#errors
(1,3): expected-doctype-but-got-chars
(1,7): expected-numeric-entity
#new-errors
(1:9) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| "FOOºR"
#data
FOO&#xZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,6): expected-numeric-entity
#new-errors
(1:7) absence-of-digits-in-numeric-character-reference
#document
| <html>
| <head>
| <body>
| "FOO&#xZOO"
#data
FOO&#XZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,6): expected-numeric-entity
#new-errors
(1:7) absence-of-digits-in-numeric-character-reference
#document
| <html>
| <head>
| <body>
| "FOO&#XZOO"
#data
FOO&#41BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,7): numeric-entity-without-semicolon
#new-errors
(1:8) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| "FOO)BAR"
#data
FOO&#x41BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,10): numeric-entity-without-semicolon
#new-errors
(1:11) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| "FOO䆺R"
#data
FOO&#x41ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,8): numeric-entity-without-semicolon
#new-errors
(1:9) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| "FOOAZOO"
#data
FOO&#x0000;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) null-character-reference
#document
| <html>
| <head>
| <body>
|     "FOO�ZOO"
#data
FOO&#x0078;ZOO
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOOxZOO"
#data
FOO&#x0079;ZOO
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOOyZOO"
#data
FOO&#x0080;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOO€ZOO"
#data
FOO&#x0081;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0082;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
|     "FOO‚ZOO"
#data
FOO&#x0083;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOƒZOO"
#data
FOO&#x0084;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOO„ZOO"
#data
FOO&#x0085;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOO…ZOO"
#data
FOO&#x0086;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOO†ZOO"
#data
FOO&#x0087;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOO‡ZOO"
#data
FOO&#x0088;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOˆZOO"
#data
FOO&#x0089;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOO‰ZOO"
#data
FOO&#x008A;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOŠZOO"
#data
FOO&#x008B;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
|     "FOO‹ZOO"
#data
FOO&#x008C;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOŒZOO"
#data
FOO&#x008D;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x008E;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOŽZOO"
#data
FOO&#x008F;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0090;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0091;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
|     "FOO‘ZOO"
#data
FOO&#x0092;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
|     "FOO’ZOO"
#data
FOO&#x0093;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOO“ZOO"
#data
FOO&#x0094;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOO”ZOO"
#data
FOO&#x0095;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOO•ZOO"
#data
FOO&#x0096;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
|     "FOO–ZOO"
#data
FOO&#x0097;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOO—ZOO"
#data
FOO&#x0098;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOO˜ZOO"
#data
FOO&#x0099;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOO™ZOO"
#data
FOO&#x009A;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOšZOO"
#data
FOO&#x009B;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
|     "FOO›ZOO"
#data
FOO&#x009C;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOœZOO"
#data
FOO&#x009D;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x009E;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOžZOO"
#data
FOO&#x009F;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOŸZOO"
#data
FOO&#x00A0;ZOO
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO ZOO"
#data
FOO&#xD7FF;ZOO
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO퟿ZOO"
#data
FOO&#xD800;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) surrogate-character-reference
#document
| <html>
| <head>
| <body>
|     "FOO�ZOO"
#data
FOO&#xD801;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) surrogate-character-reference
#document
| <html>
| <head>
| <body>
|     "FOO�ZOO"
#data
FOO&#xDFFE;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) surrogate-character-reference
#document
| <html>
| <head>
| <body>
|     "FOO�ZOO"
#data
FOO&#xDFFF;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) surrogate-character-reference
#document
| <html>
| <head>
| <body>
|     "FOO�ZOO"
#data
FOO&#xE000;ZOO
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x10FFFE;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
#new-errors
(1:14) noncharacter-character-reference
#document
| <html>
| <head>
| <body>
| "FOO􏿾ZOO"
#data
FOO&#x1087D4;ZOO
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO􈟔ZOO"
#data
FOO&#x10FFFF;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
#new-errors
(1:14) noncharacter-character-reference
#document
| <html>
| <head>
| <body>
| "FOO􏿿ZOO"
#data
FOO&#x110000;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
#new-errors
(1:14) character-reference-outside-unicode-range
#document
| <html>
| <head>
| <body>
|     "FOO�ZOO"
#data
FOO&#xFFFFFF;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
#new-errors
(1:14) character-reference-outside-unicode-range
#document
| <html>
| <head>
| <body>
|     "FOO�ZOO"
#data
FOO&#11111111111
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
(1,13): eof-in-numeric-entity
#new-errors
(1:17) missing-semicolon-after-character-reference
(1:17) character-reference-outside-unicode-range
#document
| <html>
| <head>
| <body>
|     "FOO�"
#data
FOO&#1111111111
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
(1,13): eof-in-numeric-entity
#new-errors
(1:16) missing-semicolon-after-character-reference
(1:16) character-reference-outside-unicode-range
#document
| <html>
| <head>
| <body>
|     "FOO�"
#data
FOO&#111111111111
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
(1,13): eof-in-numeric-entity
#new-errors
(1:18) missing-semicolon-after-character-reference
(1:18) character-reference-outside-unicode-range
#document
| <html>
| <head>
| <body>
|     "FOO�"
#data
FOO&#11111111111ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,16): numeric-entity-without-semicolon
(1,16): illegal-codepoint-for-numeric-entity
#new-errors
(1:17) missing-semicolon-after-character-reference
(1:17) character-reference-outside-unicode-range
#document
| <html>
| <head>
| <body>
|     "FOO�ZOO"
#data
FOO&#1111111111ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,15): numeric-entity-without-semicolon
(1,15): illegal-codepoint-for-numeric-entity
#new-errors
(1:16) missing-semicolon-after-character-reference
(1:16) character-reference-outside-unicode-range
#document
| <html>
| <head>
| <body>
|     "FOO�ZOO"
#data
FOO&#111111111111ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,17): numeric-entity-without-semicolon
(1,17): illegal-codepoint-for-numeric-entity
#new-errors
(1:18) missing-semicolon-after-character-reference
(1:18) character-reference-outside-unicode-range
#document
| <html>
| <head>
| <body>
|     "FOO�ZOO"
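The character-reference cases above fix the decoding rules: hex and decimal references are decoded, a missing semicolon is recovered from with a parse error, and a NULL or out-of-range code point becomes U+FFFD. A minimal sketch of the same behaviour through Beautiful Soup's html5lib builder (assumed installed):

from bs4 import BeautifulSoup

# Three inputs taken from the fixtures above.
for markup in ("FOO&#x41;BAR", "FOO&#41BAR", "FOO&#x0000;ZOO"):
    soup = BeautifulSoup(markup, "html5lib")
    print(soup.body.get_text())
# FOOABAR, then FOO)BAR, then FOO + U+FFFD + ZOO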

View file

@@ -1,309 +0,0 @@
#data
<div bar="ZZ&gt;YY"></div>
#errors
(1,20): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>YY"
#data
<div bar="ZZ&"></div>
#errors
(1,15): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&"
#data
<div bar='ZZ&'></div>
#errors
(1,15): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&"
#data
<div bar=ZZ&></div>
#errors
(1,13): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&"
#data
<div bar="ZZ&gt=YY"></div>
#errors
(1,15): named-entity-without-semicolon
(1,20): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gt=YY"
#data
<div bar="ZZ&gt0YY"></div>
#errors
(1,20): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gt0YY"
#data
<div bar="ZZ&gt9YY"></div>
#errors
(1,20): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gt9YY"
#data
<div bar="ZZ&gtaYY"></div>
#errors
(1,20): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gtaYY"
#data
<div bar="ZZ&gtZYY"></div>
#errors
(1,20): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gtZYY"
#data
<div bar="ZZ&gt YY"></div>
#errors
(1,15): named-entity-without-semicolon
(1,20): expected-doctype-but-got-start-tag
#new-errors
(1:16) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ> YY"
#data
<div bar="ZZ&gt"></div>
#errors
(1,15): named-entity-without-semicolon
(1,17): expected-doctype-but-got-start-tag
#new-errors
(1:16) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>"
#data
<div bar='ZZ&gt'></div>
#errors
(1,15): named-entity-without-semicolon
(1,17): expected-doctype-but-got-start-tag
#new-errors
(1:16) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>"
#data
<div bar=ZZ&gt></div>
#errors
(1,14): named-entity-without-semicolon
(1,15): expected-doctype-but-got-start-tag
#new-errors
(1:15) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>"
#data
<div bar="ZZ&pound_id=23"></div>
#errors
(1,18): named-entity-without-semicolon
(1,26): expected-doctype-but-got-start-tag
#new-errors
(1:19) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ£_id=23"
#data
<div bar="ZZ&prod_id=23"></div>
#errors
(1,25): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&prod_id=23"
#data
<div bar="ZZ&pound;_id=23"></div>
#errors
(1,27): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ£_id=23"
#data
<div bar="ZZ&prod;_id=23"></div>
#errors
(1,26): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ∏_id=23"
#data
<div bar="ZZ&pound=23"></div>
#errors
(1,18): named-entity-without-semicolon
(1,23): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&pound=23"
#data
<div bar="ZZ&prod=23"></div>
#errors
(1,22): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&prod=23"
#data
<div>ZZ&pound_id=23</div>
#errors
(1,5): expected-doctype-but-got-start-tag
(1,13): named-entity-without-semicolon
#new-errors
(1:14) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ£_id=23"
#data
<div>ZZ&prod_id=23</div>
#errors
(1,5): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ&prod_id=23"
#data
<div>ZZ&pound;_id=23</div>
#errors
(1,5): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ£_id=23"
#data
<div>ZZ&prod;_id=23</div>
#errors
(1,5): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ∏_id=23"
#data
<div>ZZ&pound=23</div>
#errors
(1,5): expected-doctype-but-got-start-tag
(1,13): named-entity-without-semicolon
#new-errors
(1:14) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ£=23"
#data
<div>ZZ&prod=23</div>
#errors
(1,5): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ&prod=23"
#data
<div>ZZ&AElig=</div>
#errors
#new-errors
(1:14) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| <div>
| "ZZÆ="
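The attribute cases above encode the legacy rule that a named reference without a semicolon is left untouched inside an attribute value when "=" or an alphanumeric character follows, while the same reference in ordinary text is decoded. A sketch reusing two of the inputs above (html5lib builder assumed):

from bs4 import BeautifulSoup

# "&pound" without a semicolon: kept literal in the attribute, decoded in text.
soup = BeautifulSoup('<div bar="ZZ&pound=23">ZZ&pound=23</div>', "html5lib")
print(soup.div["bar"])      # ZZ&pound=23
print(soup.div.get_text())  # ZZ£=23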

View file

@@ -1,559 +0,0 @@
#data
<nobr>X
#errors
6: HTML start tag “nobr” in a foreign namespace context.
7: End of file seen and there were open elements.
6: Unclosed element “nobr”.
#document-fragment
svg path
#document
| <svg nobr>
| "X"
#data
<font color></font>X
#errors
12: HTML start tag “font” in a foreign namespace context.
#document-fragment
svg path
#document
| <svg font>
| color=""
| "X"
#data
<font></font>X
#errors
#document-fragment
svg path
#document
| <svg font>
| "X"
#data
<g></path>X
#errors
10: End tag “path” did not match the name of the current open element (“g”).
11: End of file seen and there were open elements.
3: Unclosed element “g”.
#document-fragment
svg path
#document
| <svg g>
| "X"
#data
</path>X
#errors
5: Stray end tag “path”.
#document-fragment
svg path
#document
| "X"
#data
</foreignObject>X
#errors
5: Stray end tag “foreignobject”.
#document-fragment
svg foreignObject
#document
| "X"
#data
</desc>X
#errors
5: Stray end tag “desc”.
#document-fragment
svg desc
#document
| "X"
#data
</title>X
#errors
5: Stray end tag “title”.
#document-fragment
svg title
#document
| "X"
#data
</svg>X
#errors
5: Stray end tag “svg”.
#document-fragment
svg svg
#document
| "X"
#data
</mfenced>X
#errors
5: Stray end tag “mfenced”.
#document-fragment
math mfenced
#document
| "X"
#data
</malignmark>X
#errors
5: Stray end tag “malignmark”.
#document-fragment
math malignmark
#document
| "X"
#data
</math>X
#errors
5: Stray end tag “math”.
#document-fragment
math math
#document
| "X"
#data
</annotation-xml>X
#errors
5: Stray end tag “annotation-xml”.
#document-fragment
math annotation-xml
#document
| "X"
#data
</mtext>X
#errors
5: Stray end tag “mtext”.
#document-fragment
math mtext
#document
| "X"
#data
</mi>X
#errors
5: Stray end tag “mi”.
#document-fragment
math mi
#document
| "X"
#data
</mo>X
#errors
5: Stray end tag “mo”.
#document-fragment
math mo
#document
| "X"
#data
</mn>X
#errors
5: Stray end tag “mn”.
#document-fragment
math mn
#document
| "X"
#data
</ms>X
#errors
5: Stray end tag “ms”.
#document-fragment
math ms
#document
| "X"
#data
<b></b><mglyph/><i></i><malignmark/><u></u><ms/>X
#errors
51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
52: End of file seen and there were open elements.
51: Unclosed element “ms”.
#new-errors
(1:44-1:49) non-void-html-element-start-tag-with-trailing-solidus
#document-fragment
math ms
#document
| <b>
| <math mglyph>
| <i>
| <math malignmark>
| <u>
| <ms>
| "X"
#data
<malignmark></malignmark>
#errors
#document-fragment
math ms
#document
| <math malignmark>
#data
<div></div>
#errors
#document-fragment
math ms
#document
| <div>
#data
<figure></figure>
#errors
#document-fragment
math ms
#document
| <figure>
#data
<b></b><mglyph/><i></i><malignmark/><u></u><mn/>X
#errors
51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
52: End of file seen and there were open elements.
51: Unclosed element “mn”.
#new-errors
(1:44-1:49) non-void-html-element-start-tag-with-trailing-solidus
#document-fragment
math mn
#document
| <b>
| <math mglyph>
| <i>
| <math malignmark>
| <u>
| <mn>
| "X"
#data
<malignmark></malignmark>
#errors
#document-fragment
math mn
#document
| <math malignmark>
#data
<div></div>
#errors
#document-fragment
math mn
#document
| <div>
#data
<figure></figure>
#errors
#document-fragment
math mn
#document
| <figure>
#data
<b></b><mglyph/><i></i><malignmark/><u></u><mo/>X
#errors
51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
52: End of file seen and there were open elements.
51: Unclosed element “mo”.
#new-errors
(1:44-1:49) non-void-html-element-start-tag-with-trailing-solidus
#document-fragment
math mo
#document
| <b>
| <math mglyph>
| <i>
| <math malignmark>
| <u>
| <mo>
| "X"
#data
<malignmark></malignmark>
#errors
#document-fragment
math mo
#document
| <math malignmark>
#data
<div></div>
#errors
#document-fragment
math mo
#document
| <div>
#data
<figure></figure>
#errors
#document-fragment
math mo
#document
| <figure>
#data
<b></b><mglyph/><i></i><malignmark/><u></u><mi/>X
#errors
51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
52: End of file seen and there were open elements.
51: Unclosed element “mi”.
#new-errors
(1:44-1:49) non-void-html-element-start-tag-with-trailing-solidus
#document-fragment
math mi
#document
| <b>
| <math mglyph>
| <i>
| <math malignmark>
| <u>
| <mi>
| "X"
#data
<malignmark></malignmark>
#errors
#document-fragment
math mi
#document
| <math malignmark>
#data
<div></div>
#errors
#document-fragment
math mi
#document
| <div>
#data
<figure></figure>
#errors
#document-fragment
math mi
#document
| <figure>
#data
<b></b><mglyph/><i></i><malignmark/><u></u><mtext/>X
#errors
51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
52: End of file seen and there were open elements.
51: Unclosed element “mtext”.
#new-errors
(1:44-1:52) non-void-html-element-start-tag-with-trailing-solidus
#document-fragment
math mtext
#document
| <b>
| <math mglyph>
| <i>
| <math malignmark>
| <u>
| <mtext>
| "X"
#data
<malignmark></malignmark>
#errors
#document-fragment
math mtext
#document
| <math malignmark>
#data
<div></div>
#errors
#document-fragment
math mtext
#document
| <div>
#data
<figure></figure>
#errors
#document-fragment
math mtext
#document
| <figure>
#data
<div></div>
#errors
5: HTML start tag “div” in a foreign namespace context.
#document-fragment
math annotation-xml
#document
| <math div>
#data
<figure></figure>
#errors
#document-fragment
math annotation-xml
#document
| <math figure>
#data
<div></div>
#errors
5: HTML start tag “div” in a foreign namespace context.
#document-fragment
math math
#document
| <math div>
#data
<figure></figure>
#errors
#document-fragment
math math
#document
| <math figure>
#data
<div></div>
#errors
#document-fragment
svg foreignObject
#document
| <div>
#data
<figure></figure>
#errors
#document-fragment
svg foreignObject
#document
| <figure>
#data
<div></div>
#errors
#document-fragment
svg title
#document
| <div>
#data
<figure></figure>
#errors
#document-fragment
svg title
#document
| <figure>
#data
<figure></figure>
#errors
#document-fragment
svg desc
#document
| <figure>
#data
<div><h1>X</h1></div>
#errors
5: HTML start tag “div” in a foreign namespace context.
9: HTML start tag “h1” in a foreign namespace context.
#document-fragment
svg svg
#document
| <svg div>
| <svg h1>
| "X"
#data
<div></div>
#errors
5: HTML start tag “div” in a foreign namespace context.
#document-fragment
svg svg
#document
| <svg div>
#data
<div></div>
#errors
#document-fragment
svg desc
#document
| <div>
#data
<figure></figure>
#errors
#document-fragment
svg desc
#document
| <figure>
#data
<plaintext><foo>
#errors
(1,16): expected-closing-tag-but-got-eof
#document-fragment
svg desc
#document
| <plaintext>
| "<foo>"
#data
<frameset>X
#errors
6: Stray start tag “frameset”.
#document-fragment
svg desc
#document
| "X"
#data
<head>X
#errors
6: Stray start tag “head”.
#document-fragment
svg desc
#document
| "X"
#data
<body>X
#errors
6: Stray start tag “body”.
#document-fragment
svg desc
#document
| "X"
#data
<html>X
#errors
6: Stray start tag “html”.
#document-fragment
svg desc
#document
| "X"
#data
<html class="foo">X
#errors
6: Stray start tag “html”.
#document-fragment
svg desc
#document
| "X"
#data
<body class="foo">X
#errors
6: Stray start tag “body”.
#document-fragment
svg desc
#document
| "X"

View file

@@ -1,302 +0,0 @@
#data
<div<div>
#errors
(1,9): expected-doctype-but-got-start-tag
(1,9): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <div<div>
#data
<div foo<bar=''>
#errors
(1,9): invalid-character-in-attribute-name
(1,16): expected-doctype-but-got-start-tag
(1,16): expected-closing-tag-but-got-eof
#new-errors
(1:9) unexpected-character-in-attribute-name
#document
| <html>
| <head>
| <body>
| <div>
| foo<bar=""
#data
<div foo=`bar`>
#errors
(1,10): equals-in-unquoted-attribute-value
(1,14): unexpected-character-in-unquoted-attribute-value
(1,15): expected-doctype-but-got-start-tag
(1,15): expected-closing-tag-but-got-eof
#new-errors
(1:10) unexpected-character-in-unquoted-attribute-value
(1:14) unexpected-character-in-unquoted-attribute-value
#document
| <html>
| <head>
| <body>
| <div>
| foo="`bar`"
#data
<div \"foo=''>
#errors
(1,7): invalid-character-in-attribute-name
(1,14): expected-doctype-but-got-start-tag
(1,14): expected-closing-tag-but-got-eof
#new-errors
(1:7) unexpected-character-in-attribute-name
#document
| <html>
| <head>
| <body>
| <div>
| \"foo=""
#data
<a href='\nbar'></a>
#errors
(1,16): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <a>
| href="\nbar"
#data
<!DOCTYPE html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
&lang;&rang;
#errors
(1,6): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "⟨⟩"
#data
&apos;
#errors
(1,6): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "'"
#data
&ImaginaryI;
#errors
(1,12): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
|     "ⅈ"
#data
&Kopf;
#errors
(1,6): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "𝕂"
#data
&notinva;
#errors
(1,9): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "∉"
#data
<?import namespace="foo" implementation="#bar">
#errors
(1,1): expected-tag-name-but-got-question-mark
(1,47): expected-doctype-but-got-eof
#new-errors
(1:2) unexpected-question-mark-instead-of-tag-name
#document
| <!-- ?import namespace="foo" implementation="#bar" -->
| <html>
| <head>
| <body>
#data
<!--foo--bar-->
#errors
(1,10): unexpected-char-in-comment
(1,15): expected-doctype-but-got-eof
#document
| <!-- foo--bar -->
| <html>
| <head>
| <body>
#data
<![CDATA[x]]>
#errors
(1,2): expected-dashes-or-doctype
(1,13): expected-doctype-but-got-eof
#new-errors
(1:9) cdata-in-html-content
#document
| <!-- [CDATA[x]] -->
| <html>
| <head>
| <body>
#data
<textarea><!--</textarea>--></textarea>
#errors
(1,10): expected-doctype-but-got-start-tag
(1,39): unexpected-end-tag
#document
| <html>
| <head>
| <body>
| <textarea>
| "<!--"
| "-->"
#data
<textarea><!--</textarea>-->
#errors
(1,10): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <textarea>
| "<!--"
| "-->"
#data
<style><!--</style>--></style>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,30): unexpected-end-tag
#document
| <html>
| <head>
| <style>
| "<!--"
| <body>
| "-->"
#data
<style><!--</style>-->
#errors
(1,7): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <style>
| "<!--"
| <body>
| "-->"
#data
<ul><li>A </li> <li>B</li></ul>
#errors
(1,4): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ul>
| <li>
| "A "
| " "
| <li>
| "B"
#data
<table><form><input type=hidden><input></form><div></div></table>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,13): unexpected-form-in-table
(1,32): unexpected-hidden-input-in-table
(1,39): unexpected-start-tag-implies-table-voodoo
(1,46): unexpected-end-tag-implies-table-voodoo
(1,46): unexpected-end-tag
(1,51): unexpected-start-tag-implies-table-voodoo
(1,57): unexpected-end-tag-implies-table-voodoo
#document
| <html>
| <head>
| <body>
| <input>
| <div>
| <table>
| <form>
| <input>
| type="hidden"
#data
<i>A<b>B<p></i>C</b>D
#errors
(1,3): expected-doctype-but-got-start-tag
(1,15): adoption-agency-1.3
(1,20): adoption-agency-1.3
#document
| <html>
| <head>
| <body>
| <i>
| "A"
| <b>
| "B"
| <b>
| <p>
| <b>
| <i>
| "C"
| "D"
#data
<div></div>
#errors
(1,5): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
#data
<svg></svg>
#errors
(1,5): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <svg svg>
#data
<math></math>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <math math>
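The last two cases record that <svg> and <math> open foreign-content subtrees, which is why the expected trees prefix them with their namespace ("svg svg", "math math"). With the html5lib builder, Beautiful Soup keeps that namespace on the tag; a small sketch, assuming that builder is installed:

from bs4 import BeautifulSoup

# The namespace URIs correspond to the "svg"/"math" prefixes in the trees above.
soup = BeautifulSoup("<svg></svg><math></math>", "html5lib")
print(soup.svg.namespace)   # http://www.w3.org/2000/svg
print(soup.math.namespace)  # http://www.w3.org/1998/Math/MathML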

View file

@@ -1,54 +0,0 @@
#data
<button>1</foo>
#errors
(1,8): expected-doctype-but-got-start-tag
(1,15): unexpected-end-tag
(1,15): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <button>
| "1"
#data
<foo>1<p>2</foo>
#errors
(1,5): expected-doctype-but-got-start-tag
(1,16): unexpected-end-tag
(1,16): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <foo>
| "1"
| <p>
| "2"
#data
<dd>1</foo>
#errors
(1,4): expected-doctype-but-got-start-tag
(1,11): unexpected-end-tag
#document
| <html>
| <head>
| <body>
| <dd>
| "1"
#data
<foo>1<dd>2</foo>
#errors
(1,5): expected-doctype-but-got-start-tag
(1,17): unexpected-end-tag
(1,17): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <foo>
| "1"
| <dd>
| "2"

View file

@@ -1,49 +0,0 @@
#data
<isindex>
#errors
(1,9): expected-doctype-but-got-start-tag
(1,9): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <isindex>
#data
<isindex name="A" action="B" prompt="C" foo="D">
#errors
(1,48): expected-doctype-but-got-start-tag
(1,48): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <isindex>
| action="B"
| foo="D"
| name="A"
| prompt="C"
#data
<form><isindex>
#errors
(1,6): expected-doctype-but-got-start-tag
(1,15): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <form>
| <isindex>
#data
<!doctype html><isindex>x</isindex>x
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <isindex>
| "x"
| "x"

View file

@@ -1,46 +0,0 @@
#data
<!doctype html><p>foo<main>bar<p>baz
#errors
(1,36): expected-closing-tag-but-got-eof
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <main>
| "bar"
| <p>
| "baz"
#data
<!doctype html><main><p>foo</main>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <main>
| <p>
| "foo"
| "bar"
#data
<!DOCTYPE html>xxx<svg><x><g><a><main><b>
#errors
* (1,42) unexpected HTML-like start tag token in foreign content
* (1,42) unexpected end of file
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "xxx"
| <svg svg>
| <svg x>
| <svg g>
| <svg a>
| <svg main>
| <b>

View file

@@ -1,81 +0,0 @@
#data
<math><tr><td><mo><tr>
#errors
#document-fragment
td
#document
| <math math>
| <math tr>
| <math td>
| <math mo>
#data
<math><tr><td><mo><tr>
#errors
#document-fragment
tr
#document
| <math math>
| <math tr>
| <math td>
| <math mo>
#data
<math><thead><mo><tbody>
#errors
#document-fragment
thead
#document
| <math math>
| <math thead>
| <math mo>
#data
<math><tfoot><mo><tbody>
#errors
#document-fragment
tfoot
#document
| <math math>
| <math tfoot>
| <math mo>
#data
<math><tbody><mo><tfoot>
#errors
#document-fragment
tbody
#document
| <math math>
| <math tbody>
| <math mo>
#data
<math><tbody><mo></table>
#errors
#document-fragment
tbody
#document
| <math math>
| <math tbody>
| <math mo>
#data
<math><thead><mo></table>
#errors
#document-fragment
tbody
#document
| <math math>
| <math thead>
| <math mo>
#data
<math><tfoot><mo></table>
#errors
#document-fragment
tbody
#document
| <math math>
| <math tfoot>
| <math mo>
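These cases use the #document-fragment directive: the input is parsed as if it appeared inside the named container element. A rough equivalent via html5lib's public API (assumed installed; it accepts a plain container name, so the namespaced contexts used elsewhere in these fixtures are driven by the test harness rather than this call):

import html5lib

# First case above, parsed in a <td> context; the fragment's only child is a
# MathML <math> element holding the nested tr/td/mo structure.
fragment = html5lib.parseFragment("<math><tr><td><mo><tr>", container="td")
for child in fragment:
    print(child.tag)  # {http://www.w3.org/1998/Math/MathML}math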

View file

@@ -1,257 +0,0 @@
#data
<menuitem>
#errors
10: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
10: End of file seen and there were open elements.
10: Unclosed element “menuitem”.
#document
| <html>
| <head>
| <body>
| <menuitem>
#data
</menuitem>
#errors
11: End tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
11: Stray end tag “menuitem”.
#document
| <html>
| <head>
| <body>
#data
<!DOCTYPE html><body><menuitem>A
#errors
32: End of file seen and there were open elements.
31: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <menuitem>
| "A"
#data
<!DOCTYPE html><body><menuitem>A<menuitem>B
#errors
43: End of file seen and there were open elements.
42: Unclosed element “menuitem”.
31: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <menuitem>
| "A"
| <menuitem>
| "B"
#data
<!DOCTYPE html><body><menuitem>A<menu>B</menu>
#errors
46: End of file seen and there were open elements.
31: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <menuitem>
| "A"
| <menu>
| "B"
#data
<!DOCTYPE html><body><menuitem>A<hr>B
#errors
37: End of file seen and there were open elements.
31: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <menuitem>
| "A"
| <hr>
| "B"
#data
<!DOCTYPE html><li><menuitem><li>
#errors
33: End tag “li” implied, but there were open elements.
29: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <li>
| <menuitem>
| <li>
#data
<!DOCTYPE html><menuitem><p></menuitem>x
#errors
39: Stray end tag “menuitem”.
40: End of file seen and there were open elements.
25: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <menuitem>
| <p>
| "x"
#data
<!DOCTYPE html><p><b></p><menuitem>
#errors
25: End tag “p” seen, but there were open elements.
21: Unclosed element “b”.
35: End of file seen and there were open elements.
35: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <b>
| <b>
| <menuitem>
#data
<!DOCTYPE html><menuitem><asdf></menuitem>x
#errors
42: End tag “menuitem” seen, but there were open elements.
31: Unclosed element “asdf”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <menuitem>
| <asdf>
| "x"
#data
<!DOCTYPE html></menuitem>
#errors
26: Stray end tag “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
<!DOCTYPE html><html></menuitem>
#errors
26: Stray end tag “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
<!DOCTYPE html><head></menuitem>
#errors
26: Stray end tag “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
<!DOCTYPE html><select><menuitem></select>
#errors
33: Stray start tag “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!DOCTYPE html><option><menuitem>
#errors
33: End of file seen and there were open elements.
33: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <option>
| <menuitem>
#data
<!DOCTYPE html><menuitem><option>
#errors
33: End of file seen and there were open elements.
25: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <menuitem>
| <option>
#data
<!DOCTYPE html><menuitem></body>
#errors
32: End tag for “body” seen, but there were unclosed elements.
25: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <menuitem>
#data
<!DOCTYPE html><menuitem></html>
#errors
32: End tag for “html” seen, but there were unclosed elements.
25: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <menuitem>
#data
<!DOCTYPE html><menuitem><p>
#errors
28: End of file seen and there were open elements.
25: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <menuitem>
| <p>
#data
<!DOCTYPE html><menuitem><li>
#errors
29: End of file seen and there were open elements.
25: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <menuitem>
| <li>

View file

@@ -1,16 +0,0 @@
#data
<body><table><tr><td><svg><td><foreignObject><span></td>Foo
#errors
#document
| <html>
| <head>
| <body>
| "Foo"
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg td>
| <svg foreignObject>
| <span>

View file

@@ -1,237 +0,0 @@
#data
<head><noscript><!doctype html><!--foo--></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
Line: 1 Col: 31 Unexpected DOCTYPE. Ignored.
#script-off
#document
| <html>
| <head>
| <noscript>
| <!-- foo -->
| <body>
#data
<head><noscript><html class="foo"><!--foo--></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
Line: 1 Col: 34 html needs to be the first start tag.
#script-off
#document
| <html>
| class="foo"
| <head>
| <noscript>
| <!-- foo -->
| <body>
#data
<head><noscript></noscript>
#errors
(1,6): expected-doctype-but-got-tag
#script-off
#document
| <html>
| <head>
| <noscript>
| <body>
#data
<head><noscript> </noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
#script-off
#document
| <html>
| <head>
| <noscript>
| " "
| <body>
#data
<head><noscript><!--foo--></noscript>
#errors
(1,6): expected-doctype-but-got-tag
#script-off
#document
| <html>
| <head>
| <noscript>
| <!-- foo -->
| <body>
#data
<head><noscript><basefont><!--foo--></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
#script-off
#document
| <html>
| <head>
| <noscript>
| <basefont>
| <!-- foo -->
| <body>
#data
<head><noscript><bgsound><!--foo--></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
#script-off
#document
| <html>
| <head>
| <noscript>
| <bgsound>
| <!-- foo -->
| <body>
#data
<head><noscript><link><!--foo--></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
#script-off
#document
| <html>
| <head>
| <noscript>
| <link>
| <!-- foo -->
| <body>
#data
<head><noscript><meta><!--foo--></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
#script-off
#document
| <html>
| <head>
| <noscript>
| <meta>
| <!-- foo -->
| <body>
#data
<head><noscript><noframes>XXX</noscript></noframes></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
#script-off
#document
| <html>
| <head>
| <noscript>
| <noframes>
| "XXX</noscript>"
| <body>
#data
<head><noscript><style>XXX</style></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
#script-off
#document
| <html>
| <head>
| <noscript>
| <style>
| "XXX"
| <body>
#data
<head><noscript></br><!--foo--></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
Line: 1 Col: 21 Element br not allowed in a inhead-noscript context
Line: 1 Col: 21 Unexpected end tag (br). Treated as br element.
Line: 1 Col: 42 Unexpected end tag (noscript). Ignored.
#script-off
#document
| <html>
| <head>
| <noscript>
| <body>
| <br>
| <!-- foo -->
#data
<head><noscript><head class="foo"><!--foo--></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
Line: 1 Col: 34 Unexpected start tag (head).
#script-off
#document
| <html>
| <head>
| <noscript>
| <!-- foo -->
| <body>
#data
<head><noscript><noscript class="foo"><!--foo--></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
Line: 1 Col: 34 Unexpected start tag (noscript).
#script-off
#document
| <html>
| <head>
| <noscript>
| <!-- foo -->
| <body>
#data
<head><noscript></p><!--foo--></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
Line: 1 Col: 20 Unexpected end tag (p). Ignored.
#script-off
#document
| <html>
| <head>
| <noscript>
| <!-- foo -->
| <body>
#data
<head><noscript><p><!--foo--></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
Line: 1 Col: 19 Element p not allowed in a inhead-noscript context
Line: 1 Col: 40 Unexpected end tag (noscript). Ignored.
#script-off
#document
| <html>
| <head>
| <noscript>
| <body>
| <p>
| <!-- foo -->
#data
<head><noscript>XXX<!--foo--></noscript></head>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
Line: 1 Col: 19 Unexpected non-space character. Expected inhead-noscript content
Line: 1 Col: 30 Unexpected end tag (noscript). Ignored.
Line: 1 Col: 37 Unexpected end tag (head). Ignored.
#script-off
#document
| <html>
| <head>
| <noscript>
| <body>
| "XXX"
| <!-- foo -->
#data
<head><noscript>
#errors
(1,6): expected-doctype-but-got-tag
(1,6): eof-in-head-noscript
#script-off
#document
| <html>
| <head>
| <noscript>
| <body>

View file

@@ -1,46 +0,0 @@
#data
<input type="hidden"><frameset>
#errors
(1,21): expected-doctype-but-got-start-tag
(1,31): unexpected-start-tag
(1,31): eof-in-frameset
#document
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><table><caption><svg>foo</table>bar
#errors
(1,47): unexpected-end-tag
(1,47): end-table-tag-in-caption
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| "foo"
| "bar"
#data
<table><tr><td><svg><desc><td></desc><circle>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,30): unexpected-cell-end-tag
(1,37): unexpected-end-tag
(1,45): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg desc>
| <td>
| <circle>

View file

@@ -1,301 +0,0 @@
#data
<html><ruby>a<rb>b<rb></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rb>
| "b"
| <rb>
#data
<html><ruby>a<rb>b<rt></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rb>
| "b"
| <rt>
#data
<html><ruby>a<rb>b<rtc></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rb>
| "b"
| <rtc>
#data
<html><ruby>a<rb>b<rp></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rb>
| "b"
| <rp>
#data
<html><ruby>a<rb>b<span></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
(1,31): unexpected-end-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rb>
| "b"
| <span>
#data
<html><ruby>a<rt>b<rb></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rt>
| "b"
| <rb>
#data
<html><ruby>a<rt>b<rt></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rt>
| "b"
| <rt>
#data
<html><ruby>a<rt>b<rtc></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rt>
| "b"
| <rtc>
#data
<html><ruby>a<rt>b<rp></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rt>
| "b"
| <rp>
#data
<html><ruby>a<rt>b<span></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
(1,31): unexpected-end-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rt>
| "b"
| <span>
#data
<html><ruby>a<rtc>b<rb></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rtc>
| "b"
| <rb>
#data
<html><ruby>a<rtc>b<rt>c<rt>d</ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rtc>
| "b"
| <rt>
| "c"
| <rt>
| "d"
#data
<html><ruby>a<rtc>b<rtc></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rtc>
| "b"
| <rtc>
#data
<html><ruby>a<rtc>b<rp></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rtc>
| "b"
| <rp>
#data
<html><ruby>a<rtc>b<span></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rtc>
| "b"
| <span>
#data
<html><ruby>a<rp>b<rb></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rp>
| "b"
| <rb>
#data
<html><ruby>a<rp>b<rt></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rp>
| "b"
| <rt>
#data
<html><ruby>a<rp>b<rtc></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rp>
| "b"
| <rtc>
#data
<html><ruby>a<rp>b<rp></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rp>
| "b"
| <rp>
#data
<html><ruby>a<rp>b<span></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
(1,31): unexpected-end-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rp>
| "b"
| <span>
#data
<html><ruby><rtc><ruby>a<rb>b<rt></ruby></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| <rtc>
| <ruby>
| "a"
| <rb>
| "b"
| <rt>

View file

@@ -1,385 +0,0 @@
#data
FOO<script>'Hello'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'Hello'"
| "BAR"
#data
FOO<script></script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script></script >BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script></script/>BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,21): self-closing-flag-on-end-tag
#new-errors
(1:21) end-tag-with-trailing-solidus
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script></script/ >BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,20): unexpected-character-after-solidus-in-tag
#new-errors
(1:21) unexpected-solidus-in-tag
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script type="text/plain"></scriptx>BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,42): expected-named-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "</scriptx>BAR"
#data
FOO<script></script foo=">" dd>BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,31): attributes-in-end-tag
#new-errors
(1:31) end-tag-with-attributes
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script>'<'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<'"
| "BAR"
#data
FOO<script>'<!'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!'"
| "BAR"
#data
FOO<script>'<!-'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-'"
| "BAR"
#data
FOO<script>'<!--'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!--'"
| "BAR"
#data
FOO<script>'<!---'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!---'"
| "BAR"
#data
FOO<script>'<!-->'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-->'"
| "BAR"
#data
FOO<script>'<!-->'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-->'"
| "BAR"
#data
FOO<script>'<!-- potato'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-- potato'"
| "BAR"
#data
FOO<script>'<!-- <sCrIpt'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-- <sCrIpt'"
| "BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt>'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,56): expected-script-data-but-got-eof
(1,56): expected-named-closing-tag-but-got-eof
#new-errors
(1:57) eof-in-script-html-comment-like-text
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt>'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> -'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,58): expected-script-data-but-got-eof
(1,58): expected-named-closing-tag-but-got-eof
#new-errors
(1:59) eof-in-script-html-comment-like-text
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> -'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> --'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,59): expected-script-data-but-got-eof
(1,59): expected-named-closing-tag-but-got-eof
#new-errors
(1:60) eof-in-script-html-comment-like-text
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> --'</script>BAR"
#data
FOO<script>'<!-- <sCrIpt> -->'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-- <sCrIpt> -->'"
| "BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> --!>'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,61): expected-script-data-but-got-eof
(1,61): expected-named-closing-tag-but-got-eof
#new-errors
(1:62) eof-in-script-html-comment-like-text
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> --!>'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> -- >'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,61): expected-script-data-but-got-eof
(1,61): expected-named-closing-tag-but-got-eof
#new-errors
(1:62) eof-in-script-html-comment-like-text
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> -- >'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt '</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,56): expected-script-data-but-got-eof
(1,56): expected-named-closing-tag-but-got-eof
#new-errors
(1:57) eof-in-script-html-comment-like-text
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt '</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,56): expected-script-data-but-got-eof
(1,56): expected-named-closing-tag-but-got-eof
#new-errors
(1:57) eof-in-script-html-comment-like-text
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt/'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt\'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt\'"
| "BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR</script>QUX
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt/'</script>BAR"
| "QUX"
#data
FOO<script><!--<script>-></script>--></script>QUX
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "<!--<script>-></script>-->"
| "QUX"
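The script cases above pin down that script element content is raw text: markup-like characters inside it are not tokenized as tags. A short sketch with the html5lib builder (assumed installed):

from bs4 import BeautifulSoup

# Everything between <script> and the closing tag stays literal text.
soup = BeautifulSoup("FOO<script>'<!-- <b>not markup</b> -->'</script>BAR", "html5lib")
print(soup.script.string)       # '<!-- <b>not markup</b> -->'
print(len(soup.find_all("b")))  # 0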

View file

@@ -1,16 +0,0 @@
#data
<p><b id="A"><script>document.getElementById("A").id = "B"</script></p>TEXT</b>
#errors
#script-on
#document
| <html>
| <head>
| <body>
| <p>
| <b>
| id="B"
| <script>
| "document.getElementById("A").id = "B""
| <b>
| id="A"
| "TEXT"

View file

@@ -1,27 +0,0 @@
#data
<p><font size=4><font size=4><font size=4><script>document.getElementsByTagName("font")[2].setAttribute("size", "5");</script><font size=4><p>X
#errors
#script-on
#document
| <html>
| <head>
| <body>
| <p>
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="5"
| <script>
| "document.getElementsByTagName("font")[2].setAttribute("size", "5");"
| <font>
| size="4"
| <p>
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| "X"

View file

@@ -1,30 +0,0 @@
#data
1<script>document.write("2")</script>3
#errors
#script-on
#document
| <html>
| <head>
| <body>
| "1"
| <script>
| "document.write("2")"
| "23"
#data
1<script>document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")</script>4
#errors
#script-on
#document
| <html>
| <head>
| <body>
| "1"
| <script>
| "document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")"
| <script>
| "document.write('2')"
| "2"
| <script>
| "document.write('3')"
| "34"

View file

@@ -1,286 +0,0 @@
#data
<table><th>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,11): unexpected-cell-in-table-body
(1,11): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <th>
#data
<table><td>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,11): unexpected-cell-in-table-body
(1,11): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
#data
<table><col foo='bar'>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,22): eof-in-table
#document
| <html>
| <head>
| <body>
| <table>
| <colgroup>
| <col>
| foo="bar"
#data
<table><colgroup></html>foo
#errors
(1,7): expected-doctype-but-got-start-tag
(1,24): unexpected-end-tag
(1,27): foster-parenting-character-in-table
(1,27): foster-parenting-character-in-table
(1,27): foster-parenting-character-in-table
(1,27): eof-in-table
#document
| <html>
| <head>
| <body>
| "foo"
| <table>
| <colgroup>
#data
<table></table><p>foo
#errors
(1,7): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <table>
| <p>
| "foo"
#data
<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr><td>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,14): unexpected-end-tag
(1,24): unexpected-end-tag
(1,30): unexpected-end-tag
(1,41): unexpected-end-tag
(1,48): unexpected-end-tag
(1,56): unexpected-end-tag
(1,61): unexpected-end-tag
(1,69): unexpected-end-tag
(1,74): unexpected-end-tag
(1,82): unexpected-end-tag
(1,87): unexpected-end-tag
(1,91): unexpected-cell-in-table-body
(1,91): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
#data
<table><select><option>3</select></table>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,15): unexpected-start-tag-implies-table-voodoo
#document
| <html>
| <head>
| <body>
| <select>
| <option>
| "3"
| <table>
#data
<table><select><table></table></select></table>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,15): unexpected-start-tag-implies-table-voodoo
(1,22): unexpected-table-element-start-tag-in-select-in-table
(1,22): unexpected-start-tag-implies-end-tag
(1,39): unexpected-end-tag
(1,47): unexpected-end-tag
#document
| <html>
| <head>
| <body>
| <select>
| <table>
| <table>
#data
<table><select></table>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,15): unexpected-start-tag-implies-table-voodoo
(1,23): unexpected-table-element-end-tag-in-select-in-table
#document
| <html>
| <head>
| <body>
| <select>
| <table>
#data
<table><select><option>A<tr><td>B</td></tr></table>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,15): unexpected-start-tag-implies-table-voodoo
(1,28): unexpected-table-element-start-tag-in-select-in-table
#document
| <html>
| <head>
| <body>
| <select>
| <option>
| "A"
| <table>
| <tbody>
| <tr>
| <td>
| "B"
#data
<table><td></body></caption></col></colgroup></html>foo
#errors
(1,7): expected-doctype-but-got-start-tag
(1,11): unexpected-cell-in-table-body
(1,18): unexpected-end-tag
(1,28): unexpected-end-tag
(1,34): unexpected-end-tag
(1,45): unexpected-end-tag
(1,52): unexpected-end-tag
(1,55): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "foo"
#data
<table><td>A</table>B
#errors
(1,7): expected-doctype-but-got-start-tag
(1,11): unexpected-cell-in-table-body
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "A"
| "B"
#data
<table><tr><caption>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,20): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <caption>
#data
<table><tr></body></caption></col></colgroup></html></td></th><td>foo
#errors
(1,7): expected-doctype-but-got-start-tag
(1,18): unexpected-end-tag-in-table-row
(1,28): unexpected-end-tag-in-table-row
(1,34): unexpected-end-tag-in-table-row
(1,45): unexpected-end-tag-in-table-row
(1,52): unexpected-end-tag-in-table-row
(1,57): unexpected-end-tag-in-table-row
(1,62): unexpected-end-tag-in-table-row
(1,69): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "foo"
#data
<table><td><tr>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,11): unexpected-cell-in-table-body
(1,15): eof-in-table
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <tr>
#data
<table><td><button><td>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,11): unexpected-cell-in-table-body
(1,23): unexpected-cell-end-tag
(1,23): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <button>
| <td>
#data
<table><tr><td><svg><desc><td>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,30): unexpected-cell-end-tag
(1,30): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg desc>
| <td>

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@ -1,849 +0,0 @@
#data
<!DOCTYPE html><svg></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
#data
<!DOCTYPE html><svg></svg><![CDATA[a]]>
#errors
(1,28) expected-dashes-or-doctype
#new-errors
(1:35) cdata-in-html-content
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <!-- [CDATA[a]] -->
#data
<!DOCTYPE html><body><svg></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
#data
<!DOCTYPE html><body><select><svg></svg></select>
#errors
(1,34) unexpected-start-tag-in-select
(1,40) unexpected-end-tag-in-select
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!DOCTYPE html><body><select><option><svg></svg></option></select>
#errors
(1,42) unexpected-start-tag-in-select
(1,48) unexpected-end-tag-in-select
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <option>
#data
<!DOCTYPE html><body><table><svg></svg></table>
#errors
(1,33) foster-parenting-start-tag
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <table>
#data
<!DOCTYPE html><body><table><svg><g>foo</g></svg></table>
#errors
(1,33) foster-parenting-start-tag
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <table>
#data
<!DOCTYPE html><body><table><svg><g>foo</g><g>bar</g></svg></table>
#errors
(1,33) foster-parenting-start-tag
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <table>
#data
<!DOCTYPE html><body><table><tbody><svg><g>foo</g><g>bar</g></svg></tbody></table>
#errors
(1,40) foster-parenting-start-tag
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <table>
| <tbody>
#data
<!DOCTYPE html><body><table><tbody><tr><svg><g>foo</g><g>bar</g></svg></tr></tbody></table>
#errors
(1,44) foster-parenting-start-tag
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <table>
| <tbody>
| <tr>
#data
<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg></td></tr></tbody></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
#data
<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg><p>baz</td></tr></tbody></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g></svg><p>baz</caption></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
(1,65) unexpected-html-element-in-foreign-content
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g>baz</table><p>quux
#errors
(1,73) unexpected-end-tag
(1,73) expected-one-end-tag-but-got-another
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| "baz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><colgroup><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
(1,43) foster-parenting-start-tag svg
(1,66) unexpected HTML-like start tag token in foreign content
(1,66) foster-parenting-start-tag
(1,67) foster-parenting-character
(1,68) foster-parenting-character
(1,69) foster-parenting-character
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
| <table>
| <colgroup>
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><tr><td><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
(1,49) unexpected-start-tag-in-select
(1,52) unexpected-start-tag-in-select
(1,59) unexpected-end-tag-in-select
(1,62) unexpected-start-tag-in-select
(1,69) unexpected-end-tag-in-select
(1,72) unexpected-start-tag-in-select
(1,83) unexpected-table-element-end-tag-in-select-in-table
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <select>
| "foobarbaz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
(1,36) unexpected-start-tag-implies-table-voodoo
(1,41) unexpected-start-tag-in-select
(1,44) unexpected-start-tag-in-select
(1,51) unexpected-end-tag-in-select
(1,54) unexpected-start-tag-in-select
(1,61) unexpected-end-tag-in-select
(1,64) unexpected-start-tag-in-select
(1,75) unexpected-table-element-end-tag-in-select-in-table
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| "foobarbaz"
| <table>
| <p>
| "quux"
#data
<!DOCTYPE html><body></body></html><svg><g>foo</g><g>bar</g><p>baz
#errors
(1,40) expected-eof-but-got-start-tag
(1,63) unexpected-html-element-in-foreign-content
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body></body><svg><g>foo</g><g>bar</g><p>baz
#errors
(1,33) unexpected-start-tag-after-body
(1,56) unexpected-html-element-in-foreign-content
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><frameset><svg><g></g><g></g><p><span>
#errors
(1,30) unexpected-start-tag-in-frameset
(1,33) unexpected-start-tag-in-frameset
(1,37) unexpected-end-tag-in-frameset
(1,40) unexpected-start-tag-in-frameset
(1,44) unexpected-end-tag-in-frameset
(1,47) unexpected-start-tag-in-frameset
(1,53) unexpected-start-tag-in-frameset
(1,53) eof-in-frameset
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><frameset></frameset><svg><g></g><g></g><p><span>
#errors
(1,41) unexpected-start-tag-after-frameset
(1,44) unexpected-start-tag-after-frameset
(1,48) unexpected-end-tag-after-frameset
(1,51) unexpected-start-tag-after-frameset
(1,55) unexpected-end-tag-after-frameset
(1,58) unexpected-start-tag-after-frameset
(1,64) unexpected-start-tag-after-frameset
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><body xlink:href=foo><svg xlink:href=foo></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| <svg svg>
| xlink href="foo"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo></g></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <svg svg>
| <svg g>
| xlink href="foo"
| xml lang="en"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo /></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <svg svg>
| <svg g>
| xlink href="foo"
| xml lang="en"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo />bar</svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <svg svg>
| <svg g>
| xlink href="foo"
| xml lang="en"
| "bar"
#data
<svg></path>
#errors
(1,5) expected-doctype-but-got-start-tag
(1,12) unexpected-end-tag
(1,12) unexpected-end-tag
(1,12) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <svg svg>
#data
<div><svg></div>a
#errors
(1,5) expected-doctype-but-got-start-tag
(1,16) unexpected-end-tag
(1,16) end-tag-too-early
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| "a"
#data
<div><svg><path></div>a
#errors
(1,5) expected-doctype-but-got-start-tag
(1,22) unexpected-end-tag
(1,22) end-tag-too-early
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| "a"
#data
<div><svg><path></svg><path>
#errors
(1,5) expected-doctype-but-got-start-tag
(1,22) unexpected-end-tag
(1,28) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <path>
#data
<div><svg><path><foreignObject><math></div>a
#errors
(1,5) expected-doctype-but-got-start-tag
(1,43) unexpected-end-tag
(1,43) end-tag-too-early
(1,44) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <svg foreignObject>
| <math math>
| "a"
#data
<div><svg><path><foreignObject><p></div>a
#errors
(1,5) expected-doctype-but-got-start-tag
(1,40) end-tag-too-early
(1,41) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <svg foreignObject>
| <p>
| "a"
#data
<!DOCTYPE html><svg><desc><div><svg><ul>a
#errors
(1,40) unexpected-html-element-in-foreign-content
(1,41) expected-closing-tag-but-got-eof
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg desc>
| <div>
| <svg svg>
| <ul>
| "a"
#data
<!DOCTYPE html><svg><desc><svg><ul>a
#errors
(1,35) unexpected-html-element-in-foreign-content
(1,36) expected-closing-tag-but-got-eof
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg desc>
| <svg svg>
| <ul>
| "a"
#data
<!DOCTYPE html><p><svg><desc><p>
#errors
(1,32) expected-closing-tag-but-got-eof
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <svg svg>
| <svg desc>
| <p>
#data
<!DOCTYPE html><p><svg><title><p>
#errors
(1,33) expected-closing-tag-but-got-eof
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <svg svg>
| <svg title>
| <p>
#data
<div><svg><path><foreignObject><p></foreignObject><p>
#errors
(1,5) expected-doctype-but-got-start-tag
(1,50) unexpected-end-tag
(1,53) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <svg foreignObject>
| <p>
| <p>
#data
<math><mi><div><object><div><span></span></div></object></div></mi><mi>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,71) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <div>
| <object>
| <div>
| <span>
| <math mi>
#data
<math><mi><svg><foreignObject><div><div></div></div></foreignObject></svg></mi><mi>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,83) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <svg svg>
| <svg foreignObject>
| <div>
| <div>
| <math mi>
#data
<svg><script></script><path>
#errors
(1,5) expected-doctype-but-got-start-tag
(1,28) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <svg svg>
| <svg script>
| <svg path>
#data
<table><svg></svg><tr>
#errors
(1,7) expected-doctype-but-got-start-tag
(1,12) unexpected-start-tag-implies-table-voodoo
(1,22) eof-in-table
#document
| <html>
| <head>
| <body>
| <svg svg>
| <table>
| <tbody>
| <tr>
#data
<math><mi><mglyph>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,18) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <math mglyph>
#data
<math><mi><malignmark>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,22) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <math malignmark>
#data
<math><mo><mglyph>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,18) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math mo>
| <math mglyph>
#data
<math><mo><malignmark>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,22) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math mo>
| <math malignmark>
#data
<math><mn><mglyph>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,18) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math mn>
| <math mglyph>
#data
<math><mn><malignmark>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,22) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math mn>
| <math malignmark>
#data
<math><ms><mglyph>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,18) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math ms>
| <math mglyph>
#data
<math><ms><malignmark>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,22) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math ms>
| <math malignmark>
#data
<math><mtext><mglyph>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,21) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math mtext>
| <math mglyph>
#data
<math><mtext><malignmark>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,25) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math mtext>
| <math malignmark>
#data
<math><annotation-xml><svg></svg></annotation-xml><mi>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,54) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <svg svg>
| <math mi>
#data
<math><annotation-xml><svg><foreignObject><div><math><mi></mi></math><span></span></div></foreignObject><path></path></svg></annotation-xml><mi>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,144) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <svg svg>
| <svg foreignObject>
| <div>
| <math math>
| <math mi>
| <span>
| <svg path>
| <math mi>
#data
<math><annotation-xml><svg><foreignObject><math><mi><svg></svg></mi><mo></mo></math><span></span></foreignObject><path></path></svg></annotation-xml><mi>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,153) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <svg svg>
| <svg foreignObject>
| <math math>
| <math mi>
| <svg svg>
| <math mo>
| <span>
| <svg path>
| <math mi>

View file

@ -1,523 +0,0 @@
#data
<!DOCTYPE html><body><svg attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' diffuseConstant='' edgeMode='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| attributeName=""
| attributeType=""
| baseFrequency=""
| baseProfile=""
| calcMode=""
| clipPathUnits=""
| diffuseConstant=""
| edgeMode=""
| filterUnits=""
| glyphRef=""
| gradientTransform=""
| gradientUnits=""
| kernelMatrix=""
| kernelUnitLength=""
| keyPoints=""
| keySplines=""
| keyTimes=""
| lengthAdjust=""
| limitingConeAngle=""
| markerHeight=""
| markerUnits=""
| markerWidth=""
| maskContentUnits=""
| maskUnits=""
| numOctaves=""
| pathLength=""
| patternContentUnits=""
| patternTransform=""
| patternUnits=""
| pointsAtX=""
| pointsAtY=""
| pointsAtZ=""
| preserveAlpha=""
| preserveAspectRatio=""
| primitiveUnits=""
| refX=""
| refY=""
| repeatCount=""
| repeatDur=""
| requiredExtensions=""
| requiredFeatures=""
| specularConstant=""
| specularExponent=""
| spreadMethod=""
| startOffset=""
| stdDeviation=""
| stitchTiles=""
| surfaceScale=""
| systemLanguage=""
| tableValues=""
| targetX=""
| targetY=""
| textLength=""
| viewBox=""
| viewTarget=""
| xChannelSelector=""
| yChannelSelector=""
| zoomAndPan=""
#data
<!DOCTYPE html><BODY><SVG ATTRIBUTENAME='' ATTRIBUTETYPE='' BASEFREQUENCY='' BASEPROFILE='' CALCMODE='' CLIPPATHUNITS='' DIFFUSECONSTANT='' EDGEMODE='' FILTERUNITS='' GLYPHREF='' GRADIENTTRANSFORM='' GRADIENTUNITS='' KERNELMATRIX='' KERNELUNITLENGTH='' KEYPOINTS='' KEYSPLINES='' KEYTIMES='' LENGTHADJUST='' LIMITINGCONEANGLE='' MARKERHEIGHT='' MARKERUNITS='' MARKERWIDTH='' MASKCONTENTUNITS='' MASKUNITS='' NUMOCTAVES='' PATHLENGTH='' PATTERNCONTENTUNITS='' PATTERNTRANSFORM='' PATTERNUNITS='' POINTSATX='' POINTSATY='' POINTSATZ='' PRESERVEALPHA='' PRESERVEASPECTRATIO='' PRIMITIVEUNITS='' REFX='' REFY='' REPEATCOUNT='' REPEATDUR='' REQUIREDEXTENSIONS='' REQUIREDFEATURES='' SPECULARCONSTANT='' SPECULAREXPONENT='' SPREADMETHOD='' STARTOFFSET='' STDDEVIATION='' STITCHTILES='' SURFACESCALE='' SYSTEMLANGUAGE='' TABLEVALUES='' TARGETX='' TARGETY='' TEXTLENGTH='' VIEWBOX='' VIEWTARGET='' XCHANNELSELECTOR='' YCHANNELSELECTOR='' ZOOMANDPAN=''></SVG>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| attributeName=""
| attributeType=""
| baseFrequency=""
| baseProfile=""
| calcMode=""
| clipPathUnits=""
| diffuseConstant=""
| edgeMode=""
| filterUnits=""
| glyphRef=""
| gradientTransform=""
| gradientUnits=""
| kernelMatrix=""
| kernelUnitLength=""
| keyPoints=""
| keySplines=""
| keyTimes=""
| lengthAdjust=""
| limitingConeAngle=""
| markerHeight=""
| markerUnits=""
| markerWidth=""
| maskContentUnits=""
| maskUnits=""
| numOctaves=""
| pathLength=""
| patternContentUnits=""
| patternTransform=""
| patternUnits=""
| pointsAtX=""
| pointsAtY=""
| pointsAtZ=""
| preserveAlpha=""
| preserveAspectRatio=""
| primitiveUnits=""
| refX=""
| refY=""
| repeatCount=""
| repeatDur=""
| requiredExtensions=""
| requiredFeatures=""
| specularConstant=""
| specularExponent=""
| spreadMethod=""
| startOffset=""
| stdDeviation=""
| stitchTiles=""
| surfaceScale=""
| systemLanguage=""
| tableValues=""
| targetX=""
| targetY=""
| textLength=""
| viewBox=""
| viewTarget=""
| xChannelSelector=""
| yChannelSelector=""
| zoomAndPan=""
#data
<!DOCTYPE html><body><svg attributename='' attributetype='' basefrequency='' baseprofile='' calcmode='' clippathunits='' diffuseconstant='' edgemode='' filterunits='' filterres='' glyphref='' gradienttransform='' gradientunits='' kernelmatrix='' kernelunitlength='' keypoints='' keysplines='' keytimes='' lengthadjust='' limitingconeangle='' markerheight='' markerunits='' markerwidth='' maskcontentunits='' maskunits='' numoctaves='' pathlength='' patterncontentunits='' patterntransform='' patternunits='' pointsatx='' pointsaty='' pointsatz='' preservealpha='' preserveaspectratio='' primitiveunits='' refx='' refy='' repeatcount='' repeatdur='' requiredextensions='' requiredfeatures='' specularconstant='' specularexponent='' spreadmethod='' startoffset='' stddeviation='' stitchtiles='' surfacescale='' systemlanguage='' tablevalues='' targetx='' targety='' textlength='' viewbox='' viewtarget='' xchannelselector='' ychannelselector='' zoomandpan=''></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| attributeName=""
| attributeType=""
| baseFrequency=""
| baseProfile=""
| calcMode=""
| clipPathUnits=""
| diffuseConstant=""
| edgeMode=""
| filterUnits=""
| filterres=""
| glyphRef=""
| gradientTransform=""
| gradientUnits=""
| kernelMatrix=""
| kernelUnitLength=""
| keyPoints=""
| keySplines=""
| keyTimes=""
| lengthAdjust=""
| limitingConeAngle=""
| markerHeight=""
| markerUnits=""
| markerWidth=""
| maskContentUnits=""
| maskUnits=""
| numOctaves=""
| pathLength=""
| patternContentUnits=""
| patternTransform=""
| patternUnits=""
| pointsAtX=""
| pointsAtY=""
| pointsAtZ=""
| preserveAlpha=""
| preserveAspectRatio=""
| primitiveUnits=""
| refX=""
| refY=""
| repeatCount=""
| repeatDur=""
| requiredExtensions=""
| requiredFeatures=""
| specularConstant=""
| specularExponent=""
| spreadMethod=""
| startOffset=""
| stdDeviation=""
| stitchTiles=""
| surfaceScale=""
| systemLanguage=""
| tableValues=""
| targetX=""
| targetY=""
| textLength=""
| viewBox=""
| viewTarget=""
| xChannelSelector=""
| yChannelSelector=""
| zoomAndPan=""
#data
<!DOCTYPE html><body><math attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' diffuseConstant='' edgeMode='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| attributename=""
| attributetype=""
| basefrequency=""
| baseprofile=""
| calcmode=""
| clippathunits=""
| diffuseconstant=""
| edgemode=""
| filterunits=""
| glyphref=""
| gradienttransform=""
| gradientunits=""
| kernelmatrix=""
| kernelunitlength=""
| keypoints=""
| keysplines=""
| keytimes=""
| lengthadjust=""
| limitingconeangle=""
| markerheight=""
| markerunits=""
| markerwidth=""
| maskcontentunits=""
| maskunits=""
| numoctaves=""
| pathlength=""
| patterncontentunits=""
| patterntransform=""
| patternunits=""
| pointsatx=""
| pointsaty=""
| pointsatz=""
| preservealpha=""
| preserveaspectratio=""
| primitiveunits=""
| refx=""
| refy=""
| repeatcount=""
| repeatdur=""
| requiredextensions=""
| requiredfeatures=""
| specularconstant=""
| specularexponent=""
| spreadmethod=""
| startoffset=""
| stddeviation=""
| stitchtiles=""
| surfacescale=""
| systemlanguage=""
| tablevalues=""
| targetx=""
| targety=""
| textlength=""
| viewbox=""
| viewtarget=""
| xchannelselector=""
| ychannelselector=""
| zoomandpan=""
#data
<!DOCTYPE html><body><svg contentScriptType='' contentStyleType='' externalResourcesRequired='' filterRes=''></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| contentscripttype=""
| contentstyletype=""
| externalresourcesrequired=""
| filterres=""
#data
<!DOCTYPE html><body><svg CONTENTSCRIPTTYPE='' CONTENTSTYLETYPE='' EXTERNALRESOURCESREQUIRED='' FILTERRES=''></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| contentscripttype=""
| contentstyletype=""
| externalresourcesrequired=""
| filterres=""
#data
<!DOCTYPE html><body><svg contentscripttype='' contentstyletype='' externalresourcesrequired='' filterres=''></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| contentscripttype=""
| contentstyletype=""
| externalresourcesrequired=""
| filterres=""
#data
<!DOCTYPE html><body><math contentScriptType='' contentStyleType='' externalResourcesRequired='' filterRes=''></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| contentscripttype=""
| contentstyletype=""
| externalresourcesrequired=""
| filterres=""
#data
<!DOCTYPE html><body><svg><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg altGlyph>
| <svg altGlyphDef>
| <svg altGlyphItem>
| <svg animateColor>
| <svg animateMotion>
| <svg animateTransform>
| <svg clipPath>
| <svg feBlend>
| <svg feColorMatrix>
| <svg feComponentTransfer>
| <svg feComposite>
| <svg feConvolveMatrix>
| <svg feDiffuseLighting>
| <svg feDisplacementMap>
| <svg feDistantLight>
| <svg feFlood>
| <svg feFuncA>
| <svg feFuncB>
| <svg feFuncG>
| <svg feFuncR>
| <svg feGaussianBlur>
| <svg feImage>
| <svg feMerge>
| <svg feMergeNode>
| <svg feMorphology>
| <svg feOffset>
| <svg fePointLight>
| <svg feSpecularLighting>
| <svg feSpotLight>
| <svg feTile>
| <svg feTurbulence>
| <svg foreignObject>
| <svg glyphRef>
| <svg linearGradient>
| <svg radialGradient>
| <svg textPath>
#data
<!DOCTYPE html><body><svg><altglyph /><altglyphdef /><altglyphitem /><animatecolor /><animatemotion /><animatetransform /><clippath /><feblend /><fecolormatrix /><fecomponenttransfer /><fecomposite /><feconvolvematrix /><fediffuselighting /><fedisplacementmap /><fedistantlight /><feflood /><fefunca /><fefuncb /><fefuncg /><fefuncr /><fegaussianblur /><feimage /><femerge /><femergenode /><femorphology /><feoffset /><fepointlight /><fespecularlighting /><fespotlight /><fetile /><feturbulence /><foreignobject /><glyphref /><lineargradient /><radialgradient /><textpath /></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg altGlyph>
| <svg altGlyphDef>
| <svg altGlyphItem>
| <svg animateColor>
| <svg animateMotion>
| <svg animateTransform>
| <svg clipPath>
| <svg feBlend>
| <svg feColorMatrix>
| <svg feComponentTransfer>
| <svg feComposite>
| <svg feConvolveMatrix>
| <svg feDiffuseLighting>
| <svg feDisplacementMap>
| <svg feDistantLight>
| <svg feFlood>
| <svg feFuncA>
| <svg feFuncB>
| <svg feFuncG>
| <svg feFuncR>
| <svg feGaussianBlur>
| <svg feImage>
| <svg feMerge>
| <svg feMergeNode>
| <svg feMorphology>
| <svg feOffset>
| <svg fePointLight>
| <svg feSpecularLighting>
| <svg feSpotLight>
| <svg feTile>
| <svg feTurbulence>
| <svg foreignObject>
| <svg glyphRef>
| <svg linearGradient>
| <svg radialGradient>
| <svg textPath>
#data
<!DOCTYPE html><BODY><SVG><ALTGLYPH /><ALTGLYPHDEF /><ALTGLYPHITEM /><ANIMATECOLOR /><ANIMATEMOTION /><ANIMATETRANSFORM /><CLIPPATH /><FEBLEND /><FECOLORMATRIX /><FECOMPONENTTRANSFER /><FECOMPOSITE /><FECONVOLVEMATRIX /><FEDIFFUSELIGHTING /><FEDISPLACEMENTMAP /><FEDISTANTLIGHT /><FEFLOOD /><FEFUNCA /><FEFUNCB /><FEFUNCG /><FEFUNCR /><FEGAUSSIANBLUR /><FEIMAGE /><FEMERGE /><FEMERGENODE /><FEMORPHOLOGY /><FEOFFSET /><FEPOINTLIGHT /><FESPECULARLIGHTING /><FESPOTLIGHT /><FETILE /><FETURBULENCE /><FOREIGNOBJECT /><GLYPHREF /><LINEARGRADIENT /><RADIALGRADIENT /><TEXTPATH /></SVG>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg altGlyph>
| <svg altGlyphDef>
| <svg altGlyphItem>
| <svg animateColor>
| <svg animateMotion>
| <svg animateTransform>
| <svg clipPath>
| <svg feBlend>
| <svg feColorMatrix>
| <svg feComponentTransfer>
| <svg feComposite>
| <svg feConvolveMatrix>
| <svg feDiffuseLighting>
| <svg feDisplacementMap>
| <svg feDistantLight>
| <svg feFlood>
| <svg feFuncA>
| <svg feFuncB>
| <svg feFuncG>
| <svg feFuncR>
| <svg feGaussianBlur>
| <svg feImage>
| <svg feMerge>
| <svg feMergeNode>
| <svg feMorphology>
| <svg feOffset>
| <svg fePointLight>
| <svg feSpecularLighting>
| <svg feSpotLight>
| <svg feTile>
| <svg feTurbulence>
| <svg foreignObject>
| <svg glyphRef>
| <svg linearGradient>
| <svg radialGradient>
| <svg textPath>
#data
<!DOCTYPE html><body><math><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math altglyph>
| <math altglyphdef>
| <math altglyphitem>
| <math animatecolor>
| <math animatemotion>
| <math animatetransform>
| <math clippath>
| <math feblend>
| <math fecolormatrix>
| <math fecomponenttransfer>
| <math fecomposite>
| <math feconvolvematrix>
| <math fediffuselighting>
| <math fedisplacementmap>
| <math fedistantlight>
| <math feflood>
| <math fefunca>
| <math fefuncb>
| <math fefuncg>
| <math fefuncr>
| <math fegaussianblur>
| <math feimage>
| <math femerge>
| <math femergenode>
| <math femorphology>
| <math feoffset>
| <math fepointlight>
| <math fespecularlighting>
| <math fespotlight>
| <math fetile>
| <math feturbulence>
| <math foreignobject>
| <math glyphref>
| <math lineargradient>
| <math radialgradient>
| <math textpath>
#data
<!DOCTYPE html><body><svg><solidColor /></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg solidcolor>

View file

@ -1,62 +0,0 @@
#data
<!DOCTYPE html><body><p>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject><P>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <math math>
| <math mtext>
| <i>
| "baz"
| <math annotation-xml>
| <svg svg>
| <svg desc>
| <b>
| "eggs"
| <svg g>
| <svg foreignObject>
| <p>
| "spam"
| <table>
| <tbody>
| <tr>
| <td>
| <img>
| <svg g>
| "quux"
| "bar"
#data
<!DOCTYPE html><body>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject><P>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "foo"
| <math math>
| <math mtext>
| <i>
| "baz"
| <math annotation-xml>
| <svg svg>
| <svg desc>
| <b>
| "eggs"
| <svg g>
| <svg foreignObject>
| <p>
| "spam"
| <table>
| <tbody>
| <tr>
| <td>
| <img>
| <svg g>
| "quux"
| "bar"

View file

@ -1,75 +0,0 @@
#data
<!DOCTYPE html><html><body><xyz:abc></xyz:abc>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <xyz:abc>
#data
<!DOCTYPE html><html><body><xyz:abc></xyz:abc><span></span>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <xyz:abc>
| <span>
#data
<!DOCTYPE html><html><html abc:def=gh><xyz:abc></xyz:abc>
#errors
(1,38): non-html-root
#document
| <!DOCTYPE html>
| <html>
| abc:def="gh"
| <head>
| <body>
| <xyz:abc>
#data
<!DOCTYPE html><html xml:lang=bar><html xml:lang=foo>
#errors
(1,53): non-html-root
#document
| <!DOCTYPE html>
| <html>
| xml:lang="bar"
| <head>
| <body>
#data
<!DOCTYPE html><html 123=456>
#errors
#document
| <!DOCTYPE html>
| <html>
| 123="456"
| <head>
| <body>
#data
<!DOCTYPE html><html 123=456><html 789=012>
#errors
(1,43): non-html-root
#document
| <!DOCTYPE html>
| <html>
| 123="456"
| 789="012"
| <head>
| <body>
#data
<!DOCTYPE html><html><body 789=012>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| 789="012"

Some files were not shown because too many files have changed in this diff