Remove unnecessary lib files

JonnyWong16 2021-10-15 01:51:46 -07:00
parent afbfebbe59
commit 53369cd8a6
No known key found for this signature in database
GPG key ID: B1F1F9807184697A
135 changed files with 0 additions and 107828 deletions


@@ -1 +0,0 @@
"The beautifulsoup tests."


@@ -1,147 +0,0 @@
"""Tests of the builder registry."""
import unittest
import warnings
from bs4 import BeautifulSoup
from bs4.builder import (
builder_registry as registry,
HTMLParserTreeBuilder,
TreeBuilderRegistry,
)
try:
from bs4.builder import HTML5TreeBuilder
HTML5LIB_PRESENT = True
except ImportError:
HTML5LIB_PRESENT = False
try:
from bs4.builder import (
LXMLTreeBuilderForXML,
LXMLTreeBuilder,
)
LXML_PRESENT = True
except ImportError:
LXML_PRESENT = False
class BuiltInRegistryTest(unittest.TestCase):
"""Test the built-in registry with the default builders registered."""
def test_combination(self):
if LXML_PRESENT:
self.assertEqual(registry.lookup('fast', 'html'),
LXMLTreeBuilder)
if LXML_PRESENT:
self.assertEqual(registry.lookup('permissive', 'xml'),
LXMLTreeBuilderForXML)
self.assertEqual(registry.lookup('strict', 'html'),
HTMLParserTreeBuilder)
if HTML5LIB_PRESENT:
self.assertEqual(registry.lookup('html5lib', 'html'),
HTML5TreeBuilder)
def test_lookup_by_markup_type(self):
if LXML_PRESENT:
self.assertEqual(registry.lookup('html'), LXMLTreeBuilder)
self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML)
else:
self.assertEqual(registry.lookup('xml'), None)
if HTML5LIB_PRESENT:
self.assertEqual(registry.lookup('html'), HTML5TreeBuilder)
else:
self.assertEqual(registry.lookup('html'), HTMLParserTreeBuilder)
def test_named_library(self):
if LXML_PRESENT:
self.assertEqual(registry.lookup('lxml', 'xml'),
LXMLTreeBuilderForXML)
self.assertEqual(registry.lookup('lxml', 'html'),
LXMLTreeBuilder)
if HTML5LIB_PRESENT:
self.assertEqual(registry.lookup('html5lib'),
HTML5TreeBuilder)
self.assertEqual(registry.lookup('html.parser'),
HTMLParserTreeBuilder)
def test_beautifulsoup_constructor_does_lookup(self):
with warnings.catch_warnings(record=True) as w:
# This will create a warning about not explicitly
# specifying a parser, but we'll ignore it.
# You can pass in a string.
BeautifulSoup("", features="html")
# Or a list of strings.
BeautifulSoup("", features=["html", "fast"])
# You'll get an exception if BS can't find an appropriate
# builder.
self.assertRaises(ValueError, BeautifulSoup,
"", features="no-such-feature")
class RegistryTest(unittest.TestCase):
"""Test the TreeBuilderRegistry class in general."""
def setUp(self):
self.registry = TreeBuilderRegistry()
def builder_for_features(self, *feature_list):
cls = type('Builder_' + '_'.join(feature_list),
(object,), {'features' : feature_list})
self.registry.register(cls)
return cls
def test_register_with_no_features(self):
builder = self.builder_for_features()
# Since the builder advertises no features, you can't find it
# by looking up features.
self.assertEqual(self.registry.lookup('foo'), None)
# But you can find it by doing a lookup with no features, if
# this happens to be the only registered builder.
self.assertEqual(self.registry.lookup(), builder)
def test_register_with_features_makes_lookup_succeed(self):
builder = self.builder_for_features('foo', 'bar')
self.assertEqual(self.registry.lookup('foo'), builder)
self.assertEqual(self.registry.lookup('bar'), builder)
def test_lookup_fails_when_no_builder_implements_feature(self):
builder = self.builder_for_features('foo', 'bar')
self.assertEqual(self.registry.lookup('baz'), None)
def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
builder1 = self.builder_for_features('foo')
builder2 = self.builder_for_features('bar')
self.assertEqual(self.registry.lookup(), builder2)
def test_lookup_fails_when_no_tree_builders_registered(self):
self.assertEqual(self.registry.lookup(), None)
def test_lookup_gets_most_recent_builder_supporting_all_features(self):
has_one = self.builder_for_features('foo')
has_the_other = self.builder_for_features('bar')
has_both_early = self.builder_for_features('foo', 'bar', 'baz')
has_both_late = self.builder_for_features('foo', 'bar', 'quux')
lacks_one = self.builder_for_features('bar')
has_the_other = self.builder_for_features('foo')
# There are two builders featuring 'foo' and 'bar', but
# the one that also features 'quux' was registered later.
self.assertEqual(self.registry.lookup('foo', 'bar'),
has_both_late)
# There is only one builder featuring 'foo', 'bar', and 'baz'.
self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'),
has_both_early)
def test_lookup_fails_when_cannot_reconcile_requested_features(self):
builder1 = self.builder_for_features('foo', 'bar')
builder2 = self.builder_for_features('foo', 'baz')
self.assertEqual(self.registry.lookup('bar', 'baz'), None)
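
For orientation, here is a minimal sketch (not part of the deleted file, and assuming bs4 with html.parser is installed) of the registry lookup behaviour these tests covered:

from bs4 import BeautifulSoup
from bs4.builder import builder_registry

# Look up a tree builder class by feature name; returns None when nothing matches.
builder_cls = builder_registry.lookup('html.parser')
print(builder_cls)  # <class 'bs4.builder._htmlparser.HTMLParserTreeBuilder'>

# The BeautifulSoup constructor performs the same lookup via its `features` argument.
soup = BeautifulSoup("<p>hi</p>", features="html.parser")
print(soup.p.string)  # hi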


@@ -1,36 +0,0 @@
"Test harness for doctests."
# pylint: disable-msg=E0611,W0142
__metaclass__ = type
__all__ = [
'additional_tests',
]
import atexit
import doctest
import os
#from pkg_resources import (
# resource_filename, resource_exists, resource_listdir, cleanup_resources)
import unittest
DOCTEST_FLAGS = (
doctest.ELLIPSIS |
doctest.NORMALIZE_WHITESPACE |
doctest.REPORT_NDIFF)
# def additional_tests():
# "Run the doc tests (README.txt and docs/*, if any exist)"
# doctest_files = [
# os.path.abspath(resource_filename('bs4', 'README.txt'))]
# if resource_exists('bs4', 'docs'):
# for name in resource_listdir('bs4', 'docs'):
# if name.endswith('.txt'):
# doctest_files.append(
# os.path.abspath(
# resource_filename('bs4', 'docs/%s' % name)))
# kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS)
# atexit.register(cleanup_resources)
# return unittest.TestSuite((
# doctest.DocFileSuite(*doctest_files, **kwargs)))
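
A minimal sketch (illustrative, not from the deleted harness) of how these doctest flags would be applied if the commented-out code above were active; "README.txt" is a placeholder path:

import doctest

flags = doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF
# module_relative=False mirrors the commented-out DocFileSuite call above.
doctest.testfile("README.txt", module_relative=False, optionflags=flags)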


@@ -1,226 +0,0 @@
"""Tests to ensure that the html5lib tree builder generates good trees."""
import warnings
try:
from bs4.builder import HTML5TreeBuilder
HTML5LIB_PRESENT = True
except ImportError as e:
HTML5LIB_PRESENT = False
from bs4.element import SoupStrainer
from bs4.testing import (
HTML5TreeBuilderSmokeTest,
SoupTest,
skipIf,
)
@skipIf(
not HTML5LIB_PRESENT,
"html5lib seems not to be present, not testing its tree builder.")
class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
"""See ``HTML5TreeBuilderSmokeTest``."""
@property
def default_builder(self):
return HTML5TreeBuilder
def test_soupstrainer(self):
# The html5lib tree builder does not support SoupStrainers.
strainer = SoupStrainer("b")
markup = "<p>A <b>bold</b> statement.</p>"
with warnings.catch_warnings(record=True) as w:
soup = self.soup(markup, parse_only=strainer)
self.assertEqual(
soup.decode(), self.document_for(markup))
self.assertTrue(
"the html5lib tree builder doesn't support parse_only" in
str(w[0].message))
def test_correctly_nested_tables(self):
"""html5lib inserts <tbody> tags where other parsers don't."""
markup = ('<table id="1">'
'<tr>'
"<td>Here's another table:"
'<table id="2">'
'<tr><td>foo</td></tr>'
'</table></td>')
self.assertSoupEquals(
markup,
'<table id="1"><tbody><tr><td>Here\'s another table:'
'<table id="2"><tbody><tr><td>foo</td></tr></tbody></table>'
'</td></tr></tbody></table>')
self.assertSoupEquals(
"<table><thead><tr><td>Foo</td></tr></thead>"
"<tbody><tr><td>Bar</td></tr></tbody>"
"<tfoot><tr><td>Baz</td></tr></tfoot></table>")
def test_xml_declaration_followed_by_doctype(self):
markup = '''<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html>
<html>
<head>
</head>
<body>
<p>foo</p>
</body>
</html>'''
soup = self.soup(markup)
# Verify that we can reach the <p> tag; this means the tree is connected.
self.assertEqual(b"<p>foo</p>", soup.p.encode())
def test_reparented_markup(self):
markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>'
soup = self.soup(markup)
self.assertEqual("<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode())
self.assertEqual(2, len(soup.find_all('p')))
def test_reparented_markup_ends_with_whitespace(self):
markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>\n'
soup = self.soup(markup)
self.assertEqual("<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
self.assertEqual(2, len(soup.find_all('p')))
def test_reparented_markup_containing_identical_whitespace_nodes(self):
"""Verify that we keep the two whitespace nodes in this
document distinct when reparenting the adjacent <tbody> tags.
"""
markup = '<table> <tbody><tbody><ims></tbody> </table>'
soup = self.soup(markup)
space1, space2 = soup.find_all(string=' ')
tbody1, tbody2 = soup.find_all('tbody')
assert space1.next_element is tbody1
assert tbody2.next_element is space2
def test_reparented_markup_containing_children(self):
markup = '<div><a>aftermath<p><noscript>target</noscript>aftermath</a></p></div>'
soup = self.soup(markup)
noscript = soup.noscript
self.assertEqual("target", noscript.next_element)
target = soup.find(string='target')
# The 'aftermath' string was duplicated; we want the second one.
final_aftermath = soup.find_all(string='aftermath')[-1]
# The <noscript> tag was moved beneath a copy of the <a> tag,
# but the 'target' string within is still connected to the
# (second) 'aftermath' string.
self.assertEqual(final_aftermath, target.next_element)
self.assertEqual(target, final_aftermath.previous_element)
def test_processing_instruction(self):
"""Processing instructions become comments."""
markup = b"""<?PITarget PIContent?>"""
soup = self.soup(markup)
assert str(soup).startswith("<!--?PITarget PIContent?-->")
def test_cloned_multivalue_node(self):
markup = b"""<a class="my_class"><p></a>"""
soup = self.soup(markup)
a1, a2 = soup.find_all('a')
self.assertEqual(a1, a2)
assert a1 is not a2
def test_foster_parenting(self):
markup = b"""<table><td></tbody>A"""
soup = self.soup(markup)
self.assertEqual("<body>A<table><tbody><tr><td></td></tr></tbody></table></body>", soup.body.decode())
def test_extraction(self):
"""
Test that extraction does not destroy the tree.
https://bugs.launchpad.net/beautifulsoup/+bug/1782928
"""
markup = """
<html><head></head>
<style>
</style><script></script><body><p>hello</p></body></html>
"""
soup = self.soup(markup)
[s.extract() for s in soup('script')]
[s.extract() for s in soup('style')]
self.assertEqual(len(soup.find_all("p")), 1)
def test_empty_comment(self):
"""
Test that empty comment does not break structure.
https://bugs.launchpad.net/beautifulsoup/+bug/1806598
"""
markup = """
<html>
<body>
<form>
<!----><input type="text">
</form>
</body>
</html>
"""
soup = self.soup(markup)
inputs = []
for form in soup.find_all('form'):
inputs.extend(form.find_all('input'))
self.assertEqual(len(inputs), 1)
def test_tracking_line_numbers(self):
# The html.parser TreeBuilder keeps track of line number and
# position of each element.
markup = "\n <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>"
soup = self.soup(markup)
self.assertEqual(2, soup.p.sourceline)
self.assertEqual(5, soup.p.sourcepos)
self.assertEqual("sourceline", soup.p.find('sourceline').name)
# You can deactivate this behavior.
soup = self.soup(markup, store_line_numbers=False)
self.assertEqual("sourceline", soup.p.sourceline.name)
self.assertEqual("sourcepos", soup.p.sourcepos.name)
def test_special_string_containers(self):
# The html5lib tree builder doesn't support this standard feature,
# because there's no way of knowing, when a string is created,
# where in the tree it will eventually end up.
pass
def test_html5_attributes(self):
# The html5lib TreeBuilder can convert any entity named in
# the HTML5 spec to a sequence of Unicode characters, and
# convert those Unicode characters to a (potentially
# different) named entity on the way out.
#
# This is a copy of the same test from
# HTMLParserTreeBuilderSmokeTest. It's not in the superclass
# because the lxml HTML TreeBuilder _doesn't_ work this way.
for input_element, output_unicode, output_element in (
("&RightArrowLeftArrow;", '\u21c4', b'&rlarr;'),
('&models;', '\u22a7', b'&models;'),
('&Nfr;', '\U0001d511', b'&Nfr;'),
('&ngeqq;', '\u2267\u0338', b'&ngeqq;'),
('&not;', '\xac', b'&not;'),
('&Not;', '\u2aec', b'&Not;'),
('&quot;', '"', b'"'),
('&there4;', '\u2234', b'&there4;'),
('&Therefore;', '\u2234', b'&there4;'),
('&therefore;', '\u2234', b'&there4;'),
("&fjlig;", 'fj', b'fj'),
("&sqcup;", '\u2294', b'&sqcup;'),
("&sqcups;", '\u2294\ufe00', b'&sqcups;'),
("&apos;", "'", b"'"),
("&verbar;", "|", b"|"),
):
markup = '<div>%s</div>' % input_element
div = self.soup(markup).div
without_element = div.encode()
expect = b"<div>%s</div>" % output_unicode.encode("utf8")
self.assertEqual(without_element, expect)
with_element = div.encode(formatter="html")
expect = b"<div>%s</div>" % output_element
self.assertEqual(with_element, expect)
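
As a quick illustration (not from the deleted file; requires the optional html5lib package), this is the browser-style tree repair the tests above depend on:

from bs4 import BeautifulSoup

# html5lib builds the tree a browser would, e.g. wrapping table rows in <tbody>.
soup = BeautifulSoup('<table><tr><td>foo</td></tr></table>', 'html5lib')
print(soup.table.tbody.td.string)  # foo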


@@ -1,134 +0,0 @@
"""Tests to ensure that the html.parser tree builder generates good
trees."""
from pdb import set_trace
import pickle
import warnings
from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
from bs4.builder import HTMLParserTreeBuilder
from bs4.builder._htmlparser import BeautifulSoupHTMLParser
class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
default_builder = HTMLParserTreeBuilder
def test_namespaced_system_doctype(self):
# html.parser can't handle namespaced doctypes, so skip this one.
pass
def test_namespaced_public_doctype(self):
# html.parser can't handle namespaced doctypes, so skip this one.
pass
def test_builder_is_pickled(self):
"""Unlike most tree builders, HTMLParserTreeBuilder and will
be restored after pickling.
"""
tree = self.soup("<a><b>foo</a>")
dumped = pickle.dumps(tree, 2)
loaded = pickle.loads(dumped)
self.assertTrue(isinstance(loaded.builder, type(tree.builder)))
def test_redundant_empty_element_closing_tags(self):
self.assertSoupEquals('<br></br><br></br><br></br>', "<br/><br/><br/>")
self.assertSoupEquals('</br></br></br>', "")
def test_empty_element(self):
# This verifies that any buffered data present when the parser
# finishes working is handled.
self.assertSoupEquals("foo &# bar", "foo &amp;# bar")
def test_tracking_line_numbers(self):
# The html.parser TreeBuilder keeps track of line number and
# position of each element.
markup = "\n <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>"
soup = self.soup(markup)
self.assertEqual(2, soup.p.sourceline)
self.assertEqual(3, soup.p.sourcepos)
self.assertEqual("sourceline", soup.p.find('sourceline').name)
# You can deactivate this behavior.
soup = self.soup(markup, store_line_numbers=False)
self.assertEqual("sourceline", soup.p.sourceline.name)
self.assertEqual("sourcepos", soup.p.sourcepos.name)
def test_on_duplicate_attribute(self):
# The html.parser tree builder has a variety of ways of
# handling a tag that contains the same attribute multiple times.
markup = '<a class="cls" href="url1" href="url2" href="url3" id="id">'
# If you don't provide any particular value for
# on_duplicate_attribute, later values replace earlier values.
soup = self.soup(markup)
self.assertEqual("url3", soup.a['href'])
self.assertEqual(["cls"], soup.a['class'])
self.assertEqual("id", soup.a['id'])
# You can also get this behavior explicitly.
def assert_attribute(on_duplicate_attribute, expected):
soup = self.soup(
markup, on_duplicate_attribute=on_duplicate_attribute
)
self.assertEqual(expected, soup.a['href'])
# Verify that non-duplicate attributes are treated normally.
self.assertEqual(["cls"], soup.a['class'])
self.assertEqual("id", soup.a['id'])
assert_attribute(None, "url3")
assert_attribute(BeautifulSoupHTMLParser.REPLACE, "url3")
# You can ignore subsequent values in favor of the first.
assert_attribute(BeautifulSoupHTMLParser.IGNORE, "url1")
# And you can pass in a callable that does whatever you want.
def accumulate(attrs, key, value):
if not isinstance(attrs[key], list):
attrs[key] = [attrs[key]]
attrs[key].append(value)
assert_attribute(accumulate, ["url1", "url2", "url3"])
def test_html5_attributes(self):
# The html.parser TreeBuilder can convert any entity named in
# the HTML5 spec to a sequence of Unicode characters, and
# convert those Unicode characters to a (potentially
# different) named entity on the way out.
for input_element, output_unicode, output_element in (
("&RightArrowLeftArrow;", '\u21c4', b'&rlarr;'),
('&models;', '\u22a7', b'&models;'),
('&Nfr;', '\U0001d511', b'&Nfr;'),
('&ngeqq;', '\u2267\u0338', b'&ngeqq;'),
('&not;', '\xac', b'&not;'),
('&Not;', '\u2aec', b'&Not;'),
('&quot;', '"', b'"'),
('&there4;', '\u2234', b'&there4;'),
('&Therefore;', '\u2234', b'&there4;'),
('&therefore;', '\u2234', b'&there4;'),
("&fjlig;", 'fj', b'fj'),
("&sqcup;", '\u2294', b'&sqcup;'),
("&sqcups;", '\u2294\ufe00', b'&sqcups;'),
("&apos;", "'", b"'"),
("&verbar;", "|", b"|"),
):
markup = '<div>%s</div>' % input_element
div = self.soup(markup).div
without_element = div.encode()
expect = b"<div>%s</div>" % output_unicode.encode("utf8")
self.assertEqual(without_element, expect)
with_element = div.encode(formatter="html")
expect = b"<div>%s</div>" % output_element
self.assertEqual(with_element, expect)
class TestHTMLParserSubclass(SoupTest):
def test_error(self):
"""Verify that our HTMLParser subclass implements error() in a way
that doesn't cause a crash.
"""
parser = BeautifulSoupHTMLParser()
with warnings.catch_warnings(record=True) as warns:
parser.error("don't crash")
[warning] = warns
assert "don't crash" == str(warning.message)


@@ -1,115 +0,0 @@
"""Tests to ensure that the lxml tree builder generates good trees."""
import re
import warnings
try:
import lxml.etree
LXML_PRESENT = True
LXML_VERSION = lxml.etree.LXML_VERSION
except ImportError as e:
LXML_PRESENT = False
LXML_VERSION = (0,)
if LXML_PRESENT:
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
from bs4 import (
BeautifulSoup,
BeautifulStoneSoup,
)
from bs4.element import Comment, Doctype, SoupStrainer
from bs4.testing import skipIf
from bs4.tests import test_htmlparser
from bs4.testing import (
HTMLTreeBuilderSmokeTest,
XMLTreeBuilderSmokeTest,
SoupTest,
skipIf,
)
@skipIf(
not LXML_PRESENT,
"lxml seems not to be present, not testing its tree builder.")
class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
"""See ``HTMLTreeBuilderSmokeTest``."""
@property
def default_builder(self):
return LXMLTreeBuilder
def test_out_of_range_entity(self):
self.assertSoupEquals(
"<p>foo&#10000000000000;bar</p>", "<p>foobar</p>")
self.assertSoupEquals(
"<p>foo&#x10000000000000;bar</p>", "<p>foobar</p>")
self.assertSoupEquals(
"<p>foo&#1000000000;bar</p>", "<p>foobar</p>")
def test_entities_in_foreign_document_encoding(self):
# We can't implement this case correctly because by the time we
# hear about markup like "&#147;", it's been (incorrectly) converted into
# a string like u'\x93'
pass
# In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
# test if an old version of lxml is installed.
@skipIf(
not LXML_PRESENT or LXML_VERSION < (2,3,5,0),
"Skipping doctype test for old version of lxml to avoid segfault.")
def test_empty_doctype(self):
soup = self.soup("<!DOCTYPE>")
doctype = soup.contents[0]
self.assertEqual("", doctype.strip())
def test_beautifulstonesoup_is_xml_parser(self):
# Make sure that the deprecated BSS class uses an xml builder
# if one is installed.
with warnings.catch_warnings(record=True) as w:
soup = BeautifulStoneSoup("<b />")
self.assertEqual("<b/>", str(soup.b))
self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message))
def test_tracking_line_numbers(self):
# The lxml TreeBuilder cannot keep track of line numbers from
# the original markup. Even if you ask for line numbers, we
# don't have 'em.
#
# This means that if you have a tag like <sourceline> or
# <sourcepos>, attribute access will find it rather than
# giving you a numeric answer.
soup = self.soup(
"\n <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>",
store_line_numbers=True
)
self.assertEqual("sourceline", soup.p.sourceline.name)
self.assertEqual("sourcepos", soup.p.sourcepos.name)
@skipIf(
not LXML_PRESENT,
"lxml seems not to be present, not testing its XML tree builder.")
class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
"""See ``HTMLTreeBuilderSmokeTest``."""
@property
def default_builder(self):
return LXMLTreeBuilderForXML
def test_namespace_indexing(self):
# We should not track un-prefixed namespaces as we can only hold one
# and it will be recognized as the default namespace by soupsieve,
# which may be confusing in some situations. When no namespace is provided
# for a selector, the default namespace (if defined) is assumed.
soup = self.soup(
'<?xml version="1.1"?>\n'
'<root>'
'<tag xmlns="http://unprefixed-namespace.com">content</tag>'
'<prefix:tag xmlns:prefix="http://prefixed-namespace.com">content</prefix:tag>'
'</root>'
)
self.assertEqual(
soup._namespaces,
{'xml': 'http://www.w3.org/XML/1998/namespace', 'prefix': 'http://prefixed-namespace.com'}
)
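
For reference, a minimal sketch (not from the deleted file; requires lxml) of the XML parsing path these tests cover:

from bs4 import BeautifulSoup

# features="xml" selects LXMLTreeBuilderForXML: tags keep their case and no
# HTML-specific fixups are applied.
soup = BeautifulSoup('<root><Tag>content</Tag></root>', 'xml')
print(soup.Tag.string)   # content
print(soup.find('tag'))  # None - XML parsing is case-sensitive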


@@ -1,579 +0,0 @@
# -*- coding: utf-8 -*-
"""Tests of Beautiful Soup as a whole."""
from pdb import set_trace
import logging
import os
import unittest
import sys
import tempfile
from bs4 import (
BeautifulSoup,
BeautifulStoneSoup,
GuessedAtParserWarning,
MarkupResemblesLocatorWarning,
)
from bs4.builder import (
TreeBuilder,
ParserRejectedMarkup,
)
from bs4.element import (
CharsetMetaAttributeValue,
Comment,
ContentMetaAttributeValue,
SoupStrainer,
NamespacedAttribute,
Tag,
NavigableString,
)
import bs4.dammit
from bs4.dammit import (
EntitySubstitution,
UnicodeDammit,
)
from bs4.testing import (
default_builder,
SoupTest,
skipIf,
)
import warnings
try:
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
LXML_PRESENT = True
except ImportError as e:
LXML_PRESENT = False
PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))
class TestConstructor(SoupTest):
def test_short_unicode_input(self):
data = "<h1>éé</h1>"
soup = self.soup(data)
self.assertEqual("éé", soup.h1.string)
def test_embedded_null(self):
data = "<h1>foo\0bar</h1>"
soup = self.soup(data)
self.assertEqual("foo\0bar", soup.h1.string)
def test_exclude_encodings(self):
utf8_data = "Räksmörgås".encode("utf-8")
soup = self.soup(utf8_data, exclude_encodings=["utf-8"])
self.assertEqual("windows-1252", soup.original_encoding)
def test_custom_builder_class(self):
# Verify that you can pass in a custom Builder class and
# it'll be instantiated with the appropriate keyword arguments.
class Mock(object):
def __init__(self, **kwargs):
self.called_with = kwargs
self.is_xml = True
self.store_line_numbers = False
self.cdata_list_attributes = []
self.preserve_whitespace_tags = []
self.string_containers = {}
def initialize_soup(self, soup):
pass
def feed(self, markup):
self.fed = markup
def reset(self):
pass
def ignore(self, ignore):
pass
set_up_substitutions = can_be_empty_element = ignore
def prepare_markup(self, *args, **kwargs):
yield "prepared markup", "original encoding", "declared encoding", "contains replacement characters"
kwargs = dict(
var="value",
# This is a deprecated BS3-era keyword argument, which
# will be stripped out.
convertEntities=True,
)
with warnings.catch_warnings(record=True):
soup = BeautifulSoup('', builder=Mock, **kwargs)
assert isinstance(soup.builder, Mock)
self.assertEqual(dict(var="value"), soup.builder.called_with)
self.assertEqual("prepared markup", soup.builder.fed)
# You can also instantiate the TreeBuilder yourself. In this
# case, that specific object is used and any keyword arguments
# to the BeautifulSoup constructor are ignored.
builder = Mock(**kwargs)
with warnings.catch_warnings(record=True) as w:
soup = BeautifulSoup(
'', builder=builder, ignored_value=True,
)
msg = str(w[0].message)
assert msg.startswith("Keyword arguments to the BeautifulSoup constructor will be ignored.")
self.assertEqual(builder, soup.builder)
self.assertEqual(kwargs, builder.called_with)
def test_parser_markup_rejection(self):
# If markup is completely rejected by the parser, an
# explanatory ParserRejectedMarkup exception is raised.
class Mock(TreeBuilder):
def feed(self, *args, **kwargs):
raise ParserRejectedMarkup("Nope.")
def prepare_markup(self, *args, **kwargs):
# We're going to try two different ways of preparing this markup,
# but feed() will reject both of them.
yield markup, None, None, False
yield markup, None, None, False
import re
self.assertRaisesRegex(
ParserRejectedMarkup,
"The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.",
BeautifulSoup, '', builder=Mock,
)
def test_cdata_list_attributes(self):
# Most attribute values are represented as scalars, but the
# HTML standard says that some attributes, like 'class' have
# space-separated lists as values.
markup = '<a id=" an id " class=" a class "></a>'
soup = self.soup(markup)
# Note that the spaces are stripped for 'class' but not for 'id'.
a = soup.a
self.assertEqual(" an id ", a['id'])
self.assertEqual(["a", "class"], a['class'])
# TreeBuilder takes an argument called 'multi_valued_attributes' which lets
# you customize or disable this. As always, you can customize the TreeBuilder
# by passing in a keyword argument to the BeautifulSoup constructor.
soup = self.soup(markup, builder=default_builder, multi_valued_attributes=None)
self.assertEqual(" a class ", soup.a['class'])
# Here are two ways of saying that `id` is a multi-valued
# attribute in this context, but 'class' is not.
for switcheroo in ({'*': 'id'}, {'a': 'id'}):
with warnings.catch_warnings(record=True) as w:
# This will create a warning about not explicitly
# specifying a parser, but we'll ignore it.
soup = self.soup(markup, builder=None, multi_valued_attributes=switcheroo)
a = soup.a
self.assertEqual(["an", "id"], a['id'])
self.assertEqual(" a class ", a['class'])
def test_replacement_classes(self):
# Test the ability to pass in replacements for element classes
# which will be used when building the tree.
class TagPlus(Tag):
pass
class StringPlus(NavigableString):
pass
class CommentPlus(Comment):
pass
soup = self.soup(
"<a><b>foo</b>bar</a><!--whee-->",
element_classes = {
Tag: TagPlus,
NavigableString: StringPlus,
Comment: CommentPlus,
}
)
# The tree was built with TagPlus, StringPlus, and CommentPlus objects,
# rather than Tag, String, and Comment objects.
assert all(
isinstance(x, (TagPlus, StringPlus, CommentPlus))
for x in soup.recursiveChildGenerator()
)
def test_alternate_string_containers(self):
# Test the ability to customize the string containers for
# different types of tags.
class PString(NavigableString):
pass
class BString(NavigableString):
pass
soup = self.soup(
"<div>Hello.<p>Here is <b>some <i>bolded</i></b> text",
string_containers = {
'b': BString,
'p': PString,
}
)
# The string before the <p> tag is a regular NavigableString.
assert isinstance(soup.div.contents[0], NavigableString)
# The string inside the <p> tag, but not inside the <i> tag,
# is a PString.
assert isinstance(soup.p.contents[0], PString)
# Every string inside the <b> tag is a BString, even the one that
# was also inside an <i> tag.
for s in soup.b.strings:
assert isinstance(s, BString)
# Now that parsing was complete, the string_container_stack
# (where this information was kept) has been cleared out.
self.assertEqual([], soup.string_container_stack)
class TestWarnings(SoupTest):
def _assert_warning(self, warnings, cls):
for w in warnings:
if isinstance(w.message, cls):
return w
raise Exception("%s warning not found in %r" % cls, warnings)
def _assert_no_parser_specified(self, w):
warning = self._assert_warning(w, GuessedAtParserWarning)
message = str(warning.message)
self.assertTrue(
message.startswith(BeautifulSoup.NO_PARSER_SPECIFIED_WARNING[:60])
)
def test_warning_if_no_parser_specified(self):
with warnings.catch_warnings(record=True) as w:
soup = BeautifulSoup("<a><b></b></a>")
self._assert_no_parser_specified(w)
def test_warning_if_parser_specified_too_vague(self):
with warnings.catch_warnings(record=True) as w:
soup = BeautifulSoup("<a><b></b></a>", "html")
self._assert_no_parser_specified(w)
def test_no_warning_if_explicit_parser_specified(self):
with warnings.catch_warnings(record=True) as w:
soup = BeautifulSoup("<a><b></b></a>", "html.parser")
self.assertEqual([], w)
def test_parseOnlyThese_renamed_to_parse_only(self):
with warnings.catch_warnings(record=True) as w:
soup = self.soup("<a><b></b></a>", parseOnlyThese=SoupStrainer("b"))
msg = str(w[0].message)
self.assertTrue("parseOnlyThese" in msg)
self.assertTrue("parse_only" in msg)
self.assertEqual(b"<b></b>", soup.encode())
def test_fromEncoding_renamed_to_from_encoding(self):
with warnings.catch_warnings(record=True) as w:
utf8 = b"\xc3\xa9"
soup = self.soup(utf8, fromEncoding="utf8")
msg = str(w[0].message)
self.assertTrue("fromEncoding" in msg)
self.assertTrue("from_encoding" in msg)
self.assertEqual("utf8", soup.original_encoding)
def test_unrecognized_keyword_argument(self):
self.assertRaises(
TypeError, self.soup, "<a>", no_such_argument=True)
def test_disk_file_warning(self):
filehandle = tempfile.NamedTemporaryFile()
filename = filehandle.name
try:
with warnings.catch_warnings(record=True) as w:
soup = self.soup(filename)
warning = self._assert_warning(w, MarkupResemblesLocatorWarning)
self.assertTrue("looks like a filename" in str(warning.message))
finally:
filehandle.close()
# The file no longer exists, so Beautiful Soup will no longer issue the warning.
with warnings.catch_warnings(record=True) as w:
soup = self.soup(filename)
self.assertEqual([], w)
def test_directory_warning(self):
try:
filename = tempfile.mkdtemp()
with warnings.catch_warnings(record=True) as w:
soup = self.soup(filename)
warning = self._assert_warning(w, MarkupResemblesLocatorWarning)
self.assertTrue("looks like a directory" in str(warning.message))
finally:
os.rmdir(filename)
# The directory no longer exists, so Beautiful Soup will no longer issue the warning.
with warnings.catch_warnings(record=True) as w:
soup = self.soup(filename)
self.assertEqual([], w)
def test_url_warning_with_bytes_url(self):
with warnings.catch_warnings(record=True) as warning_list:
soup = self.soup(b"http://www.crummybytes.com/")
warning = self._assert_warning(
warning_list, MarkupResemblesLocatorWarning
)
self.assertTrue("looks like a URL" in str(warning.message))
def test_url_warning_with_unicode_url(self):
with warnings.catch_warnings(record=True) as warning_list:
# note - this url must differ from the bytes one otherwise
# python's warnings system swallows the second warning
soup = self.soup("http://www.crummyunicode.com/")
warning = self._assert_warning(
warning_list, MarkupResemblesLocatorWarning
)
self.assertTrue("looks like a URL" in str(warning.message))
def test_url_warning_with_bytes_and_space(self):
# Here the markup contains something besides a URL, so no warning
# is issued.
with warnings.catch_warnings(record=True) as warning_list:
soup = self.soup(b"http://www.crummybytes.com/ is great")
self.assertFalse(any("looks like a URL" in str(w.message)
for w in warning_list))
def test_url_warning_with_unicode_and_space(self):
with warnings.catch_warnings(record=True) as warning_list:
soup = self.soup("http://www.crummyuncode.com/ is great")
self.assertFalse(any("looks like a URL" in str(w.message)
for w in warning_list))
class TestSelectiveParsing(SoupTest):
def test_parse_with_soupstrainer(self):
markup = "No<b>Yes</b><a>No<b>Yes <c>Yes</c></b>"
strainer = SoupStrainer("b")
soup = self.soup(markup, parse_only=strainer)
self.assertEqual(soup.encode(), b"<b>Yes</b><b>Yes <c>Yes</c></b>")
class TestEntitySubstitution(unittest.TestCase):
"""Standalone tests of the EntitySubstitution class."""
def setUp(self):
self.sub = EntitySubstitution
def test_simple_html_substitution(self):
# Unicode characters corresponding to named HTML entities
# are substituted, and no others.
s = "foo\u2200\N{SNOWMAN}\u00f5bar"
self.assertEqual(self.sub.substitute_html(s),
"foo&forall;\N{SNOWMAN}&otilde;bar")
def test_smart_quote_substitution(self):
# MS smart quotes are a common source of frustration, so we
# give them a special test.
quotes = b"\x91\x92foo\x93\x94"
dammit = UnicodeDammit(quotes)
self.assertEqual(self.sub.substitute_html(dammit.markup),
"&lsquo;&rsquo;foo&ldquo;&rdquo;")
def test_html5_entity(self):
# Some HTML5 entities correspond to single- or multi-character
# Unicode sequences.
for entity, u in (
# A few spot checks of our ability to recognize
# special character sequences and convert them
# to named entities.
('&models;', '\u22a7'),
('&Nfr;', '\U0001d511'),
('&ngeqq;', '\u2267\u0338'),
('&not;', '\xac'),
('&Not;', '\u2aec'),
# We _could_ convert | to &verbar;, but we don't, because
# | is an ASCII character.
('|', '|'),
# Similarly for the fj ligature, which we could convert to
# &fjlig;, but we don't.
("fj", "fj"),
# We do convert _these_ ASCII characters to HTML entities,
# because that's required to generate valid HTML.
('&gt;', '>'),
('&lt;', '<'),
('&amp;', '&'),
):
template = '3 %s 4'
raw = template % u
with_entities = template % entity
self.assertEqual(self.sub.substitute_html(raw), with_entities)
def test_html5_entity_with_variation_selector(self):
# Some HTML5 entities correspond either to a single-character
# Unicode sequence _or_ to the same character plus U+FE00,
# VARIATION SELECTOR 1. We can handle this.
data = "fjords \u2294 penguins"
markup = "fjords &sqcup; penguins"
self.assertEqual(self.sub.substitute_html(data), markup)
data = "fjords \u2294\ufe00 penguins"
markup = "fjords &sqcups; penguins"
self.assertEqual(self.sub.substitute_html(data), markup)
def test_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_false(self):
s = 'Welcome to "my bar"'
self.assertEqual(self.sub.substitute_xml(s, False), s)
def test_xml_attribute_quoting_normally_uses_double_quotes(self):
self.assertEqual(self.sub.substitute_xml("Welcome", True),
'"Welcome"')
self.assertEqual(self.sub.substitute_xml("Bob's Bar", True),
'"Bob\'s Bar"')
def test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotes(self):
s = 'Welcome to "my bar"'
self.assertEqual(self.sub.substitute_xml(s, True),
"'Welcome to \"my bar\"'")
def test_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotes(self):
s = 'Welcome to "Bob\'s Bar"'
self.assertEqual(
self.sub.substitute_xml(s, True),
'"Welcome to &quot;Bob\'s Bar&quot;"')
def test_xml_quotes_arent_escaped_when_value_is_not_being_quoted(self):
quoted = 'Welcome to "Bob\'s Bar"'
self.assertEqual(self.sub.substitute_xml(quoted), quoted)
def test_xml_quoting_handles_angle_brackets(self):
self.assertEqual(
self.sub.substitute_xml("foo<bar>"),
"foo&lt;bar&gt;")
def test_xml_quoting_handles_ampersands(self):
self.assertEqual(self.sub.substitute_xml("AT&T"), "AT&amp;T")
def test_xml_quoting_including_ampersands_when_they_are_part_of_an_entity(self):
self.assertEqual(
self.sub.substitute_xml("&Aacute;T&T"),
"&amp;Aacute;T&amp;T")
def test_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entity(self):
self.assertEqual(
self.sub.substitute_xml_containing_entities("&Aacute;T&T"),
"&Aacute;T&amp;T")
def test_quotes_not_html_substituted(self):
"""There's no need to do this except inside attribute values."""
text = 'Bob\'s "bar"'
self.assertEqual(self.sub.substitute_html(text), text)
class TestEncodingConversion(SoupTest):
# Test Beautiful Soup's ability to decode and encode from various
# encodings.
def setUp(self):
super(TestEncodingConversion, self).setUp()
self.unicode_data = '<html><head><meta charset="utf-8"/></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>'
self.utf8_data = self.unicode_data.encode("utf-8")
# Just so you know what it looks like.
self.assertEqual(
self.utf8_data,
b'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\xc3\xa9 bleu!</foo></body></html>')
def test_ascii_in_unicode_out(self):
# ASCII input is converted to Unicode. The original_encoding
# attribute is set to 'utf-8', a superset of ASCII.
chardet = bs4.dammit.chardet_dammit
logging.disable(logging.WARNING)
try:
def noop(str):
return None
# Disable chardet, which will realize that the ASCII is ASCII.
bs4.dammit.chardet_dammit = noop
ascii = b"<foo>a</foo>"
soup_from_ascii = self.soup(ascii)
unicode_output = soup_from_ascii.decode()
self.assertTrue(isinstance(unicode_output, str))
self.assertEqual(unicode_output, self.document_for(ascii.decode()))
self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8")
finally:
logging.disable(logging.NOTSET)
bs4.dammit.chardet_dammit = chardet
def test_unicode_in_unicode_out(self):
# Unicode input is left alone. The original_encoding attribute
# is not set.
soup_from_unicode = self.soup(self.unicode_data)
self.assertEqual(soup_from_unicode.decode(), self.unicode_data)
self.assertEqual(soup_from_unicode.foo.string, 'Sacr\xe9 bleu!')
self.assertEqual(soup_from_unicode.original_encoding, None)
def test_utf8_in_unicode_out(self):
# UTF-8 input is converted to Unicode. The original_encoding
# attribute is set.
soup_from_utf8 = self.soup(self.utf8_data)
self.assertEqual(soup_from_utf8.decode(), self.unicode_data)
self.assertEqual(soup_from_utf8.foo.string, 'Sacr\xe9 bleu!')
def test_utf8_out(self):
# The internal data structures can be encoded as UTF-8.
soup_from_unicode = self.soup(self.unicode_data)
self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data)
@skipIf(
PYTHON_3_PRE_3_2,
"Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")
def test_attribute_name_containing_unicode_characters(self):
markup = '<div><a \N{SNOWMAN}="snowman"></a></div>'
self.assertEqual(self.soup(markup).div.encode("utf8"), markup.encode("utf8"))
class TestNamedspacedAttribute(SoupTest):
def test_name_may_be_none_or_missing(self):
a = NamespacedAttribute("xmlns", None)
self.assertEqual(a, "xmlns")
a = NamespacedAttribute("xmlns", "")
self.assertEqual(a, "xmlns")
a = NamespacedAttribute("xmlns")
self.assertEqual(a, "xmlns")
def test_namespace_may_be_none_or_missing(self):
a = NamespacedAttribute(None, "tag")
self.assertEqual(a, "tag")
a = NamespacedAttribute("", "tag")
self.assertEqual(a, "tag")
def test_attribute_is_equivalent_to_colon_separated_string(self):
a = NamespacedAttribute("a", "b")
self.assertEqual("a:b", a)
def test_attributes_are_equivalent_if_prefix_and_name_identical(self):
a = NamespacedAttribute("a", "b", "c")
b = NamespacedAttribute("a", "b", "c")
self.assertEqual(a, b)
# The actual namespace is not considered.
c = NamespacedAttribute("a", "b", None)
self.assertEqual(a, c)
# But name and prefix are important.
d = NamespacedAttribute("a", "z", "c")
self.assertNotEqual(a, d)
e = NamespacedAttribute("z", "b", "c")
self.assertNotEqual(a, e)
class TestAttributeValueWithCharsetSubstitution(unittest.TestCase):
def test_content_meta_attribute_value(self):
value = CharsetMetaAttributeValue("euc-jp")
self.assertEqual("euc-jp", value)
self.assertEqual("euc-jp", value.original_value)
self.assertEqual("utf8", value.encode("utf8"))
def test_content_meta_attribute_value(self):
value = ContentMetaAttributeValue("text/html; charset=euc-jp")
self.assertEqual("text/html; charset=euc-jp", value)
self.assertEqual("text/html; charset=euc-jp", value.original_value)
self.assertEqual("text/html; charset=utf8", value.encode("utf8"))

File diff suppressed because it is too large


@@ -1,233 +0,0 @@
from __future__ import unicode_literals
import os
import time
import subprocess
import warnings
import tempfile
import pickle
import pytest
class PicklableMixin(object):
def _get_nobj_bytes(self, obj, dump_kwargs, load_kwargs):
"""
Pickle and unpickle an object using ``pickle.dumps`` / ``pickle.loads``
"""
pkl = pickle.dumps(obj, **dump_kwargs)
return pickle.loads(pkl, **load_kwargs)
def _get_nobj_file(self, obj, dump_kwargs, load_kwargs):
"""
Pickle and unpickle an object using ``pickle.dump`` / ``pickle.load`` on
a temporary file.
"""
with tempfile.TemporaryFile('w+b') as pkl:
pickle.dump(obj, pkl, **dump_kwargs)
pkl.seek(0) # Reset the file to the beginning to read it
nobj = pickle.load(pkl, **load_kwargs)
return nobj
def assertPicklable(self, obj, singleton=False, asfile=False,
dump_kwargs=None, load_kwargs=None):
"""
Assert that an object can be pickled and unpickled. This assertion
assumes that the desired behavior is that the unpickled object compares
equal to the original object, but is not the same object.
"""
get_nobj = self._get_nobj_file if asfile else self._get_nobj_bytes
dump_kwargs = dump_kwargs or {}
load_kwargs = load_kwargs or {}
nobj = get_nobj(obj, dump_kwargs, load_kwargs)
if not singleton:
self.assertIsNot(obj, nobj)
self.assertEqual(obj, nobj)
class TZContextBase(object):
"""
Base class for a context manager which allows changing of time zones.
Subclasses may define a guard variable to either block or allow time
zone changes by redefining ``_guard_var_name`` and ``_guard_allows_change``.
The default is that the guard variable must be affirmatively set.
Subclasses must define ``get_current_tz`` and ``set_current_tz``.
"""
_guard_var_name = "DATEUTIL_MAY_CHANGE_TZ"
_guard_allows_change = True
def __init__(self, tzval):
self.tzval = tzval
self._old_tz = None
@classmethod
def tz_change_allowed(cls):
"""
Class method used to query whether or not this class allows time zone
changes.
"""
guard = bool(os.environ.get(cls._guard_var_name, False))
# _guard_allows_change gives the "default" behavior - if True, the
# guard is overcoming a block. If false, the guard is causing a block.
# Whether tz_change is allowed is therefore the XNOR of the two.
return guard == cls._guard_allows_change
@classmethod
def tz_change_disallowed_message(cls):
""" Generate instructions on how to allow tz changes """
msg = ('Changing time zone not allowed. Set {envar} to {gval} '
'if you would like to allow this behavior')
return msg.format(envar=cls._guard_var_name,
gval=cls._guard_allows_change)
def __enter__(self):
if not self.tz_change_allowed():
msg = self.tz_change_disallowed_message()
pytest.skip(msg)
# If this is used outside of a test suite, we still want an error.
raise ValueError(msg) # pragma: no cover
self._old_tz = self.get_current_tz()
self.set_current_tz(self.tzval)
def __exit__(self, type, value, traceback):
if self._old_tz is not None:
self.set_current_tz(self._old_tz)
self._old_tz = None
def get_current_tz(self):
raise NotImplementedError
def set_current_tz(self):
raise NotImplementedError
class TZEnvContext(TZContextBase):
"""
Context manager that temporarily sets the `TZ` variable (for use on
*nix-like systems). Because the effect is local to the shell anyway, this
will apply *unless* a guard is set.
If you do not want the TZ environment variable set, you may set the
``DATEUTIL_MAY_NOT_CHANGE_TZ_VAR`` variable to a truthy value.
"""
_guard_var_name = "DATEUTIL_MAY_NOT_CHANGE_TZ_VAR"
_guard_allows_change = False
def get_current_tz(self):
return os.environ.get('TZ', UnsetTz)
def set_current_tz(self, tzval):
if tzval is UnsetTz and 'TZ' in os.environ:
del os.environ['TZ']
else:
os.environ['TZ'] = tzval
time.tzset()
class TZWinContext(TZContextBase):
"""
Context manager for changing local time zone on Windows.
Because the effect of this is system-wide and global, it may have
unintended side effect. Set the ``DATEUTIL_MAY_CHANGE_TZ`` environment
variable to a truthy value before using this context manager.
"""
def get_current_tz(self):
p = subprocess.Popen(['tzutil', '/g'], stdout=subprocess.PIPE)
ctzname, err = p.communicate()
ctzname = ctzname.decode() # Popen returns bytes
if p.returncode:
raise OSError('Failed to get current time zone: ' + err)
return ctzname
def set_current_tz(self, tzname):
p = subprocess.Popen('tzutil /s "' + tzname + '"')
out, err = p.communicate()
if p.returncode:
raise OSError('Failed to set current time zone: ' +
(err or 'Unknown error.'))
###
# Utility classes
class NotAValueClass(object):
"""
A class analogous to NaN that has operations defined for any type.
"""
def _op(self, other):
return self # Operation with NotAValue returns NotAValue
def _cmp(self, other):
return False
__add__ = __radd__ = _op
__sub__ = __rsub__ = _op
__mul__ = __rmul__ = _op
__div__ = __rdiv__ = _op
__truediv__ = __rtruediv__ = _op
__floordiv__ = __rfloordiv__ = _op
__lt__ = __rlt__ = _op
__gt__ = __rgt__ = _op
__eq__ = __req__ = _op
__le__ = __rle__ = _op
__ge__ = __rge__ = _op
NotAValue = NotAValueClass()
class ComparesEqualClass(object):
"""
A class that is always equal to whatever you compare it to.
"""
def __eq__(self, other):
return True
def __ne__(self, other):
return False
def __le__(self, other):
return True
def __ge__(self, other):
return True
def __lt__(self, other):
return False
def __gt__(self, other):
return False
__req__ = __eq__
__rne__ = __ne__
__rle__ = __le__
__rge__ = __ge__
__rlt__ = __lt__
__rgt__ = __gt__
ComparesEqual = ComparesEqualClass()
class UnsetTzClass(object):
""" Sentinel class for unset time zone variable """
pass
UnsetTz = UnsetTzClass()
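
Hypothetical usage of the TZEnvContext helper defined above (POSIX only, and subject to the DATEUTIL_MAY_NOT_CHANGE_TZ_VAR guard):

import time

# Temporarily export TZ=UTC for the duration of the block; the previous
# value (or its absence) is restored on exit.
with TZEnvContext('UTC'):
    print(time.tzname)  # ('UTC', 'UTC')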


@@ -1,41 +0,0 @@
import os
import pytest
# Configure pytest to ignore xfailing tests
# See: https://stackoverflow.com/a/53198349/467366
def pytest_collection_modifyitems(items):
for item in items:
marker_getter = getattr(item, 'get_closest_marker', None)
# Python 3.3 support
if marker_getter is None:
marker_getter = item.get_marker
marker = marker_getter('xfail')
# Need to query the args because conditional xfail tests still have
# the xfail mark even if they are not expected to fail
if marker and (not marker.args or marker.args[0]):
item.add_marker(pytest.mark.no_cover)
def set_tzpath():
"""
Sets the TZPATH variable if it's specified in an environment variable.
"""
tzpath = os.environ.get('DATEUTIL_TZPATH', None)
if tzpath is None:
return
path_components = tzpath.split(':')
print("Setting TZPATH to {}".format(path_components))
from dateutil import tz
tz.TZPATHS.clear()
tz.TZPATHS.extend(path_components)
set_tzpath()


@@ -1,27 +0,0 @@
from hypothesis import given, assume
from hypothesis import strategies as st
from dateutil import tz
from dateutil.parser import isoparse
import pytest
# Strategies
TIME_ZONE_STRATEGY = st.sampled_from([None, tz.UTC] +
[tz.gettz(zname) for zname in ('US/Eastern', 'US/Pacific',
'Australia/Sydney', 'Europe/London')])
ASCII_STRATEGY = st.characters(max_codepoint=127)
@pytest.mark.isoparser
@given(dt=st.datetimes(timezones=TIME_ZONE_STRATEGY), sep=ASCII_STRATEGY)
def test_timespec_auto(dt, sep):
if dt.tzinfo is not None:
# Assume offset has no sub-second components
assume(dt.utcoffset().total_seconds() % 60 == 0)
sep = str(sep) # Python 2.7 requires bytes
dtstr = dt.isoformat(sep=sep)
dt_rt = isoparse(dtstr)
assert dt_rt == dt
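
The round-trip property being tested, in concrete form (illustrative, not part of the deleted file):

from datetime import datetime
from dateutil import tz
from dateutil.parser import isoparse

dt = datetime(2021, 10, 15, 1, 51, 46, tzinfo=tz.UTC)
# ISO 8601 text produced by isoformat() parses back to the same instant.
assert isoparse(dt.isoformat()) == dt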


@@ -1,22 +0,0 @@
from hypothesis.strategies import integers
from hypothesis import given
import pytest
from dateutil.parser import parserinfo
@pytest.mark.parserinfo
@given(integers(min_value=100, max_value=9999))
def test_convertyear(n):
assert n == parserinfo().convertyear(n)
@pytest.mark.parserinfo
@given(integers(min_value=-50,
max_value=49))
def test_convertyear_no_specified_century(n):
p = parserinfo()
new_year = p._year + n
result = p.convertyear(new_year % 100, century_specified=False)
assert result == new_year
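
For concreteness, a sketch (not from the deleted file) of the two-digit-year behaviour the property test above checks:

from dateutil.parser import parserinfo

p = parserinfo()
print(p.convertyear(2021))  # 2021 - four-digit years pass through unchanged
# Two-digit years resolve to within +/-50 years of the current year,
# so around the time of this commit 99 -> 1999.
print(p.convertyear(99, century_specified=False))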


@@ -1,35 +0,0 @@
from datetime import datetime, timedelta
import pytest
import six
from hypothesis import assume, given
from hypothesis import strategies as st
from dateutil import tz as tz
EPOCHALYPSE = datetime.fromtimestamp(2147483647)
NEGATIVE_EPOCHALYPSE = datetime.fromtimestamp(0) - timedelta(seconds=2147483648)
@pytest.mark.gettz
@pytest.mark.parametrize("gettz_arg", [None, ""])
# TODO: Remove bounds when GH #590 is resolved
@given(
dt=st.datetimes(
min_value=NEGATIVE_EPOCHALYPSE, max_value=EPOCHALYPSE, timezones=st.just(tz.UTC),
)
)
def test_gettz_returns_local(gettz_arg, dt):
act_tz = tz.gettz(gettz_arg)
if isinstance(act_tz, tz.tzlocal):
return
dt_act = dt.astimezone(tz.gettz(gettz_arg))
if six.PY2:
dt_exp = dt.astimezone(tz.tzlocal())
else:
dt_exp = dt.astimezone()
assert dt_act == dt_exp
assert dt_act.tzname() == dt_exp.tzname()
assert dt_act.utcoffset() == dt_exp.utcoffset()


@@ -1,93 +0,0 @@
from dateutil.easter import easter
from dateutil.easter import EASTER_WESTERN, EASTER_ORTHODOX, EASTER_JULIAN
from datetime import date
import pytest
# List of easters between 1990 and 2050
western_easter_dates = [
date(1990, 4, 15), date(1991, 3, 31), date(1992, 4, 19), date(1993, 4, 11),
date(1994, 4, 3), date(1995, 4, 16), date(1996, 4, 7), date(1997, 3, 30),
date(1998, 4, 12), date(1999, 4, 4),
date(2000, 4, 23), date(2001, 4, 15), date(2002, 3, 31), date(2003, 4, 20),
date(2004, 4, 11), date(2005, 3, 27), date(2006, 4, 16), date(2007, 4, 8),
date(2008, 3, 23), date(2009, 4, 12),
date(2010, 4, 4), date(2011, 4, 24), date(2012, 4, 8), date(2013, 3, 31),
date(2014, 4, 20), date(2015, 4, 5), date(2016, 3, 27), date(2017, 4, 16),
date(2018, 4, 1), date(2019, 4, 21),
date(2020, 4, 12), date(2021, 4, 4), date(2022, 4, 17), date(2023, 4, 9),
date(2024, 3, 31), date(2025, 4, 20), date(2026, 4, 5), date(2027, 3, 28),
date(2028, 4, 16), date(2029, 4, 1),
date(2030, 4, 21), date(2031, 4, 13), date(2032, 3, 28), date(2033, 4, 17),
date(2034, 4, 9), date(2035, 3, 25), date(2036, 4, 13), date(2037, 4, 5),
date(2038, 4, 25), date(2039, 4, 10),
date(2040, 4, 1), date(2041, 4, 21), date(2042, 4, 6), date(2043, 3, 29),
date(2044, 4, 17), date(2045, 4, 9), date(2046, 3, 25), date(2047, 4, 14),
date(2048, 4, 5), date(2049, 4, 18), date(2050, 4, 10)
]
orthodox_easter_dates = [
date(1990, 4, 15), date(1991, 4, 7), date(1992, 4, 26), date(1993, 4, 18),
date(1994, 5, 1), date(1995, 4, 23), date(1996, 4, 14), date(1997, 4, 27),
date(1998, 4, 19), date(1999, 4, 11),
date(2000, 4, 30), date(2001, 4, 15), date(2002, 5, 5), date(2003, 4, 27),
date(2004, 4, 11), date(2005, 5, 1), date(2006, 4, 23), date(2007, 4, 8),
date(2008, 4, 27), date(2009, 4, 19),
date(2010, 4, 4), date(2011, 4, 24), date(2012, 4, 15), date(2013, 5, 5),
date(2014, 4, 20), date(2015, 4, 12), date(2016, 5, 1), date(2017, 4, 16),
date(2018, 4, 8), date(2019, 4, 28),
date(2020, 4, 19), date(2021, 5, 2), date(2022, 4, 24), date(2023, 4, 16),
date(2024, 5, 5), date(2025, 4, 20), date(2026, 4, 12), date(2027, 5, 2),
date(2028, 4, 16), date(2029, 4, 8),
date(2030, 4, 28), date(2031, 4, 13), date(2032, 5, 2), date(2033, 4, 24),
date(2034, 4, 9), date(2035, 4, 29), date(2036, 4, 20), date(2037, 4, 5),
date(2038, 4, 25), date(2039, 4, 17),
date(2040, 5, 6), date(2041, 4, 21), date(2042, 4, 13), date(2043, 5, 3),
date(2044, 4, 24), date(2045, 4, 9), date(2046, 4, 29), date(2047, 4, 21),
date(2048, 4, 5), date(2049, 4, 25), date(2050, 4, 17)
]
# A random smattering of Julian dates.
# Pulled values from http://www.kevinlaughery.com/east4099.html
julian_easter_dates = [
date( 326, 4, 3), date( 375, 4, 5), date( 492, 4, 5), date( 552, 3, 31),
date( 562, 4, 9), date( 569, 4, 21), date( 597, 4, 14), date( 621, 4, 19),
date( 636, 3, 31), date( 655, 3, 29), date( 700, 4, 11), date( 725, 4, 8),
date( 750, 3, 29), date( 782, 4, 7), date( 835, 4, 18), date( 849, 4, 14),
date( 867, 3, 30), date( 890, 4, 12), date( 922, 4, 21), date( 934, 4, 6),
date(1049, 3, 26), date(1058, 4, 19), date(1113, 4, 6), date(1119, 3, 30),
date(1242, 4, 20), date(1255, 3, 28), date(1257, 4, 8), date(1258, 3, 24),
date(1261, 4, 24), date(1278, 4, 17), date(1333, 4, 4), date(1351, 4, 17),
date(1371, 4, 6), date(1391, 3, 26), date(1402, 3, 26), date(1412, 4, 3),
date(1439, 4, 5), date(1445, 3, 28), date(1531, 4, 9), date(1555, 4, 14)
]
@pytest.mark.parametrize("easter_date", western_easter_dates)
def test_easter_western(easter_date):
assert easter_date == easter(easter_date.year, EASTER_WESTERN)
@pytest.mark.parametrize("easter_date", orthodox_easter_dates)
def test_easter_orthodox(easter_date):
assert easter_date == easter(easter_date.year, EASTER_ORTHODOX)
@pytest.mark.parametrize("easter_date", julian_easter_dates)
def test_easter_julian(easter_date):
assert easter_date == easter(easter_date.year, EASTER_JULIAN)
def test_easter_bad_method():
with pytest.raises(ValueError):
easter(1975, 4)
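
A quick spot check (not from the deleted file) of the three calculation methods, using dates already listed above:

from dateutil.easter import easter, EASTER_WESTERN, EASTER_ORTHODOX, EASTER_JULIAN

print(easter(2021, EASTER_WESTERN))   # 2021-04-04 (EASTER_WESTERN is the default)
print(easter(2021, EASTER_ORTHODOX))  # 2021-05-02
print(easter(1555, EASTER_JULIAN))    # 1555-04-14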


@@ -1,33 +0,0 @@
"""Test for the "import *" functionality.
As import * can only be done at module level, it has been added in a separate file
"""
import pytest
prev_locals = list(locals())
from dateutil import *
new_locals = {name:value for name,value in locals().items()
if name not in prev_locals}
new_locals.pop('prev_locals')
@pytest.mark.import_star
def test_imported_modules():
""" Test that `from dateutil import *` adds modules in __all__ locally """
import dateutil.easter
import dateutil.parser
import dateutil.relativedelta
import dateutil.rrule
import dateutil.tz
import dateutil.utils
import dateutil.zoneinfo
assert dateutil.easter == new_locals.pop("easter")
assert dateutil.parser == new_locals.pop("parser")
assert dateutil.relativedelta == new_locals.pop("relativedelta")
assert dateutil.rrule == new_locals.pop("rrule")
assert dateutil.tz == new_locals.pop("tz")
assert dateutil.utils == new_locals.pop("utils")
assert dateutil.zoneinfo == new_locals.pop("zoneinfo")
assert not new_locals


@@ -1,176 +0,0 @@
import sys
import pytest
HOST_IS_WINDOWS = sys.platform.startswith('win')
def test_import_version_str():
""" Test that dateutil.__version__ can be imported"""
from dateutil import __version__
def test_import_version_root():
import dateutil
assert hasattr(dateutil, '__version__')
# Test that dateutil.easter-related imports work properly
def test_import_easter_direct():
import dateutil.easter
def test_import_easter_from():
from dateutil import easter
def test_import_easter_start():
from dateutil.easter import easter
# Test that dateutil.parser-related imports work properly
def test_import_parser_direct():
import dateutil.parser
def test_import_parser_from():
from dateutil import parser
def test_import_parser_all():
# All interface
from dateutil.parser import parse
from dateutil.parser import parserinfo
# Other public classes
from dateutil.parser import parser
for var in (parse, parserinfo, parser):
assert var is not None
# Test that dateutil.relativedelta-related imports work properly
def test_import_relative_delta_direct():
import dateutil.relativedelta
def test_import_relative_delta_from():
from dateutil import relativedelta
def test_import_relative_delta_all():
from dateutil.relativedelta import relativedelta
from dateutil.relativedelta import MO, TU, WE, TH, FR, SA, SU
for var in (relativedelta, MO, TU, WE, TH, FR, SA, SU):
assert var is not None
# In the public interface but not in all
from dateutil.relativedelta import weekday
assert weekday is not None
# Test that dateutil.rrule related imports work properly
def test_import_rrule_direct():
import dateutil.rrule
def test_import_rrule_from():
from dateutil import rrule
def test_import_rrule_all():
from dateutil.rrule import rrule
from dateutil.rrule import rruleset
from dateutil.rrule import rrulestr
from dateutil.rrule import YEARLY, MONTHLY, WEEKLY, DAILY
from dateutil.rrule import HOURLY, MINUTELY, SECONDLY
from dateutil.rrule import MO, TU, WE, TH, FR, SA, SU
rr_all = (rrule, rruleset, rrulestr,
YEARLY, MONTHLY, WEEKLY, DAILY,
HOURLY, MINUTELY, SECONDLY,
MO, TU, WE, TH, FR, SA, SU)
for var in rr_all:
assert var is not None
# In the public interface but not in all
from dateutil.rrule import weekday
assert weekday is not None
# Test that dateutil.tz related imports work properly
def test_import_tztest_direct():
import dateutil.tz
def test_import_tz_from():
from dateutil import tz
def test_import_tz_all():
from dateutil.tz import tzutc
from dateutil.tz import tzoffset
from dateutil.tz import tzlocal
from dateutil.tz import tzfile
from dateutil.tz import tzrange
from dateutil.tz import tzstr
from dateutil.tz import tzical
from dateutil.tz import gettz
from dateutil.tz import tzwin
from dateutil.tz import tzwinlocal
from dateutil.tz import UTC
from dateutil.tz import datetime_ambiguous
from dateutil.tz import datetime_exists
from dateutil.tz import resolve_imaginary
tz_all = ["tzutc", "tzoffset", "tzlocal", "tzfile", "tzrange",
"tzstr", "tzical", "gettz", "datetime_ambiguous",
"datetime_exists", "resolve_imaginary", "UTC"]
tz_all += ["tzwin", "tzwinlocal"] if sys.platform.startswith("win") else []
lvars = locals()
for var in tz_all:
assert lvars[var] is not None
# Test that dateutil.tzwin related imports work properly
@pytest.mark.skipif(not HOST_IS_WINDOWS, reason="Requires Windows")
def test_import_tz_windows_direct():
import dateutil.tzwin
@pytest.mark.skipif(not HOST_IS_WINDOWS, reason="Requires Windows")
def test_import_tz_windows_from():
from dateutil import tzwin
@pytest.mark.skipif(not HOST_IS_WINDOWS, reason="Requires Windows")
def test_import_tz_windows_star():
from dateutil.tzwin import tzwin
from dateutil.tzwin import tzwinlocal
tzwin_all = [tzwin, tzwinlocal]
for var in tzwin_all:
assert var is not None
# Test imports of Zone Info
def test_import_zone_info_direct():
import dateutil.zoneinfo
def test_import_zone_info_from():
from dateutil import zoneinfo
def test_import_zone_info_star():
from dateutil.zoneinfo import gettz
from dateutil.zoneinfo import gettz_db_metadata
from dateutil.zoneinfo import rebuild
zi_all = (gettz, gettz_db_metadata, rebuild)
for var in zi_all:
assert var is not None
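# Editor's note: the import checks above all follow one pattern -- import a
# module and make sure the names its public API advertises are actually
# present. A minimal, hedged sketch of that pattern (the helper and test below
# are illustrative additions, not part of the original dateutil test suite):
def _missing_public_names(module, names):
    # Return the advertised names that the module does not actually expose.
    return [name for name in names if not hasattr(module, name)]
def test_rrule_public_surface_sketch():
    from dateutil import rrule
    assert _missing_public_names(rrule, ["rrule", "rruleset", "rrulestr"]) == []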

View file

@@ -1,91 +0,0 @@
# -*- coding: utf-8 -*-
"""
Tests for implementation details, not necessarily part of the user-facing
API.
The motivating case for these tests is #483, where we want to smoke-test
code that may be difficult to reach through the standard API calls.
"""
import sys
import pytest
from dateutil.parser._parser import _ymd
from dateutil import tz
IS_PY32 = sys.version_info[0:2] == (3, 2)
@pytest.mark.smoke
def test_YMD_could_be_day():
ymd = _ymd('foo bar 124 baz')
ymd.append(2, 'M')
assert ymd.has_month
assert not ymd.has_year
assert ymd.could_be_day(4)
assert not ymd.could_be_day(-6)
assert not ymd.could_be_day(32)
# Assumes leap year
assert ymd.could_be_day(29)
ymd.append(1999)
assert ymd.has_year
assert not ymd.could_be_day(29)
ymd.append(16, 'D')
assert ymd.has_day
assert not ymd.could_be_day(1)
ymd = _ymd('foo bar 124 baz')
ymd.append(1999)
assert ymd.could_be_day(31)
###
# Test that private interfaces in _parser are deprecated properly
@pytest.mark.skipif(IS_PY32, reason='pytest.warns not supported on Python 3.2')
def test_parser_private_warns():
from dateutil.parser import _timelex, _tzparser
from dateutil.parser import _parsetz
with pytest.warns(DeprecationWarning):
_tzparser()
with pytest.warns(DeprecationWarning):
_timelex('2014-03-03')
with pytest.warns(DeprecationWarning):
_parsetz('+05:00')
@pytest.mark.skipif(IS_PY32, reason='pytest.warns not supported on Python 3.2')
def test_parser_parser_private_not_warns():
from dateutil.parser._parser import _timelex, _tzparser
from dateutil.parser._parser import _parsetz
with pytest.warns(None) as recorder:
_tzparser()
assert len(recorder) == 0
with pytest.warns(None) as recorder:
_timelex('2014-03-03')
assert len(recorder) == 0
with pytest.warns(None) as recorder:
_parsetz('+05:00')
assert len(recorder) == 0
@pytest.mark.tzstr
def test_tzstr_internal_timedeltas():
with pytest.warns(tz.DeprecatedTzFormatWarning):
tz1 = tz.tzstr("EST5EDT,5,4,0,7200,11,-3,0,7200")
with pytest.warns(tz.DeprecatedTzFormatWarning):
tz2 = tz.tzstr("EST5EDT,4,1,0,7200,10,-1,0,7200")
assert tz1._start_delta != tz2._start_delta
assert tz1._end_delta != tz2._end_delta

View file

@@ -1,509 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from datetime import datetime, timedelta, date, time
import itertools as it
from dateutil import tz
from dateutil.tz import UTC
from dateutil.parser import isoparser, isoparse
import pytest
import six
def _generate_tzoffsets(limited):
def _mkoffset(hmtuple, fmt):
h, m = hmtuple
m_td = (-1 if h < 0 else 1) * m
tzo = tz.tzoffset(None, timedelta(hours=h, minutes=m_td))
return tzo, fmt.format(h, m)
out = []
if not limited:
# The subset that's just hours
hm_out_h = [(h, 0) for h in (-23, -5, 0, 5, 23)]
out.extend([_mkoffset(hm, '{:+03d}') for hm in hm_out_h])
# Ones that have hours and minutes
hm_out = [] + hm_out_h
hm_out += [(-12, 15), (11, 30), (10, 2), (5, 15), (-5, 30)]
else:
hm_out = [(-5, -0)]
fmts = ['{:+03d}:{:02d}', '{:+03d}{:02d}']
out += [_mkoffset(hm, fmt) for hm in hm_out for fmt in fmts]
# Also add in UTC and naive
out.append((UTC, 'Z'))
out.append((None, ''))
return out
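# Editor's hedged illustration (not part of the original suite): each generated
# entry pairs a tzinfo with the offset string isoparse should map back to it,
# e.g. '-05:30' corresponds to tz.tzoffset(None, timedelta(hours=-5, minutes=-30)).
def test_tzoffset_string_maps_to_tzinfo_sketch():
    parsed = isoparse('2014-02-04T12:30-05:30')
    assert parsed.tzinfo == tz.tzoffset(None, timedelta(hours=-5, minutes=-30))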
FULL_TZOFFSETS = _generate_tzoffsets(False)
FULL_TZOFFSETS_AWARE = [x for x in FULL_TZOFFSETS if x[1]]
TZOFFSETS = _generate_tzoffsets(True)
DATES = [datetime(1996, 1, 1), datetime(2017, 1, 1)]
@pytest.mark.parametrize('dt', tuple(DATES))
def test_year_only(dt):
dtstr = dt.strftime('%Y')
assert isoparse(dtstr) == dt
DATES += [datetime(2000, 2, 1), datetime(2017, 4, 1)]
@pytest.mark.parametrize('dt', tuple(DATES))
def test_year_month(dt):
fmt = '%Y-%m'
dtstr = dt.strftime(fmt)
assert isoparse(dtstr) == dt
DATES += [datetime(2016, 2, 29), datetime(2018, 3, 15)]
YMD_FMTS = ('%Y%m%d', '%Y-%m-%d')
@pytest.mark.parametrize('dt', tuple(DATES))
@pytest.mark.parametrize('fmt', YMD_FMTS)
def test_year_month_day(dt, fmt):
dtstr = dt.strftime(fmt)
assert isoparse(dtstr) == dt
def _isoparse_date_and_time(dt, date_fmt, time_fmt, tzoffset,
microsecond_precision=None):
tzi, offset_str = tzoffset
fmt = date_fmt + 'T' + time_fmt
dt = dt.replace(tzinfo=tzi)
dtstr = dt.strftime(fmt)
if microsecond_precision is not None:
if not fmt.endswith('%f'): # pragma: nocover
raise ValueError('Time format has no microseconds!')
if microsecond_precision > 6: # pragma: nocover
raise ValueError('Precision must be 1-6')
if microsecond_precision != 6:
dtstr = dtstr[:-(6 - microsecond_precision)]
dtstr += offset_str
assert isoparse(dtstr) == dt
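# Editor's hedged example of the round trip the helper above performs, written
# out for one concrete case (a sketch, not part of the original suite):
def test_isoparse_roundtrip_sketch():
    expected = datetime(2003, 9, 25, 10, 49, 41, tzinfo=tz.tzoffset(None, 10800))
    assert isoparse('2003-09-25T10:49:41+03:00') == expected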
DATETIMES = [datetime(1998, 4, 16, 12),
datetime(2019, 11, 18, 23),
datetime(2014, 12, 16, 4)]
@pytest.mark.parametrize('dt', tuple(DATETIMES))
@pytest.mark.parametrize('date_fmt', YMD_FMTS)
@pytest.mark.parametrize('tzoffset', TZOFFSETS)
def test_ymd_h(dt, date_fmt, tzoffset):
_isoparse_date_and_time(dt, date_fmt, '%H', tzoffset)
DATETIMES = [datetime(2012, 1, 6, 9, 37)]
@pytest.mark.parametrize('dt', tuple(DATETIMES))
@pytest.mark.parametrize('date_fmt', YMD_FMTS)
@pytest.mark.parametrize('time_fmt', ('%H%M', '%H:%M'))
@pytest.mark.parametrize('tzoffset', TZOFFSETS)
def test_ymd_hm(dt, date_fmt, time_fmt, tzoffset):
_isoparse_date_and_time(dt, date_fmt, time_fmt, tzoffset)
DATETIMES = [datetime(2003, 9, 2, 22, 14, 2),
datetime(2003, 8, 8, 14, 9, 14),
datetime(2003, 4, 7, 6, 14, 59)]
HMS_FMTS = ('%H%M%S', '%H:%M:%S')
@pytest.mark.parametrize('dt', tuple(DATETIMES))
@pytest.mark.parametrize('date_fmt', YMD_FMTS)
@pytest.mark.parametrize('time_fmt', HMS_FMTS)
@pytest.mark.parametrize('tzoffset', TZOFFSETS)
def test_ymd_hms(dt, date_fmt, time_fmt, tzoffset):
_isoparse_date_and_time(dt, date_fmt, time_fmt, tzoffset)
DATETIMES = [datetime(2017, 11, 27, 6, 14, 30, 123456)]
@pytest.mark.parametrize('dt', tuple(DATETIMES))
@pytest.mark.parametrize('date_fmt', YMD_FMTS)
@pytest.mark.parametrize('time_fmt', (x + sep + '%f' for x in HMS_FMTS
for sep in '.,'))
@pytest.mark.parametrize('tzoffset', TZOFFSETS)
@pytest.mark.parametrize('precision', list(range(3, 7)))
def test_ymd_hms_micro(dt, date_fmt, time_fmt, tzoffset, precision):
# Truncate the microseconds to the desired precision for the representation
dt = dt.replace(microsecond=int(round(dt.microsecond, precision-6)))
_isoparse_date_and_time(dt, date_fmt, time_fmt, tzoffset, precision)
###
# Truncation of extra digits beyond microsecond precision
@pytest.mark.parametrize('dt_str', [
'2018-07-03T14:07:00.123456000001',
'2018-07-03T14:07:00.123456999999',
])
def test_extra_subsecond_digits(dt_str):
assert isoparse(dt_str) == datetime(2018, 7, 3, 14, 7, 0, 123456)
@pytest.mark.parametrize('tzoffset', FULL_TZOFFSETS)
def test_full_tzoffsets(tzoffset):
dt = datetime(2017, 11, 27, 6, 14, 30, 123456)
date_fmt = '%Y-%m-%d'
time_fmt = '%H:%M:%S.%f'
_isoparse_date_and_time(dt, date_fmt, time_fmt, tzoffset)
@pytest.mark.parametrize('dt_str', [
'2014-04-11T00',
'2014-04-10T24',
'2014-04-11T00:00',
'2014-04-10T24:00',
'2014-04-11T00:00:00',
'2014-04-10T24:00:00',
'2014-04-11T00:00:00.000',
'2014-04-10T24:00:00.000',
'2014-04-11T00:00:00.000000',
'2014-04-10T24:00:00.000000']
)
def test_datetime_midnight(dt_str):
assert isoparse(dt_str) == datetime(2014, 4, 11, 0, 0, 0, 0)
@pytest.mark.parametrize('datestr', [
'2014-01-01',
'20140101',
])
@pytest.mark.parametrize('sep', [' ', 'a', 'T', '_', '-'])
def test_isoparse_sep_none(datestr, sep):
isostr = datestr + sep + '14:33:09'
assert isoparse(isostr) == datetime(2014, 1, 1, 14, 33, 9)
##
# Uncommon date formats
TIME_ARGS = ('time_args',
((None, time(0), None), ) + tuple(('%H:%M:%S.%f', _t, _tz)
for _t, _tz in it.product([time(0), time(9, 30), time(14, 47)],
TZOFFSETS)))
@pytest.mark.parametrize('isocal,dt_expected',[
((2017, 10), datetime(2017, 3, 6)),
((2020, 1), datetime(2019, 12, 30)), # ISO year != Cal year
((2004, 53), datetime(2004, 12, 27)), # Only half the week is in 2004
])
def test_isoweek(isocal, dt_expected):
# TODO: Figure out how to parametrize this on formats, too
for fmt in ('{:04d}-W{:02d}', '{:04d}W{:02d}'):
dtstr = fmt.format(*isocal)
assert isoparse(dtstr) == dt_expected
@pytest.mark.parametrize('isocal,dt_expected',[
((2016, 13, 7), datetime(2016, 4, 3)),
((2004, 53, 7), datetime(2005, 1, 2)), # ISO year != Cal year
((2009, 1, 2), datetime(2008, 12, 30)), # ISO year < Cal year
((2009, 53, 6), datetime(2010, 1, 2)) # ISO year > Cal year
])
def test_isoweek_day(isocal, dt_expected):
# TODO: Figure out how to parametrize this on formats, too
for fmt in ('{:04d}-W{:02d}-{:d}', '{:04d}W{:02d}{:d}'):
dtstr = fmt.format(*isocal)
assert isoparse(dtstr) == dt_expected
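# Editor's hedged cross-check (not part of the original suite): the expected
# values above agree with the standard library's own ISO calendar arithmetic.
def test_isoweek_day_stdlib_crosscheck_sketch():
    assert datetime(2005, 1, 2).isocalendar()[:3] == (2004, 53, 7)
    assert datetime(2008, 12, 30).isocalendar()[:3] == (2009, 1, 2)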
@pytest.mark.parametrize('isoord,dt_expected', [
((2004, 1), datetime(2004, 1, 1)),
((2016, 60), datetime(2016, 2, 29)),
((2017, 60), datetime(2017, 3, 1)),
((2016, 366), datetime(2016, 12, 31)),
((2017, 365), datetime(2017, 12, 31))
])
def test_iso_ordinal(isoord, dt_expected):
for fmt in ('{:04d}-{:03d}', '{:04d}{:03d}'):
dtstr = fmt.format(*isoord)
assert isoparse(dtstr) == dt_expected
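# Editor's hedged cross-check (not part of the original suite): ordinal days
# line up with the standard library's timetuple().tm_yday.
def test_iso_ordinal_stdlib_crosscheck_sketch():
    assert datetime(2016, 2, 29).timetuple().tm_yday == 60
    assert datetime(2017, 3, 1).timetuple().tm_yday == 60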
###
# Acceptance of bytes
@pytest.mark.parametrize('isostr,dt', [
(b'2014', datetime(2014, 1, 1)),
(b'20140204', datetime(2014, 2, 4)),
(b'2014-02-04', datetime(2014, 2, 4)),
(b'2014-02-04T12', datetime(2014, 2, 4, 12)),
(b'2014-02-04T12:30', datetime(2014, 2, 4, 12, 30)),
(b'2014-02-04T12:30:15', datetime(2014, 2, 4, 12, 30, 15)),
(b'2014-02-04T12:30:15.224', datetime(2014, 2, 4, 12, 30, 15, 224000)),
(b'20140204T123015.224', datetime(2014, 2, 4, 12, 30, 15, 224000)),
(b'2014-02-04T12:30:15.224Z', datetime(2014, 2, 4, 12, 30, 15, 224000,
UTC)),
(b'2014-02-04T12:30:15.224z', datetime(2014, 2, 4, 12, 30, 15, 224000,
UTC)),
(b'2014-02-04T12:30:15.224+05:00',
datetime(2014, 2, 4, 12, 30, 15, 224000,
tzinfo=tz.tzoffset(None, timedelta(hours=5))))])
def test_bytes(isostr, dt):
assert isoparse(isostr) == dt
###
# Invalid ISO strings
@pytest.mark.parametrize('isostr,exception', [
('201', ValueError), # ISO string too short
('2012-0425', ValueError), # Inconsistent date separators
('201204-25', ValueError), # Inconsistent date separators
('20120425T0120:00', ValueError), # Inconsistent time separators
('20120425T01:2000', ValueError), # Inconsistent time separators
('14:3015', ValueError), # Inconsistent time separator
('20120425T012500-334', ValueError), # Wrong microsecond separator
('2001-1', ValueError), # YYYY-M not valid
('2012-04-9', ValueError), # YYYY-MM-D not valid
('201204', ValueError), # YYYYMM not valid
('20120411T03:30+', ValueError), # Time zone too short
('20120411T03:30+1234567', ValueError), # Time zone too long
('20120411T03:30-25:40', ValueError), # Time zone invalid
('2012-1a', ValueError), # Invalid month
('20120411T03:30+00:60', ValueError), # Time zone invalid minutes
('20120411T03:30+00:61', ValueError), # Time zone invalid minutes
('20120411T033030.123456012:00', # No sign in time zone
ValueError),
('2012-W00', ValueError), # Invalid ISO week
('2012-W55', ValueError), # Invalid ISO week
('2012-W01-0', ValueError), # Invalid ISO week day
('2012-W01-8', ValueError), # Invalid ISO week day
('2013-000', ValueError), # Invalid ordinal day
('2013-366', ValueError), # Invalid ordinal day
('2013366', ValueError), # Invalid ordinal day
('2014-03-12Т12:30:14', ValueError), # Cyrillic T
('2014-04-21T24:00:01', ValueError), # Invalid use of 24 for midnight
('2014_W01-1', ValueError), # Invalid separator
('2014W01-1', ValueError), # Inconsistent use of dashes
('2014-W011', ValueError), # Inconsistent use of dashes
])
def test_iso_raises(isostr, exception):
with pytest.raises(exception):
isoparse(isostr)
@pytest.mark.parametrize('sep_act, valid_sep, exception', [
('T', 'C', ValueError),
('C', 'T', ValueError),
])
def test_iso_with_sep_raises(sep_act, valid_sep, exception):
parser = isoparser(sep=valid_sep)
isostr = '2012-04-25' + sep_act + '01:25:00'
with pytest.raises(exception):
parser.isoparse(isostr)
###
# Test ISOParser constructor
@pytest.mark.parametrize('sep', [' ', '9', '🍛'])
def test_isoparser_invalid_sep(sep):
with pytest.raises(ValueError):
isoparser(sep=sep)
# This only fails on Python 3
@pytest.mark.xfail(not six.PY2, reason="Fails on Python 3 only")
def test_isoparser_byte_sep():
dt = datetime(2017, 12, 6, 12, 30, 45)
dt_str = dt.isoformat(sep=str('T'))
dt_rt = isoparser(sep=b'T').isoparse(dt_str)
assert dt == dt_rt
###
# Test parse_tzstr
@pytest.mark.parametrize('tzoffset', FULL_TZOFFSETS)
def test_parse_tzstr(tzoffset):
dt = datetime(2017, 11, 27, 6, 14, 30, 123456)
date_fmt = '%Y-%m-%d'
time_fmt = '%H:%M:%S.%f'
_isoparse_date_and_time(dt, date_fmt, time_fmt, tzoffset)
@pytest.mark.parametrize('tzstr', [
'-00:00', '+00:00', '+00', '-00', '+0000', '-0000'
])
@pytest.mark.parametrize('zero_as_utc', [True, False])
def test_parse_tzstr_zero_as_utc(tzstr, zero_as_utc):
tzi = isoparser().parse_tzstr(tzstr, zero_as_utc=zero_as_utc)
assert tzi == UTC
assert (type(tzi) == tz.tzutc) == zero_as_utc
@pytest.mark.parametrize('tzstr,exception', [
('00:00', ValueError), # No sign
('05:00', ValueError), # No sign
('_00:00', ValueError), # Invalid sign
('+25:00', ValueError), # Offset too large
('00:0000', ValueError), # String too long
])
def test_parse_tzstr_fails(tzstr, exception):
with pytest.raises(exception):
isoparser().parse_tzstr(tzstr)
###
# Test parse_isodate
def __make_date_examples():
dates_no_day = [
date(1999, 12, 1),
date(2016, 2, 1)
]
if not six.PY2:
# strftime does not support dates before 1900 in Python 2
dates_no_day.append(date(1000, 11, 1))
# Only one supported format for dates with no day
o = zip(dates_no_day, it.repeat('%Y-%m'))
dates_w_day = [
date(1969, 12, 31),
date(1900, 1, 1),
date(2016, 2, 29),
date(2017, 11, 14)
]
dates_w_day_fmts = ('%Y%m%d', '%Y-%m-%d')
o = it.chain(o, it.product(dates_w_day, dates_w_day_fmts))
return list(o)
@pytest.mark.parametrize('d,dt_fmt', __make_date_examples())
@pytest.mark.parametrize('as_bytes', [True, False])
def test_parse_isodate(d, dt_fmt, as_bytes):
d_str = d.strftime(dt_fmt)
if isinstance(d_str, six.text_type) and as_bytes:
d_str = d_str.encode('ascii')
elif isinstance(d_str, bytes) and not as_bytes:
d_str = d_str.decode('ascii')
iparser = isoparser()
assert iparser.parse_isodate(d_str) == d
@pytest.mark.parametrize('isostr,exception', [
('243', ValueError), # ISO string too short
('2014-0423', ValueError), # Inconsistent date separators
('201404-23', ValueError), # Inconsistent date separators
('2014日03月14', ValueError), # Not ASCII
('2013-02-29', ValueError), # Not a leap year
('2014/12/03', ValueError), # Wrong separators
('2014-04-19T', ValueError), # Unknown components
('201202', ValueError), # Invalid format
])
def test_isodate_raises(isostr, exception):
with pytest.raises(exception):
isoparser().parse_isodate(isostr)
def test_parse_isodate_error_text():
with pytest.raises(ValueError) as excinfo:
isoparser().parse_isodate('2014-0423')
# ensure the error message does not contain b' prefixes
if six.PY2:
expected_error = "String contains unknown ISO components: u'2014-0423'"
else:
expected_error = "String contains unknown ISO components: '2014-0423'"
assert expected_error == str(excinfo.value)
###
# Test parse_isotime
def __make_time_examples():
outputs = []
# HH
time_h = [time(0), time(8), time(22)]
time_h_fmts = ['%H']
outputs.append(it.product(time_h, time_h_fmts))
# HHMM / HH:MM
time_hm = [time(0, 0), time(0, 30), time(8, 47), time(16, 1)]
time_hm_fmts = ['%H%M', '%H:%M']
outputs.append(it.product(time_hm, time_hm_fmts))
# HHMMSS / HH:MM:SS
time_hms = [time(0, 0, 0), time(0, 15, 30),
time(8, 2, 16), time(12, 0), time(16, 2), time(20, 45)]
time_hms_fmts = ['%H%M%S', '%H:%M:%S']
outputs.append(it.product(time_hms, time_hms_fmts))
# HHMMSS.ffffff / HH:MM:SS.ffffff
time_hmsu = [time(0, 0, 0, 0), time(4, 15, 3, 247993),
time(14, 21, 59, 948730),
time(23, 59, 59, 999999)]
time_hmsu_fmts = ['%H%M%S.%f', '%H:%M:%S.%f']
outputs.append(it.product(time_hmsu, time_hmsu_fmts))
outputs = list(map(list, outputs))
# Time zones
ex_naive = list(it.chain.from_iterable(x[0:2] for x in outputs))
o = it.product(ex_naive, TZOFFSETS) # ((time, fmt), (tzinfo, offsetstr))
o = ((t.replace(tzinfo=tzi), fmt + off_str)
for (t, fmt), (tzi, off_str) in o)
outputs.append(o)
return list(it.chain.from_iterable(outputs))
@pytest.mark.parametrize('time_val,time_fmt', __make_time_examples())
@pytest.mark.parametrize('as_bytes', [True, False])
def test_isotime(time_val, time_fmt, as_bytes):
tstr = time_val.strftime(time_fmt)
if isinstance(tstr, six.text_type) and as_bytes:
tstr = tstr.encode('ascii')
elif isinstance(tstr, bytes) and not as_bytes:
tstr = tstr.decode('ascii')
iparser = isoparser()
assert iparser.parse_isotime(tstr) == time_val
@pytest.mark.parametrize('isostr', [
'24:00',
'2400',
'24:00:00',
'240000',
'24:00:00.000',
'24:00:00,000',
'24:00:00.000000',
'24:00:00,000000',
])
def test_isotime_midnight(isostr):
iparser = isoparser()
assert iparser.parse_isotime(isostr) == time(0, 0, 0, 0)
@pytest.mark.parametrize('isostr,exception', [
('3', ValueError), # ISO string too short
('14時30分15秒', ValueError), # Not ASCII
('14_30_15', ValueError), # Invalid separators
('1430:15', ValueError), # Inconsistent separator use
('25', ValueError), # Invalid hours
('25:15', ValueError), # Invalid hours
('14:60', ValueError), # Invalid minutes
('14:59:61', ValueError), # Invalid seconds
('14:30:15.34468305:00', ValueError), # No sign in time zone
('14:30:15+', ValueError), # Time zone too short
('14:30:15+1234567', ValueError), # Time zone invalid
('14:59:59+25:00', ValueError), # Invalid tz hours
('14:59:59+12:62', ValueError), # Invalid tz minutes
('14:59:30_344583', ValueError), # Invalid microsecond separator
('24:01', ValueError), # 24 used for non-midnight time
('24:00:01', ValueError), # 24 used for non-midnight time
('24:00:00.001', ValueError), # 24 used for non-midnight time
('24:00:00.000001', ValueError), # 24 used for non-midnight time
])
def test_isotime_raises(isostr, exception):
iparser = isoparser()
with pytest.raises(exception):
iparser.parse_isotime(isostr)

View file

@@ -1,964 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import itertools
from datetime import datetime, timedelta
import unittest
import sys
from dateutil import tz
from dateutil.tz import tzoffset
from dateutil.parser import parse, parserinfo
from dateutil.parser import ParserError
from dateutil.parser import UnknownTimezoneWarning
from ._common import TZEnvContext
from six import assertRaisesRegex, PY2
from io import StringIO
import pytest
# Platform info
IS_WIN = sys.platform.startswith('win')
PLATFORM_HAS_DASH_D = False
try:
if datetime.now().strftime('%-d'):
PLATFORM_HAS_DASH_D = True
except ValueError:
pass
@pytest.fixture(params=[True, False])
def fuzzy(request):
"""Fixture to pass fuzzy=True or fuzzy=False to parse"""
return request.param
# Parser test cases using no keyword arguments. Format: (parsable_text, expected_datetime, assertion_message)
PARSER_TEST_CASES = [
("Thu Sep 25 10:36:28 2003", datetime(2003, 9, 25, 10, 36, 28), "date command format strip"),
("Thu Sep 25 2003", datetime(2003, 9, 25), "date command format strip"),
("2003-09-25T10:49:41", datetime(2003, 9, 25, 10, 49, 41), "iso format strip"),
("2003-09-25T10:49", datetime(2003, 9, 25, 10, 49), "iso format strip"),
("2003-09-25T10", datetime(2003, 9, 25, 10), "iso format strip"),
("2003-09-25", datetime(2003, 9, 25), "iso format strip"),
("20030925T104941", datetime(2003, 9, 25, 10, 49, 41), "iso stripped format strip"),
("20030925T1049", datetime(2003, 9, 25, 10, 49, 0), "iso stripped format strip"),
("20030925T10", datetime(2003, 9, 25, 10), "iso stripped format strip"),
("20030925", datetime(2003, 9, 25), "iso stripped format strip"),
("2003-09-25 10:49:41,502", datetime(2003, 9, 25, 10, 49, 41, 502000), "python logger format"),
("199709020908", datetime(1997, 9, 2, 9, 8), "no separator"),
("19970902090807", datetime(1997, 9, 2, 9, 8, 7), "no separator"),
("09-25-2003", datetime(2003, 9, 25), "date with dash"),
("25-09-2003", datetime(2003, 9, 25), "date with dash"),
("10-09-2003", datetime(2003, 10, 9), "date with dash"),
("10-09-03", datetime(2003, 10, 9), "date with dash"),
("2003.09.25", datetime(2003, 9, 25), "date with dot"),
("09.25.2003", datetime(2003, 9, 25), "date with dot"),
("25.09.2003", datetime(2003, 9, 25), "date with dot"),
("10.09.2003", datetime(2003, 10, 9), "date with dot"),
("10.09.03", datetime(2003, 10, 9), "date with dot"),
("2003/09/25", datetime(2003, 9, 25), "date with slash"),
("09/25/2003", datetime(2003, 9, 25), "date with slash"),
("25/09/2003", datetime(2003, 9, 25), "date with slash"),
("10/09/2003", datetime(2003, 10, 9), "date with slash"),
("10/09/03", datetime(2003, 10, 9), "date with slash"),
("2003 09 25", datetime(2003, 9, 25), "date with space"),
("09 25 2003", datetime(2003, 9, 25), "date with space"),
("25 09 2003", datetime(2003, 9, 25), "date with space"),
("10 09 2003", datetime(2003, 10, 9), "date with space"),
("10 09 03", datetime(2003, 10, 9), "date with space"),
("25 09 03", datetime(2003, 9, 25), "date with space"),
("03 25 Sep", datetime(2003, 9, 25), "strangely ordered date"),
("25 03 Sep", datetime(2025, 9, 3), "strangely ordered date"),
(" July 4 , 1976 12:01:02 am ", datetime(1976, 7, 4, 0, 1, 2), "extra space"),
("Wed, July 10, '96", datetime(1996, 7, 10, 0, 0), "random format"),
("1996.July.10 AD 12:08 PM", datetime(1996, 7, 10, 12, 8), "random format"),
("July 4, 1976", datetime(1976, 7, 4), "random format"),
("7 4 1976", datetime(1976, 7, 4), "random format"),
("4 jul 1976", datetime(1976, 7, 4), "random format"),
("4 Jul 1976", datetime(1976, 7, 4), "'%-d %b %Y' format"),
("7-4-76", datetime(1976, 7, 4), "random format"),
("19760704", datetime(1976, 7, 4), "random format"),
("0:01:02 on July 4, 1976", datetime(1976, 7, 4, 0, 1, 2), "random format"),
("July 4, 1976 12:01:02 am", datetime(1976, 7, 4, 0, 1, 2), "random format"),
("Mon Jan 2 04:24:27 1995", datetime(1995, 1, 2, 4, 24, 27), "random format"),
("04.04.95 00:22", datetime(1995, 4, 4, 0, 22), "random format"),
("Jan 1 1999 11:23:34.578", datetime(1999, 1, 1, 11, 23, 34, 578000), "random format"),
("950404 122212", datetime(1995, 4, 4, 12, 22, 12), "random format"),
("3rd of May 2001", datetime(2001, 5, 3), "random format"),
("5th of March 2001", datetime(2001, 3, 5), "random format"),
("1st of May 2003", datetime(2003, 5, 1), "random format"),
('0099-01-01T00:00:00', datetime(99, 1, 1, 0, 0), "99 ad"),
('0031-01-01T00:00:00', datetime(31, 1, 1, 0, 0), "31 ad"),
("20080227T21:26:01.123456789", datetime(2008, 2, 27, 21, 26, 1, 123456), "high precision seconds"),
('13NOV2017', datetime(2017, 11, 13), "dBY (See GH360)"),
('0003-03-04', datetime(3, 3, 4), "pre 12 year same month (See GH PR #293)"),
('December.0031.30', datetime(31, 12, 30), "BYd corner case (GH#687)"),
# Cases with legacy h/m/s format, candidates for deprecation (GH#886)
("2016-12-21 04.2h", datetime(2016, 12, 21, 4, 12), "Fractional Hours"),
]
# Check that we don't have any duplicates
assert len(set([x[0] for x in PARSER_TEST_CASES])) == len(PARSER_TEST_CASES)
@pytest.mark.parametrize("parsable_text,expected_datetime,assertion_message", PARSER_TEST_CASES)
def test_parser(parsable_text, expected_datetime, assertion_message):
assert parse(parsable_text) == expected_datetime, assertion_message
# Parser test cases using datetime(2003, 9, 25) as a default.
# Format: (parsable_text, expected_datetime, assertion_message)
PARSER_DEFAULT_TEST_CASES = [
("Thu Sep 25 10:36:28", datetime(2003, 9, 25, 10, 36, 28), "date command format strip"),
("Thu Sep 10:36:28", datetime(2003, 9, 25, 10, 36, 28), "date command format strip"),
("Thu 10:36:28", datetime(2003, 9, 25, 10, 36, 28), "date command format strip"),
("Sep 10:36:28", datetime(2003, 9, 25, 10, 36, 28), "date command format strip"),
("10:36:28", datetime(2003, 9, 25, 10, 36, 28), "date command format strip"),
("10:36", datetime(2003, 9, 25, 10, 36), "date command format strip"),
("Sep 2003", datetime(2003, 9, 25), "date command format strip"),
("Sep", datetime(2003, 9, 25), "date command format strip"),
("2003", datetime(2003, 9, 25), "date command format strip"),
("10h36m28.5s", datetime(2003, 9, 25, 10, 36, 28, 500000), "hour with letters"),
("10h36m28s", datetime(2003, 9, 25, 10, 36, 28), "hour with letters strip"),
("10h36m", datetime(2003, 9, 25, 10, 36), "hour with letters strip"),
("10h", datetime(2003, 9, 25, 10), "hour with letters strip"),
("10 h 36", datetime(2003, 9, 25, 10, 36), "hour with letters strip"),
("10 h 36.5", datetime(2003, 9, 25, 10, 36, 30), "hour with letter strip"),
("36 m 5", datetime(2003, 9, 25, 0, 36, 5), "hour with letters spaces"),
("36 m 5 s", datetime(2003, 9, 25, 0, 36, 5), "minute with letters spaces"),
("36 m 05", datetime(2003, 9, 25, 0, 36, 5), "minute with letters spaces"),
("36 m 05 s", datetime(2003, 9, 25, 0, 36, 5), "minutes with letters spaces"),
("10h am", datetime(2003, 9, 25, 10), "hour am pm"),
("10h pm", datetime(2003, 9, 25, 22), "hour am pm"),
("10am", datetime(2003, 9, 25, 10), "hour am pm"),
("10pm", datetime(2003, 9, 25, 22), "hour am pm"),
("10:00 am", datetime(2003, 9, 25, 10), "hour am pm"),
("10:00 pm", datetime(2003, 9, 25, 22), "hour am pm"),
("10:00am", datetime(2003, 9, 25, 10), "hour am pm"),
("10:00pm", datetime(2003, 9, 25, 22), "hour am pm"),
("10:00a.m", datetime(2003, 9, 25, 10), "hour am pm"),
("10:00p.m", datetime(2003, 9, 25, 22), "hour am pm"),
("10:00a.m.", datetime(2003, 9, 25, 10), "hour am pm"),
("10:00p.m.", datetime(2003, 9, 25, 22), "hour am pm"),
("Wed", datetime(2003, 10, 1), "weekday alone"),
("Wednesday", datetime(2003, 10, 1), "long weekday"),
("October", datetime(2003, 10, 25), "long month"),
("31-Dec-00", datetime(2000, 12, 31), "zero year"),
("0:01:02", datetime(2003, 9, 25, 0, 1, 2), "random format"),
("12h 01m02s am", datetime(2003, 9, 25, 0, 1, 2), "random format"),
("12:08 PM", datetime(2003, 9, 25, 12, 8), "random format"),
("01h02m03", datetime(2003, 9, 25, 1, 2, 3), "random format"),
("01h02", datetime(2003, 9, 25, 1, 2), "random format"),
("01h02s", datetime(2003, 9, 25, 1, 0, 2), "random format"),
("01m02", datetime(2003, 9, 25, 0, 1, 2), "random format"),
("01m02h", datetime(2003, 9, 25, 2, 1), "random format"),
("2004 10 Apr 11h30m", datetime(2004, 4, 10, 11, 30), "random format")
]
# Check that we don't have any duplicates
assert len(set([x[0] for x in PARSER_DEFAULT_TEST_CASES])) == len(PARSER_DEFAULT_TEST_CASES)
@pytest.mark.parametrize("parsable_text,expected_datetime,assertion_message", PARSER_DEFAULT_TEST_CASES)
def test_parser_default(parsable_text, expected_datetime, assertion_message):
assert parse(parsable_text, default=datetime(2003, 9, 25)) == expected_datetime, assertion_message
@pytest.mark.parametrize('sep', ['-', '.', '/', ' '])
def test_parse_dayfirst(sep):
expected = datetime(2003, 9, 10)
fmt = sep.join(['%d', '%m', '%Y'])
dstr = expected.strftime(fmt)
result = parse(dstr, dayfirst=True)
assert result == expected
@pytest.mark.parametrize('sep', ['-', '.', '/', ' '])
def test_parse_yearfirst(sep):
expected = datetime(2010, 9, 3)
fmt = sep.join(['%Y', '%m', '%d'])
dstr = expected.strftime(fmt)
result = parse(dstr, yearfirst=True)
assert result == expected
@pytest.mark.parametrize('dstr,expected', [
("Thu Sep 25 10:36:28 BRST 2003", datetime(2003, 9, 25, 10, 36, 28)),
("1996.07.10 AD at 15:08:56 PDT", datetime(1996, 7, 10, 15, 8, 56)),
("Tuesday, April 12, 1952 AD 3:30:42pm PST",
datetime(1952, 4, 12, 15, 30, 42)),
("November 5, 1994, 8:15:30 am EST", datetime(1994, 11, 5, 8, 15, 30)),
("1994-11-05T08:15:30-05:00", datetime(1994, 11, 5, 8, 15, 30)),
("1994-11-05T08:15:30Z", datetime(1994, 11, 5, 8, 15, 30)),
("1976-07-04T00:01:02Z", datetime(1976, 7, 4, 0, 1, 2)),
("1986-07-05T08:15:30z", datetime(1986, 7, 5, 8, 15, 30)),
("Tue Apr 4 00:22:12 PDT 1995", datetime(1995, 4, 4, 0, 22, 12)),
])
def test_parse_ignoretz(dstr, expected):
result = parse(dstr, ignoretz=True)
assert result == expected
_brsttz = tzoffset("BRST", -10800)
@pytest.mark.parametrize('dstr,expected', [
("20030925T104941-0300",
datetime(2003, 9, 25, 10, 49, 41, tzinfo=_brsttz)),
("Thu, 25 Sep 2003 10:49:41 -0300",
datetime(2003, 9, 25, 10, 49, 41, tzinfo=_brsttz)),
("2003-09-25T10:49:41.5-03:00",
datetime(2003, 9, 25, 10, 49, 41, 500000, tzinfo=_brsttz)),
("2003-09-25T10:49:41-03:00",
datetime(2003, 9, 25, 10, 49, 41, tzinfo=_brsttz)),
("20030925T104941.5-0300",
datetime(2003, 9, 25, 10, 49, 41, 500000, tzinfo=_brsttz)),
])
def test_parse_with_tzoffset(dstr, expected):
# In these cases, we are _not_ passing a tzinfos arg
result = parse(dstr)
assert result == expected
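# Editor's hedged contrast to the cases above (not part of the original suite):
# when the string carries a named zone instead of a numeric offset, a tzinfos
# mapping supplies the offset to use for that name.
def test_parse_named_zone_with_tzinfos_sketch():
    result = parse("Thu, 25 Sep 2003 10:49:41 BRST", tzinfos={"BRST": -10800})
    assert result == datetime(2003, 9, 25, 10, 49, 41, tzinfo=_brsttz)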
class TestFormat(object):
def test_ybd(self):
# If we have a 4-digit year, a non-numeric month (abbreviated or not),
# and a day (1 or 2 digits), then there is no ambiguity as to which
# token is a year/month/day. This holds regardless of what order the
# terms are in and for each of the separators below.
seps = ['-', ' ', '/', '.']
year_tokens = ['%Y']
month_tokens = ['%b', '%B']
day_tokens = ['%d']
if PLATFORM_HAS_DASH_D:
day_tokens.append('%-d')
prods = itertools.product(year_tokens, month_tokens, day_tokens)
perms = [y for x in prods for y in itertools.permutations(x)]
unambig_fmts = [sep.join(perm) for sep in seps for perm in perms]
actual = datetime(2003, 9, 25)
for fmt in unambig_fmts:
dstr = actual.strftime(fmt)
res = parse(dstr)
assert res == actual
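    # Editor's hedged illustration of the reasoning above (not part of the
    # original suite): once the month is spelled out, the same date comes back
    # no matter how the tokens are ordered.
    def test_ybd_order_independent_sketch(self):
        for dstr in ('2003-Sep-25', '25-Sep-2003', 'Sep-25-2003'):
            assert parse(dstr) == datetime(2003, 9, 25)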
# TODO: some redundancy with PARSER_TEST_CASES cases
@pytest.mark.parametrize("fmt,dstr", [
("%a %b %d %Y", "Thu Sep 25 2003"),
("%b %d %Y", "Sep 25 2003"),
("%Y-%m-%d", "2003-09-25"),
("%Y%m%d", "20030925"),
("%Y-%b-%d", "2003-Sep-25"),
("%d-%b-%Y", "25-Sep-2003"),
("%b-%d-%Y", "Sep-25-2003"),
("%m-%d-%Y", "09-25-2003"),
("%d-%m-%Y", "25-09-2003"),
("%Y.%m.%d", "2003.09.25"),
("%Y.%b.%d", "2003.Sep.25"),
("%d.%b.%Y", "25.Sep.2003"),
("%b.%d.%Y", "Sep.25.2003"),
("%m.%d.%Y", "09.25.2003"),
("%d.%m.%Y", "25.09.2003"),
("%Y/%m/%d", "2003/09/25"),
("%Y/%b/%d", "2003/Sep/25"),
("%d/%b/%Y", "25/Sep/2003"),
("%b/%d/%Y", "Sep/25/2003"),
("%m/%d/%Y", "09/25/2003"),
("%d/%m/%Y", "25/09/2003"),
("%Y %m %d", "2003 09 25"),
("%Y %b %d", "2003 Sep 25"),
("%d %b %Y", "25 Sep 2003"),
("%m %d %Y", "09 25 2003"),
("%d %m %Y", "25 09 2003"),
("%y %d %b", "03 25 Sep",),
])
def test_strftime_formats_2003Sep25(self, fmt, dstr):
expected = datetime(2003, 9, 25)
# First check that the format strings behave as expected
# (not strictly necessary, but nice to have)
assert expected.strftime(fmt) == dstr
res = parse(dstr)
assert res == expected
class TestInputTypes(object):
def test_empty_string_invalid(self):
with pytest.raises(ParserError):
parse('')
def test_none_invalid(self):
with pytest.raises(TypeError):
parse(None)
def test_int_invalid(self):
with pytest.raises(TypeError):
parse(13)
def test_duck_typing(self):
# We want to support arbitrary classes that implement the stream
# interface.
class StringPassThrough(object):
def __init__(self, stream):
self.stream = stream
def read(self, *args, **kwargs):
return self.stream.read(*args, **kwargs)
dstr = StringPassThrough(StringIO('2014 January 19'))
res = parse(dstr)
expected = datetime(2014, 1, 19)
assert res == expected
def test_parse_stream(self):
dstr = StringIO('2014 January 19')
res = parse(dstr)
expected = datetime(2014, 1, 19)
assert res == expected
def test_parse_str(self):
# Parser should be able to handle bytestring and unicode
uni_str = '2014-05-01 08:00:00'
bytes_str = uni_str.encode()
res = parse(bytes_str)
expected = parse(uni_str)
assert res == expected
def test_parse_bytes(self):
res = parse(b'2014 January 19')
expected = datetime(2014, 1, 19)
assert res == expected
def test_parse_bytearray(self):
# GH#417
res = parse(bytearray(b'2014 January 19'))
expected = datetime(2014, 1, 19)
assert res == expected
class TestTzinfoInputTypes(object):
def assert_equal_same_tz(self, dt1, dt2):
assert dt1 == dt2
assert dt1.tzinfo is dt2.tzinfo
def test_tzinfo_dict_could_return_none(self):
dstr = "2017-02-03 12:40 BRST"
result = parse(dstr, tzinfos={"BRST": None})
expected = datetime(2017, 2, 3, 12, 40)
self.assert_equal_same_tz(result, expected)
def test_tzinfos_callable_could_return_none(self):
dstr = "2017-02-03 12:40 BRST"
result = parse(dstr, tzinfos=lambda *args: None)
expected = datetime(2017, 2, 3, 12, 40)
self.assert_equal_same_tz(result, expected)
def test_invalid_tzinfo_input(self):
dstr = "2014 January 19 09:00 UTC"
# Pass an absurd tzinfos object
tzinfos = {"UTC": ValueError}
with pytest.raises(TypeError):
parse(dstr, tzinfos=tzinfos)
def test_valid_tzinfo_tzinfo_input(self):
dstr = "2014 January 19 09:00 UTC"
tzinfos = {"UTC": tz.UTC}
expected = datetime(2014, 1, 19, 9, tzinfo=tz.UTC)
res = parse(dstr, tzinfos=tzinfos)
self.assert_equal_same_tz(res, expected)
def test_valid_tzinfo_unicode_input(self):
dstr = "2014 January 19 09:00 UTC"
tzinfos = {u"UTC": u"UTC+0"}
expected = datetime(2014, 1, 19, 9, tzinfo=tz.tzstr("UTC+0"))
res = parse(dstr, tzinfos=tzinfos)
self.assert_equal_same_tz(res, expected)
def test_valid_tzinfo_callable_input(self):
dstr = "2014 January 19 09:00 UTC"
def tzinfos(*args, **kwargs):
return u"UTC+0"
expected = datetime(2014, 1, 19, 9, tzinfo=tz.tzstr("UTC+0"))
res = parse(dstr, tzinfos=tzinfos)
self.assert_equal_same_tz(res, expected)
def test_valid_tzinfo_int_input(self):
dstr = "2014 January 19 09:00 UTC"
tzinfos = {u"UTC": -28800}
expected = datetime(2014, 1, 19, 9, tzinfo=tz.tzoffset(u"UTC", -28800))
res = parse(dstr, tzinfos=tzinfos)
self.assert_equal_same_tz(res, expected)
class ParserTest(unittest.TestCase):
@classmethod
def setup_class(cls):
cls.tzinfos = {"BRST": -10800}
cls.brsttz = tzoffset("BRST", -10800)
cls.default = datetime(2003, 9, 25)
# Parser should be able to handle bytestring and unicode
cls.uni_str = '2014-05-01 08:00:00'
cls.str_str = cls.uni_str.encode()
def testParserParseStr(self):
from dateutil.parser import parser
assert parser().parse(self.str_str) == parser().parse(self.uni_str)
def testParseUnicodeWords(self):
class rus_parserinfo(parserinfo):
MONTHS = [("янв", "Январь"),
("фев", "Февраль"),
("мар", "Март"),
("апр", "Апрель"),
("май", "Май"),
("июн", "Июнь"),
("июл", "Июль"),
("авг", "Август"),
("сен", "Сентябрь"),
("окт", "Октябрь"),
("ноя", "Ноябрь"),
("дек", "Декабрь")]
expected = datetime(2015, 9, 10, 10, 20)
res = parse('10 Сентябрь 2015 10:20', parserinfo=rus_parserinfo())
assert res == expected
def testParseWithNulls(self):
# This relies on the from __future__ import unicode_literals, because
# explicitly specifying a unicode literal is a syntax error in Py 3.2
# May want to switch to u'...' if we ever drop Python 3.2 support.
pstring = '\x00\x00August 29, 1924'
assert parse(pstring) == datetime(1924, 8, 29)
def testDateCommandFormat(self):
self.assertEqual(parse("Thu Sep 25 10:36:28 BRST 2003",
tzinfos=self.tzinfos),
datetime(2003, 9, 25, 10, 36, 28,
tzinfo=self.brsttz))
def testDateCommandFormatReversed(self):
self.assertEqual(parse("2003 10:36:28 BRST 25 Sep Thu",
tzinfos=self.tzinfos),
datetime(2003, 9, 25, 10, 36, 28,
tzinfo=self.brsttz))
def testDateCommandFormatWithLong(self):
if PY2:
self.assertEqual(parse("Thu Sep 25 10:36:28 BRST 2003",
tzinfos={"BRST": long(-10800)}),
datetime(2003, 9, 25, 10, 36, 28,
tzinfo=self.brsttz))
def testISOFormatStrip2(self):
self.assertEqual(parse("2003-09-25T10:49:41+03:00"),
datetime(2003, 9, 25, 10, 49, 41,
tzinfo=tzoffset(None, 10800)))
def testISOStrippedFormatStrip2(self):
self.assertEqual(parse("20030925T104941+0300"),
datetime(2003, 9, 25, 10, 49, 41,
tzinfo=tzoffset(None, 10800)))
def testAMPMNoHour(self):
with pytest.raises(ParserError):
parse("AM")
with pytest.raises(ParserError):
parse("Jan 20, 2015 PM")
def testAMPMRange(self):
with pytest.raises(ParserError):
parse("13:44 AM")
with pytest.raises(ParserError):
parse("January 25, 1921 23:13 PM")
def testPertain(self):
self.assertEqual(parse("Sep 03", default=self.default),
datetime(2003, 9, 3))
self.assertEqual(parse("Sep of 03", default=self.default),
datetime(2003, 9, 25))
def testFuzzy(self):
s = "Today is 25 of September of 2003, exactly " \
"at 10:49:41 with timezone -03:00."
self.assertEqual(parse(s, fuzzy=True),
datetime(2003, 9, 25, 10, 49, 41,
tzinfo=self.brsttz))
def testFuzzyWithTokens(self):
s1 = "Today is 25 of September of 2003, exactly " \
"at 10:49:41 with timezone -03:00."
self.assertEqual(parse(s1, fuzzy_with_tokens=True),
(datetime(2003, 9, 25, 10, 49, 41,
tzinfo=self.brsttz),
('Today is ', 'of ', ', exactly at ',
' with timezone ', '.')))
s2 = "http://biz.yahoo.com/ipo/p/600221.html"
self.assertEqual(parse(s2, fuzzy_with_tokens=True),
(datetime(2060, 2, 21, 0, 0, 0),
('http://biz.yahoo.com/ipo/p/', '.html')))
def testFuzzyAMPMProblem(self):
# Sometimes fuzzy parsing results in AM/PM flag being set without
# hours - if it's fuzzy it should ignore that.
s1 = "I have a meeting on March 1, 1974."
s2 = "On June 8th, 2020, I am going to be the first man on Mars"
# Also don't want any erroneous AM or PMs changing the parsed time
s3 = "Meet me at the AM/PM on Sunset at 3:00 AM on December 3rd, 2003"
s4 = "Meet me at 3:00AM on December 3rd, 2003 at the AM/PM on Sunset"
self.assertEqual(parse(s1, fuzzy=True), datetime(1974, 3, 1))
self.assertEqual(parse(s2, fuzzy=True), datetime(2020, 6, 8))
self.assertEqual(parse(s3, fuzzy=True), datetime(2003, 12, 3, 3))
self.assertEqual(parse(s4, fuzzy=True), datetime(2003, 12, 3, 3))
def testFuzzyIgnoreAMPM(self):
s1 = "Jan 29, 1945 14:45 AM I going to see you there?"
with pytest.warns(UnknownTimezoneWarning):
res = parse(s1, fuzzy=True)
self.assertEqual(res, datetime(1945, 1, 29, 14, 45))
def testRandomFormat24(self):
self.assertEqual(parse("0:00 PM, PST", default=self.default,
ignoretz=True),
datetime(2003, 9, 25, 12, 0))
def testRandomFormat26(self):
with pytest.warns(UnknownTimezoneWarning):
res = parse("5:50 A.M. on June 13, 1990")
self.assertEqual(res, datetime(1990, 6, 13, 5, 50))
def testUnspecifiedDayFallback(self):
# Test that for an unspecified day, the fallback behavior is correct.
self.assertEqual(parse("April 2009", default=datetime(2010, 1, 31)),
datetime(2009, 4, 30))
def testUnspecifiedDayFallbackFebNoLeapYear(self):
self.assertEqual(parse("Feb 2007", default=datetime(2010, 1, 31)),
datetime(2007, 2, 28))
def testUnspecifiedDayFallbackFebLeapYear(self):
self.assertEqual(parse("Feb 2008", default=datetime(2010, 1, 31)),
datetime(2008, 2, 29))
def testErrorType01(self):
with pytest.raises(ParserError):
parse('shouldfail')
def testCorrectErrorOnFuzzyWithTokens(self):
assertRaisesRegex(self, ParserError, 'Unknown string format',
parse, '04/04/32/423', fuzzy_with_tokens=True)
assertRaisesRegex(self, ParserError, 'Unknown string format',
parse, '04/04/04 +32423', fuzzy_with_tokens=True)
assertRaisesRegex(self, ParserError, 'Unknown string format',
parse, '04/04/0d4', fuzzy_with_tokens=True)
def testIncreasingCTime(self):
# This test will check 200 different years, every month, every day,
# every hour, every minute, every second, and every weekday, using
# a delta of more or less 1 year, 1 month, 1 day, 1 hour, 1 minute and
# 1 second.
delta = timedelta(days=365+31+1, seconds=1+60+60*60)
dt = datetime(1900, 1, 1, 0, 0, 0, 0)
for i in range(200):
assert parse(dt.ctime()) == dt
dt += delta
def testIncreasingISOFormat(self):
delta = timedelta(days=365+31+1, seconds=1+60+60*60)
dt = datetime(1900, 1, 1, 0, 0, 0, 0)
for i in range(200):
assert parse(dt.isoformat()) == dt
dt += delta
def testMicrosecondsPrecisionError(self):
# Skip found out about that sad precision problem. :-(
dt1 = parse("00:11:25.01")
dt2 = parse("00:12:10.01")
assert dt1.microsecond == 10000
assert dt2.microsecond == 10000
def testMicrosecondPrecisionErrorReturns(self):
# One more precision issue, discovered by Eric Brown. This should
# be the last one, as we're no longer using floating points.
for ms in [100001, 100000, 99999, 99998,
10001, 10000, 9999, 9998,
1001, 1000, 999, 998,
101, 100, 99, 98]:
dt = datetime(2008, 2, 27, 21, 26, 1, ms)
assert parse(dt.isoformat()) == dt
def testCustomParserInfo(self):
# Custom parser info wasn't working, as Michael Elsdörfer discovered.
from dateutil.parser import parserinfo, parser
class myparserinfo(parserinfo):
MONTHS = parserinfo.MONTHS[:]
MONTHS[0] = ("Foo", "Foo")
myparser = parser(myparserinfo())
dt = myparser.parse("01/Foo/2007")
assert dt == datetime(2007, 1, 1)
def testCustomParserShortDaynames(self):
# Horacio Hoyos discovered that day names shorter than 3 characters,
# for example two letter German day name abbreviations, don't work:
# https://github.com/dateutil/dateutil/issues/343
from dateutil.parser import parserinfo, parser
class GermanParserInfo(parserinfo):
WEEKDAYS = [("Mo", "Montag"),
("Di", "Dienstag"),
("Mi", "Mittwoch"),
("Do", "Donnerstag"),
("Fr", "Freitag"),
("Sa", "Samstag"),
("So", "Sonntag")]
myparser = parser(GermanParserInfo())
dt = myparser.parse("Sa 21. Jan 2017")
self.assertEqual(dt, datetime(2017, 1, 21))
def testNoYearFirstNoDayFirst(self):
dtstr = '090107'
# Should be MMDDYY
self.assertEqual(parse(dtstr),
datetime(2007, 9, 1))
self.assertEqual(parse(dtstr, yearfirst=False, dayfirst=False),
datetime(2007, 9, 1))
def testYearFirst(self):
dtstr = '090107'
# Should be MMDDYY
self.assertEqual(parse(dtstr, yearfirst=True),
datetime(2009, 1, 7))
self.assertEqual(parse(dtstr, yearfirst=True, dayfirst=False),
datetime(2009, 1, 7))
def testDayFirst(self):
dtstr = '090107'
# Should be DDMMYY
self.assertEqual(parse(dtstr, dayfirst=True),
datetime(2007, 1, 9))
self.assertEqual(parse(dtstr, yearfirst=False, dayfirst=True),
datetime(2007, 1, 9))
def testDayFirstYearFirst(self):
dtstr = '090107'
# Should be YYDDMM
self.assertEqual(parse(dtstr, yearfirst=True, dayfirst=True),
datetime(2009, 7, 1))
def testUnambiguousYearFirst(self):
dtstr = '2015 09 25'
self.assertEqual(parse(dtstr, yearfirst=True),
datetime(2015, 9, 25))
def testUnambiguousDayFirst(self):
dtstr = '2015 09 25'
self.assertEqual(parse(dtstr, dayfirst=True),
datetime(2015, 9, 25))
def testUnambiguousDayFirstYearFirst(self):
dtstr = '2015 09 25'
self.assertEqual(parse(dtstr, dayfirst=True, yearfirst=True),
datetime(2015, 9, 25))
def test_mstridx(self):
# See GH408
dtstr = '2015-15-May'
self.assertEqual(parse(dtstr),
datetime(2015, 5, 15))
def test_idx_check(self):
dtstr = '2017-07-17 06:15:'
# Before the fix, the trailing colon would cause an IndexError at 824-825
# when checking `i < len_l` and then accessing `l[i+1]`
res = parse(dtstr, fuzzy=True)
assert res == datetime(2017, 7, 17, 6, 15)
def test_hmBY(self):
# See GH#483
dtstr = '02:17NOV2017'
res = parse(dtstr, default=self.default)
assert res == datetime(2017, 11, self.default.day, 2, 17)
def test_validate_hour(self):
# See GH353
invalid = "201A-01-01T23:58:39.239769+03:00"
with pytest.raises(ParserError):
parse(invalid)
def test_era_trailing_year(self):
dstr = 'AD2001'
res = parse(dstr)
assert res.year == 2001, res
def test_includes_timestr(self):
timestr = "2020-13-97T44:61:83"
try:
parse(timestr)
except ParserError as e:
assert e.args[1] == timestr
else:
pytest.fail("Failed to raise ParserError")
class TestOutOfBounds(object):
def test_no_year_zero(self):
with pytest.raises(ParserError):
parse("0000 Jun 20")
def test_out_of_bound_day(self):
with pytest.raises(ParserError):
parse("Feb 30, 2007")
def test_illegal_month_error(self):
with pytest.raises(ParserError):
parse("0-100")
def test_day_sanity(self, fuzzy):
dstr = "2014-15-25"
with pytest.raises(ParserError):
parse(dstr, fuzzy=fuzzy)
def test_minute_sanity(self, fuzzy):
dstr = "2014-02-28 22:64"
with pytest.raises(ParserError):
parse(dstr, fuzzy=fuzzy)
def test_hour_sanity(self, fuzzy):
dstr = "2014-02-28 25:16 PM"
with pytest.raises(ParserError):
parse(dstr, fuzzy=fuzzy)
def test_second_sanity(self, fuzzy):
dstr = "2014-02-28 22:14:64"
with pytest.raises(ParserError):
parse(dstr, fuzzy=fuzzy)
class TestParseUnimplementedCases(object):
@pytest.mark.xfail
def test_somewhat_ambiguous_string(self):
# Ref: github issue #487
# The parser is choosing the wrong part for hour
# causing datetime to raise an exception.
dtstr = '1237 PM BRST Mon Oct 30 2017'
res = parse(dtstr, tzinfo=self.tzinfos)
assert res == datetime(2017, 10, 30, 12, 37, tzinfo=self.tzinfos)
@pytest.mark.xfail
def test_YmdH_M_S(self):
# found in nasdaq's ftp data
dstr = '1991041310:19:24'
expected = datetime(1991, 4, 13, 10, 19, 24)
res = parse(dstr)
assert res == expected, (res, expected)
@pytest.mark.xfail
def test_first_century(self):
dstr = '0031 Nov 03'
expected = datetime(31, 11, 3)
res = parse(dstr)
assert res == expected, res
@pytest.mark.xfail
def test_era_trailing_year_with_dots(self):
dstr = 'A.D.2001'
res = parse(dstr)
assert res.year == 2001, res
@pytest.mark.xfail
def test_ad_nospace(self):
expected = datetime(6, 5, 19)
for dstr in [' 6AD May 19', ' 06AD May 19',
' 006AD May 19', ' 0006AD May 19']:
res = parse(dstr)
assert res == expected, (dstr, res)
@pytest.mark.xfail
def test_four_letter_day(self):
dstr = 'Frid Dec 30, 2016'
expected = datetime(2016, 12, 30)
res = parse(dstr)
assert res == expected
@pytest.mark.xfail
def test_non_date_number(self):
dstr = '1,700'
with pytest.raises(ParserError):
parse(dstr)
@pytest.mark.xfail
def test_on_era(self):
# This could be classified as an "eras" test, but the relevant part
# here is the ` on `
dstr = '2:15 PM on January 2nd 1973 A.D.'
expected = datetime(1973, 1, 2, 14, 15)
res = parse(dstr)
assert res == expected
@pytest.mark.xfail
def test_extraneous_year(self):
# This was found in the wild at insidertrading.org
dstr = "2011 MARTIN CHILDREN'S IRREVOCABLE TRUST u/a/d NOVEMBER 7, 2012"
res, _ = parse(dstr, fuzzy_with_tokens=True)
expected = datetime(2012, 11, 7)
assert res == expected
@pytest.mark.xfail
def test_extraneous_year_tokens(self):
# This was found in the wild at insidertrading.org
# Unlike in the case above, identifying the first "2012" as the year
# would not be a problem, but inferring that the latter 2012 is hhmm
# is a problem.
dstr = "2012 MARTIN CHILDREN'S IRREVOCABLE TRUST u/a/d NOVEMBER 7, 2012"
expected = datetime(2012, 11, 7)
(res, tokens) = parse(dstr, fuzzy_with_tokens=True)
assert res == expected
assert tokens == ("2012 MARTIN CHILDREN'S IRREVOCABLE TRUST u/a/d ",)
@pytest.mark.xfail
def test_extraneous_year2(self):
# This was found in the wild at insidertrading.org
dstr = ("Berylson Amy Smith 1998 Grantor Retained Annuity Trust "
"u/d/t November 2, 1998 f/b/o Jennifer L Berylson")
res, _ = parse(dstr, fuzzy_with_tokens=True)
expected = datetime(1998, 11, 2)
assert res == expected
@pytest.mark.xfail
def test_extraneous_year3(self):
# This was found in the wild at insidertrading.org
dstr = "SMITH R & WEISS D 94 CHILD TR FBO M W SMITH UDT 12/1/1994"
res, _ = parse(dstr, fuzzy_with_tokens=True)
expected = datetime(1994, 12, 1)
assert res == expected
@pytest.mark.xfail
def test_unambiguous_YYYYMM(self):
# 171206 can be parsed as YYMMDD. However, 201712 cannot be parsed
# as an instance of YYMMDD, so the parser could fall back to the YYYYMM format.
dstr = "201712"
res = parse(dstr)
expected = datetime(2017, 12, 1)
assert res == expected
@pytest.mark.xfail
def test_extraneous_numerical_content(self):
# ref: https://github.com/dateutil/dateutil/issues/1029
# parser interprets price and percentage as parts of the date
dstr = "£14.99 (25% off, until April 20)"
res = parse(dstr, fuzzy=True, default=datetime(2000, 1, 1))
expected = datetime(2000, 4, 20)
assert res == expected
@pytest.mark.skipif(IS_WIN, reason="Windows does not use TZ var")
class TestTZVar(object):
def test_parse_unambiguous_nonexistent_local(self):
# When a date is specified as "EST" even though it should be "EDT" in the
# local time zone, we should still assign the local time zone
with TZEnvContext('EST+5EDT,M3.2.0/2,M11.1.0/2'):
dt_exp = datetime(2011, 8, 1, 12, 30, tzinfo=tz.tzlocal())
dt = parse('2011-08-01T12:30 EST')
assert dt.tzname() == 'EDT'
assert dt == dt_exp
def test_tzlocal_in_gmt(self):
# GH #318
with TZEnvContext('GMT0BST,M3.5.0,M10.5.0'):
# This is an imaginary datetime in tz.tzlocal() but should still
# parse using the GMT-as-alias-for-UTC rule
dt = parse('2004-05-01T12:00 GMT')
dt_exp = datetime(2004, 5, 1, 12, tzinfo=tz.UTC)
assert dt == dt_exp
def test_tzlocal_parse_fold(self):
# One manifestation of GH #318
with TZEnvContext('EST+5EDT,M3.2.0/2,M11.1.0/2'):
dt_exp = datetime(2011, 11, 6, 1, 30, tzinfo=tz.tzlocal())
dt_exp = tz.enfold(dt_exp, fold=1)
dt = parse('2011-11-06T01:30 EST')
# Because this is ambiguous, until `tz.tzlocal() is tz.tzlocal()`
# we'll just check the attributes we care about rather than
# dt == dt_exp
assert dt.tzname() == dt_exp.tzname()
assert dt.replace(tzinfo=None) == dt_exp.replace(tzinfo=None)
assert getattr(dt, 'fold') == getattr(dt_exp, 'fold')
assert dt.astimezone(tz.UTC) == dt_exp.astimezone(tz.UTC)
def test_parse_tzinfos_fold():
NYC = tz.gettz('America/New_York')
tzinfos = {'EST': NYC, 'EDT': NYC}
dt_exp = tz.enfold(datetime(2011, 11, 6, 1, 30, tzinfo=NYC), fold=1)
dt = parse('2011-11-06T01:30 EST', tzinfos=tzinfos)
assert dt == dt_exp
assert dt.tzinfo is dt_exp.tzinfo
assert getattr(dt, 'fold') == getattr(dt_exp, 'fold')
assert dt.astimezone(tz.UTC) == dt_exp.astimezone(tz.UTC)
@pytest.mark.parametrize('dtstr,dt', [
('5.6h', datetime(2003, 9, 25, 5, 36)),
('5.6m', datetime(2003, 9, 25, 0, 5, 36)),
# '5.6s' never had a rounding problem, test added for completeness
('5.6s', datetime(2003, 9, 25, 0, 0, 5, 600000))
])
def test_rounding_floatlike_strings(dtstr, dt):
assert parse(dtstr, default=datetime(2003, 9, 25)) == dt
@pytest.mark.parametrize('value', ['1: test', 'Nan'])
def test_decimal_error(value):
# GH 632, GH 662 - decimal.Decimal raises some non-ParserError exception
# when constructed with an invalid value
with pytest.raises(ParserError):
parse(value)
def test_parsererror_repr():
# GH 991 — the __repr__ was not properly indented and so was never defined.
# This tests the current behavior of the ParserError __repr__, but the
# precise format is not guaranteed to be stable and may change even in
# minor versions. This test exists to avoid regressions.
s = repr(ParserError("Problem with string: %s", "2019-01-01"))
assert s == "ParserError('Problem with string: %s', '2019-01-01')"

View file

@@ -1,706 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from ._common import NotAValue
import calendar
from datetime import datetime, date, timedelta
import unittest
import pytest
from dateutil.relativedelta import relativedelta, MO, TU, WE, FR, SU
class RelativeDeltaTest(unittest.TestCase):
now = datetime(2003, 9, 17, 20, 54, 47, 282310)
today = date(2003, 9, 17)
def testInheritance(self):
# Ensure that relativedelta is inheritance-friendly.
class rdChildClass(relativedelta):
pass
ccRD = rdChildClass(years=1, months=1, days=1, leapdays=1, weeks=1,
hours=1, minutes=1, seconds=1, microseconds=1)
rd = relativedelta(years=1, months=1, days=1, leapdays=1, weeks=1,
hours=1, minutes=1, seconds=1, microseconds=1)
self.assertEqual(type(ccRD + rd), type(ccRD),
msg='Addition does not inherit type.')
self.assertEqual(type(ccRD - rd), type(ccRD),
msg='Subtraction does not inherit type.')
self.assertEqual(type(-ccRD), type(ccRD),
msg='Negation does not inherit type.')
self.assertEqual(type(ccRD * 5.0), type(ccRD),
msg='Multiplication does not inherit type.')
self.assertEqual(type(ccRD / 5.0), type(ccRD),
msg='Division does not inherit type.')
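    # Editor's hedged follow-up (not part of the original suite): the subclass
    # also combines with datetimes exactly as relativedelta itself does.
    def testInheritanceWithDatetimeSketch(self):
        class rdGrandChildClass(relativedelta):
            pass
        result = datetime(2000, 1, 31) + rdGrandChildClass(months=1)
        assert result == datetime(2000, 2, 29)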
def testMonthEndMonthBeginning(self):
self.assertEqual(relativedelta(datetime(2003, 1, 31, 23, 59, 59),
datetime(2003, 3, 1, 0, 0, 0)),
relativedelta(months=-1, seconds=-1))
self.assertEqual(relativedelta(datetime(2003, 3, 1, 0, 0, 0),
datetime(2003, 1, 31, 23, 59, 59)),
relativedelta(months=1, seconds=1))
def testMonthEndMonthBeginningLeapYear(self):
self.assertEqual(relativedelta(datetime(2012, 1, 31, 23, 59, 59),
datetime(2012, 3, 1, 0, 0, 0)),
relativedelta(months=-1, seconds=-1))
self.assertEqual(relativedelta(datetime(2003, 3, 1, 0, 0, 0),
datetime(2003, 1, 31, 23, 59, 59)),
relativedelta(months=1, seconds=1))
def testNextMonth(self):
self.assertEqual(self.now+relativedelta(months=+1),
datetime(2003, 10, 17, 20, 54, 47, 282310))
def testNextMonthPlusOneWeek(self):
self.assertEqual(self.now+relativedelta(months=+1, weeks=+1),
datetime(2003, 10, 24, 20, 54, 47, 282310))
def testNextMonthPlusOneWeek10am(self):
self.assertEqual(self.today +
relativedelta(months=+1, weeks=+1, hour=10),
datetime(2003, 10, 24, 10, 0))
def testNextMonthPlusOneWeek10amDiff(self):
self.assertEqual(relativedelta(datetime(2003, 10, 24, 10, 0),
self.today),
relativedelta(months=+1, days=+7, hours=+10))
def testOneMonthBeforeOneYear(self):
self.assertEqual(self.now+relativedelta(years=+1, months=-1),
datetime(2004, 8, 17, 20, 54, 47, 282310))
def testMonthsOfDiffNumOfDays(self):
self.assertEqual(date(2003, 1, 27)+relativedelta(months=+1),
date(2003, 2, 27))
self.assertEqual(date(2003, 1, 31)+relativedelta(months=+1),
date(2003, 2, 28))
self.assertEqual(date(2003, 1, 31)+relativedelta(months=+2),
date(2003, 3, 31))
def testMonthsOfDiffNumOfDaysWithYears(self):
self.assertEqual(date(2000, 2, 28)+relativedelta(years=+1),
date(2001, 2, 28))
self.assertEqual(date(2000, 2, 29)+relativedelta(years=+1),
date(2001, 2, 28))
self.assertEqual(date(1999, 2, 28)+relativedelta(years=+1),
date(2000, 2, 28))
self.assertEqual(date(1999, 3, 1)+relativedelta(years=+1),
date(2000, 3, 1))
self.assertEqual(date(1999, 3, 1)+relativedelta(years=+1),
date(2000, 3, 1))
self.assertEqual(date(2001, 2, 28)+relativedelta(years=-1),
date(2000, 2, 28))
self.assertEqual(date(2001, 3, 1)+relativedelta(years=-1),
date(2000, 3, 1))
def testNextFriday(self):
self.assertEqual(self.today+relativedelta(weekday=FR),
date(2003, 9, 19))
def testNextFridayInt(self):
self.assertEqual(self.today+relativedelta(weekday=calendar.FRIDAY),
date(2003, 9, 19))
def testLastFridayInThisMonth(self):
self.assertEqual(self.today+relativedelta(day=31, weekday=FR(-1)),
date(2003, 9, 26))
def testLastDayOfFebruary(self):
self.assertEqual(date(2021, 2, 1) + relativedelta(day=31),
date(2021, 2, 28))
def testLastDayOfFebruaryLeapYear(self):
self.assertEqual(date(2020, 2, 1) + relativedelta(day=31),
date(2020, 2, 29))
def testNextWednesdayIsToday(self):
self.assertEqual(self.today+relativedelta(weekday=WE),
date(2003, 9, 17))
def testNextWednesdayNotToday(self):
self.assertEqual(self.today+relativedelta(days=+1, weekday=WE),
date(2003, 9, 24))
def testAddMoreThan12Months(self):
self.assertEqual(date(2003, 12, 1) + relativedelta(months=+13),
date(2005, 1, 1))
def testAddNegativeMonths(self):
self.assertEqual(date(2003, 1, 1) + relativedelta(months=-2),
date(2002, 11, 1))
def test15thISOYearWeek(self):
self.assertEqual(date(2003, 1, 1) +
relativedelta(day=4, weeks=+14, weekday=MO(-1)),
date(2003, 4, 7))
def testMillenniumAge(self):
self.assertEqual(relativedelta(self.now, date(2001, 1, 1)),
relativedelta(years=+2, months=+8, days=+16,
hours=+20, minutes=+54, seconds=+47,
microseconds=+282310))
def testJohnAge(self):
self.assertEqual(relativedelta(self.now,
datetime(1978, 4, 5, 12, 0)),
relativedelta(years=+25, months=+5, days=+12,
hours=+8, minutes=+54, seconds=+47,
microseconds=+282310))
def testJohnAgeWithDate(self):
self.assertEqual(relativedelta(self.today,
datetime(1978, 4, 5, 12, 0)),
relativedelta(years=+25, months=+5, days=+11,
hours=+12))
def testYearDay(self):
self.assertEqual(date(2003, 1, 1)+relativedelta(yearday=260),
date(2003, 9, 17))
self.assertEqual(date(2002, 1, 1)+relativedelta(yearday=260),
date(2002, 9, 17))
self.assertEqual(date(2000, 1, 1)+relativedelta(yearday=260),
date(2000, 9, 16))
self.assertEqual(self.today+relativedelta(yearday=261),
date(2003, 9, 18))
def testYearDayBug(self):
# Tests a problem reported by Adam Ryan.
self.assertEqual(date(2010, 1, 1)+relativedelta(yearday=15),
date(2010, 1, 15))
def testNonLeapYearDay(self):
self.assertEqual(date(2003, 1, 1)+relativedelta(nlyearday=260),
date(2003, 9, 17))
self.assertEqual(date(2002, 1, 1)+relativedelta(nlyearday=260),
date(2002, 9, 17))
self.assertEqual(date(2000, 1, 1)+relativedelta(nlyearday=260),
date(2000, 9, 17))
self.assertEqual(self.today+relativedelta(yearday=261),
date(2003, 9, 18))
def testAddition(self):
self.assertEqual(relativedelta(days=10) +
relativedelta(years=1, months=2, days=3, hours=4,
minutes=5, microseconds=6),
relativedelta(years=1, months=2, days=13, hours=4,
minutes=5, microseconds=6))
def testAbsoluteAddition(self):
self.assertEqual(relativedelta() + relativedelta(day=0, hour=0),
relativedelta(day=0, hour=0))
self.assertEqual(relativedelta(day=0, hour=0) + relativedelta(),
relativedelta(day=0, hour=0))
def testAdditionToDatetime(self):
self.assertEqual(datetime(2000, 1, 1) + relativedelta(days=1),
datetime(2000, 1, 2))
def testRightAdditionToDatetime(self):
self.assertEqual(relativedelta(days=1) + datetime(2000, 1, 1),
datetime(2000, 1, 2))
def testAdditionInvalidType(self):
with self.assertRaises(TypeError):
relativedelta(days=3) + 9
def testAdditionUnsupportedType(self):
# For unsupported types that define their own comparators, etc.
self.assertIs(relativedelta(days=1) + NotAValue, NotAValue)
def testAdditionFloatValue(self):
self.assertEqual(datetime(2000, 1, 1) + relativedelta(days=float(1)),
datetime(2000, 1, 2))
self.assertEqual(datetime(2000, 1, 1) + relativedelta(months=float(1)),
datetime(2000, 2, 1))
self.assertEqual(datetime(2000, 1, 1) + relativedelta(years=float(1)),
datetime(2001, 1, 1))
def testAdditionFloatFractionals(self):
self.assertEqual(datetime(2000, 1, 1, 0) +
relativedelta(days=float(0.5)),
datetime(2000, 1, 1, 12))
self.assertEqual(datetime(2000, 1, 1, 0, 0) +
relativedelta(hours=float(0.5)),
datetime(2000, 1, 1, 0, 30))
self.assertEqual(datetime(2000, 1, 1, 0, 0, 0) +
relativedelta(minutes=float(0.5)),
datetime(2000, 1, 1, 0, 0, 30))
self.assertEqual(datetime(2000, 1, 1, 0, 0, 0, 0) +
relativedelta(seconds=float(0.5)),
datetime(2000, 1, 1, 0, 0, 0, 500000))
self.assertEqual(datetime(2000, 1, 1, 0, 0, 0, 0) +
relativedelta(microseconds=float(500000.25)),
datetime(2000, 1, 1, 0, 0, 0, 500000))
def testSubtraction(self):
self.assertEqual(relativedelta(days=10) -
relativedelta(years=1, months=2, days=3, hours=4,
minutes=5, microseconds=6),
relativedelta(years=-1, months=-2, days=7, hours=-4,
minutes=-5, microseconds=-6))
def testRightSubtractionFromDatetime(self):
self.assertEqual(datetime(2000, 1, 2) - relativedelta(days=1),
datetime(2000, 1, 1))
def testSubtractionWithDatetime(self):
self.assertRaises(TypeError, lambda x, y: x - y,
relativedelta(days=1), datetime(2000, 1, 1))
def testSubtractionInvalidType(self):
with self.assertRaises(TypeError):
relativedelta(hours=12) - 14
def testSubtractionUnsupportedType(self):
self.assertIs(relativedelta(days=1) - NotAValue, NotAValue)
def testMultiplication(self):
self.assertEqual(datetime(2000, 1, 1) + relativedelta(days=1) * 28,
datetime(2000, 1, 29))
self.assertEqual(datetime(2000, 1, 1) + 28 * relativedelta(days=1),
datetime(2000, 1, 29))
def testMultiplicationUnsupportedType(self):
self.assertIs(relativedelta(days=1) * NotAValue, NotAValue)
def testDivision(self):
self.assertEqual(datetime(2000, 1, 1) + relativedelta(days=28) / 28,
datetime(2000, 1, 2))
def testDivisionUnsupportedType(self):
self.assertIs(relativedelta(days=1) / NotAValue, NotAValue)
def testBoolean(self):
self.assertFalse(relativedelta(days=0))
self.assertTrue(relativedelta(days=1))
def testAbsoluteValueNegative(self):
rd_base = relativedelta(years=-1, months=-5, days=-2, hours=-3,
minutes=-5, seconds=-2, microseconds=-12)
rd_expected = relativedelta(years=1, months=5, days=2, hours=3,
minutes=5, seconds=2, microseconds=12)
self.assertEqual(abs(rd_base), rd_expected)
def testAbsoluteValuePositive(self):
rd_base = relativedelta(years=1, months=5, days=2, hours=3,
minutes=5, seconds=2, microseconds=12)
rd_expected = rd_base
self.assertEqual(abs(rd_base), rd_expected)
def testComparison(self):
d1 = relativedelta(years=1, months=1, days=1, leapdays=0, hours=1,
minutes=1, seconds=1, microseconds=1)
d2 = relativedelta(years=1, months=1, days=1, leapdays=0, hours=1,
minutes=1, seconds=1, microseconds=1)
d3 = relativedelta(years=1, months=1, days=1, leapdays=0, hours=1,
minutes=1, seconds=1, microseconds=2)
self.assertEqual(d1, d2)
self.assertNotEqual(d1, d3)
def testInequalityTypeMismatch(self):
# Different type
self.assertFalse(relativedelta(year=1) == 19)
def testInequalityUnsupportedType(self):
self.assertIs(relativedelta(hours=3) == NotAValue, NotAValue)
def testInequalityWeekdays(self):
# Different weekdays
no_wday = relativedelta(year=1997, month=4)
wday_mo_1 = relativedelta(year=1997, month=4, weekday=MO(+1))
wday_mo_2 = relativedelta(year=1997, month=4, weekday=MO(+2))
wday_tu = relativedelta(year=1997, month=4, weekday=TU)
self.assertTrue(wday_mo_1 == wday_mo_1)
self.assertFalse(no_wday == wday_mo_1)
self.assertFalse(wday_mo_1 == no_wday)
self.assertFalse(wday_mo_1 == wday_mo_2)
self.assertFalse(wday_mo_2 == wday_mo_1)
self.assertFalse(wday_mo_1 == wday_tu)
self.assertFalse(wday_tu == wday_mo_1)
def testMonthOverflow(self):
self.assertEqual(relativedelta(months=273),
relativedelta(years=22, months=9))
def testWeeks(self):
# Test that the weeks property is working properly.
rd = relativedelta(years=4, months=2, weeks=8, days=6)
self.assertEqual((rd.weeks, rd.days), (8, 8 * 7 + 6))
rd.weeks = 3
self.assertEqual((rd.weeks, rd.days), (3, 3 * 7 + 6))
def testRelativeDeltaRepr(self):
self.assertEqual(repr(relativedelta(years=1, months=-1, days=15)),
'relativedelta(years=+1, months=-1, days=+15)')
self.assertEqual(repr(relativedelta(months=14, seconds=-25)),
'relativedelta(years=+1, months=+2, seconds=-25)')
self.assertEqual(repr(relativedelta(month=3, hour=3, weekday=SU(3))),
'relativedelta(month=3, weekday=SU(+3), hour=3)')
def testRelativeDeltaFractionalYear(self):
with self.assertRaises(ValueError):
relativedelta(years=1.5)
def testRelativeDeltaFractionalMonth(self):
with self.assertRaises(ValueError):
relativedelta(months=1.5)
def testRelativeDeltaInvalidDatetimeObject(self):
with self.assertRaises(TypeError):
relativedelta(dt1='2018-01-01', dt2='2018-01-02')
with self.assertRaises(TypeError):
relativedelta(dt1=datetime(2018, 1, 1), dt2='2018-01-02')
with self.assertRaises(TypeError):
relativedelta(dt1='2018-01-01', dt2=datetime(2018, 1, 2))
def testRelativeDeltaFractionalAbsolutes(self):
# Fractional absolute values will soon be unsupported,
# check for the deprecation warning.
with pytest.warns(DeprecationWarning):
relativedelta(year=2.86)
with pytest.warns(DeprecationWarning):
relativedelta(month=1.29)
with pytest.warns(DeprecationWarning):
relativedelta(day=0.44)
with pytest.warns(DeprecationWarning):
relativedelta(hour=23.98)
with pytest.warns(DeprecationWarning):
relativedelta(minute=45.21)
with pytest.warns(DeprecationWarning):
relativedelta(second=13.2)
with pytest.warns(DeprecationWarning):
relativedelta(microsecond=157221.93)
def testRelativeDeltaFractionalRepr(self):
rd = relativedelta(years=3, months=-2, days=1.25)
self.assertEqual(repr(rd),
'relativedelta(years=+3, months=-2, days=+1.25)')
rd = relativedelta(hours=0.5, seconds=9.22)
self.assertEqual(repr(rd),
'relativedelta(hours=+0.5, seconds=+9.22)')
def testRelativeDeltaFractionalWeeks(self):
# Equivalent to days=8, hours=18
rd = relativedelta(weeks=1.25)
d1 = datetime(2009, 9, 3, 0, 0)
self.assertEqual(d1 + rd,
datetime(2009, 9, 11, 18))
def testRelativeDeltaFractionalDays(self):
rd1 = relativedelta(days=1.48)
d1 = datetime(2009, 9, 3, 0, 0)
self.assertEqual(d1 + rd1,
datetime(2009, 9, 4, 11, 31, 12))
rd2 = relativedelta(days=1.5)
self.assertEqual(d1 + rd2,
datetime(2009, 9, 4, 12, 0, 0))
def testRelativeDeltaFractionalHours(self):
rd = relativedelta(days=1, hours=12.5)
d1 = datetime(2009, 9, 3, 0, 0)
self.assertEqual(d1 + rd,
datetime(2009, 9, 4, 12, 30, 0))
def testRelativeDeltaFractionalMinutes(self):
rd = relativedelta(hours=1, minutes=30.5)
d1 = datetime(2009, 9, 3, 0, 0)
self.assertEqual(d1 + rd,
datetime(2009, 9, 3, 1, 30, 30))
def testRelativeDeltaFractionalSeconds(self):
rd = relativedelta(hours=5, minutes=30, seconds=30.5)
d1 = datetime(2009, 9, 3, 0, 0)
self.assertEqual(d1 + rd,
datetime(2009, 9, 3, 5, 30, 30, 500000))
def testRelativeDeltaFractionalPositiveOverflow(self):
# Equivalent to (days=1, hours=14)
rd1 = relativedelta(days=1.5, hours=2)
d1 = datetime(2009, 9, 3, 0, 0)
self.assertEqual(d1 + rd1,
datetime(2009, 9, 4, 14, 0, 0))
# Equivalent to (days=1, hours=14, minutes=45)
rd2 = relativedelta(days=1.5, hours=2.5, minutes=15)
d1 = datetime(2009, 9, 3, 0, 0)
self.assertEqual(d1 + rd2,
datetime(2009, 9, 4, 14, 45))
# Carry back up - equivalent to (days=2, hours=2, minutes=0, seconds=1)
rd3 = relativedelta(days=1.5, hours=13, minutes=59.5, seconds=31)
self.assertEqual(d1 + rd3,
datetime(2009, 9, 5, 2, 0, 1))
def testRelativeDeltaFractionalNegativeDays(self):
# Equivalent to (days=-1, hours=-1)
rd1 = relativedelta(days=-1.5, hours=11)
d1 = datetime(2009, 9, 3, 12, 0)
self.assertEqual(d1 + rd1,
datetime(2009, 9, 2, 11, 0, 0))
# Equivalent to (days=-1, hours=-9)
rd2 = relativedelta(days=-1.25, hours=-3)
self.assertEqual(d1 + rd2,
datetime(2009, 9, 2, 3))
def testRelativeDeltaNormalizeFractionalDays(self):
# Equivalent to (days=2, hours=18)
rd1 = relativedelta(days=2.75)
self.assertEqual(rd1.normalized(), relativedelta(days=2, hours=18))
# Equivalent to (days=1, hours=11, minutes=31, seconds=12)
rd2 = relativedelta(days=1.48)
self.assertEqual(rd2.normalized(),
relativedelta(days=1, hours=11, minutes=31, seconds=12))
def testRelativeDeltaNormalizeFractionalDays2(self):
# Equivalent to (hours=1, minutes=30)
rd1 = relativedelta(hours=1.5)
self.assertEqual(rd1.normalized(), relativedelta(hours=1, minutes=30))
# Equivalent to (hours=3, minutes=17, seconds=5, microseconds=100)
rd2 = relativedelta(hours=3.28472225)
self.assertEqual(rd2.normalized(),
relativedelta(hours=3, minutes=17, seconds=5, microseconds=100))
def testRelativeDeltaNormalizeFractionalMinutes(self):
# Equivalent to (minutes=15, seconds=36)
rd1 = relativedelta(minutes=15.6)
self.assertEqual(rd1.normalized(),
relativedelta(minutes=15, seconds=36))
# Equivalent to (minutes=25, seconds=20, microseconds=25000)
rd2 = relativedelta(minutes=25.33375)
self.assertEqual(rd2.normalized(),
relativedelta(minutes=25, seconds=20, microseconds=25000))
def testRelativeDeltaNormalizeFractionalSeconds(self):
# Equivalent to (seconds=45, microseconds=25000)
rd1 = relativedelta(seconds=45.025)
self.assertEqual(rd1.normalized(),
relativedelta(seconds=45, microseconds=25000))
def testRelativeDeltaFractionalPositiveOverflow2(self):
# Equivalent to (days=1, hours=14)
rd1 = relativedelta(days=1.5, hours=2)
self.assertEqual(rd1.normalized(),
relativedelta(days=1, hours=14))
# Equivalent to (days=1, hours=14, minutes=45)
rd2 = relativedelta(days=1.5, hours=2.5, minutes=15)
self.assertEqual(rd2.normalized(),
relativedelta(days=1, hours=14, minutes=45))
# Carry back up - equivalent to:
# (days=2, hours=2, minutes=0, seconds=2, microseconds=3)
rd3 = relativedelta(days=1.5, hours=13, minutes=59.50045,
seconds=31.473, microseconds=500003)
self.assertEqual(rd3.normalized(),
relativedelta(days=2, hours=2, minutes=0,
seconds=2, microseconds=3))
def testRelativeDeltaFractionalNegativeOverflow(self):
# Equivalent to (days=-1)
rd1 = relativedelta(days=-0.5, hours=-12)
self.assertEqual(rd1.normalized(),
relativedelta(days=-1))
# Equivalent to (days=-1)
rd2 = relativedelta(days=-1.5, hours=12)
self.assertEqual(rd2.normalized(),
relativedelta(days=-1))
# Equivalent to (days=-1, hours=-14, minutes=-45)
rd3 = relativedelta(days=-1.5, hours=-2.5, minutes=-15)
self.assertEqual(rd3.normalized(),
relativedelta(days=-1, hours=-14, minutes=-45))
# Equivalent to (days=-1, hours=-14, minutes=+15)
rd4 = relativedelta(days=-1.5, hours=-2.5, minutes=45)
self.assertEqual(rd4.normalized(),
relativedelta(days=-1, hours=-14, minutes=+15))
# Carry back up - equivalent to:
# (days=-2, hours=-2, minutes=0, seconds=-2, microseconds=-3)
rd3 = relativedelta(days=-1.5, hours=-13, minutes=-59.50045,
seconds=-31.473, microseconds=-500003)
self.assertEqual(rd3.normalized(),
relativedelta(days=-2, hours=-2, minutes=0,
seconds=-2, microseconds=-3))
def testInvalidYearDay(self):
with self.assertRaises(ValueError):
relativedelta(yearday=367)
def testAddTimedeltaToUnpopulatedRelativedelta(self):
td = timedelta(
days=1,
seconds=1,
microseconds=1,
milliseconds=1,
minutes=1,
hours=1,
weeks=1
)
expected = relativedelta(
weeks=1,
days=1,
hours=1,
minutes=1,
seconds=1,
microseconds=1001
)
self.assertEqual(expected, relativedelta() + td)
def testAddTimedeltaToPopulatedRelativeDelta(self):
td = timedelta(
days=1,
seconds=1,
microseconds=1,
milliseconds=1,
minutes=1,
hours=1,
weeks=1
)
rd = relativedelta(
year=1,
month=1,
day=1,
hour=1,
minute=1,
second=1,
microsecond=1,
years=1,
months=1,
days=1,
weeks=1,
hours=1,
minutes=1,
seconds=1,
microseconds=1
)
expected = relativedelta(
year=1,
month=1,
day=1,
hour=1,
minute=1,
second=1,
microsecond=1,
years=1,
months=1,
weeks=2,
days=2,
hours=2,
minutes=2,
seconds=2,
microseconds=1002,
)
self.assertEqual(expected, rd + td)
def testHashable(self):
try:
{relativedelta(minute=1): 'test'}
except:
self.fail("relativedelta() failed to hash!")
class RelativeDeltaWeeksPropertyGetterTest(unittest.TestCase):
"""Test the weeks property getter"""
def test_one_day(self):
rd = relativedelta(days=1)
self.assertEqual(rd.days, 1)
self.assertEqual(rd.weeks, 0)
def test_minus_one_day(self):
rd = relativedelta(days=-1)
self.assertEqual(rd.days, -1)
self.assertEqual(rd.weeks, 0)
def test_eight_days(self):
rd = relativedelta(days=8)
self.assertEqual(rd.days, 8)
self.assertEqual(rd.weeks, 1)
def test_minus_eight_days(self):
rd = relativedelta(days=-8)
self.assertEqual(rd.days, -8)
self.assertEqual(rd.weeks, -1)
class RelativeDeltaWeeksPropertySetterTest(unittest.TestCase):
"""Test the weeks setter which makes a "smart" update of the days attribute"""
def test_one_day_set_one_week(self):
rd = relativedelta(days=1)
rd.weeks = 1 # add 7 days
self.assertEqual(rd.days, 8)
self.assertEqual(rd.weeks, 1)
def test_minus_one_day_set_one_week(self):
rd = relativedelta(days=-1)
rd.weeks = 1 # add 7 days
self.assertEqual(rd.days, 6)
self.assertEqual(rd.weeks, 0)
def test_eight_days_set_minus_one_week(self):
rd = relativedelta(days=8)
rd.weeks = -1 # change from 1 week, 1 day to -1 week, 1 day
self.assertEqual(rd.days, -6)
self.assertEqual(rd.weeks, 0)
def test_minus_eight_days_set_minus_one_week(self):
rd = relativedelta(days=-8)
rd.weeks = -1 # does not change anything
self.assertEqual(rd.days, -8)
self.assertEqual(rd.weeks, -1)
# vim:ts=4:sw=4:et
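The fractional-normalization tests above all exercise the same rule: normalized() pushes the fractional part of a larger unit down into the next smaller unit until only integer components remain. A minimal sketch of that behaviour, assuming dateutil is importable; the assertions simply restate two of the expectations from the tests above:

from dateutil.relativedelta import relativedelta

# 2.75 days -> 2 days plus 0.75 * 24 = 18 hours.
assert relativedelta(days=2.75).normalized() == relativedelta(days=2, hours=18)
# The same cascading applies through hours, minutes, seconds and microseconds.
assert relativedelta(hours=1.5).normalized() == relativedelta(hours=1, minutes=30)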

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@ -1,52 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from datetime import timedelta, datetime
from dateutil import tz
from dateutil import utils
from dateutil.tz import UTC
from dateutil.utils import within_delta
from freezegun import freeze_time
NYC = tz.gettz("America/New_York")
@freeze_time(datetime(2014, 12, 15, 1, 21, 33, 4003))
def test_utils_today():
assert utils.today() == datetime(2014, 12, 15, 0, 0, 0)
@freeze_time(datetime(2014, 12, 15, 12), tz_offset=5)
def test_utils_today_tz_info():
assert utils.today(NYC) == datetime(2014, 12, 15, 0, 0, 0, tzinfo=NYC)
@freeze_time(datetime(2014, 12, 15, 23), tz_offset=5)
def test_utils_today_tz_info_different_day():
assert utils.today(UTC) == datetime(2014, 12, 16, 0, 0, 0, tzinfo=UTC)
def test_utils_default_tz_info_naive():
dt = datetime(2014, 9, 14, 9, 30)
assert utils.default_tzinfo(dt, NYC).tzinfo is NYC
def test_utils_default_tz_info_aware():
dt = datetime(2014, 9, 14, 9, 30, tzinfo=UTC)
assert utils.default_tzinfo(dt, NYC).tzinfo is UTC
def test_utils_within_delta():
d1 = datetime(2016, 1, 1, 12, 14, 1, 9)
d2 = d1.replace(microsecond=15)
assert within_delta(d1, d2, timedelta(seconds=1))
assert not within_delta(d1, d2, timedelta(microseconds=1))
def test_utils_within_delta_with_negative_delta():
d1 = datetime(2016, 1, 1)
d2 = datetime(2015, 12, 31)
assert within_delta(d2, d1, timedelta(days=-1))
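The negative-delta case above passes because within_delta treats the tolerance symmetrically, so the sign of the timedelta is irrelevant. A minimal sketch of that check, written as an assumption about the behaviour rather than a copy of dateutil's implementation:

from datetime import datetime, timedelta

def within_delta_sketch(dt1, dt2, delta):
    # Assumed behaviour: the tolerance is taken as an absolute value, so
    # timedelta(days=-1) and timedelta(days=1) are interchangeable here.
    delta = abs(delta)
    return -delta <= (dt1 - dt2) <= delta

assert within_delta_sketch(datetime(2015, 12, 31), datetime(2016, 1, 1),
                           timedelta(days=-1))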

View file

@ -1 +0,0 @@
from __future__ import absolute_import, division, unicode_literals

View file

@ -1,108 +0,0 @@
from __future__ import print_function
import os.path
import sys
import pkg_resources
import pytest
from .tree_construction import TreeConstructionFile
from .tokenizer import TokenizerFile
from .sanitizer import SanitizerFile
_dir = os.path.abspath(os.path.dirname(__file__))
_root = os.path.join(_dir, "..", "..")
_testdata = os.path.join(_dir, "testdata")
_tree_construction = os.path.join(_testdata, "tree-construction")
_tokenizer = os.path.join(_testdata, "tokenizer")
_sanitizer_testdata = os.path.join(_dir, "sanitizer-testdata")
def fail_if_missing_pytest_expect():
"""Throws an exception halting pytest if pytest-expect isn't working"""
try:
from pytest_expect import expect # noqa
except ImportError:
header = '*' * 78
print(
'\n' +
header + '\n' +
'ERROR: Either pytest-expect or its dependency u-msgpack-python is not\n' +
'installed. Please install them both before running pytest.\n' +
header + '\n',
file=sys.stderr
)
raise
fail_if_missing_pytest_expect()
def pytest_configure(config):
msgs = []
if not os.path.exists(_testdata):
msg = "testdata not available! "
if os.path.exists(os.path.join(_root, ".git")):
msg += ("Please run git submodule update --init --recursive " +
"and then run tests again.")
else:
msg += ("The testdata doesn't appear to be included with this package, " +
"so finding the right version will be hard. :(")
msgs.append(msg)
if config.option.update_xfail:
# Check for optional requirements
req_file = os.path.join(_root, "requirements-optional.txt")
if os.path.exists(req_file):
with open(req_file, "r") as fp:
for line in fp:
if (line.strip() and
not (line.startswith("-r") or
line.startswith("#"))):
if ";" in line:
spec, marker = line.strip().split(";", 1)
else:
spec, marker = line.strip(), None
req = pkg_resources.Requirement.parse(spec)
if marker and not pkg_resources.evaluate_marker(marker):
msgs.append("%s not available in this environment" % spec)
else:
try:
installed = pkg_resources.working_set.find(req)
except pkg_resources.VersionConflict:
msgs.append("Outdated version of %s installed, need %s" % (req.name, spec))
else:
if not installed:
msgs.append("Need %s" % spec)
# Check cElementTree
import xml.etree.ElementTree as ElementTree
try:
import xml.etree.cElementTree as cElementTree
except ImportError:
msgs.append("cElementTree unable to be imported")
else:
if cElementTree.Element is ElementTree.Element:
msgs.append("cElementTree is just an alias for ElementTree")
if msgs:
pytest.exit("\n".join(msgs))
def pytest_collect_file(path, parent):
dir = os.path.abspath(path.dirname)
dir_and_parents = set()
while dir not in dir_and_parents:
dir_and_parents.add(dir)
dir = os.path.dirname(dir)
if _tree_construction in dir_and_parents:
if path.ext == ".dat":
return TreeConstructionFile(path, parent)
elif _tokenizer in dir_and_parents:
if path.ext == ".test":
return TokenizerFile(path, parent)
elif _sanitizer_testdata in dir_and_parents:
if path.ext == ".dat":
return SanitizerFile(path, parent)

View file

@ -1,51 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
import codecs
import json
import pytest
from html5lib import parseFragment, serialize
class SanitizerFile(pytest.File):
def collect(self):
with codecs.open(str(self.fspath), "r", encoding="utf-8") as fp:
tests = json.load(fp)
for i, test in enumerate(tests):
yield SanitizerTest(str(i), self, test=test)
class SanitizerTest(pytest.Item):
def __init__(self, name, parent, test):
super(SanitizerTest, self).__init__(name, parent)
self.obj = lambda: 1 # this is to hack around skipif needing a function!
self.test = test
def runtest(self):
input = self.test["input"]
expected = self.test["output"]
parsed = parseFragment(input)
with pytest.deprecated_call():
serialized = serialize(parsed,
sanitize=True,
omit_optional_tags=False,
use_trailing_solidus=True,
space_before_trailing_solidus=False,
quote_attr_values="always",
quote_char="'",
alphabetical_attributes=True)
errorMsg = "\n".join(["\n\nInput:", input,
"\nExpected:", expected,
"\nReceived:", serialized])
assert expected == serialized, errorMsg
def repr_failure(self, excinfo):
traceback = excinfo.traceback
ntraceback = traceback.cut(path=__file__)
excinfo.traceback = ntraceback.filter()
return excinfo.getrepr(funcargs=True,
showlocals=False,
style="short", tbfilter=False)

View file

@ -1,199 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
# pylint:disable=wrong-import-position
import os
import sys
import codecs
import glob
import xml.sax.handler
base_path = os.path.split(__file__)[0]
test_dir = os.path.join(base_path, 'testdata')
sys.path.insert(0, os.path.abspath(os.path.join(base_path,
os.path.pardir,
os.path.pardir)))
from html5lib import treebuilders, treewalkers, treeadapters # noqa
del base_path
# Build a dict of available trees
treeTypes = {}
# DOM impls
treeTypes["DOM"] = {
"builder": treebuilders.getTreeBuilder("dom"),
"walker": treewalkers.getTreeWalker("dom")
}
# ElementTree impls
import xml.etree.ElementTree as ElementTree # noqa
treeTypes['ElementTree'] = {
"builder": treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True),
"walker": treewalkers.getTreeWalker("etree", ElementTree)
}
try:
import xml.etree.cElementTree as cElementTree # noqa
except ImportError:
treeTypes['cElementTree'] = None
else:
# On Python 3.3 and above cElementTree is an alias, don't run them twice.
if cElementTree.Element is ElementTree.Element:
treeTypes['cElementTree'] = None
else:
treeTypes['cElementTree'] = {
"builder": treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True),
"walker": treewalkers.getTreeWalker("etree", cElementTree)
}
try:
import lxml.etree as lxml # noqa
except ImportError:
treeTypes['lxml'] = None
else:
treeTypes['lxml'] = {
"builder": treebuilders.getTreeBuilder("lxml"),
"walker": treewalkers.getTreeWalker("lxml")
}
# Genshi impls
try:
import genshi # noqa
except ImportError:
treeTypes["genshi"] = None
else:
treeTypes["genshi"] = {
"builder": treebuilders.getTreeBuilder("dom"),
"adapter": lambda tree: treeadapters.genshi.to_genshi(treewalkers.getTreeWalker("dom")(tree)),
"walker": treewalkers.getTreeWalker("genshi")
}
# pylint:enable=wrong-import-position
def get_data_files(subdirectory, files='*.dat', search_dir=test_dir):
return sorted(glob.glob(os.path.join(search_dir, subdirectory, files)))
class DefaultDict(dict):
def __init__(self, default, *args, **kwargs):
self.default = default
dict.__init__(self, *args, **kwargs)
def __getitem__(self, key):
return dict.get(self, key, self.default)
class TestData(object):
def __init__(self, filename, newTestHeading="data", encoding="utf8"):
if encoding is None:
self.f = open(filename, mode="rb")
else:
self.f = codecs.open(filename, encoding=encoding)
self.encoding = encoding
self.newTestHeading = newTestHeading
def __iter__(self):
data = DefaultDict(None)
key = None
for line in self.f:
heading = self.isSectionHeading(line)
if heading:
if data and heading == self.newTestHeading:
# Remove trailing newline
data[key] = data[key][:-1]
yield self.normaliseOutput(data)
data = DefaultDict(None)
key = heading
data[key] = "" if self.encoding else b""
elif key is not None:
data[key] += line
if data:
yield self.normaliseOutput(data)
def isSectionHeading(self, line):
"""If the current heading is a test section heading return the heading,
otherwise return False"""
# print(line)
if line.startswith("#" if self.encoding else b"#"):
return line[1:].strip()
else:
return False
def normaliseOutput(self, data):
# Remove trailing newlines
for key, value in data.items():
if value.endswith("\n" if self.encoding else b"\n"):
data[key] = value[:-1]
return data
def convert(stripChars):
def convertData(data):
"""convert the output of str(document) to the format used in the testcases"""
data = data.split("\n")
rv = []
for line in data:
if line.startswith("|"):
rv.append(line[stripChars:])
else:
rv.append(line)
return "\n".join(rv)
return convertData
convertExpected = convert(2)
def errorMessage(input, expected, actual):
msg = ("Input:\n%s\nExpected:\n%s\nReceived\n%s\n" %
(repr(input), repr(expected), repr(actual)))
if sys.version_info[0] == 2:
msg = msg.encode("ascii", "backslashreplace")
return msg
class TracingSaxHandler(xml.sax.handler.ContentHandler):
def __init__(self):
xml.sax.handler.ContentHandler.__init__(self)
self.visited = []
def startDocument(self):
self.visited.append('startDocument')
def endDocument(self):
self.visited.append('endDocument')
def startPrefixMapping(self, prefix, uri):
# These are ignored as their order is not guaranteed
pass
def endPrefixMapping(self, prefix):
# These are ignored as their order is not guaranteed
pass
def startElement(self, name, attrs):
self.visited.append(('startElement', name, attrs))
def endElement(self, name):
self.visited.append(('endElement', name))
def startElementNS(self, name, qname, attrs):
self.visited.append(('startElementNS', name, qname, dict(attrs)))
def endElementNS(self, name, qname):
self.visited.append(('endElementNS', name, qname))
def characters(self, content):
self.visited.append(('characters', content))
def ignorableWhitespace(self, whitespace):
self.visited.append(('ignorableWhitespace', whitespace))
def processingInstruction(self, target, data):
self.visited.append(('processingInstruction', target, data))
def skippedEntity(self, name):
self.visited.append(('skippedEntity', name))
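TestData above splits a .dat file into sections keyed by their "#heading" lines, starting a new test whenever the configured newTestHeading reappears, and convertExpected strips the leading "| " columns from serialized trees. A short self-contained sketch of that parsing, using a made-up sample rather than a real html5lib-tests file, and relying on the TestData and convertExpected helpers defined above:

import tempfile

sample = "#data\n<p>hello\n#errors\n#document\n| <html>\n"
with tempfile.NamedTemporaryFile("w", suffix=".dat", delete=False) as fp:
    fp.write(sample)
for test in TestData(fp.name, newTestHeading="data"):
    assert test["data"] == "<p>hello"                     # text under "#data"
    assert convertExpected(test["document"]) == "<html>"  # "| " prefix stripped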

View file

@ -1,78 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
from collections import OrderedDict
import pytest
import html5lib
from html5lib.filters.alphabeticalattributes import Filter
from html5lib.serializer import HTMLSerializer
@pytest.mark.parametrize('msg, attrs, expected_attrs', [
(
'no attrs',
{},
{}
),
(
'one attr',
{(None, 'alt'): 'image'},
OrderedDict([((None, 'alt'), 'image')])
),
(
'multiple attrs',
{
(None, 'src'): 'foo',
(None, 'alt'): 'image',
(None, 'style'): 'border: 1px solid black;'
},
OrderedDict([
((None, 'alt'), 'image'),
((None, 'src'), 'foo'),
((None, 'style'), 'border: 1px solid black;')
])
),
])
def test_alphabetizing(msg, attrs, expected_attrs):
tokens = [{'type': 'StartTag', 'name': 'img', 'data': attrs}]
output_tokens = list(Filter(tokens))
attrs = output_tokens[0]['data']
assert attrs == expected_attrs
def test_with_different_namespaces():
tokens = [{
'type': 'StartTag',
'name': 'pattern',
'data': {
(None, 'id'): 'patt1',
('http://www.w3.org/1999/xlink', 'href'): '#patt2'
}
}]
output_tokens = list(Filter(tokens))
attrs = output_tokens[0]['data']
assert attrs == OrderedDict([
((None, 'id'), 'patt1'),
(('http://www.w3.org/1999/xlink', 'href'), '#patt2')
])
def test_with_serializer():
"""Verify filter works in the context of everything else"""
parser = html5lib.HTMLParser()
dom = parser.parseFragment('<svg><pattern xlink:href="#patt2" id="patt1"></svg>')
walker = html5lib.getTreeWalker('etree')
ser = HTMLSerializer(
alphabetical_attributes=True,
quote_attr_values='always'
)
# FIXME(willkg): The "xlink" namespace gets dropped by the serializer. When
# that gets fixed, we can fix this expected result.
assert (
ser.render(walker(dom)) ==
'<svg><pattern id="patt1" href="#patt2"></pattern></svg>'
)

View file

@ -1,117 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
import os
import pytest
from .support import get_data_files, test_dir, errorMessage, TestData as _TestData
from html5lib import HTMLParser, _inputstream
def test_basic_prescan_length():
data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8')
pad = 1024 - len(data) + 1
data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
assert len(data) == 1024 # Sanity
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
assert 'utf-8' == stream.charEncoding[0].name
def test_parser_reparse():
data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8')
pad = 10240 - len(data) + 1
data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
assert len(data) == 10240 # Sanity
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
assert 'windows-1252' == stream.charEncoding[0].name
p = HTMLParser(namespaceHTMLElements=False)
doc = p.parse(data, useChardet=False)
assert 'utf-8' == p.documentEncoding
assert doc.find(".//title").text == "Caf\u00E9"
@pytest.mark.parametrize("expected,data,kwargs", [
("utf-16le", b"\xFF\xFE", {"override_encoding": "iso-8859-2"}),
("utf-16be", b"\xFE\xFF", {"override_encoding": "iso-8859-2"}),
("utf-8", b"\xEF\xBB\xBF", {"override_encoding": "iso-8859-2"}),
("iso-8859-2", b"", {"override_encoding": "iso-8859-2", "transport_encoding": "iso-8859-3"}),
("iso-8859-2", b"<meta charset=iso-8859-3>", {"transport_encoding": "iso-8859-2"}),
("iso-8859-2", b"<meta charset=iso-8859-2>", {"same_origin_parent_encoding": "iso-8859-3"}),
("iso-8859-2", b"", {"same_origin_parent_encoding": "iso-8859-2", "likely_encoding": "iso-8859-3"}),
("iso-8859-2", b"", {"same_origin_parent_encoding": "utf-16", "likely_encoding": "iso-8859-2"}),
("iso-8859-2", b"", {"same_origin_parent_encoding": "utf-16be", "likely_encoding": "iso-8859-2"}),
("iso-8859-2", b"", {"same_origin_parent_encoding": "utf-16le", "likely_encoding": "iso-8859-2"}),
("iso-8859-2", b"", {"likely_encoding": "iso-8859-2", "default_encoding": "iso-8859-3"}),
("iso-8859-2", b"", {"default_encoding": "iso-8859-2"}),
("windows-1252", b"", {"default_encoding": "totally-bogus-string"}),
("windows-1252", b"", {}),
])
def test_parser_args(expected, data, kwargs):
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False, **kwargs)
assert expected == stream.charEncoding[0].name
p = HTMLParser()
p.parse(data, useChardet=False, **kwargs)
assert expected == p.documentEncoding
@pytest.mark.parametrize("kwargs", [
{"override_encoding": "iso-8859-2"},
{"override_encoding": None},
{"transport_encoding": "iso-8859-2"},
{"transport_encoding": None},
{"same_origin_parent_encoding": "iso-8859-2"},
{"same_origin_parent_encoding": None},
{"likely_encoding": "iso-8859-2"},
{"likely_encoding": None},
{"default_encoding": "iso-8859-2"},
{"default_encoding": None},
{"foo_encoding": "iso-8859-2"},
{"foo_encoding": None},
])
def test_parser_args_raises(kwargs):
with pytest.raises(TypeError) as exc_info:
p = HTMLParser()
p.parse("", useChardet=False, **kwargs)
assert exc_info.value.args[0].startswith("Cannot set an encoding with a unicode input")
def param_encoding():
for filename in get_data_files("encoding"):
tests = _TestData(filename, b"data", encoding=None)
for test in tests:
yield test[b'data'], test[b'encoding']
@pytest.mark.parametrize("data, encoding", param_encoding())
def test_parser_encoding(data, encoding):
p = HTMLParser()
assert p.documentEncoding is None
p.parse(data, useChardet=False)
encoding = encoding.lower().decode("ascii")
assert encoding == p.documentEncoding, errorMessage(data, encoding, p.documentEncoding)
@pytest.mark.parametrize("data, encoding", param_encoding())
def test_prescan_encoding(data, encoding):
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
encoding = encoding.lower().decode("ascii")
# Very crude way to ignore irrelevant tests
if len(data) > stream.numBytesMeta:
return
assert encoding == stream.charEncoding[0].name, errorMessage(data, encoding, stream.charEncoding[0].name)
# pylint:disable=wrong-import-position
try:
import chardet # noqa
except ImportError:
print("chardet not found, skipping chardet tests")
else:
def test_chardet():
with open(os.path.join(test_dir, "encoding", "chardet", "test_big5.txt"), "rb") as fp:
encoding = _inputstream.HTMLInputStream(fp.read()).charEncoding
assert encoding[0].name == "big5"
# pylint:enable=wrong-import-position
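The parametrized cases above encode a precedence order for choosing the document encoding: a byte-order mark wins, then the override and transport encodings, then an in-page meta charset, then the same-origin parent encoding (unless it is a utf-16 variant), then the likely and default encodings, with windows-1252 as the final fallback. A small sketch of one of those cases, assuming html5lib is importable; it mirrors the row where a transport-level encoding beats the meta declaration:

from html5lib import _inputstream

stream = _inputstream.HTMLBinaryInputStream(
    b"<meta charset=iso-8859-3>",
    useChardet=False,
    transport_encoding="iso-8859-2",
)
# The transport encoding takes precedence over the prescanned <meta charset>.
assert stream.charEncoding[0].name == "iso-8859-2"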

View file

@ -1,41 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
import six
from mock import Mock
from . import support
def _createReprMock(r):
"""Creates a mock with a __repr__ returning r
Also provides __str__ mock with default mock behaviour"""
mock = Mock()
mock.__repr__ = Mock()
mock.__repr__.return_value = r
mock.__str__ = Mock(wraps=mock.__str__)
return mock
def test_errorMessage():
# Create mock objects to take repr of
input = _createReprMock("1")
expected = _createReprMock("2")
actual = _createReprMock("3")
# Run the actual test
r = support.errorMessage(input, expected, actual)
# Assertions!
if six.PY2:
assert b"Input:\n1\nExpected:\n2\nReceived\n3\n" == r
else:
assert six.PY3
assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r
assert input.__repr__.call_count == 1
assert expected.__repr__.call_count == 1
assert actual.__repr__.call_count == 1
assert not input.__str__.called
assert not expected.__str__.called
assert not actual.__str__.called

View file

@ -1,7 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
from html5lib.filters.optionaltags import Filter
def test_empty():
assert list(Filter([])) == []

View file

@ -1,94 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
from six import PY2, text_type
import io
from . import support # noqa
from html5lib.constants import namespaces
from html5lib import parse, parseFragment, HTMLParser
# tests that aren't autogenerated from text files
def test_assertDoctypeCloneable():
doc = parse('<!DOCTYPE HTML>', treebuilder="dom")
assert doc.cloneNode(True) is not None
def test_line_counter():
# http://groups.google.com/group/html5lib-discuss/browse_frm/thread/f4f00e4a2f26d5c0
assert parse("<pre>\nx\n&gt;\n</pre>") is not None
def test_namespace_html_elements_0_dom():
doc = parse("<html></html>",
treebuilder="dom",
namespaceHTMLElements=True)
assert doc.childNodes[0].namespaceURI == namespaces["html"]
def test_namespace_html_elements_1_dom():
doc = parse("<html></html>",
treebuilder="dom",
namespaceHTMLElements=False)
assert doc.childNodes[0].namespaceURI is None
def test_namespace_html_elements_0_etree():
doc = parse("<html></html>",
treebuilder="etree",
namespaceHTMLElements=True)
assert doc.tag == "{%s}html" % (namespaces["html"],)
def test_namespace_html_elements_1_etree():
doc = parse("<html></html>",
treebuilder="etree",
namespaceHTMLElements=False)
assert doc.tag == "html"
def test_unicode_file():
assert parse(io.StringIO("a")) is not None
def test_debug_log():
parser = HTMLParser(debug=True)
parser.parse("<!doctype html><title>a</title><p>b<script>c</script>d</p>e")
expected = [('dataState', 'InitialPhase', 'InitialPhase', 'processDoctype', {'type': 'Doctype'}),
('dataState', 'BeforeHtmlPhase', 'BeforeHtmlPhase', 'processStartTag', {'name': 'title', 'type': 'StartTag'}),
('dataState', 'BeforeHeadPhase', 'BeforeHeadPhase', 'processStartTag', {'name': 'title', 'type': 'StartTag'}),
('dataState', 'InHeadPhase', 'InHeadPhase', 'processStartTag', {'name': 'title', 'type': 'StartTag'}),
('rcdataState', 'TextPhase', 'TextPhase', 'processCharacters', {'type': 'Characters'}),
('dataState', 'TextPhase', 'TextPhase', 'processEndTag', {'name': 'title', 'type': 'EndTag'}),
('dataState', 'InHeadPhase', 'InHeadPhase', 'processStartTag', {'name': 'p', 'type': 'StartTag'}),
('dataState', 'AfterHeadPhase', 'AfterHeadPhase', 'processStartTag', {'name': 'p', 'type': 'StartTag'}),
('dataState', 'InBodyPhase', 'InBodyPhase', 'processStartTag', {'name': 'p', 'type': 'StartTag'}),
('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'}),
('dataState', 'InBodyPhase', 'InBodyPhase', 'processStartTag', {'name': 'script', 'type': 'StartTag'}),
('dataState', 'InBodyPhase', 'InHeadPhase', 'processStartTag', {'name': 'script', 'type': 'StartTag'}),
('scriptDataState', 'TextPhase', 'TextPhase', 'processCharacters', {'type': 'Characters'}),
('dataState', 'TextPhase', 'TextPhase', 'processEndTag', {'name': 'script', 'type': 'EndTag'}),
('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'}),
('dataState', 'InBodyPhase', 'InBodyPhase', 'processEndTag', {'name': 'p', 'type': 'EndTag'}),
('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'})]
if PY2:
for i, log in enumerate(expected):
log = [x.encode("ascii") if isinstance(x, text_type) else x for x in log]
expected[i] = tuple(log)
assert parser.log == expected
def test_no_duplicate_clone():
frag = parseFragment("<b><em><foo><foob><fooc><aside></b></em>")
assert len(frag) == 2
def test_self_closing_col():
parser = HTMLParser()
parser.parseFragment('<table><colgroup><col /></colgroup></table>')
assert not parser.errors

View file

@ -1,133 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
import pytest
from html5lib import constants, parseFragment, serialize
from html5lib.filters import sanitizer
def sanitize_html(stream):
parsed = parseFragment(stream)
with pytest.deprecated_call():
serialized = serialize(parsed,
sanitize=True,
omit_optional_tags=False,
use_trailing_solidus=True,
space_before_trailing_solidus=False,
quote_attr_values="always",
quote_char='"',
alphabetical_attributes=True)
return serialized
def test_should_handle_astral_plane_characters():
sanitized = sanitize_html("<p>&#x1d4b5; &#x1d538;</p>")
expected = '<p>\U0001d4b5 \U0001d538</p>'
assert expected == sanitized
def test_should_allow_relative_uris():
sanitized = sanitize_html('<p><a href="/example.com"></a></p>')
expected = '<p><a href="/example.com"></a></p>'
assert expected == sanitized
def test_invalid_data_uri():
sanitized = sanitize_html('<audio controls="" src="data:foobar"></audio>')
expected = '<audio controls></audio>'
assert expected == sanitized
def test_invalid_ipv6_url():
sanitized = sanitize_html('<a href="h://]">')
expected = "<a></a>"
assert expected == sanitized
def test_data_uri_disallowed_type():
sanitized = sanitize_html('<audio controls="" src="data:text/html,<html>"></audio>')
expected = "<audio controls></audio>"
assert expected == sanitized
def param_sanitizer():
for ns, tag_name in sanitizer.allowed_elements:
if ns != constants.namespaces["html"]:
continue
if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td',
'tfoot', 'th', 'thead', 'tr', 'select']:
continue # TODO
if tag_name == 'image':
yield ("test_should_allow_%s_tag" % tag_name,
"<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
elif tag_name == 'br':
yield ("test_should_allow_%s_tag" % tag_name,
"<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
elif tag_name in constants.voidElements:
yield ("test_should_allow_%s_tag" % tag_name,
"<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz" % tag_name,
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
else:
yield ("test_should_allow_%s_tag" % tag_name,
"<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>" % (tag_name, tag_name),
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
for ns, attribute_name in sanitizer.allowed_attributes:
if ns is not None:
continue
if attribute_name != attribute_name.lower():
continue # TODO
if attribute_name == 'style':
continue
attribute_value = 'foo'
if attribute_name in sanitizer.attr_val_is_uri:
attribute_value = '%s://sub.domain.tld/path/object.ext' % sanitizer.allowed_protocols[0]
yield ("test_should_allow_%s_attribute" % attribute_name,
"<p %s=\"%s\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>" % (attribute_name, attribute_value),
"<p %s='%s'>foo <bad>bar</bad> baz</p>" % (attribute_name, attribute_value))
for protocol in sanitizer.allowed_protocols:
rest_of_uri = '//sub.domain.tld/path/object.ext'
if protocol == 'data':
rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
yield ("test_should_allow_uppercase_%s_uris" % protocol,
"<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
"""<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))
for protocol in sanitizer.allowed_protocols:
rest_of_uri = '//sub.domain.tld/path/object.ext'
if protocol == 'data':
rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
protocol = protocol.upper()
yield ("test_should_allow_uppercase_%s_uris" % protocol,
"<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
"""<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))
@pytest.mark.parametrize("expected, input",
(pytest.param(expected, input, id=id)
for id, expected, input in param_sanitizer()))
def test_sanitizer(expected, input):
parsed = parseFragment(expected)
expected = serialize(parsed,
omit_optional_tags=False,
use_trailing_solidus=True,
space_before_trailing_solidus=False,
quote_attr_values="always",
quote_char='"',
alphabetical_attributes=True)
assert expected == sanitize_html(input)
def test_lowercase_color_codes_in_style():
sanitized = sanitize_html("<p style=\"border: 1px solid #a2a2a2;\"></p>")
expected = '<p style=\"border: 1px solid #a2a2a2;\"></p>'
assert expected == sanitized
def test_uppercase_color_codes_in_style():
sanitized = sanitize_html("<p style=\"border: 1px solid #A2A2A2;\"></p>")
expected = '<p style=\"border: 1px solid #A2A2A2;\"></p>'
assert expected == sanitized

View file

@ -1,226 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
import os
import json
import pytest
from .support import get_data_files
from html5lib import constants
from html5lib.filters.lint import Filter as Lint
from html5lib.serializer import HTMLSerializer, serialize
from html5lib.treewalkers.base import TreeWalker
# pylint:disable=wrong-import-position
optionals_loaded = []
try:
from lxml import etree
optionals_loaded.append("lxml")
except ImportError:
pass
# pylint:enable=wrong-import-position
default_namespace = constants.namespaces["html"]
class JsonWalker(TreeWalker):
def __iter__(self):
for token in self.tree:
type = token[0]
if type == "StartTag":
if len(token) == 4:
namespace, name, attrib = token[1:4]
else:
namespace = default_namespace
name, attrib = token[1:3]
yield self.startTag(namespace, name, self._convertAttrib(attrib))
elif type == "EndTag":
if len(token) == 3:
namespace, name = token[1:3]
else:
namespace = default_namespace
name = token[1]
yield self.endTag(namespace, name)
elif type == "EmptyTag":
if len(token) == 4:
namespace, name, attrib = token[1:]
else:
namespace = default_namespace
name, attrib = token[1:]
for token in self.emptyTag(namespace, name, self._convertAttrib(attrib)):
yield token
elif type == "Comment":
yield self.comment(token[1])
elif type in ("Characters", "SpaceCharacters"):
for token in self.text(token[1]):
yield token
elif type == "Doctype":
if len(token) == 4:
yield self.doctype(token[1], token[2], token[3])
elif len(token) == 3:
yield self.doctype(token[1], token[2])
else:
yield self.doctype(token[1])
else:
raise ValueError("Unknown token type: " + type)
def _convertAttrib(self, attribs):
"""html5lib tree-walkers use a dict of (namespace, name): value for
attributes, but JSON cannot represent this. Convert from the format
in the serializer tests (a list of dicts with "namespace", "name",
and "value" as keys) to html5lib's tree-walker format."""
attrs = {}
for attrib in attribs:
name = (attrib["namespace"], attrib["name"])
assert(name not in attrs)
attrs[name] = attrib["value"]
return attrs
def serialize_html(input, options):
options = {str(k): v for k, v in options.items()}
encoding = options.get("encoding", None)
if "encoding" in options:
del options["encoding"]
stream = Lint(JsonWalker(input), False)
serializer = HTMLSerializer(alphabetical_attributes=True, **options)
return serializer.render(stream, encoding)
def throwsWithLatin1(input):
with pytest.raises(UnicodeEncodeError):
serialize_html(input, {"encoding": "iso-8859-1"})
def testDoctypeName():
throwsWithLatin1([["Doctype", "\u0101"]])
def testDoctypePublicId():
throwsWithLatin1([["Doctype", "potato", "\u0101"]])
def testDoctypeSystemId():
throwsWithLatin1([["Doctype", "potato", "potato", "\u0101"]])
def testCdataCharacters():
test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]],
["<style>&amacr;"], {"encoding": "iso-8859-1"})
def testCharacters():
test_serializer([["Characters", "\u0101"]],
["&amacr;"], {"encoding": "iso-8859-1"})
def testStartTagName():
throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "\u0101", []]])
def testAttributeName():
throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": None, "name": "\u0101", "value": "potato"}]]])
def testAttributeValue():
test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "span",
[{"namespace": None, "name": "potato", "value": "\u0101"}]]],
["<span potato=&amacr;>"], {"encoding": "iso-8859-1"})
def testEndTagName():
throwsWithLatin1([["EndTag", "http://www.w3.org/1999/xhtml", "\u0101"]])
def testComment():
throwsWithLatin1([["Comment", "\u0101"]])
def testThrowsUnknownOption():
with pytest.raises(TypeError):
HTMLSerializer(foobar=None)
@pytest.mark.parametrize("c", list("\t\n\u000C\x20\r\"'=<>`"))
def testSpecQuoteAttribute(c):
input_ = [["StartTag", "http://www.w3.org/1999/xhtml", "span",
[{"namespace": None, "name": "foo", "value": c}]]]
if c == '"':
output_ = ["<span foo='%s'>" % c]
else:
output_ = ['<span foo="%s">' % c]
options_ = {"quote_attr_values": "spec"}
test_serializer(input_, output_, options_)
@pytest.mark.parametrize("c", list("\t\n\u000C\x20\r\"'=<>`"
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
"\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15"
"\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
"\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000"
"\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
"\u2008\u2009\u200a\u2028\u2029\u202f\u205f"
"\u3000"))
def testLegacyQuoteAttribute(c):
input_ = [["StartTag", "http://www.w3.org/1999/xhtml", "span",
[{"namespace": None, "name": "foo", "value": c}]]]
if c == '"':
output_ = ["<span foo='%s'>" % c]
else:
output_ = ['<span foo="%s">' % c]
options_ = {"quote_attr_values": "legacy"}
test_serializer(input_, output_, options_)
@pytest.fixture
def lxml_parser():
return etree.XMLParser(resolve_entities=False)
@pytest.mark.skipif("lxml" not in optionals_loaded, reason="lxml not importable")
def testEntityReplacement(lxml_parser):
doc = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>'
tree = etree.fromstring(doc, parser=lxml_parser).getroottree()
result = serialize(tree, tree="lxml", omit_optional_tags=False)
assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>\u03B2</html>'
@pytest.mark.skipif("lxml" not in optionals_loaded, reason="lxml not importable")
def testEntityXML(lxml_parser):
doc = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>'
tree = etree.fromstring(doc, parser=lxml_parser).getroottree()
result = serialize(tree, tree="lxml", omit_optional_tags=False)
assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>'
@pytest.mark.skipif("lxml" not in optionals_loaded, reason="lxml not importable")
def testEntityNoResolve(lxml_parser):
doc = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>'
tree = etree.fromstring(doc, parser=lxml_parser).getroottree()
result = serialize(tree, tree="lxml", omit_optional_tags=False,
resolve_entities=False)
assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>'
def param_serializer():
for filename in get_data_files('serializer-testdata', '*.test', os.path.dirname(__file__)):
with open(filename) as fp:
tests = json.load(fp)
for test in tests['tests']:
yield test["input"], test["expected"], test.get("options", {})
@pytest.mark.parametrize("input, expected, options", param_serializer())
def test_serializer(input, expected, options):
encoding = options.get("encoding", None)
if encoding:
expected = list(map(lambda x: x.encode(encoding), expected))
result = serialize_html(input, options)
if len(expected) == 1:
assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
elif result not in expected:
assert False, "Expected: %s, Received: %s" % (expected, result)

View file

@ -1,325 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
from . import support # noqa
import codecs
import sys
from io import BytesIO, StringIO
import pytest
import six
from six.moves import http_client, urllib
from html5lib._inputstream import (BufferedStream, HTMLInputStream,
HTMLUnicodeInputStream, HTMLBinaryInputStream)
from html5lib._utils import supports_lone_surrogates
def test_basic():
s = b"abc"
fp = BufferedStream(BytesIO(s))
read = fp.read(10)
assert read == s
def test_read_length():
fp = BufferedStream(BytesIO(b"abcdef"))
read1 = fp.read(1)
assert read1 == b"a"
read2 = fp.read(2)
assert read2 == b"bc"
read3 = fp.read(3)
assert read3 == b"def"
read4 = fp.read(4)
assert read4 == b""
def test_tell():
fp = BufferedStream(BytesIO(b"abcdef"))
read1 = fp.read(1)
assert read1 == b"a"
assert fp.tell() == 1
read2 = fp.read(2)
assert read2 == b"bc"
assert fp.tell() == 3
read3 = fp.read(3)
assert read3 == b"def"
assert fp.tell() == 6
read4 = fp.read(4)
assert read4 == b""
assert fp.tell() == 6
def test_seek():
fp = BufferedStream(BytesIO(b"abcdef"))
read1 = fp.read(1)
assert read1 == b"a"
fp.seek(0)
read2 = fp.read(1)
assert read2 == b"a"
read3 = fp.read(2)
assert read3 == b"bc"
fp.seek(2)
read4 = fp.read(2)
assert read4 == b"cd"
fp.seek(4)
read5 = fp.read(2)
assert read5 == b"ef"
def test_seek_tell():
fp = BufferedStream(BytesIO(b"abcdef"))
read1 = fp.read(1)
assert read1 == b"a"
assert fp.tell() == 1
fp.seek(0)
read2 = fp.read(1)
assert read2 == b"a"
assert fp.tell() == 1
read3 = fp.read(2)
assert read3 == b"bc"
assert fp.tell() == 3
fp.seek(2)
read4 = fp.read(2)
assert read4 == b"cd"
assert fp.tell() == 4
fp.seek(4)
read5 = fp.read(2)
assert read5 == b"ef"
assert fp.tell() == 6
class HTMLUnicodeInputStreamShortChunk(HTMLUnicodeInputStream):
_defaultChunkSize = 2
class HTMLBinaryInputStreamShortChunk(HTMLBinaryInputStream):
_defaultChunkSize = 2
def test_char_ascii():
stream = HTMLInputStream(b"'", override_encoding='ascii')
assert stream.charEncoding[0].name == 'windows-1252'
assert stream.char() == "'"
def test_char_utf8():
stream = HTMLInputStream('\u2018'.encode('utf-8'), override_encoding='utf-8')
assert stream.charEncoding[0].name == 'utf-8'
assert stream.char() == '\u2018'
def test_char_win1252():
stream = HTMLInputStream("\xa9\xf1\u2019".encode('windows-1252'))
assert stream.charEncoding[0].name == 'windows-1252'
assert stream.char() == "\xa9"
assert stream.char() == "\xf1"
assert stream.char() == "\u2019"
def test_bom():
stream = HTMLInputStream(codecs.BOM_UTF8 + b"'")
assert stream.charEncoding[0].name == 'utf-8'
assert stream.char() == "'"
def test_utf_16():
stream = HTMLInputStream((' ' * 1025).encode('utf-16'))
assert stream.charEncoding[0].name in ['utf-16le', 'utf-16be']
assert len(stream.charsUntil(' ', True)) == 1025
def test_newlines():
stream = HTMLBinaryInputStreamShortChunk(codecs.BOM_UTF8 + b"a\nbb\r\nccc\rddddxe")
assert stream.position() == (1, 0)
assert stream.charsUntil('c') == "a\nbb\n"
assert stream.position() == (3, 0)
assert stream.charsUntil('x') == "ccc\ndddd"
assert stream.position() == (4, 4)
assert stream.charsUntil('e') == "x"
assert stream.position() == (4, 5)
def test_newlines2():
size = HTMLUnicodeInputStream._defaultChunkSize
stream = HTMLInputStream("\r" * size + "\n")
assert stream.charsUntil('x') == "\n" * size
def test_position():
stream = HTMLBinaryInputStreamShortChunk(codecs.BOM_UTF8 + b"a\nbb\nccc\nddde\nf\ngh")
assert stream.position() == (1, 0)
assert stream.charsUntil('c') == "a\nbb\n"
assert stream.position() == (3, 0)
stream.unget("\n")
assert stream.position() == (2, 2)
assert stream.charsUntil('c') == "\n"
assert stream.position() == (3, 0)
stream.unget("\n")
assert stream.position() == (2, 2)
assert stream.char() == "\n"
assert stream.position() == (3, 0)
assert stream.charsUntil('e') == "ccc\nddd"
assert stream.position() == (4, 3)
assert stream.charsUntil('h') == "e\nf\ng"
assert stream.position() == (6, 1)
def test_position2():
stream = HTMLUnicodeInputStreamShortChunk("abc\nd")
assert stream.position() == (1, 0)
assert stream.char() == "a"
assert stream.position() == (1, 1)
assert stream.char() == "b"
assert stream.position() == (1, 2)
assert stream.char() == "c"
assert stream.position() == (1, 3)
assert stream.char() == "\n"
assert stream.position() == (2, 0)
assert stream.char() == "d"
assert stream.position() == (2, 1)
def test_python_issue_20007():
"""
Make sure we have a work-around for Python bug #20007
http://bugs.python.org/issue20007
"""
class FakeSocket(object):
def makefile(self, _mode, _bufsize=None):
# pylint:disable=unused-argument
return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText")
source = http_client.HTTPResponse(FakeSocket())
source.begin()
stream = HTMLInputStream(source)
assert stream.charsUntil(" ") == "Text"
def test_python_issue_20007_b():
"""
Make sure we have a work-around for Python bug #20007
http://bugs.python.org/issue20007
"""
if six.PY2:
return
class FakeSocket(object):
def makefile(self, _mode, _bufsize=None):
# pylint:disable=unused-argument
return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText")
source = http_client.HTTPResponse(FakeSocket())
source.begin()
wrapped = urllib.response.addinfourl(source, source.msg, "http://example.com")
stream = HTMLInputStream(wrapped)
assert stream.charsUntil(" ") == "Text"
@pytest.mark.parametrize("inp,num",
[("\u0000", 0),
("\u0001", 1),
("\u0008", 1),
("\u0009", 0),
("\u000A", 0),
("\u000B", 1),
("\u000C", 0),
("\u000D", 0),
("\u000E", 1),
("\u001F", 1),
("\u0020", 0),
("\u007E", 0),
("\u007F", 1),
("\u009F", 1),
("\u00A0", 0),
("\uFDCF", 0),
("\uFDD0", 1),
("\uFDEF", 1),
("\uFDF0", 0),
("\uFFFD", 0),
("\uFFFE", 1),
("\uFFFF", 1),
("\U0001FFFD", 0),
("\U0001FFFE", 1),
("\U0001FFFF", 1),
("\U0002FFFD", 0),
("\U0002FFFE", 1),
("\U0002FFFF", 1),
("\U0003FFFD", 0),
("\U0003FFFE", 1),
("\U0003FFFF", 1),
("\U0004FFFD", 0),
("\U0004FFFE", 1),
("\U0004FFFF", 1),
("\U0005FFFD", 0),
("\U0005FFFE", 1),
("\U0005FFFF", 1),
("\U0006FFFD", 0),
("\U0006FFFE", 1),
("\U0006FFFF", 1),
("\U0007FFFD", 0),
("\U0007FFFE", 1),
("\U0007FFFF", 1),
("\U0008FFFD", 0),
("\U0008FFFE", 1),
("\U0008FFFF", 1),
("\U0009FFFD", 0),
("\U0009FFFE", 1),
("\U0009FFFF", 1),
("\U000AFFFD", 0),
("\U000AFFFE", 1),
("\U000AFFFF", 1),
("\U000BFFFD", 0),
("\U000BFFFE", 1),
("\U000BFFFF", 1),
("\U000CFFFD", 0),
("\U000CFFFE", 1),
("\U000CFFFF", 1),
("\U000DFFFD", 0),
("\U000DFFFE", 1),
("\U000DFFFF", 1),
("\U000EFFFD", 0),
("\U000EFFFE", 1),
("\U000EFFFF", 1),
("\U000FFFFD", 0),
("\U000FFFFE", 1),
("\U000FFFFF", 1),
("\U0010FFFD", 0),
("\U0010FFFE", 1),
("\U0010FFFF", 1),
("\x01\x01\x01", 3),
("a\x01a\x01a\x01a", 3)])
def test_invalid_codepoints(inp, num):
stream = HTMLUnicodeInputStream(StringIO(inp))
for _i in range(len(inp)):
stream.char()
assert len(stream.errors) == num
@pytest.mark.skipif(not supports_lone_surrogates, reason="doesn't support lone surrogates")
@pytest.mark.parametrize("inp,num",
[("'\\uD7FF'", 0),
("'\\uD800'", 1),
("'\\uDBFF'", 1),
("'\\uDC00'", 1),
("'\\uDFFF'", 1),
("'\\uE000'", 0),
("'\\uD800\\uD800\\uD800'", 3),
("'a\\uD800a\\uD800a\\uD800a'", 3),
("'\\uDFFF\\uDBFF'", 2),
pytest.param(
"'\\uDBFF\\uDFFF'", 2,
marks=pytest.mark.skipif(
sys.maxunicode == 0xFFFF,
reason="narrow Python"))])
def test_invalid_codepoints_surrogates(inp, num):
inp = eval(inp) # pylint:disable=eval-used
fp = StringIO(inp)
if ord(max(fp.read())) > 0xFFFF:
pytest.skip("StringIO altered string")
fp.seek(0)
stream = HTMLUnicodeInputStream(fp)
for _i in range(len(inp)):
stream.char()
assert len(stream.errors) == num

View file

@ -1,66 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
import io
from six import unichr, text_type
from html5lib._tokenizer import HTMLTokenizer
from html5lib.constants import tokenTypes
def ignore_parse_errors(toks):
for tok in toks:
if tok['type'] != tokenTypes['ParseError']:
yield tok
def test_maintain_attribute_order():
# generate loads to maximize the chance a hash-based mutation will occur
attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
stream = io.StringIO("<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + ">")
toks = HTMLTokenizer(stream)
out = list(ignore_parse_errors(toks))
assert len(out) == 1
assert out[0]['type'] == tokenTypes['StartTag']
attrs_tok = out[0]['data']
assert len(attrs_tok) == len(attrs)
for (in_name, in_value), (out_name, out_value) in zip(attrs, attrs_tok.items()):
assert in_name == out_name
assert in_value == out_value
def test_duplicate_attribute():
stream = io.StringIO("<span a=1 a=2 a=3>")
toks = HTMLTokenizer(stream)
out = list(ignore_parse_errors(toks))
assert len(out) == 1
assert out[0]['type'] == tokenTypes['StartTag']
attrs_tok = out[0]['data']
assert len(attrs_tok) == 1
assert list(attrs_tok.items()) == [('a', '1')]
def test_maintain_duplicate_attribute_order():
# generate loads to maximize the chance a hash-based mutation will occur
attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
stream = io.StringIO("<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + " a=100>")
toks = HTMLTokenizer(stream)
out = list(ignore_parse_errors(toks))
assert len(out) == 1
assert out[0]['type'] == tokenTypes['StartTag']
attrs_tok = out[0]['data']
assert len(attrs_tok) == len(attrs)
for (in_name, in_value), (out_name, out_value) in zip(attrs, attrs_tok.items()):
assert in_name == out_name
assert in_value == out_value

View file

@ -1,40 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
from . import support # noqa
import html5lib
from html5lib.treeadapters import sax
from html5lib.treewalkers import getTreeWalker
def test_to_sax():
handler = support.TracingSaxHandler()
tree = html5lib.parse("""<html xml:lang="en">
<title>Directory Listing</title>
<a href="/"><b/></p>
""", treebuilder="etree")
walker = getTreeWalker("etree")
sax.to_sax(walker(tree), handler)
expected = [
'startDocument',
('startElementNS', ('http://www.w3.org/1999/xhtml', 'html'),
'html', {(None, 'xml:lang'): 'en'}),
('startElementNS', ('http://www.w3.org/1999/xhtml', 'head'), 'head', {}),
('startElementNS', ('http://www.w3.org/1999/xhtml', 'title'), 'title', {}),
('characters', 'Directory Listing'),
('endElementNS', ('http://www.w3.org/1999/xhtml', 'title'), 'title'),
('characters', '\n '),
('endElementNS', ('http://www.w3.org/1999/xhtml', 'head'), 'head'),
('startElementNS', ('http://www.w3.org/1999/xhtml', 'body'), 'body', {}),
('startElementNS', ('http://www.w3.org/1999/xhtml', 'a'), 'a', {(None, 'href'): '/'}),
('startElementNS', ('http://www.w3.org/1999/xhtml', 'b'), 'b', {}),
('startElementNS', ('http://www.w3.org/1999/xhtml', 'p'), 'p', {}),
('endElementNS', ('http://www.w3.org/1999/xhtml', 'p'), 'p'),
('characters', '\n '),
('endElementNS', ('http://www.w3.org/1999/xhtml', 'b'), 'b'),
('endElementNS', ('http://www.w3.org/1999/xhtml', 'a'), 'a'),
('endElementNS', ('http://www.w3.org/1999/xhtml', 'body'), 'body'),
('endElementNS', ('http://www.w3.org/1999/xhtml', 'html'), 'html'),
'endDocument',
]
assert expected == handler.visited
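
A rough sketch of wiring the adapter to a standard xml.sax ContentHandler; the Printer class is an illustrative stand-in for the tracing handler used above.

import html5lib
from html5lib.treeadapters import sax
from html5lib.treewalkers import getTreeWalker
from xml.sax.handler import ContentHandler

class Printer(ContentHandler):
    def startElementNS(self, name, qname, attrs):  # name is (namespace, localname)
        print("start", name)

tree = html5lib.parse("<p>hi</p>", treebuilder="etree")
sax.to_sax(getTreeWalker("etree")(tree), Printer())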

View file

@@ -1,205 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
import itertools
import sys
from six import unichr, text_type
import pytest
try:
import lxml.etree
except ImportError:
pass
from .support import treeTypes
from html5lib import html5parser, treewalkers
from html5lib.filters.lint import Filter as Lint
import re
attrlist = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+", re.M)
def sortattrs(x):
lines = x.group(0).split("\n")
lines.sort()
return "\n".join(lines)
def test_all_tokens():
expected = [
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'},
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
{'data': 'a', 'type': 'Characters'},
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
{'data': 'b', 'type': 'Characters'},
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
{'data': 'c', 'type': 'Characters'},
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'}
]
for _, treeCls in sorted(treeTypes.items()):
if treeCls is None:
continue
p = html5parser.HTMLParser(tree=treeCls["builder"])
document = p.parse("<html><head></head><body>a<div>b</div>c</body></html>")
document = treeCls.get("adapter", lambda x: x)(document)
output = Lint(treeCls["walker"](document))
for expectedToken, outputToken in zip(expected, output):
assert expectedToken == outputToken
def set_attribute_on_first_child(docfrag, name, value, treeName):
"""naively sets an attribute on the first child of the document
fragment passed in"""
setter = {'ElementTree': lambda d: d[0].set,
'DOM': lambda d: d.firstChild.setAttribute}
setter['cElementTree'] = setter['ElementTree']
try:
setter.get(treeName, setter['DOM'])(docfrag)(name, value)
except AttributeError:
setter['ElementTree'](docfrag)(name, value)
def param_treewalker_six_mix():
"""Str/Unicode mix. If str attrs added to tree"""
# On Python 2.x string literals are of type str. Unless, like this
# file, the programmer imports unicode_literals from __future__.
# In that case, string literals become objects of type unicode.
# This test simulates a Py2 user, modifying attributes on a document
# fragment but not using the u'' syntax nor importing unicode_literals
sm_tests = [
('<a href="http://example.com">Example</a>',
[(str('class'), str('test123'))],
'<a>\n class="test123"\n href="http://example.com"\n "Example"'),
('<link href="http://example.com/cow">',
[(str('rel'), str('alternate'))],
'<link>\n href="http://example.com/cow"\n rel="alternate"\n "Example"')
]
for tree in sorted(treeTypes.items()):
for intext, attrs, expected in sm_tests:
yield intext, expected, attrs, tree
@pytest.mark.parametrize("intext, expected, attrs_to_add, tree", param_treewalker_six_mix())
def test_treewalker_six_mix(intext, expected, attrs_to_add, tree):
"""tests what happens when we add attributes to the intext"""
treeName, treeClass = tree
if treeClass is None:
pytest.skip("Treebuilder not loaded")
parser = html5parser.HTMLParser(tree=treeClass["builder"])
document = parser.parseFragment(intext)
for nom, val in attrs_to_add:
set_attribute_on_first_child(document, nom, val, treeName)
document = treeClass.get("adapter", lambda x: x)(document)
output = treewalkers.pprint(treeClass["walker"](document))
output = attrlist.sub(sortattrs, output)
if output not in expected:
raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))
@pytest.mark.parametrize("tree,char", itertools.product(sorted(treeTypes.items()), ["x", "\u1234"]))
def test_fragment_single_char(tree, char):
expected = [
{'data': char, 'type': 'Characters'}
]
treeName, treeClass = tree
if treeClass is None:
pytest.skip("Treebuilder not loaded")
parser = html5parser.HTMLParser(tree=treeClass["builder"])
document = parser.parseFragment(char)
document = treeClass.get("adapter", lambda x: x)(document)
output = Lint(treeClass["walker"](document))
assert list(output) == expected
@pytest.mark.skipif(treeTypes["lxml"] is None, reason="lxml not importable")
def test_lxml_xml():
expected = [
{'data': {}, 'name': 'div', 'namespace': None, 'type': 'StartTag'},
{'data': {}, 'name': 'div', 'namespace': None, 'type': 'StartTag'},
{'name': 'div', 'namespace': None, 'type': 'EndTag'},
{'name': 'div', 'namespace': None, 'type': 'EndTag'}
]
lxmltree = lxml.etree.fromstring('<div><div></div></div>')
walker = treewalkers.getTreeWalker('lxml')
output = Lint(walker(lxmltree))
assert list(output) == expected
@pytest.mark.parametrize("treeName",
[pytest.param(treeName, marks=[getattr(pytest.mark, treeName),
pytest.mark.skipif(
treeName != "lxml" or
sys.version_info < (3, 7), reason="dict order undef")])
for treeName in sorted(treeTypes.keys())])
def test_maintain_attribute_order(treeName):
treeAPIs = treeTypes[treeName]
if treeAPIs is None:
pytest.skip("Treebuilder not loaded")
# generate loads to maximize the chance a hash-based mutation will occur
attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
data = "<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + ">"
parser = html5parser.HTMLParser(tree=treeAPIs["builder"])
document = parser.parseFragment(data)
document = treeAPIs.get("adapter", lambda x: x)(document)
output = list(Lint(treeAPIs["walker"](document)))
assert len(output) == 2
assert output[0]['type'] == 'StartTag'
assert output[1]['type'] == "EndTag"
attrs_out = output[0]['data']
assert len(attrs) == len(attrs_out)
for (in_name, in_value), (out_name, out_value) in zip(attrs, attrs_out.items()):
assert (None, in_name) == out_name
assert in_value == out_value
@pytest.mark.parametrize("treeName",
[pytest.param(treeName, marks=[getattr(pytest.mark, treeName),
pytest.mark.skipif(
treeName != "lxml" or
sys.version_info < (3, 7), reason="dict order undef")])
for treeName in sorted(treeTypes.keys())])
def test_maintain_attribute_order_adjusted(treeName):
treeAPIs = treeTypes[treeName]
if treeAPIs is None:
pytest.skip("Treebuilder not loaded")
# generate loads to maximize the chance a hash-based mutation will occur
data = "<svg a=1 refx=2 b=3 xml:lang=4 c=5>"
parser = html5parser.HTMLParser(tree=treeAPIs["builder"])
document = parser.parseFragment(data)
document = treeAPIs.get("adapter", lambda x: x)(document)
output = list(Lint(treeAPIs["walker"](document)))
assert len(output) == 2
assert output[0]['type'] == 'StartTag'
assert output[1]['type'] == "EndTag"
attrs_out = output[0]['data']
assert list(attrs_out.items()) == [((None, 'a'), '1'),
((None, 'refX'), '2'),
((None, 'b'), '3'),
(('http://www.w3.org/XML/1998/namespace', 'lang'), '4'),
((None, 'c'), '5')]
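
A short end-to-end sketch of the parse-then-walk pipeline these tests cover (assuming the default etree treebuilder is available):

import html5lib
from html5lib import treewalkers
from html5lib.filters.lint import Filter as Lint

fragment = html5lib.parseFragment("<div id=x>hi</div>", treebuilder="etree")
walker = treewalkers.getTreeWalker("etree")
for token in Lint(walker(fragment)):
    # e.g. StartTag div, Characters hi, EndTag div
    print(token["type"], token.get("name", token.get("data")))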

View file

@@ -1,125 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
from html5lib.filters.whitespace import Filter
from html5lib.constants import spaceCharacters
spaceCharacters = "".join(spaceCharacters)
def runTest(input, expected):
output = list(Filter(input))
errorMsg = "\n".join(["\n\nInput:", str(input),
"\nExpected:", str(expected),
"\nReceived:", str(output)])
assert expected == output, errorMsg
def runTestUnmodifiedOutput(input):
runTest(input, input)
def testPhrasingElements():
runTestUnmodifiedOutput(
[{"type": "Characters", "data": "This is a "},
{"type": "StartTag", "name": "span", "data": []},
{"type": "Characters", "data": "phrase"},
{"type": "EndTag", "name": "span", "data": []},
{"type": "SpaceCharacters", "data": " "},
{"type": "Characters", "data": "with"},
{"type": "SpaceCharacters", "data": " "},
{"type": "StartTag", "name": "em", "data": []},
{"type": "Characters", "data": "emphasised text"},
{"type": "EndTag", "name": "em", "data": []},
{"type": "Characters", "data": " and an "},
{"type": "StartTag", "name": "img", "data": [["alt", "image"]]},
{"type": "Characters", "data": "."}])
def testLeadingWhitespace():
runTest(
[{"type": "StartTag", "name": "p", "data": []},
{"type": "SpaceCharacters", "data": spaceCharacters},
{"type": "Characters", "data": "foo"},
{"type": "EndTag", "name": "p", "data": []}],
[{"type": "StartTag", "name": "p", "data": []},
{"type": "SpaceCharacters", "data": " "},
{"type": "Characters", "data": "foo"},
{"type": "EndTag", "name": "p", "data": []}])
def testLeadingWhitespaceAsCharacters():
runTest(
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": spaceCharacters + "foo"},
{"type": "EndTag", "name": "p", "data": []}],
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": " foo"},
{"type": "EndTag", "name": "p", "data": []}])
def testTrailingWhitespace():
runTest(
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": "foo"},
{"type": "SpaceCharacters", "data": spaceCharacters},
{"type": "EndTag", "name": "p", "data": []}],
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": "foo"},
{"type": "SpaceCharacters", "data": " "},
{"type": "EndTag", "name": "p", "data": []}])
def testTrailingWhitespaceAsCharacters():
runTest(
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": "foo" + spaceCharacters},
{"type": "EndTag", "name": "p", "data": []}],
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": "foo "},
{"type": "EndTag", "name": "p", "data": []}])
def testWhitespace():
runTest(
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": "foo" + spaceCharacters + "bar"},
{"type": "EndTag", "name": "p", "data": []}],
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": "foo bar"},
{"type": "EndTag", "name": "p", "data": []}])
def testLeadingWhitespaceInPre():
runTestUnmodifiedOutput(
[{"type": "StartTag", "name": "pre", "data": []},
{"type": "SpaceCharacters", "data": spaceCharacters},
{"type": "Characters", "data": "foo"},
{"type": "EndTag", "name": "pre", "data": []}])
def testLeadingWhitespaceAsCharactersInPre():
runTestUnmodifiedOutput(
[{"type": "StartTag", "name": "pre", "data": []},
{"type": "Characters", "data": spaceCharacters + "foo"},
{"type": "EndTag", "name": "pre", "data": []}])
def testTrailingWhitespaceInPre():
runTestUnmodifiedOutput(
[{"type": "StartTag", "name": "pre", "data": []},
{"type": "Characters", "data": "foo"},
{"type": "SpaceCharacters", "data": spaceCharacters},
{"type": "EndTag", "name": "pre", "data": []}])
def testTrailingWhitespaceAsCharactersInPre():
runTestUnmodifiedOutput(
[{"type": "StartTag", "name": "pre", "data": []},
{"type": "Characters", "data": "foo" + spaceCharacters},
{"type": "EndTag", "name": "pre", "data": []}])
def testWhitespaceInPre():
runTestUnmodifiedOutput(
[{"type": "StartTag", "name": "pre", "data": []},
{"type": "Characters", "data": "foo" + spaceCharacters + "bar"},
{"type": "EndTag", "name": "pre", "data": []}])

View file

@@ -1,2 +0,0 @@
*.dat -text diff
*.test -text diff

View file

@@ -1,34 +0,0 @@
Credits
=======
The ``html5lib`` test data is maintained by:
- James Graham
- Geoffrey Sneddon
Contributors
------------
- Adam Barth
- Andi Sidwell
- Anne van Kesteren
- David Flanagan
- Edward Z. Yang
- Geoffrey Sneddon
- Henri Sivonen
- Ian Hickson
- Jacques Distler
- James Graham
- Lachlan Hunt
- lantis63
- Mark Pilgrim
- Mats Palmgren
- Ms2ger
- Nolan Waite
- Philip Taylor
- Rafael Weinstein
- Ryan King
- Sam Ruby
- Simon Pieters
- Thomas Broyer

View file

@@ -1,21 +0,0 @@
Copyright (c) 2006-2013 James Graham, Geoffrey Sneddon, and
other contributors
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View file

@@ -1,51 +0,0 @@
老子《道德經》 第一~四十章
老子道經
第一章
道可道,非常道。名可名,非常名。無,名天地之始﹔有,名萬物之母。
故常無,欲以觀其妙;常有,欲以觀其徼。此兩者,同出而異名,同謂之
玄。玄之又玄,眾妙之門。
第二章
天下皆知美之為美,斯惡矣﹔皆知善之為善,斯不善矣。故有無相生,難
易相成,長短相形,高下相傾,音聲相和,前後相隨。是以聖人處「無為
」之事,行「不言」之教。萬物作焉而不辭,生而不有,為而不恃,功成
而弗居。夫唯弗居,是以不去。
第三章
不尚賢,使民不爭﹔不貴難得之貨,使民不為盜﹔不見可欲,使民心不亂
。是以「聖人」之治,虛其心,實其腹,弱其志,強其骨。常使民無知無
欲。使夫智者不敢為也。為「無為」,則無不治。
第四章
「道」沖,而用之或不盈。淵兮,似萬物之宗﹔挫其銳,解其紛,和其光
,同其塵﹔湛兮似或存。吾不知誰之子?象帝之先。
第五章
天地不仁,以萬物為芻狗﹔聖人不仁,以百姓為芻狗。天地之間,其猶橐
蘥乎?虛而不屈,動而愈出。多言數窮,不如守中。
第六章
谷神不死,是謂玄牝。玄牝之門,是謂天地根。綿綿若存,用之不勤。
第七章
天長地久。天地所以能長且久者,以其不自生,故能長久。是以聖人後其
身而身先,外其身而身存。非以其無私邪?故能成其私。
第八章
上善若水。水善利萬物而不爭。處眾人之所惡,故幾於道。居善地,心善
淵,與善仁,言善信,政善治,事善能,動善時。夫唯不爭,故無尤。
第九章
持而盈之,不如其已﹔揣而銳之,不可長保。金玉滿堂,莫之能守﹔富貴
而驕,自遺其咎。功遂身退,天之道。

View file

@@ -1,10 +0,0 @@
#data
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=euc-jp">
<!--京-->
<title>Yahoo! JAPAN</title>
<meta name="description" content="日本最大級のポータルサイト。検索、オークション、ニュース、メール、コミュニティ、ショッピング、など80以上のサービスを展開。あなたの生活をより豊かにする「ライフ・エンジン」を目指していきます。">
<style type="text/css" media="all">
#encoding
euc-jp

File diff suppressed because one or more lines are too long

View file

@@ -1,115 +0,0 @@
#data
<meta
#encoding
windows-1252
#data
<
#encoding
windows-1252
#data
<!
#encoding
windows-1252
#data
<meta charset = "
#encoding
windows-1252
#data
<meta charset=euc-jp
#encoding
windows-1252
#data
<meta <meta charset='euc-jp'>
#encoding
euc-jp
#data
<meta charset = 'euc-jp'>
#encoding
euc-jp
#data
<!-- -->
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
#encoding
utf-8
#data
<!-- -->
<meta http-equiv="Content-Type" content="text/html; charset=utf
#encoding
windows-1252
#data
<meta http-equiv="Content-Type<meta charset="utf-8">
#encoding
windows-1252
#data
<meta http-equiv="Content-Type" content="text/html; charset='utf-8'">
#encoding
utf-8
#data
<meta http-equiv="Content-Type" content="text/html; charset='utf-8">
#encoding
windows-1252
#data
<meta
#encoding
windows-1252
#data
<meta charset =
#encoding
windows-1252
#data
<meta charset= utf-8
>
#encoding
utf-8
#data
<meta content = "text/html;
#encoding
windows-1252
#data
<meta charset="UTF-16">
#encoding
utf-8
#data
<meta charset="UTF-16LE">
#encoding
utf-8
#data
<meta charset="UTF-16BE">
#encoding
utf-8
#data
<html a=ñ>
<meta charset="utf-8">
#encoding
utf-8
#data
<html ñ>
<meta charset="utf-8">
#encoding
utf-8
#data
<html>ñ
<meta charset="utf-8">
#encoding
utf-8

View file

@@ -1,125 +0,0 @@
{"tests": [
{"description": "proper attribute value escaping",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "test \"with\" &quot;"}]]],
"expected": ["<span title='test \"with\" &amp;quot;'>"]
},
{"description": "proper attribute value non-quoting",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo"}]]],
"expected": ["<span title=foo>"],
"xhtml": ["<span title=\"foo\">"]
},
{"description": "proper attribute value non-quoting (with <)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo<bar"}]]],
"expected": ["<span title=foo<bar>"],
"xhtml": ["<span title=\"foo&lt;bar\">"]
},
{"description": "proper attribute value quoting (with =)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo=bar"}]]],
"expected": ["<span title=\"foo=bar\">"]
},
{"description": "proper attribute value quoting (with >)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo>bar"}]]],
"expected": ["<span title=\"foo>bar\">"]
},
{"description": "proper attribute value quoting (with \")",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\"bar"}]]],
"expected": ["<span title='foo\"bar'>"]
},
{"description": "proper attribute value quoting (with ')",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo'bar"}]]],
"expected": ["<span title=\"foo'bar\">"]
},
{"description": "proper attribute value quoting (with both \" and ')",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo'bar\"baz"}]]],
"expected": ["<span title=\"foo'bar&quot;baz\">"]
},
{"description": "proper attribute value quoting (with space)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo bar"}]]],
"expected": ["<span title=\"foo bar\">"]
},
{"description": "proper attribute value quoting (with tab)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\tbar"}]]],
"expected": ["<span title=\"foo\tbar\">"]
},
{"description": "proper attribute value quoting (with LF)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\nbar"}]]],
"expected": ["<span title=\"foo\nbar\">"]
},
{"description": "proper attribute value quoting (with CR)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\rbar"}]]],
"expected": ["<span title=\"foo\rbar\">"]
},
{"description": "proper attribute value non-quoting (with linetab)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\u000Bbar"}]]],
"expected": ["<span title=foo\u000Bbar>"],
"xhtml": ["<span title=\"foo\u000Bbar\">"]
},
{"description": "proper attribute value quoting (with form feed)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\u000Cbar"}]]],
"expected": ["<span title=\"foo\u000Cbar\">"]
},
{"description": "void element (as EmptyTag token)",
"input": [["EmptyTag", "img", {}]],
"expected": ["<img>"],
"xhtml": ["<img />"]
},
{"description": "void element (as StartTag token)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "img", {}]],
"expected": ["<img>"],
"xhtml": ["<img />"]
},
{"description": "doctype in error",
"input": [["Doctype", "foo"]],
"expected": ["<!DOCTYPE foo>"]
},
{"description": "character data",
"options": {"encoding":"utf-8"},
"input": [["Characters", "a<b>c&d"]],
"expected": ["a&lt;b&gt;c&amp;d"]
},
{"description": "rcdata",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "a<b>c&d"]],
"expected": ["<script>a<b>c&d"],
"xhtml": ["<script>a&lt;b&gt;c&amp;d"]
},
{"description": "doctype",
"input": [["Doctype", "HTML"]],
"expected": ["<!DOCTYPE HTML>"]
},
{"description": "HTML 4.01 DOCTYPE",
"input": [["Doctype", "HTML", "-//W3C//DTD HTML 4.01//EN", "http://www.w3.org/TR/html4/strict.dtd"]],
"expected": ["<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"]
},
{"description": "HTML 4.01 DOCTYPE without system identifer",
"input": [["Doctype", "HTML", "-//W3C//DTD HTML 4.01//EN"]],
"expected": ["<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\">"]
},
{"description": "IBM DOCTYPE without public identifer",
"input": [["Doctype", "html", "", "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"]],
"expected": ["<!DOCTYPE html SYSTEM \"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd\">"]
}
]}
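
For reference, a rough sketch of how fixtures like these map onto the serializer API (option names as in html5lib 1.x, with the etree tree walker):

import html5lib
from html5lib import serializer, treewalkers

fragment = html5lib.parseFragment("<span title=foo=bar>x</span>")
walker = treewalkers.getTreeWalker("etree")
ser = serializer.HTMLSerializer(quote_attr_values="always")
# quote_attr_values="always" forces quoted output: <span title="foo=bar">x</span>
print("".join(ser.serialize(walker(fragment))))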

View file

@@ -1,66 +0,0 @@
{"tests": [
{"description": "no encoding",
"options": {"inject_meta_charset": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": [""],
"xhtml": ["<head></head>"]
},
{"description": "empytag head",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta charset=utf-8>"],
"xhtml": ["<head><meta charset=\"utf-8\" /></head>"]
},
{"description": "head w/title",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["StartTag", "http://www.w3.org/1999/xhtml","title",{}], ["Characters", "foo"],["EndTag", "http://www.w3.org/1999/xhtml", "title"], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta charset=utf-8><title>foo</title>"],
"xhtml": ["<head><meta charset=\"utf-8\" /><title>foo</title></head>"]
},
{"description": "head w/meta-charset",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta charset=utf-8>"],
"xhtml": ["<head><meta charset=\"utf-8\" /></head>"]
},
{"description": "head w/ two meta-charset",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta charset=utf-8><meta charset=utf-8>", "<head><meta charset=utf-8><meta charset=ascii>"],
"xhtml": ["<head><meta charset=\"utf-8\" /><meta charset=\"utf-8\" /></head>", "<head><meta charset=\"utf-8\" /><meta charset=\"ascii\" /></head>"]
},
{"description": "head w/robots",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta charset=utf-8><meta content=noindex name=robots>"],
"xhtml": ["<head><meta charset=\"utf-8\" /><meta content=\"noindex\" name=\"robots\" /></head>"]
},
{"description": "head w/robots & charset",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta content=noindex name=robots><meta charset=utf-8>"],
"xhtml": ["<head><meta content=\"noindex\" name=\"robots\" /><meta charset=\"utf-8\" /></head>"]
},
{"description": "head w/ charset in http-equiv content-type",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "http-equiv", "value": "content-type"}, {"namespace": null, "name": "content", "value": "text/html; charset=ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
"xhtml": ["<head><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
},
{"description": "head w/robots & charset in http-equiv content-type",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EmptyTag","meta",[{"namespace": null, "name": "http-equiv", "value": "content-type"}, {"namespace": null, "name": "content", "value": "text/html; charset=ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta content=noindex name=robots><meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
"xhtml": ["<head><meta content=\"noindex\" name=\"robots\" /><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
}
]}

View file

@@ -1,965 +0,0 @@
{"tests": [
{"description": "html start-tag followed by text, with attributes",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", [{"namespace": null, "name": "lang", "value": "en"}]], ["Characters", "foo"]],
"expected": ["<html lang=en>foo"]
},
{"description": "html start-tag followed by comment",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Comment", "foo"]],
"expected": ["<html><!--foo-->"]
},
{"description": "html start-tag followed by space character",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Characters", " foo"]],
"expected": ["<html> foo"]
},
{"description": "html start-tag followed by text",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "html start-tag followed by start-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "html start-tag followed by end-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "html start-tag at EOF (shouldn't ever happen?!)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}]],
"expected": [""]
},
{"description": "html end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Comment", "foo"]],
"expected": ["</html><!--foo-->"]
},
{"description": "html end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Characters", " foo"]],
"expected": ["</html> foo"]
},
{"description": "html end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "html end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "html end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "html end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"]],
"expected": [""]
},
{"description": "head start-tag followed by comment",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Comment", "foo"]],
"expected": ["<head><!--foo-->"]
},
{"description": "head start-tag followed by space character",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Characters", " foo"]],
"expected": ["<head> foo"]
},
{"description": "head start-tag followed by text",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Characters", "foo"]],
"expected": ["<head>foo"]
},
{"description": "head start-tag followed by start-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "head start-tag followed by end-tag (shouldn't ever happen?!)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["<head></foo>", "</foo>"]
},
{"description": "empty head element",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": [""]
},
{"description": "head start-tag followed by empty-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "head start-tag at EOF (shouldn't ever happen?!)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}]],
"expected": ["<head>", ""]
},
{"description": "head end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Comment", "foo"]],
"expected": ["</head><!--foo-->"]
},
{"description": "head end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Characters", " foo"]],
"expected": ["</head> foo"]
},
{"description": "head end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "head end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "head end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "head end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": [""]
},
{"description": "body start-tag followed by comment",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Comment", "foo"]],
"expected": ["<body><!--foo-->"]
},
{"description": "body start-tag followed by space character",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Characters", " foo"]],
"expected": ["<body> foo"]
},
{"description": "body start-tag followed by text",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "body start-tag followed by start-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "body start-tag followed by end-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "body start-tag at EOF (shouldn't ever happen?!)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}]],
"expected": [""]
},
{"description": "body end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Comment", "foo"]],
"expected": ["</body><!--foo-->"]
},
{"description": "body end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Characters", " foo"]],
"expected": ["</body> foo"]
},
{"description": "body end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "body end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "body end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "body end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"]],
"expected": [""]
},
{"description": "li end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Comment", "foo"]],
"expected": ["</li><!--foo-->"]
},
{"description": "li end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Characters", " foo"]],
"expected": ["</li> foo"]
},
{"description": "li end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Characters", "foo"]],
"expected": ["</li>foo"]
},
{"description": "li end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</li><foo>"]
},
{"description": "li end-tag followed by li start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["StartTag", "http://www.w3.org/1999/xhtml", "li", {}]],
"expected": ["<li>"]
},
{"description": "li end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "li end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"]],
"expected": [""]
},
{"description": "dt end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Comment", "foo"]],
"expected": ["</dt><!--foo-->"]
},
{"description": "dt end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Characters", " foo"]],
"expected": ["</dt> foo"]
},
{"description": "dt end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Characters", "foo"]],
"expected": ["</dt>foo"]
},
{"description": "dt end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</dt><foo>"]
},
{"description": "dt end-tag followed by dt start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "dt", {}]],
"expected": ["<dt>"]
},
{"description": "dt end-tag followed by dd start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "dd", {}]],
"expected": ["<dd>"]
},
{"description": "dt end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</dt></foo>"]
},
{"description": "dt end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"]],
"expected": ["</dt>"]
},
{"description": "dd end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Comment", "foo"]],
"expected": ["</dd><!--foo-->"]
},
{"description": "dd end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Characters", " foo"]],
"expected": ["</dd> foo"]
},
{"description": "dd end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Characters", "foo"]],
"expected": ["</dd>foo"]
},
{"description": "dd end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</dd><foo>"]
},
{"description": "dd end-tag followed by dd start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "dd", {}]],
"expected": ["<dd>"]
},
{"description": "dd end-tag followed by dt start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "dt", {}]],
"expected": ["<dt>"]
},
{"description": "dd end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "dd end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"]],
"expected": [""]
},
{"description": "p end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Comment", "foo"]],
"expected": ["</p><!--foo-->"]
},
{"description": "p end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Characters", " foo"]],
"expected": ["</p> foo"]
},
{"description": "p end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Characters", "foo"]],
"expected": ["</p>foo"]
},
{"description": "p end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</p><foo>"]
},
{"description": "p end-tag followed by address start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "address", {}]],
"expected": ["<address>"]
},
{"description": "p end-tag followed by article start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "article", {}]],
"expected": ["<article>"]
},
{"description": "p end-tag followed by aside start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "aside", {}]],
"expected": ["<aside>"]
},
{"description": "p end-tag followed by blockquote start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "blockquote", {}]],
"expected": ["<blockquote>"]
},
{"description": "p end-tag followed by datagrid start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "datagrid", {}]],
"expected": ["<datagrid>"]
},
{"description": "p end-tag followed by dialog start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dialog", {}]],
"expected": ["<dialog>"]
},
{"description": "p end-tag followed by dir start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dir", {}]],
"expected": ["<dir>"]
},
{"description": "p end-tag followed by div start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "div", {}]],
"expected": ["<div>"]
},
{"description": "p end-tag followed by dl start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dl", {}]],
"expected": ["<dl>"]
},
{"description": "p end-tag followed by fieldset start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "fieldset", {}]],
"expected": ["<fieldset>"]
},
{"description": "p end-tag followed by footer start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "footer", {}]],
"expected": ["<footer>"]
},
{"description": "p end-tag followed by form start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "form", {}]],
"expected": ["<form>"]
},
{"description": "p end-tag followed by h1 start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h1", {}]],
"expected": ["<h1>"]
},
{"description": "p end-tag followed by h2 start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h2", {}]],
"expected": ["<h2>"]
},
{"description": "p end-tag followed by h3 start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h3", {}]],
"expected": ["<h3>"]
},
{"description": "p end-tag followed by h4 start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h4", {}]],
"expected": ["<h4>"]
},
{"description": "p end-tag followed by h5 start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h5", {}]],
"expected": ["<h5>"]
},
{"description": "p end-tag followed by h6 start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h6", {}]],
"expected": ["<h6>"]
},
{"description": "p end-tag followed by header start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "header", {}]],
"expected": ["<header>"]
},
{"description": "p end-tag followed by hr empty-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["EmptyTag", "hr", {}]],
"expected": ["<hr>"]
},
{"description": "p end-tag followed by menu start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "menu", {}]],
"expected": ["<menu>"]
},
{"description": "p end-tag followed by nav start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "nav", {}]],
"expected": ["<nav>"]
},
{"description": "p end-tag followed by ol start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "ol", {}]],
"expected": ["<ol>"]
},
{"description": "p end-tag followed by p start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "p", {}]],
"expected": ["<p>"]
},
{"description": "p end-tag followed by pre start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}]],
"expected": ["<pre>"]
},
{"description": "p end-tag followed by section start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "section", {}]],
"expected": ["<section>"]
},
{"description": "p end-tag followed by table start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "table", {}]],
"expected": ["<table>"]
},
{"description": "p end-tag followed by ul start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "ul", {}]],
"expected": ["<ul>"]
},
{"description": "p end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "p end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"]],
"expected": [""]
},
{"description": "optgroup end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Comment", "foo"]],
"expected": ["</optgroup><!--foo-->"]
},
{"description": "optgroup end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Characters", " foo"]],
"expected": ["</optgroup> foo"]
},
{"description": "optgroup end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Characters", "foo"]],
"expected": ["</optgroup>foo"]
},
{"description": "optgroup end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</optgroup><foo>"]
},
{"description": "optgroup end-tag followed by optgroup start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "optgroup", {}]],
"expected": ["<optgroup>"]
},
{"description": "optgroup end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "optgroup end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"]],
"expected": [""]
},
{"description": "option end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Comment", "foo"]],
"expected": ["</option><!--foo-->"]
},
{"description": "option end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Characters", " foo"]],
"expected": ["</option> foo"]
},
{"description": "option end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Characters", "foo"]],
"expected": ["</option>foo"]
},
{"description": "option end-tag followed by optgroup start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "optgroup", {}]],
"expected": ["<optgroup>"]
},
{"description": "option end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</option><foo>"]
},
{"description": "option end-tag followed by option start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "option", {}]],
"expected": ["<option>"]
},
{"description": "option end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "option end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"]],
"expected": [""]
},
{"description": "colgroup start-tag followed by comment",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Comment", "foo"]],
"expected": ["<colgroup><!--foo-->"]
},
{"description": "colgroup start-tag followed by space character",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Characters", " foo"]],
"expected": ["<colgroup> foo"]
},
{"description": "colgroup start-tag followed by text",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Characters", "foo"]],
"expected": ["<colgroup>foo"]
},
{"description": "colgroup start-tag followed by start-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<colgroup><foo>"]
},
{"description": "first colgroup in a table with a col child",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "table", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["EmptyTag", "col", {}]],
"expected": ["<table><col>"]
},
{"description": "colgroup with a col child, following another colgroup",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "col", {}]],
"expected": ["</colgroup><col>", "<colgroup><col>"]
},
{"description": "colgroup start-tag followed by end-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["<colgroup></foo>"]
},
{"description": "colgroup start-tag at EOF",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}]],
"expected": ["<colgroup>"]
},
{"description": "colgroup end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Comment", "foo"]],
"expected": ["</colgroup><!--foo-->"]
},
{"description": "colgroup end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Characters", " foo"]],
"expected": ["</colgroup> foo"]
},
{"description": "colgroup end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "colgroup end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "colgroup end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "colgroup end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"]],
"expected": [""]
},
{"description": "thead end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Comment", "foo"]],
"expected": ["</thead><!--foo-->"]
},
{"description": "thead end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Characters", " foo"]],
"expected": ["</thead> foo"]
},
{"description": "thead end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Characters", "foo"]],
"expected": ["</thead>foo"]
},
{"description": "thead end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</thead><foo>"]
},
{"description": "thead end-tag followed by tbody start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
"expected": ["<tbody>"]
},
{"description": "thead end-tag followed by tfoot start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tfoot", {}]],
"expected": ["<tfoot>"]
},
{"description": "thead end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</thead></foo>"]
},
{"description": "thead end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"]],
"expected": ["</thead>"]
},
{"description": "tbody start-tag followed by comment",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Comment", "foo"]],
"expected": ["<tbody><!--foo-->"]
},
{"description": "tbody start-tag followed by space character",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Characters", " foo"]],
"expected": ["<tbody> foo"]
},
{"description": "tbody start-tag followed by text",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Characters", "foo"]],
"expected": ["<tbody>foo"]
},
{"description": "tbody start-tag followed by start-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<tbody><foo>"]
},
{"description": "first tbody in a table with a tr child",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "table", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
"expected": ["<table><tr>"]
},
{"description": "tbody with a tr child, following another tbody",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
"expected": ["<tbody><tr>", "</tbody><tr>"]
},
{"description": "tbody with a tr child, following a thead",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
"expected": ["<tbody><tr>", "</thead><tr>"]
},
{"description": "tbody with a tr child, following a tfoot",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
"expected": ["<tbody><tr>", "</tfoot><tr>"]
},
{"description": "tbody start-tag followed by end-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["<tbody></foo>"]
},
{"description": "tbody start-tag at EOF",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
"expected": ["<tbody>"]
},
{"description": "tbody end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Comment", "foo"]],
"expected": ["</tbody><!--foo-->"]
},
{"description": "tbody end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Characters", " foo"]],
"expected": ["</tbody> foo"]
},
{"description": "tbody end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Characters", "foo"]],
"expected": ["</tbody>foo"]
},
{"description": "tbody end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</tbody><foo>"]
},
{"description": "tbody end-tag followed by tbody start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
"expected": ["<tbody>", "</tbody>"]
},
{"description": "tbody end-tag followed by tfoot start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tfoot", {}]],
"expected": ["<tfoot>"]
},
{"description": "tbody end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "tbody end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"]],
"expected": [""]
},
{"description": "tfoot end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Comment", "foo"]],
"expected": ["</tfoot><!--foo-->"]
},
{"description": "tfoot end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Characters", " foo"]],
"expected": ["</tfoot> foo"]
},
{"description": "tfoot end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Characters", "foo"]],
"expected": ["</tfoot>foo"]
},
{"description": "tfoot end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</tfoot><foo>"]
},
{"description": "tfoot end-tag followed by tbody start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
"expected": ["<tbody>", "</tfoot>"]
},
{"description": "tfoot end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "tfoot end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"]],
"expected": [""]
},
{"description": "tr end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Comment", "foo"]],
"expected": ["</tr><!--foo-->"]
},
{"description": "tr end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Characters", " foo"]],
"expected": ["</tr> foo"]
},
{"description": "tr end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Characters", "foo"]],
"expected": ["</tr>foo"]
},
{"description": "tr end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</tr><foo>"]
},
{"description": "tr end-tag followed by tr start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
"expected": ["<tr>", "</tr>"]
},
{"description": "tr end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "tr end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"]],
"expected": [""]
},
{"description": "td end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Comment", "foo"]],
"expected": ["</td><!--foo-->"]
},
{"description": "td end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Characters", " foo"]],
"expected": ["</td> foo"]
},
{"description": "td end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Characters", "foo"]],
"expected": ["</td>foo"]
},
{"description": "td end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</td><foo>"]
},
{"description": "td end-tag followed by td start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "td", {}]],
"expected": ["<td>", "</td>"]
},
{"description": "td end-tag followed by th start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "th", {}]],
"expected": ["<th>", "</td>"]
},
{"description": "td end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "td end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"]],
"expected": [""]
},
{"description": "th end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Comment", "foo"]],
"expected": ["</th><!--foo-->"]
},
{"description": "th end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Characters", " foo"]],
"expected": ["</th> foo"]
},
{"description": "th end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Characters", "foo"]],
"expected": ["</th>foo"]
},
{"description": "th end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</th><foo>"]
},
{"description": "th end-tag followed by th start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "th", {}]],
"expected": ["<th>", "</th>"]
},
{"description": "th end-tag followed by td start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "td", {}]],
"expected": ["<td>", "</th>"]
},
{"description": "th end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "th end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml" , "th"]],
"expected": [""]
}
]}

View file

@ -1,60 +0,0 @@
{"tests":[
{"description": "quote_char=\"'\"",
"options": {"quote_char": "'"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "test 'with' quote_char"}]]],
"expected": ["<span title='test &#39;with&#39; quote_char'>"]
},
{"description": "quote_attr_values=true",
"options": {"quote_attr_values": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "button", [{"namespace": null, "name": "disabled", "value" :"disabled"}]]],
"expected": ["<button disabled>"],
"xhtml": ["<button disabled=\"disabled\">"]
},
{"description": "quote_attr_values=true with irrelevant",
"options": {"quote_attr_values": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :"irrelevant"}]]],
"expected": ["<div irrelevant>"],
"xhtml": ["<div irrelevant=\"irrelevant\">"]
},
{"description": "use_trailing_solidus=true with void element",
"options": {"use_trailing_solidus": true},
"input": [["EmptyTag", "img", {}]],
"expected": ["<img />"]
},
{"description": "use_trailing_solidus=true with non-void element",
"options": {"use_trailing_solidus": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", {}]],
"expected": ["<div>"]
},
{"description": "minimize_boolean_attributes=false",
"options": {"minimize_boolean_attributes": false},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :"irrelevant"}]]],
"expected": ["<div irrelevant=irrelevant>"],
"xhtml": ["<div irrelevant=\"irrelevant\">"]
},
{"description": "minimize_boolean_attributes=false with empty value",
"options": {"minimize_boolean_attributes": false},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :""}]]],
"expected": ["<div irrelevant=\"\">"]
},
{"description": "escape less than signs in attribute values",
"options": {"escape_lt_in_attrs": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "a", [{"namespace": null, "name": "title", "value": "a<b>c&d"}]]],
"expected": ["<a title=\"a&lt;b>c&amp;d\">"]
},
{"description": "rcdata",
"options": {"escape_rcdata": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "a<b>c&d"]],
"expected": ["<script>a&lt;b&gt;c&amp;d"]
}
]}

View file

@ -1,51 +0,0 @@
{"tests": [
{"description": "bare text with leading spaces",
"options": {"strip_whitespace": true},
"input": [["Characters", "\t\r\n\u000C foo"]],
"expected": [" foo"]
},
{"description": "bare text with trailing spaces",
"options": {"strip_whitespace": true},
"input": [["Characters", "foo \t\r\n\u000C"]],
"expected": ["foo "]
},
{"description": "bare text with inner spaces",
"options": {"strip_whitespace": true},
"input": [["Characters", "foo \t\r\n\u000C bar"]],
"expected": ["foo bar"]
},
{"description": "text within <pre>",
"options": {"strip_whitespace": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "pre"]],
"expected": ["<pre>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</pre>"]
},
{"description": "text within <pre>, with inner markup",
"options": {"strip_whitespace": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}], ["Characters", "\t\r\n\u000C fo"], ["StartTag", "http://www.w3.org/1999/xhtml", "span", {}], ["Characters", "o \t\r\n\u000C b"], ["EndTag", "http://www.w3.org/1999/xhtml", "span"], ["Characters", "ar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "pre"]],
"expected": ["<pre>\t\r\n\u000C fo<span>o \t\r\n\u000C b</span>ar \t\r\n\u000C</pre>"]
},
{"description": "text within <textarea>",
"options": {"strip_whitespace": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "textarea", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "textarea"]],
"expected": ["<textarea>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</textarea>"]
},
{"description": "text within <script>",
"options": {"strip_whitespace": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "script"]],
"expected": ["<script>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</script>"]
},
{"description": "text within <style>",
"options": {"strip_whitespace": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "style"]],
"expected": ["<style>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</style>"]
}
]}

View file

@ -1,107 +0,0 @@
Tokenizer tests
===============
The test format is [JSON](http://www.json.org/). This has the advantage
that the syntax allows backward-compatible extensions to the tests and
the disadvantage that it is relatively verbose.

Basic Structure
---------------

    {"tests": [
        {"description": "Test description",
        "input": "input_string",
        "output": [expected_output_tokens],
        "initialStates": [initial_states],
        "lastStartTag": last_start_tag,
        "errors": [parse_errors]
        }
    ]}

Multiple tests per file are allowed simply by adding more objects to the
"tests" list.
Each parse error is an object that contains an error `code` and one-based
error location indices: `line` and `col`.
`description`, `input` and `output` are always present. The other values
are optional.
### Test set-up
`test.input` is a string containing the characters to pass to the
tokenizer. Specifically, it represents the characters of the **input
stream**, and so implementations are expected to perform the processing
described in the spec's **Preprocessing the input stream** section
before feeding the result to the tokenizer.
If `test.doubleEscaped` is present and `true`, then `test.input` is not
quite as described above. Instead, it must first be subjected to another
round of unescaping (i.e., in addition to any unescaping involved in the
JSON import), and the result of *that* represents the characters of the
input stream. Currently, the only unescaping required by this option is
to convert each sequence of the form \\uHHHH (where H is a hex digit)
into the corresponding Unicode code point. (Note that this option also
affects the interpretation of `test.output`.)
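
For a Python implementation, this extra unescaping pass might look like the
following sketch (the helper name `double_unescape` is illustrative and not
part of the test format):

    import re

    def double_unescape(s):
        # Replace each literal \uHHHH sequence left over after JSON decoding
        # with the corresponding code point (lone surrogates included).
        return re.sub(r"\\u([0-9A-Fa-f]{4})",
                      lambda m: chr(int(m.group(1), 16)),
                      s)
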
`test.initialStates` is a list of strings, each being the name of a
tokenizer state which can be one of the following:
- `Data state`
- `PLAINTEXT state`
- `RCDATA state`
- `RAWTEXT state`
- `Script data state`
- `CDATA section state`
The test should be run once for each string, using it
to set the tokenizer's initial state for that run. If
`test.initialStates` is omitted, it defaults to `["Data state"]`.
`test.lastStartTag` is a lowercase string that should be used as "the
tag name of the last start tag to have been emitted from this
tokenizer", referenced in the spec's definition of **appropriate end tag
token**. If it is omitted, it is treated as if "no start tag has been
emitted from this tokenizer".
### Test results
`test.output` is a list of tokens, ordered so that the first token produced
by the tokenizer is the first (leftmost) in the list. The list must match the
**complete** list of tokens that the tokenizer should produce. Valid
tokens are:
["DOCTYPE", name, public_id, system_id, correctness]
["StartTag", name, {attributes}*, true*]
["StartTag", name, {attributes}]
["EndTag", name]
["Comment", data]
["Character", data]
`public_id` and `system_id` are either strings or `null`. `correctness`
is either `true` or `false`; `true` corresponds to the force-quirks flag
being false, and vice-versa.
When the self-closing flag is set, the `StartTag` array has `true` as
its fourth entry. When the flag is not set, the array has only three
entries for backwards compatibility.
All adjacent character tokens are coalesced into a single
`["Character", data]` token.
If `test.doubleEscaped` is present and `true`, then every string within
`test.output` must be further unescaped (as described above) before
comparing with the tokenizer's output.
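
As a rough illustration (not part of the test format), a Python harness might
drive these tests as sketched below; `tokenize` stands in for the
implementation under test, and the function and helper names are hypothetical.
Handling of `doubleEscaped` is omitted for brevity (see the unescaping sketch
above).

    import json

    def coalesce_characters(tokens):
        # Merge adjacent "Character" tokens before comparing, mirroring the
        # coalescing described above for test.output.
        out = []
        for tok in tokens:
            if out and tok[0] == "Character" and out[-1][0] == "Character":
                out[-1] = ["Character", out[-1][1] + tok[1]]
            else:
                out.append(list(tok))
        return out

    def run_tokenizer_tests(path, tokenize):
        # tokenize(input_string, initial_state, last_start_tag) -> token list
        with open(path, encoding="utf-8") as f:
            tests = json.load(f)["tests"]
        for test in tests:
            for state in test.get("initialStates", ["Data state"]):
                tokens = tokenize(test["input"], state,
                                  test.get("lastStartTag"))
                assert coalesce_characters(tokens) == test["output"], \
                    test["description"]
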
xmlViolation tests
------------------
`tokenizer/xmlViolation.test` differs from the above in a couple of
ways:
- The name of the single member of the top-level JSON object is
"xmlViolationTests" instead of "tests".
- Each test's expected output assumes that the implementation is applying
  the tweaks given in the spec's "Coercing an HTML DOM into an
  infoset" section.

View file

@ -1,93 +0,0 @@
{"tests": [
{"description":"PLAINTEXT content model flag",
"initialStates":["PLAINTEXT state"],
"lastStartTag":"plaintext",
"input":"<head>&body;",
"output":[["Character", "<head>&body;"]]},
{"description":"PLAINTEXT with seeming close tag",
"initialStates":["PLAINTEXT state"],
"lastStartTag":"plaintext",
"input":"</plaintext>&body;",
"output":[["Character", "</plaintext>&body;"]]},
{"description":"End tag closing RCDATA or RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp>",
"output":[["Character", "foo"], ["EndTag", "xmp"]]},
{"description":"End tag closing RCDATA or RAWTEXT (case-insensitivity)",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xMp>",
"output":[["Character", "foo"], ["EndTag", "xmp"]]},
{"description":"End tag closing RCDATA or RAWTEXT (ending with space)",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp ",
"output":[["Character", "foo"]],
"errors":[
{ "code": "eof-in-tag", "line": 1, "col": 10 }
]},
{"description":"End tag closing RCDATA or RAWTEXT (ending with EOF)",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp",
"output":[["Character", "foo</xmp"]]},
{"description":"End tag closing RCDATA or RAWTEXT (ending with slash)",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp/",
"output":[["Character", "foo"]],
"errors":[
{ "code": "eof-in-tag", "line": 1, "col": 10 }
]},
{"description":"End tag not closing RCDATA or RAWTEXT (ending with left-angle-bracket)",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp<",
"output":[["Character", "foo</xmp<"]]},
{"description":"End tag with incorrect name in RCDATA or RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"</foo>bar</xmp>",
"output":[["Character", "</foo>bar"], ["EndTag", "xmp"]]},
{"description":"Partial end tags leading straight into partial end tags",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"</xmp</xmp</xmp>",
"output":[["Character", "</xmp</xmp"], ["EndTag", "xmp"]]},
{"description":"End tag with incorrect name in RCDATA or RAWTEXT (starting like correct name)",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"</foo>bar</xmpaar>",
"output":[["Character", "</foo>bar</xmpaar>"]]},
{"description":"End tag closing RCDATA or RAWTEXT, switching back to PCDATA",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp></baz>",
"output":[["Character", "foo"], ["EndTag", "xmp"], ["EndTag", "baz"]]},
{"description":"RAWTEXT w/ something looking like an entity",
"initialStates":["RAWTEXT state"],
"lastStartTag":"xmp",
"input":"&foo;",
"output":[["Character", "&foo;"]]},
{"description":"RCDATA w/ an entity",
"initialStates":["RCDATA state"],
"lastStartTag":"textarea",
"input":"&lt;",
"output":[["Character", "<"]]}
]}

View file

@ -1,330 +0,0 @@
{
"tests": [
{
"description":"CR in bogus comment state",
"input":"<?\u000d",
"output":[["Comment", "?\u000a"]],
"errors":[
{ "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
]
},
{
"description":"CRLF in bogus comment state",
"input":"<?\u000d\u000a",
"output":[["Comment", "?\u000a"]],
"errors":[
{ "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
]
},
{
"description":"CRLFLF in bogus comment state",
"input":"<?\u000d\u000a\u000a",
"output":[["Comment", "?\u000a\u000a"]],
"errors":[
{ "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
]
},
{
"description":"Raw NUL replacement",
"doubleEscaped":true,
"initialStates":["RCDATA state", "RAWTEXT state", "PLAINTEXT state", "Script data state"],
"input":"\\u0000",
"output":[["Character", "\\uFFFD"]],
"errors":[
{ "code": "unexpected-null-character", "line": 1, "col": 1 }
]
},
{
"description":"NUL in CDATA section",
"doubleEscaped":true,
"initialStates":["CDATA section state"],
"input":"\\u0000]]>",
"output":[["Character", "\\u0000"]]
},
{
"description":"NUL in script HTML comment",
"doubleEscaped":true,
"initialStates":["Script data state"],
"input":"<!--test\\u0000--><!--test-\\u0000--><!--test--\\u0000-->",
"output":[["Character", "<!--test\\uFFFD--><!--test-\\uFFFD--><!--test--\\uFFFD-->"]],
"errors":[
{ "code": "unexpected-null-character", "line": 1, "col": 9 },
{ "code": "unexpected-null-character", "line": 1, "col": 22 },
{ "code": "unexpected-null-character", "line": 1, "col": 36 }
]
},
{
"description":"NUL in script HTML comment - double escaped",
"doubleEscaped":true,
"initialStates":["Script data state"],
"input":"<!--<script>\\u0000--><!--<script>-\\u0000--><!--<script>--\\u0000-->",
"output":[["Character", "<!--<script>\\uFFFD--><!--<script>-\\uFFFD--><!--<script>--\\uFFFD-->"]],
"errors":[
{ "code": "unexpected-null-character", "line": 1, "col": 13 },
{ "code": "unexpected-null-character", "line": 1, "col": 30 },
{ "code": "unexpected-null-character", "line": 1, "col": 48 }
]
},
{
"description":"EOF in script HTML comment",
"initialStates":["Script data state"],
"input":"<!--test",
"output":[["Character", "<!--test"]],
"errors":[
{ "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 9 }
]
},
{
"description":"EOF in script HTML comment after dash",
"initialStates":["Script data state"],
"input":"<!--test-",
"output":[["Character", "<!--test-"]],
"errors":[
{ "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 10 }
]
},
{
"description":"EOF in script HTML comment after dash dash",
"initialStates":["Script data state"],
"input":"<!--test--",
"output":[["Character", "<!--test--"]],
"errors":[
{ "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 11 }
]
},
{
"description":"EOF in script HTML comment double escaped after dash",
"initialStates":["Script data state"],
"input":"<!--<script>-",
"output":[["Character", "<!--<script>-"]],
"errors":[
{ "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 14 }
]
},
{
"description":"EOF in script HTML comment double escaped after dash dash",
"initialStates":["Script data state"],
"input":"<!--<script>--",
"output":[["Character", "<!--<script>--"]],
"errors":[
{ "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 15 }
]
},
{
"description":"EOF in script HTML comment - double escaped",
"initialStates":["Script data state"],
"input":"<!--<script>",
"output":[["Character", "<!--<script>"]],
"errors":[
{ "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 13 }
]
},
{
"description":"Dash in script HTML comment",
"initialStates":["Script data state"],
"input":"<!-- - -->",
"output":[["Character", "<!-- - -->"]]
},
{
"description":"Dash less-than in script HTML comment",
"initialStates":["Script data state"],
"input":"<!-- -< -->",
"output":[["Character", "<!-- -< -->"]]
},
{
"description":"Dash at end of script HTML comment",
"initialStates":["Script data state"],
"input":"<!--test--->",
"output":[["Character", "<!--test--->"]]
},
{
"description":"</script> in script HTML comment",
"initialStates":["Script data state"],
"lastStartTag":"script",
"input":"<!-- </script> --></script>",
"output":[["Character", "<!-- "], ["EndTag", "script"], ["Character", " -->"], ["EndTag", "script"]]
},
{
"description":"</script> in script HTML comment - double escaped",
"initialStates":["Script data state"],
"lastStartTag":"script",
"input":"<!-- <script></script> --></script>",
"output":[["Character", "<!-- <script></script> -->"], ["EndTag", "script"]]
},
{
"description":"</script> in script HTML comment - double escaped with nested <script>",
"initialStates":["Script data state"],
"lastStartTag":"script",
"input":"<!-- <script><script></script></script> --></script>",
"output":[["Character", "<!-- <script><script></script>"], ["EndTag", "script"], ["Character", " -->"], ["EndTag", "script"]]
},
{
"description":"</script> in script HTML comment - double escaped with abrupt end",
"initialStates":["Script data state"],
"lastStartTag":"script",
"input":"<!-- <script>--></script> --></script>",
"output":[["Character", "<!-- <script>-->"], ["EndTag", "script"], ["Character", " -->"], ["EndTag", "script"]]
},
{
"description":"Incomplete start tag in script HTML comment double escaped",
"initialStates":["Script data state"],
"lastStartTag":"script",
"input":"<!--<scrip></script>-->",
"output":[["Character", "<!--<scrip>"], ["EndTag", "script"], ["Character", "-->"]]
},
{
"description":"Unclosed start tag in script HTML comment double escaped",
"initialStates":["Script data state"],
"lastStartTag":"script",
"input":"<!--<script</script>-->",
"output":[["Character", "<!--<script"], ["EndTag", "script"], ["Character", "-->"]]
},
{
"description":"Incomplete end tag in script HTML comment double escaped",
"initialStates":["Script data state"],
"lastStartTag":"script",
"input":"<!--<script></scrip>-->",
"output":[["Character", "<!--<script></scrip>-->"]]
},
{
"description":"Unclosed end tag in script HTML comment double escaped",
"initialStates":["Script data state"],
"lastStartTag":"script",
"input":"<!--<script></script-->",
"output":[["Character", "<!--<script></script-->"]]
},
{
"description":"leading U+FEFF must pass through",
"initialStates":["Data state", "RCDATA state", "RAWTEXT state", "Script data state"],
"doubleEscaped":true,
"input":"\\uFEFFfoo\\uFEFFbar",
"output":[["Character", "\\uFEFFfoo\\uFEFFbar"]]
},
{
"description":"Non BMP-charref in RCDATA",
"initialStates":["RCDATA state"],
"input":"&NotEqualTilde;",
"output":[["Character", "\u2242\u0338"]]
},
{
"description":"Bad charref in RCDATA",
"initialStates":["RCDATA state"],
"input":"&NotEqualTild;",
"output":[["Character", "&NotEqualTild;"]],
"errors":[
{ "code": "unknown-named-character-reference", "line": 1, "col": 14 }
]
},
{
"description":"lowercase endtags",
"initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
"lastStartTag":"xmp",
"input":"</XMP>",
"output":[["EndTag","xmp"]]
},
{
"description":"bad endtag (space before name)",
"initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
"lastStartTag":"xmp",
"input":"</ XMP>",
"output":[["Character","</ XMP>"]]
},
{
"description":"bad endtag (not matching last start tag)",
"initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
"lastStartTag":"xmp",
"input":"</xm>",
"output":[["Character","</xm>"]]
},
{
"description":"bad endtag (without close bracket)",
"initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
"lastStartTag":"xmp",
"input":"</xm ",
"output":[["Character","</xm "]]
},
{
"description":"bad endtag (trailing solidus)",
"initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
"lastStartTag":"xmp",
"input":"</xm/",
"output":[["Character","</xm/"]]
},
{
"description":"Non BMP-charref in attribute",
"input":"<p id=\"&NotEqualTilde;\">",
"output":[["StartTag", "p", {"id":"\u2242\u0338"}]]
},
{
"description":"--!NUL in comment ",
"doubleEscaped":true,
"input":"<!----!\\u0000-->",
"output":[["Comment", "--!\\uFFFD"]],
"errors":[
{ "code": "unexpected-null-character", "line": 1, "col": 8 }
]
},
{
"description":"space EOF after doctype ",
"input":"<!DOCTYPE html ",
"output":[["DOCTYPE", "html", null, null , false]],
"errors":[
{ "code": "eof-in-doctype", "line": 1, "col": 16 }
]
},
{
"description":"CDATA in HTML content",
"input":"<![CDATA[foo]]>",
"output":[["Comment", "[CDATA[foo]]"]],
"errors":[
{ "code": "cdata-in-html-content", "line": 1, "col": 9 }
]
},
{
"description":"CDATA content",
"input":"foo&#32;]]>",
"initialStates":["CDATA section state"],
"output":[["Character", "foo&#32;"]]
},
{
"description":"CDATA followed by HTML content",
"input":"foo&#32;]]>&#32;",
"initialStates":["CDATA section state"],
"output":[["Character", "foo&#32; "]]
},
{
"description":"CDATA with extra bracket",
"input":"foo]]]>",
"initialStates":["CDATA section state"],
"output":[["Character", "foo]"]]
},
{
"description":"CDATA without end marker",
"input":"foo",
"initialStates":["CDATA section state"],
"output":[["Character", "foo"]],
"errors":[
{ "code": "eof-in-cdata", "line": 1, "col": 4 }
]
},
{
"description":"CDATA with single bracket ending",
"input":"foo]",
"initialStates":["CDATA section state"],
"output":[["Character", "foo]"]],
"errors":[
{ "code": "eof-in-cdata", "line": 1, "col": 5 }
]
},
{
"description":"CDATA with two brackets ending",
"input":"foo]]",
"initialStates":["CDATA section state"],
"output":[["Character", "foo]]"]],
"errors":[
{ "code": "eof-in-cdata", "line": 1, "col": 6 }
]
}
]
}

View file

@ -1,542 +0,0 @@
{"tests": [
{"description": "Undefined named entity in a double-quoted attribute value ending in semicolon and whose name starts with a known entity name.",
"input":"<h a=\"&noti;\">",
"output": [["StartTag", "h", {"a": "&noti;"}]]},
{"description": "Entity name requiring semicolon instead followed by the equals sign in a double-quoted attribute value.",
"input":"<h a=\"&lang=\">",
"output": [["StartTag", "h", {"a": "&lang="}]]},
{"description": "Valid entity name followed by the equals sign in a double-quoted attribute value.",
"input":"<h a=\"&not=\">",
"output": [["StartTag", "h", {"a": "&not="}]]},
{"description": "Undefined named entity in a single-quoted attribute value ending in semicolon and whose name starts with a known entity name.",
"input":"<h a='&noti;'>",
"output": [["StartTag", "h", {"a": "&noti;"}]]},
{"description": "Entity name requiring semicolon instead followed by the equals sign in a single-quoted attribute value.",
"input":"<h a='&lang='>",
"output": [["StartTag", "h", {"a": "&lang="}]]},
{"description": "Valid entity name followed by the equals sign in a single-quoted attribute value.",
"input":"<h a='&not='>",
"output": [["StartTag", "h", {"a": "&not="}]]},
{"description": "Undefined named entity in an unquoted attribute value ending in semicolon and whose name starts with a known entity name.",
"input":"<h a=&noti;>",
"output": [["StartTag", "h", {"a": "&noti;"}]]},
{"description": "Entity name requiring semicolon instead followed by the equals sign in an unquoted attribute value.",
"input":"<h a=&lang=>",
"output": [["StartTag", "h", {"a": "&lang="}]],
"errors":[
{ "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 11 }
]},
{"description": "Valid entity name followed by the equals sign in an unquoted attribute value.",
"input":"<h a=&not=>",
"output": [["StartTag", "h", {"a": "&not="}]],
"errors":[
{ "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 10 }
]},
{"description": "Ambiguous ampersand.",
"input":"&rrrraannddom;",
"output": [["Character", "&rrrraannddom;"]],
"errors":[
{ "code": "unknown-named-character-reference", "line": 1, "col": 14 }
]},
{"description": "Semicolonless named entity 'not' followed by 'i;' in body",
"input":"&noti;",
"output": [["Character", "\u00ACi;"]],
"errors":[
{ "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 }
]},
{"description": "Very long undefined named entity in body",
"input":"&ammmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmp;",
"output": [["Character", "&ammmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmp;"]],
"errors":[
{ "code": "unknown-named-character-reference", "line": 1, "col": 950 }
]},
{"description": "CR as numeric entity",
"input":"&#013;",
"output": [["Character", "\r"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 7 }
]},
{"description": "CR as hexadecimal numeric entity",
"input":"&#x00D;",
"output": [["Character", "\r"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 EURO SIGN numeric entity.",
"input":"&#0128;",
"output": [["Character", "\u20AC"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0129;",
"output": [["Character", "\u0081"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK numeric entity.",
"input":"&#0130;",
"output": [["Character", "\u201A"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK numeric entity.",
"input":"&#0131;",
"output": [["Character", "\u0192"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK numeric entity.",
"input":"&#0132;",
"output": [["Character", "\u201E"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 HORIZONTAL ELLIPSIS numeric entity.",
"input":"&#0133;",
"output": [["Character", "\u2026"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 DAGGER numeric entity.",
"input":"&#0134;",
"output": [["Character", "\u2020"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 DOUBLE DAGGER numeric entity.",
"input":"&#0135;",
"output": [["Character", "\u2021"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT numeric entity.",
"input":"&#0136;",
"output": [["Character", "\u02C6"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 PER MILLE SIGN numeric entity.",
"input":"&#0137;",
"output": [["Character", "\u2030"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON numeric entity.",
"input":"&#0138;",
"output": [["Character", "\u0160"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK numeric entity.",
"input":"&#0139;",
"output": [["Character", "\u2039"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE numeric entity.",
"input":"&#0140;",
"output": [["Character", "\u0152"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0141;",
"output": [["Character", "\u008D"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON numeric entity.",
"input":"&#0142;",
"output": [["Character", "\u017D"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0143;",
"output": [["Character", "\u008F"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0144;",
"output": [["Character", "\u0090"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK numeric entity.",
"input":"&#0145;",
"output": [["Character", "\u2018"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK numeric entity.",
"input":"&#0146;",
"output": [["Character", "\u2019"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK numeric entity.",
"input":"&#0147;",
"output": [["Character", "\u201C"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK numeric entity.",
"input":"&#0148;",
"output": [["Character", "\u201D"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 BULLET numeric entity.",
"input":"&#0149;",
"output": [["Character", "\u2022"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 EN DASH numeric entity.",
"input":"&#0150;",
"output": [["Character", "\u2013"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 EM DASH numeric entity.",
"input":"&#0151;",
"output": [["Character", "\u2014"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 SMALL TILDE numeric entity.",
"input":"&#0152;",
"output": [["Character", "\u02DC"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 TRADE MARK SIGN numeric entity.",
"input":"&#0153;",
"output": [["Character", "\u2122"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON numeric entity.",
"input":"&#0154;",
"output": [["Character", "\u0161"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK numeric entity.",
"input":"&#0155;",
"output": [["Character", "\u203A"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN SMALL LIGATURE OE numeric entity.",
"input":"&#0156;",
"output": [["Character", "\u0153"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0157;",
"output": [["Character", "\u009D"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 EURO SIGN hexadecimal numeric entity.",
"input":"&#x080;",
"output": [["Character", "\u20AC"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x081;",
"output": [["Character", "\u0081"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x082;",
"output": [["Character", "\u201A"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK hexadecimal numeric entity.",
"input":"&#x083;",
"output": [["Character", "\u0192"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x084;",
"output": [["Character", "\u201E"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 HORIZONTAL ELLIPSIS hexadecimal numeric entity.",
"input":"&#x085;",
"output": [["Character", "\u2026"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 DAGGER hexadecimal numeric entity.",
"input":"&#x086;",
"output": [["Character", "\u2020"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 DOUBLE DAGGER hexadecimal numeric entity.",
"input":"&#x087;",
"output": [["Character", "\u2021"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT hexadecimal numeric entity.",
"input":"&#x088;",
"output": [["Character", "\u02C6"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 PER MILLE SIGN hexadecimal numeric entity.",
"input":"&#x089;",
"output": [["Character", "\u2030"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON hexadecimal numeric entity.",
"input":"&#x08A;",
"output": [["Character", "\u0160"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x08B;",
"output": [["Character", "\u2039"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE hexadecimal numeric entity.",
"input":"&#x08C;",
"output": [["Character", "\u0152"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x08D;",
"output": [["Character", "\u008D"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON hexadecimal numeric entity.",
"input":"&#x08E;",
"output": [["Character", "\u017D"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x08F;",
"output": [["Character", "\u008F"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x090;",
"output": [["Character", "\u0090"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x091;",
"output": [["Character", "\u2018"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x092;",
"output": [["Character", "\u2019"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x093;",
"output": [["Character", "\u201C"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x094;",
"output": [["Character", "\u201D"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 BULLET hexadecimal numeric entity.",
"input":"&#x095;",
"output": [["Character", "\u2022"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 EN DASH hexadecimal numeric entity.",
"input":"&#x096;",
"output": [["Character", "\u2013"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 EM DASH hexadecimal numeric entity.",
"input":"&#x097;",
"output": [["Character", "\u2014"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 SMALL TILDE hexadecimal numeric entity.",
"input":"&#x098;",
"output": [["Character", "\u02DC"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 TRADE MARK SIGN hexadecimal numeric entity.",
"input":"&#x099;",
"output": [["Character", "\u2122"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON hexadecimal numeric entity.",
"input":"&#x09A;",
"output": [["Character", "\u0161"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x09B;",
"output": [["Character", "\u203A"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN SMALL LIGATURE OE hexadecimal numeric entity.",
"input":"&#x09C;",
"output": [["Character", "\u0153"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x09D;",
"output": [["Character", "\u009D"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN SMALL LETTER Z WITH CARON hexadecimal numeric entity.",
"input":"&#x09E;",
"output": [["Character", "\u017E"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Windows-1252 LATIN CAPITAL LETTER Y WITH DIAERESIS hexadecimal numeric entity.",
"input":"&#x09F;",
"output": [["Character", "\u0178"]],
"errors":[
{ "code": "control-character-reference", "line": 1, "col": 8 }
]},
{"description": "Decimal numeric entity followed by hex character a.",
"input":"&#97a",
"output": [["Character", "aa"]],
"errors":[
{ "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 }
]},
{"description": "Decimal numeric entity followed by hex character A.",
"input":"&#97A",
"output": [["Character", "aA"]],
"errors":[
{ "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 }
]},
{"description": "Decimal numeric entity followed by hex character f.",
"input":"&#97f",
"output": [["Character", "af"]],
"errors":[
{ "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 }
]},
{"description": "Decimal numeric entity followed by hex character A.",
"input":"&#97F",
"output": [["Character", "aF"]],
"errors":[
{ "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 }
]}
]}

View file

@ -1,36 +0,0 @@
{"tests": [
{"description":"Commented close tag in RCDATA or RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo<!--</xmp>--></xmp>",
"output":[["Character", "foo<!--"], ["EndTag", "xmp"], ["Character", "-->"], ["EndTag", "xmp"]]},
{"description":"Bogus comment in RCDATA or RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo<!-->baz</xmp>",
"output":[["Character", "foo<!-->baz"], ["EndTag", "xmp"]]},
{"description":"End tag surrounded by bogus comment in RCDATA or RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo<!--></xmp><!-->baz</xmp>",
"output":[["Character", "foo<!-->"], ["EndTag", "xmp"], ["Comment", ""], ["Character", "baz"], ["EndTag", "xmp"]],
"errors":[
{ "code": "abrupt-closing-of-empty-comment", "line": 1, "col": 19 }
]},
{"description":"Commented entities in RCDATA",
"initialStates":["RCDATA state"],
"lastStartTag":"xmp",
"input":" &amp; <!-- &amp; --> &amp; </xmp>",
"output":[["Character", " & <!-- & --> & "], ["EndTag", "xmp"]]},
{"description":"Incorrect comment ending sequences in RCDATA or RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo<!-- x --x>x-- >x--!>x--<></xmp>",
"output":[["Character", "foo<!-- x --x>x-- >x--!>x--<>"], ["EndTag", "xmp"]]}
]}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,9 +0,0 @@
{"tests": [
{"description":"<!---- >",
"input":"<!---- >",
"output":[["Comment","-- >"]],
"errors":[
{ "code": "eof-in-comment", "line": 1, "col": 9 }
]}
]}

View file

@ -1,349 +0,0 @@
{"tests": [
{"description":"Correct Doctype lowercase",
"input":"<!DOCTYPE html>",
"output":[["DOCTYPE", "html", null, null, true]]},
{"description":"Correct Doctype uppercase",
"input":"<!DOCTYPE HTML>",
"output":[["DOCTYPE", "html", null, null, true]]},
{"description":"Correct Doctype mixed case",
"input":"<!DOCTYPE HtMl>",
"output":[["DOCTYPE", "html", null, null, true]]},
{"description":"Correct Doctype case with EOF",
"input":"<!DOCTYPE HtMl",
"output":[["DOCTYPE", "html", null, null, false]],
"errors":[
{ "code": "eof-in-doctype", "line": 1, "col": 15 }
]},
{"description":"Truncated doctype start",
"input":"<!DOC>",
"output":[["Comment", "DOC"]],
"errors":[
{ "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
]},
{"description":"Doctype in error",
"input":"<!DOCTYPE foo>",
"output":[["DOCTYPE", "foo", null, null, true]]},
{"description":"Single Start Tag",
"input":"<h>",
"output":[["StartTag", "h", {}]]},
{"description":"Empty end tag",
"input":"</>",
"output":[],
"errors":[
{ "code": "missing-end-tag-name", "line": 1, "col": 3 }
]},
{"description":"Empty start tag",
"input":"<>",
"output":[["Character", "<>"]],
"errors":[
{ "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 }
]},
{"description":"Start Tag w/attribute",
"input":"<h a='b'>",
"output":[["StartTag", "h", {"a":"b"}]]},
{"description":"Start Tag w/attribute no quotes",
"input":"<h a=b>",
"output":[["StartTag", "h", {"a":"b"}]]},
{"description":"Start/End Tag",
"input":"<h></h>",
"output":[["StartTag", "h", {}], ["EndTag", "h"]]},
{"description":"Two unclosed start tags",
"input":"<p>One<p>Two",
"output":[["StartTag", "p", {}], ["Character", "One"], ["StartTag", "p", {}], ["Character", "Two"]]},
{"description":"End Tag w/attribute",
"input":"<h></h a='b'>",
"output":[["StartTag", "h", {}], ["EndTag", "h"]],
"errors":[
{ "code": "end-tag-with-attributes", "line": 1, "col": 13 }
]},
{"description":"Multiple atts",
"input":"<h a='b' c='d'>",
"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
{"description":"Multiple atts no space",
"input":"<h a='b'c='d'>",
"output":[["StartTag", "h", {"a":"b", "c":"d"}]],
"errors":[
{ "code": "missing-whitespace-between-attributes", "line": 1, "col": 9 }
]},
{"description":"Repeated attr",
"input":"<h a='b' a='d'>",
"output":[["StartTag", "h", {"a":"b"}]],
"errors":[
{ "code": "duplicate-attribute", "line": 1, "col": 11 }
]},
{"description":"Simple comment",
"input":"<!--comment-->",
"output":[["Comment", "comment"]]},
{"description":"Comment, Central dash no space",
"input":"<!----->",
"output":[["Comment", "-"]]},
{"description":"Comment, two central dashes",
"input":"<!-- --comment -->",
"output":[["Comment", " --comment "]]},
{"description":"Comment, central less-than bang",
"input":"<!--<!-->",
"output":[["Comment", "<!"]]},
{"description":"Unfinished comment",
"input":"<!--comment",
"output":[["Comment", "comment"]],
"errors":[
{ "code": "eof-in-comment", "line": 1, "col": 12 }
]},
{"description":"Unfinished comment after start of nested comment",
"input":"<!-- <!--",
"output":[["Comment", " <!"]],
"errors":[
{ "code": "eof-in-comment", "line": 1, "col": 10 }
]},
{"description":"Start of a comment",
"input":"<!-",
"output":[["Comment", "-"]],
"errors":[
{ "code": "incorrectly-opened-comment", "line": 1, "col": 3 }
]},
{"description":"Short comment",
"input":"<!-->",
"output":[["Comment", ""]],
"errors":[
{ "code": "abrupt-closing-of-empty-comment", "line": 1, "col": 5 }
]},
{"description":"Short comment two",
"input":"<!--->",
"output":[["Comment", ""]],
"errors":[
{ "code": "abrupt-closing-of-empty-comment", "line": 1, "col": 6 }
]},
{"description":"Short comment three",
"input":"<!---->",
"output":[["Comment", ""]]},
{"description":"< in comment",
"input":"<!-- <test-->",
"output":[["Comment", " <test"]]},
{"description":"<! in comment",
"input":"<!-- <!test-->",
"output":[["Comment", " <!test"]]},
{"description":"<!- in comment",
"input":"<!-- <!-test-->",
"output":[["Comment", " <!-test"]]},
{"description":"Nested comment",
"input":"<!-- <!--test-->",
"output":[["Comment", " <!--test"]],
"errors":[
{ "code": "nested-comment", "line": 1, "col": 10 }
]},
{"description":"Nested comment with extra <",
"input":"<!-- <<!--test-->",
"output":[["Comment", " <<!--test"]],
"errors":[
{ "code": "nested-comment", "line": 1, "col": 11 }
]},
{"description":"< in script data",
"initialStates":["Script data state"],
"input":"<test-->",
"output":[["Character", "<test-->"]]},
{"description":"<! in script data",
"initialStates":["Script data state"],
"input":"<!test-->",
"output":[["Character", "<!test-->"]]},
{"description":"<!- in script data",
"initialStates":["Script data state"],
"input":"<!-test-->",
"output":[["Character", "<!-test-->"]]},
{"description":"Escaped script data",
"initialStates":["Script data state"],
"input":"<!--test-->",
"output":[["Character", "<!--test-->"]]},
{"description":"< in script HTML comment",
"initialStates":["Script data state"],
"input":"<!-- < test -->",
"output":[["Character", "<!-- < test -->"]]},
{"description":"</ in script HTML comment",
"initialStates":["Script data state"],
"input":"<!-- </ test -->",
"output":[["Character", "<!-- </ test -->"]]},
{"description":"Start tag in script HTML comment",
"initialStates":["Script data state"],
"input":"<!-- <test> -->",
"output":[["Character", "<!-- <test> -->"]]},
{"description":"End tag in script HTML comment",
"initialStates":["Script data state"],
"input":"<!-- </test> -->",
"output":[["Character", "<!-- </test> -->"]]},
{"description":"- in script HTML comment double escaped",
"initialStates":["Script data state"],
"input":"<!--<script>-</script>-->",
"output":[["Character", "<!--<script>-</script>-->"]]},
{"description":"-- in script HTML comment double escaped",
"initialStates":["Script data state"],
"input":"<!--<script>--</script>-->",
"output":[["Character", "<!--<script>--</script>-->"]]},
{"description":"--- in script HTML comment double escaped",
"initialStates":["Script data state"],
"input":"<!--<script>---</script>-->",
"output":[["Character", "<!--<script>---</script>-->"]]},
{"description":"- spaced in script HTML comment double escaped",
"initialStates":["Script data state"],
"input":"<!--<script> - </script>-->",
"output":[["Character", "<!--<script> - </script>-->"]]},
{"description":"-- spaced in script HTML comment double escaped",
"initialStates":["Script data state"],
"input":"<!--<script> -- </script>-->",
"output":[["Character", "<!--<script> -- </script>-->"]]},
{"description":"Ampersand EOF",
"input":"&",
"output":[["Character", "&"]]},
{"description":"Ampersand ampersand EOF",
"input":"&&",
"output":[["Character", "&&"]]},
{"description":"Ampersand space EOF",
"input":"& ",
"output":[["Character", "& "]]},
{"description":"Unfinished entity",
"input":"&f",
"output":[["Character", "&f"]]},
{"description":"Ampersand, number sign",
"input":"&#",
"output":[["Character", "&#"]],
"errors":[
{ "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 3 }
]},
{"description":"Unfinished numeric entity",
"input":"&#x",
"output":[["Character", "&#x"]],
"errors":[
{ "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 4 }
]},
{"description":"Entity with trailing semicolon (1)",
"input":"I'm &not;it",
"output":[["Character","I'm \u00ACit"]]},
{"description":"Entity with trailing semicolon (2)",
"input":"I'm &notin;",
"output":[["Character","I'm \u2209"]]},
{"description":"Entity without trailing semicolon (1)",
"input":"I'm &notit",
"output":[["Character","I'm \u00ACit"]],
"errors": [
{"code" : "missing-semicolon-after-character-reference", "line": 1, "col": 9 }
]},
{"description":"Entity without trailing semicolon (2)",
"input":"I'm &notin",
"output":[["Character","I'm \u00ACin"]],
"errors": [
{"code" : "missing-semicolon-after-character-reference", "line": 1, "col": 9 }
]},
{"description":"Partial entity match at end of file",
"input":"I'm &no",
"output":[["Character","I'm &no"]]},
{"description":"Non-ASCII character reference name",
"input":"&\u00AC;",
"output":[["Character", "&\u00AC;"]]},
{"description":"ASCII decimal entity",
"input":"&#0036;",
"output":[["Character","$"]]},
{"description":"ASCII hexadecimal entity",
"input":"&#x3f;",
"output":[["Character","?"]]},
{"description":"Hexadecimal entity in attribute",
"input":"<h a='&#x3f;'></h>",
"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]},
{"description":"Entity in attribute without semicolon ending in x",
"input":"<h a='&notx'>",
"output":[["StartTag", "h", {"a":"&notx"}]]},
{"description":"Entity in attribute without semicolon ending in 1",
"input":"<h a='&not1'>",
"output":[["StartTag", "h", {"a":"&not1"}]]},
{"description":"Entity in attribute without semicolon ending in i",
"input":"<h a='&noti'>",
"output":[["StartTag", "h", {"a":"&noti"}]]},
{"description":"Entity in attribute without semicolon",
"input":"<h a='&COPY'>",
"output":[["StartTag", "h", {"a":"\u00A9"}]],
"errors": [
{"code" : "missing-semicolon-after-character-reference", "line": 1, "col": 12 }
]},
{"description":"Unquoted attribute ending in ampersand",
"input":"<s o=& t>",
"output":[["StartTag","s",{"o":"&","t":""}]]},
{"description":"Unquoted attribute at end of tag with final character of &, with tag followed by characters",
"input":"<a a=a&>foo",
"output":[["StartTag", "a", {"a":"a&"}], ["Character", "foo"]]},
{"description":"plaintext element",
"input":"<plaintext>foobar",
"output":[["StartTag","plaintext",{}], ["Character","foobar"]]},
{"description":"Open angled bracket in unquoted attribute value state",
"input":"<a a=f<>",
"output":[["StartTag", "a", {"a":"f<"}]],
"errors":[
{ "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 }
]}
]}

View file

@ -1,275 +0,0 @@
{"tests": [
{"description":"DOCTYPE without name",
"input":"<!DOCTYPE>",
"output":[["DOCTYPE", null, null, null, false]],
"errors":[
{ "code": "missing-doctype-name", "line": 1, "col": 10 }
]},
{"description":"DOCTYPE without space before name",
"input":"<!DOCTYPEhtml>",
"output":[["DOCTYPE", "html", null, null, true]],
"errors":[
{ "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 }
]},
{"description":"Incorrect DOCTYPE without a space before name",
"input":"<!DOCTYPEfoo>",
"output":[["DOCTYPE", "foo", null, null, true]],
"errors":[
{ "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 }
]},
{"description":"DOCTYPE with publicId",
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">",
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]},
{"description":"DOCTYPE with EOF after PUBLIC",
"input":"<!DOCTYPE html PUBLIC",
"output":[["DOCTYPE", "html", null, null, false]],
"errors": [
{ "code": "eof-in-doctype", "col": 22, "line": 1 }
]},
{"description":"DOCTYPE with EOF after PUBLIC '",
"input":"<!DOCTYPE html PUBLIC '",
"output":[["DOCTYPE", "html", "", null, false]],
"errors": [
{ "code": "eof-in-doctype", "col": 24, "line": 1 }
]},
{"description":"DOCTYPE with EOF after PUBLIC 'x",
"input":"<!DOCTYPE html PUBLIC 'x",
"output":[["DOCTYPE", "html", "x", null, false]],
"errors": [
{ "code": "eof-in-doctype", "col": 25, "line": 1 }
]},
{"description":"DOCTYPE with systemId",
"input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">",
"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
{"description":"DOCTYPE with single-quoted systemId",
"input":"<!DOCTYPE html SYSTEM '-//W3C//DTD HTML Transitional 4.01//EN'>",
"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
{"description":"DOCTYPE with publicId and systemId",
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">",
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
{"description":"DOCTYPE with > in double-quoted publicId",
"input":"<!DOCTYPE html PUBLIC \">x",
"output":[["DOCTYPE", "html", "", null, false], ["Character", "x"]],
"errors": [
{ "code": "abrupt-doctype-public-identifier", "col": 24, "line": 1 }
]},
{"description":"DOCTYPE with > in single-quoted publicId",
"input":"<!DOCTYPE html PUBLIC '>x",
"output":[["DOCTYPE", "html", "", null, false], ["Character", "x"]],
"errors": [
{ "code": "abrupt-doctype-public-identifier", "col": 24, "line": 1 }
]},
{"description":"DOCTYPE with > in double-quoted systemId",
"input":"<!DOCTYPE html PUBLIC \"foo\" \">x",
"output":[["DOCTYPE", "html", "foo", "", false], ["Character", "x"]],
"errors": [
{ "code": "abrupt-doctype-system-identifier", "col": 30, "line": 1 }
]},
{"description":"DOCTYPE with > in single-quoted systemId",
"input":"<!DOCTYPE html PUBLIC 'foo' '>x",
"output":[["DOCTYPE", "html", "foo", "", false], ["Character", "x"]],
"errors": [
{ "code": "abrupt-doctype-system-identifier", "col": 30, "line": 1 }
]},
{"description":"Incomplete doctype",
"input":"<!DOCTYPE html ",
"output":[["DOCTYPE", "html", null, null, false]],
"errors":[
{ "code": "eof-in-doctype", "line": 1, "col": 16 }
]},
{"description":"Numeric entity representing the NUL character",
"input":"&#0000;",
"output":[["Character", "\uFFFD"]],
"errors":[
{ "code": "null-character-reference", "line": 1, "col": 8 }
]},
{"description":"Hexadecimal entity representing the NUL character",
"input":"&#x0000;",
"output":[["Character", "\uFFFD"]],
"errors":[
{ "code": "null-character-reference", "line": 1, "col": 9 }
]},
{"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
"input":"&#2225222;",
"output":[["Character", "\uFFFD"]],
"errors":[
{ "code": "character-reference-outside-unicode-range", "line": 1, "col": 11 }
]},
{"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)",
"input":"&#x1010FFFF;",
"output":[["Character", "\uFFFD"]],
"errors":[
{ "code": "character-reference-outside-unicode-range", "line": 1, "col": 13 }
]},
{"description":"Hexadecimal entity pair representing a surrogate pair",
"input":"&#xD869;&#xDED6;",
"output":[["Character", "\uFFFD\uFFFD"]],
"errors":[
{ "code": "surrogate-character-reference", "line": 1, "col": 9 },
{ "code": "surrogate-character-reference", "line": 1, "col": 17 }
]},
{"description":"Hexadecimal entity with mixed uppercase and lowercase",
"input":"&#xaBcD;",
"output":[["Character", "\uABCD"]]},
{"description":"Entity without a name",
"input":"&;",
"output":[["Character", "&;"]]},
{"description":"Unescaped ampersand in attribute value",
"input":"<h a='&'>",
"output":[["StartTag", "h", { "a":"&" }]]},
{"description":"StartTag containing <",
"input":"<a<b>",
"output":[["StartTag", "a<b", { }]]},
{"description":"Non-void element containing trailing /",
"input":"<h/>",
"output":[["StartTag","h",{},true]]},
{"description":"Void element with permitted slash",
"input":"<br/>",
"output":[["StartTag","br",{},true]]},
{"description":"Void element with permitted slash (with attribute)",
"input":"<br foo='bar'/>",
"output":[["StartTag","br",{"foo":"bar"},true]]},
{"description":"StartTag containing /",
"input":"<h/a='b'>",
"output":[["StartTag", "h", { "a":"b" }]],
"errors":[
{ "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }
]},
{"description":"Double-quoted attribute value",
"input":"<h a=\"b\">",
"output":[["StartTag", "h", { "a":"b" }]]},
{"description":"Unescaped </",
"input":"</",
"output":[["Character", "</"]],
"errors":[
{ "code": "eof-before-tag-name", "line": 1, "col": 3 }
]},
{"description":"Illegal end tag name",
"input":"</1>",
"output":[["Comment", "1"]],
"errors":[
{ "code": "invalid-first-character-of-tag-name", "line": 1, "col": 3 }
]},
{"description":"Simili processing instruction",
"input":"<?namespace>",
"output":[["Comment", "?namespace"]],
"errors":[
{ "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
]},
{"description":"A bogus comment stops at >, even if preceeded by two dashes",
"input":"<?foo-->",
"output":[["Comment", "?foo--"]],
"errors":[
{ "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
]},
{"description":"Unescaped <",
"input":"foo < bar",
"output":[["Character", "foo < bar"]],
"errors":[
{ "code": "invalid-first-character-of-tag-name", "line": 1, "col": 6 }
]},
{"description":"Null Byte Replacement",
"input":"\u0000",
"output":[["Character", "\u0000"]],
"errors":[
{ "code": "unexpected-null-character", "line": 1, "col": 1 }
]},
{"description":"Comment with dash",
"input":"<!---x",
"output":[["Comment", "-x"]],
"errors":[
{ "code": "eof-in-comment", "line": 1, "col": 7 }
]},
{"description":"Entity + newline",
"input":"\nx\n&gt;\n",
"output":[["Character","\nx\n>\n"]]},
{"description":"Start tag with no attributes but space before the greater-than sign",
"input":"<h >",
"output":[["StartTag", "h", {}]]},
{"description":"Empty attribute followed by uppercase attribute",
"input":"<h a B=''>",
"output":[["StartTag", "h", {"a":"", "b":""}]]},
{"description":"Double-quote after attribute name",
"input":"<h a \">",
"output":[["StartTag", "h", {"a":"", "\"":""}]],
"errors":[
{ "code": "unexpected-character-in-attribute-name", "line": 1, "col": 6 }
]},
{"description":"Single-quote after attribute name",
"input":"<h a '>",
"output":[["StartTag", "h", {"a":"", "'":""}]],
"errors":[
{ "code": "unexpected-character-in-attribute-name", "line": 1, "col": 6 }
]},
{"description":"Empty end tag with following characters",
"input":"a</>bc",
"output":[["Character", "abc"]],
"errors":[
{ "code": "missing-end-tag-name", "line": 1, "col": 4 }
]},
{"description":"Empty end tag with following tag",
"input":"a</><b>c",
"output":[["Character", "a"], ["StartTag", "b", {}], ["Character", "c"]],
"errors":[
{ "code": "missing-end-tag-name", "line": 1, "col": 4 }
]},
{"description":"Empty end tag with following comment",
"input":"a</><!--b-->c",
"output":[["Character", "a"], ["Comment", "b"], ["Character", "c"]],
"errors":[
{ "code": "missing-end-tag-name", "line": 1, "col": 4 }
]},
{"description":"Empty end tag with following end tag",
"input":"a</></b>c",
"output":[["Character", "a"], ["EndTag", "b"], ["Character", "c"]],
"errors":[
{ "code": "missing-end-tag-name", "line": 1, "col": 4 }
]}
]}
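
For reference, each record in the tokenizer fixtures deleted above pairs an
"input" string with the token "output" a conforming tokenizer should emit,
plus any expected parse "errors". The sketch below is only an illustration of
how such a file could be consumed: `tokenize` is a hypothetical stand-in for
whatever tokenizer is under test, and entries flagged "doubleEscaped" (which
need an extra unicode-escape decoding pass) are simply skipped here.

    import json

    def load_tokenizer_tests(path):
        # Each .test file is a JSON object with a top-level "tests" list.
        with open(path, encoding="utf-8") as fh:
            return json.load(fh).get("tests", [])

    def failing_tests(path, tokenize):
        # `tokenize(text)` is a hypothetical callable returning tokens in the
        # same [type, ...] list form used by the fixtures above.
        failures = []
        for test in load_tokenizer_tests(path):
            if test.get("doubleEscaped"):
                continue  # skipped: these need a second unescaping pass
            if tokenize(test["input"]) != test["output"]:
                failures.append(test["description"])
        return failures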

File diff suppressed because it is too large

View file

@@ -1,532 +0,0 @@
{"tests": [
{"description":"< in attribute name",
"input":"<z/0 <>",
"output":[["StartTag", "z", {"0": "", "<": ""}]],
"errors":[
{ "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 },
{ "code": "unexpected-character-in-attribute-name", "line": 1, "col": 7 }
]},
{"description":"< in unquoted attribute value",
"input":"<z x=<>",
"output":[["StartTag", "z", {"x": "<"}]],
"errors":[
{ "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 }
]},
{"description":"= in unquoted attribute value",
"input":"<z z=z=z>",
"output":[["StartTag", "z", {"z": "z=z"}]],
"errors":[
{ "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 }
]},
{"description":"= attribute",
"input":"<z =>",
"output":[["StartTag", "z", {"=": ""}]],
"errors":[
{ "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 }
]},
{"description":"== attribute",
"input":"<z ==>",
"output":[["StartTag", "z", {"=": ""}]],
"errors":[
{ "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 },
{ "code": "missing-attribute-value", "line": 1, "col": 6 }
]},
{"description":"=== attribute",
"input":"<z ===>",
"output":[["StartTag", "z", {"=": "="}]],
"errors":[
{ "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 },
{ "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 }
]},
{"description":"==== attribute",
"input":"<z ====>",
"output":[["StartTag", "z", {"=": "=="}]],
"errors":[
{ "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 },
{ "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 },
{ "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 }
]},
{"description":"\" after ampersand in double-quoted attribute value",
"input":"<z z=\"&\">",
"output":[["StartTag", "z", {"z": "&"}]]},
{"description":"' after ampersand in double-quoted attribute value",
"input":"<z z=\"&'\">",
"output":[["StartTag", "z", {"z": "&'"}]]},
{"description":"' after ampersand in single-quoted attribute value",
"input":"<z z='&'>",
"output":[["StartTag", "z", {"z": "&"}]]},
{"description":"\" after ampersand in single-quoted attribute value",
"input":"<z z='&\"'>",
"output":[["StartTag", "z", {"z": "&\""}]]},
{"description":"Text after bogus character reference",
"input":"<z z='&xlink_xmlns;'>bar<z>",
"output":[["StartTag","z",{"z":"&xlink_xmlns;"}],["Character","bar"],["StartTag","z",{}]]},
{"description":"Text after hex character reference",
"input":"<z z='&#x0020; foo'>bar<z>",
"output":[["StartTag","z",{"z":" foo"}],["Character","bar"],["StartTag","z",{}]]},
{"description":"Attribute name starting with \"",
"input":"<foo \"='bar'>",
"output":[["StartTag", "foo", {"\"": "bar"}]],
"errors":[
{ "code": "unexpected-character-in-attribute-name", "line": 1, "col": 6 }
]},
{"description":"Attribute name starting with '",
"input":"<foo '='bar'>",
"output":[["StartTag", "foo", {"'": "bar"}]],
"errors":[
{ "code": "unexpected-character-in-attribute-name", "line": 1, "col": 6 }
]},
{"description":"Attribute name containing \"",
"input":"<foo a\"b='bar'>",
"output":[["StartTag", "foo", {"a\"b": "bar"}]],
"errors":[
{ "code": "unexpected-character-in-attribute-name", "line": 1, "col": 7 }
]},
{"description":"Attribute name containing '",
"input":"<foo a'b='bar'>",
"output":[["StartTag", "foo", {"a'b": "bar"}]],
"errors":[
{ "code": "unexpected-character-in-attribute-name", "line": 1, "col": 7 }
]},
{"description":"Unquoted attribute value containing '",
"input":"<foo a=b'c>",
"output":[["StartTag", "foo", {"a": "b'c"}]],
"errors":[
{ "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 9 }
]},
{"description":"Unquoted attribute value containing \"",
"input":"<foo a=b\"c>",
"output":[["StartTag", "foo", {"a": "b\"c"}]],
"errors":[
{ "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 9 }
]},
{"description":"Double-quoted attribute value not followed by whitespace",
"input":"<foo a=\"b\"c>",
"output":[["StartTag", "foo", {"a": "b", "c": ""}]],
"errors":[
{ "code": "missing-whitespace-between-attributes", "line": 1, "col": 11 }
]},
{"description":"Single-quoted attribute value not followed by whitespace",
"input":"<foo a='b'c>",
"output":[["StartTag", "foo", {"a": "b", "c": ""}]],
"errors":[
{ "code": "missing-whitespace-between-attributes", "line": 1, "col": 11 }
]},
{"description":"Quoted attribute followed by permitted /",
"input":"<br a='b'/>",
"output":[["StartTag","br",{"a":"b"},true]]},
{"description":"Quoted attribute followed by non-permitted /",
"input":"<bar a='b'/>",
"output":[["StartTag","bar",{"a":"b"},true]]},
{"description":"CR EOF after doctype name",
"input":"<!doctype html \r",
"output":[["DOCTYPE", "html", null, null, false]],
"errors":[
{ "code": "eof-in-doctype", "line": 2, "col": 1 }
]},
{"description":"CR EOF in tag name",
"input":"<z\r",
"output":[],
"errors":[
{ "code": "eof-in-tag", "line": 2, "col": 1 }
]},
{"description":"Slash EOF in tag name",
"input":"<z/",
"output":[],
"errors":[
{ "code": "eof-in-tag", "line": 1, "col": 4 }
]},
{"description":"Zero hex numeric entity",
"input":"&#x0",
"output":[["Character", "\uFFFD"]],
"errors":[
{ "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 },
{ "code": "null-character-reference", "line": 1, "col": 5 }
]},
{"description":"Zero decimal numeric entity",
"input":"&#0",
"output":[["Character", "\uFFFD"]],
"errors":[
{ "code": "missing-semicolon-after-character-reference", "line": 1, "col": 4 },
{ "code": "null-character-reference", "line": 1, "col": 4 }
]},
{"description":"Zero-prefixed hex numeric entity",
"input":"&#x000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000041;",
"output":[["Character", "A"]]},
{"description":"Zero-prefixed decimal numeric entity",
"input":"&#000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000065;",
"output":[["Character", "A"]]},
{"description":"Empty hex numeric entities",
"input":"&#x &#X ",
"output":[["Character", "&#x &#X "]],
"errors":[
{ "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 4 },
{ "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 8 }
]},
{"description":"Invalid digit in hex numeric entity",
"input":"&#xZ",
"output":[["Character", "&#xZ"]],
"errors":[
{ "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 4 }
]},
{"description":"Empty decimal numeric entities",
"input":"&# &#; ",
"output":[["Character", "&# &#; "]],
"errors":[
{ "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 3 },
{ "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 6 }
]},
{"description":"Invalid digit in decimal numeric entity",
"input":"&#A",
"output":[["Character", "&#A"]],
"errors":[
{ "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 3 }
]},
{"description":"Non-BMP numeric entity",
"input":"&#x10000;",
"output":[["Character", "\uD800\uDC00"]]},
{"description":"Maximum non-BMP numeric entity",
"input":"&#X10FFFF;",
"output":[["Character", "\uDBFF\uDFFF"]],
"errors":[
{ "code": "noncharacter-character-reference", "line": 1, "col": 11 }
]},
{"description":"Above maximum numeric entity",
"input":"&#x110000;",
"output":[["Character", "\uFFFD"]],
"errors":[
{ "code": "character-reference-outside-unicode-range", "line": 1, "col": 11 }
]},
{"description":"32-bit hex numeric entity",
"input":"&#x80000041;",
"output":[["Character", "\uFFFD"]],
"errors":[
{ "code": "character-reference-outside-unicode-range", "line": 1, "col": 13 }
]},
{"description":"33-bit hex numeric entity",
"input":"&#x100000041;",
"output":[["Character", "\uFFFD"]],
"errors":[
{ "code": "character-reference-outside-unicode-range", "line": 1, "col": 14 }
]},
{"description":"33-bit decimal numeric entity",
"input":"&#4294967361;",
"output":[["Character", "\uFFFD"]],
"errors":[
{ "code": "character-reference-outside-unicode-range", "line": 1, "col": 14 }
]},
{"description":"65-bit hex numeric entity",
"input":"&#x10000000000000041;",
"output":[["Character", "\uFFFD"]],
"errors":[
{ "code": "character-reference-outside-unicode-range", "line": 1, "col": 22 }
]},
{"description":"65-bit decimal numeric entity",
"input":"&#18446744073709551681;",
"output":[["Character", "\uFFFD"]],
"errors":[
{ "code": "character-reference-outside-unicode-range", "line": 1, "col": 24 }
]},
{"description":"Surrogate code point edge cases",
"input":"&#xD7FF;&#xD800;&#xD801;&#xDFFE;&#xDFFF;&#xE000;",
"output":[["Character", "\uD7FF\uFFFD\uFFFD\uFFFD\uFFFD\uE000"]],
"errors":[
{ "code": "surrogate-character-reference", "line": 1, "col": 17 },
{ "code": "surrogate-character-reference", "line": 1, "col": 25 },
{ "code": "surrogate-character-reference", "line": 1, "col": 33 },
{ "code": "surrogate-character-reference", "line": 1, "col": 41 }
]},
{"description":"Uppercase start tag name",
"input":"<X>",
"output":[["StartTag", "x", {}]]},
{"description":"Uppercase end tag name",
"input":"</X>",
"output":[["EndTag", "x"]]},
{"description":"Uppercase attribute name",
"input":"<x X>",
"output":[["StartTag", "x", { "x":"" }]]},
{"description":"Tag/attribute name case edge values",
"input":"<x@AZ[`az{ @AZ[`az{>",
"output":[["StartTag", "x@az[`az{", { "@az[`az{":"" }]]},
{"description":"Duplicate different-case attributes",
"input":"<x x=1 x=2 X=3>",
"output":[["StartTag", "x", { "x":"1" }]],
"errors":[
{ "code": "duplicate-attribute", "line": 1, "col": 9 },
{ "code": "duplicate-attribute", "line": 1, "col": 13 }
]},
{"description":"Uppercase close tag attributes",
"input":"</x X>",
"output":[["EndTag", "x"]],
"errors":[
{ "code": "end-tag-with-attributes", "line": 1, "col": 6 }
]},
{"description":"Duplicate close tag attributes",
"input":"</x x x>",
"output":[["EndTag", "x"]],
"errors":[
{ "code": "duplicate-attribute", "line": 1, "col": 8 },
{ "code": "end-tag-with-attributes", "line": 1, "col": 8 }
]},
{"description":"Permitted slash",
"input":"<br/>",
"output":[["StartTag","br",{},true]]},
{"description":"Non-permitted slash",
"input":"<xr/>",
"output":[["StartTag","xr",{},true]]},
{"description":"Permitted slash but in close tag",
"input":"</br/>",
"output":[["EndTag", "br"]],
"errors":[
{ "code": "end-tag-with-trailing-solidus", "line": 1, "col": 6 }
]},
{"description":"Doctype public case-sensitivity (1)",
"input":"<!DoCtYpE HtMl PuBlIc \"AbC\" \"XyZ\">",
"output":[["DOCTYPE", "html", "AbC", "XyZ", true]]},
{"description":"Doctype public case-sensitivity (2)",
"input":"<!dOcTyPe hTmL pUbLiC \"aBc\" \"xYz\">",
"output":[["DOCTYPE", "html", "aBc", "xYz", true]]},
{"description":"Doctype system case-sensitivity (1)",
"input":"<!DoCtYpE HtMl SyStEm \"XyZ\">",
"output":[["DOCTYPE", "html", null, "XyZ", true]]},
{"description":"Doctype system case-sensitivity (2)",
"input":"<!dOcTyPe hTmL sYsTeM \"xYz\">",
"output":[["DOCTYPE", "html", null, "xYz", true]]},
{"description":"U+0000 in lookahead region after non-matching character",
"input":"<!doc>\u0000",
"output":[["Comment", "doc"], ["Character", "\u0000"]],
"errors":[
{ "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
{ "code": "unexpected-null-character", "line": 1, "col": 7 }
]},
{"description":"U+0000 in lookahead region",
"input":"<!doc\u0000",
"output":[["Comment", "doc\uFFFD"]],
"errors":[
{ "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
{ "code": "unexpected-null-character", "line": 1, "col": 6 }
]},
{"description":"U+0080 in lookahead region",
"input":"<!doc\u0080",
"output":[["Comment", "doc\u0080"]],
"errors":[
{ "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
{ "code": "control-character-in-input-stream", "line": 1, "col": 6 }
]},
{"description":"U+FDD1 in lookahead region",
"input":"<!doc\uFDD1",
"output":[["Comment", "doc\uFDD1"]],
"errors":[
{ "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
{ "code": "noncharacter-in-input-stream", "line": 1, "col": 6 }
]},
{"description":"U+1FFFF in lookahead region",
"input":"<!doc\uD83F\uDFFF",
"output":[["Comment", "doc\uD83F\uDFFF"]],
"errors":[
{ "code": "incorrectly-opened-comment", "line": 1, "col": 3 },
{ "code": "noncharacter-in-input-stream", "line": 1, "col": 6 }
]},
{"description":"CR followed by non-LF",
"input":"\r?",
"output":[["Character", "\n?"]]},
{"description":"CR at EOF",
"input":"\r",
"output":[["Character", "\n"]]},
{"description":"LF at EOF",
"input":"\n",
"output":[["Character", "\n"]]},
{"description":"CR LF",
"input":"\r\n",
"output":[["Character", "\n"]]},
{"description":"CR CR",
"input":"\r\r",
"output":[["Character", "\n\n"]]},
{"description":"LF LF",
"input":"\n\n",
"output":[["Character", "\n\n"]]},
{"description":"LF CR",
"input":"\n\r",
"output":[["Character", "\n\n"]]},
{"description":"text CR CR CR text",
"input":"text\r\r\rtext",
"output":[["Character", "text\n\n\ntext"]]},
{"description":"Doctype publik",
"input":"<!DOCTYPE html PUBLIK \"AbC\" \"XyZ\">",
"output":[["DOCTYPE", "html", null, null, false]],
"errors":[
{ "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 }
]},
{"description":"Doctype publi",
"input":"<!DOCTYPE html PUBLI",
"output":[["DOCTYPE", "html", null, null, false]],
"errors":[
{ "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 }
]},
{"description":"Doctype sistem",
"input":"<!DOCTYPE html SISTEM \"AbC\">",
"output":[["DOCTYPE", "html", null, null, false]],
"errors":[
{ "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 }
]},
{"description":"Doctype sys",
"input":"<!DOCTYPE html SYS",
"output":[["DOCTYPE", "html", null, null, false]],
"errors":[
{ "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 }
]},
{"description":"Doctype html x>text",
"input":"<!DOCTYPE html x>text",
"output":[["DOCTYPE", "html", null, null, false], ["Character", "text"]],
"errors":[
{ "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 }
]},
{"description":"Grave accent in unquoted attribute",
"input":"<a a=aa`>",
"output":[["StartTag", "a", {"a":"aa`"}]],
"errors":[
{ "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 8 }
]},
{"description":"EOF in tag name state ",
"input":"<a",
"output":[],
"errors": [
{ "code": "eof-in-tag", "line": 1, "col": 3 }
]},
{"description":"EOF in before attribute name state",
"input":"<a ",
"output":[],
"errors":[
{ "code": "eof-in-tag", "line": 1, "col": 4 }
]},
{"description":"EOF in attribute name state",
"input":"<a a",
"output":[],
"errors":[
{ "code": "eof-in-tag", "line": 1, "col": 5 }
]},
{"description":"EOF in after attribute name state",
"input":"<a a ",
"output":[],
"errors":[
{ "code": "eof-in-tag", "line": 1, "col": 6 }
]},
{"description":"EOF in before attribute value state",
"input":"<a a =",
"output":[],
"errors":[
{ "code": "eof-in-tag", "line": 1, "col": 7 }
]},
{"description":"EOF in attribute value (double quoted) state",
"input":"<a a =\"a",
"output":[],
"errors":[
{ "code": "eof-in-tag", "line": 1, "col": 9 }
]},
{"description":"EOF in attribute value (single quoted) state",
"input":"<a a ='a",
"output":[],
"errors":[
{ "code": "eof-in-tag", "line": 1, "col": 9 }
]},
{"description":"EOF in attribute value (unquoted) state",
"input":"<a a =a",
"output":[],
"errors":[
{ "code": "eof-in-tag", "line": 1, "col": 8 }
]},
{"description":"EOF in after attribute value state",
"input":"<a a ='a'",
"output":[],
"errors":[
{ "code": "eof-in-tag", "line": 1, "col": 10 }
]}
]}

File diff suppressed because it is too large

View file

@@ -1,41 +0,0 @@
{"tests" : [
{"description": "Invalid Unicode character U+DFFF",
"doubleEscaped":true,
"input": "\\uDFFF",
"output":[["Character", "\\uDFFF"]],
"errors":[
{ "code": "surrogate-in-input-stream", "line": 1, "col": 1 }
]},
{"description": "Invalid Unicode character U+D800",
"doubleEscaped":true,
"input": "\\uD800",
"output":[["Character", "\\uD800"]],
"errors":[
{ "code": "surrogate-in-input-stream", "line": 1, "col": 1 }
]},
{"description": "Invalid Unicode character U+DFFF with valid preceding character",
"doubleEscaped":true,
"input": "a\\uDFFF",
"output":[["Character", "a\\uDFFF"]],
"errors":[
{ "code": "surrogate-in-input-stream", "line": 1, "col": 2 }
]},
{"description": "Invalid Unicode character U+D800 with valid following character",
"doubleEscaped":true,
"input": "\\uD800a",
"output":[["Character", "\\uD800a"]],
"errors":[
{ "code": "surrogate-in-input-stream", "line": 1, "col": 1 }
]},
{"description":"CR followed by U+0000",
"input":"\r\u0000",
"output":[["Character", "\n\u0000"]],
"errors":[
{ "code": "unexpected-null-character", "line": 2, "col": 1 }
]}
]
}

View file

@@ -1,20 +0,0 @@
{"xmlViolationTests": [
{"description":"Non-XML character",
"input":"a\uFFFFb",
"output":[["Character","a\uFFFDb"]]},
{"description":"Non-XML space",
"input":"a\u000Cb",
"output":[["Character","a b"]]},
{"description":"Double hyphen in comment",
"input":"<!-- foo -- bar -->",
"output":[["Comment"," foo - - bar "]]},
{"description":"FF between attributes",
"input":"<a b=''\u000Cc=''>",
"output":[["StartTag","a",{"b":"","c":""}]]}
]}

View file

@@ -1,108 +0,0 @@
Tree Construction Tests
=======================
Each file containing tree construction tests consists of any number of
tests separated by two newlines (LF) and a single newline before the end
of the file. For instance:

    [TEST]LF
    LF
    [TEST]LF
    LF
    [TEST]LF

Where [TEST] is the following format:
Each test must begin with a string "\#data" followed by a newline (LF).
All subsequent lines until a line that says "\#errors" are the test data
and must be passed to the system being tested unchanged, except with the
final newline (on the last line) removed.
Then there must be a line that says "\#errors". It must be followed by
one line per parse error that a conformant checker would return. It
doesn't matter what those lines are, although they can't be
"\#new-errors", "\#document-fragment", "\#document", "\#script-off",
"\#script-on", or empty, the only thing that matters is that there be
the right number of parse errors.
Then there \*may\* be a line that says "\#new-errors", which works like
the "\#errors" section adding more errors to the expected number of
errors.
Then there \*may\* be a line that says "\#document-fragment", which must
be followed by a newline (LF), followed by a string of characters that
indicates the context element, followed by a newline (LF). If the string
of characters starts with "svg ", the context element is in the SVG
namespace and the substring after "svg " is the local name. If the
string of characters starts with "math ", the context element is in the
MathML namespace and the substring after "math " is the local name.
Otherwise, the context element is in the HTML namespace and the string
is the local name. If this line is present the "\#data" must be parsed
using the HTML fragment parsing algorithm with the context element as
context.
Then there \*may\* be a line that says "\#script-off" or
"\#script-on". If a line that says "\#script-off" is present, the
parser must set the scripting flag to disabled. If a line that says
"\#script-on" is present, it must set it to enabled. Otherwise, the
test should be run in both modes.
Then there must be a line that says "\#document", which must be followed
by a dump of the tree of the parsed DOM. Each node must be represented
by a single line. Each line must start with "| ", followed by two spaces
per parent node that the node has before the root document node (a
short sketch of this indentation rule follows the list below).
- Element nodes must be represented by a "`<`", then the *tag name
  string*, then "`>`", and all the attributes must be given, sorted
lexicographically by UTF-16 code unit according to their *attribute
name string*, on subsequent lines, as if they were children of the
element node.
- Attribute nodes must have the *attribute name string*, then an "="
sign, then the attribute value in double quotes (").
- Text nodes must be the string, in double quotes. Newlines aren't
escaped.
- Comments must be "`<`" then "`!-- `" then the data then "` -->`".
- DOCTYPEs must be "`<!DOCTYPE `", then the name, then, if either the
  public id or system id is non-empty, a space, the public id in
  double-quotes, another space, the system id in double-quotes, and
  then in any case "`>`".
- Processing instructions must be "`<?`", then the target, then a
space, then the data and then "`>`". (The HTML parser cannot emit
processing instructions, but scripts can, and the WebVTT to DOM
rules can emit them.)
- Template contents are represented by the string "content" with the
children below it.
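
As a minimal sketch of the indentation rule above (assuming `depth` counts
the node's ancestors below the document node and `rendering` is the node's
textual form from the list above):

    def dump_line(depth, rendering):
        # "| " plus two spaces per ancestor, e.g. '<p>', 'id="A"' or '"One"'.
        return "| " + "  " * depth + rendering

    # dump_line(0, '<html>')  ->  '| <html>'
    # dump_line(2, '<p>')     ->  '|     <p>'
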
The *tag name string* is the local name prefixed by a namespace
designator. For the HTML namespace, the namespace designator is the
empty string, i.e. there's no prefix. For the SVG namespace, the
namespace designator is "svg ". For the MathML namespace, the namespace
designator is "math ".
The *attribute name string* is the local name prefixed by a namespace
designator. For no namespace, the namespace designator is the empty
string, i.e. there's no prefix. For the XLink namespace, the namespace
designator is "xlink ". For the XML namespace, the namespace designator
is "xml ". For the XMLNS namespace, the namespace designator is "xmlns
". Note the difference between "xlink:href" which is an attribute in no
namespace with the local name "xlink:href" and "xlink href" which is an
attribute in the xlink namespace with the local name "href".
If there is also a "\#document-fragment", the bit following "\#document"
must be a representation of the HTML fragment serialization for the
context element given by "\#document-fragment".
For example:
    #data
    <p>One<p>Two
    #errors
    3: Missing document type declaration
    #document
    | <html>
    |   <head>
    |   <body>
    |     <p>
    |       "One"
    |     <p>
    |       "Two"

View file

@@ -1,354 +0,0 @@
#data
<a><p></a></p>
#errors
(1,3): expected-doctype-but-got-start-tag
(1,10): adoption-agency-1.3
#document
| <html>
| <head>
| <body>
| <a>
| <p>
| <a>
#data
<a>1<p>2</a>3</p>
#errors
(1,3): expected-doctype-but-got-start-tag
(1,12): adoption-agency-1.3
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <p>
| <a>
| "2"
| "3"
#data
<a>1<button>2</a>3</button>
#errors
(1,3): expected-doctype-but-got-start-tag
(1,17): adoption-agency-1.3
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <button>
| <a>
| "2"
| "3"
#data
<a>1<b>2</a>3</b>
#errors
(1,3): expected-doctype-but-got-start-tag
(1,12): adoption-agency-1.3
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <b>
| "2"
| <b>
| "3"
#data
<a>1<div>2<div>3</a>4</div>5</div>
#errors
(1,3): expected-doctype-but-got-start-tag
(1,20): adoption-agency-1.3
(1,20): adoption-agency-1.3
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <div>
| <a>
| "2"
| <div>
| <a>
| "3"
| "4"
| "5"
#data
<table><a>1<p>2</a>3</p>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,10): unexpected-start-tag-implies-table-voodoo
(1,11): unexpected-character-implies-table-voodoo
(1,14): unexpected-start-tag-implies-table-voodoo
(1,15): unexpected-character-implies-table-voodoo
(1,19): unexpected-end-tag-implies-table-voodoo
(1,19): adoption-agency-1.3
(1,20): unexpected-character-implies-table-voodoo
(1,24): unexpected-end-tag-implies-table-voodoo
(1,24): eof-in-table
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <p>
| <a>
| "2"
| "3"
| <table>
#data
<b><b><a><p></a>
#errors
(1,3): expected-doctype-but-got-start-tag
(1,16): adoption-agency-1.3
(1,16): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <b>
| <b>
| <a>
| <p>
| <a>
#data
<b><a><b><p></a>
#errors
(1,3): expected-doctype-but-got-start-tag
(1,16): adoption-agency-1.3
(1,16): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <b>
| <a>
| <b>
| <b>
| <p>
| <a>
#data
<a><b><b><p></a>
#errors
(1,3): expected-doctype-but-got-start-tag
(1,16): adoption-agency-1.3
(1,16): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <a>
| <b>
| <b>
| <b>
| <b>
| <p>
| <a>
#data
<p>1<s id="A">2<b id="B">3</p>4</s>5</b>
#errors
(1,3): expected-doctype-but-got-start-tag
(1,30): unexpected-end-tag
(1,35): adoption-agency-1.3
#document
| <html>
| <head>
| <body>
| <p>
| "1"
| <s>
| id="A"
| "2"
| <b>
| id="B"
| "3"
| <s>
| id="A"
| <b>
| id="B"
| "4"
| <b>
| id="B"
| "5"
#data
<table><a>1<td>2</td>3</table>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,10): unexpected-start-tag-implies-table-voodoo
(1,11): unexpected-character-implies-table-voodoo
(1,15): unexpected-cell-in-table-body
(1,30): unexpected-implied-end-tag-in-table-view
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <a>
| "3"
| <table>
| <tbody>
| <tr>
| <td>
| "2"
#data
<table>A<td>B</td>C</table>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,8): unexpected-character-implies-table-voodoo
(1,12): unexpected-cell-in-table-body
(1,22): unexpected-character-implies-table-voodoo
#document
| <html>
| <head>
| <body>
| "AC"
| <table>
| <tbody>
| <tr>
| <td>
| "B"
#data
<a><svg><tr><input></a>
#errors
(1,3): expected-doctype-but-got-start-tag
(1,23): unexpected-end-tag
(1,23): adoption-agency-1.3
#document
| <html>
| <head>
| <body>
| <a>
| <svg svg>
| <svg tr>
| <svg input>
#data
<div><a><b><div><div><div><div><div><div><div><div><div><div></a>
#errors
(1,5): expected-doctype-but-got-start-tag
(1,65): adoption-agency-1.3
(1,65): adoption-agency-1.3
(1,65): adoption-agency-1.3
(1,65): adoption-agency-1.3
(1,65): adoption-agency-1.3
(1,65): adoption-agency-1.3
(1,65): adoption-agency-1.3
(1,65): adoption-agency-1.3
(1,65): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <div>
| <a>
| <b>
| <b>
| <div>
| <a>
| <div>
| <a>
| <div>
| <a>
| <div>
| <a>
| <div>
| <a>
| <div>
| <a>
| <div>
| <a>
| <div>
| <a>
| <div>
| <div>
#data
<div><a><b><u><i><code><div></a>
#errors
(1,5): expected-doctype-but-got-start-tag
(1,32): adoption-agency-1.3
(1,32): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <div>
| <a>
| <b>
| <u>
| <i>
| <code>
| <u>
| <i>
| <code>
| <div>
| <a>
#data
<b><b><b><b>x</b></b></b></b>y
#errors
(1,3): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <b>
| <b>
| <b>
| <b>
| "x"
| "y"
#data
<p><b><b><b><b><p>x
#errors
(1,3): expected-doctype-but-got-start-tag
(1,18): unexpected-end-tag
(1,19): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <p>
| <b>
| <b>
| <b>
| <b>
| <p>
| <b>
| <b>
| <b>
| "x"
#data
<b><em><foo><foob><fooc><aside></b></em>
#errors
(1,35): adoption-agency-1.3
(1,40): adoption-agency-1.3
(1,40): expected-closing-tag-but-got-eof
#document-fragment
div
#document
| <b>
| <em>
| <foo>
| <foob>
| <fooc>
| <aside>
| <b>

View file

@@ -1,39 +0,0 @@
#data
<b>1<i>2<p>3</b>4
#errors
(1,3): expected-doctype-but-got-start-tag
(1,16): adoption-agency-1.3
(1,17): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <b>
| "1"
| <i>
| "2"
| <i>
| <p>
| <b>
| "3"
| "4"
#data
<a><div><style></style><address><a>
#errors
(1,3): expected-doctype-but-got-start-tag
(1,35): unexpected-start-tag-implies-end-tag
(1,35): adoption-agency-1.3
(1,35): adoption-agency-1.3
(1,35): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <a>
| <div>
| <a>
| <style>
| <address>
| <a>
| <a>

View file

@@ -1,719 +0,0 @@
#data
<!doctype html><p>foo<address>bar<p>baz
#errors
(1,39): expected-closing-tag-but-got-eof
30: Unclosed element “address”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <address>
| "bar"
| <p>
| "baz"
#data
<!doctype html><address><p>foo</address>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <address>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<article>bar<p>baz
#errors
(1,39): expected-closing-tag-but-got-eof
30: Unclosed element “article”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <article>
| "bar"
| <p>
| "baz"
#data
<!doctype html><article><p>foo</article>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <article>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<aside>bar<p>baz
#errors
(1,37): expected-closing-tag-but-got-eof
28: Unclosed element “aside”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <aside>
| "bar"
| <p>
| "baz"
#data
<!doctype html><aside><p>foo</aside>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <aside>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<blockquote>bar<p>baz
#errors
(1,42): expected-closing-tag-but-got-eof
33: Unclosed element “blockquote”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <blockquote>
| "bar"
| <p>
| "baz"
#data
<!doctype html><blockquote><p>foo</blockquote>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <blockquote>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<center>bar<p>baz
#errors
(1,38): expected-closing-tag-but-got-eof
29: Unclosed element “center”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <center>
| "bar"
| <p>
| "baz"
#data
<!doctype html><center><p>foo</center>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <center>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<details>bar<p>baz
#errors
(1,39): expected-closing-tag-but-got-eof
30: Unclosed element “details”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <details>
| "bar"
| <p>
| "baz"
#data
<!doctype html><details><p>foo</details>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <details>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<dialog>bar<p>baz
#errors
(1,38): expected-closing-tag-but-got-eof
29: Unclosed element “dialog”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <dialog>
| "bar"
| <p>
| "baz"
#data
<!doctype html><dialog><p>foo</dialog>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <dialog>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<dir>bar<p>baz
#errors
(1,35): expected-closing-tag-but-got-eof
26: Unclosed element “dir”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <dir>
| "bar"
| <p>
| "baz"
#data
<!doctype html><dir><p>foo</dir>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <dir>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<div>bar<p>baz
#errors
(1,35): expected-closing-tag-but-got-eof
26: Unclosed element “div”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <div>
| "bar"
| <p>
| "baz"
#data
<!doctype html><div><p>foo</div>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <div>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<dl>bar<p>baz
#errors
(1,34): expected-closing-tag-but-got-eof
25: Unclosed element “dl”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <dl>
| "bar"
| <p>
| "baz"
#data
<!doctype html><dl><p>foo</dl>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <dl>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<fieldset>bar<p>baz
#errors
(1,40): expected-closing-tag-but-got-eof
31: Unclosed element “fieldset”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <fieldset>
| "bar"
| <p>
| "baz"
#data
<!doctype html><fieldset><p>foo</fieldset>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <fieldset>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<figcaption>bar<p>baz
#errors
(1,42): expected-closing-tag-but-got-eof
33: Unclosed element “figcaption”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <figcaption>
| "bar"
| <p>
| "baz"
#data
<!doctype html><figcaption><p>foo</figcaption>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <figcaption>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<figure>bar<p>baz
#errors
(1,38): expected-closing-tag-but-got-eof
29: Unclosed element “figure”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <figure>
| "bar"
| <p>
| "baz"
#data
<!doctype html><figure><p>foo</figure>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <figure>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<footer>bar<p>baz
#errors
(1,38): expected-closing-tag-but-got-eof
29: Unclosed element “footer”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <footer>
| "bar"
| <p>
| "baz"
#data
<!doctype html><footer><p>foo</footer>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <footer>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<header>bar<p>baz
#errors
(1,38): expected-closing-tag-but-got-eof
29: Unclosed element “header”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <header>
| "bar"
| <p>
| "baz"
#data
<!doctype html><header><p>foo</header>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <header>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<hgroup>bar<p>baz
#errors
(1,38): expected-closing-tag-but-got-eof
29: Unclosed element “hgroup”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <hgroup>
| "bar"
| <p>
| "baz"
#data
<!doctype html><hgroup><p>foo</hgroup>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <hgroup>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<listing>bar<p>baz
#errors
(1,39): expected-closing-tag-but-got-eof
30: Unclosed element “listing”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <listing>
| "bar"
| <p>
| "baz"
#data
<!doctype html><listing><p>foo</listing>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <listing>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<menu>bar<p>baz
#errors
(1,36): expected-closing-tag-but-got-eof
27: Unclosed element “menu”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <menu>
| "bar"
| <p>
| "baz"
#data
<!doctype html><menu><p>foo</menu>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <menu>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<nav>bar<p>baz
#errors
(1,35): expected-closing-tag-but-got-eof
26: Unclosed element “nav”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <nav>
| "bar"
| <p>
| "baz"
#data
<!doctype html><nav><p>foo</nav>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <nav>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<ol>bar<p>baz
#errors
(1,34): expected-closing-tag-but-got-eof
25: Unclosed element “ol”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <ol>
| "bar"
| <p>
| "baz"
#data
<!doctype html><ol><p>foo</ol>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <ol>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<pre>bar<p>baz
#errors
(1,35): expected-closing-tag-but-got-eof
26: Unclosed element “pre”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <pre>
| "bar"
| <p>
| "baz"
#data
<!doctype html><pre><p>foo</pre>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<section>bar<p>baz
#errors
(1,39): expected-closing-tag-but-got-eof
30: Unclosed element “section”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <section>
| "bar"
| <p>
| "baz"
#data
<!doctype html><section><p>foo</section>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <section>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<summary>bar<p>baz
#errors
(1,39): expected-closing-tag-but-got-eof
30: Unclosed element “summary”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <summary>
| "bar"
| <p>
| "baz"
#data
<!doctype html><summary><p>foo</summary>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <summary>
| <p>
| "foo"
| "bar"
#data
<!doctype html><p>foo<ul>bar<p>baz
#errors
(1,34): expected-closing-tag-but-got-eof
25: Unclosed element “ul”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <ul>
| "bar"
| <p>
| "baz"
#data
<!doctype html><ul><p>foo</ul>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <ul>
| <p>
| "foo"
| "bar"

View file

@@ -1,224 +0,0 @@
#data
FOO<!-- BAR -->BAZ
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -->
| "BAZ"
#data
FOO<!-- BAR --!>BAZ
#errors
(1,3): expected-doctype-but-got-chars
(1,15): unexpected-bang-after-double-dash-in-comment
#new-errors
(1:16) incorrectly-closed-comment
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -->
| "BAZ"
#data
FOO<!-- BAR --! >BAZ
#errors
(1,3): expected-doctype-but-got-chars
#new-errors
(1:20) eof-in-comment
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR --! >BAZ -->
#data
FOO<!-- BAR --!
>BAZ
#errors
(1,3): expected-doctype-but-got-chars
#new-errors
(1:20) eof-in-comment
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR --!
>BAZ -->
#data
FOO<!-- BAR -- >BAZ
#errors
(1,3): expected-doctype-but-got-chars
(1,15): unexpected-char-in-comment
(1,21): eof-in-comment
#new-errors
(1:22) eof-in-comment
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- >BAZ -->
#data
FOO<!-- BAR -- <QUX> -- MUX -->BAZ
#errors
(1,3): expected-doctype-but-got-chars
(1,15): unexpected-char-in-comment
(1,24): unexpected-char-in-comment
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- <QUX> -- MUX -->
| "BAZ"
#data
FOO<!-- BAR -- <QUX> -- MUX --!>BAZ
#errors
(1,3): expected-doctype-but-got-chars
(1,15): unexpected-char-in-comment
(1,24): unexpected-char-in-comment
(1,31): unexpected-bang-after-double-dash-in-comment
#new-errors
(1:32) incorrectly-closed-comment
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- <QUX> -- MUX -->
| "BAZ"
#data
FOO<!-- BAR -- <QUX> -- MUX -- >BAZ
#errors
(1,3): expected-doctype-but-got-chars
(1,15): unexpected-char-in-comment
(1,24): unexpected-char-in-comment
(1,31): unexpected-char-in-comment
(1,35): eof-in-comment
#new-errors
(1:36) eof-in-comment
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- <QUX> -- MUX -- >BAZ -->
#data
FOO<!---->BAZ
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- -->
| "BAZ"
#data
FOO<!--->BAZ
#errors
(1,3): expected-doctype-but-got-chars
(1,9): incorrect-comment
#new-errors
(1:9) abrupt-closing-of-empty-comment
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- -->
| "BAZ"
#data
FOO<!-->BAZ
#errors
(1,3): expected-doctype-but-got-chars
(1,8): incorrect-comment
#new-errors
(1:8) abrupt-closing-of-empty-comment
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- -->
| "BAZ"
#data
<?xml version="1.0">Hi
#errors
(1,1): expected-tag-name-but-got-question-mark
(1,22): expected-doctype-but-got-chars
#new-errors
(1:2) unexpected-question-mark-instead-of-tag-name
#document
| <!-- ?xml version="1.0" -->
| <html>
| <head>
| <body>
| "Hi"
#data
<?xml version="1.0">
#errors
(1,1): expected-tag-name-but-got-question-mark
(1,20): expected-doctype-but-got-eof
#new-errors
(1:2) unexpected-question-mark-instead-of-tag-name
#document
| <!-- ?xml version="1.0" -->
| <html>
| <head>
| <body>
#data
<?xml version
#errors
(1,1): expected-tag-name-but-got-question-mark
(1,13): expected-doctype-but-got-eof
#new-errors
(1:2) unexpected-question-mark-instead-of-tag-name
#document
| <!-- ?xml version -->
| <html>
| <head>
| <body>
#data
FOO<!----->BAZ
#errors
(1,3): expected-doctype-but-got-chars
(1,10): unexpected-dash-after-double-dash-in-comment
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- - -->
| "BAZ"
#data
<html><!-- comment --><title>Comment before head</title>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <!-- comment -->
| <head>
| <title>
| "Comment before head"
| <body>

View file

@@ -1,470 +0,0 @@
#data
<!DOCTYPE html>Hello
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "Hello"
#data
<!dOctYpE HtMl>Hello
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPEhtml>Hello
#errors
(1,9): need-space-after-doctype
#new-errors
(1:10) missing-whitespace-before-doctype-name
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE>Hello
#errors
(1,9): need-space-after-doctype
(1,10): expected-doctype-name-but-got-right-bracket
(1,10): unknown-doctype
#new-errors
(1:10) missing-doctype-name
#document
| <!DOCTYPE >
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE >Hello
#errors
(1,11): expected-doctype-name-but-got-right-bracket
(1,11): unknown-doctype
#new-errors
(1:11) missing-doctype-name
#document
| <!DOCTYPE >
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato>Hello
#errors
(1,17): unknown-doctype
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato >Hello
#errors
(1,18): unknown-doctype
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato taco>Hello
#errors
(1,17): expected-space-or-right-bracket-in-doctype
(1,22): unknown-doctype
#new-errors
(1:18) invalid-character-sequence-after-doctype-name
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato taco "ddd>Hello
#errors
(1,17): expected-space-or-right-bracket-in-doctype
(1,27): unknown-doctype
#new-errors
(1:18) invalid-character-sequence-after-doctype-name
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato sYstEM>Hello
#errors
(1,24): unexpected-char-in-doctype
(1,24): unknown-doctype
#new-errors
(1:24) missing-doctype-system-identifier
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato sYstEM >Hello
#errors
(1,28): unexpected-char-in-doctype
(1,28): unknown-doctype
#new-errors
(1:28) missing-doctype-system-identifier
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato sYstEM ggg>Hello
#errors
(1,34): unexpected-char-in-doctype
(1,37): unknown-doctype
#new-errors
(1:34) missing-quote-before-doctype-system-identifier
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM taco >Hello
#errors
(1,25): unexpected-char-in-doctype
(1,31): unknown-doctype
#new-errors
(1:25) missing-quote-before-doctype-system-identifier
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM 'taco"'>Hello
#errors
(1,32): unknown-doctype
#document
| <!DOCTYPE potato "" "taco"">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM "taco">Hello
#errors
(1,31): unknown-doctype
#document
| <!DOCTYPE potato "" "taco">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM "tai'co">Hello
#errors
(1,33): unknown-doctype
#document
| <!DOCTYPE potato "" "tai'co">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEMtaco "ddd">Hello
#errors
(1,24): unexpected-char-in-doctype
(1,34): unknown-doctype
#new-errors
(1:24) missing-quote-before-doctype-system-identifier
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato grass SYSTEM taco>Hello
#errors
(1,17): expected-space-or-right-bracket-in-doctype
(1,35): unknown-doctype
#new-errors
(1:18) invalid-character-sequence-after-doctype-name
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato pUbLIc>Hello
#errors
(1,24): unexpected-end-of-doctype
(1,24): unknown-doctype
#new-errors
(1:24) missing-doctype-public-identifier
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato pUbLIc >Hello
#errors
(1,25): unexpected-end-of-doctype
(1,25): unknown-doctype
#new-errors
(1:25) missing-doctype-public-identifier
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato pUbLIcgoof>Hello
#errors
(1,24): unexpected-char-in-doctype
(1,28): unknown-doctype
#new-errors
(1:24) missing-quote-before-doctype-public-identifier
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC goof>Hello
#errors
(1,25): unexpected-char-in-doctype
(1,29): unknown-doctype
#new-errors
(1:25) missing-quote-before-doctype-public-identifier
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC "go'of">Hello
#errors
(1,32): unknown-doctype
#document
| <!DOCTYPE potato "go'of" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC 'go'of'>Hello
#errors
(1,29): unexpected-char-in-doctype
(1,32): unknown-doctype
#new-errors
(1:29) missing-quote-before-doctype-system-identifier
#document
| <!DOCTYPE potato "go" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC 'go:hh of' >Hello
#errors
(1,38): unknown-doctype
#document
| <!DOCTYPE potato "go:hh of" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC "W3C-//dfdf" SYSTEM ggg>Hello
#errors
(1,38): unexpected-char-in-doctype
(1,48): unknown-doctype
#new-errors
(1:38) missing-quote-before-doctype-system-identifier
#document
| <!DOCTYPE potato "W3C-//dfdf" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">Hello
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE ...>Hello
#errors
(1,14): unknown-doctype
#document
| <!DOCTYPE ...>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
#errors
(2,58): unknown-doctype
#document
| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
#errors
(2,54): unknown-doctype
#document
| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE root-element [SYSTEM OR PUBLIC FPI] "uri" [
<!-- internal declarations -->
]>
#errors
(1,23): expected-space-or-right-bracket-in-doctype
(2,30): unknown-doctype
#new-errors
(1:24) invalid-character-sequence-after-doctype-name
#document
| <!DOCTYPE root-element>
| <html>
| <head>
| <body>
| "]>"
#data
<!DOCTYPE html PUBLIC
"-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
"http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
#errors
(3,53): unknown-doctype
#document
| <!DOCTYPE html "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML SYSTEM "http://www.w3.org/DTD/HTML4-strict.dtd"><body><b>Mine!</b></body>
#errors
(1,63): unknown-doctype
#document
| <!DOCTYPE html "" "http://www.w3.org/DTD/HTML4-strict.dtd">
| <html>
| <head>
| <body>
| <b>
| "Mine!"
#data
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN""http://www.w3.org/TR/html4/strict.dtd">
#errors
(1,50): unexpected-char-in-doctype
#new-errors
(1:50) missing-whitespace-between-doctype-public-and-system-identifiers
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
#errors
(1,50): unexpected-char-in-doctype
#new-errors
(1:50) missing-whitespace-between-doctype-public-and-system-identifiers
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML PUBLIC"-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
#errors
(1,21): unexpected-char-in-doctype
(1,49): unexpected-char-in-doctype
#new-errors
(1:22) missing-whitespace-after-doctype-public-keyword
(1:49) missing-whitespace-between-doctype-public-and-system-identifiers
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML PUBLIC'-//W3C//DTD HTML 4.01//EN''http://www.w3.org/TR/html4/strict.dtd'>
#errors
(1,21): unexpected-char-in-doctype
(1,49): unexpected-char-in-doctype
#new-errors
(1:22) missing-whitespace-after-doctype-public-keyword
(1:49) missing-whitespace-between-doctype-public-and-system-identifiers
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>

View file

@@ -1,943 +0,0 @@
#data
FOO&gt;BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO>BAR"
#data
FOO&gtBAR
#errors
(1,3): expected-doctype-but-got-chars
(1,6): named-entity-without-semicolon
#new-errors
(1:7) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| "FOO>BAR"
#data
FOO&gt BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,6): named-entity-without-semicolon
#new-errors
(1:7) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| "FOO> BAR"
#data
FOO&gt;;;BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO>;;BAR"
#data
I'm &notit; I tell you
#errors
(1,4): expected-doctype-but-got-chars
(1,9): named-entity-without-semicolon
#new-errors
(1:9) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| "I'm ¬it; I tell you"
#data
I'm &notin; I tell you
#errors
(1,4): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "I'm ∉ I tell you"
#data
&ammmp;
#errors
(1,1): expected-doctype-but-got-chars
(1,7): unknown-named-character-reference
#new-errors
(1:7) unknown-named-character-reference
#document
| <html>
| <head>
| <body>
| "&ammmp;"
#data
&ammmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmp;
#errors
(1,1): expected-doctype-but-got-chars
(1,950): unknown-named-character-reference
#new-errors
(1:950) unknown-named-character-reference
#document
| <html>
| <head>
| <body>
| "&ammmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmp;"
#data
FOO& BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO& BAR"
#data
FOO&<BAR>
#errors
(1,3): expected-doctype-but-got-chars
(1,9): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| "FOO&"
| <bar>
#data
FOO&&&&gt;BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO&&&>BAR"
#data
FOO&#41;BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO)BAR"
#data
FOO&#x41;BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOOABAR"
#data
FOO&#X41;BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOOABAR"
#data
FOO&#BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,5): expected-numeric-entity
#new-errors
(1:6) absence-of-digits-in-numeric-character-reference
#document
| <html>
| <head>
| <body>
| "FOO&#BAR"
#data
FOO&#ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,5): expected-numeric-entity
#new-errors
(1:6) absence-of-digits-in-numeric-character-reference
#document
| <html>
| <head>
| <body>
| "FOO&#ZOO"
#data
FOO&#xBAR
#errors
(1,3): expected-doctype-but-got-chars
(1,7): expected-numeric-entity
#new-errors
(1:9) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| "FOOºR"
#data
FOO&#xZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,6): expected-numeric-entity
#new-errors
(1:7) absence-of-digits-in-numeric-character-reference
#document
| <html>
| <head>
| <body>
| "FOO&#xZOO"
#data
FOO&#XZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,6): expected-numeric-entity
#new-errors
(1:7) absence-of-digits-in-numeric-character-reference
#document
| <html>
| <head>
| <body>
| "FOO&#XZOO"
#data
FOO&#41BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,7): numeric-entity-without-semicolon
#new-errors
(1:8) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| "FOO)BAR"
#data
FOO&#x41BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,10): numeric-entity-without-semicolon
#new-errors
(1:11) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| "FOO䆺R"
#data
FOO&#x41ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,8): numeric-entity-without-semicolon
#new-errors
(1:9) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| "FOOAZOO"
#data
FOO&#x0000;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) null-character-reference
#document
| <html>
| <head>
| <body>
|     "FOO�ZOO"
#data
FOO&#x0078;ZOO
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOOxZOO"
#data
FOO&#x0079;ZOO
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOOyZOO"
#data
FOO&#x0080;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOO€ZOO"
#data
FOO&#x0081;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0082;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
|     "FOO‚ZOO"
#data
FOO&#x0083;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOƒZOO"
#data
FOO&#x0084;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOO„ZOO"
#data
FOO&#x0085;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOO…ZOO"
#data
FOO&#x0086;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOO†ZOO"
#data
FOO&#x0087;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOO‡ZOO"
#data
FOO&#x0088;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOˆZOO"
#data
FOO&#x0089;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOO‰ZOO"
#data
FOO&#x008A;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOŠZOO"
#data
FOO&#x008B;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
|     "FOO‹ZOO"
#data
FOO&#x008C;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOŒZOO"
#data
FOO&#x008D;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x008E;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOŽZOO"
#data
FOO&#x008F;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0090;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0091;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
|     "FOO‘ZOO"
#data
FOO&#x0092;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
|     "FOO’ZOO"
#data
FOO&#x0093;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOO“ZOO"
#data
FOO&#x0094;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOO”ZOO"
#data
FOO&#x0095;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOO•ZOO"
#data
FOO&#x0096;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
|     "FOO–ZOO"
#data
FOO&#x0097;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOO—ZOO"
#data
FOO&#x0098;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOO˜ZOO"
#data
FOO&#x0099;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOO™ZOO"
#data
FOO&#x009A;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOšZOO"
#data
FOO&#x009B;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
|     "FOO›ZOO"
#data
FOO&#x009C;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOœZOO"
#data
FOO&#x009D;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x009E;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOžZOO"
#data
FOO&#x009F;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) control-character-reference
#document
| <html>
| <head>
| <body>
| "FOOŸZOO"
#data
FOO&#x00A0;ZOO
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO ZOO"
#data
FOO&#xD7FF;ZOO
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO퟿ZOO"
#data
FOO&#xD800;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) surrogate-character-reference
#document
| <html>
| <head>
| <body>
|     "FOO�ZOO"
#data
FOO&#xD801;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) surrogate-character-reference
#document
| <html>
| <head>
| <body>
|     "FOO�ZOO"
#data
FOO&#xDFFE;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) surrogate-character-reference
#document
| <html>
| <head>
| <body>
|     "FOO�ZOO"
#data
FOO&#xDFFF;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,11): illegal-codepoint-for-numeric-entity
#new-errors
(1:12) surrogate-character-reference
#document
| <html>
| <head>
| <body>
|     "FOO�ZOO"
#data
FOO&#xE000;ZOO
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x10FFFE;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
#new-errors
(1:14) noncharacter-character-reference
#document
| <html>
| <head>
| <body>
| "FOO􏿾ZOO"
#data
FOO&#x1087D4;ZOO
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO􈟔ZOO"
#data
FOO&#x10FFFF;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
#new-errors
(1:14) noncharacter-character-reference
#document
| <html>
| <head>
| <body>
| "FOO􏿿ZOO"
#data
FOO&#x110000;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
#new-errors
(1:14) character-reference-outside-unicode-range
#document
| <html>
| <head>
| <body>
|     "FOO�ZOO"
#data
FOO&#xFFFFFF;ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
#new-errors
(1:14) character-reference-outside-unicode-range
#document
| <html>
| <head>
| <body>
|     "FOO�ZOO"
#data
FOO&#11111111111
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
(1,13): eof-in-numeric-entity
#new-errors
(1:17) missing-semicolon-after-character-reference
(1:17) character-reference-outside-unicode-range
#document
| <html>
| <head>
| <body>
|     "FOO�"
#data
FOO&#1111111111
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
(1,13): eof-in-numeric-entity
#new-errors
(1:16) missing-semicolon-after-character-reference
(1:16) character-reference-outside-unicode-range
#document
| <html>
| <head>
| <body>
|     "FOO�"
#data
FOO&#111111111111
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
(1,13): eof-in-numeric-entity
#new-errors
(1:18) missing-semicolon-after-character-reference
(1:18) character-reference-outside-unicode-range
#document
| <html>
| <head>
| <body>
|     "FOO�"
#data
FOO&#11111111111ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,16): numeric-entity-without-semicolon
(1,16): illegal-codepoint-for-numeric-entity
#new-errors
(1:17) missing-semicolon-after-character-reference
(1:17) character-reference-outside-unicode-range
#document
| <html>
| <head>
| <body>
|     "FOO�ZOO"
#data
FOO&#1111111111ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,15): numeric-entity-without-semicolon
(1,15): illegal-codepoint-for-numeric-entity
#new-errors
(1:16) missing-semicolon-after-character-reference
(1:16) character-reference-outside-unicode-range
#document
| <html>
| <head>
| <body>
|     "FOO�ZOO"
#data
FOO&#111111111111ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,17): numeric-entity-without-semicolon
(1,17): illegal-codepoint-for-numeric-entity
#new-errors
(1:18) missing-semicolon-after-character-reference
(1:18) character-reference-outside-unicode-range
#document
| <html>
| <head>
| <body>
|     "FOO�ZOO"
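The character-reference cases above fix the decoding rules: hex and decimal references are decoded, a missing semicolon is recovered from with a parse error, and a NULL or out-of-range code point becomes U+FFFD. A minimal sketch of the same behaviour through Beautiful Soup's html5lib builder (assumed installed):

from bs4 import BeautifulSoup

# Three inputs taken from the fixtures above.
for markup in ("FOO&#x41;BAR", "FOO&#41BAR", "FOO&#x0000;ZOO"):
    soup = BeautifulSoup(markup, "html5lib")
    print(soup.body.get_text())
# FOOABAR, then FOO)BAR, then FOO + U+FFFD + ZOO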

View file

@@ -1,309 +0,0 @@
#data
<div bar="ZZ&gt;YY"></div>
#errors
(1,20): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>YY"
#data
<div bar="ZZ&"></div>
#errors
(1,15): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&"
#data
<div bar='ZZ&'></div>
#errors
(1,15): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&"
#data
<div bar=ZZ&></div>
#errors
(1,13): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&"
#data
<div bar="ZZ&gt=YY"></div>
#errors
(1,15): named-entity-without-semicolon
(1,20): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gt=YY"
#data
<div bar="ZZ&gt0YY"></div>
#errors
(1,20): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gt0YY"
#data
<div bar="ZZ&gt9YY"></div>
#errors
(1,20): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gt9YY"
#data
<div bar="ZZ&gtaYY"></div>
#errors
(1,20): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gtaYY"
#data
<div bar="ZZ&gtZYY"></div>
#errors
(1,20): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gtZYY"
#data
<div bar="ZZ&gt YY"></div>
#errors
(1,15): named-entity-without-semicolon
(1,20): expected-doctype-but-got-start-tag
#new-errors
(1:16) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ> YY"
#data
<div bar="ZZ&gt"></div>
#errors
(1,15): named-entity-without-semicolon
(1,17): expected-doctype-but-got-start-tag
#new-errors
(1:16) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>"
#data
<div bar='ZZ&gt'></div>
#errors
(1,15): named-entity-without-semicolon
(1,17): expected-doctype-but-got-start-tag
#new-errors
(1:16) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>"
#data
<div bar=ZZ&gt></div>
#errors
(1,14): named-entity-without-semicolon
(1,15): expected-doctype-but-got-start-tag
#new-errors
(1:15) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>"
#data
<div bar="ZZ&pound_id=23"></div>
#errors
(1,18): named-entity-without-semicolon
(1,26): expected-doctype-but-got-start-tag
#new-errors
(1:19) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ£_id=23"
#data
<div bar="ZZ&prod_id=23"></div>
#errors
(1,25): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&prod_id=23"
#data
<div bar="ZZ&pound;_id=23"></div>
#errors
(1,27): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ£_id=23"
#data
<div bar="ZZ&prod;_id=23"></div>
#errors
(1,26): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ∏_id=23"
#data
<div bar="ZZ&pound=23"></div>
#errors
(1,18): named-entity-without-semicolon
(1,23): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&pound=23"
#data
<div bar="ZZ&prod=23"></div>
#errors
(1,22): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&prod=23"
#data
<div>ZZ&pound_id=23</div>
#errors
(1,5): expected-doctype-but-got-start-tag
(1,13): named-entity-without-semicolon
#new-errors
(1:14) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ£_id=23"
#data
<div>ZZ&prod_id=23</div>
#errors
(1,5): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ&prod_id=23"
#data
<div>ZZ&pound;_id=23</div>
#errors
(1,5): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ£_id=23"
#data
<div>ZZ&prod;_id=23</div>
#errors
(1,5): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ∏_id=23"
#data
<div>ZZ&pound=23</div>
#errors
(1,5): expected-doctype-but-got-start-tag
(1,13): named-entity-without-semicolon
#new-errors
(1:14) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ£=23"
#data
<div>ZZ&prod=23</div>
#errors
(1,5): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ&prod=23"
#data
<div>ZZ&AElig=</div>
#errors
#new-errors
(1:14) missing-semicolon-after-character-reference
#document
| <html>
| <head>
| <body>
| <div>
| "ZZÆ="
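The attribute cases above encode the legacy rule that a named reference without a semicolon is left untouched inside an attribute value when "=" or an alphanumeric character follows, while the same reference in ordinary text is decoded. A sketch reusing two of the inputs above (html5lib builder assumed):

from bs4 import BeautifulSoup

# "&pound" without a semicolon: kept literal in the attribute, decoded in text.
soup = BeautifulSoup('<div bar="ZZ&pound=23">ZZ&pound=23</div>', "html5lib")
print(soup.div["bar"])      # ZZ&pound=23
print(soup.div.get_text())  # ZZ£=23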

View file

@@ -1,559 +0,0 @@
#data
<nobr>X
#errors
6: HTML start tag “nobr” in a foreign namespace context.
7: End of file seen and there were open elements.
6: Unclosed element “nobr”.
#document-fragment
svg path
#document
| <svg nobr>
| "X"
#data
<font color></font>X
#errors
12: HTML start tag “font” in a foreign namespace context.
#document-fragment
svg path
#document
| <svg font>
| color=""
| "X"
#data
<font></font>X
#errors
#document-fragment
svg path
#document
| <svg font>
| "X"
#data
<g></path>X
#errors
10: End tag “path” did not match the name of the current open element (“g”).
11: End of file seen and there were open elements.
3: Unclosed element “g”.
#document-fragment
svg path
#document
| <svg g>
| "X"
#data
</path>X
#errors
5: Stray end tag “path”.
#document-fragment
svg path
#document
| "X"
#data
</foreignObject>X
#errors
5: Stray end tag “foreignobject”.
#document-fragment
svg foreignObject
#document
| "X"
#data
</desc>X
#errors
5: Stray end tag “desc”.
#document-fragment
svg desc
#document
| "X"
#data
</title>X
#errors
5: Stray end tag “title”.
#document-fragment
svg title
#document
| "X"
#data
</svg>X
#errors
5: Stray end tag “svg”.
#document-fragment
svg svg
#document
| "X"
#data
</mfenced>X
#errors
5: Stray end tag “mfenced”.
#document-fragment
math mfenced
#document
| "X"
#data
</malignmark>X
#errors
5: Stray end tag “malignmark”.
#document-fragment
math malignmark
#document
| "X"
#data
</math>X
#errors
5: Stray end tag “math”.
#document-fragment
math math
#document
| "X"
#data
</annotation-xml>X
#errors
5: Stray end tag “annotation-xml”.
#document-fragment
math annotation-xml
#document
| "X"
#data
</mtext>X
#errors
5: Stray end tag “mtext”.
#document-fragment
math mtext
#document
| "X"
#data
</mi>X
#errors
5: Stray end tag “mi”.
#document-fragment
math mi
#document
| "X"
#data
</mo>X
#errors
5: Stray end tag “mo”.
#document-fragment
math mo
#document
| "X"
#data
</mn>X
#errors
5: Stray end tag “mn”.
#document-fragment
math mn
#document
| "X"
#data
</ms>X
#errors
5: Stray end tag “ms”.
#document-fragment
math ms
#document
| "X"
#data
<b></b><mglyph/><i></i><malignmark/><u></u><ms/>X
#errors
51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
52: End of file seen and there were open elements.
51: Unclosed element “ms”.
#new-errors
(1:44-1:49) non-void-html-element-start-tag-with-trailing-solidus
#document-fragment
math ms
#document
| <b>
| <math mglyph>
| <i>
| <math malignmark>
| <u>
| <ms>
| "X"
#data
<malignmark></malignmark>
#errors
#document-fragment
math ms
#document
| <math malignmark>
#data
<div></div>
#errors
#document-fragment
math ms
#document
| <div>
#data
<figure></figure>
#errors
#document-fragment
math ms
#document
| <figure>
#data
<b></b><mglyph/><i></i><malignmark/><u></u><mn/>X
#errors
51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
52: End of file seen and there were open elements.
51: Unclosed element “mn”.
#new-errors
(1:44-1:49) non-void-html-element-start-tag-with-trailing-solidus
#document-fragment
math mn
#document
| <b>
| <math mglyph>
| <i>
| <math malignmark>
| <u>
| <mn>
| "X"
#data
<malignmark></malignmark>
#errors
#document-fragment
math mn
#document
| <math malignmark>
#data
<div></div>
#errors
#document-fragment
math mn
#document
| <div>
#data
<figure></figure>
#errors
#document-fragment
math mn
#document
| <figure>
#data
<b></b><mglyph/><i></i><malignmark/><u></u><mo/>X
#errors
51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
52: End of file seen and there were open elements.
51: Unclosed element “mo”.
#new-errors
(1:44-1:49) non-void-html-element-start-tag-with-trailing-solidus
#document-fragment
math mo
#document
| <b>
| <math mglyph>
| <i>
| <math malignmark>
| <u>
| <mo>
| "X"
#data
<malignmark></malignmark>
#errors
#document-fragment
math mo
#document
| <math malignmark>
#data
<div></div>
#errors
#document-fragment
math mo
#document
| <div>
#data
<figure></figure>
#errors
#document-fragment
math mo
#document
| <figure>
#data
<b></b><mglyph/><i></i><malignmark/><u></u><mi/>X
#errors
51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
52: End of file seen and there were open elements.
51: Unclosed element “mi”.
#new-errors
(1:44-1:49) non-void-html-element-start-tag-with-trailing-solidus
#document-fragment
math mi
#document
| <b>
| <math mglyph>
| <i>
| <math malignmark>
| <u>
| <mi>
| "X"
#data
<malignmark></malignmark>
#errors
#document-fragment
math mi
#document
| <math malignmark>
#data
<div></div>
#errors
#document-fragment
math mi
#document
| <div>
#data
<figure></figure>
#errors
#document-fragment
math mi
#document
| <figure>
#data
<b></b><mglyph/><i></i><malignmark/><u></u><mtext/>X
#errors
51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
52: End of file seen and there were open elements.
51: Unclosed element “mtext”.
#new-errors
(1:44-1:52) non-void-html-element-start-tag-with-trailing-solidus
#document-fragment
math mtext
#document
| <b>
| <math mglyph>
| <i>
| <math malignmark>
| <u>
| <mtext>
| "X"
#data
<malignmark></malignmark>
#errors
#document-fragment
math mtext
#document
| <math malignmark>
#data
<div></div>
#errors
#document-fragment
math mtext
#document
| <div>
#data
<figure></figure>
#errors
#document-fragment
math mtext
#document
| <figure>
#data
<div></div>
#errors
5: HTML start tag “div” in a foreign namespace context.
#document-fragment
math annotation-xml
#document
| <math div>
#data
<figure></figure>
#errors
#document-fragment
math annotation-xml
#document
| <math figure>
#data
<div></div>
#errors
5: HTML start tag “div” in a foreign namespace context.
#document-fragment
math math
#document
| <math div>
#data
<figure></figure>
#errors
#document-fragment
math math
#document
| <math figure>
#data
<div></div>
#errors
#document-fragment
svg foreignObject
#document
| <div>
#data
<figure></figure>
#errors
#document-fragment
svg foreignObject
#document
| <figure>
#data
<div></div>
#errors
#document-fragment
svg title
#document
| <div>
#data
<figure></figure>
#errors
#document-fragment
svg title
#document
| <figure>
#data
<figure></figure>
#errors
#document-fragment
svg desc
#document
| <figure>
#data
<div><h1>X</h1></div>
#errors
5: HTML start tag “div” in a foreign namespace context.
9: HTML start tag “h1” in a foreign namespace context.
#document-fragment
svg svg
#document
| <svg div>
| <svg h1>
| "X"
#data
<div></div>
#errors
5: HTML start tag “div” in a foreign namespace context.
#document-fragment
svg svg
#document
| <svg div>
#data
<div></div>
#errors
#document-fragment
svg desc
#document
| <div>
#data
<figure></figure>
#errors
#document-fragment
svg desc
#document
| <figure>
#data
<plaintext><foo>
#errors
(1,16): expected-closing-tag-but-got-eof
#document-fragment
svg desc
#document
| <plaintext>
| "<foo>"
#data
<frameset>X
#errors
6: Stray start tag “frameset”.
#document-fragment
svg desc
#document
| "X"
#data
<head>X
#errors
6: Stray start tag “head”.
#document-fragment
svg desc
#document
| "X"
#data
<body>X
#errors
6: Stray start tag “body”.
#document-fragment
svg desc
#document
| "X"
#data
<html>X
#errors
6: Stray start tag “html”.
#document-fragment
svg desc
#document
| "X"
#data
<html class="foo">X
#errors
6: Stray start tag “html”.
#document-fragment
svg desc
#document
| "X"
#data
<body class="foo">X
#errors
6: Stray start tag “body”.
#document-fragment
svg desc
#document
| "X"

View file

@@ -1,302 +0,0 @@
#data
<div<div>
#errors
(1,9): expected-doctype-but-got-start-tag
(1,9): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <div<div>
#data
<div foo<bar=''>
#errors
(1,9): invalid-character-in-attribute-name
(1,16): expected-doctype-but-got-start-tag
(1,16): expected-closing-tag-but-got-eof
#new-errors
(1:9) unexpected-character-in-attribute-name
#document
| <html>
| <head>
| <body>
| <div>
| foo<bar=""
#data
<div foo=`bar`>
#errors
(1,10): equals-in-unquoted-attribute-value
(1,14): unexpected-character-in-unquoted-attribute-value
(1,15): expected-doctype-but-got-start-tag
(1,15): expected-closing-tag-but-got-eof
#new-errors
(1:10) unexpected-character-in-unquoted-attribute-value
(1:14) unexpected-character-in-unquoted-attribute-value
#document
| <html>
| <head>
| <body>
| <div>
| foo="`bar`"
#data
<div \"foo=''>
#errors
(1,7): invalid-character-in-attribute-name
(1,14): expected-doctype-but-got-start-tag
(1,14): expected-closing-tag-but-got-eof
#new-errors
(1:7) unexpected-character-in-attribute-name
#document
| <html>
| <head>
| <body>
| <div>
| \"foo=""
#data
<a href='\nbar'></a>
#errors
(1,16): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <a>
| href="\nbar"
#data
<!DOCTYPE html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
&lang;&rang;
#errors
(1,6): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "⟨⟩"
#data
&apos;
#errors
(1,6): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "'"
#data
&ImaginaryI;
#errors
(1,12): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
|     "ⅈ"
#data
&Kopf;
#errors
(1,6): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "𝕂"
#data
&notinva;
#errors
(1,9): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "∉"
#data
<?import namespace="foo" implementation="#bar">
#errors
(1,1): expected-tag-name-but-got-question-mark
(1,47): expected-doctype-but-got-eof
#new-errors
(1:2) unexpected-question-mark-instead-of-tag-name
#document
| <!-- ?import namespace="foo" implementation="#bar" -->
| <html>
| <head>
| <body>
#data
<!--foo--bar-->
#errors
(1,10): unexpected-char-in-comment
(1,15): expected-doctype-but-got-eof
#document
| <!-- foo--bar -->
| <html>
| <head>
| <body>
#data
<![CDATA[x]]>
#errors
(1,2): expected-dashes-or-doctype
(1,13): expected-doctype-but-got-eof
#new-errors
(1:9) cdata-in-html-content
#document
| <!-- [CDATA[x]] -->
| <html>
| <head>
| <body>
#data
<textarea><!--</textarea>--></textarea>
#errors
(1,10): expected-doctype-but-got-start-tag
(1,39): unexpected-end-tag
#document
| <html>
| <head>
| <body>
| <textarea>
| "<!--"
| "-->"
#data
<textarea><!--</textarea>-->
#errors
(1,10): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <textarea>
| "<!--"
| "-->"
#data
<style><!--</style>--></style>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,30): unexpected-end-tag
#document
| <html>
| <head>
| <style>
| "<!--"
| <body>
| "-->"
#data
<style><!--</style>-->
#errors
(1,7): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <style>
| "<!--"
| <body>
| "-->"
#data
<ul><li>A </li> <li>B</li></ul>
#errors
(1,4): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ul>
| <li>
| "A "
| " "
| <li>
| "B"
#data
<table><form><input type=hidden><input></form><div></div></table>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,13): unexpected-form-in-table
(1,32): unexpected-hidden-input-in-table
(1,39): unexpected-start-tag-implies-table-voodoo
(1,46): unexpected-end-tag-implies-table-voodoo
(1,46): unexpected-end-tag
(1,51): unexpected-start-tag-implies-table-voodoo
(1,57): unexpected-end-tag-implies-table-voodoo
#document
| <html>
| <head>
| <body>
| <input>
| <div>
| <table>
| <form>
| <input>
| type="hidden"
#data
<i>A<b>B<p></i>C</b>D
#errors
(1,3): expected-doctype-but-got-start-tag
(1,15): adoption-agency-1.3
(1,20): adoption-agency-1.3
#document
| <html>
| <head>
| <body>
| <i>
| "A"
| <b>
| "B"
| <b>
| <p>
| <b>
| <i>
| "C"
| "D"
#data
<div></div>
#errors
(1,5): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <div>
#data
<svg></svg>
#errors
(1,5): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <svg svg>
#data
<math></math>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <math math>
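The last two cases record that <svg> and <math> open foreign-content subtrees, which is why the expected trees prefix them with their namespace ("svg svg", "math math"). With the html5lib builder, Beautiful Soup keeps that namespace on the tag; a small sketch, assuming that builder is installed:

from bs4 import BeautifulSoup

# The namespace URIs correspond to the "svg"/"math" prefixes in the trees above.
soup = BeautifulSoup("<svg></svg><math></math>", "html5lib")
print(soup.svg.namespace)   # http://www.w3.org/2000/svg
print(soup.math.namespace)  # http://www.w3.org/1998/Math/MathML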

View file

@@ -1,54 +0,0 @@
#data
<button>1</foo>
#errors
(1,8): expected-doctype-but-got-start-tag
(1,15): unexpected-end-tag
(1,15): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <button>
| "1"
#data
<foo>1<p>2</foo>
#errors
(1,5): expected-doctype-but-got-start-tag
(1,16): unexpected-end-tag
(1,16): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <foo>
| "1"
| <p>
| "2"
#data
<dd>1</foo>
#errors
(1,4): expected-doctype-but-got-start-tag
(1,11): unexpected-end-tag
#document
| <html>
| <head>
| <body>
| <dd>
| "1"
#data
<foo>1<dd>2</foo>
#errors
(1,5): expected-doctype-but-got-start-tag
(1,17): unexpected-end-tag
(1,17): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <foo>
| "1"
| <dd>
| "2"

View file

@@ -1,49 +0,0 @@
#data
<isindex>
#errors
(1,9): expected-doctype-but-got-start-tag
(1,9): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <isindex>
#data
<isindex name="A" action="B" prompt="C" foo="D">
#errors
(1,48): expected-doctype-but-got-start-tag
(1,48): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <isindex>
| action="B"
| foo="D"
| name="A"
| prompt="C"
#data
<form><isindex>
#errors
(1,6): expected-doctype-but-got-start-tag
(1,15): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <form>
| <isindex>
#data
<!doctype html><isindex>x</isindex>x
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <isindex>
| "x"
| "x"

View file

@@ -1,46 +0,0 @@
#data
<!doctype html><p>foo<main>bar<p>baz
#errors
(1,36): expected-closing-tag-but-got-eof
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <main>
| "bar"
| <p>
| "baz"
#data
<!doctype html><main><p>foo</main>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <main>
| <p>
| "foo"
| "bar"
#data
<!DOCTYPE html>xxx<svg><x><g><a><main><b>
#errors
* (1,42) unexpected HTML-like start tag token in foreign content
* (1,42) unexpected end of file
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "xxx"
| <svg svg>
| <svg x>
| <svg g>
| <svg a>
| <svg main>
| <b>

View file

@@ -1,81 +0,0 @@
#data
<math><tr><td><mo><tr>
#errors
#document-fragment
td
#document
| <math math>
| <math tr>
| <math td>
| <math mo>
#data
<math><tr><td><mo><tr>
#errors
#document-fragment
tr
#document
| <math math>
| <math tr>
| <math td>
| <math mo>
#data
<math><thead><mo><tbody>
#errors
#document-fragment
thead
#document
| <math math>
| <math thead>
| <math mo>
#data
<math><tfoot><mo><tbody>
#errors
#document-fragment
tfoot
#document
| <math math>
| <math tfoot>
| <math mo>
#data
<math><tbody><mo><tfoot>
#errors
#document-fragment
tbody
#document
| <math math>
| <math tbody>
| <math mo>
#data
<math><tbody><mo></table>
#errors
#document-fragment
tbody
#document
| <math math>
| <math tbody>
| <math mo>
#data
<math><thead><mo></table>
#errors
#document-fragment
tbody
#document
| <math math>
| <math thead>
| <math mo>
#data
<math><tfoot><mo></table>
#errors
#document-fragment
tbody
#document
| <math math>
| <math tfoot>
| <math mo>
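These cases use the #document-fragment directive: the input is parsed as if it appeared inside the named container element. A rough equivalent via html5lib's public API (assumed installed; it accepts a plain container name, so the namespaced contexts used elsewhere in these fixtures are driven by the test harness rather than this call):

import html5lib

# First case above, parsed in a <td> context; the fragment's only child is a
# MathML <math> element holding the nested tr/td/mo structure.
fragment = html5lib.parseFragment("<math><tr><td><mo><tr>", container="td")
for child in fragment:
    print(child.tag)  # {http://www.w3.org/1998/Math/MathML}math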

View file

@@ -1,257 +0,0 @@
#data
<menuitem>
#errors
10: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
10: End of file seen and there were open elements.
10: Unclosed element “menuitem”.
#document
| <html>
| <head>
| <body>
| <menuitem>
#data
</menuitem>
#errors
11: End tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
11: Stray end tag “menuitem”.
#document
| <html>
| <head>
| <body>
#data
<!DOCTYPE html><body><menuitem>A
#errors
32: End of file seen and there were open elements.
31: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <menuitem>
| "A"
#data
<!DOCTYPE html><body><menuitem>A<menuitem>B
#errors
43: End of file seen and there were open elements.
42: Unclosed element “menuitem”.
31: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <menuitem>
| "A"
| <menuitem>
| "B"
#data
<!DOCTYPE html><body><menuitem>A<menu>B</menu>
#errors
46: End of file seen and there were open elements.
31: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <menuitem>
| "A"
| <menu>
| "B"
#data
<!DOCTYPE html><body><menuitem>A<hr>B
#errors
37: End of file seen and there were open elements.
31: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <menuitem>
| "A"
| <hr>
| "B"
#data
<!DOCTYPE html><li><menuitem><li>
#errors
33: End tag “li” implied, but there were open elements.
29: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <li>
| <menuitem>
| <li>
#data
<!DOCTYPE html><menuitem><p></menuitem>x
#errors
39: Stray end tag “menuitem”.
40: End of file seen and there were open elements.
25: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <menuitem>
| <p>
| "x"
#data
<!DOCTYPE html><p><b></p><menuitem>
#errors
25: End tag “p” seen, but there were open elements.
21: Unclosed element “b”.
35: End of file seen and there were open elements.
35: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <b>
| <b>
| <menuitem>
#data
<!DOCTYPE html><menuitem><asdf></menuitem>x
#errors
42: End tag “menuitem” seen, but there were open elements.
31: Unclosed element “asdf”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <menuitem>
| <asdf>
| "x"
#data
<!DOCTYPE html></menuitem>
#errors
26: Stray end tag “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
<!DOCTYPE html><html></menuitem>
#errors
26: Stray end tag “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
<!DOCTYPE html><head></menuitem>
#errors
26: Stray end tag “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
<!DOCTYPE html><select><menuitem></select>
#errors
33: Stray start tag “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!DOCTYPE html><option><menuitem>
#errors
33: End of file seen and there were open elements.
33: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <option>
| <menuitem>
#data
<!DOCTYPE html><menuitem><option>
#errors
33: End of file seen and there were open elements.
25: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <menuitem>
| <option>
#data
<!DOCTYPE html><menuitem></body>
#errors
32: End tag for “body” seen, but there were unclosed elements.
25: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <menuitem>
#data
<!DOCTYPE html><menuitem></html>
#errors
32: End tag for “html” seen, but there were unclosed elements.
25: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <menuitem>
#data
<!DOCTYPE html><menuitem><p>
#errors
28: End of file seen and there were open elements.
25: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <menuitem>
| <p>
#data
<!DOCTYPE html><menuitem><li>
#errors
29: End of file seen and there were open elements.
25: Unclosed element “menuitem”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <menuitem>
| <li>

View file

@@ -1,16 +0,0 @@
#data
<body><table><tr><td><svg><td><foreignObject><span></td>Foo
#errors
#document
| <html>
| <head>
| <body>
| "Foo"
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg td>
| <svg foreignObject>
| <span>

View file

@@ -1,237 +0,0 @@
#data
<head><noscript><!doctype html><!--foo--></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
Line: 1 Col: 31 Unexpected DOCTYPE. Ignored.
#script-off
#document
| <html>
| <head>
| <noscript>
| <!-- foo -->
| <body>
#data
<head><noscript><html class="foo"><!--foo--></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
Line: 1 Col: 34 html needs to be the first start tag.
#script-off
#document
| <html>
| class="foo"
| <head>
| <noscript>
| <!-- foo -->
| <body>
#data
<head><noscript></noscript>
#errors
(1,6): expected-doctype-but-got-tag
#script-off
#document
| <html>
| <head>
| <noscript>
| <body>
#data
<head><noscript> </noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
#script-off
#document
| <html>
| <head>
| <noscript>
| " "
| <body>
#data
<head><noscript><!--foo--></noscript>
#errors
(1,6): expected-doctype-but-got-tag
#script-off
#document
| <html>
| <head>
| <noscript>
| <!-- foo -->
| <body>
#data
<head><noscript><basefont><!--foo--></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
#script-off
#document
| <html>
| <head>
| <noscript>
| <basefont>
| <!-- foo -->
| <body>
#data
<head><noscript><bgsound><!--foo--></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
#script-off
#document
| <html>
| <head>
| <noscript>
| <bgsound>
| <!-- foo -->
| <body>
#data
<head><noscript><link><!--foo--></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
#script-off
#document
| <html>
| <head>
| <noscript>
| <link>
| <!-- foo -->
| <body>
#data
<head><noscript><meta><!--foo--></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
#script-off
#document
| <html>
| <head>
| <noscript>
| <meta>
| <!-- foo -->
| <body>
#data
<head><noscript><noframes>XXX</noscript></noframes></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
#script-off
#document
| <html>
| <head>
| <noscript>
| <noframes>
| "XXX</noscript>"
| <body>
#data
<head><noscript><style>XXX</style></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
#script-off
#document
| <html>
| <head>
| <noscript>
| <style>
| "XXX"
| <body>
#data
<head><noscript></br><!--foo--></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
Line: 1 Col: 21 Element br not allowed in a inhead-noscript context
Line: 1 Col: 21 Unexpected end tag (br). Treated as br element.
Line: 1 Col: 42 Unexpected end tag (noscript). Ignored.
#script-off
#document
| <html>
| <head>
| <noscript>
| <body>
| <br>
| <!-- foo -->
#data
<head><noscript><head class="foo"><!--foo--></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
Line: 1 Col: 34 Unexpected start tag (head).
#script-off
#document
| <html>
| <head>
| <noscript>
| <!-- foo -->
| <body>
#data
<head><noscript><noscript class="foo"><!--foo--></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
Line: 1 Col: 34 Unexpected start tag (noscript).
#script-off
#document
| <html>
| <head>
| <noscript>
| <!-- foo -->
| <body>
#data
<head><noscript></p><!--foo--></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
Line: 1 Col: 20 Unexpected end tag (p). Ignored.
#script-off
#document
| <html>
| <head>
| <noscript>
| <!-- foo -->
| <body>
#data
<head><noscript><p><!--foo--></noscript>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
Line: 1 Col: 19 Element p not allowed in a inhead-noscript context
Line: 1 Col: 40 Unexpected end tag (noscript). Ignored.
#script-off
#document
| <html>
| <head>
| <noscript>
| <body>
| <p>
| <!-- foo -->
#data
<head><noscript>XXX<!--foo--></noscript></head>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
Line: 1 Col: 19 Unexpected non-space character. Expected inhead-noscript content
Line: 1 Col: 30 Unexpected end tag (noscript). Ignored.
Line: 1 Col: 37 Unexpected end tag (head). Ignored.
#script-off
#document
| <html>
| <head>
| <noscript>
| <body>
| "XXX"
| <!-- foo -->
#data
<head><noscript>
#errors
(1,6): expected-doctype-but-got-tag
(1,6): eof-in-head-noscript
#script-off
#document
| <html>
| <head>
| <noscript>
| <body>

View file

@@ -1,46 +0,0 @@
#data
<input type="hidden"><frameset>
#errors
(1,21): expected-doctype-but-got-start-tag
(1,31): unexpected-start-tag
(1,31): eof-in-frameset
#document
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><table><caption><svg>foo</table>bar
#errors
(1,47): unexpected-end-tag
(1,47): end-table-tag-in-caption
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| "foo"
| "bar"
#data
<table><tr><td><svg><desc><td></desc><circle>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,30): unexpected-cell-end-tag
(1,37): unexpected-end-tag
(1,45): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg desc>
| <td>
| <circle>

View file

@@ -1,301 +0,0 @@
#data
<html><ruby>a<rb>b<rb></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rb>
| "b"
| <rb>
#data
<html><ruby>a<rb>b<rt></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rb>
| "b"
| <rt>
#data
<html><ruby>a<rb>b<rtc></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rb>
| "b"
| <rtc>
#data
<html><ruby>a<rb>b<rp></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rb>
| "b"
| <rp>
#data
<html><ruby>a<rb>b<span></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
(1,31): unexpected-end-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rb>
| "b"
| <span>
#data
<html><ruby>a<rt>b<rb></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rt>
| "b"
| <rb>
#data
<html><ruby>a<rt>b<rt></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rt>
| "b"
| <rt>
#data
<html><ruby>a<rt>b<rtc></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rt>
| "b"
| <rtc>
#data
<html><ruby>a<rt>b<rp></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rt>
| "b"
| <rp>
#data
<html><ruby>a<rt>b<span></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
(1,31): unexpected-end-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rt>
| "b"
| <span>
#data
<html><ruby>a<rtc>b<rb></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rtc>
| "b"
| <rb>
#data
<html><ruby>a<rtc>b<rt>c<rt>d</ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rtc>
| "b"
| <rt>
| "c"
| <rt>
| "d"
#data
<html><ruby>a<rtc>b<rtc></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rtc>
| "b"
| <rtc>
#data
<html><ruby>a<rtc>b<rp></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rtc>
| "b"
| <rp>
#data
<html><ruby>a<rtc>b<span></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rtc>
| "b"
| <span>
#data
<html><ruby>a<rp>b<rb></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rp>
| "b"
| <rb>
#data
<html><ruby>a<rp>b<rt></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rp>
| "b"
| <rt>
#data
<html><ruby>a<rp>b<rtc></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rp>
| "b"
| <rtc>
#data
<html><ruby>a<rp>b<rp></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rp>
| "b"
| <rp>
#data
<html><ruby>a<rp>b<span></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
(1,31): unexpected-end-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| "a"
| <rp>
| "b"
| <span>
#data
<html><ruby><rtc><ruby>a<rb>b<rt></ruby></ruby></html>
#errors
(1,6): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <ruby>
| <rtc>
| <ruby>
| "a"
| <rb>
| "b"
| <rt>

View file

@@ -1,385 +0,0 @@
#data
FOO<script>'Hello'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'Hello'"
| "BAR"
#data
FOO<script></script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script></script >BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script></script/>BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,21): self-closing-flag-on-end-tag
#new-errors
(1:21) end-tag-with-trailing-solidus
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script></script/ >BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,20): unexpected-character-after-solidus-in-tag
#new-errors
(1:21) unexpected-solidus-in-tag
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script type="text/plain"></scriptx>BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,42): expected-named-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "</scriptx>BAR"
#data
FOO<script></script foo=">" dd>BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,31): attributes-in-end-tag
#new-errors
(1:31) end-tag-with-attributes
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script>'<'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<'"
| "BAR"
#data
FOO<script>'<!'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!'"
| "BAR"
#data
FOO<script>'<!-'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-'"
| "BAR"
#data
FOO<script>'<!--'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!--'"
| "BAR"
#data
FOO<script>'<!---'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!---'"
| "BAR"
#data
FOO<script>'<!-->'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-->'"
| "BAR"
#data
FOO<script>'<!-->'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-->'"
| "BAR"
#data
FOO<script>'<!-- potato'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-- potato'"
| "BAR"
#data
FOO<script>'<!-- <sCrIpt'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-- <sCrIpt'"
| "BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt>'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,56): expected-script-data-but-got-eof
(1,56): expected-named-closing-tag-but-got-eof
#new-errors
(1:57) eof-in-script-html-comment-like-text
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt>'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> -'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,58): expected-script-data-but-got-eof
(1,58): expected-named-closing-tag-but-got-eof
#new-errors
(1:59) eof-in-script-html-comment-like-text
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> -'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> --'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,59): expected-script-data-but-got-eof
(1,59): expected-named-closing-tag-but-got-eof
#new-errors
(1:60) eof-in-script-html-comment-like-text
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> --'</script>BAR"
#data
FOO<script>'<!-- <sCrIpt> -->'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-- <sCrIpt> -->'"
| "BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> --!>'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,61): expected-script-data-but-got-eof
(1,61): expected-named-closing-tag-but-got-eof
#new-errors
(1:62) eof-in-script-html-comment-like-text
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> --!>'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> -- >'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,61): expected-script-data-but-got-eof
(1,61): expected-named-closing-tag-but-got-eof
#new-errors
(1:62) eof-in-script-html-comment-like-text
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> -- >'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt '</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,56): expected-script-data-but-got-eof
(1,56): expected-named-closing-tag-but-got-eof
#new-errors
(1:57) eof-in-script-html-comment-like-text
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt '</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
(1,56): expected-script-data-but-got-eof
(1,56): expected-named-closing-tag-but-got-eof
#new-errors
(1:57) eof-in-script-html-comment-like-text
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt/'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt\'</script>BAR
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt\'"
| "BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR</script>QUX
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt/'</script>BAR"
| "QUX"
#data
FOO<script><!--<script>-></script>--></script>QUX
#errors
(1,3): expected-doctype-but-got-chars
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "<!--<script>-></script>-->"
| "QUX"
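The script cases above pin down that script element content is raw text: markup-like characters inside it are not tokenized as tags. A short sketch with the html5lib builder (assumed installed):

from bs4 import BeautifulSoup

# Everything between <script> and the closing tag stays literal text.
soup = BeautifulSoup("FOO<script>'<!-- <b>not markup</b> -->'</script>BAR", "html5lib")
print(soup.script.string)       # '<!-- <b>not markup</b> -->'
print(len(soup.find_all("b")))  # 0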

View file

@@ -1,16 +0,0 @@
#data
<p><b id="A"><script>document.getElementById("A").id = "B"</script></p>TEXT</b>
#errors
#script-on
#document
| <html>
| <head>
| <body>
| <p>
| <b>
| id="B"
| <script>
| "document.getElementById("A").id = "B""
| <b>
| id="A"
| "TEXT"

View file

@@ -1,27 +0,0 @@
#data
<p><font size=4><font size=4><font size=4><script>document.getElementsByTagName("font")[2].setAttribute("size", "5");</script><font size=4><p>X
#errors
#script-on
#document
| <html>
| <head>
| <body>
| <p>
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="5"
| <script>
| "document.getElementsByTagName("font")[2].setAttribute("size", "5");"
| <font>
| size="4"
| <p>
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| "X"

View file

@@ -1,30 +0,0 @@
#data
1<script>document.write("2")</script>3
#errors
#script-on
#document
| <html>
| <head>
| <body>
| "1"
| <script>
| "document.write("2")"
| "23"
#data
1<script>document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")</script>4
#errors
#script-on
#document
| <html>
| <head>
| <body>
| "1"
| <script>
| "document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")"
| <script>
| "document.write('2')"
| "2"
| <script>
| "document.write('3')"
| "34"

View file

@@ -1,286 +0,0 @@
#data
<table><th>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,11): unexpected-cell-in-table-body
(1,11): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <th>
#data
<table><td>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,11): unexpected-cell-in-table-body
(1,11): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
#data
<table><col foo='bar'>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,22): eof-in-table
#document
| <html>
| <head>
| <body>
| <table>
| <colgroup>
| <col>
| foo="bar"
#data
<table><colgroup></html>foo
#errors
(1,7): expected-doctype-but-got-start-tag
(1,24): unexpected-end-tag
(1,27): foster-parenting-character-in-table
(1,27): foster-parenting-character-in-table
(1,27): foster-parenting-character-in-table
(1,27): eof-in-table
#document
| <html>
| <head>
| <body>
| "foo"
| <table>
| <colgroup>
#data
<table></table><p>foo
#errors
(1,7): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <body>
| <table>
| <p>
| "foo"
#data
<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr><td>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,14): unexpected-end-tag
(1,24): unexpected-end-tag
(1,30): unexpected-end-tag
(1,41): unexpected-end-tag
(1,48): unexpected-end-tag
(1,56): unexpected-end-tag
(1,61): unexpected-end-tag
(1,69): unexpected-end-tag
(1,74): unexpected-end-tag
(1,82): unexpected-end-tag
(1,87): unexpected-end-tag
(1,91): unexpected-cell-in-table-body
(1,91): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
#data
<table><select><option>3</select></table>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,15): unexpected-start-tag-implies-table-voodoo
#document
| <html>
| <head>
| <body>
| <select>
| <option>
| "3"
| <table>
#data
<table><select><table></table></select></table>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,15): unexpected-start-tag-implies-table-voodoo
(1,22): unexpected-table-element-start-tag-in-select-in-table
(1,22): unexpected-start-tag-implies-end-tag
(1,39): unexpected-end-tag
(1,47): unexpected-end-tag
#document
| <html>
| <head>
| <body>
| <select>
| <table>
| <table>
#data
<table><select></table>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,15): unexpected-start-tag-implies-table-voodoo
(1,23): unexpected-table-element-end-tag-in-select-in-table
#document
| <html>
| <head>
| <body>
| <select>
| <table>
#data
<table><select><option>A<tr><td>B</td></tr></table>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,15): unexpected-start-tag-implies-table-voodoo
(1,28): unexpected-table-element-start-tag-in-select-in-table
#document
| <html>
| <head>
| <body>
| <select>
| <option>
| "A"
| <table>
| <tbody>
| <tr>
| <td>
| "B"
#data
<table><td></body></caption></col></colgroup></html>foo
#errors
(1,7): expected-doctype-but-got-start-tag
(1,11): unexpected-cell-in-table-body
(1,18): unexpected-end-tag
(1,28): unexpected-end-tag
(1,34): unexpected-end-tag
(1,45): unexpected-end-tag
(1,52): unexpected-end-tag
(1,55): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "foo"
#data
<table><td>A</table>B
#errors
(1,7): expected-doctype-but-got-start-tag
(1,11): unexpected-cell-in-table-body
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "A"
| "B"
#data
<table><tr><caption>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,20): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <caption>
#data
<table><tr></body></caption></col></colgroup></html></td></th><td>foo
#errors
(1,7): expected-doctype-but-got-start-tag
(1,18): unexpected-end-tag-in-table-row
(1,28): unexpected-end-tag-in-table-row
(1,34): unexpected-end-tag-in-table-row
(1,45): unexpected-end-tag-in-table-row
(1,52): unexpected-end-tag-in-table-row
(1,57): unexpected-end-tag-in-table-row
(1,62): unexpected-end-tag-in-table-row
(1,69): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "foo"
#data
<table><td><tr>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,11): unexpected-cell-in-table-body
(1,15): eof-in-table
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <tr>
#data
<table><td><button><td>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,11): unexpected-cell-in-table-body
(1,23): unexpected-cell-end-tag
(1,23): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <button>
| <td>
#data
<table><tr><td><svg><desc><td>
#errors
(1,7): expected-doctype-but-got-start-tag
(1,30): unexpected-cell-end-tag
(1,30): expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg desc>
| <td>

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@ -1,849 +0,0 @@
#data
<!DOCTYPE html><svg></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
#data
<!DOCTYPE html><svg></svg><![CDATA[a]]>
#errors
(1,28) expected-dashes-or-doctype
#new-errors
(1:35) cdata-in-html-content
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <!-- [CDATA[a]] -->
#data
<!DOCTYPE html><body><svg></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
#data
<!DOCTYPE html><body><select><svg></svg></select>
#errors
(1,34) unexpected-start-tag-in-select
(1,40) unexpected-end-tag-in-select
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!DOCTYPE html><body><select><option><svg></svg></option></select>
#errors
(1,42) unexpected-start-tag-in-select
(1,48) unexpected-end-tag-in-select
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <option>
#data
<!DOCTYPE html><body><table><svg></svg></table>
#errors
(1,33) foster-parenting-start-tag
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <table>
#data
<!DOCTYPE html><body><table><svg><g>foo</g></svg></table>
#errors
(1,33) foster-parenting-start-tag
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <table>
#data
<!DOCTYPE html><body><table><svg><g>foo</g><g>bar</g></svg></table>
#errors
(1,33) foster-parenting-start-tag
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <table>
#data
<!DOCTYPE html><body><table><tbody><svg><g>foo</g><g>bar</g></svg></tbody></table>
#errors
(1,40) foster-parenting-start-tag
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <table>
| <tbody>
#data
<!DOCTYPE html><body><table><tbody><tr><svg><g>foo</g><g>bar</g></svg></tr></tbody></table>
#errors
(1,44) foster-parenting-start-tag
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <table>
| <tbody>
| <tr>
#data
<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg></td></tr></tbody></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
#data
<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg><p>baz</td></tr></tbody></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g></svg><p>baz</caption></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
(1,65) unexpected-html-element-in-foreign-content
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g>baz</table><p>quux
#errors
(1,73) unexpected-end-tag
(1,73) expected-one-end-tag-but-got-another
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| "baz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><colgroup><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
(1,43) foster-parenting-start-tag svg
(1,66) unexpected HTML-like start tag token in foreign content
(1,66) foster-parenting-start-tag
(1,67) foster-parenting-character
(1,68) foster-parenting-character
(1,69) foster-parenting-character
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
| <table>
| <colgroup>
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><tr><td><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
(1,49) unexpected-start-tag-in-select
(1,52) unexpected-start-tag-in-select
(1,59) unexpected-end-tag-in-select
(1,62) unexpected-start-tag-in-select
(1,69) unexpected-end-tag-in-select
(1,72) unexpected-start-tag-in-select
(1,83) unexpected-table-element-end-tag-in-select-in-table
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <select>
| "foobarbaz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
(1,36) unexpected-start-tag-implies-table-voodoo
(1,41) unexpected-start-tag-in-select
(1,44) unexpected-start-tag-in-select
(1,51) unexpected-end-tag-in-select
(1,54) unexpected-start-tag-in-select
(1,61) unexpected-end-tag-in-select
(1,64) unexpected-start-tag-in-select
(1,75) unexpected-table-element-end-tag-in-select-in-table
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| "foobarbaz"
| <table>
| <p>
| "quux"
#data
<!DOCTYPE html><body></body></html><svg><g>foo</g><g>bar</g><p>baz
#errors
(1,40) expected-eof-but-got-start-tag
(1,63) unexpected-html-element-in-foreign-content
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body></body><svg><g>foo</g><g>bar</g><p>baz
#errors
(1,33) unexpected-start-tag-after-body
(1,56) unexpected-html-element-in-foreign-content
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><frameset><svg><g></g><g></g><p><span>
#errors
(1,30) unexpected-start-tag-in-frameset
(1,33) unexpected-start-tag-in-frameset
(1,37) unexpected-end-tag-in-frameset
(1,40) unexpected-start-tag-in-frameset
(1,44) unexpected-end-tag-in-frameset
(1,47) unexpected-start-tag-in-frameset
(1,53) unexpected-start-tag-in-frameset
(1,53) eof-in-frameset
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><frameset></frameset><svg><g></g><g></g><p><span>
#errors
(1,41) unexpected-start-tag-after-frameset
(1,44) unexpected-start-tag-after-frameset
(1,48) unexpected-end-tag-after-frameset
(1,51) unexpected-start-tag-after-frameset
(1,55) unexpected-end-tag-after-frameset
(1,58) unexpected-start-tag-after-frameset
(1,64) unexpected-start-tag-after-frameset
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><body xlink:href=foo><svg xlink:href=foo></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| <svg svg>
| xlink href="foo"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo></g></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <svg svg>
| <svg g>
| xlink href="foo"
| xml lang="en"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo /></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <svg svg>
| <svg g>
| xlink href="foo"
| xml lang="en"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo />bar</svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <svg svg>
| <svg g>
| xlink href="foo"
| xml lang="en"
| "bar"
#data
<svg></path>
#errors
(1,5) expected-doctype-but-got-start-tag
(1,12) unexpected-end-tag
(1,12) unexpected-end-tag
(1,12) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <svg svg>
#data
<div><svg></div>a
#errors
(1,5) expected-doctype-but-got-start-tag
(1,16) unexpected-end-tag
(1,16) end-tag-too-early
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| "a"
#data
<div><svg><path></div>a
#errors
(1,5) expected-doctype-but-got-start-tag
(1,22) unexpected-end-tag
(1,22) end-tag-too-early
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| "a"
#data
<div><svg><path></svg><path>
#errors
(1,5) expected-doctype-but-got-start-tag
(1,22) unexpected-end-tag
(1,28) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <path>
#data
<div><svg><path><foreignObject><math></div>a
#errors
(1,5) expected-doctype-but-got-start-tag
(1,43) unexpected-end-tag
(1,43) end-tag-too-early
(1,44) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <svg foreignObject>
| <math math>
| "a"
#data
<div><svg><path><foreignObject><p></div>a
#errors
(1,5) expected-doctype-but-got-start-tag
(1,40) end-tag-too-early
(1,41) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <svg foreignObject>
| <p>
| "a"
#data
<!DOCTYPE html><svg><desc><div><svg><ul>a
#errors
(1,40) unexpected-html-element-in-foreign-content
(1,41) expected-closing-tag-but-got-eof
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg desc>
| <div>
| <svg svg>
| <ul>
| "a"
#data
<!DOCTYPE html><svg><desc><svg><ul>a
#errors
(1,35) unexpected-html-element-in-foreign-content
(1,36) expected-closing-tag-but-got-eof
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg desc>
| <svg svg>
| <ul>
| "a"
#data
<!DOCTYPE html><p><svg><desc><p>
#errors
(1,32) expected-closing-tag-but-got-eof
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <svg svg>
| <svg desc>
| <p>
#data
<!DOCTYPE html><p><svg><title><p>
#errors
(1,33) expected-closing-tag-but-got-eof
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <svg svg>
| <svg title>
| <p>
#data
<div><svg><path><foreignObject><p></foreignObject><p>
#errors
(1,5) expected-doctype-but-got-start-tag
(1,50) unexpected-end-tag
(1,53) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <svg foreignObject>
| <p>
| <p>
#data
<math><mi><div><object><div><span></span></div></object></div></mi><mi>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,71) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <div>
| <object>
| <div>
| <span>
| <math mi>
#data
<math><mi><svg><foreignObject><div><div></div></div></foreignObject></svg></mi><mi>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,83) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <svg svg>
| <svg foreignObject>
| <div>
| <div>
| <math mi>
#data
<svg><script></script><path>
#errors
(1,5) expected-doctype-but-got-start-tag
(1,28) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <svg svg>
| <svg script>
| <svg path>
#data
<table><svg></svg><tr>
#errors
(1,7) expected-doctype-but-got-start-tag
(1,12) unexpected-start-tag-implies-table-voodoo
(1,22) eof-in-table
#document
| <html>
| <head>
| <body>
| <svg svg>
| <table>
| <tbody>
| <tr>
#data
<math><mi><mglyph>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,18) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <math mglyph>
#data
<math><mi><malignmark>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,22) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <math malignmark>
#data
<math><mo><mglyph>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,18) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math mo>
| <math mglyph>
#data
<math><mo><malignmark>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,22) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math mo>
| <math malignmark>
#data
<math><mn><mglyph>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,18) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math mn>
| <math mglyph>
#data
<math><mn><malignmark>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,22) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math mn>
| <math malignmark>
#data
<math><ms><mglyph>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,18) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math ms>
| <math mglyph>
#data
<math><ms><malignmark>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,22) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math ms>
| <math malignmark>
#data
<math><mtext><mglyph>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,21) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math mtext>
| <math mglyph>
#data
<math><mtext><malignmark>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,25) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math mtext>
| <math malignmark>
#data
<math><annotation-xml><svg></svg></annotation-xml><mi>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,54) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <svg svg>
| <math mi>
#data
<math><annotation-xml><svg><foreignObject><div><math><mi></mi></math><span></span></div></foreignObject><path></path></svg></annotation-xml><mi>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,144) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <svg svg>
| <svg foreignObject>
| <div>
| <math math>
| <math mi>
| <span>
| <svg path>
| <math mi>
#data
<math><annotation-xml><svg><foreignObject><math><mi><svg></svg></mi><mo></mo></math><span></span></foreignObject><path></path></svg></annotation-xml><mi>
#errors
(1,6) expected-doctype-but-got-start-tag
(1,153) expected-closing-tag-but-got-eof
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <svg svg>
| <svg foreignObject>
| <math math>
| <math mi>
| <svg svg>
| <math mo>
| <span>
| <svg path>
| <math mi>

View file

@ -1,523 +0,0 @@
#data
<!DOCTYPE html><body><svg attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' diffuseConstant='' edgeMode='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| attributeName=""
| attributeType=""
| baseFrequency=""
| baseProfile=""
| calcMode=""
| clipPathUnits=""
| diffuseConstant=""
| edgeMode=""
| filterUnits=""
| glyphRef=""
| gradientTransform=""
| gradientUnits=""
| kernelMatrix=""
| kernelUnitLength=""
| keyPoints=""
| keySplines=""
| keyTimes=""
| lengthAdjust=""
| limitingConeAngle=""
| markerHeight=""
| markerUnits=""
| markerWidth=""
| maskContentUnits=""
| maskUnits=""
| numOctaves=""
| pathLength=""
| patternContentUnits=""
| patternTransform=""
| patternUnits=""
| pointsAtX=""
| pointsAtY=""
| pointsAtZ=""
| preserveAlpha=""
| preserveAspectRatio=""
| primitiveUnits=""
| refX=""
| refY=""
| repeatCount=""
| repeatDur=""
| requiredExtensions=""
| requiredFeatures=""
| specularConstant=""
| specularExponent=""
| spreadMethod=""
| startOffset=""
| stdDeviation=""
| stitchTiles=""
| surfaceScale=""
| systemLanguage=""
| tableValues=""
| targetX=""
| targetY=""
| textLength=""
| viewBox=""
| viewTarget=""
| xChannelSelector=""
| yChannelSelector=""
| zoomAndPan=""
#data
<!DOCTYPE html><BODY><SVG ATTRIBUTENAME='' ATTRIBUTETYPE='' BASEFREQUENCY='' BASEPROFILE='' CALCMODE='' CLIPPATHUNITS='' DIFFUSECONSTANT='' EDGEMODE='' FILTERUNITS='' GLYPHREF='' GRADIENTTRANSFORM='' GRADIENTUNITS='' KERNELMATRIX='' KERNELUNITLENGTH='' KEYPOINTS='' KEYSPLINES='' KEYTIMES='' LENGTHADJUST='' LIMITINGCONEANGLE='' MARKERHEIGHT='' MARKERUNITS='' MARKERWIDTH='' MASKCONTENTUNITS='' MASKUNITS='' NUMOCTAVES='' PATHLENGTH='' PATTERNCONTENTUNITS='' PATTERNTRANSFORM='' PATTERNUNITS='' POINTSATX='' POINTSATY='' POINTSATZ='' PRESERVEALPHA='' PRESERVEASPECTRATIO='' PRIMITIVEUNITS='' REFX='' REFY='' REPEATCOUNT='' REPEATDUR='' REQUIREDEXTENSIONS='' REQUIREDFEATURES='' SPECULARCONSTANT='' SPECULAREXPONENT='' SPREADMETHOD='' STARTOFFSET='' STDDEVIATION='' STITCHTILES='' SURFACESCALE='' SYSTEMLANGUAGE='' TABLEVALUES='' TARGETX='' TARGETY='' TEXTLENGTH='' VIEWBOX='' VIEWTARGET='' XCHANNELSELECTOR='' YCHANNELSELECTOR='' ZOOMANDPAN=''></SVG>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| attributeName=""
| attributeType=""
| baseFrequency=""
| baseProfile=""
| calcMode=""
| clipPathUnits=""
| diffuseConstant=""
| edgeMode=""
| filterUnits=""
| glyphRef=""
| gradientTransform=""
| gradientUnits=""
| kernelMatrix=""
| kernelUnitLength=""
| keyPoints=""
| keySplines=""
| keyTimes=""
| lengthAdjust=""
| limitingConeAngle=""
| markerHeight=""
| markerUnits=""
| markerWidth=""
| maskContentUnits=""
| maskUnits=""
| numOctaves=""
| pathLength=""
| patternContentUnits=""
| patternTransform=""
| patternUnits=""
| pointsAtX=""
| pointsAtY=""
| pointsAtZ=""
| preserveAlpha=""
| preserveAspectRatio=""
| primitiveUnits=""
| refX=""
| refY=""
| repeatCount=""
| repeatDur=""
| requiredExtensions=""
| requiredFeatures=""
| specularConstant=""
| specularExponent=""
| spreadMethod=""
| startOffset=""
| stdDeviation=""
| stitchTiles=""
| surfaceScale=""
| systemLanguage=""
| tableValues=""
| targetX=""
| targetY=""
| textLength=""
| viewBox=""
| viewTarget=""
| xChannelSelector=""
| yChannelSelector=""
| zoomAndPan=""
#data
<!DOCTYPE html><body><svg attributename='' attributetype='' basefrequency='' baseprofile='' calcmode='' clippathunits='' diffuseconstant='' edgemode='' filterunits='' filterres='' glyphref='' gradienttransform='' gradientunits='' kernelmatrix='' kernelunitlength='' keypoints='' keysplines='' keytimes='' lengthadjust='' limitingconeangle='' markerheight='' markerunits='' markerwidth='' maskcontentunits='' maskunits='' numoctaves='' pathlength='' patterncontentunits='' patterntransform='' patternunits='' pointsatx='' pointsaty='' pointsatz='' preservealpha='' preserveaspectratio='' primitiveunits='' refx='' refy='' repeatcount='' repeatdur='' requiredextensions='' requiredfeatures='' specularconstant='' specularexponent='' spreadmethod='' startoffset='' stddeviation='' stitchtiles='' surfacescale='' systemlanguage='' tablevalues='' targetx='' targety='' textlength='' viewbox='' viewtarget='' xchannelselector='' ychannelselector='' zoomandpan=''></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| attributeName=""
| attributeType=""
| baseFrequency=""
| baseProfile=""
| calcMode=""
| clipPathUnits=""
| diffuseConstant=""
| edgeMode=""
| filterUnits=""
| filterres=""
| glyphRef=""
| gradientTransform=""
| gradientUnits=""
| kernelMatrix=""
| kernelUnitLength=""
| keyPoints=""
| keySplines=""
| keyTimes=""
| lengthAdjust=""
| limitingConeAngle=""
| markerHeight=""
| markerUnits=""
| markerWidth=""
| maskContentUnits=""
| maskUnits=""
| numOctaves=""
| pathLength=""
| patternContentUnits=""
| patternTransform=""
| patternUnits=""
| pointsAtX=""
| pointsAtY=""
| pointsAtZ=""
| preserveAlpha=""
| preserveAspectRatio=""
| primitiveUnits=""
| refX=""
| refY=""
| repeatCount=""
| repeatDur=""
| requiredExtensions=""
| requiredFeatures=""
| specularConstant=""
| specularExponent=""
| spreadMethod=""
| startOffset=""
| stdDeviation=""
| stitchTiles=""
| surfaceScale=""
| systemLanguage=""
| tableValues=""
| targetX=""
| targetY=""
| textLength=""
| viewBox=""
| viewTarget=""
| xChannelSelector=""
| yChannelSelector=""
| zoomAndPan=""
#data
<!DOCTYPE html><body><math attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' diffuseConstant='' edgeMode='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| attributename=""
| attributetype=""
| basefrequency=""
| baseprofile=""
| calcmode=""
| clippathunits=""
| diffuseconstant=""
| edgemode=""
| filterunits=""
| glyphref=""
| gradienttransform=""
| gradientunits=""
| kernelmatrix=""
| kernelunitlength=""
| keypoints=""
| keysplines=""
| keytimes=""
| lengthadjust=""
| limitingconeangle=""
| markerheight=""
| markerunits=""
| markerwidth=""
| maskcontentunits=""
| maskunits=""
| numoctaves=""
| pathlength=""
| patterncontentunits=""
| patterntransform=""
| patternunits=""
| pointsatx=""
| pointsaty=""
| pointsatz=""
| preservealpha=""
| preserveaspectratio=""
| primitiveunits=""
| refx=""
| refy=""
| repeatcount=""
| repeatdur=""
| requiredextensions=""
| requiredfeatures=""
| specularconstant=""
| specularexponent=""
| spreadmethod=""
| startoffset=""
| stddeviation=""
| stitchtiles=""
| surfacescale=""
| systemlanguage=""
| tablevalues=""
| targetx=""
| targety=""
| textlength=""
| viewbox=""
| viewtarget=""
| xchannelselector=""
| ychannelselector=""
| zoomandpan=""
#data
<!DOCTYPE html><body><svg contentScriptType='' contentStyleType='' externalResourcesRequired='' filterRes=''></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| contentscripttype=""
| contentstyletype=""
| externalresourcesrequired=""
| filterres=""
#data
<!DOCTYPE html><body><svg CONTENTSCRIPTTYPE='' CONTENTSTYLETYPE='' EXTERNALRESOURCESREQUIRED='' FILTERRES=''></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| contentscripttype=""
| contentstyletype=""
| externalresourcesrequired=""
| filterres=""
#data
<!DOCTYPE html><body><svg contentscripttype='' contentstyletype='' externalresourcesrequired='' filterres=''></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| contentscripttype=""
| contentstyletype=""
| externalresourcesrequired=""
| filterres=""
#data
<!DOCTYPE html><body><math contentScriptType='' contentStyleType='' externalResourcesRequired='' filterRes=''></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| contentscripttype=""
| contentstyletype=""
| externalresourcesrequired=""
| filterres=""
#data
<!DOCTYPE html><body><svg><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg altGlyph>
| <svg altGlyphDef>
| <svg altGlyphItem>
| <svg animateColor>
| <svg animateMotion>
| <svg animateTransform>
| <svg clipPath>
| <svg feBlend>
| <svg feColorMatrix>
| <svg feComponentTransfer>
| <svg feComposite>
| <svg feConvolveMatrix>
| <svg feDiffuseLighting>
| <svg feDisplacementMap>
| <svg feDistantLight>
| <svg feFlood>
| <svg feFuncA>
| <svg feFuncB>
| <svg feFuncG>
| <svg feFuncR>
| <svg feGaussianBlur>
| <svg feImage>
| <svg feMerge>
| <svg feMergeNode>
| <svg feMorphology>
| <svg feOffset>
| <svg fePointLight>
| <svg feSpecularLighting>
| <svg feSpotLight>
| <svg feTile>
| <svg feTurbulence>
| <svg foreignObject>
| <svg glyphRef>
| <svg linearGradient>
| <svg radialGradient>
| <svg textPath>
#data
<!DOCTYPE html><body><svg><altglyph /><altglyphdef /><altglyphitem /><animatecolor /><animatemotion /><animatetransform /><clippath /><feblend /><fecolormatrix /><fecomponenttransfer /><fecomposite /><feconvolvematrix /><fediffuselighting /><fedisplacementmap /><fedistantlight /><feflood /><fefunca /><fefuncb /><fefuncg /><fefuncr /><fegaussianblur /><feimage /><femerge /><femergenode /><femorphology /><feoffset /><fepointlight /><fespecularlighting /><fespotlight /><fetile /><feturbulence /><foreignobject /><glyphref /><lineargradient /><radialgradient /><textpath /></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg altGlyph>
| <svg altGlyphDef>
| <svg altGlyphItem>
| <svg animateColor>
| <svg animateMotion>
| <svg animateTransform>
| <svg clipPath>
| <svg feBlend>
| <svg feColorMatrix>
| <svg feComponentTransfer>
| <svg feComposite>
| <svg feConvolveMatrix>
| <svg feDiffuseLighting>
| <svg feDisplacementMap>
| <svg feDistantLight>
| <svg feFlood>
| <svg feFuncA>
| <svg feFuncB>
| <svg feFuncG>
| <svg feFuncR>
| <svg feGaussianBlur>
| <svg feImage>
| <svg feMerge>
| <svg feMergeNode>
| <svg feMorphology>
| <svg feOffset>
| <svg fePointLight>
| <svg feSpecularLighting>
| <svg feSpotLight>
| <svg feTile>
| <svg feTurbulence>
| <svg foreignObject>
| <svg glyphRef>
| <svg linearGradient>
| <svg radialGradient>
| <svg textPath>
#data
<!DOCTYPE html><BODY><SVG><ALTGLYPH /><ALTGLYPHDEF /><ALTGLYPHITEM /><ANIMATECOLOR /><ANIMATEMOTION /><ANIMATETRANSFORM /><CLIPPATH /><FEBLEND /><FECOLORMATRIX /><FECOMPONENTTRANSFER /><FECOMPOSITE /><FECONVOLVEMATRIX /><FEDIFFUSELIGHTING /><FEDISPLACEMENTMAP /><FEDISTANTLIGHT /><FEFLOOD /><FEFUNCA /><FEFUNCB /><FEFUNCG /><FEFUNCR /><FEGAUSSIANBLUR /><FEIMAGE /><FEMERGE /><FEMERGENODE /><FEMORPHOLOGY /><FEOFFSET /><FEPOINTLIGHT /><FESPECULARLIGHTING /><FESPOTLIGHT /><FETILE /><FETURBULENCE /><FOREIGNOBJECT /><GLYPHREF /><LINEARGRADIENT /><RADIALGRADIENT /><TEXTPATH /></SVG>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg altGlyph>
| <svg altGlyphDef>
| <svg altGlyphItem>
| <svg animateColor>
| <svg animateMotion>
| <svg animateTransform>
| <svg clipPath>
| <svg feBlend>
| <svg feColorMatrix>
| <svg feComponentTransfer>
| <svg feComposite>
| <svg feConvolveMatrix>
| <svg feDiffuseLighting>
| <svg feDisplacementMap>
| <svg feDistantLight>
| <svg feFlood>
| <svg feFuncA>
| <svg feFuncB>
| <svg feFuncG>
| <svg feFuncR>
| <svg feGaussianBlur>
| <svg feImage>
| <svg feMerge>
| <svg feMergeNode>
| <svg feMorphology>
| <svg feOffset>
| <svg fePointLight>
| <svg feSpecularLighting>
| <svg feSpotLight>
| <svg feTile>
| <svg feTurbulence>
| <svg foreignObject>
| <svg glyphRef>
| <svg linearGradient>
| <svg radialGradient>
| <svg textPath>
#data
<!DOCTYPE html><body><math><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math altglyph>
| <math altglyphdef>
| <math altglyphitem>
| <math animatecolor>
| <math animatemotion>
| <math animatetransform>
| <math clippath>
| <math feblend>
| <math fecolormatrix>
| <math fecomponenttransfer>
| <math fecomposite>
| <math feconvolvematrix>
| <math fediffuselighting>
| <math fedisplacementmap>
| <math fedistantlight>
| <math feflood>
| <math fefunca>
| <math fefuncb>
| <math fefuncg>
| <math fefuncr>
| <math fegaussianblur>
| <math feimage>
| <math femerge>
| <math femergenode>
| <math femorphology>
| <math feoffset>
| <math fepointlight>
| <math fespecularlighting>
| <math fespotlight>
| <math fetile>
| <math feturbulence>
| <math foreignobject>
| <math glyphref>
| <math lineargradient>
| <math radialgradient>
| <math textpath>
#data
<!DOCTYPE html><body><svg><solidColor /></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg solidcolor>

View file

@ -1,62 +0,0 @@
#data
<!DOCTYPE html><body><p>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject><P>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <math math>
| <math mtext>
| <i>
| "baz"
| <math annotation-xml>
| <svg svg>
| <svg desc>
| <b>
| "eggs"
| <svg g>
| <svg foreignObject>
| <p>
| "spam"
| <table>
| <tbody>
| <tr>
| <td>
| <img>
| <svg g>
| "quux"
| "bar"
#data
<!DOCTYPE html><body>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject><P>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "foo"
| <math math>
| <math mtext>
| <i>
| "baz"
| <math annotation-xml>
| <svg svg>
| <svg desc>
| <b>
| "eggs"
| <svg g>
| <svg foreignObject>
| <p>
| "spam"
| <table>
| <tbody>
| <tr>
| <td>
| <img>
| <svg g>
| "quux"
| "bar"

View file

@ -1,75 +0,0 @@
#data
<!DOCTYPE html><html><body><xyz:abc></xyz:abc>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <xyz:abc>
#data
<!DOCTYPE html><html><body><xyz:abc></xyz:abc><span></span>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <xyz:abc>
| <span>
#data
<!DOCTYPE html><html><html abc:def=gh><xyz:abc></xyz:abc>
#errors
(1,38): non-html-root
#document
| <!DOCTYPE html>
| <html>
| abc:def="gh"
| <head>
| <body>
| <xyz:abc>
#data
<!DOCTYPE html><html xml:lang=bar><html xml:lang=foo>
#errors
(1,53): non-html-root
#document
| <!DOCTYPE html>
| <html>
| xml:lang="bar"
| <head>
| <body>
#data
<!DOCTYPE html><html 123=456>
#errors
#document
| <!DOCTYPE html>
| <html>
| 123="456"
| <head>
| <body>
#data
<!DOCTYPE html><html 123=456><html 789=012>
#errors
(1,43): non-html-root
#document
| <!DOCTYPE html>
| <html>
| 123="456"
| 789="012"
| <head>
| <body>
#data
<!DOCTYPE html><html><body 789=012>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| 789="012"

Some files were not shown because too many files have changed in this diff