summaryrefslogtreecommitdiffhomepage
path: root/libs/html5lib
diff options
context:
space:
mode:
authormorpheus65535 <[email protected]>2022-01-23 23:07:52 -0500
committermorpheus65535 <[email protected]>2022-01-23 23:07:52 -0500
commit0c3c5a02a75bc61b6bf6e303de20e11741d2afac (patch)
tree30ae1d524ffe5d54172b7a4a8445d90c3461e659 /libs/html5lib
parent36bf0d219d0432c20e6314e0ce752b36f4d88e3c (diff)
downloadbazarr-0c3c5a02a75bc61b6bf6e303de20e11741d2afac.tar.gz
bazarr-0c3c5a02a75bc61b6bf6e303de20e11741d2afac.zip
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies.v1.0.3-beta.16
Diffstat (limited to 'libs/html5lib')
-rw-r--r--libs/html5lib/tests/__init__.py1
-rw-r--r--libs/html5lib/tests/conftest.py108
-rw-r--r--libs/html5lib/tests/sanitizer-testdata/tests1.dat433
-rw-r--r--libs/html5lib/tests/sanitizer.py51
-rw-r--r--libs/html5lib/tests/serializer-testdata/core.test395
-rw-r--r--libs/html5lib/tests/serializer-testdata/injectmeta.test350
-rw-r--r--libs/html5lib/tests/serializer-testdata/optionaltags.test3254
-rw-r--r--libs/html5lib/tests/serializer-testdata/options.test334
-rw-r--r--libs/html5lib/tests/serializer-testdata/whitespace.test198
-rw-r--r--libs/html5lib/tests/support.py199
-rw-r--r--libs/html5lib/tests/test_alphabeticalattributes.py78
-rw-r--r--libs/html5lib/tests/test_encoding.py117
-rw-r--r--libs/html5lib/tests/test_meta.py41
-rw-r--r--libs/html5lib/tests/test_optionaltags_filter.py7
-rw-r--r--libs/html5lib/tests/test_parser2.py94
-rw-r--r--libs/html5lib/tests/test_sanitizer.py133
-rw-r--r--libs/html5lib/tests/test_serializer.py226
-rw-r--r--libs/html5lib/tests/test_stream.py325
-rw-r--r--libs/html5lib/tests/test_tokenizer2.py66
-rw-r--r--libs/html5lib/tests/test_treeadapters.py40
-rw-r--r--libs/html5lib/tests/test_treewalkers.py205
-rw-r--r--libs/html5lib/tests/test_whitespace_filter.py125
-rw-r--r--libs/html5lib/tests/tokenizer.py253
-rw-r--r--libs/html5lib/tests/tokenizertotree.py69
-rw-r--r--libs/html5lib/tests/tree_construction.py205
-rw-r--r--libs/html5lib/tests/us-ascii.html3
-rw-r--r--libs/html5lib/tests/utf-8-bom.html3
27 files changed, 0 insertions, 7313 deletions
diff --git a/libs/html5lib/tests/__init__.py b/libs/html5lib/tests/__init__.py
deleted file mode 100644
index b8ce2de32..000000000
--- a/libs/html5lib/tests/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
diff --git a/libs/html5lib/tests/conftest.py b/libs/html5lib/tests/conftest.py
deleted file mode 100644
index dad167c58..000000000
--- a/libs/html5lib/tests/conftest.py
+++ /dev/null
@@ -1,108 +0,0 @@
-from __future__ import print_function
-import os.path
-import sys
-
-import pkg_resources
-import pytest
-
-from .tree_construction import TreeConstructionFile
-from .tokenizer import TokenizerFile
-from .sanitizer import SanitizerFile
-
-_dir = os.path.abspath(os.path.dirname(__file__))
-_root = os.path.join(_dir, "..", "..")
-_testdata = os.path.join(_dir, "testdata")
-_tree_construction = os.path.join(_testdata, "tree-construction")
-_tokenizer = os.path.join(_testdata, "tokenizer")
-_sanitizer_testdata = os.path.join(_dir, "sanitizer-testdata")
-
-
-def fail_if_missing_pytest_expect():
- """Throws an exception halting pytest if pytest-expect isn't working"""
- try:
- from pytest_expect import expect # noqa
- except ImportError:
- header = '*' * 78
- print(
- '\n' +
- header + '\n' +
- 'ERROR: Either pytest-expect or its dependency u-msgpack-python is not\n' +
- 'installed. Please install them both before running pytest.\n' +
- header + '\n',
- file=sys.stderr
- )
- raise
-
-
-fail_if_missing_pytest_expect()
-
-
-def pytest_configure(config):
- msgs = []
-
- if not os.path.exists(_testdata):
- msg = "testdata not available! "
- if os.path.exists(os.path.join(_root, ".git")):
- msg += ("Please run git submodule update --init --recursive " +
- "and then run tests again.")
- else:
- msg += ("The testdata doesn't appear to be included with this package, " +
- "so finding the right version will be hard. :(")
- msgs.append(msg)
-
- if config.option.update_xfail:
- # Check for optional requirements
- req_file = os.path.join(_root, "requirements-optional.txt")
- if os.path.exists(req_file):
- with open(req_file, "r") as fp:
- for line in fp:
- if (line.strip() and
- not (line.startswith("-r") or
- line.startswith("#"))):
- if ";" in line:
- spec, marker = line.strip().split(";", 1)
- else:
- spec, marker = line.strip(), None
- req = pkg_resources.Requirement.parse(spec)
- if marker and not pkg_resources.evaluate_marker(marker):
- msgs.append("%s not available in this environment" % spec)
- else:
- try:
- installed = pkg_resources.working_set.find(req)
- except pkg_resources.VersionConflict:
- msgs.append("Outdated version of %s installed, need %s" % (req.name, spec))
- else:
- if not installed:
- msgs.append("Need %s" % spec)
-
- # Check cElementTree
- import xml.etree.ElementTree as ElementTree
-
- try:
- import xml.etree.cElementTree as cElementTree
- except ImportError:
- msgs.append("cElementTree unable to be imported")
- else:
- if cElementTree.Element is ElementTree.Element:
- msgs.append("cElementTree is just an alias for ElementTree")
-
- if msgs:
- pytest.exit("\n".join(msgs))
-
-
-def pytest_collect_file(path, parent):
- dir = os.path.abspath(path.dirname)
- dir_and_parents = set()
- while dir not in dir_and_parents:
- dir_and_parents.add(dir)
- dir = os.path.dirname(dir)
-
- if _tree_construction in dir_and_parents:
- if path.ext == ".dat":
- return TreeConstructionFile(path, parent)
- elif _tokenizer in dir_and_parents:
- if path.ext == ".test":
- return TokenizerFile(path, parent)
- elif _sanitizer_testdata in dir_and_parents:
- if path.ext == ".dat":
- return SanitizerFile(path, parent)
diff --git a/libs/html5lib/tests/sanitizer-testdata/tests1.dat b/libs/html5lib/tests/sanitizer-testdata/tests1.dat
deleted file mode 100644
index 74e883368..000000000
--- a/libs/html5lib/tests/sanitizer-testdata/tests1.dat
+++ /dev/null
@@ -1,433 +0,0 @@
-[
- {
- "name": "IE_Comments",
- "input": "<!--[if gte IE 4]><script>alert('XSS');</script><![endif]-->",
- "output": ""
- },
-
- {
- "name": "IE_Comments_2",
- "input": "<![if !IE 5]><script>alert('XSS');</script><![endif]>",
- "output": "&lt;script&gt;alert('XSS');&lt;/script&gt;"
- },
-
- {
- "name": "allow_colons_in_path_component",
- "input": "<a href=\"./this:that\">foo</a>",
- "output": "<a href='./this:that'>foo</a>"
- },
-
- {
- "name": "background_attribute",
- "input": "<div background=\"javascript:alert('XSS')\"></div>",
- "output": "<div></div>"
- },
-
- {
- "name": "bgsound",
- "input": "<bgsound src=\"javascript:alert('XSS');\" />",
- "output": "&lt;bgsound src=\"javascript:alert('XSS');\"&gt;&lt;/bgsound&gt;"
- },
-
- {
- "name": "div_background_image_unicode_encoded",
- "input": "<div style=\"background-image:\u00a5\u00a2\u006C\u0028'\u006a\u0061\u00a6\u0061\u00a3\u0063\u00a2\u0069\u00a0\u00a4\u003a\u0061\u006c\u0065\u00a2\u00a4\u0028.1027\u0058.1053\u0053\u0027\u0029'\u0029\">foo</div>",
- "output": "<div style=''>foo</div>"
- },
-
- {
- "name": "div_expression",
- "input": "<div style=\"width: expression(alert('XSS'));\">foo</div>",
- "output": "<div style=''>foo</div>"
- },
-
- {
- "name": "double_open_angle_brackets",
- "input": "<img src=http://ha.ckers.org/scriptlet.html <",
- "output": ""
- },
-
- {
- "name": "double_open_angle_brackets_2",
- "input": "<script src=http://ha.ckers.org/scriptlet.html <",
- "output": ""
- },
-
- {
- "name": "grave_accents",
- "input": "<img src=`javascript:alert('XSS')` />",
- "output": "<img/>"
- },
-
- {
- "name": "img_dynsrc_lowsrc",
- "input": "<img dynsrc=\"javascript:alert('XSS')\" />",
- "output": "<img/>"
- },
-
- {
- "name": "img_vbscript",
- "input": "<img src='vbscript:msgbox(\"XSS\")' />",
- "output": "<img/>"
- },
-
- {
- "name": "input_image",
- "input": "<input type=\"image\" src=\"javascript:alert('XSS');\" />",
- "output": "<input type='image'/>"
- },
-
- {
- "name": "link_stylesheets",
- "input": "<link rel=\"stylesheet\" href=\"javascript:alert('XSS');\" />",
- "output": "&lt;link href=\"javascript:alert('XSS');\" rel=\"stylesheet\"&gt;"
- },
-
- {
- "name": "link_stylesheets_2",
- "input": "<link rel=\"stylesheet\" href=\"http://ha.ckers.org/xss.css\" />",
- "output": "&lt;link href=\"http://ha.ckers.org/xss.css\" rel=\"stylesheet\"&gt;"
- },
-
- {
- "name": "list_style_image",
- "input": "<li style=\"list-style-image: url(javascript:alert('XSS'))\">foo</li>",
- "output": "<li style=''>foo</li>"
- },
-
- {
- "name": "no_closing_script_tags",
- "input": "<script src=http://ha.ckers.org/xss.js?<b>",
- "output": "&lt;script src=\"http://ha.ckers.org/xss.js?&amp;lt;b\"&gt;&lt;/script&gt;"
- },
-
- {
- "name": "non_alpha_non_digit",
- "input": "<script/XSS src=\"http://ha.ckers.org/xss.js\"></script>",
- "output": "&lt;script src=\"http://ha.ckers.org/xss.js\" xss=\"\"&gt;&lt;/script&gt;"
- },
-
- {
- "name": "non_alpha_non_digit_2",
- "input": "<a onclick!\\#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>foo</a>",
- "output": "<a>foo</a>"
- },
-
- {
- "name": "non_alpha_non_digit_3",
- "input": "<img/src=\"http://ha.ckers.org/xss.js\"/>",
- "output": "<img src='http://ha.ckers.org/xss.js'/>"
- },
-
- {
- "name": "non_alpha_non_digit_II",
- "input": "<a href!\\#$%&()*~+-_.,:;?@[/|]^`=alert('XSS')>foo</a>",
- "output": "<a>foo</a>"
- },
-
- {
- "name": "non_alpha_non_digit_III",
- "input": "<a/href=\"javascript:alert('XSS');\">foo</a>",
- "output": "<a>foo</a>"
- },
-
- {
- "name": "platypus",
- "input": "<a href=\"http://www.ragingplatypus.com/\" style=\"display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;\">never trust your upstream platypus</a>",
- "output": "<a href='http://www.ragingplatypus.com/' style='display: block; width: 100%; height: 100%; background-color: black; background-x: center; background-y: center;'>never trust your upstream platypus</a>"
- },
-
- {
- "name": "protocol_resolution_in_script_tag",
- "input": "<script src=//ha.ckers.org/.j></script>",
- "output": "&lt;script src=\"//ha.ckers.org/.j\"&gt;&lt;/script&gt;"
- },
-
- {
- "name": "should_allow_anchors",
- "input": "<a href='foo' onclick='bar'><script>baz</script></a>",
- "output": "<a href='foo'>&lt;script&gt;baz&lt;/script&gt;</a>"
- },
-
- {
- "name": "should_allow_image_alt_attribute",
- "input": "<img alt='foo' onclick='bar' />",
- "output": "<img alt='foo'/>"
- },
-
- {
- "name": "should_allow_image_height_attribute",
- "input": "<img height='foo' onclick='bar' />",
- "output": "<img height='foo'/>"
- },
-
- {
- "name": "should_allow_image_src_attribute",
- "input": "<img src='foo' onclick='bar' />",
- "output": "<img src='foo'/>"
- },
-
- {
- "name": "should_allow_image_width_attribute",
- "input": "<img width='foo' onclick='bar' />",
- "output": "<img width='foo'/>"
- },
-
- {
- "name": "should_handle_blank_text",
- "input": "",
- "output": ""
- },
-
- {
- "name": "should_handle_malformed_image_tags",
- "input": "<img \"\"\"><script>alert(\"XSS\")</script>\">",
- "output": "<img/>&lt;script&gt;alert(\"XSS\")&lt;/script&gt;\"&gt;"
- },
-
- {
- "name": "should_handle_non_html",
- "input": "abc",
- "output": "abc"
- },
-
- {
- "name": "should_not_fall_for_ridiculous_hack",
- "input": "<img\nsrc\n=\n\"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n\"\n />",
- "output": "<img/>"
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_0",
- "input": "<img src=\"javascript:alert('XSS');\" />",
- "output": "<img/>"
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_1",
- "input": "<img src=javascript:alert('XSS') />",
- "output": "<img/>"
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_10",
- "input": "<img src=\"jav&#x0A;ascript:alert('XSS');\" />",
- "output": "<img/>"
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_11",
- "input": "<img src=\"jav&#x0D;ascript:alert('XSS');\" />",
- "output": "<img/>"
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_12",
- "input": "<img src=\" &#14; javascript:alert('XSS');\" />",
- "output": "<img/>"
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_13",
- "input": "<img src=\"&#x20;javascript:alert('XSS');\" />",
- "output": "<img/>"
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_14",
- "input": "<img src=\"&#xA0;javascript:alert('XSS');\" />",
- "output": "<img/>"
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_2",
- "input": "<img src=\"JaVaScRiPt:alert('XSS')\" />",
- "output": "<img/>"
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_3",
- "input": "<img src='javascript:alert(&quot;XSS&quot;)' />",
- "output": "<img/>"
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_4",
- "input": "<img src='javascript:alert(String.fromCharCode(88,83,83))' />",
- "output": "<img/>"
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_5",
- "input": "<img src='&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;' />",
- "output": "<img/>"
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_6",
- "input": "<img src='&#0000106;&#0000097;&#0000118;&#0000097;&#0000115;&#0000099;&#0000114;&#0000105;&#0000112;&#0000116;&#0000058;&#0000097;&#0000108;&#0000101;&#0000114;&#0000116;&#0000040;&#0000039;&#0000088;&#0000083;&#0000083;&#0000039;&#0000041' />",
- "output": "<img/>"
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_7",
- "input": "<img src='&#x6A;&#x61;&#x76;&#x61;&#x73;&#x63;&#x72;&#x69;&#x70;&#x74;&#x3A;&#x61;&#x6C;&#x65;&#x72;&#x74;&#x28;&#x27;&#x58;&#x53;&#x53;&#x27;&#x29' />",
- "output": "<img/>"
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_8",
- "input": "<img src=\"jav\tascript:alert('XSS');\" />",
- "output": "<img/>"
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_9",
- "input": "<img src=\"jav&#x09;ascript:alert('XSS');\" />",
- "output": "<img/>"
- },
-
- {
- "name": "should_sanitize_half_open_scripts",
- "input": "<img src=\"javascript:alert('XSS')\"",
- "output": ""
- },
-
- {
- "name": "should_sanitize_invalid_script_tag",
- "input": "<script/XSS SRC=\"http://ha.ckers.org/xss.js\"></script>",
- "output": "&lt;script src=\"http://ha.ckers.org/xss.js\" xss=\"\"&gt;&lt;/script&gt;"
- },
-
- {
- "name": "should_sanitize_script_tag_with_multiple_open_brackets",
- "input": "<<script>alert(\"XSS\");//<</script>",
- "output": "&lt;&lt;script&gt;alert(\"XSS\");//&lt;&lt;/script&gt;"
- },
-
- {
- "name": "should_sanitize_script_tag_with_multiple_open_brackets_2",
- "input": "<iframe src=http://ha.ckers.org/scriptlet.html\n<",
- "output": ""
- },
-
- {
- "name": "should_sanitize_tag_broken_up_by_null",
- "input": "<scr\u0000ipt>alert(\"XSS\")</scr\u0000ipt>",
- "output": "&lt;scr\ufffdipt&gt;alert(\"XSS\")&lt;/scr\ufffdipt&gt;"
- },
-
- {
- "name": "should_sanitize_unclosed_script",
- "input": "<script src=http://ha.ckers.org/xss.js?<b>",
- "output": "&lt;script src=\"http://ha.ckers.org/xss.js?&amp;lt;b\"&gt;&lt;/script&gt;"
- },
-
- {
- "name": "should_strip_href_attribute_in_a_with_bad_protocols",
- "input": "<a href=\"javascript:XSS\" title=\"1\">boo</a>",
- "output": "<a title='1'>boo</a>"
- },
-
- {
- "name": "should_strip_href_attribute_in_a_with_bad_protocols_and_whitespace",
- "input": "<a href=\" javascript:XSS\" title=\"1\">boo</a>",
- "output": "<a title='1'>boo</a>"
- },
-
- {
- "name": "should_strip_src_attribute_in_img_with_bad_protocols",
- "input": "<img src=\"javascript:XSS\" title=\"1\">boo</img>",
- "output": "<img title='1'/>boo"
- },
-
- {
- "name": "should_strip_src_attribute_in_img_with_bad_protocols_and_whitespace",
- "input": "<img src=\" javascript:XSS\" title=\"1\">boo</img>",
- "output": "<img title='1'/>boo"
- },
-
- {
- "name": "xml_base",
- "input": "<div xml:base=\"javascript:alert('XSS');//\">foo</div>",
- "output": "<div>foo</div>"
- },
-
- {
- "name": "xul",
- "input": "<p style=\"-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')\">fubar</p>",
- "output": "<p style=''>fubar</p>"
- },
-
- {
- "name": "quotes_in_attributes",
- "input": "<img src='foo' title='\"foo\" bar' />",
- "output": "<img src='foo' title='\"foo\" bar'/>"
- },
-
- {
- "name": "uri_refs_in_svg_attributes",
- "input": "<svg><rect fill='url(#foo)' />",
- "output": "<svg><rect fill='url(#foo)'></rect></svg>"
- },
-
- {
- "name": "absolute_uri_refs_in_svg_attributes",
- "input": "<svg><rect fill='url(http://bad.com/) #fff' />",
- "output": "<svg><rect fill=' #fff'></rect></svg>"
- },
-
- {
- "name": "uri_ref_with_space_in svg_attribute",
- "input": "<svg><rect fill='url(\n#foo)' />",
- "output": "<svg><rect fill='url(\n#foo)'></rect></svg>"
- },
-
- {
- "name": "absolute_uri_ref_with_space_in svg_attribute",
- "input": "<svg><rect fill=\"url(\nhttp://bad.com/)\" />",
- "output": "<svg><rect fill=' '></rect></svg>"
- },
-
- {
- "name": "allow_html5_image_tag",
- "input": "<image src='foo' />",
- "output": "<img src='foo'/>"
- },
-
- {
- "name": "style_attr_end_with_nothing",
- "input": "<div style=\"color: blue\" />",
- "output": "<div style='color: blue;'></div>"
- },
-
- {
- "name": "style_attr_end_with_space",
- "input": "<div style=\"color: blue \" />",
- "output": "<div style='color: blue ;'></div>"
- },
-
- {
- "name": "style_attr_end_with_semicolon",
- "input": "<div style=\"color: blue;\" />",
- "output": "<div style='color: blue;'></div>"
- },
-
- {
- "name": "style_attr_end_with_semicolon_space",
- "input": "<div style=\"color: blue; \" />",
- "output": "<div style='color: blue;'></div>"
- },
-
- {
- "name": "attributes_with_embedded_quotes",
- "input": "<img src=doesntexist.jpg\"'onerror=\"alert(1) />",
- "output": "<img src='doesntexist.jpg\"&#39;onerror=\"alert(1)'/>"
- },
-
- {
- "name": "attributes_with_embedded_quotes_II",
- "input": "<img src=notthere.jpg\"\"onerror=\"alert(2) />",
- "output": "<img src='notthere.jpg\"\"onerror=\"alert(2)'/>"
- }
-]
diff --git a/libs/html5lib/tests/sanitizer.py b/libs/html5lib/tests/sanitizer.py
deleted file mode 100644
index bb4834214..000000000
--- a/libs/html5lib/tests/sanitizer.py
+++ /dev/null
@@ -1,51 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import codecs
-import json
-
-import pytest
-
-from html5lib import parseFragment, serialize
-
-
-class SanitizerFile(pytest.File):
- def collect(self):
- with codecs.open(str(self.fspath), "r", encoding="utf-8") as fp:
- tests = json.load(fp)
- for i, test in enumerate(tests):
- yield SanitizerTest(str(i), self, test=test)
-
-
-class SanitizerTest(pytest.Item):
- def __init__(self, name, parent, test):
- super(SanitizerTest, self).__init__(name, parent)
- self.obj = lambda: 1 # this is to hack around skipif needing a function!
- self.test = test
-
- def runtest(self):
- input = self.test["input"]
- expected = self.test["output"]
-
- parsed = parseFragment(input)
- with pytest.deprecated_call():
- serialized = serialize(parsed,
- sanitize=True,
- omit_optional_tags=False,
- use_trailing_solidus=True,
- space_before_trailing_solidus=False,
- quote_attr_values="always",
- quote_char="'",
- alphabetical_attributes=True)
- errorMsg = "\n".join(["\n\nInput:", input,
- "\nExpected:", expected,
- "\nReceived:", serialized])
- assert expected == serialized, errorMsg
-
- def repr_failure(self, excinfo):
- traceback = excinfo.traceback
- ntraceback = traceback.cut(path=__file__)
- excinfo.traceback = ntraceback.filter()
-
- return excinfo.getrepr(funcargs=True,
- showlocals=False,
- style="short", tbfilter=False)
diff --git a/libs/html5lib/tests/serializer-testdata/core.test b/libs/html5lib/tests/serializer-testdata/core.test
deleted file mode 100644
index 55294b683..000000000
--- a/libs/html5lib/tests/serializer-testdata/core.test
+++ /dev/null
@@ -1,395 +0,0 @@
-{
- "tests": [
- {
- "expected": [
- "<span title='test \"with\" &amp;quot;'>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "span",
- [
- {
- "namespace": null,
- "name": "title",
- "value": "test \"with\" &quot;"
- }
- ]
- ]
- ],
- "description": "proper attribute value escaping"
- },
- {
- "expected": [
- "<span title=foo>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "span",
- [
- {
- "namespace": null,
- "name": "title",
- "value": "foo"
- }
- ]
- ]
- ],
- "description": "proper attribute value non-quoting"
- },
- {
- "expected": [
- "<span title=\"foo<bar\">"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "span",
- [
- {
- "namespace": null,
- "name": "title",
- "value": "foo<bar"
- }
- ]
- ]
- ],
- "description": "proper attribute value non-quoting (with <)"
- },
- {
- "expected": [
- "<span title=\"foo=bar\">"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "span",
- [
- {
- "namespace": null,
- "name": "title",
- "value": "foo=bar"
- }
- ]
- ]
- ],
- "description": "proper attribute value quoting (with =)"
- },
- {
- "expected": [
- "<span title=\"foo>bar\">"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "span",
- [
- {
- "namespace": null,
- "name": "title",
- "value": "foo>bar"
- }
- ]
- ]
- ],
- "description": "proper attribute value quoting (with >)"
- },
- {
- "expected": [
- "<span title='foo\"bar'>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "span",
- [
- {
- "namespace": null,
- "name": "title",
- "value": "foo\"bar"
- }
- ]
- ]
- ],
- "description": "proper attribute value quoting (with \")"
- },
- {
- "expected": [
- "<span title=\"foo'bar\">"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "span",
- [
- {
- "namespace": null,
- "name": "title",
- "value": "foo'bar"
- }
- ]
- ]
- ],
- "description": "proper attribute value quoting (with ')"
- },
- {
- "expected": [
- "<span title=\"foo'bar&quot;baz\">"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "span",
- [
- {
- "namespace": null,
- "name": "title",
- "value": "foo'bar\"baz"
- }
- ]
- ]
- ],
- "description": "proper attribute value quoting (with both \" and ')"
- },
- {
- "expected": [
- "<span title=\"foo bar\">"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "span",
- [
- {
- "namespace": null,
- "name": "title",
- "value": "foo bar"
- }
- ]
- ]
- ],
- "description": "proper attribute value quoting (with space)"
- },
- {
- "expected": [
- "<span title=\"foo\tbar\">"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "span",
- [
- {
- "namespace": null,
- "name": "title",
- "value": "foo\tbar"
- }
- ]
- ]
- ],
- "description": "proper attribute value quoting (with tab)"
- },
- {
- "expected": [
- "<span title=\"foo\nbar\">"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "span",
- [
- {
- "namespace": null,
- "name": "title",
- "value": "foo\nbar"
- }
- ]
- ]
- ],
- "description": "proper attribute value quoting (with LF)"
- },
- {
- "expected": [
- "<span title=\"foo\rbar\">"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "span",
- [
- {
- "namespace": null,
- "name": "title",
- "value": "foo\rbar"
- }
- ]
- ]
- ],
- "description": "proper attribute value quoting (with CR)"
- },
- {
- "expected": [
- "<span title=\"foo\u000bbar\">"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "span",
- [
- {
- "namespace": null,
- "name": "title",
- "value": "foo\u000bbar"
- }
- ]
- ]
- ],
- "description": "proper attribute value non-quoting (with linetab)"
- },
- {
- "expected": [
- "<span title=\"foo\fbar\">"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "span",
- [
- {
- "namespace": null,
- "name": "title",
- "value": "foo\fbar"
- }
- ]
- ]
- ],
- "description": "proper attribute value quoting (with form feed)"
- },
- {
- "expected": [
- "<img>"
- ],
- "input": [
- [
- "EmptyTag",
- "img",
- {}
- ]
- ],
- "description": "void element (as EmptyTag token)"
- },
- {
- "expected": [
- "<!DOCTYPE foo>"
- ],
- "input": [
- [
- "Doctype",
- "foo"
- ]
- ],
- "description": "doctype in error"
- },
- {
- "expected": [
- "a&lt;b&gt;c&amp;d"
- ],
- "input": [
- [
- "Characters",
- "a<b>c&d"
- ]
- ],
- "description": "character data",
- "options": {
- "encoding": "utf-8"
- }
- },
- {
- "expected": [
- "<script>a<b>c&d"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "script",
- {}
- ],
- [
- "Characters",
- "a<b>c&d"
- ]
- ],
- "description": "rcdata"
- },
- {
- "expected": [
- "<!DOCTYPE HTML>"
- ],
- "input": [
- [
- "Doctype",
- "HTML"
- ]
- ],
- "description": "doctype"
- },
- {
- "expected": [
- "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"
- ],
- "input": [
- [
- "Doctype",
- "HTML",
- "-//W3C//DTD HTML 4.01//EN",
- "http://www.w3.org/TR/html4/strict.dtd"
- ]
- ],
- "description": "HTML 4.01 DOCTYPE"
- },
- {
- "expected": [
- "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\">"
- ],
- "input": [
- [
- "Doctype",
- "HTML",
- "-//W3C//DTD HTML 4.01//EN"
- ]
- ],
- "description": "HTML 4.01 DOCTYPE without system identifier"
- },
- {
- "expected": [
- "<!DOCTYPE html SYSTEM \"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd\">"
- ],
- "input": [
- [
- "Doctype",
- "html",
- "",
- "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"
- ]
- ],
- "description": "IBM DOCTYPE without public identifier"
- }
- ]
-}
diff --git a/libs/html5lib/tests/serializer-testdata/injectmeta.test b/libs/html5lib/tests/serializer-testdata/injectmeta.test
deleted file mode 100644
index 399590c3f..000000000
--- a/libs/html5lib/tests/serializer-testdata/injectmeta.test
+++ /dev/null
@@ -1,350 +0,0 @@
-{
- "tests": [
- {
- "expected": [
- ""
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "head",
- {}
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "head"
- ]
- ],
- "description": "no encoding",
- "options": {
- "inject_meta_charset": true
- }
- },
- {
- "expected": [
- "<meta charset=utf-8>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "head",
- {}
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "head"
- ]
- ],
- "description": "empytag head",
- "options": {
- "encoding": "utf-8",
- "inject_meta_charset": true
- }
- },
- {
- "expected": [
- "<meta charset=utf-8><title>foo</title>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "head",
- {}
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "title",
- {}
- ],
- [
- "Characters",
- "foo"
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "title"
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "head"
- ]
- ],
- "description": "head w/title",
- "options": {
- "encoding": "utf-8",
- "inject_meta_charset": true
- }
- },
- {
- "expected": [
- "<meta charset=utf-8>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "head",
- {}
- ],
- [
- "EmptyTag",
- "meta",
- [
- {
- "namespace": null,
- "name": "charset",
- "value": "ascii"
- }
- ]
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "head"
- ]
- ],
- "description": "head w/meta-charset",
- "options": {
- "encoding": "utf-8",
- "inject_meta_charset": true
- }
- },
- {
- "expected": [
- "<meta charset=utf-8><meta charset=utf-8>",
- "<head><meta charset=utf-8><meta charset=ascii>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "head",
- {}
- ],
- [
- "EmptyTag",
- "meta",
- [
- {
- "namespace": null,
- "name": "charset",
- "value": "ascii"
- }
- ]
- ],
- [
- "EmptyTag",
- "meta",
- [
- {
- "namespace": null,
- "name": "charset",
- "value": "ascii"
- }
- ]
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "head"
- ]
- ],
- "description": "head w/ two meta-charset",
- "options": {
- "encoding": "utf-8",
- "inject_meta_charset": true
- }
- },
- {
- "expected": [
- "<meta charset=utf-8><meta content=noindex name=robots>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "head",
- {}
- ],
- [
- "EmptyTag",
- "meta",
- [
- {
- "namespace": null,
- "name": "name",
- "value": "robots"
- },
- {
- "namespace": null,
- "name": "content",
- "value": "noindex"
- }
- ]
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "head"
- ]
- ],
- "description": "head w/robots",
- "options": {
- "encoding": "utf-8",
- "inject_meta_charset": true
- }
- },
- {
- "expected": [
- "<meta content=noindex name=robots><meta charset=utf-8>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "head",
- {}
- ],
- [
- "EmptyTag",
- "meta",
- [
- {
- "namespace": null,
- "name": "name",
- "value": "robots"
- },
- {
- "namespace": null,
- "name": "content",
- "value": "noindex"
- }
- ]
- ],
- [
- "EmptyTag",
- "meta",
- [
- {
- "namespace": null,
- "name": "charset",
- "value": "ascii"
- }
- ]
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "head"
- ]
- ],
- "description": "head w/robots & charset",
- "options": {
- "encoding": "utf-8",
- "inject_meta_charset": true
- }
- },
- {
- "expected": [
- "<meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "head",
- {}
- ],
- [
- "EmptyTag",
- "meta",
- [
- {
- "namespace": null,
- "name": "http-equiv",
- "value": "content-type"
- },
- {
- "namespace": null,
- "name": "content",
- "value": "text/html; charset=ascii"
- }
- ]
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "head"
- ]
- ],
- "description": "head w/ charset in http-equiv content-type",
- "options": {
- "encoding": "utf-8",
- "inject_meta_charset": true
- }
- },
- {
- "expected": [
- "<meta content=noindex name=robots><meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "head",
- {}
- ],
- [
- "EmptyTag",
- "meta",
- [
- {
- "namespace": null,
- "name": "name",
- "value": "robots"
- },
- {
- "namespace": null,
- "name": "content",
- "value": "noindex"
- }
- ]
- ],
- [
- "EmptyTag",
- "meta",
- [
- {
- "namespace": null,
- "name": "http-equiv",
- "value": "content-type"
- },
- {
- "namespace": null,
- "name": "content",
- "value": "text/html; charset=ascii"
- }
- ]
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "head"
- ]
- ],
- "description": "head w/robots & charset in http-equiv content-type",
- "options": {
- "encoding": "utf-8",
- "inject_meta_charset": true
- }
- }
- ]
-} \ No newline at end of file
diff --git a/libs/html5lib/tests/serializer-testdata/optionaltags.test b/libs/html5lib/tests/serializer-testdata/optionaltags.test
deleted file mode 100644
index e67725ca2..000000000
--- a/libs/html5lib/tests/serializer-testdata/optionaltags.test
+++ /dev/null
@@ -1,3254 +0,0 @@
-{
- "tests": [
- {
- "expected": [
- "<html lang=en>foo"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "html",
- [
- {
- "namespace": null,
- "name": "lang",
- "value": "en"
- }
- ]
- ],
- [
- "Characters",
- "foo"
- ]
- ],
- "description": "html start-tag followed by text, with attributes"
- },
- {
- "expected": [
- "<html><!--foo-->"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "html",
- {}
- ],
- [
- "Comment",
- "foo"
- ]
- ],
- "description": "html start-tag followed by comment"
- },
- {
- "expected": [
- "<html> foo"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "html",
- {}
- ],
- [
- "Characters",
- " foo"
- ]
- ],
- "description": "html start-tag followed by space character"
- },
- {
- "expected": [
- "foo"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "html",
- {}
- ],
- [
- "Characters",
- "foo"
- ]
- ],
- "description": "html start-tag followed by text"
- },
- {
- "expected": [
- "<foo>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "html",
- {}
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "foo",
- {}
- ]
- ],
- "description": "html start-tag followed by start-tag"
- },
- {
- "expected": [
- "</foo>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "html",
- {}
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "foo"
- ]
- ],
- "description": "html start-tag followed by end-tag"
- },
- {
- "expected": [
- ""
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "html",
- {}
- ]
- ],
- "description": "html start-tag at EOF (shouldn't ever happen?!)"
- },
- {
- "expected": [
- "</html><!--foo-->"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "html"
- ],
- [
- "Comment",
- "foo"
- ]
- ],
- "description": "html end-tag followed by comment"
- },
- {
- "expected": [
- "</html> foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "html"
- ],
- [
- "Characters",
- " foo"
- ]
- ],
- "description": "html end-tag followed by space character"
- },
- {
- "expected": [
- "foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "html"
- ],
- [
- "Characters",
- "foo"
- ]
- ],
- "description": "html end-tag followed by text"
- },
- {
- "expected": [
- "<foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "html"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "foo",
- {}
- ]
- ],
- "description": "html end-tag followed by start-tag"
- },
- {
- "expected": [
- "</foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "html"
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "foo"
- ]
- ],
- "description": "html end-tag followed by end-tag"
- },
- {
- "expected": [
- ""
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "html"
- ]
- ],
- "description": "html end-tag at EOF"
- },
- {
- "expected": [
- "<head><!--foo-->"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "head",
- {}
- ],
- [
- "Comment",
- "foo"
- ]
- ],
- "description": "head start-tag followed by comment"
- },
- {
- "expected": [
- "<head> foo"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "head",
- {}
- ],
- [
- "Characters",
- " foo"
- ]
- ],
- "description": "head start-tag followed by space character"
- },
- {
- "expected": [
- "<head>foo"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "head",
- {}
- ],
- [
- "Characters",
- "foo"
- ]
- ],
- "description": "head start-tag followed by text"
- },
- {
- "expected": [
- "<foo>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "head",
- {}
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "foo",
- {}
- ]
- ],
- "description": "head start-tag followed by start-tag"
- },
- {
- "expected": [
- "<head></foo>",
- "</foo>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "head",
- {}
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "foo"
- ]
- ],
- "description": "head start-tag followed by end-tag (shouldn't ever happen?!)"
- },
- {
- "expected": [
- ""
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "head",
- {}
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "head"
- ]
- ],
- "description": "empty head element"
- },
- {
- "expected": [
- "<meta>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "head",
- {}
- ],
- [
- "EmptyTag",
- "meta",
- {}
- ]
- ],
- "description": "head start-tag followed by empty-tag"
- },
- {
- "expected": [
- "<head>",
- ""
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "head",
- {}
- ]
- ],
- "description": "head start-tag at EOF (shouldn't ever happen?!)"
- },
- {
- "expected": [
- "</head><!--foo-->"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "head"
- ],
- [
- "Comment",
- "foo"
- ]
- ],
- "description": "head end-tag followed by comment"
- },
- {
- "expected": [
- "</head> foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "head"
- ],
- [
- "Characters",
- " foo"
- ]
- ],
- "description": "head end-tag followed by space character"
- },
- {
- "expected": [
- "foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "head"
- ],
- [
- "Characters",
- "foo"
- ]
- ],
- "description": "head end-tag followed by text"
- },
- {
- "expected": [
- "<foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "head"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "foo",
- {}
- ]
- ],
- "description": "head end-tag followed by start-tag"
- },
- {
- "expected": [
- "</foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "head"
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "foo"
- ]
- ],
- "description": "head end-tag followed by end-tag"
- },
- {
- "expected": [
- ""
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "head"
- ]
- ],
- "description": "head end-tag at EOF"
- },
- {
- "expected": [
- "<body><!--foo-->"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "body",
- {}
- ],
- [
- "Comment",
- "foo"
- ]
- ],
- "description": "body start-tag followed by comment"
- },
- {
- "expected": [
- "<body> foo"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "body",
- {}
- ],
- [
- "Characters",
- " foo"
- ]
- ],
- "description": "body start-tag followed by space character"
- },
- {
- "expected": [
- "foo"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "body",
- {}
- ],
- [
- "Characters",
- "foo"
- ]
- ],
- "description": "body start-tag followed by text"
- },
- {
- "expected": [
- "<foo>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "body",
- {}
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "foo",
- {}
- ]
- ],
- "description": "body start-tag followed by start-tag"
- },
- {
- "expected": [
- "</foo>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "body",
- {}
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "foo"
- ]
- ],
- "description": "body start-tag followed by end-tag"
- },
- {
- "expected": [
- ""
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "body",
- {}
- ]
- ],
- "description": "body start-tag at EOF (shouldn't ever happen?!)"
- },
- {
- "expected": [
- "</body><!--foo-->"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "body"
- ],
- [
- "Comment",
- "foo"
- ]
- ],
- "description": "body end-tag followed by comment"
- },
- {
- "expected": [
- "</body> foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "body"
- ],
- [
- "Characters",
- " foo"
- ]
- ],
- "description": "body end-tag followed by space character"
- },
- {
- "expected": [
- "foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "body"
- ],
- [
- "Characters",
- "foo"
- ]
- ],
- "description": "body end-tag followed by text"
- },
- {
- "expected": [
- "<foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "body"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "foo",
- {}
- ]
- ],
- "description": "body end-tag followed by start-tag"
- },
- {
- "expected": [
- "</foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "body"
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "foo"
- ]
- ],
- "description": "body end-tag followed by end-tag"
- },
- {
- "expected": [
- ""
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "body"
- ]
- ],
- "description": "body end-tag at EOF"
- },
- {
- "expected": [
- "</li><!--foo-->"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "li"
- ],
- [
- "Comment",
- "foo"
- ]
- ],
- "description": "li end-tag followed by comment"
- },
- {
- "expected": [
- "</li> foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "li"
- ],
- [
- "Characters",
- " foo"
- ]
- ],
- "description": "li end-tag followed by space character"
- },
- {
- "expected": [
- "</li>foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "li"
- ],
- [
- "Characters",
- "foo"
- ]
- ],
- "description": "li end-tag followed by text"
- },
- {
- "expected": [
- "</li><foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "li"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "foo",
- {}
- ]
- ],
- "description": "li end-tag followed by start-tag"
- },
- {
- "expected": [
- "<li>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "li"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "li",
- {}
- ]
- ],
- "description": "li end-tag followed by li start-tag"
- },
- {
- "expected": [
- "</foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "li"
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "foo"
- ]
- ],
- "description": "li end-tag followed by end-tag"
- },
- {
- "expected": [
- ""
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "li"
- ]
- ],
- "description": "li end-tag at EOF"
- },
- {
- "expected": [
- "</dt><!--foo-->"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "dt"
- ],
- [
- "Comment",
- "foo"
- ]
- ],
- "description": "dt end-tag followed by comment"
- },
- {
- "expected": [
- "</dt> foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "dt"
- ],
- [
- "Characters",
- " foo"
- ]
- ],
- "description": "dt end-tag followed by space character"
- },
- {
- "expected": [
- "</dt>foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "dt"
- ],
- [
- "Characters",
- "foo"
- ]
- ],
- "description": "dt end-tag followed by text"
- },
- {
- "expected": [
- "</dt><foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "dt"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "foo",
- {}
- ]
- ],
- "description": "dt end-tag followed by start-tag"
- },
- {
- "expected": [
- "<dt>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "dt"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "dt",
- {}
- ]
- ],
- "description": "dt end-tag followed by dt start-tag"
- },
- {
- "expected": [
- "<dd>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "dt"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "dd",
- {}
- ]
- ],
- "description": "dt end-tag followed by dd start-tag"
- },
- {
- "expected": [
- "</dt></foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "dt"
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "foo"
- ]
- ],
- "description": "dt end-tag followed by end-tag"
- },
- {
- "expected": [
- "</dt>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "dt"
- ]
- ],
- "description": "dt end-tag at EOF"
- },
- {
- "expected": [
- "</dd><!--foo-->"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "dd"
- ],
- [
- "Comment",
- "foo"
- ]
- ],
- "description": "dd end-tag followed by comment"
- },
- {
- "expected": [
- "</dd> foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "dd"
- ],
- [
- "Characters",
- " foo"
- ]
- ],
- "description": "dd end-tag followed by space character"
- },
- {
- "expected": [
- "</dd>foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "dd"
- ],
- [
- "Characters",
- "foo"
- ]
- ],
- "description": "dd end-tag followed by text"
- },
- {
- "expected": [
- "</dd><foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "dd"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "foo",
- {}
- ]
- ],
- "description": "dd end-tag followed by start-tag"
- },
- {
- "expected": [
- "<dd>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "dd"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "dd",
- {}
- ]
- ],
- "description": "dd end-tag followed by dd start-tag"
- },
- {
- "expected": [
- "<dt>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "dd"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "dt",
- {}
- ]
- ],
- "description": "dd end-tag followed by dt start-tag"
- },
- {
- "expected": [
- "</foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "dd"
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "foo"
- ]
- ],
- "description": "dd end-tag followed by end-tag"
- },
- {
- "expected": [
- ""
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "dd"
- ]
- ],
- "description": "dd end-tag at EOF"
- },
- {
- "expected": [
- "</p><!--foo-->"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "Comment",
- "foo"
- ]
- ],
- "description": "p end-tag followed by comment"
- },
- {
- "expected": [
- "</p> foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "Characters",
- " foo"
- ]
- ],
- "description": "p end-tag followed by space character"
- },
- {
- "expected": [
- "</p>foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "Characters",
- "foo"
- ]
- ],
- "description": "p end-tag followed by text"
- },
- {
- "expected": [
- "</p><foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "foo",
- {}
- ]
- ],
- "description": "p end-tag followed by start-tag"
- },
- {
- "expected": [
- "<address>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "address",
- {}
- ]
- ],
- "description": "p end-tag followed by address start-tag"
- },
- {
- "expected": [
- "<article>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "article",
- {}
- ]
- ],
- "description": "p end-tag followed by article start-tag"
- },
- {
- "expected": [
- "<aside>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "aside",
- {}
- ]
- ],
- "description": "p end-tag followed by aside start-tag"
- },
- {
- "expected": [
- "<blockquote>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "blockquote",
- {}
- ]
- ],
- "description": "p end-tag followed by blockquote start-tag"
- },
- {
- "expected": [
- "<datagrid>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "datagrid",
- {}
- ]
- ],
- "description": "p end-tag followed by datagrid start-tag"
- },
- {
- "expected": [
- "<dialog>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "dialog",
- {}
- ]
- ],
- "description": "p end-tag followed by dialog start-tag"
- },
- {
- "expected": [
- "<dir>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "dir",
- {}
- ]
- ],
- "description": "p end-tag followed by dir start-tag"
- },
- {
- "expected": [
- "<div>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "div",
- {}
- ]
- ],
- "description": "p end-tag followed by div start-tag"
- },
- {
- "expected": [
- "<dl>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "dl",
- {}
- ]
- ],
- "description": "p end-tag followed by dl start-tag"
- },
- {
- "expected": [
- "<fieldset>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "fieldset",
- {}
- ]
- ],
- "description": "p end-tag followed by fieldset start-tag"
- },
- {
- "expected": [
- "<footer>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "footer",
- {}
- ]
- ],
- "description": "p end-tag followed by footer start-tag"
- },
- {
- "expected": [
- "<form>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "form",
- {}
- ]
- ],
- "description": "p end-tag followed by form start-tag"
- },
- {
- "expected": [
- "<h1>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "h1",
- {}
- ]
- ],
- "description": "p end-tag followed by h1 start-tag"
- },
- {
- "expected": [
- "<h2>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "h2",
- {}
- ]
- ],
- "description": "p end-tag followed by h2 start-tag"
- },
- {
- "expected": [
- "<h3>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "h3",
- {}
- ]
- ],
- "description": "p end-tag followed by h3 start-tag"
- },
- {
- "expected": [
- "<h4>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "h4",
- {}
- ]
- ],
- "description": "p end-tag followed by h4 start-tag"
- },
- {
- "expected": [
- "<h5>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "h5",
- {}
- ]
- ],
- "description": "p end-tag followed by h5 start-tag"
- },
- {
- "expected": [
- "<h6>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "h6",
- {}
- ]
- ],
- "description": "p end-tag followed by h6 start-tag"
- },
- {
- "expected": [
- "<header>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "header",
- {}
- ]
- ],
- "description": "p end-tag followed by header start-tag"
- },
- {
- "expected": [
- "<hr>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "EmptyTag",
- "hr",
- {}
- ]
- ],
- "description": "p end-tag followed by hr empty-tag"
- },
- {
- "expected": [
- "<menu>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "menu",
- {}
- ]
- ],
- "description": "p end-tag followed by menu start-tag"
- },
- {
- "expected": [
- "<nav>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "nav",
- {}
- ]
- ],
- "description": "p end-tag followed by nav start-tag"
- },
- {
- "expected": [
- "<ol>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "ol",
- {}
- ]
- ],
- "description": "p end-tag followed by ol start-tag"
- },
- {
- "expected": [
- "<p>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "p",
- {}
- ]
- ],
- "description": "p end-tag followed by p start-tag"
- },
- {
- "expected": [
- "<pre>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "pre",
- {}
- ]
- ],
- "description": "p end-tag followed by pre start-tag"
- },
- {
- "expected": [
- "<section>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "section",
- {}
- ]
- ],
- "description": "p end-tag followed by section start-tag"
- },
- {
- "expected": [
- "<table>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "table",
- {}
- ]
- ],
- "description": "p end-tag followed by table start-tag"
- },
- {
- "expected": [
- "<ul>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "ul",
- {}
- ]
- ],
- "description": "p end-tag followed by ul start-tag"
- },
- {
- "expected": [
- "</foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "foo"
- ]
- ],
- "description": "p end-tag followed by end-tag"
- },
- {
- "expected": [
- ""
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "p"
- ]
- ],
- "description": "p end-tag at EOF"
- },
- {
- "expected": [
- "</optgroup><!--foo-->"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "optgroup"
- ],
- [
- "Comment",
- "foo"
- ]
- ],
- "description": "optgroup end-tag followed by comment"
- },
- {
- "expected": [
- "</optgroup> foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "optgroup"
- ],
- [
- "Characters",
- " foo"
- ]
- ],
- "description": "optgroup end-tag followed by space character"
- },
- {
- "expected": [
- "</optgroup>foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "optgroup"
- ],
- [
- "Characters",
- "foo"
- ]
- ],
- "description": "optgroup end-tag followed by text"
- },
- {
- "expected": [
- "</optgroup><foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "optgroup"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "foo",
- {}
- ]
- ],
- "description": "optgroup end-tag followed by start-tag"
- },
- {
- "expected": [
- "<optgroup>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "optgroup"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "optgroup",
- {}
- ]
- ],
- "description": "optgroup end-tag followed by optgroup start-tag"
- },
- {
- "expected": [
- "</foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "optgroup"
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "foo"
- ]
- ],
- "description": "optgroup end-tag followed by end-tag"
- },
- {
- "expected": [
- ""
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "optgroup"
- ]
- ],
- "description": "optgroup end-tag at EOF"
- },
- {
- "expected": [
- "</option><!--foo-->"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "option"
- ],
- [
- "Comment",
- "foo"
- ]
- ],
- "description": "option end-tag followed by comment"
- },
- {
- "expected": [
- "</option> foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "option"
- ],
- [
- "Characters",
- " foo"
- ]
- ],
- "description": "option end-tag followed by space character"
- },
- {
- "expected": [
- "</option>foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "option"
- ],
- [
- "Characters",
- "foo"
- ]
- ],
- "description": "option end-tag followed by text"
- },
- {
- "expected": [
- "<optgroup>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "option"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "optgroup",
- {}
- ]
- ],
- "description": "option end-tag followed by optgroup start-tag"
- },
- {
- "expected": [
- "</option><foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "option"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "foo",
- {}
- ]
- ],
- "description": "option end-tag followed by start-tag"
- },
- {
- "expected": [
- "<option>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "option"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "option",
- {}
- ]
- ],
- "description": "option end-tag followed by option start-tag"
- },
- {
- "expected": [
- "</foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "option"
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "foo"
- ]
- ],
- "description": "option end-tag followed by end-tag"
- },
- {
- "expected": [
- ""
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "option"
- ]
- ],
- "description": "option end-tag at EOF"
- },
- {
- "expected": [
- "<colgroup><!--foo-->"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "colgroup",
- {}
- ],
- [
- "Comment",
- "foo"
- ]
- ],
- "description": "colgroup start-tag followed by comment"
- },
- {
- "expected": [
- "<colgroup> foo"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "colgroup",
- {}
- ],
- [
- "Characters",
- " foo"
- ]
- ],
- "description": "colgroup start-tag followed by space character"
- },
- {
- "expected": [
- "<colgroup>foo"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "colgroup",
- {}
- ],
- [
- "Characters",
- "foo"
- ]
- ],
- "description": "colgroup start-tag followed by text"
- },
- {
- "expected": [
- "<colgroup><foo>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "colgroup",
- {}
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "foo",
- {}
- ]
- ],
- "description": "colgroup start-tag followed by start-tag"
- },
- {
- "expected": [
- "<table><col>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "table",
- {}
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "colgroup",
- {}
- ],
- [
- "EmptyTag",
- "col",
- {}
- ]
- ],
- "description": "first colgroup in a table with a col child"
- },
- {
- "expected": [
- "</colgroup><col>",
- "<colgroup><col>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "colgroup"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "colgroup",
- {}
- ],
- [
- "EmptyTag",
- "http://www.w3.org/1999/xhtml",
- "col",
- {}
- ]
- ],
- "description": "colgroup with a col child, following another colgroup"
- },
- {
- "expected": [
- "<colgroup></foo>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "colgroup",
- {}
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "foo"
- ]
- ],
- "description": "colgroup start-tag followed by end-tag"
- },
- {
- "expected": [
- "<colgroup>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "colgroup",
- {}
- ]
- ],
- "description": "colgroup start-tag at EOF"
- },
- {
- "expected": [
- "</colgroup><!--foo-->"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "colgroup"
- ],
- [
- "Comment",
- "foo"
- ]
- ],
- "description": "colgroup end-tag followed by comment"
- },
- {
- "expected": [
- "</colgroup> foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "colgroup"
- ],
- [
- "Characters",
- " foo"
- ]
- ],
- "description": "colgroup end-tag followed by space character"
- },
- {
- "expected": [
- "foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "colgroup"
- ],
- [
- "Characters",
- "foo"
- ]
- ],
- "description": "colgroup end-tag followed by text"
- },
- {
- "expected": [
- "<foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "colgroup"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "foo",
- {}
- ]
- ],
- "description": "colgroup end-tag followed by start-tag"
- },
- {
- "expected": [
- "</foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "colgroup"
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "foo"
- ]
- ],
- "description": "colgroup end-tag followed by end-tag"
- },
- {
- "expected": [
- ""
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "colgroup"
- ]
- ],
- "description": "colgroup end-tag at EOF"
- },
- {
- "expected": [
- "</thead><!--foo-->"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "thead"
- ],
- [
- "Comment",
- "foo"
- ]
- ],
- "description": "thead end-tag followed by comment"
- },
- {
- "expected": [
- "</thead> foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "thead"
- ],
- [
- "Characters",
- " foo"
- ]
- ],
- "description": "thead end-tag followed by space character"
- },
- {
- "expected": [
- "</thead>foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "thead"
- ],
- [
- "Characters",
- "foo"
- ]
- ],
- "description": "thead end-tag followed by text"
- },
- {
- "expected": [
- "</thead><foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "thead"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "foo",
- {}
- ]
- ],
- "description": "thead end-tag followed by start-tag"
- },
- {
- "expected": [
- "<tbody>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "thead"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "tbody",
- {}
- ]
- ],
- "description": "thead end-tag followed by tbody start-tag"
- },
- {
- "expected": [
- "<tfoot>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "thead"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "tfoot",
- {}
- ]
- ],
- "description": "thead end-tag followed by tfoot start-tag"
- },
- {
- "expected": [
- "</thead></foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "thead"
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "foo"
- ]
- ],
- "description": "thead end-tag followed by end-tag"
- },
- {
- "expected": [
- "</thead>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "thead"
- ]
- ],
- "description": "thead end-tag at EOF"
- },
- {
- "expected": [
- "<tbody><!--foo-->"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "tbody",
- {}
- ],
- [
- "Comment",
- "foo"
- ]
- ],
- "description": "tbody start-tag followed by comment"
- },
- {
- "expected": [
- "<tbody> foo"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "tbody",
- {}
- ],
- [
- "Characters",
- " foo"
- ]
- ],
- "description": "tbody start-tag followed by space character"
- },
- {
- "expected": [
- "<tbody>foo"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "tbody",
- {}
- ],
- [
- "Characters",
- "foo"
- ]
- ],
- "description": "tbody start-tag followed by text"
- },
- {
- "expected": [
- "<tbody><foo>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "tbody",
- {}
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "foo",
- {}
- ]
- ],
- "description": "tbody start-tag followed by start-tag"
- },
- {
- "expected": [
- "<table><tr>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "table",
- {}
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "tbody",
- {}
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "tr",
- {}
- ]
- ],
- "description": "first tbody in a table with a tr child"
- },
- {
- "expected": [
- "<tbody><tr>",
- "</tbody><tr>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "tbody"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "tbody",
- {}
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "tr",
- {}
- ]
- ],
- "description": "tbody with a tr child, following another tbody"
- },
- {
- "expected": [
- "<tbody><tr>",
- "</thead><tr>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "thead"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "tbody",
- {}
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "tr",
- {}
- ]
- ],
- "description": "tbody with a tr child, following a thead"
- },
- {
- "expected": [
- "<tbody><tr>",
- "</tfoot><tr>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "tfoot"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "tbody",
- {}
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "tr",
- {}
- ]
- ],
- "description": "tbody with a tr child, following a tfoot"
- },
- {
- "expected": [
- "<tbody></foo>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "tbody",
- {}
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "foo"
- ]
- ],
- "description": "tbody start-tag followed by end-tag"
- },
- {
- "expected": [
- "<tbody>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "tbody",
- {}
- ]
- ],
- "description": "tbody start-tag at EOF"
- },
- {
- "expected": [
- "</tbody><!--foo-->"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "tbody"
- ],
- [
- "Comment",
- "foo"
- ]
- ],
- "description": "tbody end-tag followed by comment"
- },
- {
- "expected": [
- "</tbody> foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "tbody"
- ],
- [
- "Characters",
- " foo"
- ]
- ],
- "description": "tbody end-tag followed by space character"
- },
- {
- "expected": [
- "</tbody>foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "tbody"
- ],
- [
- "Characters",
- "foo"
- ]
- ],
- "description": "tbody end-tag followed by text"
- },
- {
- "expected": [
- "</tbody><foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "tbody"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "foo",
- {}
- ]
- ],
- "description": "tbody end-tag followed by start-tag"
- },
- {
- "expected": [
- "<tbody>",
- "</tbody>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "tbody"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "tbody",
- {}
- ]
- ],
- "description": "tbody end-tag followed by tbody start-tag"
- },
- {
- "expected": [
- "<tfoot>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "tbody"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "tfoot",
- {}
- ]
- ],
- "description": "tbody end-tag followed by tfoot start-tag"
- },
- {
- "expected": [
- "</foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "tbody"
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "foo"
- ]
- ],
- "description": "tbody end-tag followed by end-tag"
- },
- {
- "expected": [
- ""
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "tbody"
- ]
- ],
- "description": "tbody end-tag at EOF"
- },
- {
- "expected": [
- "</tfoot><!--foo-->"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "tfoot"
- ],
- [
- "Comment",
- "foo"
- ]
- ],
- "description": "tfoot end-tag followed by comment"
- },
- {
- "expected": [
- "</tfoot> foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "tfoot"
- ],
- [
- "Characters",
- " foo"
- ]
- ],
- "description": "tfoot end-tag followed by space character"
- },
- {
- "expected": [
- "</tfoot>foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "tfoot"
- ],
- [
- "Characters",
- "foo"
- ]
- ],
- "description": "tfoot end-tag followed by text"
- },
- {
- "expected": [
- "</tfoot><foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "tfoot"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "foo",
- {}
- ]
- ],
- "description": "tfoot end-tag followed by start-tag"
- },
- {
- "expected": [
- "<tbody>",
- "</tfoot>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "tfoot"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "tbody",
- {}
- ]
- ],
- "description": "tfoot end-tag followed by tbody start-tag"
- },
- {
- "expected": [
- "</foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "tfoot"
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "foo"
- ]
- ],
- "description": "tfoot end-tag followed by end-tag"
- },
- {
- "expected": [
- ""
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "tfoot"
- ]
- ],
- "description": "tfoot end-tag at EOF"
- },
- {
- "expected": [
- "</tr><!--foo-->"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "tr"
- ],
- [
- "Comment",
- "foo"
- ]
- ],
- "description": "tr end-tag followed by comment"
- },
- {
- "expected": [
- "</tr> foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "tr"
- ],
- [
- "Characters",
- " foo"
- ]
- ],
- "description": "tr end-tag followed by space character"
- },
- {
- "expected": [
- "</tr>foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "tr"
- ],
- [
- "Characters",
- "foo"
- ]
- ],
- "description": "tr end-tag followed by text"
- },
- {
- "expected": [
- "</tr><foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "tr"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "foo",
- {}
- ]
- ],
- "description": "tr end-tag followed by start-tag"
- },
- {
- "expected": [
- "<tr>",
- "</tr>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "tr"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "tr",
- {}
- ]
- ],
- "description": "tr end-tag followed by tr start-tag"
- },
- {
- "expected": [
- "</foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "tr"
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "foo"
- ]
- ],
- "description": "tr end-tag followed by end-tag"
- },
- {
- "expected": [
- ""
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "tr"
- ]
- ],
- "description": "tr end-tag at EOF"
- },
- {
- "expected": [
- "</td><!--foo-->"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "td"
- ],
- [
- "Comment",
- "foo"
- ]
- ],
- "description": "td end-tag followed by comment"
- },
- {
- "expected": [
- "</td> foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "td"
- ],
- [
- "Characters",
- " foo"
- ]
- ],
- "description": "td end-tag followed by space character"
- },
- {
- "expected": [
- "</td>foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "td"
- ],
- [
- "Characters",
- "foo"
- ]
- ],
- "description": "td end-tag followed by text"
- },
- {
- "expected": [
- "</td><foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "td"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "foo",
- {}
- ]
- ],
- "description": "td end-tag followed by start-tag"
- },
- {
- "expected": [
- "<td>",
- "</td>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "td"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "td",
- {}
- ]
- ],
- "description": "td end-tag followed by td start-tag"
- },
- {
- "expected": [
- "<th>",
- "</td>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "td"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "th",
- {}
- ]
- ],
- "description": "td end-tag followed by th start-tag"
- },
- {
- "expected": [
- "</foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "td"
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "foo"
- ]
- ],
- "description": "td end-tag followed by end-tag"
- },
- {
- "expected": [
- ""
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "td"
- ]
- ],
- "description": "td end-tag at EOF"
- },
- {
- "expected": [
- "</th><!--foo-->"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "th"
- ],
- [
- "Comment",
- "foo"
- ]
- ],
- "description": "th end-tag followed by comment"
- },
- {
- "expected": [
- "</th> foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "th"
- ],
- [
- "Characters",
- " foo"
- ]
- ],
- "description": "th end-tag followed by space character"
- },
- {
- "expected": [
- "</th>foo"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "th"
- ],
- [
- "Characters",
- "foo"
- ]
- ],
- "description": "th end-tag followed by text"
- },
- {
- "expected": [
- "</th><foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "th"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "foo",
- {}
- ]
- ],
- "description": "th end-tag followed by start-tag"
- },
- {
- "expected": [
- "<th>",
- "</th>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "th"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "th",
- {}
- ]
- ],
- "description": "th end-tag followed by th start-tag"
- },
- {
- "expected": [
- "<td>",
- "</th>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "th"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "td",
- {}
- ]
- ],
- "description": "th end-tag followed by td start-tag"
- },
- {
- "expected": [
- "</foo>"
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "th"
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "foo"
- ]
- ],
- "description": "th end-tag followed by end-tag"
- },
- {
- "expected": [
- ""
- ],
- "input": [
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "th"
- ]
- ],
- "description": "th end-tag at EOF"
- }
- ]
-} \ No newline at end of file
diff --git a/libs/html5lib/tests/serializer-testdata/options.test b/libs/html5lib/tests/serializer-testdata/options.test
deleted file mode 100644
index a22eebfcf..000000000
--- a/libs/html5lib/tests/serializer-testdata/options.test
+++ /dev/null
@@ -1,334 +0,0 @@
-{
- "tests": [
- {
- "expected": [
- "<span title='test &#39;with&#39; quote_char'>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "span",
- [
- {
- "namespace": null,
- "name": "title",
- "value": "test 'with' quote_char"
- }
- ]
- ]
- ],
- "description": "quote_char=\"'\"",
- "options": {
- "quote_char": "'"
- }
- },
- {
- "expected": [
- "<button disabled>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "button",
- [
- {
- "namespace": null,
- "name": "disabled",
- "value": "disabled"
- }
- ]
- ]
- ],
- "description": "quote_attr_values='always'",
- "options": {
- "quote_attr_values": "always"
- }
- },
- {
- "expected": [
- "<div itemscope>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "div",
- [
- {
- "namespace": null,
- "name": "itemscope",
- "value": "itemscope"
- }
- ]
- ]
- ],
- "description": "quote_attr_values='always' with itemscope",
- "options": {
- "quote_attr_values": "always"
- }
- },
- {
- "expected": [
- "<div irrelevant>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "div",
- [
- {
- "namespace": null,
- "name": "irrelevant",
- "value": "irrelevant"
- }
- ]
- ]
- ],
- "description": "quote_attr_values='always' with irrelevant",
- "options": {
- "quote_attr_values": "always"
- }
- },
- {
- "expected": [
- "<div class=\"foo\">"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "div",
- [
- {
- "namespace": null,
- "name": "class",
- "value": "foo"
- }
- ]
- ]
- ],
- "description": "non-minimized quote_attr_values='always'",
- "options": {
- "quote_attr_values": "always"
- }
- },
- {
- "expected": [
- "<div class=foo>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "div",
- [
- {
- "namespace": null,
- "name": "class",
- "value": "foo"
- }
- ]
- ]
- ],
- "description": "non-minimized quote_attr_values='legacy'",
- "options": {
- "quote_attr_values": "legacy"
- }
- },
- {
- "expected": [
- "<div class=foo>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "div",
- [
- {
- "namespace": null,
- "name": "class",
- "value": "foo"
- }
- ]
- ]
- ],
- "description": "non-minimized quote_attr_values='spec'",
- "options": {
- "quote_attr_values": "spec"
- }
- },
- {
- "expected": [
- "<img />"
- ],
- "input": [
- [
- "EmptyTag",
- "img",
- {}
- ]
- ],
- "description": "use_trailing_solidus=true with void element",
- "options": {
- "use_trailing_solidus": true
- }
- },
- {
- "expected": [
- "<div>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "div",
- {}
- ]
- ],
- "description": "use_trailing_solidus=true with non-void element",
- "options": {
- "use_trailing_solidus": true
- }
- },
- {
- "expected": [
- "<div itemscope=itemscope>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "div",
- [
- {
- "namespace": null,
- "name": "itemscope",
- "value": "itemscope"
- }
- ]
- ]
- ],
- "description": "minimize_boolean_attributes=false",
- "options": {
- "minimize_boolean_attributes": false
- }
- },
- {
- "expected": [
- "<div irrelevant=irrelevant>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "div",
- [
- {
- "namespace": null,
- "name": "irrelevant",
- "value": "irrelevant"
- }
- ]
- ]
- ],
- "description": "minimize_boolean_attributes=false",
- "options": {
- "minimize_boolean_attributes": false
- }
- },
- {
- "expected": [
- "<div itemscope=\"\">"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "div",
- [
- {
- "namespace": null,
- "name": "itemscope",
- "value": ""
- }
- ]
- ]
- ],
- "description": "minimize_boolean_attributes=false with empty value",
- "options": {
- "minimize_boolean_attributes": false
- }
- },
- {
- "expected": [
- "<div irrelevant=\"\">"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "div",
- [
- {
- "namespace": null,
- "name": "irrelevant",
- "value": ""
- }
- ]
- ]
- ],
- "description": "minimize_boolean_attributes=false with empty value",
- "options": {
- "minimize_boolean_attributes": false
- }
- },
- {
- "expected": [
- "<a title=\"a&lt;b>c&amp;d\">"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "a",
- [
- {
- "namespace": null,
- "name": "title",
- "value": "a<b>c&d"
- }
- ]
- ]
- ],
- "description": "escape less than signs in attribute values",
- "options": {
- "escape_lt_in_attrs": true
- }
- },
- {
- "expected": [
- "<script>a&lt;b&gt;c&amp;d"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "script",
- {}
- ],
- [
- "Characters",
- "a<b>c&d"
- ]
- ],
- "description": "rcdata",
- "options": {
- "escape_rcdata": true
- }
- }
- ]
-} \ No newline at end of file
diff --git a/libs/html5lib/tests/serializer-testdata/whitespace.test b/libs/html5lib/tests/serializer-testdata/whitespace.test
deleted file mode 100644
index dac3a69e2..000000000
--- a/libs/html5lib/tests/serializer-testdata/whitespace.test
+++ /dev/null
@@ -1,198 +0,0 @@
-{
- "tests": [
- {
- "expected": [
- " foo"
- ],
- "input": [
- [
- "Characters",
- "\t\r\n\f foo"
- ]
- ],
- "description": "bare text with leading spaces",
- "options": {
- "strip_whitespace": true
- }
- },
- {
- "expected": [
- "foo "
- ],
- "input": [
- [
- "Characters",
- "foo \t\r\n\f"
- ]
- ],
- "description": "bare text with trailing spaces",
- "options": {
- "strip_whitespace": true
- }
- },
- {
- "expected": [
- "foo bar"
- ],
- "input": [
- [
- "Characters",
- "foo \t\r\n\f bar"
- ]
- ],
- "description": "bare text with inner spaces",
- "options": {
- "strip_whitespace": true
- }
- },
- {
- "expected": [
- "<pre>\t\r\n\f foo \t\r\n\f bar \t\r\n\f</pre>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "pre",
- {}
- ],
- [
- "Characters",
- "\t\r\n\f foo \t\r\n\f bar \t\r\n\f"
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "pre"
- ]
- ],
- "description": "text within <pre>",
- "options": {
- "strip_whitespace": true
- }
- },
- {
- "expected": [
- "<pre>\t\r\n\f fo<span>o \t\r\n\f b</span>ar \t\r\n\f</pre>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "pre",
- {}
- ],
- [
- "Characters",
- "\t\r\n\f fo"
- ],
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "span",
- {}
- ],
- [
- "Characters",
- "o \t\r\n\f b"
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "span"
- ],
- [
- "Characters",
- "ar \t\r\n\f"
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "pre"
- ]
- ],
- "description": "text within <pre>, with inner markup",
- "options": {
- "strip_whitespace": true
- }
- },
- {
- "expected": [
- "<textarea>\t\r\n\f foo \t\r\n\f bar \t\r\n\f</textarea>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "textarea",
- {}
- ],
- [
- "Characters",
- "\t\r\n\f foo \t\r\n\f bar \t\r\n\f"
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "textarea"
- ]
- ],
- "description": "text within <textarea>",
- "options": {
- "strip_whitespace": true
- }
- },
- {
- "expected": [
- "<script>\t\r\n\f foo \t\r\n\f bar \t\r\n\f</script>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "script",
- {}
- ],
- [
- "Characters",
- "\t\r\n\f foo \t\r\n\f bar \t\r\n\f"
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "script"
- ]
- ],
- "description": "text within <script>",
- "options": {
- "strip_whitespace": true
- }
- },
- {
- "expected": [
- "<style>\t\r\n\f foo \t\r\n\f bar \t\r\n\f</style>"
- ],
- "input": [
- [
- "StartTag",
- "http://www.w3.org/1999/xhtml",
- "style",
- {}
- ],
- [
- "Characters",
- "\t\r\n\f foo \t\r\n\f bar \t\r\n\f"
- ],
- [
- "EndTag",
- "http://www.w3.org/1999/xhtml",
- "style"
- ]
- ],
- "description": "text within <style>",
- "options": {
- "strip_whitespace": true
- }
- }
- ]
-} \ No newline at end of file
diff --git a/libs/html5lib/tests/support.py b/libs/html5lib/tests/support.py
deleted file mode 100644
index 9cd5afbe6..000000000
--- a/libs/html5lib/tests/support.py
+++ /dev/null
@@ -1,199 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-# pylint:disable=wrong-import-position
-
-import os
-import sys
-import codecs
-import glob
-import xml.sax.handler
-
-base_path = os.path.split(__file__)[0]
-
-test_dir = os.path.join(base_path, 'testdata')
-sys.path.insert(0, os.path.abspath(os.path.join(base_path,
- os.path.pardir,
- os.path.pardir)))
-
-from html5lib import treebuilders, treewalkers, treeadapters # noqa
-del base_path
-
-# Build a dict of available trees
-treeTypes = {}
-
-# DOM impls
-treeTypes["DOM"] = {
- "builder": treebuilders.getTreeBuilder("dom"),
- "walker": treewalkers.getTreeWalker("dom")
-}
-
-# ElementTree impls
-import xml.etree.ElementTree as ElementTree # noqa
-treeTypes['ElementTree'] = {
- "builder": treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True),
- "walker": treewalkers.getTreeWalker("etree", ElementTree)
-}
-
-try:
- import xml.etree.cElementTree as cElementTree # noqa
-except ImportError:
- treeTypes['cElementTree'] = None
-else:
- # On Python 3.3 and above cElementTree is an alias, don't run them twice.
- if cElementTree.Element is ElementTree.Element:
- treeTypes['cElementTree'] = None
- else:
- treeTypes['cElementTree'] = {
- "builder": treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True),
- "walker": treewalkers.getTreeWalker("etree", cElementTree)
- }
-
-try:
- import lxml.etree as lxml # noqa
-except ImportError:
- treeTypes['lxml'] = None
-else:
- treeTypes['lxml'] = {
- "builder": treebuilders.getTreeBuilder("lxml"),
- "walker": treewalkers.getTreeWalker("lxml")
- }
-
-# Genshi impls
-try:
- import genshi # noqa
-except ImportError:
- treeTypes["genshi"] = None
-else:
- treeTypes["genshi"] = {
- "builder": treebuilders.getTreeBuilder("dom"),
- "adapter": lambda tree: treeadapters.genshi.to_genshi(treewalkers.getTreeWalker("dom")(tree)),
- "walker": treewalkers.getTreeWalker("genshi")
- }
-
-# pylint:enable=wrong-import-position
-
-
-def get_data_files(subdirectory, files='*.dat', search_dir=test_dir):
- return sorted(glob.glob(os.path.join(search_dir, subdirectory, files)))
-
-
-class DefaultDict(dict):
- def __init__(self, default, *args, **kwargs):
- self.default = default
- dict.__init__(self, *args, **kwargs)
-
- def __getitem__(self, key):
- return dict.get(self, key, self.default)
-
-
-class TestData(object):
- def __init__(self, filename, newTestHeading="data", encoding="utf8"):
- if encoding is None:
- self.f = open(filename, mode="rb")
- else:
- self.f = codecs.open(filename, encoding=encoding)
- self.encoding = encoding
- self.newTestHeading = newTestHeading
-
- def __iter__(self):
- data = DefaultDict(None)
- key = None
- for line in self.f:
- heading = self.isSectionHeading(line)
- if heading:
- if data and heading == self.newTestHeading:
- # Remove trailing newline
- data[key] = data[key][:-1]
- yield self.normaliseOutput(data)
- data = DefaultDict(None)
- key = heading
- data[key] = "" if self.encoding else b""
- elif key is not None:
- data[key] += line
- if data:
- yield self.normaliseOutput(data)
-
- def isSectionHeading(self, line):
- """If the current heading is a test section heading return the heading,
- otherwise return False"""
- # print(line)
- if line.startswith("#" if self.encoding else b"#"):
- return line[1:].strip()
- else:
- return False
-
- def normaliseOutput(self, data):
- # Remove trailing newlines
- for key, value in data.items():
- if value.endswith("\n" if self.encoding else b"\n"):
- data[key] = value[:-1]
- return data
-
-
-def convert(stripChars):
- def convertData(data):
- """convert the output of str(document) to the format used in the testcases"""
- data = data.split("\n")
- rv = []
- for line in data:
- if line.startswith("|"):
- rv.append(line[stripChars:])
- else:
- rv.append(line)
- return "\n".join(rv)
- return convertData
-
-
-convertExpected = convert(2)
-
-
-def errorMessage(input, expected, actual):
- msg = ("Input:\n%s\nExpected:\n%s\nReceived\n%s\n" %
- (repr(input), repr(expected), repr(actual)))
- if sys.version_info[0] == 2:
- msg = msg.encode("ascii", "backslashreplace")
- return msg
-
-
-class TracingSaxHandler(xml.sax.handler.ContentHandler):
- def __init__(self):
- xml.sax.handler.ContentHandler.__init__(self)
- self.visited = []
-
- def startDocument(self):
- self.visited.append('startDocument')
-
- def endDocument(self):
- self.visited.append('endDocument')
-
- def startPrefixMapping(self, prefix, uri):
- # These are ignored as their order is not guaranteed
- pass
-
- def endPrefixMapping(self, prefix):
- # These are ignored as their order is not guaranteed
- pass
-
- def startElement(self, name, attrs):
- self.visited.append(('startElement', name, attrs))
-
- def endElement(self, name):
- self.visited.append(('endElement', name))
-
- def startElementNS(self, name, qname, attrs):
- self.visited.append(('startElementNS', name, qname, dict(attrs)))
-
- def endElementNS(self, name, qname):
- self.visited.append(('endElementNS', name, qname))
-
- def characters(self, content):
- self.visited.append(('characters', content))
-
- def ignorableWhitespace(self, whitespace):
- self.visited.append(('ignorableWhitespace', whitespace))
-
- def processingInstruction(self, target, data):
- self.visited.append(('processingInstruction', target, data))
-
- def skippedEntity(self, name):
- self.visited.append(('skippedEntity', name))
diff --git a/libs/html5lib/tests/test_alphabeticalattributes.py b/libs/html5lib/tests/test_alphabeticalattributes.py
deleted file mode 100644
index 7d5b8e0f6..000000000
--- a/libs/html5lib/tests/test_alphabeticalattributes.py
+++ /dev/null
@@ -1,78 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from collections import OrderedDict
-
-import pytest
-
-import html5lib
-from html5lib.filters.alphabeticalattributes import Filter
-from html5lib.serializer import HTMLSerializer
-
-
[email protected]('msg, attrs, expected_attrs', [
- (
- 'no attrs',
- {},
- {}
- ),
- (
- 'one attr',
- {(None, 'alt'): 'image'},
- OrderedDict([((None, 'alt'), 'image')])
- ),
- (
- 'multiple attrs',
- {
- (None, 'src'): 'foo',
- (None, 'alt'): 'image',
- (None, 'style'): 'border: 1px solid black;'
- },
- OrderedDict([
- ((None, 'alt'), 'image'),
- ((None, 'src'), 'foo'),
- ((None, 'style'), 'border: 1px solid black;')
- ])
- ),
-])
-def test_alphabetizing(msg, attrs, expected_attrs):
- tokens = [{'type': 'StartTag', 'name': 'img', 'data': attrs}]
- output_tokens = list(Filter(tokens))
-
- attrs = output_tokens[0]['data']
- assert attrs == expected_attrs
-
-
-def test_with_different_namespaces():
- tokens = [{
- 'type': 'StartTag',
- 'name': 'pattern',
- 'data': {
- (None, 'id'): 'patt1',
- ('http://www.w3.org/1999/xlink', 'href'): '#patt2'
- }
- }]
- output_tokens = list(Filter(tokens))
-
- attrs = output_tokens[0]['data']
- assert attrs == OrderedDict([
- ((None, 'id'), 'patt1'),
- (('http://www.w3.org/1999/xlink', 'href'), '#patt2')
- ])
-
-
-def test_with_serializer():
- """Verify filter works in the context of everything else"""
- parser = html5lib.HTMLParser()
- dom = parser.parseFragment('<svg><pattern xlink:href="#patt2" id="patt1"></svg>')
- walker = html5lib.getTreeWalker('etree')
- ser = HTMLSerializer(
- alphabetical_attributes=True,
- quote_attr_values='always'
- )
-
- # FIXME(willkg): The "xlink" namespace gets dropped by the serializer. When
- # that gets fixed, we can fix this expected result.
- assert (
- ser.render(walker(dom)) ==
- '<svg><pattern id="patt1" href="#patt2"></pattern></svg>'
- )
diff --git a/libs/html5lib/tests/test_encoding.py b/libs/html5lib/tests/test_encoding.py
deleted file mode 100644
index 47c4814a4..000000000
--- a/libs/html5lib/tests/test_encoding.py
+++ /dev/null
@@ -1,117 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import os
-
-import pytest
-
-from .support import get_data_files, test_dir, errorMessage, TestData as _TestData
-from html5lib import HTMLParser, _inputstream
-
-
-def test_basic_prescan_length():
- data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8')
- pad = 1024 - len(data) + 1
- data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
- assert len(data) == 1024 # Sanity
- stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
- assert 'utf-8' == stream.charEncoding[0].name
-
-
-def test_parser_reparse():
- data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8')
- pad = 10240 - len(data) + 1
- data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
- assert len(data) == 10240 # Sanity
- stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
- assert 'windows-1252' == stream.charEncoding[0].name
- p = HTMLParser(namespaceHTMLElements=False)
- doc = p.parse(data, useChardet=False)
- assert 'utf-8' == p.documentEncoding
- assert doc.find(".//title").text == "Caf\u00E9"
-
-
[email protected]("expected,data,kwargs", [
- ("utf-16le", b"\xFF\xFE", {"override_encoding": "iso-8859-2"}),
- ("utf-16be", b"\xFE\xFF", {"override_encoding": "iso-8859-2"}),
- ("utf-8", b"\xEF\xBB\xBF", {"override_encoding": "iso-8859-2"}),
- ("iso-8859-2", b"", {"override_encoding": "iso-8859-2", "transport_encoding": "iso-8859-3"}),
- ("iso-8859-2", b"<meta charset=iso-8859-3>", {"transport_encoding": "iso-8859-2"}),
- ("iso-8859-2", b"<meta charset=iso-8859-2>", {"same_origin_parent_encoding": "iso-8859-3"}),
- ("iso-8859-2", b"", {"same_origin_parent_encoding": "iso-8859-2", "likely_encoding": "iso-8859-3"}),
- ("iso-8859-2", b"", {"same_origin_parent_encoding": "utf-16", "likely_encoding": "iso-8859-2"}),
- ("iso-8859-2", b"", {"same_origin_parent_encoding": "utf-16be", "likely_encoding": "iso-8859-2"}),
- ("iso-8859-2", b"", {"same_origin_parent_encoding": "utf-16le", "likely_encoding": "iso-8859-2"}),
- ("iso-8859-2", b"", {"likely_encoding": "iso-8859-2", "default_encoding": "iso-8859-3"}),
- ("iso-8859-2", b"", {"default_encoding": "iso-8859-2"}),
- ("windows-1252", b"", {"default_encoding": "totally-bogus-string"}),
- ("windows-1252", b"", {}),
-])
-def test_parser_args(expected, data, kwargs):
- stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False, **kwargs)
- assert expected == stream.charEncoding[0].name
- p = HTMLParser()
- p.parse(data, useChardet=False, **kwargs)
- assert expected == p.documentEncoding
-
-
[email protected]("kwargs", [
- {"override_encoding": "iso-8859-2"},
- {"override_encoding": None},
- {"transport_encoding": "iso-8859-2"},
- {"transport_encoding": None},
- {"same_origin_parent_encoding": "iso-8859-2"},
- {"same_origin_parent_encoding": None},
- {"likely_encoding": "iso-8859-2"},
- {"likely_encoding": None},
- {"default_encoding": "iso-8859-2"},
- {"default_encoding": None},
- {"foo_encoding": "iso-8859-2"},
- {"foo_encoding": None},
-])
-def test_parser_args_raises(kwargs):
- with pytest.raises(TypeError) as exc_info:
- p = HTMLParser()
- p.parse("", useChardet=False, **kwargs)
- assert exc_info.value.args[0].startswith("Cannot set an encoding with a unicode input")
-
-
-def param_encoding():
- for filename in get_data_files("encoding"):
- tests = _TestData(filename, b"data", encoding=None)
- for test in tests:
- yield test[b'data'], test[b'encoding']
-
-
[email protected]("data, encoding", param_encoding())
-def test_parser_encoding(data, encoding):
- p = HTMLParser()
- assert p.documentEncoding is None
- p.parse(data, useChardet=False)
- encoding = encoding.lower().decode("ascii")
-
- assert encoding == p.documentEncoding, errorMessage(data, encoding, p.documentEncoding)
-
-
[email protected]("data, encoding", param_encoding())
-def test_prescan_encoding(data, encoding):
- stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
- encoding = encoding.lower().decode("ascii")
-
- # Very crude way to ignore irrelevant tests
- if len(data) > stream.numBytesMeta:
- return
-
- assert encoding == stream.charEncoding[0].name, errorMessage(data, encoding, stream.charEncoding[0].name)
-
-
-# pylint:disable=wrong-import-position
-try:
- import chardet # noqa
-except ImportError:
- print("chardet not found, skipping chardet tests")
-else:
- def test_chardet():
- with open(os.path.join(test_dir, "encoding", "chardet", "test_big5.txt"), "rb") as fp:
- encoding = _inputstream.HTMLInputStream(fp.read()).charEncoding
- assert encoding[0].name == "big5"
-# pylint:enable=wrong-import-position
diff --git a/libs/html5lib/tests/test_meta.py b/libs/html5lib/tests/test_meta.py
deleted file mode 100644
index dd02dd7fb..000000000
--- a/libs/html5lib/tests/test_meta.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import six
-from mock import Mock
-
-from . import support
-
-
-def _createReprMock(r):
- """Creates a mock with a __repr__ returning r
-
- Also provides __str__ mock with default mock behaviour"""
- mock = Mock()
- mock.__repr__ = Mock()
- mock.__repr__.return_value = r
- mock.__str__ = Mock(wraps=mock.__str__)
- return mock
-
-
-def test_errorMessage():
- # Create mock objects to take repr of
- input = _createReprMock("1")
- expected = _createReprMock("2")
- actual = _createReprMock("3")
-
- # Run the actual test
- r = support.errorMessage(input, expected, actual)
-
- # Assertions!
- if six.PY2:
- assert b"Input:\n1\nExpected:\n2\nReceived\n3\n" == r
- else:
- assert six.PY3
- assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r
-
- assert input.__repr__.call_count == 1
- assert expected.__repr__.call_count == 1
- assert actual.__repr__.call_count == 1
- assert not input.__str__.called
- assert not expected.__str__.called
- assert not actual.__str__.called
diff --git a/libs/html5lib/tests/test_optionaltags_filter.py b/libs/html5lib/tests/test_optionaltags_filter.py
deleted file mode 100644
index cd2821497..000000000
--- a/libs/html5lib/tests/test_optionaltags_filter.py
+++ /dev/null
@@ -1,7 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from html5lib.filters.optionaltags import Filter
-
-
-def test_empty():
- assert list(Filter([])) == []
diff --git a/libs/html5lib/tests/test_parser2.py b/libs/html5lib/tests/test_parser2.py
deleted file mode 100644
index 879d2447d..000000000
--- a/libs/html5lib/tests/test_parser2.py
+++ /dev/null
@@ -1,94 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from six import PY2, text_type
-
-import io
-
-from . import support # noqa
-
-from html5lib.constants import namespaces
-from html5lib import parse, parseFragment, HTMLParser
-
-
-# tests that aren't autogenerated from text files
-def test_assertDoctypeCloneable():
- doc = parse('<!DOCTYPE HTML>', treebuilder="dom")
- assert doc.cloneNode(True) is not None
-
-
-def test_line_counter():
- # http://groups.google.com/group/html5lib-discuss/browse_frm/thread/f4f00e4a2f26d5c0
- assert parse("<pre>\nx\n&gt;\n</pre>") is not None
-
-
-def test_namespace_html_elements_0_dom():
- doc = parse("<html></html>",
- treebuilder="dom",
- namespaceHTMLElements=True)
- assert doc.childNodes[0].namespaceURI == namespaces["html"]
-
-
-def test_namespace_html_elements_1_dom():
- doc = parse("<html></html>",
- treebuilder="dom",
- namespaceHTMLElements=False)
- assert doc.childNodes[0].namespaceURI is None
-
-
-def test_namespace_html_elements_0_etree():
- doc = parse("<html></html>",
- treebuilder="etree",
- namespaceHTMLElements=True)
- assert doc.tag == "{%s}html" % (namespaces["html"],)
-
-
-def test_namespace_html_elements_1_etree():
- doc = parse("<html></html>",
- treebuilder="etree",
- namespaceHTMLElements=False)
- assert doc.tag == "html"
-
-
-def test_unicode_file():
- assert parse(io.StringIO("a")) is not None
-
-
-def test_debug_log():
- parser = HTMLParser(debug=True)
- parser.parse("<!doctype html><title>a</title><p>b<script>c</script>d</p>e")
-
- expected = [('dataState', 'InitialPhase', 'InitialPhase', 'processDoctype', {'type': 'Doctype'}),
- ('dataState', 'BeforeHtmlPhase', 'BeforeHtmlPhase', 'processStartTag', {'name': 'title', 'type': 'StartTag'}),
- ('dataState', 'BeforeHeadPhase', 'BeforeHeadPhase', 'processStartTag', {'name': 'title', 'type': 'StartTag'}),
- ('dataState', 'InHeadPhase', 'InHeadPhase', 'processStartTag', {'name': 'title', 'type': 'StartTag'}),
- ('rcdataState', 'TextPhase', 'TextPhase', 'processCharacters', {'type': 'Characters'}),
- ('dataState', 'TextPhase', 'TextPhase', 'processEndTag', {'name': 'title', 'type': 'EndTag'}),
- ('dataState', 'InHeadPhase', 'InHeadPhase', 'processStartTag', {'name': 'p', 'type': 'StartTag'}),
- ('dataState', 'AfterHeadPhase', 'AfterHeadPhase', 'processStartTag', {'name': 'p', 'type': 'StartTag'}),
- ('dataState', 'InBodyPhase', 'InBodyPhase', 'processStartTag', {'name': 'p', 'type': 'StartTag'}),
- ('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'}),
- ('dataState', 'InBodyPhase', 'InBodyPhase', 'processStartTag', {'name': 'script', 'type': 'StartTag'}),
- ('dataState', 'InBodyPhase', 'InHeadPhase', 'processStartTag', {'name': 'script', 'type': 'StartTag'}),
- ('scriptDataState', 'TextPhase', 'TextPhase', 'processCharacters', {'type': 'Characters'}),
- ('dataState', 'TextPhase', 'TextPhase', 'processEndTag', {'name': 'script', 'type': 'EndTag'}),
- ('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'}),
- ('dataState', 'InBodyPhase', 'InBodyPhase', 'processEndTag', {'name': 'p', 'type': 'EndTag'}),
- ('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'})]
-
- if PY2:
- for i, log in enumerate(expected):
- log = [x.encode("ascii") if isinstance(x, text_type) else x for x in log]
- expected[i] = tuple(log)
-
- assert parser.log == expected
-
-
-def test_no_duplicate_clone():
- frag = parseFragment("<b><em><foo><foob><fooc><aside></b></em>")
- assert len(frag) == 2
-
-
-def test_self_closing_col():
- parser = HTMLParser()
- parser.parseFragment('<table><colgroup><col /></colgroup></table>')
- assert not parser.errors
diff --git a/libs/html5lib/tests/test_sanitizer.py b/libs/html5lib/tests/test_sanitizer.py
deleted file mode 100644
index f3faeb805..000000000
--- a/libs/html5lib/tests/test_sanitizer.py
+++ /dev/null
@@ -1,133 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import pytest
-
-from html5lib import constants, parseFragment, serialize
-from html5lib.filters import sanitizer
-
-
-def sanitize_html(stream):
- parsed = parseFragment(stream)
- with pytest.deprecated_call():
- serialized = serialize(parsed,
- sanitize=True,
- omit_optional_tags=False,
- use_trailing_solidus=True,
- space_before_trailing_solidus=False,
- quote_attr_values="always",
- quote_char='"',
- alphabetical_attributes=True)
- return serialized
-
-
-def test_should_handle_astral_plane_characters():
- sanitized = sanitize_html("<p>&#x1d4b5; &#x1d538;</p>")
- expected = '<p>\U0001d4b5 \U0001d538</p>'
- assert expected == sanitized
-
-
-def test_should_allow_relative_uris():
- sanitized = sanitize_html('<p><a href="/example.com"></a></p>')
- expected = '<p><a href="/example.com"></a></p>'
- assert expected == sanitized
-
-
-def test_invalid_data_uri():
- sanitized = sanitize_html('<audio controls="" src="data:foobar"></audio>')
- expected = '<audio controls></audio>'
- assert expected == sanitized
-
-
-def test_invalid_ipv6_url():
- sanitized = sanitize_html('<a href="h://]">')
- expected = "<a></a>"
- assert expected == sanitized
-
-
-def test_data_uri_disallowed_type():
- sanitized = sanitize_html('<audio controls="" src="data:text/html,<html>"></audio>')
- expected = "<audio controls></audio>"
- assert expected == sanitized
-
-
-def param_sanitizer():
- for ns, tag_name in sanitizer.allowed_elements:
- if ns != constants.namespaces["html"]:
- continue
- if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td',
- 'tfoot', 'th', 'thead', 'tr', 'select']:
- continue # TODO
- if tag_name == 'image':
- yield ("test_should_allow_%s_tag" % tag_name,
- "<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
- "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
- elif tag_name == 'br':
- yield ("test_should_allow_%s_tag" % tag_name,
- "<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
- "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
- elif tag_name in constants.voidElements:
- yield ("test_should_allow_%s_tag" % tag_name,
- "<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz" % tag_name,
- "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
- else:
- yield ("test_should_allow_%s_tag" % tag_name,
- "<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>" % (tag_name, tag_name),
- "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
-
- for ns, attribute_name in sanitizer.allowed_attributes:
- if ns is not None:
- continue
- if attribute_name != attribute_name.lower():
- continue # TODO
- if attribute_name == 'style':
- continue
- attribute_value = 'foo'
- if attribute_name in sanitizer.attr_val_is_uri:
- attribute_value = '%s://sub.domain.tld/path/object.ext' % sanitizer.allowed_protocols[0]
- yield ("test_should_allow_%s_attribute" % attribute_name,
- "<p %s=\"%s\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>" % (attribute_name, attribute_value),
- "<p %s='%s'>foo <bad>bar</bad> baz</p>" % (attribute_name, attribute_value))
-
- for protocol in sanitizer.allowed_protocols:
- rest_of_uri = '//sub.domain.tld/path/object.ext'
- if protocol == 'data':
- rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
- yield ("test_should_allow_uppercase_%s_uris" % protocol,
- "<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
- """<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))
-
- for protocol in sanitizer.allowed_protocols:
- rest_of_uri = '//sub.domain.tld/path/object.ext'
- if protocol == 'data':
- rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
- protocol = protocol.upper()
- yield ("test_should_allow_uppercase_%s_uris" % protocol,
- "<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
- """<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))
-
-
-@pytest.mark.parametrize("expected, input",
- (pytest.param(expected, input, id=id)
- for id, expected, input in param_sanitizer()))
-def test_sanitizer(expected, input):
- parsed = parseFragment(expected)
- expected = serialize(parsed,
- omit_optional_tags=False,
- use_trailing_solidus=True,
- space_before_trailing_solidus=False,
- quote_attr_values="always",
- quote_char='"',
- alphabetical_attributes=True)
- assert expected == sanitize_html(input)
-
-
-def test_lowercase_color_codes_in_style():
- sanitized = sanitize_html("<p style=\"border: 1px solid #a2a2a2;\"></p>")
- expected = '<p style=\"border: 1px solid #a2a2a2;\"></p>'
- assert expected == sanitized
-
-
-def test_uppercase_color_codes_in_style():
- sanitized = sanitize_html("<p style=\"border: 1px solid #A2A2A2;\"></p>")
- expected = '<p style=\"border: 1px solid #A2A2A2;\"></p>'
- assert expected == sanitized
diff --git a/libs/html5lib/tests/test_serializer.py b/libs/html5lib/tests/test_serializer.py
deleted file mode 100644
index bce624590..000000000
--- a/libs/html5lib/tests/test_serializer.py
+++ /dev/null
@@ -1,226 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import os
-import json
-
-import pytest
-
-from .support import get_data_files
-
-from html5lib import constants
-from html5lib.filters.lint import Filter as Lint
-from html5lib.serializer import HTMLSerializer, serialize
-from html5lib.treewalkers.base import TreeWalker
-
-# pylint:disable=wrong-import-position
-optionals_loaded = []
-
-try:
- from lxml import etree
- optionals_loaded.append("lxml")
-except ImportError:
- pass
-# pylint:enable=wrong-import-position
-
-default_namespace = constants.namespaces["html"]
-
-
-class JsonWalker(TreeWalker):
- def __iter__(self):
- for token in self.tree:
- type = token[0]
- if type == "StartTag":
- if len(token) == 4:
- namespace, name, attrib = token[1:4]
- else:
- namespace = default_namespace
- name, attrib = token[1:3]
- yield self.startTag(namespace, name, self._convertAttrib(attrib))
- elif type == "EndTag":
- if len(token) == 3:
- namespace, name = token[1:3]
- else:
- namespace = default_namespace
- name = token[1]
- yield self.endTag(namespace, name)
- elif type == "EmptyTag":
- if len(token) == 4:
- namespace, name, attrib = token[1:]
- else:
- namespace = default_namespace
- name, attrib = token[1:]
- for token in self.emptyTag(namespace, name, self._convertAttrib(attrib)):
- yield token
- elif type == "Comment":
- yield self.comment(token[1])
- elif type in ("Characters", "SpaceCharacters"):
- for token in self.text(token[1]):
- yield token
- elif type == "Doctype":
- if len(token) == 4:
- yield self.doctype(token[1], token[2], token[3])
- elif len(token) == 3:
- yield self.doctype(token[1], token[2])
- else:
- yield self.doctype(token[1])
- else:
- raise ValueError("Unknown token type: " + type)
-
- def _convertAttrib(self, attribs):
- """html5lib tree-walkers use a dict of (namespace, name): value for
- attributes, but JSON cannot represent this. Convert from the format
- in the serializer tests (a list of dicts with "namespace", "name",
- and "value" as keys) to html5lib's tree-walker format."""
- attrs = {}
- for attrib in attribs:
- name = (attrib["namespace"], attrib["name"])
- assert(name not in attrs)
- attrs[name] = attrib["value"]
- return attrs
-
-
-def serialize_html(input, options):
- options = {str(k): v for k, v in options.items()}
- encoding = options.get("encoding", None)
- if "encoding" in options:
- del options["encoding"]
- stream = Lint(JsonWalker(input), False)
- serializer = HTMLSerializer(alphabetical_attributes=True, **options)
- return serializer.render(stream, encoding)
-
-
-def throwsWithLatin1(input):
- with pytest.raises(UnicodeEncodeError):
- serialize_html(input, {"encoding": "iso-8859-1"})
-
-
-def testDoctypeName():
- throwsWithLatin1([["Doctype", "\u0101"]])
-
-
-def testDoctypePublicId():
- throwsWithLatin1([["Doctype", "potato", "\u0101"]])
-
-
-def testDoctypeSystemId():
- throwsWithLatin1([["Doctype", "potato", "potato", "\u0101"]])
-
-
-def testCdataCharacters():
- test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]],
- ["<style>&amacr;"], {"encoding": "iso-8859-1"})
-
-
-def testCharacters():
- test_serializer([["Characters", "\u0101"]],
- ["&amacr;"], {"encoding": "iso-8859-1"})
-
-
-def testStartTagName():
- throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "\u0101", []]])
-
-
-def testAttributeName():
- throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": None, "name": "\u0101", "value": "potato"}]]])
-
-
-def testAttributeValue():
- test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "span",
- [{"namespace": None, "name": "potato", "value": "\u0101"}]]],
- ["<span potato=&amacr;>"], {"encoding": "iso-8859-1"})
-
-
-def testEndTagName():
- throwsWithLatin1([["EndTag", "http://www.w3.org/1999/xhtml", "\u0101"]])
-
-
-def testComment():
- throwsWithLatin1([["Comment", "\u0101"]])
-
-
-def testThrowsUnknownOption():
- with pytest.raises(TypeError):
- HTMLSerializer(foobar=None)
-
-
-@pytest.mark.parametrize("c", list("\t\n\u000C\x20\r\"'=<>`"))
-def testSpecQuoteAttribute(c):
- input_ = [["StartTag", "http://www.w3.org/1999/xhtml", "span",
- [{"namespace": None, "name": "foo", "value": c}]]]
- if c == '"':
- output_ = ["<span foo='%s'>" % c]
- else:
- output_ = ['<span foo="%s">' % c]
- options_ = {"quote_attr_values": "spec"}
- test_serializer(input_, output_, options_)
-
-
-@pytest.mark.parametrize("c", list("\t\n\u000C\x20\r\"'=<>`"
- "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
- "\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15"
- "\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
- "\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000"
- "\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
- "\u2008\u2009\u200a\u2028\u2029\u202f\u205f"
- "\u3000"))
-def testLegacyQuoteAttribute(c):
- input_ = [["StartTag", "http://www.w3.org/1999/xhtml", "span",
- [{"namespace": None, "name": "foo", "value": c}]]]
- if c == '"':
- output_ = ["<span foo='%s'>" % c]
- else:
- output_ = ['<span foo="%s">' % c]
- options_ = {"quote_attr_values": "legacy"}
- test_serializer(input_, output_, options_)
-
-
-@pytest.fixture
-def lxml_parser():
- return etree.XMLParser(resolve_entities=False)
-
-
-@pytest.mark.skipif("lxml" not in optionals_loaded, reason="lxml not importable")
-def testEntityReplacement(lxml_parser):
- doc = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>'
- tree = etree.fromstring(doc, parser=lxml_parser).getroottree()
- result = serialize(tree, tree="lxml", omit_optional_tags=False)
- assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>\u03B2</html>'
-
-
-@pytest.mark.skipif("lxml" not in optionals_loaded, reason="lxml not importable")
-def testEntityXML(lxml_parser):
- doc = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>'
- tree = etree.fromstring(doc, parser=lxml_parser).getroottree()
- result = serialize(tree, tree="lxml", omit_optional_tags=False)
- assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>'
-
-
-@pytest.mark.skipif("lxml" not in optionals_loaded, reason="lxml not importable")
-def testEntityNoResolve(lxml_parser):
- doc = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>'
- tree = etree.fromstring(doc, parser=lxml_parser).getroottree()
- result = serialize(tree, tree="lxml", omit_optional_tags=False,
- resolve_entities=False)
- assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>'
-
-
-def param_serializer():
- for filename in get_data_files('serializer-testdata', '*.test', os.path.dirname(__file__)):
- with open(filename) as fp:
- tests = json.load(fp)
- for test in tests['tests']:
- yield test["input"], test["expected"], test.get("options", {})
-
-
-@pytest.mark.parametrize("input, expected, options", param_serializer())
-def test_serializer(input, expected, options):
- encoding = options.get("encoding", None)
-
- if encoding:
- expected = list(map(lambda x: x.encode(encoding), expected))
-
- result = serialize_html(input, options)
- if len(expected) == 1:
- assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
- elif result not in expected:
- assert False, "Expected: %s, Received: %s" % (expected, result)
diff --git a/libs/html5lib/tests/test_stream.py b/libs/html5lib/tests/test_stream.py
deleted file mode 100644
index efe9b472f..000000000
--- a/libs/html5lib/tests/test_stream.py
+++ /dev/null
@@ -1,325 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from . import support # noqa
-
-import codecs
-import sys
-from io import BytesIO, StringIO
-
-import pytest
-
-import six
-from six.moves import http_client, urllib
-
-from html5lib._inputstream import (BufferedStream, HTMLInputStream,
- HTMLUnicodeInputStream, HTMLBinaryInputStream)
-from html5lib._utils import supports_lone_surrogates
-
-
-def test_basic():
- s = b"abc"
- fp = BufferedStream(BytesIO(s))
- read = fp.read(10)
- assert read == s
-
-
-def test_read_length():
- fp = BufferedStream(BytesIO(b"abcdef"))
- read1 = fp.read(1)
- assert read1 == b"a"
- read2 = fp.read(2)
- assert read2 == b"bc"
- read3 = fp.read(3)
- assert read3 == b"def"
- read4 = fp.read(4)
- assert read4 == b""
-
-
-def test_tell():
- fp = BufferedStream(BytesIO(b"abcdef"))
- read1 = fp.read(1)
- assert read1 == b"a"
- assert fp.tell() == 1
- read2 = fp.read(2)
- assert read2 == b"bc"
- assert fp.tell() == 3
- read3 = fp.read(3)
- assert read3 == b"def"
- assert fp.tell() == 6
- read4 = fp.read(4)
- assert read4 == b""
- assert fp.tell() == 6
-
-
-def test_seek():
- fp = BufferedStream(BytesIO(b"abcdef"))
- read1 = fp.read(1)
- assert read1 == b"a"
- fp.seek(0)
- read2 = fp.read(1)
- assert read2 == b"a"
- read3 = fp.read(2)
- assert read3 == b"bc"
- fp.seek(2)
- read4 = fp.read(2)
- assert read4 == b"cd"
- fp.seek(4)
- read5 = fp.read(2)
- assert read5 == b"ef"
-
-
-def test_seek_tell():
- fp = BufferedStream(BytesIO(b"abcdef"))
- read1 = fp.read(1)
- assert read1 == b"a"
- assert fp.tell() == 1
- fp.seek(0)
- read2 = fp.read(1)
- assert read2 == b"a"
- assert fp.tell() == 1
- read3 = fp.read(2)
- assert read3 == b"bc"
- assert fp.tell() == 3
- fp.seek(2)
- read4 = fp.read(2)
- assert read4 == b"cd"
- assert fp.tell() == 4
- fp.seek(4)
- read5 = fp.read(2)
- assert read5 == b"ef"
- assert fp.tell() == 6
-
-
-class HTMLUnicodeInputStreamShortChunk(HTMLUnicodeInputStream):
- _defaultChunkSize = 2
-
-
-class HTMLBinaryInputStreamShortChunk(HTMLBinaryInputStream):
- _defaultChunkSize = 2
-
-
-def test_char_ascii():
- stream = HTMLInputStream(b"'", override_encoding='ascii')
- assert stream.charEncoding[0].name == 'windows-1252'
- assert stream.char() == "'"
-
-
-def test_char_utf8():
- stream = HTMLInputStream('\u2018'.encode('utf-8'), override_encoding='utf-8')
- assert stream.charEncoding[0].name == 'utf-8'
- assert stream.char() == '\u2018'
-
-
-def test_char_win1252():
- stream = HTMLInputStream("\xa9\xf1\u2019".encode('windows-1252'))
- assert stream.charEncoding[0].name == 'windows-1252'
- assert stream.char() == "\xa9"
- assert stream.char() == "\xf1"
- assert stream.char() == "\u2019"
-
-
-def test_bom():
- stream = HTMLInputStream(codecs.BOM_UTF8 + b"'")
- assert stream.charEncoding[0].name == 'utf-8'
- assert stream.char() == "'"
-
-
-def test_utf_16():
- stream = HTMLInputStream((' ' * 1025).encode('utf-16'))
- assert stream.charEncoding[0].name in ['utf-16le', 'utf-16be']
- assert len(stream.charsUntil(' ', True)) == 1025
-
-
-def test_newlines():
- stream = HTMLBinaryInputStreamShortChunk(codecs.BOM_UTF8 + b"a\nbb\r\nccc\rddddxe")
- assert stream.position() == (1, 0)
- assert stream.charsUntil('c') == "a\nbb\n"
- assert stream.position() == (3, 0)
- assert stream.charsUntil('x') == "ccc\ndddd"
- assert stream.position() == (4, 4)
- assert stream.charsUntil('e') == "x"
- assert stream.position() == (4, 5)
-
-
-def test_newlines2():
- size = HTMLUnicodeInputStream._defaultChunkSize
- stream = HTMLInputStream("\r" * size + "\n")
- assert stream.charsUntil('x') == "\n" * size
-
-
-def test_position():
- stream = HTMLBinaryInputStreamShortChunk(codecs.BOM_UTF8 + b"a\nbb\nccc\nddde\nf\ngh")
- assert stream.position() == (1, 0)
- assert stream.charsUntil('c') == "a\nbb\n"
- assert stream.position() == (3, 0)
- stream.unget("\n")
- assert stream.position() == (2, 2)
- assert stream.charsUntil('c') == "\n"
- assert stream.position() == (3, 0)
- stream.unget("\n")
- assert stream.position() == (2, 2)
- assert stream.char() == "\n"
- assert stream.position() == (3, 0)
- assert stream.charsUntil('e') == "ccc\nddd"
- assert stream.position() == (4, 3)
- assert stream.charsUntil('h') == "e\nf\ng"
- assert stream.position() == (6, 1)
-
-
-def test_position2():
- stream = HTMLUnicodeInputStreamShortChunk("abc\nd")
- assert stream.position() == (1, 0)
- assert stream.char() == "a"
- assert stream.position() == (1, 1)
- assert stream.char() == "b"
- assert stream.position() == (1, 2)
- assert stream.char() == "c"
- assert stream.position() == (1, 3)
- assert stream.char() == "\n"
- assert stream.position() == (2, 0)
- assert stream.char() == "d"
- assert stream.position() == (2, 1)
-
-
-def test_python_issue_20007():
- """
- Make sure we have a work-around for Python bug #20007
- http://bugs.python.org/issue20007
- """
- class FakeSocket(object):
- def makefile(self, _mode, _bufsize=None):
- # pylint:disable=unused-argument
- return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText")
-
- source = http_client.HTTPResponse(FakeSocket())
- source.begin()
- stream = HTMLInputStream(source)
- assert stream.charsUntil(" ") == "Text"
-
-
-def test_python_issue_20007_b():
- """
- Make sure we have a work-around for Python bug #20007
- http://bugs.python.org/issue20007
- """
- if six.PY2:
- return
-
- class FakeSocket(object):
- def makefile(self, _mode, _bufsize=None):
- # pylint:disable=unused-argument
- return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText")
-
- source = http_client.HTTPResponse(FakeSocket())
- source.begin()
- wrapped = urllib.response.addinfourl(source, source.msg, "http://example.com")
- stream = HTMLInputStream(wrapped)
- assert stream.charsUntil(" ") == "Text"
-
-
-@pytest.mark.parametrize("inp,num",
- [("\u0000", 0),
- ("\u0001", 1),
- ("\u0008", 1),
- ("\u0009", 0),
- ("\u000A", 0),
- ("\u000B", 1),
- ("\u000C", 0),
- ("\u000D", 0),
- ("\u000E", 1),
- ("\u001F", 1),
- ("\u0020", 0),
- ("\u007E", 0),
- ("\u007F", 1),
- ("\u009F", 1),
- ("\u00A0", 0),
- ("\uFDCF", 0),
- ("\uFDD0", 1),
- ("\uFDEF", 1),
- ("\uFDF0", 0),
- ("\uFFFD", 0),
- ("\uFFFE", 1),
- ("\uFFFF", 1),
- ("\U0001FFFD", 0),
- ("\U0001FFFE", 1),
- ("\U0001FFFF", 1),
- ("\U0002FFFD", 0),
- ("\U0002FFFE", 1),
- ("\U0002FFFF", 1),
- ("\U0003FFFD", 0),
- ("\U0003FFFE", 1),
- ("\U0003FFFF", 1),
- ("\U0004FFFD", 0),
- ("\U0004FFFE", 1),
- ("\U0004FFFF", 1),
- ("\U0005FFFD", 0),
- ("\U0005FFFE", 1),
- ("\U0005FFFF", 1),
- ("\U0006FFFD", 0),
- ("\U0006FFFE", 1),
- ("\U0006FFFF", 1),
- ("\U0007FFFD", 0),
- ("\U0007FFFE", 1),
- ("\U0007FFFF", 1),
- ("\U0008FFFD", 0),
- ("\U0008FFFE", 1),
- ("\U0008FFFF", 1),
- ("\U0009FFFD", 0),
- ("\U0009FFFE", 1),
- ("\U0009FFFF", 1),
- ("\U000AFFFD", 0),
- ("\U000AFFFE", 1),
- ("\U000AFFFF", 1),
- ("\U000BFFFD", 0),
- ("\U000BFFFE", 1),
- ("\U000BFFFF", 1),
- ("\U000CFFFD", 0),
- ("\U000CFFFE", 1),
- ("\U000CFFFF", 1),
- ("\U000DFFFD", 0),
- ("\U000DFFFE", 1),
- ("\U000DFFFF", 1),
- ("\U000EFFFD", 0),
- ("\U000EFFFE", 1),
- ("\U000EFFFF", 1),
- ("\U000FFFFD", 0),
- ("\U000FFFFE", 1),
- ("\U000FFFFF", 1),
- ("\U0010FFFD", 0),
- ("\U0010FFFE", 1),
- ("\U0010FFFF", 1),
- ("\x01\x01\x01", 3),
- ("a\x01a\x01a\x01a", 3)])
-def test_invalid_codepoints(inp, num):
- stream = HTMLUnicodeInputStream(StringIO(inp))
- for _i in range(len(inp)):
- stream.char()
- assert len(stream.errors) == num
-
-
-@pytest.mark.skipif(not supports_lone_surrogates, reason="doesn't support lone surrogates")
-@pytest.mark.parametrize("inp,num",
- [("'\\uD7FF'", 0),
- ("'\\uD800'", 1),
- ("'\\uDBFF'", 1),
- ("'\\uDC00'", 1),
- ("'\\uDFFF'", 1),
- ("'\\uE000'", 0),
- ("'\\uD800\\uD800\\uD800'", 3),
- ("'a\\uD800a\\uD800a\\uD800a'", 3),
- ("'\\uDFFF\\uDBFF'", 2),
- pytest.param(
- "'\\uDBFF\\uDFFF'", 2,
- marks=pytest.mark.skipif(
- sys.maxunicode == 0xFFFF,
- reason="narrow Python"))])
-def test_invalid_codepoints_surrogates(inp, num):
- inp = eval(inp) # pylint:disable=eval-used
- fp = StringIO(inp)
- if ord(max(fp.read())) > 0xFFFF:
- pytest.skip("StringIO altered string")
- fp.seek(0)
- stream = HTMLUnicodeInputStream(fp)
- for _i in range(len(inp)):
- stream.char()
- assert len(stream.errors) == num
diff --git a/libs/html5lib/tests/test_tokenizer2.py b/libs/html5lib/tests/test_tokenizer2.py
deleted file mode 100644
index 158d847a2..000000000
--- a/libs/html5lib/tests/test_tokenizer2.py
+++ /dev/null
@@ -1,66 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import io
-
-from six import unichr, text_type
-
-from html5lib._tokenizer import HTMLTokenizer
-from html5lib.constants import tokenTypes
-
-
-def ignore_parse_errors(toks):
- for tok in toks:
- if tok['type'] != tokenTypes['ParseError']:
- yield tok
-
-
-def test_maintain_attribute_order():
- # generate loads to maximize the chance a hash-based mutation will occur
- attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
- stream = io.StringIO("<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + ">")
-
- toks = HTMLTokenizer(stream)
- out = list(ignore_parse_errors(toks))
-
- assert len(out) == 1
- assert out[0]['type'] == tokenTypes['StartTag']
-
- attrs_tok = out[0]['data']
- assert len(attrs_tok) == len(attrs)
-
- for (in_name, in_value), (out_name, out_value) in zip(attrs, attrs_tok.items()):
- assert in_name == out_name
- assert in_value == out_value
-
-
-def test_duplicate_attribute():
- stream = io.StringIO("<span a=1 a=2 a=3>")
-
- toks = HTMLTokenizer(stream)
- out = list(ignore_parse_errors(toks))
-
- assert len(out) == 1
- assert out[0]['type'] == tokenTypes['StartTag']
-
- attrs_tok = out[0]['data']
- assert len(attrs_tok) == 1
- assert list(attrs_tok.items()) == [('a', '1')]
-
-
-def test_maintain_duplicate_attribute_order():
- # generate loads to maximize the chance a hash-based mutation will occur
- attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
- stream = io.StringIO("<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + " a=100>")
-
- toks = HTMLTokenizer(stream)
- out = list(ignore_parse_errors(toks))
-
- assert len(out) == 1
- assert out[0]['type'] == tokenTypes['StartTag']
-
- attrs_tok = out[0]['data']
- assert len(attrs_tok) == len(attrs)
-
- for (in_name, in_value), (out_name, out_value) in zip(attrs, attrs_tok.items()):
- assert in_name == out_name
- assert in_value == out_value
diff --git a/libs/html5lib/tests/test_treeadapters.py b/libs/html5lib/tests/test_treeadapters.py
deleted file mode 100644
index 95e56c00c..000000000
--- a/libs/html5lib/tests/test_treeadapters.py
+++ /dev/null
@@ -1,40 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from . import support # noqa
-
-import html5lib
-from html5lib.treeadapters import sax
-from html5lib.treewalkers import getTreeWalker
-
-
-def test_to_sax():
- handler = support.TracingSaxHandler()
- tree = html5lib.parse("""<html xml:lang="en">
- <title>Directory Listing</title>
- <a href="/"><b/></p>
- """, treebuilder="etree")
- walker = getTreeWalker("etree")
- sax.to_sax(walker(tree), handler)
- expected = [
- 'startDocument',
- ('startElementNS', ('http://www.w3.org/1999/xhtml', 'html'),
- 'html', {(None, 'xml:lang'): 'en'}),
- ('startElementNS', ('http://www.w3.org/1999/xhtml', 'head'), 'head', {}),
- ('startElementNS', ('http://www.w3.org/1999/xhtml', 'title'), 'title', {}),
- ('characters', 'Directory Listing'),
- ('endElementNS', ('http://www.w3.org/1999/xhtml', 'title'), 'title'),
- ('characters', '\n '),
- ('endElementNS', ('http://www.w3.org/1999/xhtml', 'head'), 'head'),
- ('startElementNS', ('http://www.w3.org/1999/xhtml', 'body'), 'body', {}),
- ('startElementNS', ('http://www.w3.org/1999/xhtml', 'a'), 'a', {(None, 'href'): '/'}),
- ('startElementNS', ('http://www.w3.org/1999/xhtml', 'b'), 'b', {}),
- ('startElementNS', ('http://www.w3.org/1999/xhtml', 'p'), 'p', {}),
- ('endElementNS', ('http://www.w3.org/1999/xhtml', 'p'), 'p'),
- ('characters', '\n '),
- ('endElementNS', ('http://www.w3.org/1999/xhtml', 'b'), 'b'),
- ('endElementNS', ('http://www.w3.org/1999/xhtml', 'a'), 'a'),
- ('endElementNS', ('http://www.w3.org/1999/xhtml', 'body'), 'body'),
- ('endElementNS', ('http://www.w3.org/1999/xhtml', 'html'), 'html'),
- 'endDocument',
- ]
- assert expected == handler.visited
diff --git a/libs/html5lib/tests/test_treewalkers.py b/libs/html5lib/tests/test_treewalkers.py
deleted file mode 100644
index 780ca964b..000000000
--- a/libs/html5lib/tests/test_treewalkers.py
+++ /dev/null
@@ -1,205 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import itertools
-import sys
-
-from six import unichr, text_type
-import pytest
-
-try:
- import lxml.etree
-except ImportError:
- pass
-
-from .support import treeTypes
-
-from html5lib import html5parser, treewalkers
-from html5lib.filters.lint import Filter as Lint
-
-import re
-attrlist = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+", re.M)
-
-
-def sortattrs(x):
- lines = x.group(0).split("\n")
- lines.sort()
- return "\n".join(lines)
-
-
-def test_all_tokens():
- expected = [
- {'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'},
- {'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
- {'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
- {'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
- {'data': 'a', 'type': 'Characters'},
- {'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
- {'data': 'b', 'type': 'Characters'},
- {'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
- {'data': 'c', 'type': 'Characters'},
- {'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
- {'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'}
- ]
- for _, treeCls in sorted(treeTypes.items()):
- if treeCls is None:
- continue
- p = html5parser.HTMLParser(tree=treeCls["builder"])
- document = p.parse("<html><head></head><body>a<div>b</div>c</body></html>")
- document = treeCls.get("adapter", lambda x: x)(document)
- output = Lint(treeCls["walker"](document))
- for expectedToken, outputToken in zip(expected, output):
- assert expectedToken == outputToken
-
-
-def set_attribute_on_first_child(docfrag, name, value, treeName):
- """naively sets an attribute on the first child of the document
- fragment passed in"""
- setter = {'ElementTree': lambda d: d[0].set,
- 'DOM': lambda d: d.firstChild.setAttribute}
- setter['cElementTree'] = setter['ElementTree']
- try:
- setter.get(treeName, setter['DOM'])(docfrag)(name, value)
- except AttributeError:
- setter['ElementTree'](docfrag)(name, value)
-
-
-def param_treewalker_six_mix():
- """Str/Unicode mix. If str attrs added to tree"""
-
- # On Python 2.x string literals are of type str. Unless, like this
- # file, the programmer imports unicode_literals from __future__.
- # In that case, string literals become objects of type unicode.
-
- # This test simulates a Py2 user, modifying attributes on a document
- # fragment but not using the u'' syntax nor importing unicode_literals
- sm_tests = [
- ('<a href="http://example.com">Example</a>',
- [(str('class'), str('test123'))],
- '<a>\n class="test123"\n href="http://example.com"\n "Example"'),
-
- ('<link href="http://example.com/cow">',
- [(str('rel'), str('alternate'))],
- '<link>\n href="http://example.com/cow"\n rel="alternate"\n "Example"')
- ]
-
- for tree in sorted(treeTypes.items()):
- for intext, attrs, expected in sm_tests:
- yield intext, expected, attrs, tree
-
-
[email protected]("intext, expected, attrs_to_add, tree", param_treewalker_six_mix())
-def test_treewalker_six_mix(intext, expected, attrs_to_add, tree):
- """tests what happens when we add attributes to the intext"""
- treeName, treeClass = tree
- if treeClass is None:
- pytest.skip("Treebuilder not loaded")
- parser = html5parser.HTMLParser(tree=treeClass["builder"])
- document = parser.parseFragment(intext)
- for nom, val in attrs_to_add:
- set_attribute_on_first_child(document, nom, val, treeName)
-
- document = treeClass.get("adapter", lambda x: x)(document)
- output = treewalkers.pprint(treeClass["walker"](document))
- output = attrlist.sub(sortattrs, output)
- if output not in expected:
- raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))
-
-
[email protected]("tree,char", itertools.product(sorted(treeTypes.items()), ["x", "\u1234"]))
-def test_fragment_single_char(tree, char):
- expected = [
- {'data': char, 'type': 'Characters'}
- ]
-
- treeName, treeClass = tree
- if treeClass is None:
- pytest.skip("Treebuilder not loaded")
-
- parser = html5parser.HTMLParser(tree=treeClass["builder"])
- document = parser.parseFragment(char)
- document = treeClass.get("adapter", lambda x: x)(document)
- output = Lint(treeClass["walker"](document))
-
- assert list(output) == expected
-
-
[email protected](treeTypes["lxml"] is None, reason="lxml not importable")
-def test_lxml_xml():
- expected = [
- {'data': {}, 'name': 'div', 'namespace': None, 'type': 'StartTag'},
- {'data': {}, 'name': 'div', 'namespace': None, 'type': 'StartTag'},
- {'name': 'div', 'namespace': None, 'type': 'EndTag'},
- {'name': 'div', 'namespace': None, 'type': 'EndTag'}
- ]
-
- lxmltree = lxml.etree.fromstring('<div><div></div></div>')
- walker = treewalkers.getTreeWalker('lxml')
- output = Lint(walker(lxmltree))
-
- assert list(output) == expected
-
-
[email protected]("treeName",
- [pytest.param(treeName, marks=[getattr(pytest.mark, treeName),
- pytest.mark.skipif(
- treeName != "lxml" or
- sys.version_info < (3, 7), reason="dict order undef")])
- for treeName in sorted(treeTypes.keys())])
-def test_maintain_attribute_order(treeName):
- treeAPIs = treeTypes[treeName]
- if treeAPIs is None:
- pytest.skip("Treebuilder not loaded")
-
- # generate loads to maximize the chance a hash-based mutation will occur
- attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
- data = "<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + ">"
-
- parser = html5parser.HTMLParser(tree=treeAPIs["builder"])
- document = parser.parseFragment(data)
-
- document = treeAPIs.get("adapter", lambda x: x)(document)
- output = list(Lint(treeAPIs["walker"](document)))
-
- assert len(output) == 2
- assert output[0]['type'] == 'StartTag'
- assert output[1]['type'] == "EndTag"
-
- attrs_out = output[0]['data']
- assert len(attrs) == len(attrs_out)
-
- for (in_name, in_value), (out_name, out_value) in zip(attrs, attrs_out.items()):
- assert (None, in_name) == out_name
- assert in_value == out_value
-
-
[email protected]("treeName",
- [pytest.param(treeName, marks=[getattr(pytest.mark, treeName),
- pytest.mark.skipif(
- treeName != "lxml" or
- sys.version_info < (3, 7), reason="dict order undef")])
- for treeName in sorted(treeTypes.keys())])
-def test_maintain_attribute_order_adjusted(treeName):
- treeAPIs = treeTypes[treeName]
- if treeAPIs is None:
- pytest.skip("Treebuilder not loaded")
-
- # generate loads to maximize the chance a hash-based mutation will occur
- data = "<svg a=1 refx=2 b=3 xml:lang=4 c=5>"
-
- parser = html5parser.HTMLParser(tree=treeAPIs["builder"])
- document = parser.parseFragment(data)
-
- document = treeAPIs.get("adapter", lambda x: x)(document)
- output = list(Lint(treeAPIs["walker"](document)))
-
- assert len(output) == 2
- assert output[0]['type'] == 'StartTag'
- assert output[1]['type'] == "EndTag"
-
- attrs_out = output[0]['data']
-
- assert list(attrs_out.items()) == [((None, 'a'), '1'),
- ((None, 'refX'), '2'),
- ((None, 'b'), '3'),
- (('http://www.w3.org/XML/1998/namespace', 'lang'), '4'),
- ((None, 'c'), '5')]
diff --git a/libs/html5lib/tests/test_whitespace_filter.py b/libs/html5lib/tests/test_whitespace_filter.py
deleted file mode 100644
index e9da6140a..000000000
--- a/libs/html5lib/tests/test_whitespace_filter.py
+++ /dev/null
@@ -1,125 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from html5lib.filters.whitespace import Filter
-from html5lib.constants import spaceCharacters
-spaceCharacters = "".join(spaceCharacters)
-
-
-def runTest(input, expected):
- output = list(Filter(input))
- errorMsg = "\n".join(["\n\nInput:", str(input),
- "\nExpected:", str(expected),
- "\nReceived:", str(output)])
- assert expected == output, errorMsg
-
-
-def runTestUnmodifiedOutput(input):
- runTest(input, input)
-
-
-def testPhrasingElements():
- runTestUnmodifiedOutput(
- [{"type": "Characters", "data": "This is a "},
- {"type": "StartTag", "name": "span", "data": []},
- {"type": "Characters", "data": "phrase"},
- {"type": "EndTag", "name": "span", "data": []},
- {"type": "SpaceCharacters", "data": " "},
- {"type": "Characters", "data": "with"},
- {"type": "SpaceCharacters", "data": " "},
- {"type": "StartTag", "name": "em", "data": []},
- {"type": "Characters", "data": "emphasised text"},
- {"type": "EndTag", "name": "em", "data": []},
- {"type": "Characters", "data": " and an "},
- {"type": "StartTag", "name": "img", "data": [["alt", "image"]]},
- {"type": "Characters", "data": "."}])
-
-
-def testLeadingWhitespace():
- runTest(
- [{"type": "StartTag", "name": "p", "data": []},
- {"type": "SpaceCharacters", "data": spaceCharacters},
- {"type": "Characters", "data": "foo"},
- {"type": "EndTag", "name": "p", "data": []}],
- [{"type": "StartTag", "name": "p", "data": []},
- {"type": "SpaceCharacters", "data": " "},
- {"type": "Characters", "data": "foo"},
- {"type": "EndTag", "name": "p", "data": []}])
-
-
-def testLeadingWhitespaceAsCharacters():
- runTest(
- [{"type": "StartTag", "name": "p", "data": []},
- {"type": "Characters", "data": spaceCharacters + "foo"},
- {"type": "EndTag", "name": "p", "data": []}],
- [{"type": "StartTag", "name": "p", "data": []},
- {"type": "Characters", "data": " foo"},
- {"type": "EndTag", "name": "p", "data": []}])
-
-
-def testTrailingWhitespace():
- runTest(
- [{"type": "StartTag", "name": "p", "data": []},
- {"type": "Characters", "data": "foo"},
- {"type": "SpaceCharacters", "data": spaceCharacters},
- {"type": "EndTag", "name": "p", "data": []}],
- [{"type": "StartTag", "name": "p", "data": []},
- {"type": "Characters", "data": "foo"},
- {"type": "SpaceCharacters", "data": " "},
- {"type": "EndTag", "name": "p", "data": []}])
-
-
-def testTrailingWhitespaceAsCharacters():
- runTest(
- [{"type": "StartTag", "name": "p", "data": []},
- {"type": "Characters", "data": "foo" + spaceCharacters},
- {"type": "EndTag", "name": "p", "data": []}],
- [{"type": "StartTag", "name": "p", "data": []},
- {"type": "Characters", "data": "foo "},
- {"type": "EndTag", "name": "p", "data": []}])
-
-
-def testWhitespace():
- runTest(
- [{"type": "StartTag", "name": "p", "data": []},
- {"type": "Characters", "data": "foo" + spaceCharacters + "bar"},
- {"type": "EndTag", "name": "p", "data": []}],
- [{"type": "StartTag", "name": "p", "data": []},
- {"type": "Characters", "data": "foo bar"},
- {"type": "EndTag", "name": "p", "data": []}])
-
-
-def testLeadingWhitespaceInPre():
- runTestUnmodifiedOutput(
- [{"type": "StartTag", "name": "pre", "data": []},
- {"type": "SpaceCharacters", "data": spaceCharacters},
- {"type": "Characters", "data": "foo"},
- {"type": "EndTag", "name": "pre", "data": []}])
-
-
-def testLeadingWhitespaceAsCharactersInPre():
- runTestUnmodifiedOutput(
- [{"type": "StartTag", "name": "pre", "data": []},
- {"type": "Characters", "data": spaceCharacters + "foo"},
- {"type": "EndTag", "name": "pre", "data": []}])
-
-
-def testTrailingWhitespaceInPre():
- runTestUnmodifiedOutput(
- [{"type": "StartTag", "name": "pre", "data": []},
- {"type": "Characters", "data": "foo"},
- {"type": "SpaceCharacters", "data": spaceCharacters},
- {"type": "EndTag", "name": "pre", "data": []}])
-
-
-def testTrailingWhitespaceAsCharactersInPre():
- runTestUnmodifiedOutput(
- [{"type": "StartTag", "name": "pre", "data": []},
- {"type": "Characters", "data": "foo" + spaceCharacters},
- {"type": "EndTag", "name": "pre", "data": []}])
-
-
-def testWhitespaceInPre():
- runTestUnmodifiedOutput(
- [{"type": "StartTag", "name": "pre", "data": []},
- {"type": "Characters", "data": "foo" + spaceCharacters + "bar"},
- {"type": "EndTag", "name": "pre", "data": []}])
diff --git a/libs/html5lib/tests/tokenizer.py b/libs/html5lib/tests/tokenizer.py
deleted file mode 100644
index 47264cc32..000000000
--- a/libs/html5lib/tests/tokenizer.py
+++ /dev/null
@@ -1,253 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import codecs
-import json
-import warnings
-import re
-
-import pytest
-from six import unichr
-
-from html5lib._tokenizer import HTMLTokenizer
-from html5lib import constants, _utils
-
-
-class TokenizerTestParser(object):
- def __init__(self, initialState, lastStartTag=None):
- self.tokenizer = HTMLTokenizer
- self._state = initialState
- self._lastStartTag = lastStartTag
-
- def parse(self, stream, encoding=None, innerHTML=False):
- # pylint:disable=unused-argument
- tokenizer = self.tokenizer(stream, encoding)
- self.outputTokens = []
-
- tokenizer.state = getattr(tokenizer, self._state)
- if self._lastStartTag is not None:
- tokenizer.currentToken = {"type": "startTag",
- "name": self._lastStartTag}
-
- types = {v: k for k, v in constants.tokenTypes.items()}
- for token in tokenizer:
- getattr(self, 'process%s' % types[token["type"]])(token)
-
- return self.outputTokens
-
- def processDoctype(self, token):
- self.outputTokens.append(["DOCTYPE", token["name"], token["publicId"],
- token["systemId"], token["correct"]])
-
- def processStartTag(self, token):
- self.outputTokens.append(["StartTag", token["name"],
- token["data"], token["selfClosing"]])
-
- def processEmptyTag(self, token):
- if token["name"] not in constants.voidElements:
- self.outputTokens.append("ParseError")
- self.outputTokens.append(["StartTag", token["name"], dict(token["data"][::-1])])
-
- def processEndTag(self, token):
- self.outputTokens.append(["EndTag", token["name"],
- token["selfClosing"]])
-
- def processComment(self, token):
- self.outputTokens.append(["Comment", token["data"]])
-
- def processSpaceCharacters(self, token):
- self.outputTokens.append(["Character", token["data"]])
- self.processSpaceCharacters = self.processCharacters
-
- def processCharacters(self, token):
- self.outputTokens.append(["Character", token["data"]])
-
- def processEOF(self, token):
- pass
-
- def processParseError(self, token):
- self.outputTokens.append(["ParseError", token["data"]])
-
-
-def concatenateCharacterTokens(tokens):
- outputTokens = []
- for token in tokens:
- if "ParseError" not in token and token[0] == "Character":
- if (outputTokens and "ParseError" not in outputTokens[-1] and
- outputTokens[-1][0] == "Character"):
- outputTokens[-1][1] += token[1]
- else:
- outputTokens.append(token)
- else:
- outputTokens.append(token)
- return outputTokens
-
-
-def normalizeTokens(tokens):
- # TODO: convert tests to reflect arrays
- for i, token in enumerate(tokens):
- if token[0] == 'ParseError':
- tokens[i] = token[0]
- return tokens
-
-
-def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder,
- ignoreErrors=False):
- """Test whether the test has passed or failed
-
- If the ignoreErrorOrder flag is set to true we don't test the relative
- positions of parse errors and non parse errors
- """
- checkSelfClosing = False
- for token in expectedTokens:
- if (token[0] == "StartTag" and len(token) == 4 or
- token[0] == "EndTag" and len(token) == 3):
- checkSelfClosing = True
- break
-
- if not checkSelfClosing:
- for token in receivedTokens:
- if token[0] == "StartTag" or token[0] == "EndTag":
- token.pop()
-
- if not ignoreErrorOrder and not ignoreErrors:
- expectedTokens = concatenateCharacterTokens(expectedTokens)
- return expectedTokens == receivedTokens
- else:
- # Sort the tokens into two groups; non-parse errors and parse errors
- tokens = {"expected": [[], []], "received": [[], []]}
- for tokenType, tokenList in zip(list(tokens.keys()),
- (expectedTokens, receivedTokens)):
- for token in tokenList:
- if token != "ParseError":
- tokens[tokenType][0].append(token)
- else:
- if not ignoreErrors:
- tokens[tokenType][1].append(token)
- tokens[tokenType][0] = concatenateCharacterTokens(tokens[tokenType][0])
- return tokens["expected"] == tokens["received"]
-
-
-_surrogateRe = re.compile(r"\\u([0-9A-Fa-f]{4})(?:\\u([0-9A-Fa-f]{4}))?")
-
-
-def unescape(test):
- def decode(inp):
- """Decode \\uXXXX escapes
-
- This decodes \\uXXXX escapes, possibly into non-BMP characters when
- two surrogate character escapes are adjacent to each other.
- """
- # This cannot be implemented using the unicode_escape codec
- # because that requires its input be ISO-8859-1, and we need
- # arbitrary unicode as input.
- def repl(m):
- if m.group(2) is not None:
- high = int(m.group(1), 16)
- low = int(m.group(2), 16)
- if 0xD800 <= high <= 0xDBFF and 0xDC00 <= low <= 0xDFFF:
- cp = ((high - 0xD800) << 10) + (low - 0xDC00) + 0x10000
- return unichr(cp)
- else:
- return unichr(high) + unichr(low)
- else:
- return unichr(int(m.group(1), 16))
- try:
- return _surrogateRe.sub(repl, inp)
- except ValueError:
- # This occurs when unichr throws ValueError, which should
- # only be for a lone-surrogate.
- if _utils.supports_lone_surrogates:
- raise
- return None
-
- test["input"] = decode(test["input"])
- for token in test["output"]:
- if token == "ParseError":
- continue
- else:
- token[1] = decode(token[1])
- if len(token) > 2:
- for key, value in token[2]:
- del token[2][key]
- token[2][decode(key)] = decode(value)
- return test
-
-
-def _doCapitalize(match):
- return match.group(1).upper()
-
-
-_capitalizeRe = re.compile(r"\W+(\w)").sub
-
-
-def capitalize(s):
- s = s.lower()
- s = _capitalizeRe(_doCapitalize, s)
- return s
-
-
-class TokenizerFile(pytest.File):
- def collect(self):
- with codecs.open(str(self.fspath), "r", encoding="utf-8") as fp:
- tests = json.load(fp)
- if 'tests' in tests:
- for i, test in enumerate(tests['tests']):
- yield TokenizerTestCollector(str(i), self, testdata=test)
-
-
-class TokenizerTestCollector(pytest.Collector):
- def __init__(self, name, parent=None, config=None, session=None, testdata=None):
- super(TokenizerTestCollector, self).__init__(name, parent, config, session)
- if 'initialStates' not in testdata:
- testdata["initialStates"] = ["Data state"]
- if 'doubleEscaped' in testdata:
- testdata = unescape(testdata)
- self.testdata = testdata
-
- def collect(self):
- for initialState in self.testdata["initialStates"]:
- initialState = capitalize(initialState)
- item = TokenizerTest(initialState,
- self,
- self.testdata,
- initialState)
- if self.testdata["input"] is None:
- item.add_marker(pytest.mark.skipif(True, reason="Relies on lone surrogates"))
- yield item
-
-
-class TokenizerTest(pytest.Item):
- def __init__(self, name, parent, test, initialState):
- super(TokenizerTest, self).__init__(name, parent)
- self.obj = lambda: 1 # this is to hack around skipif needing a function!
- self.test = test
- self.initialState = initialState
-
- def runtest(self):
- warnings.resetwarnings()
- warnings.simplefilter("error")
-
- expected = self.test['output']
- if 'lastStartTag' not in self.test:
- self.test['lastStartTag'] = None
- parser = TokenizerTestParser(self.initialState,
- self.test['lastStartTag'])
- tokens = parser.parse(self.test['input'])
- received = normalizeTokens(tokens)
- errorMsg = "\n".join(["\n\nInitial state:",
- self.initialState,
- "\nInput:", self.test['input'],
- "\nExpected:", repr(expected),
- "\nreceived:", repr(tokens)])
- errorMsg = errorMsg
- ignoreErrorOrder = self.test.get('ignoreErrorOrder', False)
- assert tokensMatch(expected, received, ignoreErrorOrder, True), errorMsg
-
- def repr_failure(self, excinfo):
- traceback = excinfo.traceback
- ntraceback = traceback.cut(path=__file__)
- excinfo.traceback = ntraceback.filter()
-
- return excinfo.getrepr(funcargs=True,
- showlocals=False,
- style="short", tbfilter=False)
diff --git a/libs/html5lib/tests/tokenizertotree.py b/libs/html5lib/tests/tokenizertotree.py
deleted file mode 100644
index 8528e8766..000000000
--- a/libs/html5lib/tests/tokenizertotree.py
+++ /dev/null
@@ -1,69 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import sys
-import os
-import json
-import re
-
-import html5lib
-from . import support
-from . import test_tokenizer
-
-p = html5lib.HTMLParser()
-
-unnamespaceExpected = re.compile(r"^(\|\s*)<html ([^>]+)>", re.M).sub
-
-
-def main(out_path):
- if not os.path.exists(out_path):
- sys.stderr.write("Path %s does not exist" % out_path)
- sys.exit(1)
-
- for filename in support.get_data_files('tokenizer', '*.test'):
- run_file(filename, out_path)
-
-
-def run_file(filename, out_path):
- try:
- tests_data = json.load(open(filename, "r"))
- except ValueError:
- sys.stderr.write("Failed to load %s\n" % filename)
- return
- name = os.path.splitext(os.path.split(filename)[1])[0]
- output_file = open(os.path.join(out_path, "tokenizer_%s.dat" % name), "w")
-
- if 'tests' in tests_data:
- for test_data in tests_data['tests']:
- if 'initialStates' not in test_data:
- test_data["initialStates"] = ["Data state"]
-
- for initial_state in test_data["initialStates"]:
- if initial_state != "Data state":
- # don't support this yet
- continue
- test = make_test(test_data)
- output_file.write(test)
-
- output_file.close()
-
-
-def make_test(test_data):
- if 'doubleEscaped' in test_data:
- test_data = test_tokenizer.unescape_test(test_data)
-
- rv = []
- rv.append("#data")
- rv.append(test_data["input"].encode("utf8"))
- rv.append("#errors")
- tree = p.parse(test_data["input"])
- output = p.tree.testSerializer(tree)
- output = "\n".join(("| " + line[3:]) if line.startswith("| ") else line
- for line in output.split("\n"))
- output = unnamespaceExpected(r"\1<\2>", output)
- rv.append(output.encode("utf8"))
- rv.append("")
- return "\n".join(rv)
-
-
-if __name__ == "__main__":
- main(sys.argv[1])
diff --git a/libs/html5lib/tests/tree_construction.py b/libs/html5lib/tests/tree_construction.py
deleted file mode 100644
index 1ef6e7250..000000000
--- a/libs/html5lib/tests/tree_construction.py
+++ /dev/null
@@ -1,205 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import itertools
-import re
-import warnings
-from difflib import unified_diff
-
-import pytest
-
-from .support import TestData, convert, convertExpected, treeTypes
-from html5lib import html5parser, constants, treewalkers
-from html5lib.filters.lint import Filter as Lint
-
-_attrlist_re = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+", re.M)
-
-
-def sortattrs(s):
- def replace(m):
- lines = m.group(0).split("\n")
- lines.sort()
- return "\n".join(lines)
- return _attrlist_re.sub(replace, s)
-
-
-class TreeConstructionFile(pytest.File):
- def collect(self):
- tests = TestData(str(self.fspath), "data")
- for i, test in enumerate(tests):
- yield TreeConstructionTest(str(i), self, testdata=test)
-
-
-class TreeConstructionTest(pytest.Collector):
- def __init__(self, name, parent=None, config=None, session=None, testdata=None):
- super(TreeConstructionTest, self).__init__(name, parent, config, session)
- self.testdata = testdata
-
- def collect(self):
- for treeName, treeAPIs in sorted(treeTypes.items()):
- for x in itertools.chain(self._getParserTests(treeName, treeAPIs),
- self._getTreeWalkerTests(treeName, treeAPIs)):
- yield x
-
- def _getParserTests(self, treeName, treeAPIs):
- if treeAPIs is not None and "adapter" in treeAPIs:
- return
- for namespaceHTMLElements in (True, False):
- if namespaceHTMLElements:
- nodeid = "%s::parser::namespaced" % treeName
- else:
- nodeid = "%s::parser::void-namespace" % treeName
- item = ParserTest(nodeid,
- self,
- self.testdata,
- treeAPIs["builder"] if treeAPIs is not None else None,
- namespaceHTMLElements)
- item.add_marker(getattr(pytest.mark, treeName))
- item.add_marker(pytest.mark.parser)
- if namespaceHTMLElements:
- item.add_marker(pytest.mark.namespaced)
- yield item
-
- def _getTreeWalkerTests(self, treeName, treeAPIs):
- nodeid = "%s::treewalker" % treeName
- item = TreeWalkerTest(nodeid,
- self,
- self.testdata,
- treeAPIs)
- item.add_marker(getattr(pytest.mark, treeName))
- item.add_marker(pytest.mark.treewalker)
- yield item
-
-
-def convertTreeDump(data):
- return "\n".join(convert(3)(data).split("\n")[1:])
-
-
-namespaceExpected = re.compile(r"^(\s*)<(\S+)>", re.M).sub
-
-
-class ParserTest(pytest.Item):
- def __init__(self, name, parent, test, treeClass, namespaceHTMLElements):
- super(ParserTest, self).__init__(name, parent)
- self.test = test
- self.treeClass = treeClass
- self.namespaceHTMLElements = namespaceHTMLElements
-
- def runtest(self):
- if self.treeClass is None:
- pytest.skip("Treebuilder not loaded")
-
- p = html5parser.HTMLParser(tree=self.treeClass,
- namespaceHTMLElements=self.namespaceHTMLElements)
-
- input = self.test['data']
- fragmentContainer = self.test['document-fragment']
- expected = convertExpected(self.test['document'])
- expectedErrors = self.test['errors'].split("\n") if self.test['errors'] else []
-
- scripting = False
- if 'script-on' in self.test:
- scripting = True
-
- with warnings.catch_warnings():
- warnings.simplefilter("error")
- try:
- if fragmentContainer:
- document = p.parseFragment(input, fragmentContainer, scripting=scripting)
- else:
- document = p.parse(input, scripting=scripting)
- except constants.DataLossWarning:
- pytest.skip("data loss warning")
-
- output = convertTreeDump(p.tree.testSerializer(document))
-
- expected = expected
- if self.namespaceHTMLElements:
- expected = namespaceExpected(r"\1<html \2>", expected)
-
- errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,
- "\nReceived:", output])
- assert expected == output, errorMsg
-
- errStr = []
- for (line, col), errorcode, datavars in p.errors:
- assert isinstance(datavars, dict), "%s, %s" % (errorcode, repr(datavars))
- errStr.append("Line: %i Col: %i %s" % (line, col,
- constants.E[errorcode] % datavars))
-
- errorMsg2 = "\n".join(["\n\nInput:", input,
- "\nExpected errors (" + str(len(expectedErrors)) + "):\n" + "\n".join(expectedErrors),
- "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)])
- if False: # we're currently not testing parse errors
- assert len(p.errors) == len(expectedErrors), errorMsg2
-
- def repr_failure(self, excinfo):
- traceback = excinfo.traceback
- ntraceback = traceback.cut(path=__file__)
- excinfo.traceback = ntraceback.filter()
-
- return excinfo.getrepr(funcargs=True,
- showlocals=False,
- style="short", tbfilter=False)
-
-
-class TreeWalkerTest(pytest.Item):
- def __init__(self, name, parent, test, treeAPIs):
- super(TreeWalkerTest, self).__init__(name, parent)
- self.test = test
- self.treeAPIs = treeAPIs
-
- def runtest(self):
- if self.treeAPIs is None:
- pytest.skip("Treebuilder not loaded")
-
- p = html5parser.HTMLParser(tree=self.treeAPIs["builder"])
-
- input = self.test['data']
- fragmentContainer = self.test['document-fragment']
- expected = convertExpected(self.test['document'])
-
- scripting = False
- if 'script-on' in self.test:
- scripting = True
-
- with warnings.catch_warnings():
- warnings.simplefilter("error")
- try:
- if fragmentContainer:
- document = p.parseFragment(input, fragmentContainer, scripting=scripting)
- else:
- document = p.parse(input, scripting=scripting)
- except constants.DataLossWarning:
- pytest.skip("data loss warning")
-
- poutput = convertTreeDump(p.tree.testSerializer(document))
- namespace_expected = namespaceExpected(r"\1<html \2>", expected)
- if poutput != namespace_expected:
- pytest.skip("parser output incorrect")
-
- document = self.treeAPIs.get("adapter", lambda x: x)(document)
-
- try:
- output = treewalkers.pprint(Lint(self.treeAPIs["walker"](document)))
- output = sortattrs(output)
- expected = sortattrs(expected)
- diff = "".join(unified_diff([line + "\n" for line in expected.splitlines()],
- [line + "\n" for line in output.splitlines()],
- "Expected", "Received"))
- assert expected == output, "\n".join([
- "", "Input:", input,
- "", "Expected:", expected,
- "", "Received:", output,
- "", "Diff:", diff,
- ])
- except NotImplementedError:
- pytest.skip("tree walker NotImplementedError")
-
- def repr_failure(self, excinfo):
- traceback = excinfo.traceback
- ntraceback = traceback.cut(path=__file__)
- excinfo.traceback = ntraceback.filter()
-
- return excinfo.getrepr(funcargs=True,
- showlocals=False,
- style="short", tbfilter=False)
diff --git a/libs/html5lib/tests/us-ascii.html b/libs/html5lib/tests/us-ascii.html
deleted file mode 100644
index bf8fb5761..000000000
--- a/libs/html5lib/tests/us-ascii.html
+++ /dev/null
@@ -1,3 +0,0 @@
-<!doctype html>
-<title>Test</title>
-<p>Hello World! \ No newline at end of file
diff --git a/libs/html5lib/tests/utf-8-bom.html b/libs/html5lib/tests/utf-8-bom.html
deleted file mode 100644
index 0f03b8da6..000000000
--- a/libs/html5lib/tests/utf-8-bom.html
+++ /dev/null
@@ -1,3 +0,0 @@
-<!doctype html>
-<title>Test</title>
-<p>Hello World! © \ No newline at end of file