diff options
author | morpheus65535 <[email protected]> | 2022-01-23 23:07:52 -0500 |
---|---|---|
committer | morpheus65535 <[email protected]> | 2022-01-23 23:07:52 -0500 |
commit | 0c3c5a02a75bc61b6bf6e303de20e11741d2afac (patch) | |
tree | 30ae1d524ffe5d54172b7a4a8445d90c3461e659 /libs/html5lib | |
parent | 36bf0d219d0432c20e6314e0ce752b36f4d88e3c (diff) | |
download | bazarr-0c3c5a02a75bc61b6bf6e303de20e11741d2afac.tar.gz bazarr-0c3c5a02a75bc61b6bf6e303de20e11741d2afac.zip |
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies.v1.0.3-beta.16
Diffstat (limited to 'libs/html5lib')
27 files changed, 0 insertions, 7313 deletions
diff --git a/libs/html5lib/tests/__init__.py b/libs/html5lib/tests/__init__.py deleted file mode 100644 index b8ce2de32..000000000 --- a/libs/html5lib/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals diff --git a/libs/html5lib/tests/conftest.py b/libs/html5lib/tests/conftest.py deleted file mode 100644 index dad167c58..000000000 --- a/libs/html5lib/tests/conftest.py +++ /dev/null @@ -1,108 +0,0 @@ -from __future__ import print_function -import os.path -import sys - -import pkg_resources -import pytest - -from .tree_construction import TreeConstructionFile -from .tokenizer import TokenizerFile -from .sanitizer import SanitizerFile - -_dir = os.path.abspath(os.path.dirname(__file__)) -_root = os.path.join(_dir, "..", "..") -_testdata = os.path.join(_dir, "testdata") -_tree_construction = os.path.join(_testdata, "tree-construction") -_tokenizer = os.path.join(_testdata, "tokenizer") -_sanitizer_testdata = os.path.join(_dir, "sanitizer-testdata") - - -def fail_if_missing_pytest_expect(): - """Throws an exception halting pytest if pytest-expect isn't working""" - try: - from pytest_expect import expect # noqa - except ImportError: - header = '*' * 78 - print( - '\n' + - header + '\n' + - 'ERROR: Either pytest-expect or its dependency u-msgpack-python is not\n' + - 'installed. Please install them both before running pytest.\n' + - header + '\n', - file=sys.stderr - ) - raise - - -fail_if_missing_pytest_expect() - - -def pytest_configure(config): - msgs = [] - - if not os.path.exists(_testdata): - msg = "testdata not available! " - if os.path.exists(os.path.join(_root, ".git")): - msg += ("Please run git submodule update --init --recursive " + - "and then run tests again.") - else: - msg += ("The testdata doesn't appear to be included with this package, " + - "so finding the right version will be hard. :(") - msgs.append(msg) - - if config.option.update_xfail: - # Check for optional requirements - req_file = os.path.join(_root, "requirements-optional.txt") - if os.path.exists(req_file): - with open(req_file, "r") as fp: - for line in fp: - if (line.strip() and - not (line.startswith("-r") or - line.startswith("#"))): - if ";" in line: - spec, marker = line.strip().split(";", 1) - else: - spec, marker = line.strip(), None - req = pkg_resources.Requirement.parse(spec) - if marker and not pkg_resources.evaluate_marker(marker): - msgs.append("%s not available in this environment" % spec) - else: - try: - installed = pkg_resources.working_set.find(req) - except pkg_resources.VersionConflict: - msgs.append("Outdated version of %s installed, need %s" % (req.name, spec)) - else: - if not installed: - msgs.append("Need %s" % spec) - - # Check cElementTree - import xml.etree.ElementTree as ElementTree - - try: - import xml.etree.cElementTree as cElementTree - except ImportError: - msgs.append("cElementTree unable to be imported") - else: - if cElementTree.Element is ElementTree.Element: - msgs.append("cElementTree is just an alias for ElementTree") - - if msgs: - pytest.exit("\n".join(msgs)) - - -def pytest_collect_file(path, parent): - dir = os.path.abspath(path.dirname) - dir_and_parents = set() - while dir not in dir_and_parents: - dir_and_parents.add(dir) - dir = os.path.dirname(dir) - - if _tree_construction in dir_and_parents: - if path.ext == ".dat": - return TreeConstructionFile(path, parent) - elif _tokenizer in dir_and_parents: - if path.ext == ".test": - return TokenizerFile(path, parent) - elif _sanitizer_testdata in dir_and_parents: - if path.ext == ".dat": - return SanitizerFile(path, parent) diff --git a/libs/html5lib/tests/sanitizer-testdata/tests1.dat b/libs/html5lib/tests/sanitizer-testdata/tests1.dat deleted file mode 100644 index 74e883368..000000000 --- a/libs/html5lib/tests/sanitizer-testdata/tests1.dat +++ /dev/null @@ -1,433 +0,0 @@ -[ - { - "name": "IE_Comments", - "input": "<!--[if gte IE 4]><script>alert('XSS');</script><![endif]-->", - "output": "" - }, - - { - "name": "IE_Comments_2", - "input": "<![if !IE 5]><script>alert('XSS');</script><![endif]>", - "output": "<script>alert('XSS');</script>" - }, - - { - "name": "allow_colons_in_path_component", - "input": "<a href=\"./this:that\">foo</a>", - "output": "<a href='./this:that'>foo</a>" - }, - - { - "name": "background_attribute", - "input": "<div background=\"javascript:alert('XSS')\"></div>", - "output": "<div></div>" - }, - - { - "name": "bgsound", - "input": "<bgsound src=\"javascript:alert('XSS');\" />", - "output": "<bgsound src=\"javascript:alert('XSS');\"></bgsound>" - }, - - { - "name": "div_background_image_unicode_encoded", - "input": "<div style=\"background-image:\u00a5\u00a2\u006C\u0028'\u006a\u0061\u00a6\u0061\u00a3\u0063\u00a2\u0069\u00a0\u00a4\u003a\u0061\u006c\u0065\u00a2\u00a4\u0028.1027\u0058.1053\u0053\u0027\u0029'\u0029\">foo</div>", - "output": "<div style=''>foo</div>" - }, - - { - "name": "div_expression", - "input": "<div style=\"width: expression(alert('XSS'));\">foo</div>", - "output": "<div style=''>foo</div>" - }, - - { - "name": "double_open_angle_brackets", - "input": "<img src=http://ha.ckers.org/scriptlet.html <", - "output": "" - }, - - { - "name": "double_open_angle_brackets_2", - "input": "<script src=http://ha.ckers.org/scriptlet.html <", - "output": "" - }, - - { - "name": "grave_accents", - "input": "<img src=`javascript:alert('XSS')` />", - "output": "<img/>" - }, - - { - "name": "img_dynsrc_lowsrc", - "input": "<img dynsrc=\"javascript:alert('XSS')\" />", - "output": "<img/>" - }, - - { - "name": "img_vbscript", - "input": "<img src='vbscript:msgbox(\"XSS\")' />", - "output": "<img/>" - }, - - { - "name": "input_image", - "input": "<input type=\"image\" src=\"javascript:alert('XSS');\" />", - "output": "<input type='image'/>" - }, - - { - "name": "link_stylesheets", - "input": "<link rel=\"stylesheet\" href=\"javascript:alert('XSS');\" />", - "output": "<link href=\"javascript:alert('XSS');\" rel=\"stylesheet\">" - }, - - { - "name": "link_stylesheets_2", - "input": "<link rel=\"stylesheet\" href=\"http://ha.ckers.org/xss.css\" />", - "output": "<link href=\"http://ha.ckers.org/xss.css\" rel=\"stylesheet\">" - }, - - { - "name": "list_style_image", - "input": "<li style=\"list-style-image: url(javascript:alert('XSS'))\">foo</li>", - "output": "<li style=''>foo</li>" - }, - - { - "name": "no_closing_script_tags", - "input": "<script src=http://ha.ckers.org/xss.js?<b>", - "output": "<script src=\"http://ha.ckers.org/xss.js?&lt;b\"></script>" - }, - - { - "name": "non_alpha_non_digit", - "input": "<script/XSS src=\"http://ha.ckers.org/xss.js\"></script>", - "output": "<script src=\"http://ha.ckers.org/xss.js\" xss=\"\"></script>" - }, - - { - "name": "non_alpha_non_digit_2", - "input": "<a onclick!\\#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>foo</a>", - "output": "<a>foo</a>" - }, - - { - "name": "non_alpha_non_digit_3", - "input": "<img/src=\"http://ha.ckers.org/xss.js\"/>", - "output": "<img src='http://ha.ckers.org/xss.js'/>" - }, - - { - "name": "non_alpha_non_digit_II", - "input": "<a href!\\#$%&()*~+-_.,:;?@[/|]^`=alert('XSS')>foo</a>", - "output": "<a>foo</a>" - }, - - { - "name": "non_alpha_non_digit_III", - "input": "<a/href=\"javascript:alert('XSS');\">foo</a>", - "output": "<a>foo</a>" - }, - - { - "name": "platypus", - "input": "<a href=\"http://www.ragingplatypus.com/\" style=\"display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;\">never trust your upstream platypus</a>", - "output": "<a href='http://www.ragingplatypus.com/' style='display: block; width: 100%; height: 100%; background-color: black; background-x: center; background-y: center;'>never trust your upstream platypus</a>" - }, - - { - "name": "protocol_resolution_in_script_tag", - "input": "<script src=//ha.ckers.org/.j></script>", - "output": "<script src=\"//ha.ckers.org/.j\"></script>" - }, - - { - "name": "should_allow_anchors", - "input": "<a href='foo' onclick='bar'><script>baz</script></a>", - "output": "<a href='foo'><script>baz</script></a>" - }, - - { - "name": "should_allow_image_alt_attribute", - "input": "<img alt='foo' onclick='bar' />", - "output": "<img alt='foo'/>" - }, - - { - "name": "should_allow_image_height_attribute", - "input": "<img height='foo' onclick='bar' />", - "output": "<img height='foo'/>" - }, - - { - "name": "should_allow_image_src_attribute", - "input": "<img src='foo' onclick='bar' />", - "output": "<img src='foo'/>" - }, - - { - "name": "should_allow_image_width_attribute", - "input": "<img width='foo' onclick='bar' />", - "output": "<img width='foo'/>" - }, - - { - "name": "should_handle_blank_text", - "input": "", - "output": "" - }, - - { - "name": "should_handle_malformed_image_tags", - "input": "<img \"\"\"><script>alert(\"XSS\")</script>\">", - "output": "<img/><script>alert(\"XSS\")</script>\">" - }, - - { - "name": "should_handle_non_html", - "input": "abc", - "output": "abc" - }, - - { - "name": "should_not_fall_for_ridiculous_hack", - "input": "<img\nsrc\n=\n\"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n\"\n />", - "output": "<img/>" - }, - - { - "name": "should_not_fall_for_xss_image_hack_0", - "input": "<img src=\"javascript:alert('XSS');\" />", - "output": "<img/>" - }, - - { - "name": "should_not_fall_for_xss_image_hack_1", - "input": "<img src=javascript:alert('XSS') />", - "output": "<img/>" - }, - - { - "name": "should_not_fall_for_xss_image_hack_10", - "input": "<img src=\"jav
ascript:alert('XSS');\" />", - "output": "<img/>" - }, - - { - "name": "should_not_fall_for_xss_image_hack_11", - "input": "<img src=\"jav
ascript:alert('XSS');\" />", - "output": "<img/>" - }, - - { - "name": "should_not_fall_for_xss_image_hack_12", - "input": "<img src=\"  javascript:alert('XSS');\" />", - "output": "<img/>" - }, - - { - "name": "should_not_fall_for_xss_image_hack_13", - "input": "<img src=\" javascript:alert('XSS');\" />", - "output": "<img/>" - }, - - { - "name": "should_not_fall_for_xss_image_hack_14", - "input": "<img src=\" javascript:alert('XSS');\" />", - "output": "<img/>" - }, - - { - "name": "should_not_fall_for_xss_image_hack_2", - "input": "<img src=\"JaVaScRiPt:alert('XSS')\" />", - "output": "<img/>" - }, - - { - "name": "should_not_fall_for_xss_image_hack_3", - "input": "<img src='javascript:alert("XSS")' />", - "output": "<img/>" - }, - - { - "name": "should_not_fall_for_xss_image_hack_4", - "input": "<img src='javascript:alert(String.fromCharCode(88,83,83))' />", - "output": "<img/>" - }, - - { - "name": "should_not_fall_for_xss_image_hack_5", - "input": "<img src='javascript:alert('XSS')' />", - "output": "<img/>" - }, - - { - "name": "should_not_fall_for_xss_image_hack_6", - "input": "<img src='javascript:alert('XSS')' />", - "output": "<img/>" - }, - - { - "name": "should_not_fall_for_xss_image_hack_7", - "input": "<img src='javascript:alert('XSS')' />", - "output": "<img/>" - }, - - { - "name": "should_not_fall_for_xss_image_hack_8", - "input": "<img src=\"jav\tascript:alert('XSS');\" />", - "output": "<img/>" - }, - - { - "name": "should_not_fall_for_xss_image_hack_9", - "input": "<img src=\"jav	ascript:alert('XSS');\" />", - "output": "<img/>" - }, - - { - "name": "should_sanitize_half_open_scripts", - "input": "<img src=\"javascript:alert('XSS')\"", - "output": "" - }, - - { - "name": "should_sanitize_invalid_script_tag", - "input": "<script/XSS SRC=\"http://ha.ckers.org/xss.js\"></script>", - "output": "<script src=\"http://ha.ckers.org/xss.js\" xss=\"\"></script>" - }, - - { - "name": "should_sanitize_script_tag_with_multiple_open_brackets", - "input": "<<script>alert(\"XSS\");//<</script>", - "output": "<<script>alert(\"XSS\");//<</script>" - }, - - { - "name": "should_sanitize_script_tag_with_multiple_open_brackets_2", - "input": "<iframe src=http://ha.ckers.org/scriptlet.html\n<", - "output": "" - }, - - { - "name": "should_sanitize_tag_broken_up_by_null", - "input": "<scr\u0000ipt>alert(\"XSS\")</scr\u0000ipt>", - "output": "<scr\ufffdipt>alert(\"XSS\")</scr\ufffdipt>" - }, - - { - "name": "should_sanitize_unclosed_script", - "input": "<script src=http://ha.ckers.org/xss.js?<b>", - "output": "<script src=\"http://ha.ckers.org/xss.js?&lt;b\"></script>" - }, - - { - "name": "should_strip_href_attribute_in_a_with_bad_protocols", - "input": "<a href=\"javascript:XSS\" title=\"1\">boo</a>", - "output": "<a title='1'>boo</a>" - }, - - { - "name": "should_strip_href_attribute_in_a_with_bad_protocols_and_whitespace", - "input": "<a href=\" javascript:XSS\" title=\"1\">boo</a>", - "output": "<a title='1'>boo</a>" - }, - - { - "name": "should_strip_src_attribute_in_img_with_bad_protocols", - "input": "<img src=\"javascript:XSS\" title=\"1\">boo</img>", - "output": "<img title='1'/>boo" - }, - - { - "name": "should_strip_src_attribute_in_img_with_bad_protocols_and_whitespace", - "input": "<img src=\" javascript:XSS\" title=\"1\">boo</img>", - "output": "<img title='1'/>boo" - }, - - { - "name": "xml_base", - "input": "<div xml:base=\"javascript:alert('XSS');//\">foo</div>", - "output": "<div>foo</div>" - }, - - { - "name": "xul", - "input": "<p style=\"-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')\">fubar</p>", - "output": "<p style=''>fubar</p>" - }, - - { - "name": "quotes_in_attributes", - "input": "<img src='foo' title='\"foo\" bar' />", - "output": "<img src='foo' title='\"foo\" bar'/>" - }, - - { - "name": "uri_refs_in_svg_attributes", - "input": "<svg><rect fill='url(#foo)' />", - "output": "<svg><rect fill='url(#foo)'></rect></svg>" - }, - - { - "name": "absolute_uri_refs_in_svg_attributes", - "input": "<svg><rect fill='url(http://bad.com/) #fff' />", - "output": "<svg><rect fill=' #fff'></rect></svg>" - }, - - { - "name": "uri_ref_with_space_in svg_attribute", - "input": "<svg><rect fill='url(\n#foo)' />", - "output": "<svg><rect fill='url(\n#foo)'></rect></svg>" - }, - - { - "name": "absolute_uri_ref_with_space_in svg_attribute", - "input": "<svg><rect fill=\"url(\nhttp://bad.com/)\" />", - "output": "<svg><rect fill=' '></rect></svg>" - }, - - { - "name": "allow_html5_image_tag", - "input": "<image src='foo' />", - "output": "<img src='foo'/>" - }, - - { - "name": "style_attr_end_with_nothing", - "input": "<div style=\"color: blue\" />", - "output": "<div style='color: blue;'></div>" - }, - - { - "name": "style_attr_end_with_space", - "input": "<div style=\"color: blue \" />", - "output": "<div style='color: blue ;'></div>" - }, - - { - "name": "style_attr_end_with_semicolon", - "input": "<div style=\"color: blue;\" />", - "output": "<div style='color: blue;'></div>" - }, - - { - "name": "style_attr_end_with_semicolon_space", - "input": "<div style=\"color: blue; \" />", - "output": "<div style='color: blue;'></div>" - }, - - { - "name": "attributes_with_embedded_quotes", - "input": "<img src=doesntexist.jpg\"'onerror=\"alert(1) />", - "output": "<img src='doesntexist.jpg\"'onerror=\"alert(1)'/>" - }, - - { - "name": "attributes_with_embedded_quotes_II", - "input": "<img src=notthere.jpg\"\"onerror=\"alert(2) />", - "output": "<img src='notthere.jpg\"\"onerror=\"alert(2)'/>" - } -] diff --git a/libs/html5lib/tests/sanitizer.py b/libs/html5lib/tests/sanitizer.py deleted file mode 100644 index bb4834214..000000000 --- a/libs/html5lib/tests/sanitizer.py +++ /dev/null @@ -1,51 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import codecs -import json - -import pytest - -from html5lib import parseFragment, serialize - - -class SanitizerFile(pytest.File): - def collect(self): - with codecs.open(str(self.fspath), "r", encoding="utf-8") as fp: - tests = json.load(fp) - for i, test in enumerate(tests): - yield SanitizerTest(str(i), self, test=test) - - -class SanitizerTest(pytest.Item): - def __init__(self, name, parent, test): - super(SanitizerTest, self).__init__(name, parent) - self.obj = lambda: 1 # this is to hack around skipif needing a function! - self.test = test - - def runtest(self): - input = self.test["input"] - expected = self.test["output"] - - parsed = parseFragment(input) - with pytest.deprecated_call(): - serialized = serialize(parsed, - sanitize=True, - omit_optional_tags=False, - use_trailing_solidus=True, - space_before_trailing_solidus=False, - quote_attr_values="always", - quote_char="'", - alphabetical_attributes=True) - errorMsg = "\n".join(["\n\nInput:", input, - "\nExpected:", expected, - "\nReceived:", serialized]) - assert expected == serialized, errorMsg - - def repr_failure(self, excinfo): - traceback = excinfo.traceback - ntraceback = traceback.cut(path=__file__) - excinfo.traceback = ntraceback.filter() - - return excinfo.getrepr(funcargs=True, - showlocals=False, - style="short", tbfilter=False) diff --git a/libs/html5lib/tests/serializer-testdata/core.test b/libs/html5lib/tests/serializer-testdata/core.test deleted file mode 100644 index 55294b683..000000000 --- a/libs/html5lib/tests/serializer-testdata/core.test +++ /dev/null @@ -1,395 +0,0 @@ -{ - "tests": [ - { - "expected": [ - "<span title='test \"with\" &quot;'>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "span", - [ - { - "namespace": null, - "name": "title", - "value": "test \"with\" "" - } - ] - ] - ], - "description": "proper attribute value escaping" - }, - { - "expected": [ - "<span title=foo>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "span", - [ - { - "namespace": null, - "name": "title", - "value": "foo" - } - ] - ] - ], - "description": "proper attribute value non-quoting" - }, - { - "expected": [ - "<span title=\"foo<bar\">" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "span", - [ - { - "namespace": null, - "name": "title", - "value": "foo<bar" - } - ] - ] - ], - "description": "proper attribute value non-quoting (with <)" - }, - { - "expected": [ - "<span title=\"foo=bar\">" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "span", - [ - { - "namespace": null, - "name": "title", - "value": "foo=bar" - } - ] - ] - ], - "description": "proper attribute value quoting (with =)" - }, - { - "expected": [ - "<span title=\"foo>bar\">" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "span", - [ - { - "namespace": null, - "name": "title", - "value": "foo>bar" - } - ] - ] - ], - "description": "proper attribute value quoting (with >)" - }, - { - "expected": [ - "<span title='foo\"bar'>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "span", - [ - { - "namespace": null, - "name": "title", - "value": "foo\"bar" - } - ] - ] - ], - "description": "proper attribute value quoting (with \")" - }, - { - "expected": [ - "<span title=\"foo'bar\">" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "span", - [ - { - "namespace": null, - "name": "title", - "value": "foo'bar" - } - ] - ] - ], - "description": "proper attribute value quoting (with ')" - }, - { - "expected": [ - "<span title=\"foo'bar"baz\">" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "span", - [ - { - "namespace": null, - "name": "title", - "value": "foo'bar\"baz" - } - ] - ] - ], - "description": "proper attribute value quoting (with both \" and ')" - }, - { - "expected": [ - "<span title=\"foo bar\">" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "span", - [ - { - "namespace": null, - "name": "title", - "value": "foo bar" - } - ] - ] - ], - "description": "proper attribute value quoting (with space)" - }, - { - "expected": [ - "<span title=\"foo\tbar\">" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "span", - [ - { - "namespace": null, - "name": "title", - "value": "foo\tbar" - } - ] - ] - ], - "description": "proper attribute value quoting (with tab)" - }, - { - "expected": [ - "<span title=\"foo\nbar\">" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "span", - [ - { - "namespace": null, - "name": "title", - "value": "foo\nbar" - } - ] - ] - ], - "description": "proper attribute value quoting (with LF)" - }, - { - "expected": [ - "<span title=\"foo\rbar\">" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "span", - [ - { - "namespace": null, - "name": "title", - "value": "foo\rbar" - } - ] - ] - ], - "description": "proper attribute value quoting (with CR)" - }, - { - "expected": [ - "<span title=\"foo\u000bbar\">" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "span", - [ - { - "namespace": null, - "name": "title", - "value": "foo\u000bbar" - } - ] - ] - ], - "description": "proper attribute value non-quoting (with linetab)" - }, - { - "expected": [ - "<span title=\"foo\fbar\">" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "span", - [ - { - "namespace": null, - "name": "title", - "value": "foo\fbar" - } - ] - ] - ], - "description": "proper attribute value quoting (with form feed)" - }, - { - "expected": [ - "<img>" - ], - "input": [ - [ - "EmptyTag", - "img", - {} - ] - ], - "description": "void element (as EmptyTag token)" - }, - { - "expected": [ - "<!DOCTYPE foo>" - ], - "input": [ - [ - "Doctype", - "foo" - ] - ], - "description": "doctype in error" - }, - { - "expected": [ - "a<b>c&d" - ], - "input": [ - [ - "Characters", - "a<b>c&d" - ] - ], - "description": "character data", - "options": { - "encoding": "utf-8" - } - }, - { - "expected": [ - "<script>a<b>c&d" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "script", - {} - ], - [ - "Characters", - "a<b>c&d" - ] - ], - "description": "rcdata" - }, - { - "expected": [ - "<!DOCTYPE HTML>" - ], - "input": [ - [ - "Doctype", - "HTML" - ] - ], - "description": "doctype" - }, - { - "expected": [ - "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">" - ], - "input": [ - [ - "Doctype", - "HTML", - "-//W3C//DTD HTML 4.01//EN", - "http://www.w3.org/TR/html4/strict.dtd" - ] - ], - "description": "HTML 4.01 DOCTYPE" - }, - { - "expected": [ - "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\">" - ], - "input": [ - [ - "Doctype", - "HTML", - "-//W3C//DTD HTML 4.01//EN" - ] - ], - "description": "HTML 4.01 DOCTYPE without system identifier" - }, - { - "expected": [ - "<!DOCTYPE html SYSTEM \"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd\">" - ], - "input": [ - [ - "Doctype", - "html", - "", - "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" - ] - ], - "description": "IBM DOCTYPE without public identifier" - } - ] -} diff --git a/libs/html5lib/tests/serializer-testdata/injectmeta.test b/libs/html5lib/tests/serializer-testdata/injectmeta.test deleted file mode 100644 index 399590c3f..000000000 --- a/libs/html5lib/tests/serializer-testdata/injectmeta.test +++ /dev/null @@ -1,350 +0,0 @@ -{ - "tests": [ - { - "expected": [ - "" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "head", - {} - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "head" - ] - ], - "description": "no encoding", - "options": { - "inject_meta_charset": true - } - }, - { - "expected": [ - "<meta charset=utf-8>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "head", - {} - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "head" - ] - ], - "description": "empytag head", - "options": { - "encoding": "utf-8", - "inject_meta_charset": true - } - }, - { - "expected": [ - "<meta charset=utf-8><title>foo</title>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "head", - {} - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "title", - {} - ], - [ - "Characters", - "foo" - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "title" - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "head" - ] - ], - "description": "head w/title", - "options": { - "encoding": "utf-8", - "inject_meta_charset": true - } - }, - { - "expected": [ - "<meta charset=utf-8>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "head", - {} - ], - [ - "EmptyTag", - "meta", - [ - { - "namespace": null, - "name": "charset", - "value": "ascii" - } - ] - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "head" - ] - ], - "description": "head w/meta-charset", - "options": { - "encoding": "utf-8", - "inject_meta_charset": true - } - }, - { - "expected": [ - "<meta charset=utf-8><meta charset=utf-8>", - "<head><meta charset=utf-8><meta charset=ascii>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "head", - {} - ], - [ - "EmptyTag", - "meta", - [ - { - "namespace": null, - "name": "charset", - "value": "ascii" - } - ] - ], - [ - "EmptyTag", - "meta", - [ - { - "namespace": null, - "name": "charset", - "value": "ascii" - } - ] - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "head" - ] - ], - "description": "head w/ two meta-charset", - "options": { - "encoding": "utf-8", - "inject_meta_charset": true - } - }, - { - "expected": [ - "<meta charset=utf-8><meta content=noindex name=robots>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "head", - {} - ], - [ - "EmptyTag", - "meta", - [ - { - "namespace": null, - "name": "name", - "value": "robots" - }, - { - "namespace": null, - "name": "content", - "value": "noindex" - } - ] - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "head" - ] - ], - "description": "head w/robots", - "options": { - "encoding": "utf-8", - "inject_meta_charset": true - } - }, - { - "expected": [ - "<meta content=noindex name=robots><meta charset=utf-8>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "head", - {} - ], - [ - "EmptyTag", - "meta", - [ - { - "namespace": null, - "name": "name", - "value": "robots" - }, - { - "namespace": null, - "name": "content", - "value": "noindex" - } - ] - ], - [ - "EmptyTag", - "meta", - [ - { - "namespace": null, - "name": "charset", - "value": "ascii" - } - ] - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "head" - ] - ], - "description": "head w/robots & charset", - "options": { - "encoding": "utf-8", - "inject_meta_charset": true - } - }, - { - "expected": [ - "<meta content=\"text/html; charset=utf-8\" http-equiv=content-type>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "head", - {} - ], - [ - "EmptyTag", - "meta", - [ - { - "namespace": null, - "name": "http-equiv", - "value": "content-type" - }, - { - "namespace": null, - "name": "content", - "value": "text/html; charset=ascii" - } - ] - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "head" - ] - ], - "description": "head w/ charset in http-equiv content-type", - "options": { - "encoding": "utf-8", - "inject_meta_charset": true - } - }, - { - "expected": [ - "<meta content=noindex name=robots><meta content=\"text/html; charset=utf-8\" http-equiv=content-type>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "head", - {} - ], - [ - "EmptyTag", - "meta", - [ - { - "namespace": null, - "name": "name", - "value": "robots" - }, - { - "namespace": null, - "name": "content", - "value": "noindex" - } - ] - ], - [ - "EmptyTag", - "meta", - [ - { - "namespace": null, - "name": "http-equiv", - "value": "content-type" - }, - { - "namespace": null, - "name": "content", - "value": "text/html; charset=ascii" - } - ] - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "head" - ] - ], - "description": "head w/robots & charset in http-equiv content-type", - "options": { - "encoding": "utf-8", - "inject_meta_charset": true - } - } - ] -}
\ No newline at end of file diff --git a/libs/html5lib/tests/serializer-testdata/optionaltags.test b/libs/html5lib/tests/serializer-testdata/optionaltags.test deleted file mode 100644 index e67725ca2..000000000 --- a/libs/html5lib/tests/serializer-testdata/optionaltags.test +++ /dev/null @@ -1,3254 +0,0 @@ -{ - "tests": [ - { - "expected": [ - "<html lang=en>foo" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "html", - [ - { - "namespace": null, - "name": "lang", - "value": "en" - } - ] - ], - [ - "Characters", - "foo" - ] - ], - "description": "html start-tag followed by text, with attributes" - }, - { - "expected": [ - "<html><!--foo-->" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "html", - {} - ], - [ - "Comment", - "foo" - ] - ], - "description": "html start-tag followed by comment" - }, - { - "expected": [ - "<html> foo" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "html", - {} - ], - [ - "Characters", - " foo" - ] - ], - "description": "html start-tag followed by space character" - }, - { - "expected": [ - "foo" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "html", - {} - ], - [ - "Characters", - "foo" - ] - ], - "description": "html start-tag followed by text" - }, - { - "expected": [ - "<foo>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "html", - {} - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "foo", - {} - ] - ], - "description": "html start-tag followed by start-tag" - }, - { - "expected": [ - "</foo>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "html", - {} - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "foo" - ] - ], - "description": "html start-tag followed by end-tag" - }, - { - "expected": [ - "" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "html", - {} - ] - ], - "description": "html start-tag at EOF (shouldn't ever happen?!)" - }, - { - "expected": [ - "</html><!--foo-->" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "html" - ], - [ - "Comment", - "foo" - ] - ], - "description": "html end-tag followed by comment" - }, - { - "expected": [ - "</html> foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "html" - ], - [ - "Characters", - " foo" - ] - ], - "description": "html end-tag followed by space character" - }, - { - "expected": [ - "foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "html" - ], - [ - "Characters", - "foo" - ] - ], - "description": "html end-tag followed by text" - }, - { - "expected": [ - "<foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "html" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "foo", - {} - ] - ], - "description": "html end-tag followed by start-tag" - }, - { - "expected": [ - "</foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "html" - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "foo" - ] - ], - "description": "html end-tag followed by end-tag" - }, - { - "expected": [ - "" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "html" - ] - ], - "description": "html end-tag at EOF" - }, - { - "expected": [ - "<head><!--foo-->" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "head", - {} - ], - [ - "Comment", - "foo" - ] - ], - "description": "head start-tag followed by comment" - }, - { - "expected": [ - "<head> foo" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "head", - {} - ], - [ - "Characters", - " foo" - ] - ], - "description": "head start-tag followed by space character" - }, - { - "expected": [ - "<head>foo" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "head", - {} - ], - [ - "Characters", - "foo" - ] - ], - "description": "head start-tag followed by text" - }, - { - "expected": [ - "<foo>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "head", - {} - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "foo", - {} - ] - ], - "description": "head start-tag followed by start-tag" - }, - { - "expected": [ - "<head></foo>", - "</foo>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "head", - {} - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "foo" - ] - ], - "description": "head start-tag followed by end-tag (shouldn't ever happen?!)" - }, - { - "expected": [ - "" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "head", - {} - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "head" - ] - ], - "description": "empty head element" - }, - { - "expected": [ - "<meta>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "head", - {} - ], - [ - "EmptyTag", - "meta", - {} - ] - ], - "description": "head start-tag followed by empty-tag" - }, - { - "expected": [ - "<head>", - "" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "head", - {} - ] - ], - "description": "head start-tag at EOF (shouldn't ever happen?!)" - }, - { - "expected": [ - "</head><!--foo-->" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "head" - ], - [ - "Comment", - "foo" - ] - ], - "description": "head end-tag followed by comment" - }, - { - "expected": [ - "</head> foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "head" - ], - [ - "Characters", - " foo" - ] - ], - "description": "head end-tag followed by space character" - }, - { - "expected": [ - "foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "head" - ], - [ - "Characters", - "foo" - ] - ], - "description": "head end-tag followed by text" - }, - { - "expected": [ - "<foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "head" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "foo", - {} - ] - ], - "description": "head end-tag followed by start-tag" - }, - { - "expected": [ - "</foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "head" - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "foo" - ] - ], - "description": "head end-tag followed by end-tag" - }, - { - "expected": [ - "" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "head" - ] - ], - "description": "head end-tag at EOF" - }, - { - "expected": [ - "<body><!--foo-->" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "body", - {} - ], - [ - "Comment", - "foo" - ] - ], - "description": "body start-tag followed by comment" - }, - { - "expected": [ - "<body> foo" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "body", - {} - ], - [ - "Characters", - " foo" - ] - ], - "description": "body start-tag followed by space character" - }, - { - "expected": [ - "foo" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "body", - {} - ], - [ - "Characters", - "foo" - ] - ], - "description": "body start-tag followed by text" - }, - { - "expected": [ - "<foo>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "body", - {} - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "foo", - {} - ] - ], - "description": "body start-tag followed by start-tag" - }, - { - "expected": [ - "</foo>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "body", - {} - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "foo" - ] - ], - "description": "body start-tag followed by end-tag" - }, - { - "expected": [ - "" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "body", - {} - ] - ], - "description": "body start-tag at EOF (shouldn't ever happen?!)" - }, - { - "expected": [ - "</body><!--foo-->" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "body" - ], - [ - "Comment", - "foo" - ] - ], - "description": "body end-tag followed by comment" - }, - { - "expected": [ - "</body> foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "body" - ], - [ - "Characters", - " foo" - ] - ], - "description": "body end-tag followed by space character" - }, - { - "expected": [ - "foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "body" - ], - [ - "Characters", - "foo" - ] - ], - "description": "body end-tag followed by text" - }, - { - "expected": [ - "<foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "body" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "foo", - {} - ] - ], - "description": "body end-tag followed by start-tag" - }, - { - "expected": [ - "</foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "body" - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "foo" - ] - ], - "description": "body end-tag followed by end-tag" - }, - { - "expected": [ - "" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "body" - ] - ], - "description": "body end-tag at EOF" - }, - { - "expected": [ - "</li><!--foo-->" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "li" - ], - [ - "Comment", - "foo" - ] - ], - "description": "li end-tag followed by comment" - }, - { - "expected": [ - "</li> foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "li" - ], - [ - "Characters", - " foo" - ] - ], - "description": "li end-tag followed by space character" - }, - { - "expected": [ - "</li>foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "li" - ], - [ - "Characters", - "foo" - ] - ], - "description": "li end-tag followed by text" - }, - { - "expected": [ - "</li><foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "li" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "foo", - {} - ] - ], - "description": "li end-tag followed by start-tag" - }, - { - "expected": [ - "<li>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "li" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "li", - {} - ] - ], - "description": "li end-tag followed by li start-tag" - }, - { - "expected": [ - "</foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "li" - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "foo" - ] - ], - "description": "li end-tag followed by end-tag" - }, - { - "expected": [ - "" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "li" - ] - ], - "description": "li end-tag at EOF" - }, - { - "expected": [ - "</dt><!--foo-->" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "dt" - ], - [ - "Comment", - "foo" - ] - ], - "description": "dt end-tag followed by comment" - }, - { - "expected": [ - "</dt> foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "dt" - ], - [ - "Characters", - " foo" - ] - ], - "description": "dt end-tag followed by space character" - }, - { - "expected": [ - "</dt>foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "dt" - ], - [ - "Characters", - "foo" - ] - ], - "description": "dt end-tag followed by text" - }, - { - "expected": [ - "</dt><foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "dt" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "foo", - {} - ] - ], - "description": "dt end-tag followed by start-tag" - }, - { - "expected": [ - "<dt>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "dt" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "dt", - {} - ] - ], - "description": "dt end-tag followed by dt start-tag" - }, - { - "expected": [ - "<dd>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "dt" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "dd", - {} - ] - ], - "description": "dt end-tag followed by dd start-tag" - }, - { - "expected": [ - "</dt></foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "dt" - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "foo" - ] - ], - "description": "dt end-tag followed by end-tag" - }, - { - "expected": [ - "</dt>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "dt" - ] - ], - "description": "dt end-tag at EOF" - }, - { - "expected": [ - "</dd><!--foo-->" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "dd" - ], - [ - "Comment", - "foo" - ] - ], - "description": "dd end-tag followed by comment" - }, - { - "expected": [ - "</dd> foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "dd" - ], - [ - "Characters", - " foo" - ] - ], - "description": "dd end-tag followed by space character" - }, - { - "expected": [ - "</dd>foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "dd" - ], - [ - "Characters", - "foo" - ] - ], - "description": "dd end-tag followed by text" - }, - { - "expected": [ - "</dd><foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "dd" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "foo", - {} - ] - ], - "description": "dd end-tag followed by start-tag" - }, - { - "expected": [ - "<dd>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "dd" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "dd", - {} - ] - ], - "description": "dd end-tag followed by dd start-tag" - }, - { - "expected": [ - "<dt>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "dd" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "dt", - {} - ] - ], - "description": "dd end-tag followed by dt start-tag" - }, - { - "expected": [ - "</foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "dd" - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "foo" - ] - ], - "description": "dd end-tag followed by end-tag" - }, - { - "expected": [ - "" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "dd" - ] - ], - "description": "dd end-tag at EOF" - }, - { - "expected": [ - "</p><!--foo-->" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "Comment", - "foo" - ] - ], - "description": "p end-tag followed by comment" - }, - { - "expected": [ - "</p> foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "Characters", - " foo" - ] - ], - "description": "p end-tag followed by space character" - }, - { - "expected": [ - "</p>foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "Characters", - "foo" - ] - ], - "description": "p end-tag followed by text" - }, - { - "expected": [ - "</p><foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "foo", - {} - ] - ], - "description": "p end-tag followed by start-tag" - }, - { - "expected": [ - "<address>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "address", - {} - ] - ], - "description": "p end-tag followed by address start-tag" - }, - { - "expected": [ - "<article>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "article", - {} - ] - ], - "description": "p end-tag followed by article start-tag" - }, - { - "expected": [ - "<aside>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "aside", - {} - ] - ], - "description": "p end-tag followed by aside start-tag" - }, - { - "expected": [ - "<blockquote>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "blockquote", - {} - ] - ], - "description": "p end-tag followed by blockquote start-tag" - }, - { - "expected": [ - "<datagrid>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "datagrid", - {} - ] - ], - "description": "p end-tag followed by datagrid start-tag" - }, - { - "expected": [ - "<dialog>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "dialog", - {} - ] - ], - "description": "p end-tag followed by dialog start-tag" - }, - { - "expected": [ - "<dir>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "dir", - {} - ] - ], - "description": "p end-tag followed by dir start-tag" - }, - { - "expected": [ - "<div>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "div", - {} - ] - ], - "description": "p end-tag followed by div start-tag" - }, - { - "expected": [ - "<dl>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "dl", - {} - ] - ], - "description": "p end-tag followed by dl start-tag" - }, - { - "expected": [ - "<fieldset>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "fieldset", - {} - ] - ], - "description": "p end-tag followed by fieldset start-tag" - }, - { - "expected": [ - "<footer>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "footer", - {} - ] - ], - "description": "p end-tag followed by footer start-tag" - }, - { - "expected": [ - "<form>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "form", - {} - ] - ], - "description": "p end-tag followed by form start-tag" - }, - { - "expected": [ - "<h1>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "h1", - {} - ] - ], - "description": "p end-tag followed by h1 start-tag" - }, - { - "expected": [ - "<h2>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "h2", - {} - ] - ], - "description": "p end-tag followed by h2 start-tag" - }, - { - "expected": [ - "<h3>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "h3", - {} - ] - ], - "description": "p end-tag followed by h3 start-tag" - }, - { - "expected": [ - "<h4>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "h4", - {} - ] - ], - "description": "p end-tag followed by h4 start-tag" - }, - { - "expected": [ - "<h5>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "h5", - {} - ] - ], - "description": "p end-tag followed by h5 start-tag" - }, - { - "expected": [ - "<h6>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "h6", - {} - ] - ], - "description": "p end-tag followed by h6 start-tag" - }, - { - "expected": [ - "<header>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "header", - {} - ] - ], - "description": "p end-tag followed by header start-tag" - }, - { - "expected": [ - "<hr>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "EmptyTag", - "hr", - {} - ] - ], - "description": "p end-tag followed by hr empty-tag" - }, - { - "expected": [ - "<menu>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "menu", - {} - ] - ], - "description": "p end-tag followed by menu start-tag" - }, - { - "expected": [ - "<nav>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "nav", - {} - ] - ], - "description": "p end-tag followed by nav start-tag" - }, - { - "expected": [ - "<ol>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "ol", - {} - ] - ], - "description": "p end-tag followed by ol start-tag" - }, - { - "expected": [ - "<p>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "p", - {} - ] - ], - "description": "p end-tag followed by p start-tag" - }, - { - "expected": [ - "<pre>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "pre", - {} - ] - ], - "description": "p end-tag followed by pre start-tag" - }, - { - "expected": [ - "<section>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "section", - {} - ] - ], - "description": "p end-tag followed by section start-tag" - }, - { - "expected": [ - "<table>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "table", - {} - ] - ], - "description": "p end-tag followed by table start-tag" - }, - { - "expected": [ - "<ul>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "ul", - {} - ] - ], - "description": "p end-tag followed by ul start-tag" - }, - { - "expected": [ - "</foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "foo" - ] - ], - "description": "p end-tag followed by end-tag" - }, - { - "expected": [ - "" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "p" - ] - ], - "description": "p end-tag at EOF" - }, - { - "expected": [ - "</optgroup><!--foo-->" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "optgroup" - ], - [ - "Comment", - "foo" - ] - ], - "description": "optgroup end-tag followed by comment" - }, - { - "expected": [ - "</optgroup> foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "optgroup" - ], - [ - "Characters", - " foo" - ] - ], - "description": "optgroup end-tag followed by space character" - }, - { - "expected": [ - "</optgroup>foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "optgroup" - ], - [ - "Characters", - "foo" - ] - ], - "description": "optgroup end-tag followed by text" - }, - { - "expected": [ - "</optgroup><foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "optgroup" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "foo", - {} - ] - ], - "description": "optgroup end-tag followed by start-tag" - }, - { - "expected": [ - "<optgroup>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "optgroup" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "optgroup", - {} - ] - ], - "description": "optgroup end-tag followed by optgroup start-tag" - }, - { - "expected": [ - "</foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "optgroup" - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "foo" - ] - ], - "description": "optgroup end-tag followed by end-tag" - }, - { - "expected": [ - "" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "optgroup" - ] - ], - "description": "optgroup end-tag at EOF" - }, - { - "expected": [ - "</option><!--foo-->" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "option" - ], - [ - "Comment", - "foo" - ] - ], - "description": "option end-tag followed by comment" - }, - { - "expected": [ - "</option> foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "option" - ], - [ - "Characters", - " foo" - ] - ], - "description": "option end-tag followed by space character" - }, - { - "expected": [ - "</option>foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "option" - ], - [ - "Characters", - "foo" - ] - ], - "description": "option end-tag followed by text" - }, - { - "expected": [ - "<optgroup>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "option" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "optgroup", - {} - ] - ], - "description": "option end-tag followed by optgroup start-tag" - }, - { - "expected": [ - "</option><foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "option" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "foo", - {} - ] - ], - "description": "option end-tag followed by start-tag" - }, - { - "expected": [ - "<option>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "option" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "option", - {} - ] - ], - "description": "option end-tag followed by option start-tag" - }, - { - "expected": [ - "</foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "option" - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "foo" - ] - ], - "description": "option end-tag followed by end-tag" - }, - { - "expected": [ - "" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "option" - ] - ], - "description": "option end-tag at EOF" - }, - { - "expected": [ - "<colgroup><!--foo-->" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "colgroup", - {} - ], - [ - "Comment", - "foo" - ] - ], - "description": "colgroup start-tag followed by comment" - }, - { - "expected": [ - "<colgroup> foo" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "colgroup", - {} - ], - [ - "Characters", - " foo" - ] - ], - "description": "colgroup start-tag followed by space character" - }, - { - "expected": [ - "<colgroup>foo" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "colgroup", - {} - ], - [ - "Characters", - "foo" - ] - ], - "description": "colgroup start-tag followed by text" - }, - { - "expected": [ - "<colgroup><foo>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "colgroup", - {} - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "foo", - {} - ] - ], - "description": "colgroup start-tag followed by start-tag" - }, - { - "expected": [ - "<table><col>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "table", - {} - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "colgroup", - {} - ], - [ - "EmptyTag", - "col", - {} - ] - ], - "description": "first colgroup in a table with a col child" - }, - { - "expected": [ - "</colgroup><col>", - "<colgroup><col>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "colgroup" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "colgroup", - {} - ], - [ - "EmptyTag", - "http://www.w3.org/1999/xhtml", - "col", - {} - ] - ], - "description": "colgroup with a col child, following another colgroup" - }, - { - "expected": [ - "<colgroup></foo>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "colgroup", - {} - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "foo" - ] - ], - "description": "colgroup start-tag followed by end-tag" - }, - { - "expected": [ - "<colgroup>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "colgroup", - {} - ] - ], - "description": "colgroup start-tag at EOF" - }, - { - "expected": [ - "</colgroup><!--foo-->" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "colgroup" - ], - [ - "Comment", - "foo" - ] - ], - "description": "colgroup end-tag followed by comment" - }, - { - "expected": [ - "</colgroup> foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "colgroup" - ], - [ - "Characters", - " foo" - ] - ], - "description": "colgroup end-tag followed by space character" - }, - { - "expected": [ - "foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "colgroup" - ], - [ - "Characters", - "foo" - ] - ], - "description": "colgroup end-tag followed by text" - }, - { - "expected": [ - "<foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "colgroup" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "foo", - {} - ] - ], - "description": "colgroup end-tag followed by start-tag" - }, - { - "expected": [ - "</foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "colgroup" - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "foo" - ] - ], - "description": "colgroup end-tag followed by end-tag" - }, - { - "expected": [ - "" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "colgroup" - ] - ], - "description": "colgroup end-tag at EOF" - }, - { - "expected": [ - "</thead><!--foo-->" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "thead" - ], - [ - "Comment", - "foo" - ] - ], - "description": "thead end-tag followed by comment" - }, - { - "expected": [ - "</thead> foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "thead" - ], - [ - "Characters", - " foo" - ] - ], - "description": "thead end-tag followed by space character" - }, - { - "expected": [ - "</thead>foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "thead" - ], - [ - "Characters", - "foo" - ] - ], - "description": "thead end-tag followed by text" - }, - { - "expected": [ - "</thead><foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "thead" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "foo", - {} - ] - ], - "description": "thead end-tag followed by start-tag" - }, - { - "expected": [ - "<tbody>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "thead" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "tbody", - {} - ] - ], - "description": "thead end-tag followed by tbody start-tag" - }, - { - "expected": [ - "<tfoot>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "thead" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "tfoot", - {} - ] - ], - "description": "thead end-tag followed by tfoot start-tag" - }, - { - "expected": [ - "</thead></foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "thead" - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "foo" - ] - ], - "description": "thead end-tag followed by end-tag" - }, - { - "expected": [ - "</thead>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "thead" - ] - ], - "description": "thead end-tag at EOF" - }, - { - "expected": [ - "<tbody><!--foo-->" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "tbody", - {} - ], - [ - "Comment", - "foo" - ] - ], - "description": "tbody start-tag followed by comment" - }, - { - "expected": [ - "<tbody> foo" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "tbody", - {} - ], - [ - "Characters", - " foo" - ] - ], - "description": "tbody start-tag followed by space character" - }, - { - "expected": [ - "<tbody>foo" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "tbody", - {} - ], - [ - "Characters", - "foo" - ] - ], - "description": "tbody start-tag followed by text" - }, - { - "expected": [ - "<tbody><foo>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "tbody", - {} - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "foo", - {} - ] - ], - "description": "tbody start-tag followed by start-tag" - }, - { - "expected": [ - "<table><tr>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "table", - {} - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "tbody", - {} - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "tr", - {} - ] - ], - "description": "first tbody in a table with a tr child" - }, - { - "expected": [ - "<tbody><tr>", - "</tbody><tr>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "tbody" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "tbody", - {} - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "tr", - {} - ] - ], - "description": "tbody with a tr child, following another tbody" - }, - { - "expected": [ - "<tbody><tr>", - "</thead><tr>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "thead" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "tbody", - {} - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "tr", - {} - ] - ], - "description": "tbody with a tr child, following a thead" - }, - { - "expected": [ - "<tbody><tr>", - "</tfoot><tr>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "tfoot" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "tbody", - {} - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "tr", - {} - ] - ], - "description": "tbody with a tr child, following a tfoot" - }, - { - "expected": [ - "<tbody></foo>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "tbody", - {} - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "foo" - ] - ], - "description": "tbody start-tag followed by end-tag" - }, - { - "expected": [ - "<tbody>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "tbody", - {} - ] - ], - "description": "tbody start-tag at EOF" - }, - { - "expected": [ - "</tbody><!--foo-->" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "tbody" - ], - [ - "Comment", - "foo" - ] - ], - "description": "tbody end-tag followed by comment" - }, - { - "expected": [ - "</tbody> foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "tbody" - ], - [ - "Characters", - " foo" - ] - ], - "description": "tbody end-tag followed by space character" - }, - { - "expected": [ - "</tbody>foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "tbody" - ], - [ - "Characters", - "foo" - ] - ], - "description": "tbody end-tag followed by text" - }, - { - "expected": [ - "</tbody><foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "tbody" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "foo", - {} - ] - ], - "description": "tbody end-tag followed by start-tag" - }, - { - "expected": [ - "<tbody>", - "</tbody>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "tbody" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "tbody", - {} - ] - ], - "description": "tbody end-tag followed by tbody start-tag" - }, - { - "expected": [ - "<tfoot>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "tbody" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "tfoot", - {} - ] - ], - "description": "tbody end-tag followed by tfoot start-tag" - }, - { - "expected": [ - "</foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "tbody" - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "foo" - ] - ], - "description": "tbody end-tag followed by end-tag" - }, - { - "expected": [ - "" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "tbody" - ] - ], - "description": "tbody end-tag at EOF" - }, - { - "expected": [ - "</tfoot><!--foo-->" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "tfoot" - ], - [ - "Comment", - "foo" - ] - ], - "description": "tfoot end-tag followed by comment" - }, - { - "expected": [ - "</tfoot> foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "tfoot" - ], - [ - "Characters", - " foo" - ] - ], - "description": "tfoot end-tag followed by space character" - }, - { - "expected": [ - "</tfoot>foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "tfoot" - ], - [ - "Characters", - "foo" - ] - ], - "description": "tfoot end-tag followed by text" - }, - { - "expected": [ - "</tfoot><foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "tfoot" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "foo", - {} - ] - ], - "description": "tfoot end-tag followed by start-tag" - }, - { - "expected": [ - "<tbody>", - "</tfoot>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "tfoot" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "tbody", - {} - ] - ], - "description": "tfoot end-tag followed by tbody start-tag" - }, - { - "expected": [ - "</foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "tfoot" - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "foo" - ] - ], - "description": "tfoot end-tag followed by end-tag" - }, - { - "expected": [ - "" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "tfoot" - ] - ], - "description": "tfoot end-tag at EOF" - }, - { - "expected": [ - "</tr><!--foo-->" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "tr" - ], - [ - "Comment", - "foo" - ] - ], - "description": "tr end-tag followed by comment" - }, - { - "expected": [ - "</tr> foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "tr" - ], - [ - "Characters", - " foo" - ] - ], - "description": "tr end-tag followed by space character" - }, - { - "expected": [ - "</tr>foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "tr" - ], - [ - "Characters", - "foo" - ] - ], - "description": "tr end-tag followed by text" - }, - { - "expected": [ - "</tr><foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "tr" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "foo", - {} - ] - ], - "description": "tr end-tag followed by start-tag" - }, - { - "expected": [ - "<tr>", - "</tr>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "tr" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "tr", - {} - ] - ], - "description": "tr end-tag followed by tr start-tag" - }, - { - "expected": [ - "</foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "tr" - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "foo" - ] - ], - "description": "tr end-tag followed by end-tag" - }, - { - "expected": [ - "" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "tr" - ] - ], - "description": "tr end-tag at EOF" - }, - { - "expected": [ - "</td><!--foo-->" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "td" - ], - [ - "Comment", - "foo" - ] - ], - "description": "td end-tag followed by comment" - }, - { - "expected": [ - "</td> foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "td" - ], - [ - "Characters", - " foo" - ] - ], - "description": "td end-tag followed by space character" - }, - { - "expected": [ - "</td>foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "td" - ], - [ - "Characters", - "foo" - ] - ], - "description": "td end-tag followed by text" - }, - { - "expected": [ - "</td><foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "td" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "foo", - {} - ] - ], - "description": "td end-tag followed by start-tag" - }, - { - "expected": [ - "<td>", - "</td>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "td" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "td", - {} - ] - ], - "description": "td end-tag followed by td start-tag" - }, - { - "expected": [ - "<th>", - "</td>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "td" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "th", - {} - ] - ], - "description": "td end-tag followed by th start-tag" - }, - { - "expected": [ - "</foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "td" - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "foo" - ] - ], - "description": "td end-tag followed by end-tag" - }, - { - "expected": [ - "" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "td" - ] - ], - "description": "td end-tag at EOF" - }, - { - "expected": [ - "</th><!--foo-->" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "th" - ], - [ - "Comment", - "foo" - ] - ], - "description": "th end-tag followed by comment" - }, - { - "expected": [ - "</th> foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "th" - ], - [ - "Characters", - " foo" - ] - ], - "description": "th end-tag followed by space character" - }, - { - "expected": [ - "</th>foo" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "th" - ], - [ - "Characters", - "foo" - ] - ], - "description": "th end-tag followed by text" - }, - { - "expected": [ - "</th><foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "th" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "foo", - {} - ] - ], - "description": "th end-tag followed by start-tag" - }, - { - "expected": [ - "<th>", - "</th>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "th" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "th", - {} - ] - ], - "description": "th end-tag followed by th start-tag" - }, - { - "expected": [ - "<td>", - "</th>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "th" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "td", - {} - ] - ], - "description": "th end-tag followed by td start-tag" - }, - { - "expected": [ - "</foo>" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "th" - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "foo" - ] - ], - "description": "th end-tag followed by end-tag" - }, - { - "expected": [ - "" - ], - "input": [ - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "th" - ] - ], - "description": "th end-tag at EOF" - } - ] -}
\ No newline at end of file diff --git a/libs/html5lib/tests/serializer-testdata/options.test b/libs/html5lib/tests/serializer-testdata/options.test deleted file mode 100644 index a22eebfcf..000000000 --- a/libs/html5lib/tests/serializer-testdata/options.test +++ /dev/null @@ -1,334 +0,0 @@ -{ - "tests": [ - { - "expected": [ - "<span title='test 'with' quote_char'>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "span", - [ - { - "namespace": null, - "name": "title", - "value": "test 'with' quote_char" - } - ] - ] - ], - "description": "quote_char=\"'\"", - "options": { - "quote_char": "'" - } - }, - { - "expected": [ - "<button disabled>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "button", - [ - { - "namespace": null, - "name": "disabled", - "value": "disabled" - } - ] - ] - ], - "description": "quote_attr_values='always'", - "options": { - "quote_attr_values": "always" - } - }, - { - "expected": [ - "<div itemscope>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "div", - [ - { - "namespace": null, - "name": "itemscope", - "value": "itemscope" - } - ] - ] - ], - "description": "quote_attr_values='always' with itemscope", - "options": { - "quote_attr_values": "always" - } - }, - { - "expected": [ - "<div irrelevant>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "div", - [ - { - "namespace": null, - "name": "irrelevant", - "value": "irrelevant" - } - ] - ] - ], - "description": "quote_attr_values='always' with irrelevant", - "options": { - "quote_attr_values": "always" - } - }, - { - "expected": [ - "<div class=\"foo\">" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "div", - [ - { - "namespace": null, - "name": "class", - "value": "foo" - } - ] - ] - ], - "description": "non-minimized quote_attr_values='always'", - "options": { - "quote_attr_values": "always" - } - }, - { - "expected": [ - "<div class=foo>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "div", - [ - { - "namespace": null, - "name": "class", - "value": "foo" - } - ] - ] - ], - "description": "non-minimized quote_attr_values='legacy'", - "options": { - "quote_attr_values": "legacy" - } - }, - { - "expected": [ - "<div class=foo>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "div", - [ - { - "namespace": null, - "name": "class", - "value": "foo" - } - ] - ] - ], - "description": "non-minimized quote_attr_values='spec'", - "options": { - "quote_attr_values": "spec" - } - }, - { - "expected": [ - "<img />" - ], - "input": [ - [ - "EmptyTag", - "img", - {} - ] - ], - "description": "use_trailing_solidus=true with void element", - "options": { - "use_trailing_solidus": true - } - }, - { - "expected": [ - "<div>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "div", - {} - ] - ], - "description": "use_trailing_solidus=true with non-void element", - "options": { - "use_trailing_solidus": true - } - }, - { - "expected": [ - "<div itemscope=itemscope>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "div", - [ - { - "namespace": null, - "name": "itemscope", - "value": "itemscope" - } - ] - ] - ], - "description": "minimize_boolean_attributes=false", - "options": { - "minimize_boolean_attributes": false - } - }, - { - "expected": [ - "<div irrelevant=irrelevant>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "div", - [ - { - "namespace": null, - "name": "irrelevant", - "value": "irrelevant" - } - ] - ] - ], - "description": "minimize_boolean_attributes=false", - "options": { - "minimize_boolean_attributes": false - } - }, - { - "expected": [ - "<div itemscope=\"\">" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "div", - [ - { - "namespace": null, - "name": "itemscope", - "value": "" - } - ] - ] - ], - "description": "minimize_boolean_attributes=false with empty value", - "options": { - "minimize_boolean_attributes": false - } - }, - { - "expected": [ - "<div irrelevant=\"\">" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "div", - [ - { - "namespace": null, - "name": "irrelevant", - "value": "" - } - ] - ] - ], - "description": "minimize_boolean_attributes=false with empty value", - "options": { - "minimize_boolean_attributes": false - } - }, - { - "expected": [ - "<a title=\"a<b>c&d\">" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "a", - [ - { - "namespace": null, - "name": "title", - "value": "a<b>c&d" - } - ] - ] - ], - "description": "escape less than signs in attribute values", - "options": { - "escape_lt_in_attrs": true - } - }, - { - "expected": [ - "<script>a<b>c&d" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "script", - {} - ], - [ - "Characters", - "a<b>c&d" - ] - ], - "description": "rcdata", - "options": { - "escape_rcdata": true - } - } - ] -}
\ No newline at end of file diff --git a/libs/html5lib/tests/serializer-testdata/whitespace.test b/libs/html5lib/tests/serializer-testdata/whitespace.test deleted file mode 100644 index dac3a69e2..000000000 --- a/libs/html5lib/tests/serializer-testdata/whitespace.test +++ /dev/null @@ -1,198 +0,0 @@ -{ - "tests": [ - { - "expected": [ - " foo" - ], - "input": [ - [ - "Characters", - "\t\r\n\f foo" - ] - ], - "description": "bare text with leading spaces", - "options": { - "strip_whitespace": true - } - }, - { - "expected": [ - "foo " - ], - "input": [ - [ - "Characters", - "foo \t\r\n\f" - ] - ], - "description": "bare text with trailing spaces", - "options": { - "strip_whitespace": true - } - }, - { - "expected": [ - "foo bar" - ], - "input": [ - [ - "Characters", - "foo \t\r\n\f bar" - ] - ], - "description": "bare text with inner spaces", - "options": { - "strip_whitespace": true - } - }, - { - "expected": [ - "<pre>\t\r\n\f foo \t\r\n\f bar \t\r\n\f</pre>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "pre", - {} - ], - [ - "Characters", - "\t\r\n\f foo \t\r\n\f bar \t\r\n\f" - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "pre" - ] - ], - "description": "text within <pre>", - "options": { - "strip_whitespace": true - } - }, - { - "expected": [ - "<pre>\t\r\n\f fo<span>o \t\r\n\f b</span>ar \t\r\n\f</pre>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "pre", - {} - ], - [ - "Characters", - "\t\r\n\f fo" - ], - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "span", - {} - ], - [ - "Characters", - "o \t\r\n\f b" - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "span" - ], - [ - "Characters", - "ar \t\r\n\f" - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "pre" - ] - ], - "description": "text within <pre>, with inner markup", - "options": { - "strip_whitespace": true - } - }, - { - "expected": [ - "<textarea>\t\r\n\f foo \t\r\n\f bar \t\r\n\f</textarea>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "textarea", - {} - ], - [ - "Characters", - "\t\r\n\f foo \t\r\n\f bar \t\r\n\f" - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "textarea" - ] - ], - "description": "text within <textarea>", - "options": { - "strip_whitespace": true - } - }, - { - "expected": [ - "<script>\t\r\n\f foo \t\r\n\f bar \t\r\n\f</script>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "script", - {} - ], - [ - "Characters", - "\t\r\n\f foo \t\r\n\f bar \t\r\n\f" - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "script" - ] - ], - "description": "text within <script>", - "options": { - "strip_whitespace": true - } - }, - { - "expected": [ - "<style>\t\r\n\f foo \t\r\n\f bar \t\r\n\f</style>" - ], - "input": [ - [ - "StartTag", - "http://www.w3.org/1999/xhtml", - "style", - {} - ], - [ - "Characters", - "\t\r\n\f foo \t\r\n\f bar \t\r\n\f" - ], - [ - "EndTag", - "http://www.w3.org/1999/xhtml", - "style" - ] - ], - "description": "text within <style>", - "options": { - "strip_whitespace": true - } - } - ] -}
\ No newline at end of file diff --git a/libs/html5lib/tests/support.py b/libs/html5lib/tests/support.py deleted file mode 100644 index 9cd5afbe6..000000000 --- a/libs/html5lib/tests/support.py +++ /dev/null @@ -1,199 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -# pylint:disable=wrong-import-position - -import os -import sys -import codecs -import glob -import xml.sax.handler - -base_path = os.path.split(__file__)[0] - -test_dir = os.path.join(base_path, 'testdata') -sys.path.insert(0, os.path.abspath(os.path.join(base_path, - os.path.pardir, - os.path.pardir))) - -from html5lib import treebuilders, treewalkers, treeadapters # noqa -del base_path - -# Build a dict of available trees -treeTypes = {} - -# DOM impls -treeTypes["DOM"] = { - "builder": treebuilders.getTreeBuilder("dom"), - "walker": treewalkers.getTreeWalker("dom") -} - -# ElementTree impls -import xml.etree.ElementTree as ElementTree # noqa -treeTypes['ElementTree'] = { - "builder": treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True), - "walker": treewalkers.getTreeWalker("etree", ElementTree) -} - -try: - import xml.etree.cElementTree as cElementTree # noqa -except ImportError: - treeTypes['cElementTree'] = None -else: - # On Python 3.3 and above cElementTree is an alias, don't run them twice. - if cElementTree.Element is ElementTree.Element: - treeTypes['cElementTree'] = None - else: - treeTypes['cElementTree'] = { - "builder": treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True), - "walker": treewalkers.getTreeWalker("etree", cElementTree) - } - -try: - import lxml.etree as lxml # noqa -except ImportError: - treeTypes['lxml'] = None -else: - treeTypes['lxml'] = { - "builder": treebuilders.getTreeBuilder("lxml"), - "walker": treewalkers.getTreeWalker("lxml") - } - -# Genshi impls -try: - import genshi # noqa -except ImportError: - treeTypes["genshi"] = None -else: - treeTypes["genshi"] = { - "builder": treebuilders.getTreeBuilder("dom"), - "adapter": lambda tree: treeadapters.genshi.to_genshi(treewalkers.getTreeWalker("dom")(tree)), - "walker": treewalkers.getTreeWalker("genshi") - } - -# pylint:enable=wrong-import-position - - -def get_data_files(subdirectory, files='*.dat', search_dir=test_dir): - return sorted(glob.glob(os.path.join(search_dir, subdirectory, files))) - - -class DefaultDict(dict): - def __init__(self, default, *args, **kwargs): - self.default = default - dict.__init__(self, *args, **kwargs) - - def __getitem__(self, key): - return dict.get(self, key, self.default) - - -class TestData(object): - def __init__(self, filename, newTestHeading="data", encoding="utf8"): - if encoding is None: - self.f = open(filename, mode="rb") - else: - self.f = codecs.open(filename, encoding=encoding) - self.encoding = encoding - self.newTestHeading = newTestHeading - - def __iter__(self): - data = DefaultDict(None) - key = None - for line in self.f: - heading = self.isSectionHeading(line) - if heading: - if data and heading == self.newTestHeading: - # Remove trailing newline - data[key] = data[key][:-1] - yield self.normaliseOutput(data) - data = DefaultDict(None) - key = heading - data[key] = "" if self.encoding else b"" - elif key is not None: - data[key] += line - if data: - yield self.normaliseOutput(data) - - def isSectionHeading(self, line): - """If the current heading is a test section heading return the heading, - otherwise return False""" - # print(line) - if line.startswith("#" if self.encoding else b"#"): - return line[1:].strip() - else: - return False - - def normaliseOutput(self, data): - # Remove trailing newlines - for key, value in data.items(): - if value.endswith("\n" if self.encoding else b"\n"): - data[key] = value[:-1] - return data - - -def convert(stripChars): - def convertData(data): - """convert the output of str(document) to the format used in the testcases""" - data = data.split("\n") - rv = [] - for line in data: - if line.startswith("|"): - rv.append(line[stripChars:]) - else: - rv.append(line) - return "\n".join(rv) - return convertData - - -convertExpected = convert(2) - - -def errorMessage(input, expected, actual): - msg = ("Input:\n%s\nExpected:\n%s\nReceived\n%s\n" % - (repr(input), repr(expected), repr(actual))) - if sys.version_info[0] == 2: - msg = msg.encode("ascii", "backslashreplace") - return msg - - -class TracingSaxHandler(xml.sax.handler.ContentHandler): - def __init__(self): - xml.sax.handler.ContentHandler.__init__(self) - self.visited = [] - - def startDocument(self): - self.visited.append('startDocument') - - def endDocument(self): - self.visited.append('endDocument') - - def startPrefixMapping(self, prefix, uri): - # These are ignored as their order is not guaranteed - pass - - def endPrefixMapping(self, prefix): - # These are ignored as their order is not guaranteed - pass - - def startElement(self, name, attrs): - self.visited.append(('startElement', name, attrs)) - - def endElement(self, name): - self.visited.append(('endElement', name)) - - def startElementNS(self, name, qname, attrs): - self.visited.append(('startElementNS', name, qname, dict(attrs))) - - def endElementNS(self, name, qname): - self.visited.append(('endElementNS', name, qname)) - - def characters(self, content): - self.visited.append(('characters', content)) - - def ignorableWhitespace(self, whitespace): - self.visited.append(('ignorableWhitespace', whitespace)) - - def processingInstruction(self, target, data): - self.visited.append(('processingInstruction', target, data)) - - def skippedEntity(self, name): - self.visited.append(('skippedEntity', name)) diff --git a/libs/html5lib/tests/test_alphabeticalattributes.py b/libs/html5lib/tests/test_alphabeticalattributes.py deleted file mode 100644 index 7d5b8e0f6..000000000 --- a/libs/html5lib/tests/test_alphabeticalattributes.py +++ /dev/null @@ -1,78 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from collections import OrderedDict - -import pytest - -import html5lib -from html5lib.filters.alphabeticalattributes import Filter -from html5lib.serializer import HTMLSerializer - - [email protected]('msg, attrs, expected_attrs', [ - ( - 'no attrs', - {}, - {} - ), - ( - 'one attr', - {(None, 'alt'): 'image'}, - OrderedDict([((None, 'alt'), 'image')]) - ), - ( - 'multiple attrs', - { - (None, 'src'): 'foo', - (None, 'alt'): 'image', - (None, 'style'): 'border: 1px solid black;' - }, - OrderedDict([ - ((None, 'alt'), 'image'), - ((None, 'src'), 'foo'), - ((None, 'style'), 'border: 1px solid black;') - ]) - ), -]) -def test_alphabetizing(msg, attrs, expected_attrs): - tokens = [{'type': 'StartTag', 'name': 'img', 'data': attrs}] - output_tokens = list(Filter(tokens)) - - attrs = output_tokens[0]['data'] - assert attrs == expected_attrs - - -def test_with_different_namespaces(): - tokens = [{ - 'type': 'StartTag', - 'name': 'pattern', - 'data': { - (None, 'id'): 'patt1', - ('http://www.w3.org/1999/xlink', 'href'): '#patt2' - } - }] - output_tokens = list(Filter(tokens)) - - attrs = output_tokens[0]['data'] - assert attrs == OrderedDict([ - ((None, 'id'), 'patt1'), - (('http://www.w3.org/1999/xlink', 'href'), '#patt2') - ]) - - -def test_with_serializer(): - """Verify filter works in the context of everything else""" - parser = html5lib.HTMLParser() - dom = parser.parseFragment('<svg><pattern xlink:href="#patt2" id="patt1"></svg>') - walker = html5lib.getTreeWalker('etree') - ser = HTMLSerializer( - alphabetical_attributes=True, - quote_attr_values='always' - ) - - # FIXME(willkg): The "xlink" namespace gets dropped by the serializer. When - # that gets fixed, we can fix this expected result. - assert ( - ser.render(walker(dom)) == - '<svg><pattern id="patt1" href="#patt2"></pattern></svg>' - ) diff --git a/libs/html5lib/tests/test_encoding.py b/libs/html5lib/tests/test_encoding.py deleted file mode 100644 index 47c4814a4..000000000 --- a/libs/html5lib/tests/test_encoding.py +++ /dev/null @@ -1,117 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import os - -import pytest - -from .support import get_data_files, test_dir, errorMessage, TestData as _TestData -from html5lib import HTMLParser, _inputstream - - -def test_basic_prescan_length(): - data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8') - pad = 1024 - len(data) + 1 - data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-") - assert len(data) == 1024 # Sanity - stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False) - assert 'utf-8' == stream.charEncoding[0].name - - -def test_parser_reparse(): - data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8') - pad = 10240 - len(data) + 1 - data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-") - assert len(data) == 10240 # Sanity - stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False) - assert 'windows-1252' == stream.charEncoding[0].name - p = HTMLParser(namespaceHTMLElements=False) - doc = p.parse(data, useChardet=False) - assert 'utf-8' == p.documentEncoding - assert doc.find(".//title").text == "Caf\u00E9" - - [email protected]("expected,data,kwargs", [ - ("utf-16le", b"\xFF\xFE", {"override_encoding": "iso-8859-2"}), - ("utf-16be", b"\xFE\xFF", {"override_encoding": "iso-8859-2"}), - ("utf-8", b"\xEF\xBB\xBF", {"override_encoding": "iso-8859-2"}), - ("iso-8859-2", b"", {"override_encoding": "iso-8859-2", "transport_encoding": "iso-8859-3"}), - ("iso-8859-2", b"<meta charset=iso-8859-3>", {"transport_encoding": "iso-8859-2"}), - ("iso-8859-2", b"<meta charset=iso-8859-2>", {"same_origin_parent_encoding": "iso-8859-3"}), - ("iso-8859-2", b"", {"same_origin_parent_encoding": "iso-8859-2", "likely_encoding": "iso-8859-3"}), - ("iso-8859-2", b"", {"same_origin_parent_encoding": "utf-16", "likely_encoding": "iso-8859-2"}), - ("iso-8859-2", b"", {"same_origin_parent_encoding": "utf-16be", "likely_encoding": "iso-8859-2"}), - ("iso-8859-2", b"", {"same_origin_parent_encoding": "utf-16le", "likely_encoding": "iso-8859-2"}), - ("iso-8859-2", b"", {"likely_encoding": "iso-8859-2", "default_encoding": "iso-8859-3"}), - ("iso-8859-2", b"", {"default_encoding": "iso-8859-2"}), - ("windows-1252", b"", {"default_encoding": "totally-bogus-string"}), - ("windows-1252", b"", {}), -]) -def test_parser_args(expected, data, kwargs): - stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False, **kwargs) - assert expected == stream.charEncoding[0].name - p = HTMLParser() - p.parse(data, useChardet=False, **kwargs) - assert expected == p.documentEncoding - - [email protected]("kwargs", [ - {"override_encoding": "iso-8859-2"}, - {"override_encoding": None}, - {"transport_encoding": "iso-8859-2"}, - {"transport_encoding": None}, - {"same_origin_parent_encoding": "iso-8859-2"}, - {"same_origin_parent_encoding": None}, - {"likely_encoding": "iso-8859-2"}, - {"likely_encoding": None}, - {"default_encoding": "iso-8859-2"}, - {"default_encoding": None}, - {"foo_encoding": "iso-8859-2"}, - {"foo_encoding": None}, -]) -def test_parser_args_raises(kwargs): - with pytest.raises(TypeError) as exc_info: - p = HTMLParser() - p.parse("", useChardet=False, **kwargs) - assert exc_info.value.args[0].startswith("Cannot set an encoding with a unicode input") - - -def param_encoding(): - for filename in get_data_files("encoding"): - tests = _TestData(filename, b"data", encoding=None) - for test in tests: - yield test[b'data'], test[b'encoding'] - - [email protected]("data, encoding", param_encoding()) -def test_parser_encoding(data, encoding): - p = HTMLParser() - assert p.documentEncoding is None - p.parse(data, useChardet=False) - encoding = encoding.lower().decode("ascii") - - assert encoding == p.documentEncoding, errorMessage(data, encoding, p.documentEncoding) - - [email protected]("data, encoding", param_encoding()) -def test_prescan_encoding(data, encoding): - stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False) - encoding = encoding.lower().decode("ascii") - - # Very crude way to ignore irrelevant tests - if len(data) > stream.numBytesMeta: - return - - assert encoding == stream.charEncoding[0].name, errorMessage(data, encoding, stream.charEncoding[0].name) - - -# pylint:disable=wrong-import-position -try: - import chardet # noqa -except ImportError: - print("chardet not found, skipping chardet tests") -else: - def test_chardet(): - with open(os.path.join(test_dir, "encoding", "chardet", "test_big5.txt"), "rb") as fp: - encoding = _inputstream.HTMLInputStream(fp.read()).charEncoding - assert encoding[0].name == "big5" -# pylint:enable=wrong-import-position diff --git a/libs/html5lib/tests/test_meta.py b/libs/html5lib/tests/test_meta.py deleted file mode 100644 index dd02dd7fb..000000000 --- a/libs/html5lib/tests/test_meta.py +++ /dev/null @@ -1,41 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import six -from mock import Mock - -from . import support - - -def _createReprMock(r): - """Creates a mock with a __repr__ returning r - - Also provides __str__ mock with default mock behaviour""" - mock = Mock() - mock.__repr__ = Mock() - mock.__repr__.return_value = r - mock.__str__ = Mock(wraps=mock.__str__) - return mock - - -def test_errorMessage(): - # Create mock objects to take repr of - input = _createReprMock("1") - expected = _createReprMock("2") - actual = _createReprMock("3") - - # Run the actual test - r = support.errorMessage(input, expected, actual) - - # Assertions! - if six.PY2: - assert b"Input:\n1\nExpected:\n2\nReceived\n3\n" == r - else: - assert six.PY3 - assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r - - assert input.__repr__.call_count == 1 - assert expected.__repr__.call_count == 1 - assert actual.__repr__.call_count == 1 - assert not input.__str__.called - assert not expected.__str__.called - assert not actual.__str__.called diff --git a/libs/html5lib/tests/test_optionaltags_filter.py b/libs/html5lib/tests/test_optionaltags_filter.py deleted file mode 100644 index cd2821497..000000000 --- a/libs/html5lib/tests/test_optionaltags_filter.py +++ /dev/null @@ -1,7 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from html5lib.filters.optionaltags import Filter - - -def test_empty(): - assert list(Filter([])) == [] diff --git a/libs/html5lib/tests/test_parser2.py b/libs/html5lib/tests/test_parser2.py deleted file mode 100644 index 879d2447d..000000000 --- a/libs/html5lib/tests/test_parser2.py +++ /dev/null @@ -1,94 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from six import PY2, text_type - -import io - -from . import support # noqa - -from html5lib.constants import namespaces -from html5lib import parse, parseFragment, HTMLParser - - -# tests that aren't autogenerated from text files -def test_assertDoctypeCloneable(): - doc = parse('<!DOCTYPE HTML>', treebuilder="dom") - assert doc.cloneNode(True) is not None - - -def test_line_counter(): - # http://groups.google.com/group/html5lib-discuss/browse_frm/thread/f4f00e4a2f26d5c0 - assert parse("<pre>\nx\n>\n</pre>") is not None - - -def test_namespace_html_elements_0_dom(): - doc = parse("<html></html>", - treebuilder="dom", - namespaceHTMLElements=True) - assert doc.childNodes[0].namespaceURI == namespaces["html"] - - -def test_namespace_html_elements_1_dom(): - doc = parse("<html></html>", - treebuilder="dom", - namespaceHTMLElements=False) - assert doc.childNodes[0].namespaceURI is None - - -def test_namespace_html_elements_0_etree(): - doc = parse("<html></html>", - treebuilder="etree", - namespaceHTMLElements=True) - assert doc.tag == "{%s}html" % (namespaces["html"],) - - -def test_namespace_html_elements_1_etree(): - doc = parse("<html></html>", - treebuilder="etree", - namespaceHTMLElements=False) - assert doc.tag == "html" - - -def test_unicode_file(): - assert parse(io.StringIO("a")) is not None - - -def test_debug_log(): - parser = HTMLParser(debug=True) - parser.parse("<!doctype html><title>a</title><p>b<script>c</script>d</p>e") - - expected = [('dataState', 'InitialPhase', 'InitialPhase', 'processDoctype', {'type': 'Doctype'}), - ('dataState', 'BeforeHtmlPhase', 'BeforeHtmlPhase', 'processStartTag', {'name': 'title', 'type': 'StartTag'}), - ('dataState', 'BeforeHeadPhase', 'BeforeHeadPhase', 'processStartTag', {'name': 'title', 'type': 'StartTag'}), - ('dataState', 'InHeadPhase', 'InHeadPhase', 'processStartTag', {'name': 'title', 'type': 'StartTag'}), - ('rcdataState', 'TextPhase', 'TextPhase', 'processCharacters', {'type': 'Characters'}), - ('dataState', 'TextPhase', 'TextPhase', 'processEndTag', {'name': 'title', 'type': 'EndTag'}), - ('dataState', 'InHeadPhase', 'InHeadPhase', 'processStartTag', {'name': 'p', 'type': 'StartTag'}), - ('dataState', 'AfterHeadPhase', 'AfterHeadPhase', 'processStartTag', {'name': 'p', 'type': 'StartTag'}), - ('dataState', 'InBodyPhase', 'InBodyPhase', 'processStartTag', {'name': 'p', 'type': 'StartTag'}), - ('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'}), - ('dataState', 'InBodyPhase', 'InBodyPhase', 'processStartTag', {'name': 'script', 'type': 'StartTag'}), - ('dataState', 'InBodyPhase', 'InHeadPhase', 'processStartTag', {'name': 'script', 'type': 'StartTag'}), - ('scriptDataState', 'TextPhase', 'TextPhase', 'processCharacters', {'type': 'Characters'}), - ('dataState', 'TextPhase', 'TextPhase', 'processEndTag', {'name': 'script', 'type': 'EndTag'}), - ('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'}), - ('dataState', 'InBodyPhase', 'InBodyPhase', 'processEndTag', {'name': 'p', 'type': 'EndTag'}), - ('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'})] - - if PY2: - for i, log in enumerate(expected): - log = [x.encode("ascii") if isinstance(x, text_type) else x for x in log] - expected[i] = tuple(log) - - assert parser.log == expected - - -def test_no_duplicate_clone(): - frag = parseFragment("<b><em><foo><foob><fooc><aside></b></em>") - assert len(frag) == 2 - - -def test_self_closing_col(): - parser = HTMLParser() - parser.parseFragment('<table><colgroup><col /></colgroup></table>') - assert not parser.errors diff --git a/libs/html5lib/tests/test_sanitizer.py b/libs/html5lib/tests/test_sanitizer.py deleted file mode 100644 index f3faeb805..000000000 --- a/libs/html5lib/tests/test_sanitizer.py +++ /dev/null @@ -1,133 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import pytest - -from html5lib import constants, parseFragment, serialize -from html5lib.filters import sanitizer - - -def sanitize_html(stream): - parsed = parseFragment(stream) - with pytest.deprecated_call(): - serialized = serialize(parsed, - sanitize=True, - omit_optional_tags=False, - use_trailing_solidus=True, - space_before_trailing_solidus=False, - quote_attr_values="always", - quote_char='"', - alphabetical_attributes=True) - return serialized - - -def test_should_handle_astral_plane_characters(): - sanitized = sanitize_html("<p>𝒵 𝔸</p>") - expected = '<p>\U0001d4b5 \U0001d538</p>' - assert expected == sanitized - - -def test_should_allow_relative_uris(): - sanitized = sanitize_html('<p><a href="/example.com"></a></p>') - expected = '<p><a href="/example.com"></a></p>' - assert expected == sanitized - - -def test_invalid_data_uri(): - sanitized = sanitize_html('<audio controls="" src="data:foobar"></audio>') - expected = '<audio controls></audio>' - assert expected == sanitized - - -def test_invalid_ipv6_url(): - sanitized = sanitize_html('<a href="h://]">') - expected = "<a></a>" - assert expected == sanitized - - -def test_data_uri_disallowed_type(): - sanitized = sanitize_html('<audio controls="" src="data:text/html,<html>"></audio>') - expected = "<audio controls></audio>" - assert expected == sanitized - - -def param_sanitizer(): - for ns, tag_name in sanitizer.allowed_elements: - if ns != constants.namespaces["html"]: - continue - if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td', - 'tfoot', 'th', 'thead', 'tr', 'select']: - continue # TODO - if tag_name == 'image': - yield ("test_should_allow_%s_tag" % tag_name, - "<img title=\"1\"/>foo <bad>bar</bad> baz", - "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name)) - elif tag_name == 'br': - yield ("test_should_allow_%s_tag" % tag_name, - "<br title=\"1\"/>foo <bad>bar</bad> baz<br/>", - "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name)) - elif tag_name in constants.voidElements: - yield ("test_should_allow_%s_tag" % tag_name, - "<%s title=\"1\"/>foo <bad>bar</bad> baz" % tag_name, - "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name)) - else: - yield ("test_should_allow_%s_tag" % tag_name, - "<%s title=\"1\">foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name), - "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name)) - - for ns, attribute_name in sanitizer.allowed_attributes: - if ns is not None: - continue - if attribute_name != attribute_name.lower(): - continue # TODO - if attribute_name == 'style': - continue - attribute_value = 'foo' - if attribute_name in sanitizer.attr_val_is_uri: - attribute_value = '%s://sub.domain.tld/path/object.ext' % sanitizer.allowed_protocols[0] - yield ("test_should_allow_%s_attribute" % attribute_name, - "<p %s=\"%s\">foo <bad>bar</bad> baz</p>" % (attribute_name, attribute_value), - "<p %s='%s'>foo <bad>bar</bad> baz</p>" % (attribute_name, attribute_value)) - - for protocol in sanitizer.allowed_protocols: - rest_of_uri = '//sub.domain.tld/path/object.ext' - if protocol == 'data': - rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ=' - yield ("test_should_allow_uppercase_%s_uris" % protocol, - "<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri), - """<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri)) - - for protocol in sanitizer.allowed_protocols: - rest_of_uri = '//sub.domain.tld/path/object.ext' - if protocol == 'data': - rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ=' - protocol = protocol.upper() - yield ("test_should_allow_uppercase_%s_uris" % protocol, - "<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri), - """<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri)) - - [email protected]("expected, input", - (pytest.param(expected, input, id=id) - for id, expected, input in param_sanitizer())) -def test_sanitizer(expected, input): - parsed = parseFragment(expected) - expected = serialize(parsed, - omit_optional_tags=False, - use_trailing_solidus=True, - space_before_trailing_solidus=False, - quote_attr_values="always", - quote_char='"', - alphabetical_attributes=True) - assert expected == sanitize_html(input) - - -def test_lowercase_color_codes_in_style(): - sanitized = sanitize_html("<p style=\"border: 1px solid #a2a2a2;\"></p>") - expected = '<p style=\"border: 1px solid #a2a2a2;\"></p>' - assert expected == sanitized - - -def test_uppercase_color_codes_in_style(): - sanitized = sanitize_html("<p style=\"border: 1px solid #A2A2A2;\"></p>") - expected = '<p style=\"border: 1px solid #A2A2A2;\"></p>' - assert expected == sanitized diff --git a/libs/html5lib/tests/test_serializer.py b/libs/html5lib/tests/test_serializer.py deleted file mode 100644 index bce624590..000000000 --- a/libs/html5lib/tests/test_serializer.py +++ /dev/null @@ -1,226 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import os -import json - -import pytest - -from .support import get_data_files - -from html5lib import constants -from html5lib.filters.lint import Filter as Lint -from html5lib.serializer import HTMLSerializer, serialize -from html5lib.treewalkers.base import TreeWalker - -# pylint:disable=wrong-import-position -optionals_loaded = [] - -try: - from lxml import etree - optionals_loaded.append("lxml") -except ImportError: - pass -# pylint:enable=wrong-import-position - -default_namespace = constants.namespaces["html"] - - -class JsonWalker(TreeWalker): - def __iter__(self): - for token in self.tree: - type = token[0] - if type == "StartTag": - if len(token) == 4: - namespace, name, attrib = token[1:4] - else: - namespace = default_namespace - name, attrib = token[1:3] - yield self.startTag(namespace, name, self._convertAttrib(attrib)) - elif type == "EndTag": - if len(token) == 3: - namespace, name = token[1:3] - else: - namespace = default_namespace - name = token[1] - yield self.endTag(namespace, name) - elif type == "EmptyTag": - if len(token) == 4: - namespace, name, attrib = token[1:] - else: - namespace = default_namespace - name, attrib = token[1:] - for token in self.emptyTag(namespace, name, self._convertAttrib(attrib)): - yield token - elif type == "Comment": - yield self.comment(token[1]) - elif type in ("Characters", "SpaceCharacters"): - for token in self.text(token[1]): - yield token - elif type == "Doctype": - if len(token) == 4: - yield self.doctype(token[1], token[2], token[3]) - elif len(token) == 3: - yield self.doctype(token[1], token[2]) - else: - yield self.doctype(token[1]) - else: - raise ValueError("Unknown token type: " + type) - - def _convertAttrib(self, attribs): - """html5lib tree-walkers use a dict of (namespace, name): value for - attributes, but JSON cannot represent this. Convert from the format - in the serializer tests (a list of dicts with "namespace", "name", - and "value" as keys) to html5lib's tree-walker format.""" - attrs = {} - for attrib in attribs: - name = (attrib["namespace"], attrib["name"]) - assert(name not in attrs) - attrs[name] = attrib["value"] - return attrs - - -def serialize_html(input, options): - options = {str(k): v for k, v in options.items()} - encoding = options.get("encoding", None) - if "encoding" in options: - del options["encoding"] - stream = Lint(JsonWalker(input), False) - serializer = HTMLSerializer(alphabetical_attributes=True, **options) - return serializer.render(stream, encoding) - - -def throwsWithLatin1(input): - with pytest.raises(UnicodeEncodeError): - serialize_html(input, {"encoding": "iso-8859-1"}) - - -def testDoctypeName(): - throwsWithLatin1([["Doctype", "\u0101"]]) - - -def testDoctypePublicId(): - throwsWithLatin1([["Doctype", "potato", "\u0101"]]) - - -def testDoctypeSystemId(): - throwsWithLatin1([["Doctype", "potato", "potato", "\u0101"]]) - - -def testCdataCharacters(): - test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]], - ["<style>ā"], {"encoding": "iso-8859-1"}) - - -def testCharacters(): - test_serializer([["Characters", "\u0101"]], - ["ā"], {"encoding": "iso-8859-1"}) - - -def testStartTagName(): - throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "\u0101", []]]) - - -def testAttributeName(): - throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": None, "name": "\u0101", "value": "potato"}]]]) - - -def testAttributeValue(): - test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "span", - [{"namespace": None, "name": "potato", "value": "\u0101"}]]], - ["<span potato=ā>"], {"encoding": "iso-8859-1"}) - - -def testEndTagName(): - throwsWithLatin1([["EndTag", "http://www.w3.org/1999/xhtml", "\u0101"]]) - - -def testComment(): - throwsWithLatin1([["Comment", "\u0101"]]) - - -def testThrowsUnknownOption(): - with pytest.raises(TypeError): - HTMLSerializer(foobar=None) - - [email protected]("c", list("\t\n\u000C\x20\r\"'=<>`")) -def testSpecQuoteAttribute(c): - input_ = [["StartTag", "http://www.w3.org/1999/xhtml", "span", - [{"namespace": None, "name": "foo", "value": c}]]] - if c == '"': - output_ = ["<span foo='%s'>" % c] - else: - output_ = ['<span foo="%s">' % c] - options_ = {"quote_attr_values": "spec"} - test_serializer(input_, output_, options_) - - [email protected]("c", list("\t\n\u000C\x20\r\"'=<>`" - "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n" - "\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15" - "\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000" - "\u2001\u2002\u2003\u2004\u2005\u2006\u2007" - "\u2008\u2009\u200a\u2028\u2029\u202f\u205f" - "\u3000")) -def testLegacyQuoteAttribute(c): - input_ = [["StartTag", "http://www.w3.org/1999/xhtml", "span", - [{"namespace": None, "name": "foo", "value": c}]]] - if c == '"': - output_ = ["<span foo='%s'>" % c] - else: - output_ = ['<span foo="%s">' % c] - options_ = {"quote_attr_values": "legacy"} - test_serializer(input_, output_, options_) - - -def lxml_parser(): - return etree.XMLParser(resolve_entities=False) - - [email protected]("lxml" not in optionals_loaded, reason="lxml not importable") -def testEntityReplacement(lxml_parser): - doc = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>β</html>' - tree = etree.fromstring(doc, parser=lxml_parser).getroottree() - result = serialize(tree, tree="lxml", omit_optional_tags=False) - assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>\u03B2</html>' - - [email protected]("lxml" not in optionals_loaded, reason="lxml not importable") -def testEntityXML(lxml_parser): - doc = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>></html>' - tree = etree.fromstring(doc, parser=lxml_parser).getroottree() - result = serialize(tree, tree="lxml", omit_optional_tags=False) - assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>></html>' - - [email protected]("lxml" not in optionals_loaded, reason="lxml not importable") -def testEntityNoResolve(lxml_parser): - doc = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>β</html>' - tree = etree.fromstring(doc, parser=lxml_parser).getroottree() - result = serialize(tree, tree="lxml", omit_optional_tags=False, - resolve_entities=False) - assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>β</html>' - - -def param_serializer(): - for filename in get_data_files('serializer-testdata', '*.test', os.path.dirname(__file__)): - with open(filename) as fp: - tests = json.load(fp) - for test in tests['tests']: - yield test["input"], test["expected"], test.get("options", {}) - - [email protected]("input, expected, options", param_serializer()) -def test_serializer(input, expected, options): - encoding = options.get("encoding", None) - - if encoding: - expected = list(map(lambda x: x.encode(encoding), expected)) - - result = serialize_html(input, options) - if len(expected) == 1: - assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options)) - elif result not in expected: - assert False, "Expected: %s, Received: %s" % (expected, result) diff --git a/libs/html5lib/tests/test_stream.py b/libs/html5lib/tests/test_stream.py deleted file mode 100644 index efe9b472f..000000000 --- a/libs/html5lib/tests/test_stream.py +++ /dev/null @@ -1,325 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from . import support # noqa - -import codecs -import sys -from io import BytesIO, StringIO - -import pytest - -import six -from six.moves import http_client, urllib - -from html5lib._inputstream import (BufferedStream, HTMLInputStream, - HTMLUnicodeInputStream, HTMLBinaryInputStream) -from html5lib._utils import supports_lone_surrogates - - -def test_basic(): - s = b"abc" - fp = BufferedStream(BytesIO(s)) - read = fp.read(10) - assert read == s - - -def test_read_length(): - fp = BufferedStream(BytesIO(b"abcdef")) - read1 = fp.read(1) - assert read1 == b"a" - read2 = fp.read(2) - assert read2 == b"bc" - read3 = fp.read(3) - assert read3 == b"def" - read4 = fp.read(4) - assert read4 == b"" - - -def test_tell(): - fp = BufferedStream(BytesIO(b"abcdef")) - read1 = fp.read(1) - assert read1 == b"a" - assert fp.tell() == 1 - read2 = fp.read(2) - assert read2 == b"bc" - assert fp.tell() == 3 - read3 = fp.read(3) - assert read3 == b"def" - assert fp.tell() == 6 - read4 = fp.read(4) - assert read4 == b"" - assert fp.tell() == 6 - - -def test_seek(): - fp = BufferedStream(BytesIO(b"abcdef")) - read1 = fp.read(1) - assert read1 == b"a" - fp.seek(0) - read2 = fp.read(1) - assert read2 == b"a" - read3 = fp.read(2) - assert read3 == b"bc" - fp.seek(2) - read4 = fp.read(2) - assert read4 == b"cd" - fp.seek(4) - read5 = fp.read(2) - assert read5 == b"ef" - - -def test_seek_tell(): - fp = BufferedStream(BytesIO(b"abcdef")) - read1 = fp.read(1) - assert read1 == b"a" - assert fp.tell() == 1 - fp.seek(0) - read2 = fp.read(1) - assert read2 == b"a" - assert fp.tell() == 1 - read3 = fp.read(2) - assert read3 == b"bc" - assert fp.tell() == 3 - fp.seek(2) - read4 = fp.read(2) - assert read4 == b"cd" - assert fp.tell() == 4 - fp.seek(4) - read5 = fp.read(2) - assert read5 == b"ef" - assert fp.tell() == 6 - - -class HTMLUnicodeInputStreamShortChunk(HTMLUnicodeInputStream): - _defaultChunkSize = 2 - - -class HTMLBinaryInputStreamShortChunk(HTMLBinaryInputStream): - _defaultChunkSize = 2 - - -def test_char_ascii(): - stream = HTMLInputStream(b"'", override_encoding='ascii') - assert stream.charEncoding[0].name == 'windows-1252' - assert stream.char() == "'" - - -def test_char_utf8(): - stream = HTMLInputStream('\u2018'.encode('utf-8'), override_encoding='utf-8') - assert stream.charEncoding[0].name == 'utf-8' - assert stream.char() == '\u2018' - - -def test_char_win1252(): - stream = HTMLInputStream("\xa9\xf1\u2019".encode('windows-1252')) - assert stream.charEncoding[0].name == 'windows-1252' - assert stream.char() == "\xa9" - assert stream.char() == "\xf1" - assert stream.char() == "\u2019" - - -def test_bom(): - stream = HTMLInputStream(codecs.BOM_UTF8 + b"'") - assert stream.charEncoding[0].name == 'utf-8' - assert stream.char() == "'" - - -def test_utf_16(): - stream = HTMLInputStream((' ' * 1025).encode('utf-16')) - assert stream.charEncoding[0].name in ['utf-16le', 'utf-16be'] - assert len(stream.charsUntil(' ', True)) == 1025 - - -def test_newlines(): - stream = HTMLBinaryInputStreamShortChunk(codecs.BOM_UTF8 + b"a\nbb\r\nccc\rddddxe") - assert stream.position() == (1, 0) - assert stream.charsUntil('c') == "a\nbb\n" - assert stream.position() == (3, 0) - assert stream.charsUntil('x') == "ccc\ndddd" - assert stream.position() == (4, 4) - assert stream.charsUntil('e') == "x" - assert stream.position() == (4, 5) - - -def test_newlines2(): - size = HTMLUnicodeInputStream._defaultChunkSize - stream = HTMLInputStream("\r" * size + "\n") - assert stream.charsUntil('x') == "\n" * size - - -def test_position(): - stream = HTMLBinaryInputStreamShortChunk(codecs.BOM_UTF8 + b"a\nbb\nccc\nddde\nf\ngh") - assert stream.position() == (1, 0) - assert stream.charsUntil('c') == "a\nbb\n" - assert stream.position() == (3, 0) - stream.unget("\n") - assert stream.position() == (2, 2) - assert stream.charsUntil('c') == "\n" - assert stream.position() == (3, 0) - stream.unget("\n") - assert stream.position() == (2, 2) - assert stream.char() == "\n" - assert stream.position() == (3, 0) - assert stream.charsUntil('e') == "ccc\nddd" - assert stream.position() == (4, 3) - assert stream.charsUntil('h') == "e\nf\ng" - assert stream.position() == (6, 1) - - -def test_position2(): - stream = HTMLUnicodeInputStreamShortChunk("abc\nd") - assert stream.position() == (1, 0) - assert stream.char() == "a" - assert stream.position() == (1, 1) - assert stream.char() == "b" - assert stream.position() == (1, 2) - assert stream.char() == "c" - assert stream.position() == (1, 3) - assert stream.char() == "\n" - assert stream.position() == (2, 0) - assert stream.char() == "d" - assert stream.position() == (2, 1) - - -def test_python_issue_20007(): - """ - Make sure we have a work-around for Python bug #20007 - http://bugs.python.org/issue20007 - """ - class FakeSocket(object): - def makefile(self, _mode, _bufsize=None): - # pylint:disable=unused-argument - return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText") - - source = http_client.HTTPResponse(FakeSocket()) - source.begin() - stream = HTMLInputStream(source) - assert stream.charsUntil(" ") == "Text" - - -def test_python_issue_20007_b(): - """ - Make sure we have a work-around for Python bug #20007 - http://bugs.python.org/issue20007 - """ - if six.PY2: - return - - class FakeSocket(object): - def makefile(self, _mode, _bufsize=None): - # pylint:disable=unused-argument - return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText") - - source = http_client.HTTPResponse(FakeSocket()) - source.begin() - wrapped = urllib.response.addinfourl(source, source.msg, "http://example.com") - stream = HTMLInputStream(wrapped) - assert stream.charsUntil(" ") == "Text" - - [email protected]("inp,num", - [("\u0000", 0), - ("\u0001", 1), - ("\u0008", 1), - ("\u0009", 0), - ("\u000A", 0), - ("\u000B", 1), - ("\u000C", 0), - ("\u000D", 0), - ("\u000E", 1), - ("\u001F", 1), - ("\u0020", 0), - ("\u007E", 0), - ("\u007F", 1), - ("\u009F", 1), - ("\u00A0", 0), - ("\uFDCF", 0), - ("\uFDD0", 1), - ("\uFDEF", 1), - ("\uFDF0", 0), - ("\uFFFD", 0), - ("\uFFFE", 1), - ("\uFFFF", 1), - ("\U0001FFFD", 0), - ("\U0001FFFE", 1), - ("\U0001FFFF", 1), - ("\U0002FFFD", 0), - ("\U0002FFFE", 1), - ("\U0002FFFF", 1), - ("\U0003FFFD", 0), - ("\U0003FFFE", 1), - ("\U0003FFFF", 1), - ("\U0004FFFD", 0), - ("\U0004FFFE", 1), - ("\U0004FFFF", 1), - ("\U0005FFFD", 0), - ("\U0005FFFE", 1), - ("\U0005FFFF", 1), - ("\U0006FFFD", 0), - ("\U0006FFFE", 1), - ("\U0006FFFF", 1), - ("\U0007FFFD", 0), - ("\U0007FFFE", 1), - ("\U0007FFFF", 1), - ("\U0008FFFD", 0), - ("\U0008FFFE", 1), - ("\U0008FFFF", 1), - ("\U0009FFFD", 0), - ("\U0009FFFE", 1), - ("\U0009FFFF", 1), - ("\U000AFFFD", 0), - ("\U000AFFFE", 1), - ("\U000AFFFF", 1), - ("\U000BFFFD", 0), - ("\U000BFFFE", 1), - ("\U000BFFFF", 1), - ("\U000CFFFD", 0), - ("\U000CFFFE", 1), - ("\U000CFFFF", 1), - ("\U000DFFFD", 0), - ("\U000DFFFE", 1), - ("\U000DFFFF", 1), - ("\U000EFFFD", 0), - ("\U000EFFFE", 1), - ("\U000EFFFF", 1), - ("\U000FFFFD", 0), - ("\U000FFFFE", 1), - ("\U000FFFFF", 1), - ("\U0010FFFD", 0), - ("\U0010FFFE", 1), - ("\U0010FFFF", 1), - ("\x01\x01\x01", 3), - ("a\x01a\x01a\x01a", 3)]) -def test_invalid_codepoints(inp, num): - stream = HTMLUnicodeInputStream(StringIO(inp)) - for _i in range(len(inp)): - stream.char() - assert len(stream.errors) == num - - [email protected](not supports_lone_surrogates, reason="doesn't support lone surrogates") [email protected]("inp,num", - [("'\\uD7FF'", 0), - ("'\\uD800'", 1), - ("'\\uDBFF'", 1), - ("'\\uDC00'", 1), - ("'\\uDFFF'", 1), - ("'\\uE000'", 0), - ("'\\uD800\\uD800\\uD800'", 3), - ("'a\\uD800a\\uD800a\\uD800a'", 3), - ("'\\uDFFF\\uDBFF'", 2), - pytest.param( - "'\\uDBFF\\uDFFF'", 2, - marks=pytest.mark.skipif( - sys.maxunicode == 0xFFFF, - reason="narrow Python"))]) -def test_invalid_codepoints_surrogates(inp, num): - inp = eval(inp) # pylint:disable=eval-used - fp = StringIO(inp) - if ord(max(fp.read())) > 0xFFFF: - pytest.skip("StringIO altered string") - fp.seek(0) - stream = HTMLUnicodeInputStream(fp) - for _i in range(len(inp)): - stream.char() - assert len(stream.errors) == num diff --git a/libs/html5lib/tests/test_tokenizer2.py b/libs/html5lib/tests/test_tokenizer2.py deleted file mode 100644 index 158d847a2..000000000 --- a/libs/html5lib/tests/test_tokenizer2.py +++ /dev/null @@ -1,66 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import io - -from six import unichr, text_type - -from html5lib._tokenizer import HTMLTokenizer -from html5lib.constants import tokenTypes - - -def ignore_parse_errors(toks): - for tok in toks: - if tok['type'] != tokenTypes['ParseError']: - yield tok - - -def test_maintain_attribute_order(): - # generate loads to maximize the chance a hash-based mutation will occur - attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))] - stream = io.StringIO("<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + ">") - - toks = HTMLTokenizer(stream) - out = list(ignore_parse_errors(toks)) - - assert len(out) == 1 - assert out[0]['type'] == tokenTypes['StartTag'] - - attrs_tok = out[0]['data'] - assert len(attrs_tok) == len(attrs) - - for (in_name, in_value), (out_name, out_value) in zip(attrs, attrs_tok.items()): - assert in_name == out_name - assert in_value == out_value - - -def test_duplicate_attribute(): - stream = io.StringIO("<span a=1 a=2 a=3>") - - toks = HTMLTokenizer(stream) - out = list(ignore_parse_errors(toks)) - - assert len(out) == 1 - assert out[0]['type'] == tokenTypes['StartTag'] - - attrs_tok = out[0]['data'] - assert len(attrs_tok) == 1 - assert list(attrs_tok.items()) == [('a', '1')] - - -def test_maintain_duplicate_attribute_order(): - # generate loads to maximize the chance a hash-based mutation will occur - attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))] - stream = io.StringIO("<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + " a=100>") - - toks = HTMLTokenizer(stream) - out = list(ignore_parse_errors(toks)) - - assert len(out) == 1 - assert out[0]['type'] == tokenTypes['StartTag'] - - attrs_tok = out[0]['data'] - assert len(attrs_tok) == len(attrs) - - for (in_name, in_value), (out_name, out_value) in zip(attrs, attrs_tok.items()): - assert in_name == out_name - assert in_value == out_value diff --git a/libs/html5lib/tests/test_treeadapters.py b/libs/html5lib/tests/test_treeadapters.py deleted file mode 100644 index 95e56c00c..000000000 --- a/libs/html5lib/tests/test_treeadapters.py +++ /dev/null @@ -1,40 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from . import support # noqa - -import html5lib -from html5lib.treeadapters import sax -from html5lib.treewalkers import getTreeWalker - - -def test_to_sax(): - handler = support.TracingSaxHandler() - tree = html5lib.parse("""<html xml:lang="en"> - <title>Directory Listing</title> - <a href="/"><b/></p> - """, treebuilder="etree") - walker = getTreeWalker("etree") - sax.to_sax(walker(tree), handler) - expected = [ - 'startDocument', - ('startElementNS', ('http://www.w3.org/1999/xhtml', 'html'), - 'html', {(None, 'xml:lang'): 'en'}), - ('startElementNS', ('http://www.w3.org/1999/xhtml', 'head'), 'head', {}), - ('startElementNS', ('http://www.w3.org/1999/xhtml', 'title'), 'title', {}), - ('characters', 'Directory Listing'), - ('endElementNS', ('http://www.w3.org/1999/xhtml', 'title'), 'title'), - ('characters', '\n '), - ('endElementNS', ('http://www.w3.org/1999/xhtml', 'head'), 'head'), - ('startElementNS', ('http://www.w3.org/1999/xhtml', 'body'), 'body', {}), - ('startElementNS', ('http://www.w3.org/1999/xhtml', 'a'), 'a', {(None, 'href'): '/'}), - ('startElementNS', ('http://www.w3.org/1999/xhtml', 'b'), 'b', {}), - ('startElementNS', ('http://www.w3.org/1999/xhtml', 'p'), 'p', {}), - ('endElementNS', ('http://www.w3.org/1999/xhtml', 'p'), 'p'), - ('characters', '\n '), - ('endElementNS', ('http://www.w3.org/1999/xhtml', 'b'), 'b'), - ('endElementNS', ('http://www.w3.org/1999/xhtml', 'a'), 'a'), - ('endElementNS', ('http://www.w3.org/1999/xhtml', 'body'), 'body'), - ('endElementNS', ('http://www.w3.org/1999/xhtml', 'html'), 'html'), - 'endDocument', - ] - assert expected == handler.visited diff --git a/libs/html5lib/tests/test_treewalkers.py b/libs/html5lib/tests/test_treewalkers.py deleted file mode 100644 index 780ca964b..000000000 --- a/libs/html5lib/tests/test_treewalkers.py +++ /dev/null @@ -1,205 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import itertools -import sys - -from six import unichr, text_type -import pytest - -try: - import lxml.etree -except ImportError: - pass - -from .support import treeTypes - -from html5lib import html5parser, treewalkers -from html5lib.filters.lint import Filter as Lint - -import re -attrlist = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+", re.M) - - -def sortattrs(x): - lines = x.group(0).split("\n") - lines.sort() - return "\n".join(lines) - - -def test_all_tokens(): - expected = [ - {'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'}, - {'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'}, - {'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'}, - {'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'}, - {'data': 'a', 'type': 'Characters'}, - {'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'}, - {'data': 'b', 'type': 'Characters'}, - {'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'}, - {'data': 'c', 'type': 'Characters'}, - {'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'}, - {'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'} - ] - for _, treeCls in sorted(treeTypes.items()): - if treeCls is None: - continue - p = html5parser.HTMLParser(tree=treeCls["builder"]) - document = p.parse("<html><head></head><body>a<div>b</div>c</body></html>") - document = treeCls.get("adapter", lambda x: x)(document) - output = Lint(treeCls["walker"](document)) - for expectedToken, outputToken in zip(expected, output): - assert expectedToken == outputToken - - -def set_attribute_on_first_child(docfrag, name, value, treeName): - """naively sets an attribute on the first child of the document - fragment passed in""" - setter = {'ElementTree': lambda d: d[0].set, - 'DOM': lambda d: d.firstChild.setAttribute} - setter['cElementTree'] = setter['ElementTree'] - try: - setter.get(treeName, setter['DOM'])(docfrag)(name, value) - except AttributeError: - setter['ElementTree'](docfrag)(name, value) - - -def param_treewalker_six_mix(): - """Str/Unicode mix. If str attrs added to tree""" - - # On Python 2.x string literals are of type str. Unless, like this - # file, the programmer imports unicode_literals from __future__. - # In that case, string literals become objects of type unicode. - - # This test simulates a Py2 user, modifying attributes on a document - # fragment but not using the u'' syntax nor importing unicode_literals - sm_tests = [ - ('<a href="http://example.com">Example</a>', - [(str('class'), str('test123'))], - '<a>\n class="test123"\n href="http://example.com"\n "Example"'), - - ('<link href="http://example.com/cow">', - [(str('rel'), str('alternate'))], - '<link>\n href="http://example.com/cow"\n rel="alternate"\n "Example"') - ] - - for tree in sorted(treeTypes.items()): - for intext, attrs, expected in sm_tests: - yield intext, expected, attrs, tree - - [email protected]("intext, expected, attrs_to_add, tree", param_treewalker_six_mix()) -def test_treewalker_six_mix(intext, expected, attrs_to_add, tree): - """tests what happens when we add attributes to the intext""" - treeName, treeClass = tree - if treeClass is None: - pytest.skip("Treebuilder not loaded") - parser = html5parser.HTMLParser(tree=treeClass["builder"]) - document = parser.parseFragment(intext) - for nom, val in attrs_to_add: - set_attribute_on_first_child(document, nom, val, treeName) - - document = treeClass.get("adapter", lambda x: x)(document) - output = treewalkers.pprint(treeClass["walker"](document)) - output = attrlist.sub(sortattrs, output) - if output not in expected: - raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output)) - - [email protected]("tree,char", itertools.product(sorted(treeTypes.items()), ["x", "\u1234"])) -def test_fragment_single_char(tree, char): - expected = [ - {'data': char, 'type': 'Characters'} - ] - - treeName, treeClass = tree - if treeClass is None: - pytest.skip("Treebuilder not loaded") - - parser = html5parser.HTMLParser(tree=treeClass["builder"]) - document = parser.parseFragment(char) - document = treeClass.get("adapter", lambda x: x)(document) - output = Lint(treeClass["walker"](document)) - - assert list(output) == expected - - [email protected](treeTypes["lxml"] is None, reason="lxml not importable") -def test_lxml_xml(): - expected = [ - {'data': {}, 'name': 'div', 'namespace': None, 'type': 'StartTag'}, - {'data': {}, 'name': 'div', 'namespace': None, 'type': 'StartTag'}, - {'name': 'div', 'namespace': None, 'type': 'EndTag'}, - {'name': 'div', 'namespace': None, 'type': 'EndTag'} - ] - - lxmltree = lxml.etree.fromstring('<div><div></div></div>') - walker = treewalkers.getTreeWalker('lxml') - output = Lint(walker(lxmltree)) - - assert list(output) == expected - - [email protected]("treeName", - [pytest.param(treeName, marks=[getattr(pytest.mark, treeName), - pytest.mark.skipif( - treeName != "lxml" or - sys.version_info < (3, 7), reason="dict order undef")]) - for treeName in sorted(treeTypes.keys())]) -def test_maintain_attribute_order(treeName): - treeAPIs = treeTypes[treeName] - if treeAPIs is None: - pytest.skip("Treebuilder not loaded") - - # generate loads to maximize the chance a hash-based mutation will occur - attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))] - data = "<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + ">" - - parser = html5parser.HTMLParser(tree=treeAPIs["builder"]) - document = parser.parseFragment(data) - - document = treeAPIs.get("adapter", lambda x: x)(document) - output = list(Lint(treeAPIs["walker"](document))) - - assert len(output) == 2 - assert output[0]['type'] == 'StartTag' - assert output[1]['type'] == "EndTag" - - attrs_out = output[0]['data'] - assert len(attrs) == len(attrs_out) - - for (in_name, in_value), (out_name, out_value) in zip(attrs, attrs_out.items()): - assert (None, in_name) == out_name - assert in_value == out_value - - [email protected]("treeName", - [pytest.param(treeName, marks=[getattr(pytest.mark, treeName), - pytest.mark.skipif( - treeName != "lxml" or - sys.version_info < (3, 7), reason="dict order undef")]) - for treeName in sorted(treeTypes.keys())]) -def test_maintain_attribute_order_adjusted(treeName): - treeAPIs = treeTypes[treeName] - if treeAPIs is None: - pytest.skip("Treebuilder not loaded") - - # generate loads to maximize the chance a hash-based mutation will occur - data = "<svg a=1 refx=2 b=3 xml:lang=4 c=5>" - - parser = html5parser.HTMLParser(tree=treeAPIs["builder"]) - document = parser.parseFragment(data) - - document = treeAPIs.get("adapter", lambda x: x)(document) - output = list(Lint(treeAPIs["walker"](document))) - - assert len(output) == 2 - assert output[0]['type'] == 'StartTag' - assert output[1]['type'] == "EndTag" - - attrs_out = output[0]['data'] - - assert list(attrs_out.items()) == [((None, 'a'), '1'), - ((None, 'refX'), '2'), - ((None, 'b'), '3'), - (('http://www.w3.org/XML/1998/namespace', 'lang'), '4'), - ((None, 'c'), '5')] diff --git a/libs/html5lib/tests/test_whitespace_filter.py b/libs/html5lib/tests/test_whitespace_filter.py deleted file mode 100644 index e9da6140a..000000000 --- a/libs/html5lib/tests/test_whitespace_filter.py +++ /dev/null @@ -1,125 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from html5lib.filters.whitespace import Filter -from html5lib.constants import spaceCharacters -spaceCharacters = "".join(spaceCharacters) - - -def runTest(input, expected): - output = list(Filter(input)) - errorMsg = "\n".join(["\n\nInput:", str(input), - "\nExpected:", str(expected), - "\nReceived:", str(output)]) - assert expected == output, errorMsg - - -def runTestUnmodifiedOutput(input): - runTest(input, input) - - -def testPhrasingElements(): - runTestUnmodifiedOutput( - [{"type": "Characters", "data": "This is a "}, - {"type": "StartTag", "name": "span", "data": []}, - {"type": "Characters", "data": "phrase"}, - {"type": "EndTag", "name": "span", "data": []}, - {"type": "SpaceCharacters", "data": " "}, - {"type": "Characters", "data": "with"}, - {"type": "SpaceCharacters", "data": " "}, - {"type": "StartTag", "name": "em", "data": []}, - {"type": "Characters", "data": "emphasised text"}, - {"type": "EndTag", "name": "em", "data": []}, - {"type": "Characters", "data": " and an "}, - {"type": "StartTag", "name": "img", "data": [["alt", "image"]]}, - {"type": "Characters", "data": "."}]) - - -def testLeadingWhitespace(): - runTest( - [{"type": "StartTag", "name": "p", "data": []}, - {"type": "SpaceCharacters", "data": spaceCharacters}, - {"type": "Characters", "data": "foo"}, - {"type": "EndTag", "name": "p", "data": []}], - [{"type": "StartTag", "name": "p", "data": []}, - {"type": "SpaceCharacters", "data": " "}, - {"type": "Characters", "data": "foo"}, - {"type": "EndTag", "name": "p", "data": []}]) - - -def testLeadingWhitespaceAsCharacters(): - runTest( - [{"type": "StartTag", "name": "p", "data": []}, - {"type": "Characters", "data": spaceCharacters + "foo"}, - {"type": "EndTag", "name": "p", "data": []}], - [{"type": "StartTag", "name": "p", "data": []}, - {"type": "Characters", "data": " foo"}, - {"type": "EndTag", "name": "p", "data": []}]) - - -def testTrailingWhitespace(): - runTest( - [{"type": "StartTag", "name": "p", "data": []}, - {"type": "Characters", "data": "foo"}, - {"type": "SpaceCharacters", "data": spaceCharacters}, - {"type": "EndTag", "name": "p", "data": []}], - [{"type": "StartTag", "name": "p", "data": []}, - {"type": "Characters", "data": "foo"}, - {"type": "SpaceCharacters", "data": " "}, - {"type": "EndTag", "name": "p", "data": []}]) - - -def testTrailingWhitespaceAsCharacters(): - runTest( - [{"type": "StartTag", "name": "p", "data": []}, - {"type": "Characters", "data": "foo" + spaceCharacters}, - {"type": "EndTag", "name": "p", "data": []}], - [{"type": "StartTag", "name": "p", "data": []}, - {"type": "Characters", "data": "foo "}, - {"type": "EndTag", "name": "p", "data": []}]) - - -def testWhitespace(): - runTest( - [{"type": "StartTag", "name": "p", "data": []}, - {"type": "Characters", "data": "foo" + spaceCharacters + "bar"}, - {"type": "EndTag", "name": "p", "data": []}], - [{"type": "StartTag", "name": "p", "data": []}, - {"type": "Characters", "data": "foo bar"}, - {"type": "EndTag", "name": "p", "data": []}]) - - -def testLeadingWhitespaceInPre(): - runTestUnmodifiedOutput( - [{"type": "StartTag", "name": "pre", "data": []}, - {"type": "SpaceCharacters", "data": spaceCharacters}, - {"type": "Characters", "data": "foo"}, - {"type": "EndTag", "name": "pre", "data": []}]) - - -def testLeadingWhitespaceAsCharactersInPre(): - runTestUnmodifiedOutput( - [{"type": "StartTag", "name": "pre", "data": []}, - {"type": "Characters", "data": spaceCharacters + "foo"}, - {"type": "EndTag", "name": "pre", "data": []}]) - - -def testTrailingWhitespaceInPre(): - runTestUnmodifiedOutput( - [{"type": "StartTag", "name": "pre", "data": []}, - {"type": "Characters", "data": "foo"}, - {"type": "SpaceCharacters", "data": spaceCharacters}, - {"type": "EndTag", "name": "pre", "data": []}]) - - -def testTrailingWhitespaceAsCharactersInPre(): - runTestUnmodifiedOutput( - [{"type": "StartTag", "name": "pre", "data": []}, - {"type": "Characters", "data": "foo" + spaceCharacters}, - {"type": "EndTag", "name": "pre", "data": []}]) - - -def testWhitespaceInPre(): - runTestUnmodifiedOutput( - [{"type": "StartTag", "name": "pre", "data": []}, - {"type": "Characters", "data": "foo" + spaceCharacters + "bar"}, - {"type": "EndTag", "name": "pre", "data": []}]) diff --git a/libs/html5lib/tests/tokenizer.py b/libs/html5lib/tests/tokenizer.py deleted file mode 100644 index 47264cc32..000000000 --- a/libs/html5lib/tests/tokenizer.py +++ /dev/null @@ -1,253 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import codecs -import json -import warnings -import re - -import pytest -from six import unichr - -from html5lib._tokenizer import HTMLTokenizer -from html5lib import constants, _utils - - -class TokenizerTestParser(object): - def __init__(self, initialState, lastStartTag=None): - self.tokenizer = HTMLTokenizer - self._state = initialState - self._lastStartTag = lastStartTag - - def parse(self, stream, encoding=None, innerHTML=False): - # pylint:disable=unused-argument - tokenizer = self.tokenizer(stream, encoding) - self.outputTokens = [] - - tokenizer.state = getattr(tokenizer, self._state) - if self._lastStartTag is not None: - tokenizer.currentToken = {"type": "startTag", - "name": self._lastStartTag} - - types = {v: k for k, v in constants.tokenTypes.items()} - for token in tokenizer: - getattr(self, 'process%s' % types[token["type"]])(token) - - return self.outputTokens - - def processDoctype(self, token): - self.outputTokens.append(["DOCTYPE", token["name"], token["publicId"], - token["systemId"], token["correct"]]) - - def processStartTag(self, token): - self.outputTokens.append(["StartTag", token["name"], - token["data"], token["selfClosing"]]) - - def processEmptyTag(self, token): - if token["name"] not in constants.voidElements: - self.outputTokens.append("ParseError") - self.outputTokens.append(["StartTag", token["name"], dict(token["data"][::-1])]) - - def processEndTag(self, token): - self.outputTokens.append(["EndTag", token["name"], - token["selfClosing"]]) - - def processComment(self, token): - self.outputTokens.append(["Comment", token["data"]]) - - def processSpaceCharacters(self, token): - self.outputTokens.append(["Character", token["data"]]) - self.processSpaceCharacters = self.processCharacters - - def processCharacters(self, token): - self.outputTokens.append(["Character", token["data"]]) - - def processEOF(self, token): - pass - - def processParseError(self, token): - self.outputTokens.append(["ParseError", token["data"]]) - - -def concatenateCharacterTokens(tokens): - outputTokens = [] - for token in tokens: - if "ParseError" not in token and token[0] == "Character": - if (outputTokens and "ParseError" not in outputTokens[-1] and - outputTokens[-1][0] == "Character"): - outputTokens[-1][1] += token[1] - else: - outputTokens.append(token) - else: - outputTokens.append(token) - return outputTokens - - -def normalizeTokens(tokens): - # TODO: convert tests to reflect arrays - for i, token in enumerate(tokens): - if token[0] == 'ParseError': - tokens[i] = token[0] - return tokens - - -def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder, - ignoreErrors=False): - """Test whether the test has passed or failed - - If the ignoreErrorOrder flag is set to true we don't test the relative - positions of parse errors and non parse errors - """ - checkSelfClosing = False - for token in expectedTokens: - if (token[0] == "StartTag" and len(token) == 4 or - token[0] == "EndTag" and len(token) == 3): - checkSelfClosing = True - break - - if not checkSelfClosing: - for token in receivedTokens: - if token[0] == "StartTag" or token[0] == "EndTag": - token.pop() - - if not ignoreErrorOrder and not ignoreErrors: - expectedTokens = concatenateCharacterTokens(expectedTokens) - return expectedTokens == receivedTokens - else: - # Sort the tokens into two groups; non-parse errors and parse errors - tokens = {"expected": [[], []], "received": [[], []]} - for tokenType, tokenList in zip(list(tokens.keys()), - (expectedTokens, receivedTokens)): - for token in tokenList: - if token != "ParseError": - tokens[tokenType][0].append(token) - else: - if not ignoreErrors: - tokens[tokenType][1].append(token) - tokens[tokenType][0] = concatenateCharacterTokens(tokens[tokenType][0]) - return tokens["expected"] == tokens["received"] - - -_surrogateRe = re.compile(r"\\u([0-9A-Fa-f]{4})(?:\\u([0-9A-Fa-f]{4}))?") - - -def unescape(test): - def decode(inp): - """Decode \\uXXXX escapes - - This decodes \\uXXXX escapes, possibly into non-BMP characters when - two surrogate character escapes are adjacent to each other. - """ - # This cannot be implemented using the unicode_escape codec - # because that requires its input be ISO-8859-1, and we need - # arbitrary unicode as input. - def repl(m): - if m.group(2) is not None: - high = int(m.group(1), 16) - low = int(m.group(2), 16) - if 0xD800 <= high <= 0xDBFF and 0xDC00 <= low <= 0xDFFF: - cp = ((high - 0xD800) << 10) + (low - 0xDC00) + 0x10000 - return unichr(cp) - else: - return unichr(high) + unichr(low) - else: - return unichr(int(m.group(1), 16)) - try: - return _surrogateRe.sub(repl, inp) - except ValueError: - # This occurs when unichr throws ValueError, which should - # only be for a lone-surrogate. - if _utils.supports_lone_surrogates: - raise - return None - - test["input"] = decode(test["input"]) - for token in test["output"]: - if token == "ParseError": - continue - else: - token[1] = decode(token[1]) - if len(token) > 2: - for key, value in token[2]: - del token[2][key] - token[2][decode(key)] = decode(value) - return test - - -def _doCapitalize(match): - return match.group(1).upper() - - -_capitalizeRe = re.compile(r"\W+(\w)").sub - - -def capitalize(s): - s = s.lower() - s = _capitalizeRe(_doCapitalize, s) - return s - - -class TokenizerFile(pytest.File): - def collect(self): - with codecs.open(str(self.fspath), "r", encoding="utf-8") as fp: - tests = json.load(fp) - if 'tests' in tests: - for i, test in enumerate(tests['tests']): - yield TokenizerTestCollector(str(i), self, testdata=test) - - -class TokenizerTestCollector(pytest.Collector): - def __init__(self, name, parent=None, config=None, session=None, testdata=None): - super(TokenizerTestCollector, self).__init__(name, parent, config, session) - if 'initialStates' not in testdata: - testdata["initialStates"] = ["Data state"] - if 'doubleEscaped' in testdata: - testdata = unescape(testdata) - self.testdata = testdata - - def collect(self): - for initialState in self.testdata["initialStates"]: - initialState = capitalize(initialState) - item = TokenizerTest(initialState, - self, - self.testdata, - initialState) - if self.testdata["input"] is None: - item.add_marker(pytest.mark.skipif(True, reason="Relies on lone surrogates")) - yield item - - -class TokenizerTest(pytest.Item): - def __init__(self, name, parent, test, initialState): - super(TokenizerTest, self).__init__(name, parent) - self.obj = lambda: 1 # this is to hack around skipif needing a function! - self.test = test - self.initialState = initialState - - def runtest(self): - warnings.resetwarnings() - warnings.simplefilter("error") - - expected = self.test['output'] - if 'lastStartTag' not in self.test: - self.test['lastStartTag'] = None - parser = TokenizerTestParser(self.initialState, - self.test['lastStartTag']) - tokens = parser.parse(self.test['input']) - received = normalizeTokens(tokens) - errorMsg = "\n".join(["\n\nInitial state:", - self.initialState, - "\nInput:", self.test['input'], - "\nExpected:", repr(expected), - "\nreceived:", repr(tokens)]) - errorMsg = errorMsg - ignoreErrorOrder = self.test.get('ignoreErrorOrder', False) - assert tokensMatch(expected, received, ignoreErrorOrder, True), errorMsg - - def repr_failure(self, excinfo): - traceback = excinfo.traceback - ntraceback = traceback.cut(path=__file__) - excinfo.traceback = ntraceback.filter() - - return excinfo.getrepr(funcargs=True, - showlocals=False, - style="short", tbfilter=False) diff --git a/libs/html5lib/tests/tokenizertotree.py b/libs/html5lib/tests/tokenizertotree.py deleted file mode 100644 index 8528e8766..000000000 --- a/libs/html5lib/tests/tokenizertotree.py +++ /dev/null @@ -1,69 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import sys -import os -import json -import re - -import html5lib -from . import support -from . import test_tokenizer - -p = html5lib.HTMLParser() - -unnamespaceExpected = re.compile(r"^(\|\s*)<html ([^>]+)>", re.M).sub - - -def main(out_path): - if not os.path.exists(out_path): - sys.stderr.write("Path %s does not exist" % out_path) - sys.exit(1) - - for filename in support.get_data_files('tokenizer', '*.test'): - run_file(filename, out_path) - - -def run_file(filename, out_path): - try: - tests_data = json.load(open(filename, "r")) - except ValueError: - sys.stderr.write("Failed to load %s\n" % filename) - return - name = os.path.splitext(os.path.split(filename)[1])[0] - output_file = open(os.path.join(out_path, "tokenizer_%s.dat" % name), "w") - - if 'tests' in tests_data: - for test_data in tests_data['tests']: - if 'initialStates' not in test_data: - test_data["initialStates"] = ["Data state"] - - for initial_state in test_data["initialStates"]: - if initial_state != "Data state": - # don't support this yet - continue - test = make_test(test_data) - output_file.write(test) - - output_file.close() - - -def make_test(test_data): - if 'doubleEscaped' in test_data: - test_data = test_tokenizer.unescape_test(test_data) - - rv = [] - rv.append("#data") - rv.append(test_data["input"].encode("utf8")) - rv.append("#errors") - tree = p.parse(test_data["input"]) - output = p.tree.testSerializer(tree) - output = "\n".join(("| " + line[3:]) if line.startswith("| ") else line - for line in output.split("\n")) - output = unnamespaceExpected(r"\1<\2>", output) - rv.append(output.encode("utf8")) - rv.append("") - return "\n".join(rv) - - -if __name__ == "__main__": - main(sys.argv[1]) diff --git a/libs/html5lib/tests/tree_construction.py b/libs/html5lib/tests/tree_construction.py deleted file mode 100644 index 1ef6e7250..000000000 --- a/libs/html5lib/tests/tree_construction.py +++ /dev/null @@ -1,205 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import itertools -import re -import warnings -from difflib import unified_diff - -import pytest - -from .support import TestData, convert, convertExpected, treeTypes -from html5lib import html5parser, constants, treewalkers -from html5lib.filters.lint import Filter as Lint - -_attrlist_re = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+", re.M) - - -def sortattrs(s): - def replace(m): - lines = m.group(0).split("\n") - lines.sort() - return "\n".join(lines) - return _attrlist_re.sub(replace, s) - - -class TreeConstructionFile(pytest.File): - def collect(self): - tests = TestData(str(self.fspath), "data") - for i, test in enumerate(tests): - yield TreeConstructionTest(str(i), self, testdata=test) - - -class TreeConstructionTest(pytest.Collector): - def __init__(self, name, parent=None, config=None, session=None, testdata=None): - super(TreeConstructionTest, self).__init__(name, parent, config, session) - self.testdata = testdata - - def collect(self): - for treeName, treeAPIs in sorted(treeTypes.items()): - for x in itertools.chain(self._getParserTests(treeName, treeAPIs), - self._getTreeWalkerTests(treeName, treeAPIs)): - yield x - - def _getParserTests(self, treeName, treeAPIs): - if treeAPIs is not None and "adapter" in treeAPIs: - return - for namespaceHTMLElements in (True, False): - if namespaceHTMLElements: - nodeid = "%s::parser::namespaced" % treeName - else: - nodeid = "%s::parser::void-namespace" % treeName - item = ParserTest(nodeid, - self, - self.testdata, - treeAPIs["builder"] if treeAPIs is not None else None, - namespaceHTMLElements) - item.add_marker(getattr(pytest.mark, treeName)) - item.add_marker(pytest.mark.parser) - if namespaceHTMLElements: - item.add_marker(pytest.mark.namespaced) - yield item - - def _getTreeWalkerTests(self, treeName, treeAPIs): - nodeid = "%s::treewalker" % treeName - item = TreeWalkerTest(nodeid, - self, - self.testdata, - treeAPIs) - item.add_marker(getattr(pytest.mark, treeName)) - item.add_marker(pytest.mark.treewalker) - yield item - - -def convertTreeDump(data): - return "\n".join(convert(3)(data).split("\n")[1:]) - - -namespaceExpected = re.compile(r"^(\s*)<(\S+)>", re.M).sub - - -class ParserTest(pytest.Item): - def __init__(self, name, parent, test, treeClass, namespaceHTMLElements): - super(ParserTest, self).__init__(name, parent) - self.test = test - self.treeClass = treeClass - self.namespaceHTMLElements = namespaceHTMLElements - - def runtest(self): - if self.treeClass is None: - pytest.skip("Treebuilder not loaded") - - p = html5parser.HTMLParser(tree=self.treeClass, - namespaceHTMLElements=self.namespaceHTMLElements) - - input = self.test['data'] - fragmentContainer = self.test['document-fragment'] - expected = convertExpected(self.test['document']) - expectedErrors = self.test['errors'].split("\n") if self.test['errors'] else [] - - scripting = False - if 'script-on' in self.test: - scripting = True - - with warnings.catch_warnings(): - warnings.simplefilter("error") - try: - if fragmentContainer: - document = p.parseFragment(input, fragmentContainer, scripting=scripting) - else: - document = p.parse(input, scripting=scripting) - except constants.DataLossWarning: - pytest.skip("data loss warning") - - output = convertTreeDump(p.tree.testSerializer(document)) - - expected = expected - if self.namespaceHTMLElements: - expected = namespaceExpected(r"\1<html \2>", expected) - - errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected, - "\nReceived:", output]) - assert expected == output, errorMsg - - errStr = [] - for (line, col), errorcode, datavars in p.errors: - assert isinstance(datavars, dict), "%s, %s" % (errorcode, repr(datavars)) - errStr.append("Line: %i Col: %i %s" % (line, col, - constants.E[errorcode] % datavars)) - - errorMsg2 = "\n".join(["\n\nInput:", input, - "\nExpected errors (" + str(len(expectedErrors)) + "):\n" + "\n".join(expectedErrors), - "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)]) - if False: # we're currently not testing parse errors - assert len(p.errors) == len(expectedErrors), errorMsg2 - - def repr_failure(self, excinfo): - traceback = excinfo.traceback - ntraceback = traceback.cut(path=__file__) - excinfo.traceback = ntraceback.filter() - - return excinfo.getrepr(funcargs=True, - showlocals=False, - style="short", tbfilter=False) - - -class TreeWalkerTest(pytest.Item): - def __init__(self, name, parent, test, treeAPIs): - super(TreeWalkerTest, self).__init__(name, parent) - self.test = test - self.treeAPIs = treeAPIs - - def runtest(self): - if self.treeAPIs is None: - pytest.skip("Treebuilder not loaded") - - p = html5parser.HTMLParser(tree=self.treeAPIs["builder"]) - - input = self.test['data'] - fragmentContainer = self.test['document-fragment'] - expected = convertExpected(self.test['document']) - - scripting = False - if 'script-on' in self.test: - scripting = True - - with warnings.catch_warnings(): - warnings.simplefilter("error") - try: - if fragmentContainer: - document = p.parseFragment(input, fragmentContainer, scripting=scripting) - else: - document = p.parse(input, scripting=scripting) - except constants.DataLossWarning: - pytest.skip("data loss warning") - - poutput = convertTreeDump(p.tree.testSerializer(document)) - namespace_expected = namespaceExpected(r"\1<html \2>", expected) - if poutput != namespace_expected: - pytest.skip("parser output incorrect") - - document = self.treeAPIs.get("adapter", lambda x: x)(document) - - try: - output = treewalkers.pprint(Lint(self.treeAPIs["walker"](document))) - output = sortattrs(output) - expected = sortattrs(expected) - diff = "".join(unified_diff([line + "\n" for line in expected.splitlines()], - [line + "\n" for line in output.splitlines()], - "Expected", "Received")) - assert expected == output, "\n".join([ - "", "Input:", input, - "", "Expected:", expected, - "", "Received:", output, - "", "Diff:", diff, - ]) - except NotImplementedError: - pytest.skip("tree walker NotImplementedError") - - def repr_failure(self, excinfo): - traceback = excinfo.traceback - ntraceback = traceback.cut(path=__file__) - excinfo.traceback = ntraceback.filter() - - return excinfo.getrepr(funcargs=True, - showlocals=False, - style="short", tbfilter=False) diff --git a/libs/html5lib/tests/us-ascii.html b/libs/html5lib/tests/us-ascii.html deleted file mode 100644 index bf8fb5761..000000000 --- a/libs/html5lib/tests/us-ascii.html +++ /dev/null @@ -1,3 +0,0 @@ -<!doctype html> -<title>Test</title> -<p>Hello World!
\ No newline at end of file diff --git a/libs/html5lib/tests/utf-8-bom.html b/libs/html5lib/tests/utf-8-bom.html deleted file mode 100644 index 0f03b8da6..000000000 --- a/libs/html5lib/tests/utf-8-bom.html +++ /dev/null @@ -1,3 +0,0 @@ -<!doctype html> -<title>Test</title> -<p>Hello World! ©
\ No newline at end of file |