diff --git a/.pytest.expect b/.pytest.expect index 0fa326f0..1b3705a7 100644 --- a/.pytest.expect +++ b/.pytest.expect @@ -1,7 +1,7 @@ pytest-expect file v1 -(2, 7, 11, 'final', 0) -b'html5lib/tests/test_encoding.py::test_encoding::[110]': FAIL -b'html5lib/tests/test_encoding.py::test_encoding::[111]': FAIL +(2, 7, 18, 'final', 0) +b'html5lib/tests/test_encoding.py::test_parser_encoding[\\n-iso-8859-2]': FAIL +b'html5lib/tests/test_encoding.py::test_prescan_encoding[\\n-iso-8859-2]': FAIL u'html5lib/tests/testdata/tokenizer/test2.test::0::dataState': FAIL u'html5lib/tests/testdata/tokenizer/test3.test::228::dataState': FAIL u'html5lib/tests/testdata/tokenizer/test3.test::231::dataState': FAIL diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py index 9a411c77..47c4814a 100644 --- a/html5lib/tests/test_encoding.py +++ b/html5lib/tests/test_encoding.py @@ -75,7 +75,15 @@ def test_parser_args_raises(kwargs): assert exc_info.value.args[0].startswith("Cannot set an encoding with a unicode input") -def runParserEncodingTest(data, encoding): +def param_encoding(): + for filename in get_data_files("encoding"): + tests = _TestData(filename, b"data", encoding=None) + for test in tests: + yield test[b'data'], test[b'encoding'] + + +@pytest.mark.parametrize("data, encoding", param_encoding()) +def test_parser_encoding(data, encoding): p = HTMLParser() assert p.documentEncoding is None p.parse(data, useChardet=False) @@ -84,7 +92,8 @@ def runParserEncodingTest(data, encoding): assert encoding == p.documentEncoding, errorMessage(data, encoding, p.documentEncoding) -def runPreScanEncodingTest(data, encoding): +@pytest.mark.parametrize("data, encoding", param_encoding()) +def test_prescan_encoding(data, encoding): stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False) encoding = encoding.lower().decode("ascii") @@ -95,14 +104,6 @@ def runPreScanEncodingTest(data, encoding): assert encoding == stream.charEncoding[0].name, errorMessage(data, encoding, stream.charEncoding[0].name) -def test_encoding(): - for filename in get_data_files("encoding"): - tests = _TestData(filename, b"data", encoding=None) - for test in tests: - yield (runParserEncodingTest, test[b'data'], test[b'encoding']) - yield (runPreScanEncodingTest, test[b'data'], test[b'encoding']) - - # pylint:disable=wrong-import-position try: import chardet # noqa diff --git a/html5lib/tests/test_sanitizer.py b/html5lib/tests/test_sanitizer.py index 45046d57..9a8e7f2d 100644 --- a/html5lib/tests/test_sanitizer.py +++ b/html5lib/tests/test_sanitizer.py @@ -1,21 +1,11 @@ from __future__ import absolute_import, division, unicode_literals +import pytest + from html5lib import constants, parseFragment, serialize from html5lib.filters import sanitizer -def runSanitizerTest(_, expected, input): - parsed = parseFragment(expected) - expected = serialize(parsed, - omit_optional_tags=False, - use_trailing_solidus=True, - space_before_trailing_solidus=False, - quote_attr_values="always", - quote_char='"', - alphabetical_attributes=True) - assert expected == sanitize_html(input) - - def sanitize_html(stream): parsed = parseFragment(stream) serialized = serialize(parsed, @@ -59,7 +49,7 @@ def test_data_uri_disallowed_type(): assert expected == sanitized -def test_sanitizer(): +def param_sanitizer(): for ns, tag_name in sanitizer.allowed_elements: if ns != constants.namespaces["html"]: continue @@ -67,19 +57,19 @@ def test_sanitizer(): 'tfoot', 'th', 'thead', 'tr', 'select']: continue # TODO if tag_name == 'image': - yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name, + yield ("test_should_allow_%s_tag" % tag_name, "foo <bad>bar</bad> baz", "<%s title='1'>foo bar baz" % (tag_name, tag_name)) elif tag_name == 'br': - yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name, + yield ("test_should_allow_%s_tag" % tag_name, "
foo <bad>bar</bad> baz
", "<%s title='1'>foo bar baz" % (tag_name, tag_name)) elif tag_name in constants.voidElements: - yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name, + yield ("test_should_allow_%s_tag" % tag_name, "<%s title=\"1\"/>foo <bad>bar</bad> baz" % tag_name, "<%s title='1'>foo bar baz" % (tag_name, tag_name)) else: - yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name, + yield ("test_should_allow_%s_tag" % tag_name, "<%s title=\"1\">foo <bad>bar</bad> baz" % (tag_name, tag_name), "<%s title='1'>foo bar baz" % (tag_name, tag_name)) @@ -93,7 +83,7 @@ def test_sanitizer(): attribute_value = 'foo' if attribute_name in sanitizer.attr_val_is_uri: attribute_value = '%s://sub.domain.tld/path/object.ext' % sanitizer.allowed_protocols[0] - yield (runSanitizerTest, "test_should_allow_%s_attribute" % attribute_name, + yield ("test_should_allow_%s_attribute" % attribute_name, "

foo <bad>bar</bad> baz

" % (attribute_name, attribute_value), "

foo bar baz

" % (attribute_name, attribute_value)) @@ -101,7 +91,7 @@ def test_sanitizer(): rest_of_uri = '//sub.domain.tld/path/object.ext' if protocol == 'data': rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ=' - yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol, + yield ("test_should_allow_uppercase_%s_uris" % protocol, "foo" % (protocol, rest_of_uri), """foo""" % (protocol, rest_of_uri)) @@ -110,11 +100,26 @@ def test_sanitizer(): if protocol == 'data': rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ=' protocol = protocol.upper() - yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol, + yield ("test_should_allow_uppercase_%s_uris" % protocol, "foo" % (protocol, rest_of_uri), """foo""" % (protocol, rest_of_uri)) +@pytest.mark.parametrize("expected, input", + (pytest.param(expected, input, id=id) + for id, expected, input in param_sanitizer())) +def test_sanitizer(expected, input): + parsed = parseFragment(expected) + expected = serialize(parsed, + omit_optional_tags=False, + use_trailing_solidus=True, + space_before_trailing_solidus=False, + quote_attr_values="always", + quote_char='"', + alphabetical_attributes=True) + assert expected == sanitize_html(input) + + def test_lowercase_color_codes_in_style(): sanitized = sanitize_html("

") expected = '

' diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py index c23592af..bce62459 100644 --- a/html5lib/tests/test_serializer.py +++ b/html5lib/tests/test_serializer.py @@ -89,19 +89,6 @@ def serialize_html(input, options): return serializer.render(stream, encoding) -def runSerializerTest(input, expected, options): - encoding = options.get("encoding", None) - - if encoding: - expected = list(map(lambda x: x.encode(encoding), expected)) - - result = serialize_html(input, options) - if len(expected) == 1: - assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options)) - elif result not in expected: - assert False, "Expected: %s, Received: %s" % (expected, result) - - def throwsWithLatin1(input): with pytest.raises(UnicodeEncodeError): serialize_html(input, {"encoding": "iso-8859-1"}) @@ -120,13 +107,13 @@ def testDoctypeSystemId(): def testCdataCharacters(): - runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]], - ["