Skip to content

Commit

Permalink
Move to pytest4/5
Browse files Browse the repository at this point in the history
This largely involves moving away from using generators as tests
  • Loading branch information
gsnedders committed Jun 7, 2020
1 parent 5cd73ef commit 93c3555
Show file tree
Hide file tree
Showing 7 changed files with 98 additions and 119 deletions.
6 changes: 3 additions & 3 deletions .pytest.expect
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
pytest-expect file v1
(2, 7, 11, 'final', 0)
b'html5lib/tests/test_encoding.py::test_encoding::[110]': FAIL
b'html5lib/tests/test_encoding.py::test_encoding::[111]': FAIL
(2, 7, 18, 'final', 0)
b'html5lib/tests/test_encoding.py::test_parser_encoding[<!DOCTYPE HTML>\\n<script>document.write(\'<meta charset="ISO-8859-\' + \'2">\')</script>-iso-8859-2]': FAIL
b'html5lib/tests/test_encoding.py::test_prescan_encoding[<!DOCTYPE HTML>\\n<script>document.write(\'<meta charset="ISO-8859-\' + \'2">\')</script>-iso-8859-2]': FAIL
u'html5lib/tests/testdata/tokenizer/test2.test::0::dataState': FAIL
u'html5lib/tests/testdata/tokenizer/test3.test::228::dataState': FAIL
u'html5lib/tests/testdata/tokenizer/test3.test::231::dataState': FAIL
Expand Down
21 changes: 11 additions & 10 deletions html5lib/tests/test_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,15 @@ def test_parser_args_raises(kwargs):
assert exc_info.value.args[0].startswith("Cannot set an encoding with a unicode input")


def runParserEncodingTest(data, encoding):
def param_encoding():
    """Generate ``(data, encoding)`` argument pairs for the encoding tests.

    Walks every file returned by ``get_data_files("encoding")`` and, for each
    test record loaded from it, yields the record's ``b'data'`` and
    ``b'encoding'`` entries as a 2-tuple.
    """
    for path in get_data_files("encoding"):
        for case in _TestData(path, b"data", encoding=None):
            yield case[b'data'], case[b'encoding']


@pytest.mark.parametrize("data, encoding", param_encoding())
def test_parser_encoding(data, encoding):
p = HTMLParser()
assert p.documentEncoding is None
p.parse(data, useChardet=False)
Expand All @@ -84,7 +92,8 @@ def runParserEncodingTest(data, encoding):
assert encoding == p.documentEncoding, errorMessage(data, encoding, p.documentEncoding)


def runPreScanEncodingTest(data, encoding):
@pytest.mark.parametrize("data, encoding", param_encoding())
def test_prescan_encoding(data, encoding):
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
encoding = encoding.lower().decode("ascii")

Expand All @@ -95,14 +104,6 @@ def runPreScanEncodingTest(data, encoding):
assert encoding == stream.charEncoding[0].name, errorMessage(data, encoding, stream.charEncoding[0].name)


def test_encoding():
for filename in get_data_files("encoding"):
tests = _TestData(filename, b"data", encoding=None)
for test in tests:
yield (runParserEncodingTest, test[b'data'], test[b'encoding'])
yield (runPreScanEncodingTest, test[b'data'], test[b'encoding'])


# pylint:disable=wrong-import-position
try:
import chardet # noqa
Expand Down
45 changes: 25 additions & 20 deletions html5lib/tests/test_sanitizer.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,11 @@
from __future__ import absolute_import, division, unicode_literals

import pytest

from html5lib import constants, parseFragment, serialize
from html5lib.filters import sanitizer


def runSanitizerTest(_, expected, input):
parsed = parseFragment(expected)
expected = serialize(parsed,
omit_optional_tags=False,
use_trailing_solidus=True,
space_before_trailing_solidus=False,
quote_attr_values="always",
quote_char='"',
alphabetical_attributes=True)
assert expected == sanitize_html(input)


def sanitize_html(stream):
parsed = parseFragment(stream)
serialized = serialize(parsed,
Expand Down Expand Up @@ -59,27 +49,27 @@ def test_data_uri_disallowed_type():
assert expected == sanitized


def test_sanitizer():
def param_sanitizer():
for ns, tag_name in sanitizer.allowed_elements:
if ns != constants.namespaces["html"]:
continue
if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td',
'tfoot', 'th', 'thead', 'tr', 'select']:
continue # TODO
if tag_name == 'image':
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
yield ("test_should_allow_%s_tag" % tag_name,
"<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
elif tag_name == 'br':
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
yield ("test_should_allow_%s_tag" % tag_name,
"<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
elif tag_name in constants.voidElements:
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
yield ("test_should_allow_%s_tag" % tag_name,
"<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz" % tag_name,
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
else:
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
yield ("test_should_allow_%s_tag" % tag_name,
"<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>" % (tag_name, tag_name),
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))

Expand All @@ -93,15 +83,15 @@ def test_sanitizer():
attribute_value = 'foo'
if attribute_name in sanitizer.attr_val_is_uri:
attribute_value = '%s://sub.domain.tld/path/object.ext' % sanitizer.allowed_protocols[0]
yield (runSanitizerTest, "test_should_allow_%s_attribute" % attribute_name,
yield ("test_should_allow_%s_attribute" % attribute_name,
"<p %s=\"%s\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>" % (attribute_name, attribute_value),
"<p %s='%s'>foo <bad>bar</bad> baz</p>" % (attribute_name, attribute_value))

for protocol in sanitizer.allowed_protocols:
rest_of_uri = '//sub.domain.tld/path/object.ext'
if protocol == 'data':
rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
yield ("test_should_allow_uppercase_%s_uris" % protocol,
"<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
"""<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))

Expand All @@ -110,11 +100,26 @@ def test_sanitizer():
if protocol == 'data':
rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
protocol = protocol.upper()
yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
yield ("test_should_allow_uppercase_%s_uris" % protocol,
"<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
"""<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))


@pytest.mark.parametrize(
    "expected, input",
    (pytest.param(want, markup, id=case_id)
     for case_id, want, markup in param_sanitizer()))
def test_sanitizer(expected, input):
    """Check that sanitizing ``input`` produces the normalised ``expected``.

    ``expected`` is first round-tripped through ``parseFragment`` and
    ``serialize`` with the serializer options given below, and the result is
    compared with ``sanitize_html(input)``.
    """
    normalised = serialize(parseFragment(expected),
                           omit_optional_tags=False,
                           use_trailing_solidus=True,
                           space_before_trailing_solidus=False,
                           quote_attr_values="always",
                           quote_char='"',
                           alphabetical_attributes=True)
    assert normalised == sanitize_html(input)


def test_lowercase_color_codes_in_style():
sanitized = sanitize_html("<p style=\"border: 1px solid #a2a2a2;\"></p>")
expected = '<p style=\"border: 1px solid #a2a2a2;\"></p>'
Expand Down
49 changes: 25 additions & 24 deletions html5lib/tests/test_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,19 +89,6 @@ def serialize_html(input, options):
return serializer.render(stream, encoding)


def runSerializerTest(input, expected, options):
encoding = options.get("encoding", None)

if encoding:
expected = list(map(lambda x: x.encode(encoding), expected))

result = serialize_html(input, options)
if len(expected) == 1:
assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
elif result not in expected:
assert False, "Expected: %s, Received: %s" % (expected, result)


def throwsWithLatin1(input):
with pytest.raises(UnicodeEncodeError):
serialize_html(input, {"encoding": "iso-8859-1"})
Expand All @@ -120,13 +107,13 @@ def testDoctypeSystemId():


def testCdataCharacters():
runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]],
["<style>&amacr;"], {"encoding": "iso-8859-1"})
test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]],
["<style>&amacr;"], {"encoding": "iso-8859-1"})


def testCharacters():
runSerializerTest([["Characters", "\u0101"]],
["&amacr;"], {"encoding": "iso-8859-1"})
test_serializer([["Characters", "\u0101"]],
["&amacr;"], {"encoding": "iso-8859-1"})


def testStartTagName():
Expand All @@ -138,9 +125,9 @@ def testAttributeName():


def testAttributeValue():
runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "span",
[{"namespace": None, "name": "potato", "value": "\u0101"}]]],
["<span potato=&amacr;>"], {"encoding": "iso-8859-1"})
test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "span",
[{"namespace": None, "name": "potato", "value": "\u0101"}]]],
["<span potato=&amacr;>"], {"encoding": "iso-8859-1"})


def testEndTagName():
Expand All @@ -165,7 +152,7 @@ def testSpecQuoteAttribute(c):
else:
output_ = ['<span foo="%s">' % c]
options_ = {"quote_attr_values": "spec"}
runSerializerTest(input_, output_, options_)
test_serializer(input_, output_, options_)


@pytest.mark.parametrize("c", list("\t\n\u000C\x20\r\"'=<>`"
Expand All @@ -184,7 +171,7 @@ def testLegacyQuoteAttribute(c):
else:
output_ = ['<span foo="%s">' % c]
options_ = {"quote_attr_values": "legacy"}
runSerializerTest(input_, output_, options_)
test_serializer(input_, output_, options_)


@pytest.fixture
Expand Down Expand Up @@ -217,9 +204,23 @@ def testEntityNoResolve(lxml_parser):
assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>'


def test_serializer():
def param_serializer():
for filename in get_data_files('serializer-testdata', '*.test', os.path.dirname(__file__)):
with open(filename) as fp:
tests = json.load(fp)
for test in tests['tests']:
yield runSerializerTest, test["input"], test["expected"], test.get("options", {})
yield test["input"], test["expected"], test.get("options", {})


@pytest.mark.parametrize("input, expected, options", param_serializer())
def test_serializer(input, expected, options):
    """Serialize ``input`` with ``options`` and compare against ``expected``.

    ``expected`` is a list of acceptable outputs. When ``options`` carries a
    truthy ``"encoding"``, every expected string is encoded with it before
    the comparison. A single expected value must match exactly; with any
    other number of values the result only has to be one of them.
    """
    encoding = options.get("encoding")
    if encoding:
        expected = [candidate.encode(encoding) for candidate in expected]

    result = serialize_html(input, options)

    if len(expected) != 1:
        # Several (or zero) acceptable outputs: membership is enough.
        if result not in expected:
            assert False, "Expected: %s, Received: %s" % (expected, result)
        return

    assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
39 changes: 20 additions & 19 deletions html5lib/tests/test_treewalkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,24 +61,7 @@ def set_attribute_on_first_child(docfrag, name, value, treeName):
setter['ElementTree'](docfrag)(name, value)


def runTreewalkerEditTest(intext, expected, attrs_to_add, tree):
"""tests what happens when we add attributes to the intext"""
treeName, treeClass = tree
if treeClass is None:
pytest.skip("Treebuilder not loaded")
parser = html5parser.HTMLParser(tree=treeClass["builder"])
document = parser.parseFragment(intext)
for nom, val in attrs_to_add:
set_attribute_on_first_child(document, nom, val, treeName)

document = treeClass.get("adapter", lambda x: x)(document)
output = treewalkers.pprint(treeClass["walker"](document))
output = attrlist.sub(sortattrs, output)
if output not in expected:
raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))


def test_treewalker_six_mix():
def param_treewalker_six_mix():
"""Str/Unicode mix. If str attrs added to tree"""

# On Python 2.x string literals are of type str. Unless, like this
Expand All @@ -99,7 +82,25 @@ def test_treewalker_six_mix():

for tree in sorted(treeTypes.items()):
for intext, attrs, expected in sm_tests:
yield runTreewalkerEditTest, intext, expected, attrs, tree
yield intext, expected, attrs, tree


@pytest.mark.parametrize("intext, expected, attrs_to_add, tree", param_treewalker_six_mix())
def test_treewalker_six_mix(intext, expected, attrs_to_add, tree):
    """Check treewalker output after attributes are added to a parsed fragment.

    ``tree`` is a ``(name, api)`` pair; the test is skipped when ``api`` is
    None. ``intext`` is parsed as a fragment, each ``(name, value)`` pair in
    ``attrs_to_add`` is set on its first child, and the pretty-printed walker
    output (with attributes sorted) must be one of ``expected``.
    """
    tree_name, tree_api = tree
    if tree_api is None:
        pytest.skip("Treebuilder not loaded")

    parser = html5parser.HTMLParser(tree=tree_api["builder"])
    fragment = parser.parseFragment(intext)
    for attr_name, attr_value in attrs_to_add:
        set_attribute_on_first_child(fragment, attr_name, attr_value, tree_name)

    # Use the optional adapter if there is one, otherwise the fragment as-is.
    adapt = tree_api.get("adapter", lambda x: x)
    adapted = adapt(fragment)
    walked = tree_api["walker"](adapted)
    output = attrlist.sub(sortattrs, treewalkers.pprint(walked))

    if output in expected:
        return
    raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (tree_name, expected, output))


@pytest.mark.parametrize("tree,char", itertools.product(sorted(treeTypes.items()), ["x", "\u1234"]))
Expand Down
12 changes: 6 additions & 6 deletions html5lib/tests/tree_construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,6 @@ def _getParserTests(self, treeName, treeAPIs):
item.add_marker(pytest.mark.parser)
if namespaceHTMLElements:
item.add_marker(pytest.mark.namespaced)
if treeAPIs is None:
item.add_marker(pytest.mark.skipif(True, reason="Treebuilder not loaded"))
yield item

def _getTreeWalkerTests(self, treeName, treeAPIs):
Expand All @@ -69,8 +67,6 @@ def _getTreeWalkerTests(self, treeName, treeAPIs):
treeAPIs)
item.add_marker(getattr(pytest.mark, treeName))
item.add_marker(pytest.mark.treewalker)
if treeAPIs is None:
item.add_marker(pytest.mark.skipif(True, reason="Treebuilder not loaded"))
yield item


Expand All @@ -84,12 +80,14 @@ def convertTreeDump(data):
class ParserTest(pytest.Item):
def __init__(self, name, parent, test, treeClass, namespaceHTMLElements):
super(ParserTest, self).__init__(name, parent)
self.obj = lambda: 1 # this is to hack around skipif needing a function!
self.test = test
self.treeClass = treeClass
self.namespaceHTMLElements = namespaceHTMLElements

def runtest(self):
if self.treeClass is None:
pytest.skip("Treebuilder not loaded")

p = html5parser.HTMLParser(tree=self.treeClass,
namespaceHTMLElements=self.namespaceHTMLElements)

Expand Down Expand Up @@ -147,11 +145,13 @@ def repr_failure(self, excinfo):
class TreeWalkerTest(pytest.Item):
def __init__(self, name, parent, test, treeAPIs):
super(TreeWalkerTest, self).__init__(name, parent)
self.obj = lambda: 1 # this is to hack around skipif needing a function!
self.test = test
self.treeAPIs = treeAPIs

def runtest(self):
if self.treeAPIs is None:
pytest.skip("Treebuilder not loaded")

p = html5parser.HTMLParser(tree=self.treeAPIs["builder"])

input = self.test['data']
Expand Down
Loading

0 comments on commit 93c3555

Please sign in to comment.