Move to pytest4/5

This largely involves moving away from using generators as tests
yanorepuser4 · Jun 7, 2020 · 93c3555 · 93c3555
1 parent 5cd73ef
commit 93c3555
Show file tree

Hide file tree

Showing 7 changed files with 98 additions and 119 deletions.
diff --git a/.pytest.expect b/.pytest.expect
@@ -1,7 +1,7 @@
 pytest-expect file v1
-(2, 7, 11, 'final', 0)
-b'html5lib/tests/test_encoding.py::test_encoding::[110]': FAIL
-b'html5lib/tests/test_encoding.py::test_encoding::[111]': FAIL
+(2, 7, 18, 'final', 0)
+b'html5lib/tests/test_encoding.py::test_parser_encoding[<!DOCTYPE HTML>\\n<script>document.write(\'<meta charset="ISO-8859-\' + \'2">\')</script>-iso-8859-2]': FAIL
+b'html5lib/tests/test_encoding.py::test_prescan_encoding[<!DOCTYPE HTML>\\n<script>document.write(\'<meta charset="ISO-8859-\' + \'2">\')</script>-iso-8859-2]': FAIL
 u'html5lib/tests/testdata/tokenizer/test2.test::0::dataState': FAIL
 u'html5lib/tests/testdata/tokenizer/test3.test::228::dataState': FAIL
 u'html5lib/tests/testdata/tokenizer/test3.test::231::dataState': FAIL

diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py
@@ -75,7 +75,15 @@ def test_parser_args_raises(kwargs):
  assert exc_info.value.args[0].startswith("Cannot set an encoding with a unicode input")
 
 
-def runParserEncodingTest(data, encoding):
+def param_encoding():
+ for filename in get_data_files("encoding"):
+ tests = _TestData(filename, b"data", encoding=None)
+ for test in tests:
+ yield test[b'data'], test[b'encoding']
+
+
+@pytest.mark.parametrize("data, encoding", param_encoding())
+def test_parser_encoding(data, encoding):
  p = HTMLParser()
  assert p.documentEncoding is None
  p.parse(data, useChardet=False)
@@ -84,7 +92,8 @@ def runParserEncodingTest(data, encoding):
  assert encoding == p.documentEncoding, errorMessage(data, encoding, p.documentEncoding)
 
 
-def runPreScanEncodingTest(data, encoding):
+@pytest.mark.parametrize("data, encoding", param_encoding())
+def test_prescan_encoding(data, encoding):
  stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
  encoding = encoding.lower().decode("ascii")
 
@@ -95,14 +104,6 @@ def runPreScanEncodingTest(data, encoding):
  assert encoding == stream.charEncoding[0].name, errorMessage(data, encoding, stream.charEncoding[0].name)
 
 
-def test_encoding():
- for filename in get_data_files("encoding"):
- tests = _TestData(filename, b"data", encoding=None)
- for test in tests:
- yield (runParserEncodingTest, test[b'data'], test[b'encoding'])
- yield (runPreScanEncodingTest, test[b'data'], test[b'encoding'])
-
-
 # pylint:disable=wrong-import-position
 try:
  import chardet # noqa

diff --git a/html5lib/tests/test_sanitizer.py b/html5lib/tests/test_sanitizer.py
@@ -1,21 +1,11 @@
 from __future__ import absolute_import, division, unicode_literals
 
+import pytest
+
 from html5lib import constants, parseFragment, serialize
 from html5lib.filters import sanitizer
 
 
-def runSanitizerTest(_, expected, input):
- parsed = parseFragment(expected)
- expected = serialize(parsed,
- omit_optional_tags=False,
- use_trailing_solidus=True,
- space_before_trailing_solidus=False,
- quote_attr_values="always",
- quote_char='"',
- alphabetical_attributes=True)
- assert expected == sanitize_html(input)
-
-
 def sanitize_html(stream):
  parsed = parseFragment(stream)
  serialized = serialize(parsed,
@@ -59,27 +49,27 @@ def test_data_uri_disallowed_type():
  assert expected == sanitized
 
 
-def test_sanitizer():
+def param_sanitizer():
  for ns, tag_name in sanitizer.allowed_elements:
  if ns != constants.namespaces["html"]:
  continue
  if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td',
  'tfoot', 'th', 'thead', 'tr', 'select']:
  continue # TODO
  if tag_name == 'image':
- yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
+ yield ("test_should_allow_%s_tag" % tag_name,
  "<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
  "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
  elif tag_name == 'br':
- yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
+ yield ("test_should_allow_%s_tag" % tag_name,
  "<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
  "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
  elif tag_name in constants.voidElements:
- yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
+ yield ("test_should_allow_%s_tag" % tag_name,
  "<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz" % tag_name,
  "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
  else:
- yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
+ yield ("test_should_allow_%s_tag" % tag_name,
  "<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>" % (tag_name, tag_name),
  "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
 
@@ -93,15 +83,15 @@ def test_sanitizer():
  attribute_value = 'foo'
  if attribute_name in sanitizer.attr_val_is_uri:
  attribute_value = '%s://sub.domain.tld/path/object.ext' % sanitizer.allowed_protocols[0]
- yield (runSanitizerTest, "test_should_allow_%s_attribute" % attribute_name,
+ yield ("test_should_allow_%s_attribute" % attribute_name,
  "<p %s=\"%s\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>" % (attribute_name, attribute_value),
  "<p %s='%s'>foo <bad>bar</bad> baz</p>" % (attribute_name, attribute_value))
 
  for protocol in sanitizer.allowed_protocols:
  rest_of_uri = '//sub.domain.tld/path/object.ext'
  if protocol == 'data':
  rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
- yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
+ yield ("test_should_allow_uppercase_%s_uris" % protocol,
  "<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
  """<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))
 
@@ -110,11 +100,26 @@ def test_sanitizer():
  if protocol == 'data':
  rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
  protocol = protocol.upper()
- yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
+ yield ("test_should_allow_uppercase_%s_uris" % protocol,
  "<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
  """<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))
 
 
+@pytest.mark.parametrize("expected, input",
+ (pytest.param(expected, input, id=id)
+ for id, expected, input in param_sanitizer()))
+def test_sanitizer(expected, input):
+ parsed = parseFragment(expected)
+ expected = serialize(parsed,
+ omit_optional_tags=False,
+ use_trailing_solidus=True,
+ space_before_trailing_solidus=False,
+ quote_attr_values="always",
+ quote_char='"',
+ alphabetical_attributes=True)
+ assert expected == sanitize_html(input)
+
+
 def test_lowercase_color_codes_in_style():
  sanitized = sanitize_html("<p style=\"border: 1px solid #a2a2a2;\"></p>")
  expected = '<p style=\"border: 1px solid #a2a2a2;\"></p>'

diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py
@@ -89,19 +89,6 @@ def serialize_html(input, options):
  return serializer.render(stream, encoding)
 
 
-def runSerializerTest(input, expected, options):
- encoding = options.get("encoding", None)
-
- if encoding:
- expected = list(map(lambda x: x.encode(encoding), expected))
-
- result = serialize_html(input, options)
- if len(expected) == 1:
- assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
- elif result not in expected:
- assert False, "Expected: %s, Received: %s" % (expected, result)
-
-
 def throwsWithLatin1(input):
  with pytest.raises(UnicodeEncodeError):
  serialize_html(input, {"encoding": "iso-8859-1"})
@@ -120,13 +107,13 @@ def testDoctypeSystemId():
 
 
 def testCdataCharacters():
- runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]],
-  ["<style>&amacr;"], {"encoding": "iso-8859-1"})
+ test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]],
+ ["<style>&amacr;"], {"encoding": "iso-8859-1"})
 
 
 def testCharacters():
- runSerializerTest([["Characters", "\u0101"]],
-  ["&amacr;"], {"encoding": "iso-8859-1"})
+ test_serializer([["Characters", "\u0101"]],
+ ["&amacr;"], {"encoding": "iso-8859-1"})
 
 
 def testStartTagName():
@@ -138,9 +125,9 @@ def testAttributeName():
 
 
 def testAttributeValue():
- runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "span",
-  [{"namespace": None, "name": "potato", "value": "\u0101"}]]],
-  ["<span potato=&amacr;>"], {"encoding": "iso-8859-1"})
+ test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "span",
+ [{"namespace": None, "name": "potato", "value": "\u0101"}]]],
+ ["<span potato=&amacr;>"], {"encoding": "iso-8859-1"})
 
 
 def testEndTagName():
@@ -165,7 +152,7 @@ def testSpecQuoteAttribute(c):
  else:
  output_ = ['<span foo="%s">' % c]
  options_ = {"quote_attr_values": "spec"}
- runSerializerTest(input_, output_, options_)
+ test_serializer(input_, output_, options_)
 
 
 @pytest.mark.parametrize("c", list("\t\n\u000C\x20\r\"'=<>`"
@@ -184,7 +171,7 @@ def testLegacyQuoteAttribute(c):
  else:
  output_ = ['<span foo="%s">' % c]
  options_ = {"quote_attr_values": "legacy"}
- runSerializerTest(input_, output_, options_)
+ test_serializer(input_, output_, options_)
 
 
 @pytest.fixture
@@ -217,9 +204,23 @@ def testEntityNoResolve(lxml_parser):
  assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>'
 
 
-def test_serializer():
+def param_serializer():
  for filename in get_data_files('serializer-testdata', '*.test', os.path.dirname(__file__)):
  with open(filename) as fp:
  tests = json.load(fp)
  for test in tests['tests']:
- yield runSerializerTest, test["input"], test["expected"], test.get("options", {})
+ yield test["input"], test["expected"], test.get("options", {})
+
+
+@pytest.mark.parametrize("input, expected, options", param_serializer())
+def test_serializer(input, expected, options):
+ encoding = options.get("encoding", None)
+
+ if encoding:
+ expected = list(map(lambda x: x.encode(encoding), expected))
+
+ result = serialize_html(input, options)
+ if len(expected) == 1:
+ assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
+ elif result not in expected:
+ assert False, "Expected: %s, Received: %s" % (expected, result)
diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py
@@ -61,24 +61,7 @@ def set_attribute_on_first_child(docfrag, name, value, treeName):
  setter['ElementTree'](docfrag)(name, value)
 
 
-def runTreewalkerEditTest(intext, expected, attrs_to_add, tree):
- """tests what happens when we add attributes to the intext"""
- treeName, treeClass = tree
- if treeClass is None:
- pytest.skip("Treebuilder not loaded")
- parser = html5parser.HTMLParser(tree=treeClass["builder"])
- document = parser.parseFragment(intext)
- for nom, val in attrs_to_add:
- set_attribute_on_first_child(document, nom, val, treeName)
-
- document = treeClass.get("adapter", lambda x: x)(document)
- output = treewalkers.pprint(treeClass["walker"](document))
- output = attrlist.sub(sortattrs, output)
- if output not in expected:
- raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))
-
-
-def test_treewalker_six_mix():
+def param_treewalker_six_mix():
  """Str/Unicode mix. If str attrs added to tree"""
 
  # On Python 2.x string literals are of type str. Unless, like this
@@ -99,7 +82,25 @@ def test_treewalker_six_mix():
 
  for tree in sorted(treeTypes.items()):
  for intext, attrs, expected in sm_tests:
- yield runTreewalkerEditTest, intext, expected, attrs, tree
+ yield intext, expected, attrs, tree
+
+
+@pytest.mark.parametrize("intext, expected, attrs_to_add, tree", param_treewalker_six_mix())
+def test_treewalker_six_mix(intext, expected, attrs_to_add, tree):
+ """tests what happens when we add attributes to the intext"""
+ treeName, treeClass = tree
+ if treeClass is None:
+ pytest.skip("Treebuilder not loaded")
+ parser = html5parser.HTMLParser(tree=treeClass["builder"])
+ document = parser.parseFragment(intext)
+ for nom, val in attrs_to_add:
+ set_attribute_on_first_child(document, nom, val, treeName)
+
+ document = treeClass.get("adapter", lambda x: x)(document)
+ output = treewalkers.pprint(treeClass["walker"](document))
+ output = attrlist.sub(sortattrs, output)
+ if output not in expected:
+ raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))
 
 
 @pytest.mark.parametrize("tree,char", itertools.product(sorted(treeTypes.items()), ["x", "\u1234"]))

diff --git a/html5lib/tests/tree_construction.py b/html5lib/tests/tree_construction.py
@@ -57,8 +57,6 @@ def _getParserTests(self, treeName, treeAPIs):
  item.add_marker(pytest.mark.parser)
  if namespaceHTMLElements:
  item.add_marker(pytest.mark.namespaced)
- if treeAPIs is None:
- item.add_marker(pytest.mark.skipif(True, reason="Treebuilder not loaded"))
  yield item
 
  def _getTreeWalkerTests(self, treeName, treeAPIs):
@@ -69,8 +67,6 @@ def _getTreeWalkerTests(self, treeName, treeAPIs):
  treeAPIs)
  item.add_marker(getattr(pytest.mark, treeName))
  item.add_marker(pytest.mark.treewalker)
- if treeAPIs is None:
- item.add_marker(pytest.mark.skipif(True, reason="Treebuilder not loaded"))
  yield item
 
 
@@ -84,12 +80,14 @@ def convertTreeDump(data):
 class ParserTest(pytest.Item):
  def __init__(self, name, parent, test, treeClass, namespaceHTMLElements):
  super(ParserTest, self).__init__(name, parent)
- self.obj = lambda: 1 # this is to hack around skipif needing a function!
  self.test = test
  self.treeClass = treeClass
  self.namespaceHTMLElements = namespaceHTMLElements
 
  def runtest(self):
+ if self.treeClass is None:
+ pytest.skip("Treebuilder not loaded")
+
  p = html5parser.HTMLParser(tree=self.treeClass,
  namespaceHTMLElements=self.namespaceHTMLElements)
 
@@ -147,11 +145,13 @@ def repr_failure(self, excinfo):
 class TreeWalkerTest(pytest.Item):
  def __init__(self, name, parent, test, treeAPIs):
  super(TreeWalkerTest, self).__init__(name, parent)
- self.obj = lambda: 1 # this is to hack around skipif needing a function!
  self.test = test
  self.treeAPIs = treeAPIs
 
  def runtest(self):
+ if self.treeAPIs is None:
+ pytest.skip("Treebuilder not loaded")
+
  p = html5parser.HTMLParser(tree=self.treeAPIs["builder"])
 
  input = self.test['data']