From f5855f3d0ad6d4202e2b5d02626d85178ff6443a Mon Sep 17 00:00:00 2001 From: jgraham Date: Fri, 10 Feb 2012 23:41:11 +0100 Subject: [PATCH] Fix up more tests to work with nose --- html5lib/tests/test_serializer.py | 112 ++++++++++++----------------- html5lib/tests/test_tokenizer.py | 70 ++++++++---------- html5lib/tests/test_treewalkers.py | 62 +++++++--------- html5lib/treewalkers/etree.py | 3 +- setup.py | 5 +- 5 files changed, 102 insertions(+), 150 deletions(-) diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py index 1d469aae..132620b1 100644 --- a/html5lib/tests/test_serializer.py +++ b/html5lib/tests/test_serializer.py @@ -83,28 +83,21 @@ def serialize_xhtml(input, options): options = dict([(str(k),v) for k,v in options.iteritems()]) return serializer.XHTMLSerializer(**options).render(JsonWalker(input),options.get("encoding",None)) +def make_test(input, expected, xhtml, options): + result = serialize_html(input, options) + if len(expected) == 1: + assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions\nxhtml:False\n%s"%(expected[0], result, str(options)) + elif result not in expected: + assert False, "Expected: %s, Received: %s" % (expected, result) -class TestCase(unittest.TestCase): - def addTest(cls, name, description, input, expected, xhtml, options): - func = lambda self: self.mockTest(input, options, expected, xhtml) - func.__doc__ = "\t".join([name, description, str(input), str(options)]) - setattr(cls, name, func) - addTest = classmethod(addTest) + if not xhtml: + return - def mockTest(self, input, options, expected, xhtml): - result = serialize_html(input, options) - if len(expected) == 1: - self.assertEquals(expected[0], result, "Expected:\n%s\nActual:\n%s\nOptions\nxhtml:False\n%s"%(expected[0], result, str(options))) - elif result not in expected: - self.fail("Expected: %s, Received: %s" % (expected, result)) - - if not xhtml: return - - result = serialize_xhtml(input, options) - if len(xhtml) == 1: - self.assertEquals(xhtml[0], result, "Expected:\n%s\nActual:\n%s\nOptions\nxhtml:True\n%s"%(xhtml[0], result, str(options))) - elif result not in xhtml: - self.fail("Expected: %s, Received: %s" % (xhtml, result)) + result = serialize_xhtml(input, options) + if len(xhtml) == 1: + assert xhtml[0] == result, "Expected:\n%s\nActual:\n%s\nOptions\nxhtml:True\n%s"%(xhtml[0], result, str(options)) + elif result not in xhtml: + assert False, "Expected: %s, Received: %s" % (xhtml, result) class EncodingTestCase(unittest.TestCase): @@ -150,55 +143,38 @@ def testComment(self): self.throwsWithLatin1([["Comment", u"\u0101"]]) -class LxmlTestCase(unittest.TestCase): - def setUp(self): - self.parser = etree.XMLParser(resolve_entities=False) - self.treewalker = html5lib.getTreeWalker("lxml") - self.serializer = serializer.HTMLSerializer() - - def testEntityReplacement(self): - doc = """β""" - tree = etree.fromstring(doc, parser = self.parser).getroottree() - result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False) - self.assertEquals(u"""\u03B2""", result) - - def testEntityXML(self): - doc = """>""" - tree = etree.fromstring(doc, parser = self.parser).getroottree() - result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False) - self.assertEquals(u""">""", result) - - def testEntityNoResolve(self): - doc = """β""" - tree = etree.fromstring(doc, parser = self.parser).getroottree() - result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False, - resolve_entities=False) - self.assertEquals(u"""β""", result) - -def buildBasicTestSuite(): +if "lxml" in optionals_loaded: + class LxmlTestCase(unittest.TestCase): + def setUp(self): + self.parser = etree.XMLParser(resolve_entities=False) + self.treewalker = html5lib.getTreeWalker("lxml") + self.serializer = serializer.HTMLSerializer() + + def testEntityReplacement(self): + doc = """β""" + tree = etree.fromstring(doc, parser = self.parser).getroottree() + result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False) + self.assertEquals(u"""\u03B2""", result) + + def testEntityXML(self): + doc = """>""" + tree = etree.fromstring(doc, parser = self.parser).getroottree() + result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False) + self.assertEquals(u""">""", result) + + def testEntityNoResolve(self): + doc = """β""" + tree = etree.fromstring(doc, parser = self.parser).getroottree() + result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False, + resolve_entities=False) + self.assertEquals(u"""β""", result) + +def test_serializer(): for filename in html5lib_test_files('serializer', '*.test'): - test_name = os.path.basename(filename).replace('.test','') tests = json.load(file(filename)) + test_name = os.path.basename(filename).replace('.test','') for index, test in enumerate(tests['tests']): xhtml = test.get("xhtml", test["expected"]) - if test_name == 'optionaltags': xhtml = None - TestCase.addTest('test_%s_%d' % (test_name, index+1), - test["description"], test["input"], test["expected"], xhtml, - test.get("options", {})) - return unittest.TestLoader().loadTestsFromTestCase(TestCase) - -def buildTestSuite(): - allTests = [buildBasicTestSuite()] - allTests.append(unittest.TestLoader().loadTestsFromTestCase(EncodingTestCase)) - if "lxml" in optionals_loaded: - allTests.append(unittest.TestLoader().loadTestsFromTestCase(LxmlTestCase)) - - return unittest.TestSuite(allTests) - - -def main(): - buildTestSuite() - unittest.main() - -if __name__ == "__main__": - main() + if test_name == 'optionaltags': + xhtml = None + yield make_test, test["input"], test["expected"], xhtml, test.get("options", {}) diff --git a/html5lib/tests/test_tokenizer.py b/html5lib/tests/test_tokenizer.py index 8a49a3af..1b76806a 100644 --- a/html5lib/tests/test_tokenizer.py +++ b/html5lib/tests/test_tokenizer.py @@ -138,33 +138,32 @@ def decode(inp): token[2][decode(key)] = decode(value) return test -class TestCase(unittest.TestCase): - def runTokenizerTest(self, test): - #XXX - move this out into the setup function - #concatenate all consecutive character tokens into a single token - if 'doubleEscaped' in test: - test = unescape_test(test) - - expected = concatenateCharacterTokens(test['output']) - if 'lastStartTag' not in test: - test['lastStartTag'] = None - outBuffer = cStringIO.StringIO() - stdout = sys.stdout - sys.stdout = outBuffer - parser = TokenizerTestParser(test['initialState'], - test['lastStartTag']) - tokens = parser.parse(test['input']) - tokens = concatenateCharacterTokens(tokens) - received = normalizeTokens(tokens) - errorMsg = u"\n".join(["\n\nInitial state:", - test['initialState'] , - "\nInput:", unicode(test['input']), - "\nExpected:", unicode(expected), - "\nreceived:", unicode(tokens)]) - errorMsg = errorMsg.encode("utf-8") - ignoreErrorOrder = test.get('ignoreErrorOrder', False) - self.assertEquals(tokensMatch(expected, received, ignoreErrorOrder), - True, errorMsg) + +def runTokenizerTest(test): + #XXX - move this out into the setup function + #concatenate all consecutive character tokens into a single token + if 'doubleEscaped' in test: + test = unescape_test(test) + + expected = concatenateCharacterTokens(test['output']) + if 'lastStartTag' not in test: + test['lastStartTag'] = None + outBuffer = cStringIO.StringIO() + stdout = sys.stdout + sys.stdout = outBuffer + parser = TokenizerTestParser(test['initialState'], + test['lastStartTag']) + tokens = parser.parse(test['input']) + tokens = concatenateCharacterTokens(tokens) + received = normalizeTokens(tokens) + errorMsg = u"\n".join(["\n\nInitial state:", + test['initialState'] , + "\nInput:", unicode(test['input']), + "\nExpected:", unicode(expected), + "\nreceived:", unicode(tokens)]) + errorMsg = errorMsg.encode("utf-8") + ignoreErrorOrder = test.get('ignoreErrorOrder', False) + assert tokensMatch(expected, received, ignoreErrorOrder), errorMsg def _doCapitalize(match): @@ -178,7 +177,7 @@ def capitalize(s): return s -def buildTestSuite(): +def test_tokenizer(): for filename in html5lib_test_files('tokenizer', '*.test'): tests = json.load(file(filename)) testName = os.path.basename(filename).replace(".test","") @@ -190,16 +189,5 @@ def buildTestSuite(): test["initialStates"] = ["Data state"] for initialState in test["initialStates"]: test["initialState"] = capitalize(initialState) - def testFunc(self, test=test): - self.runTokenizerTest(test) - testFunc.__doc__ = "\t".join([testName, - test['description']]) - setattr(TestCase, 'test_%s_%d_%s' % (testName, index, test["initialState"]), testFunc) - return unittest.TestLoader().loadTestsFromTestCase(TestCase) - -def main(): - buildTestSuite() - unittest.main() - -if __name__ == "__main__": - main() + yield runTokenizerTest, test + diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py index 6b88d557..4b558bdd 100644 --- a/html5lib/tests/test_treewalkers.py +++ b/html5lib/tests/test_treewalkers.py @@ -242,30 +242,6 @@ def sortattrs(x): lines.sort() return "\n".join(lines) -class TestCase(unittest.TestCase): - def runTest(self, innerHTML, input, expected, errors, treeClass): - try: - p = html5parser.HTMLParser(tree = treeClass["builder"]) - if innerHTML: - document = p.parseFragment(StringIO.StringIO(input), innerHTML) - else: - document = p.parse(StringIO.StringIO(input)) - except constants.DataLossWarning: - #Ignore testcases we know we don't pass - return - - document = treeClass.get("adapter", lambda x: x)(document) - try: - output = convertTokens(treeClass["walker"](document)) - output = attrlist.sub(sortattrs, output) - expected = attrlist.sub(sortattrs, convertExpected(expected)) - self.assertEquals(expected, output, "\n".join([ - "", "Input:", input, - "", "Expected:", expected, - "", "Received:", output - ])) - except NotImplementedError: - pass # Amnesty for those that confess... class TokenTestCase(unittest.TestCase): def test_all_tokens(self): @@ -290,8 +266,31 @@ def test_all_tokens(self): for expectedToken, outputToken in zip(expected, output): self.assertEquals(expectedToken, outputToken) +def run_test(innerHTML, input, expected, errors, treeClass): + try: + p = html5parser.HTMLParser(tree = treeClass["builder"]) + if innerHTML: + document = p.parseFragment(StringIO.StringIO(input), innerHTML) + else: + document = p.parse(StringIO.StringIO(input)) + except constants.DataLossWarning: + #Ignore testcases we know we don't pass + return + + document = treeClass.get("adapter", lambda x: x)(document) + try: + output = convertTokens(treeClass["walker"](document)) + output = attrlist.sub(sortattrs, output) + expected = attrlist.sub(sortattrs, convertExpected(expected)) + assert expected == output, "\n".join([ + "", "Input:", input, + "", "Expected:", expected, + "", "Received:", output + ]) + except NotImplementedError: + pass # Amnesty for those that confess... -def buildTestSuite(): +def test_treewalker(): sys.stdout.write('Testing tree walkers '+ " ".join(treeTypes.keys()) + "\n") for treeName, treeCls in treeTypes.iteritems(): @@ -307,17 +306,6 @@ def buildTestSuite(): "document-fragment", "document")] errors = errors.split("\n") - def testFunc(self, innerHTML=innerHTML, input=input, - expected=expected, errors=errors, treeCls=treeCls): - self.runTest(innerHTML, input, expected, errors, treeCls) - setattr(TestCase, "test_%s_%d_%s" % (testName,index+1,treeName), - testFunc) - - return unittest.TestLoader().loadTestsFromTestCase(TestCase) + yield run_test, innerHTML, input, expected, errors, treeCls -def main(): - buildTestSuite() - unittest.main() -if __name__ == "__main__": - main() diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py index e5f4e1f9..13b03194 100644 --- a/html5lib/treewalkers/etree.py +++ b/html5lib/treewalkers/etree.py @@ -61,10 +61,11 @@ def getNodeDetails(self, node): return (_base.DOCTYPE, node.text, node.get("publicId"), node.get("systemId")) - elif type(node.tag) == type(ElementTree.Comment): + elif node.tag == ElementTree.Comment: return _base.COMMENT, node.text else: + assert type(node.tag) in (str, unicode), type(node.tag) #This is assumed to be an ordinary element match = tag_regexp.match(node.tag) if match: diff --git a/setup.py b/setup.py index 4d220279..dfd88077 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ ] setup(name='html5lib', - version='0.95-dev', + version='0.95', url='http://code.google.com/p/html5lib/', license="MIT License", description='HTML parser based on the WHAT-WG Web Applications 1.0' @@ -34,6 +34,5 @@ for name in os.listdir(os.path.join('html5lib')) if os.path.isdir(os.path.join('html5lib',name)) and not name.startswith('.')], - test_suite = "html5lib.tests.buildTestSuite", - tests_require = ['simplejson'] + test_suite = "html5lib.tests.buildTestSuite" )