From f5855f3d0ad6d4202e2b5d02626d85178ff6443a Mon Sep 17 00:00:00 2001
From: jgraham <james@hoppipolla.co.uk>
Date: Fri, 10 Feb 2012 23:41:11 +0100
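Subject: [PATCH] Fix up more tests to work with nose

Replace the unittest suite builders in test_serializer, test_tokenizer and
test_treewalkers (generated TestCase methods, buildTestSuite(), main()) with
module-level test generators that yield their checks and use plain assert
statements. LxmlTestCase is now only defined when lxml was loaded.

Also: treewalkers/etree.py detects comments by comparing node.tag with
ElementTree.Comment and asserts that element tags are strings; setup.py
sets the version to 0.95 and drops tests_require.
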
---
 html5lib/tests/test_serializer.py  | 112 ++++++++++++-----------------
 html5lib/tests/test_tokenizer.py   |  70 ++++++++----------
 html5lib/tests/test_treewalkers.py |  62 +++++++---------
 html5lib/treewalkers/etree.py      |   3 +-
 setup.py                           |   5 +-
 5 files changed, 102 insertions(+), 150 deletions(-)
diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py
index 1d469aae..132620b1 100644
--- a/html5lib/tests/test_serializer.py
+++ b/html5lib/tests/test_serializer.py
@@ -83,28 +83,21 @@ def serialize_xhtml(input, options):
     options = dict([(str(k),v) for k,v in options.iteritems()])
     return serializer.XHTMLSerializer(**options).render(JsonWalker(input),options.get("encoding",None))
 
+def make_test(input, expected, xhtml, options):
+    html_out = serialize_html(input, options)  #the HTML rendering is always checked
+    if len(expected) == 1:
+        assert expected[0] == html_out, "Expected:\n%s\nActual:\n%s\nOptions\nxhtml:False\n%s"%(expected[0], html_out, str(options))
+    else:
+        assert html_out in expected, "Expected: %s, Received: %s" % (expected, html_out)
 
-class TestCase(unittest.TestCase):
-    def addTest(cls, name, description, input, expected, xhtml, options):
-        func = lambda self: self.mockTest(input, options, expected, xhtml)
-        func.__doc__ = "\t".join([name, description, str(input), str(options)])
-        setattr(cls, name, func)
-    addTest = classmethod(addTest)
+    if not xhtml:
+        return  #nothing to compare the XHTML rendering against
 
-    def mockTest(self, input, options, expected, xhtml):
-        result = serialize_html(input, options)
-        if len(expected) == 1:
-            self.assertEquals(expected[0], result, "Expected:\n%s\nActual:\n%s\nOptions\nxhtml:False\n%s"%(expected[0], result, str(options)))
-        elif result not in expected:
-            self.fail("Expected: %s, Received: %s" % (expected, result))
-
-        if not xhtml: return
-
-        result = serialize_xhtml(input, options)
-        if len(xhtml) == 1:
-            self.assertEquals(xhtml[0], result, "Expected:\n%s\nActual:\n%s\nOptions\nxhtml:True\n%s"%(xhtml[0], result, str(options)))
-        elif result not in xhtml:
-            self.fail("Expected: %s, Received: %s" % (xhtml, result))
+    xhtml_out = serialize_xhtml(input, options)  #same check against the XHTML serializer
+    if len(xhtml) == 1:
+        assert xhtml[0] == xhtml_out, "Expected:\n%s\nActual:\n%s\nOptions\nxhtml:True\n%s"%(xhtml[0], xhtml_out, str(options))
+    else:
+        assert xhtml_out in xhtml, "Expected: %s, Received: %s" % (xhtml, xhtml_out)
 
 
 class EncodingTestCase(unittest.TestCase):
@@ -150,55 +143,38 @@ def testComment(self):
         self.throwsWithLatin1([["Comment", u"\u0101"]])
 
 
-class LxmlTestCase(unittest.TestCase):
-    def setUp(self):
-        self.parser = etree.XMLParser(resolve_entities=False)
-        self.treewalker = html5lib.getTreeWalker("lxml")
-        self.serializer = serializer.HTMLSerializer()
-
-    def testEntityReplacement(self):
-        doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>"""
-        tree = etree.fromstring(doc, parser = self.parser).getroottree()
-        result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False)
-        self.assertEquals(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>\u03B2</html>""", result)
-
-    def testEntityXML(self):
-        doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>"""
-        tree = etree.fromstring(doc, parser = self.parser).getroottree()
-        result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False)
-        self.assertEquals(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>""", result)
-
-    def testEntityNoResolve(self):
-        doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>"""
-        tree = etree.fromstring(doc, parser = self.parser).getroottree()
-        result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False,
-                                      resolve_entities=False)
-        self.assertEquals(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>""", result)
-
-def buildBasicTestSuite():
+if "lxml" in optionals_loaded:
+    class LxmlTestCase(unittest.TestCase):
+        #Entity handling when lxml trees are serialized; defined only if lxml loaded
+        def setUp(self):
+            self.parser = etree.XMLParser(resolve_entities=False)
+            self.treewalker = html5lib.getTreeWalker("lxml")
+            self.serializer = serializer.HTMLSerializer()
+
+        def _render(self, doc, **options):
+            #Parse doc with the non-resolving XML parser and serialize its root tree
+            tree = etree.fromstring(doc, parser = self.parser).getroottree()
+            return serializer.serialize(tree, tree="lxml", omit_optional_tags=False, **options)
+
+        def testEntityReplacement(self):
+            result = self._render("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>""")
+            self.assertEquals(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>\u03B2</html>""", result)
+
+        def testEntityXML(self):
+            result = self._render("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>""")
+            self.assertEquals(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>""", result)
+
+        def testEntityNoResolve(self):
+            result = self._render("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>""",
+                                  resolve_entities=False)
+            self.assertEquals(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>""", result)
+
+def test_serializer():
     for filename in html5lib_test_files('serializer', '*.test'):
-        test_name = os.path.basename(filename).replace('.test','')
         tests = json.load(file(filename))
+        test_name = os.path.basename(filename).replace('.test','')
         for index, test in enumerate(tests['tests']):
             xhtml = test.get("xhtml", test["expected"])
-            if test_name == 'optionaltags': xhtml = None
-            TestCase.addTest('test_%s_%d' % (test_name, index+1),
-                test["description"], test["input"], test["expected"], xhtml,
-                test.get("options", {}))
-    return unittest.TestLoader().loadTestsFromTestCase(TestCase)
-
-def buildTestSuite():
-    allTests = [buildBasicTestSuite()]
-    allTests.append(unittest.TestLoader().loadTestsFromTestCase(EncodingTestCase))
-    if "lxml" in optionals_loaded:
-        allTests.append(unittest.TestLoader().loadTestsFromTestCase(LxmlTestCase))
-
-    return unittest.TestSuite(allTests)
-
-
-def main():
-    buildTestSuite()
-    unittest.main()
-
-if __name__ == "__main__":
-    main()
+            if test_name == 'optionaltags': 
+                xhtml = None
+            yield make_test, test["input"], test["expected"], xhtml, test.get("options", {})
diff --git a/html5lib/tests/test_tokenizer.py b/html5lib/tests/test_tokenizer.py
index 8a49a3af..1b76806a 100644
--- a/html5lib/tests/test_tokenizer.py
+++ b/html5lib/tests/test_tokenizer.py
@@ -138,33 +138,32 @@ def decode(inp):
                     token[2][decode(key)] = decode(value)
     return test
 
-class TestCase(unittest.TestCase):
-    def runTokenizerTest(self, test):
-        #XXX - move this out into the setup function
-        #concatenate all consecutive character tokens into a single token
-        if 'doubleEscaped' in test:
-            test = unescape_test(test)
-
-        expected = concatenateCharacterTokens(test['output'])            
-        if 'lastStartTag' not in test:
-            test['lastStartTag'] = None
-        outBuffer = cStringIO.StringIO()
-        stdout = sys.stdout
-        sys.stdout = outBuffer
-        parser = TokenizerTestParser(test['initialState'], 
-                                     test['lastStartTag'])
-        tokens = parser.parse(test['input'])
-        tokens = concatenateCharacterTokens(tokens)
-        received = normalizeTokens(tokens)
-        errorMsg = u"\n".join(["\n\nInitial state:",
-                              test['initialState'] ,
-                              "\nInput:", unicode(test['input']),
-                              "\nExpected:", unicode(expected),
-                              "\nreceived:", unicode(tokens)])
-        errorMsg = errorMsg.encode("utf-8")
-        ignoreErrorOrder = test.get('ignoreErrorOrder', False)
-        self.assertEquals(tokensMatch(expected, received, ignoreErrorOrder), 
-                          True, errorMsg)
+
+def runTokenizerTest(test):
+    #Tokenize test['input'] and assert the tokens match test['output']
+    #XXX - move this out into the setup function
+    if 'doubleEscaped' in test:
+        test = unescape_test(test)
+    #concatenate all consecutive character tokens into a single token
+    expected = concatenateCharacterTokens(test['output'])
+    test.setdefault('lastStartTag', None)
+    #Capture stdout while tokenizing, and always put the real stream back
+    stdout = sys.stdout
+    sys.stdout = cStringIO.StringIO()
+    try:
+        parser = TokenizerTestParser(test['initialState'], test['lastStartTag'])
+        tokens = concatenateCharacterTokens(parser.parse(test['input']))
+    finally:
+        sys.stdout = stdout
+    received = normalizeTokens(tokens)
+    errorMsg = u"\n".join(["\n\nInitial state:",
+                          test['initialState'] ,
+                          "\nInput:", unicode(test['input']),
+                          "\nExpected:", unicode(expected),
+                          "\nreceived:", unicode(tokens)])
+    errorMsg = errorMsg.encode("utf-8")
+    ignoreErrorOrder = test.get('ignoreErrorOrder', False)
+    assert tokensMatch(expected, received, ignoreErrorOrder), errorMsg
 
 
 def _doCapitalize(match):
@@ -178,7 +177,7 @@ def capitalize(s):
     return s
 
 
-def buildTestSuite():
+def test_tokenizer():
     for filename in html5lib_test_files('tokenizer', '*.test'):
         tests = json.load(file(filename))
         testName = os.path.basename(filename).replace(".test","")
@@ -190,16 +189,5 @@ def buildTestSuite():
                     test["initialStates"] = ["Data state"]
                 for initialState in test["initialStates"]:
                     test["initialState"] = capitalize(initialState)
-                    def testFunc(self, test=test):
-                        self.runTokenizerTest(test)
-                    testFunc.__doc__ = "\t".join([testName, 
-                                                  test['description']])
-                    setattr(TestCase, 'test_%s_%d_%s' % (testName, index, test["initialState"]), testFunc)
-    return unittest.TestLoader().loadTestsFromTestCase(TestCase)
-
-def main():
-    buildTestSuite()
-    unittest.main()
-
-if __name__ == "__main__":
-    main()
+                    yield runTokenizerTest, test
+
diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py
index 6b88d557..4b558bdd 100644
--- a/html5lib/tests/test_treewalkers.py
+++ b/html5lib/tests/test_treewalkers.py
@@ -242,30 +242,6 @@ def sortattrs(x):
   lines.sort()
   return "\n".join(lines)
 
-class TestCase(unittest.TestCase):
-    def runTest(self, innerHTML, input, expected, errors, treeClass):
-        try:
-            p = html5parser.HTMLParser(tree = treeClass["builder"])
-            if innerHTML:
-                document = p.parseFragment(StringIO.StringIO(input), innerHTML)
-            else:
-                document = p.parse(StringIO.StringIO(input))
-        except constants.DataLossWarning:
-            #Ignore testcases we know we don't pass
-            return
-
-        document = treeClass.get("adapter", lambda x: x)(document)
-        try:
-            output = convertTokens(treeClass["walker"](document))
-            output = attrlist.sub(sortattrs, output)
-            expected = attrlist.sub(sortattrs, convertExpected(expected))
-            self.assertEquals(expected, output, "\n".join([
-                "", "Input:", input,
-                "", "Expected:", expected,
-                "", "Received:", output
-            ]))
-        except NotImplementedError:
-            pass # Amnesty for those that confess...
 
 class TokenTestCase(unittest.TestCase):
     def test_all_tokens(self):
@@ -290,8 +266,31 @@ def test_all_tokens(self):
             for expectedToken, outputToken in zip(expected, output):
                 self.assertEquals(expectedToken, outputToken)
 
+def run_test(innerHTML, input, expected, errors, treeClass):
+    #One tree-walker case: parse input, walk the tree, compare the token dump
+    try:
+        parser = html5parser.HTMLParser(tree = treeClass["builder"])
+        stream = StringIO.StringIO(input)
+        if innerHTML:
+            document = parser.parseFragment(stream, innerHTML)
+        else:
+            document = parser.parse(stream)
+    except constants.DataLossWarning:
+        #Ignore testcases we know we don't pass
+        return
+
+    adapter = treeClass.get("adapter", lambda x: x)
+    document = adapter(document)
+    try:
+        output = attrlist.sub(sortattrs, convertTokens(treeClass["walker"](document)))
+        expected = attrlist.sub(sortattrs, convertExpected(expected))
+        assert expected == output, "\n".join(["", "Input:", input, "",
+                                              "Expected:", expected, "",
+                                              "Received:", output])
+    except NotImplementedError:
+        pass # Amnesty for those that confess...
             
-def buildTestSuite():
+def test_treewalker():
     sys.stdout.write('Testing tree walkers '+ " ".join(treeTypes.keys()) + "\n")
 
     for treeName, treeCls in treeTypes.iteritems():
@@ -307,17 +306,6 @@ def buildTestSuite():
                                                                "document-fragment",
                                                                "document")]
                 errors = errors.split("\n")
-                def testFunc(self, innerHTML=innerHTML, input=input,
-                    expected=expected, errors=errors, treeCls=treeCls):
-                    self.runTest(innerHTML, input, expected, errors, treeCls)
-                setattr(TestCase, "test_%s_%d_%s" % (testName,index+1,treeName),
-                     testFunc)
-
-    return unittest.TestLoader().loadTestsFromTestCase(TestCase)
+                yield run_test, innerHTML, input, expected, errors, treeCls
 
-def main():
-    buildTestSuite()
-    unittest.main()
 
-if __name__ == "__main__":
-    main()
diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py
index e5f4e1f9..13b03194 100644
--- a/html5lib/treewalkers/etree.py
+++ b/html5lib/treewalkers/etree.py
@@ -61,10 +61,11 @@ def getNodeDetails(self, node):
                 return (_base.DOCTYPE, node.text, 
                         node.get("publicId"), node.get("systemId"))
 
-            elif type(node.tag) == type(ElementTree.Comment):
+            elif node.tag == ElementTree.Comment:
                 return _base.COMMENT, node.text
 
             else:
+                assert type(node.tag) in (str, unicode), type(node.tag)
                 #This is assumed to be an ordinary element
                 match = tag_regexp.match(node.tag)
                 if match:
diff --git a/setup.py b/setup.py
index 4d220279..dfd88077 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@
     ]
 
 setup(name='html5lib',
-      version='0.95-dev',
+      version='0.95',
       url='http://code.google.com/p/html5lib/',
       license="MIT License",
       description='HTML parser based on the WHAT-WG Web Applications 1.0' 
@@ -34,6 +34,5 @@
           for name in os.listdir(os.path.join('html5lib'))
           if os.path.isdir(os.path.join('html5lib',name)) and
               not name.startswith('.')],
-      test_suite = "html5lib.tests.buildTestSuite",
-      tests_require = ['simplejson']
+      test_suite = "html5lib.tests.buildTestSuite"
       )