Skip to content

Commit

Permalink
Fix up more tests to work with nose
Browse files Browse the repository at this point in the history
  • Loading branch information
jgraham committed Feb 10, 2012
1 parent 19686c8 commit f5855f3
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 150 deletions.
112 changes: 44 additions & 68 deletions html5lib/tests/test_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,28 +83,21 @@ def serialize_xhtml(input, options):
options = dict([(str(k),v) for k,v in options.iteritems()])
return serializer.XHTMLSerializer(**options).render(JsonWalker(input),options.get("encoding",None))

def make_test(input, expected, xhtml, options):
result = serialize_html(input, options)
if len(expected) == 1:
assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions\nxhtml:False\n%s"%(expected[0], result, str(options))
elif result not in expected:
assert False, "Expected: %s, Received: %s" % (expected, result)

class TestCase(unittest.TestCase):
def addTest(cls, name, description, input, expected, xhtml, options):
func = lambda self: self.mockTest(input, options, expected, xhtml)
func.__doc__ = "\t".join([name, description, str(input), str(options)])
setattr(cls, name, func)
addTest = classmethod(addTest)
if not xhtml:
return

def mockTest(self, input, options, expected, xhtml):
result = serialize_html(input, options)
if len(expected) == 1:
self.assertEquals(expected[0], result, "Expected:\n%s\nActual:\n%s\nOptions\nxhtml:False\n%s"%(expected[0], result, str(options)))
elif result not in expected:
self.fail("Expected: %s, Received: %s" % (expected, result))

if not xhtml: return

result = serialize_xhtml(input, options)
if len(xhtml) == 1:
self.assertEquals(xhtml[0], result, "Expected:\n%s\nActual:\n%s\nOptions\nxhtml:True\n%s"%(xhtml[0], result, str(options)))
elif result not in xhtml:
self.fail("Expected: %s, Received: %s" % (xhtml, result))
result = serialize_xhtml(input, options)
if len(xhtml) == 1:
assert xhtml[0] == result, "Expected:\n%s\nActual:\n%s\nOptions\nxhtml:True\n%s"%(xhtml[0], result, str(options))
elif result not in xhtml:
assert False, "Expected: %s, Received: %s" % (xhtml, result)


class EncodingTestCase(unittest.TestCase):
Expand Down Expand Up @@ -150,55 +143,38 @@ def testComment(self):
self.throwsWithLatin1([["Comment", u"\u0101"]])


class LxmlTestCase(unittest.TestCase):
    # Serializer tests for entity references in trees parsed by lxml.
    # The XML parser is created with resolve_entities=False, and each test
    # checks what serializer.serialize() emits for an entity in the input.

    def setUp(self):
        self.parser = etree.XMLParser(resolve_entities=False)
        self.treewalker = html5lib.getTreeWalker("lxml")
        self.serializer = serializer.HTMLSerializer()

    def testEntityReplacement(self):
        # With default serializer options &beta; comes out as U+03B2.
        doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>"""
        tree = etree.fromstring(doc, parser = self.parser).getroottree()
        result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>\u03B2</html>""", result)

    def testEntityXML(self):
        # &gt; is expected to appear as &gt; in the serialized output.
        doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>"""
        tree = etree.fromstring(doc, parser = self.parser).getroottree()
        result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False)
        self.assertEqual(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>""", result)

    def testEntityNoResolve(self):
        # Passing resolve_entities=False to the serializer keeps &beta; as-is.
        doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>"""
        tree = etree.fromstring(doc, parser = self.parser).getroottree()
        result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False,
                                      resolve_entities=False)
        self.assertEqual(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>""", result)

def buildBasicTestSuite():
# Only define the lxml-backed tests when the optional lxml module was loaded.
if "lxml" in optionals_loaded:
    class LxmlTestCase(unittest.TestCase):
        # Checks how serializer.serialize() renders entity references in
        # trees parsed by an lxml XMLParser built with resolve_entities=False.

        def setUp(self):
            self.parser = etree.XMLParser(resolve_entities=False)
            self.treewalker = html5lib.getTreeWalker("lxml")
            self.serializer = serializer.HTMLSerializer()

        def testEntityReplacement(self):
            # Default options: &beta; is expected as the character U+03B2.
            doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>"""
            tree = etree.fromstring(doc, parser = self.parser).getroottree()
            result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False)
            # assertEqual is the non-deprecated spelling of assertEquals.
            self.assertEqual(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>\u03B2</html>""", result)

        def testEntityXML(self):
            # &gt; is expected to stay &gt; in the output.
            doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>"""
            tree = etree.fromstring(doc, parser = self.parser).getroottree()
            result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False)
            self.assertEqual(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>""", result)

        def testEntityNoResolve(self):
            # resolve_entities=False on the serializer leaves &beta; untouched.
            doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>"""
            tree = etree.fromstring(doc, parser = self.parser).getroottree()
            result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False,
                                          resolve_entities=False)
            self.assertEqual(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>""", result)

def test_serializer():
for filename in html5lib_test_files('serializer', '*.test'):
test_name = os.path.basename(filename).replace('.test','')
tests = json.load(file(filename))
test_name = os.path.basename(filename).replace('.test','')
for index, test in enumerate(tests['tests']):
xhtml = test.get("xhtml", test["expected"])
if test_name == 'optionaltags': xhtml = None
TestCase.addTest('test_%s_%d' % (test_name, index+1),
test["description"], test["input"], test["expected"], xhtml,
test.get("options", {}))
return unittest.TestLoader().loadTestsFromTestCase(TestCase)

def buildTestSuite():
allTests = [buildBasicTestSuite()]
allTests.append(unittest.TestLoader().loadTestsFromTestCase(EncodingTestCase))
if "lxml" in optionals_loaded:
allTests.append(unittest.TestLoader().loadTestsFromTestCase(LxmlTestCase))

return unittest.TestSuite(allTests)


def main():
buildTestSuite()
unittest.main()

if __name__ == "__main__":
main()
if test_name == 'optionaltags':
xhtml = None
yield make_test, test["input"], test["expected"], xhtml, test.get("options", {})
70 changes: 29 additions & 41 deletions html5lib/tests/test_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,33 +138,32 @@ def decode(inp):
token[2][decode(key)] = decode(value)
return test

class TestCase(unittest.TestCase):
    def runTokenizerTest(self, test):
        # Tokenize test['input'] starting from test['initialState'] and check
        # the resulting tokens against test['output'].
        #XXX - move this out into the setup function
        #concatenate all consecutive character tokens into a single token
        if 'doubleEscaped' in test:
            test = unescape_test(test)

        expected = concatenateCharacterTokens(test['output'])
        if 'lastStartTag' not in test:
            test['lastStartTag'] = None
        # Anything printed while tokenizing goes to a throwaway buffer.
        # The real stdout is always put back, even if the parser raises,
        # so later tests and the runner's own reporting are not swallowed.
        stdout = sys.stdout
        sys.stdout = cStringIO.StringIO()
        try:
            parser = TokenizerTestParser(test['initialState'],
                                         test['lastStartTag'])
            tokens = parser.parse(test['input'])
        finally:
            sys.stdout = stdout
        tokens = concatenateCharacterTokens(tokens)
        received = normalizeTokens(tokens)
        errorMsg = u"\n".join(["\n\nInitial state:",
                               test['initialState'] ,
                               "\nInput:", unicode(test['input']),
                               "\nExpected:", unicode(expected),
                               "\nreceived:", unicode(tokens)])
        errorMsg = errorMsg.encode("utf-8")
        ignoreErrorOrder = test.get('ignoreErrorOrder', False)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(tokensMatch(expected, received, ignoreErrorOrder),
                         True, errorMsg)

def runTokenizerTest(test):
    # Tokenize test['input'] starting from test['initialState'] and assert
    # that the resulting tokens match test['output'].
    #XXX - move this out into the setup function
    #concatenate all consecutive character tokens into a single token
    if 'doubleEscaped' in test:
        test = unescape_test(test)

    expected = concatenateCharacterTokens(test['output'])
    if 'lastStartTag' not in test:
        test['lastStartTag'] = None
    # Output printed during tokenizing is diverted to a throwaway buffer.
    # Restore the real stdout afterwards (also on error); otherwise every
    # later test, and the test runner itself, would keep writing into it.
    stdout = sys.stdout
    sys.stdout = cStringIO.StringIO()
    try:
        parser = TokenizerTestParser(test['initialState'],
                                     test['lastStartTag'])
        tokens = parser.parse(test['input'])
    finally:
        sys.stdout = stdout
    tokens = concatenateCharacterTokens(tokens)
    received = normalizeTokens(tokens)
    errorMsg = u"\n".join(["\n\nInitial state:",
                           test['initialState'] ,
                           "\nInput:", unicode(test['input']),
                           "\nExpected:", unicode(expected),
                           "\nreceived:", unicode(tokens)])
    errorMsg = errorMsg.encode("utf-8")
    ignoreErrorOrder = test.get('ignoreErrorOrder', False)
    assert tokensMatch(expected, received, ignoreErrorOrder), errorMsg


def _doCapitalize(match):
Expand All @@ -178,7 +177,7 @@ def capitalize(s):
return s


def buildTestSuite():
def test_tokenizer():
for filename in html5lib_test_files('tokenizer', '*.test'):
tests = json.load(file(filename))
testName = os.path.basename(filename).replace(".test","")
Expand All @@ -190,16 +189,5 @@ def buildTestSuite():
test["initialStates"] = ["Data state"]
for initialState in test["initialStates"]:
test["initialState"] = capitalize(initialState)
def testFunc(self, test=test):
self.runTokenizerTest(test)
testFunc.__doc__ = "\t".join([testName,
test['description']])
setattr(TestCase, 'test_%s_%d_%s' % (testName, index, test["initialState"]), testFunc)
return unittest.TestLoader().loadTestsFromTestCase(TestCase)

def main():
buildTestSuite()
unittest.main()

if __name__ == "__main__":
main()
yield runTokenizerTest, test

62 changes: 25 additions & 37 deletions html5lib/tests/test_treewalkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,30 +242,6 @@ def sortattrs(x):
lines.sort()
return "\n".join(lines)

class TestCase(unittest.TestCase):
    def runTest(self, innerHTML, input, expected, errors, treeClass):
        # Parse `input` with the tree builder in treeClass, walk the result
        # with treeClass["walker"], and compare the converted token stream
        # with `expected`. `errors` is accepted but not used here.
        try:
            p = html5parser.HTMLParser(tree = treeClass["builder"])
            if innerHTML:
                document = p.parseFragment(StringIO.StringIO(input), innerHTML)
            else:
                document = p.parse(StringIO.StringIO(input))
        except constants.DataLossWarning:
            #Ignore testcases we know we don't pass
            return

        # Optional per-tree adapter; identity when the tree type has none.
        document = treeClass.get("adapter", lambda x: x)(document)
        try:
            output = convertTokens(treeClass["walker"](document))
            # Attribute lines are sorted on both sides before comparing.
            output = attrlist.sub(sortattrs, output)
            expected = attrlist.sub(sortattrs, convertExpected(expected))
            # assertEqual replaces the deprecated assertEquals alias.
            self.assertEqual(expected, output, "\n".join([
                "", "Input:", input,
                "", "Expected:", expected,
                "", "Received:", output
            ]))
        except NotImplementedError:
            pass # Amnesty for those that confess...

class TokenTestCase(unittest.TestCase):
def test_all_tokens(self):
Expand All @@ -290,8 +266,31 @@ def test_all_tokens(self):
for expectedToken, outputToken in zip(expected, output):
self.assertEquals(expectedToken, outputToken)

def run_test(innerHTML, input, expected, errors, treeClass):
    # Parse `input` with the builder from treeClass, walk the resulting tree
    # and assert that the converted token stream equals `expected`.
    # `errors` is accepted but not used here.
    try:
        parser = html5parser.HTMLParser(tree = treeClass["builder"])
        source = StringIO.StringIO(input)
        if not innerHTML:
            document = parser.parse(source)
        else:
            document = parser.parseFragment(source, innerHTML)
    except constants.DataLossWarning:
        #Ignore testcases we know we don't pass
        return

    # Run the tree through its adapter when the tree type declares one.
    if "adapter" in treeClass:
        document = treeClass["adapter"](document)

    try:
        walk = treeClass["walker"]
        # Attribute lines are sorted on both sides before comparing.
        received = attrlist.sub(sortattrs, convertTokens(walk(document)))
        wanted = attrlist.sub(sortattrs, convertExpected(expected))
        assert wanted == received, "\n".join([
            "", "Input:", input,
            "", "Expected:", wanted,
            "", "Received:", received
        ])
    except NotImplementedError:
        pass # Amnesty for those that confess...

def buildTestSuite():
def test_treewalker():
sys.stdout.write('Testing tree walkers '+ " ".join(treeTypes.keys()) + "\n")

for treeName, treeCls in treeTypes.iteritems():
Expand All @@ -307,17 +306,6 @@ def buildTestSuite():
"document-fragment",
"document")]
errors = errors.split("\n")
def testFunc(self, innerHTML=innerHTML, input=input,
expected=expected, errors=errors, treeCls=treeCls):
self.runTest(innerHTML, input, expected, errors, treeCls)
setattr(TestCase, "test_%s_%d_%s" % (testName,index+1,treeName),
testFunc)

return unittest.TestLoader().loadTestsFromTestCase(TestCase)
yield run_test, innerHTML, input, expected, errors, treeCls

def main():
buildTestSuite()
unittest.main()

if __name__ == "__main__":
main()
3 changes: 2 additions & 1 deletion html5lib/treewalkers/etree.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,11 @@ def getNodeDetails(self, node):
return (_base.DOCTYPE, node.text,
node.get("publicId"), node.get("systemId"))

elif type(node.tag) == type(ElementTree.Comment):
elif node.tag == ElementTree.Comment:
return _base.COMMENT, node.text

else:
assert type(node.tag) in (str, unicode), type(node.tag)
#This is assumed to be an ordinary element
match = tag_regexp.match(node.tag)
if match:
Expand Down
5 changes: 2 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
]

setup(name='html5lib',
version='0.95-dev',
version='0.95',
url='http://code.google.com/p/html5lib/',
license="MIT License",
description='HTML parser based on the WHAT-WG Web Applications 1.0'
Expand All @@ -34,6 +34,5 @@
for name in os.listdir(os.path.join('html5lib'))
if os.path.isdir(os.path.join('html5lib',name)) and
not name.startswith('.')],
test_suite = "html5lib.tests.buildTestSuite",
tests_require = ['simplejson']
test_suite = "html5lib.tests.buildTestSuite"
)

0 comments on commit f5855f3

Please sign in to comment.