diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..d30c95b2 --- /dev/null +++ b/setup.py @@ -0,0 +1,34 @@ +from distutils.core import setup + +long_description="""HTML parser designed to follow the WHATWG HTML5 +specification. The parser is designed to handle all flavours of HTML and +parses invalid documents using well-defined error handling rules compatible +with the behaviour of major desktop web browsers. + +Output is to a tree structure; the current release supports output to +a custom tree similar to DOM and to ElementTree. +""" + +classifiers=[ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: MIT License', + 'Operating System :: OS Independent', + 'Programming Language :: Python', + 'Topic :: Software Development :: Libraries :: Python Modules', + 'Topic :: Text Processing :: Markup :: HTML' + ] + +setup(name='html5lib', + version='0.2', + url='http://code.google.com/p/html5lib/', + license="MIT License", + description='HTML parser based on the WHAT-WG Web Applications 1.0 ' + '("HTML5") specification', + long_description=long_description, + classifiers=classifiers, + maintainer='James Graham', + maintainer_email='jg307@cam.ac.uk', + packages=['html5lib', 'html5lib.treebuilders'], + package_dir = {'html5lib': 'src'} + ) diff --git a/tests/test_encoding.py b/tests/test_encoding.py index 37c60f9d..0ee10e33 100644 --- a/tests/test_encoding.py +++ b/tests/test_encoding.py @@ -6,12 +6,18 @@ import new import codecs +#RELEASE remove # XXX Allow us to import the sibling module os.chdir(os.path.split(os.path.abspath(__file__))[0]) sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src"))) import inputstream +#END RELEASE +#RELEASE add +#import html5lib +#from html5lib import inputstream +#END RELEASE def parseTestcase(testString): testString = testString.split("\n") diff --git a/tests/test_lxp.py b/tests/test_lxp.py index 458657ee..41a7a456 100644 --- a/tests/test_lxp.py +++ 
b/tests/test_lxp.py @@ -1,3 +1,4 @@ +#RELEASE remove if __name__ == '__main__': import os, sys os.chdir(os.path.split(os.path.abspath(__file__))[0]) @@ -5,6 +6,13 @@ from liberalxmlparser import * from treebuilders import dom +#END RELEASE + +#RELEASE add +#import html5lib +#from html5lib.treebuilders import dom +#from html5lib.liberalxmlparser import * +#END RELEASE import unittest, re diff --git a/tests/test_parser.py b/tests/test_parser.py index 2bafd815..3f8fbc65 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -5,6 +5,7 @@ import unittest import new +#RELEASE remove # XXX Allow us to import the sibling module os.chdir(os.path.split(os.path.abspath(__file__))[0]) sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src"))) @@ -14,6 +15,13 @@ #XXX - it would be nice to automate finding all treebuilders or to allow running just one from treebuilders import simpletree, etreefull, dom +#END RELEASE + +#RELEASE add +#import html5lib +#from html5lib import html5parser +#from html5lib.treebuilders import simpletree, etreefull, dom +#END RELEASE treetypes = {"simpletree":simpletree.TreeBuilder, "DOM":dom.TreeBuilder} diff --git a/tests/test_sax.py b/tests/test_sax.py index 1ce8a144..12cbaf3f 100644 --- a/tests/test_sax.py +++ b/tests/test_sax.py @@ -5,6 +5,7 @@ PREFERRED_XML_PARSERS = ["drv_libxml2"] +#RELEASE remove if __name__ == '__main__': import os, sys os.chdir(os.path.split(os.path.abspath(__file__))[0]) @@ -12,6 +13,13 @@ from liberalxmlparser import * from treebuilders import dom +#END RELEASE + +#RELEASE add +#import html5lib +#from html5lib.treebuilders import dom +#from html5lib.liberalxmlparser import * +#END RELEASE class SAXLogger: def __init__(self): diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py index 4427703b..104050fb 100644 --- a/tests/test_tokenizer.py +++ b/tests/test_tokenizer.py @@ -16,12 +16,20 @@ def load(f): return eval(input) load = staticmethod(load) +#RELEASE remove #Allow us to import the parent 
module os.chdir(os.path.split(os.path.abspath(__file__))[0]) sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src"))) from tokenizer import HTMLTokenizer import constants +#END RELEASE + +#RELEASE add +#import html5lib +#from html5lib.tokenizer import HTMLTokenizer +#from html5lib import constants +#END RELEASE class TokenizerTestParser(object): def __init__(self, contentModelFlag, lastStartTag=None):