forked from html5lib/html5lib-python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
support.py
177 lines (139 loc) · 5.37 KB
/
support.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
from __future__ import absolute_import, division, unicode_literals
import os
import sys
import codecs
import glob
import xml.sax.handler
base_path = os.path.split(__file__)[0]
test_dir = os.path.join(base_path, 'testdata')
sys.path.insert(0, os.path.abspath(os.path.join(base_path,
os.path.pardir,
os.path.pardir)))
from html5lib import treebuilders
del base_path
# Build a dict of avaliable trees
treeTypes = {"DOM": treebuilders.getTreeBuilder("dom")}
# Try whatever etree implementations are avaliable from a list that are
#"supposed" to work
try:
import xml.etree.ElementTree as ElementTree
treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
except ImportError:
try:
import elementtree.ElementTree as ElementTree
treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
except ImportError:
pass
try:
import xml.etree.cElementTree as cElementTree
treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
except ImportError:
try:
import cElementTree
treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
except ImportError:
pass
try:
import lxml.etree as lxml # flake8: noqa
except ImportError:
pass
else:
treeTypes['lxml'] = treebuilders.getTreeBuilder("lxml")
def get_data_files(subdirectory, files='*.dat'):
return glob.glob(os.path.join(test_dir, subdirectory, files))
class DefaultDict(dict):
def __init__(self, default, *args, **kwargs):
self.default = default
dict.__init__(self, *args, **kwargs)
def __getitem__(self, key):
return dict.get(self, key, self.default)
class TestData(object):
def __init__(self, filename, newTestHeading="data", encoding="utf8"):
if encoding is None:
self.f = open(filename, mode="rb")
else:
self.f = codecs.open(filename, encoding=encoding)
self.encoding = encoding
self.newTestHeading = newTestHeading
def __del__(self):
self.f.close()
def __iter__(self):
data = DefaultDict(None)
key = None
for line in self.f:
heading = self.isSectionHeading(line)
if heading:
if data and heading == self.newTestHeading:
# Remove trailing newline
data[key] = data[key][:-1]
yield self.normaliseOutput(data)
data = DefaultDict(None)
key = heading
data[key] = "" if self.encoding else b""
elif key is not None:
data[key] += line
if data:
yield self.normaliseOutput(data)
def isSectionHeading(self, line):
"""If the current heading is a test section heading return the heading,
otherwise return False"""
# print(line)
if line.startswith("#" if self.encoding else b"#"):
return line[1:].strip()
else:
return False
def normaliseOutput(self, data):
# Remove trailing newlines
for key, value in data.items():
if value.endswith("\n" if self.encoding else b"\n"):
data[key] = value[:-1]
return data
def convert(stripChars):
def convertData(data):
"""convert the output of str(document) to the format used in the testcases"""
data = data.split("\n")
rv = []
for line in data:
if line.startswith("|"):
rv.append(line[stripChars:])
else:
rv.append(line)
return "\n".join(rv)
return convertData
convertExpected = convert(2)
def errorMessage(input, expected, actual):
msg = ("Input:\n%s\nExpected:\n%s\nRecieved\n%s\n" %
(repr(input), repr(expected), repr(actual)))
if sys.version_info.major == 2:
msg = msg.encode("ascii", "backslashreplace")
return msg
class TracingSaxHandler(xml.sax.handler.ContentHandler):
def __init__(self):
xml.sax.handler.ContentHandler.__init__(self)
self.visited = []
def startDocument(self):
self.visited.append('startDocument')
def endDocument(self):
self.visited.append('endDocument')
def startPrefixMapping(self, prefix, uri):
# These are ignored as their order is not guaranteed
pass
def endPrefixMapping(self, prefix):
# These are ignored as their order is not guaranteed
pass
def startElement(self, name, attrs):
self.visited.append(('startElement', name, attrs))
def endElement(self, name):
self.visited.append(('endElement', name))
def startElementNS(self, name, qname, attrs):
self.visited.append(('startElementNS', name, qname, dict(attrs)))
def endElementNS(self, name, qname):
self.visited.append(('endElementNS', name, qname))
def characters(self, content):
self.visited.append(('characters', content))
def ignorableWhitespace(self, whitespace):
self.visited.append(('ignorableWhitespace', whitespace))
def processingInstruction(self, target, data):
self.visited.append(('processingInstruction', target, data))
def skippedEntity(self, name):
self.visited.append(('skippedEntity', name))