1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
|
import io
import unittest
import xml.sax
from xml.sax.xmlreader import AttributesImpl
from xml.dom import pulldom
from test.support import findfile
tstfile = findfile("test.xml", subdir="xmltestdata")
# A handy XML snippet, containing attributes, a namespace prefix, and a
# self-closing tag:
SMALL_SAMPLE = """<?xml version="1.0"?>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books">
<!-- A comment -->
<title>Introduction to XSL</title>
<hr/>
<p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p>
</html>"""
class PullDOMTestCase(unittest.TestCase):
def test_parse(self):
"""Minimal test of DOMEventStream.parse()"""
# This just tests that parsing from a stream works. Actual parser
# semantics are tested using parseString with a more focused XML
# fragment.
# Test with a filename:
handler = pulldom.parse(tstfile)
self.addCleanup(handler.stream.close)
list(handler)
# Test with a file object:
with open(tstfile, "rb") as fin:
list(pulldom.parse(fin))
def test_parse_semantics(self):
"""Test DOMEventStream parsing semantics."""
items = pulldom.parseString(SMALL_SAMPLE)
evt, node = next(items)
# Just check the node is a Document:
self.assertTrue(hasattr(node, "createElement"))
self.assertEqual(pulldom.START_DOCUMENT, evt)
evt, node = next(items)
self.assertEqual(pulldom.START_ELEMENT, evt)
self.assertEqual("html", node.tagName)
self.assertEqual(2, len(node.attributes))
self.assertEqual(node.attributes.getNamedItem("xmlns:xdc").value,
"http://www.xml.com/books")
evt, node = next(items)
self.assertEqual(pulldom.CHARACTERS, evt) # Line break
evt, node = next(items)
# XXX - A comment should be reported here!
# self.assertEqual(pulldom.COMMENT, evt)
# Line break after swallowed comment:
self.assertEqual(pulldom.CHARACTERS, evt)
evt, node = next(items)
self.assertEqual("title", node.tagName)
title_node = node
evt, node = next(items)
self.assertEqual(pulldom.CHARACTERS, evt)
self.assertEqual("Introduction to XSL", node.data)
evt, node = next(items)
self.assertEqual(pulldom.END_ELEMENT, evt)
self.assertEqual("title", node.tagName)
self.assertTrue(title_node is node)
evt, node = next(items)
self.assertEqual(pulldom.CHARACTERS, evt)
evt, node = next(items)
self.assertEqual(pulldom.START_ELEMENT, evt)
self.assertEqual("hr", node.tagName)
evt, node = next(items)
self.assertEqual(pulldom.END_ELEMENT, evt)
self.assertEqual("hr", node.tagName)
evt, node = next(items)
self.assertEqual(pulldom.CHARACTERS, evt)
evt, node = next(items)
self.assertEqual(pulldom.START_ELEMENT, evt)
self.assertEqual("p", node.tagName)
evt, node = next(items)
self.assertEqual(pulldom.START_ELEMENT, evt)
self.assertEqual("xdc:author", node.tagName)
evt, node = next(items)
self.assertEqual(pulldom.CHARACTERS, evt)
evt, node = next(items)
self.assertEqual(pulldom.END_ELEMENT, evt)
self.assertEqual("xdc:author", node.tagName)
evt, node = next(items)
self.assertEqual(pulldom.END_ELEMENT, evt)
evt, node = next(items)
self.assertEqual(pulldom.CHARACTERS, evt)
evt, node = next(items)
self.assertEqual(pulldom.END_ELEMENT, evt)
# XXX No END_DOCUMENT item is ever obtained:
#evt, node = next(items)
#self.assertEqual(pulldom.END_DOCUMENT, evt)
def test_expandItem(self):
"""Ensure expandItem works as expected."""
items = pulldom.parseString(SMALL_SAMPLE)
# Loop through the nodes until we get to a "title" start tag:
for evt, item in items:
if evt == pulldom.START_ELEMENT and item.tagName == "title":
items.expandNode(item)
self.assertEqual(1, len(item.childNodes))
break
else:
self.fail("No \"title\" element detected in SMALL_SAMPLE!")
# Loop until we get to the next start-element:
for evt, node in items:
if evt == pulldom.START_ELEMENT:
break
self.assertEqual("hr", node.tagName,
"expandNode did not leave DOMEventStream in the correct state.")
# Attempt to expand a standalone element:
items.expandNode(node)
self.assertEqual(next(items)[0], pulldom.CHARACTERS)
evt, node = next(items)
self.assertEqual(node.tagName, "p")
items.expandNode(node)
next(items) # Skip character data
evt, node = next(items)
self.assertEqual(node.tagName, "html")
with self.assertRaises(StopIteration):
next(items)
items.clear()
self.assertIsNone(items.parser)
self.assertIsNone(items.stream)
@unittest.expectedFailure
def test_comment(self):
"""PullDOM does not receive "comment" events."""
items = pulldom.parseString(SMALL_SAMPLE)
for evt, _ in items:
if evt == pulldom.COMMENT:
break
else:
self.fail("No comment was encountered")
@unittest.expectedFailure
def test_end_document(self):
"""PullDOM does not receive "end-document" events."""
items = pulldom.parseString(SMALL_SAMPLE)
# Read all of the nodes up to and including </html>:
for evt, node in items:
if evt == pulldom.END_ELEMENT and node.tagName == "html":
break
try:
# Assert that the next node is END_DOCUMENT:
evt, node = next(items)
self.assertEqual(pulldom.END_DOCUMENT, evt)
except StopIteration:
self.fail(
"Ran out of events, but should have received END_DOCUMENT")
class ThoroughTestCase(unittest.TestCase):
"""Test the hard-to-reach parts of pulldom."""
def test_thorough_parse(self):
"""Test some of the hard-to-reach parts of PullDOM."""
self._test_thorough(pulldom.parse(None, parser=SAXExerciser()))
@unittest.expectedFailure
def test_sax2dom_fail(self):
"""SAX2DOM can"t handle a PI before the root element."""
pd = SAX2DOMTestHelper(None, SAXExerciser(), 12)
self._test_thorough(pd)
def test_thorough_sax2dom(self):
"""Test some of the hard-to-reach parts of SAX2DOM."""
pd = SAX2DOMTestHelper(None, SAX2DOMExerciser(), 12)
self._test_thorough(pd, False)
def _test_thorough(self, pd, before_root=True):
"""Test some of the hard-to-reach parts of the parser, using a mock
parser."""
evt, node = next(pd)
self.assertEqual(pulldom.START_DOCUMENT, evt)
# Just check the node is a Document:
self.assertTrue(hasattr(node, "createElement"))
if before_root:
evt, node = next(pd)
self.assertEqual(pulldom.COMMENT, evt)
self.assertEqual("a comment", node.data)
evt, node = next(pd)
self.assertEqual(pulldom.PROCESSING_INSTRUCTION, evt)
self.assertEqual("target", node.target)
self.assertEqual("data", node.data)
evt, node = next(pd)
self.assertEqual(pulldom.START_ELEMENT, evt)
self.assertEqual("html", node.tagName)
evt, node = next(pd)
self.assertEqual(pulldom.COMMENT, evt)
self.assertEqual("a comment", node.data)
evt, node = next(pd)
self.assertEqual(pulldom.PROCESSING_INSTRUCTION, evt)
self.assertEqual("target", node.target)
self.assertEqual("data", node.data)
evt, node = next(pd)
self.assertEqual(pulldom.START_ELEMENT, evt)
self.assertEqual("p", node.tagName)
evt, node = next(pd)
self.assertEqual(pulldom.CHARACTERS, evt)
self.assertEqual("text", node.data)
evt, node = next(pd)
self.assertEqual(pulldom.END_ELEMENT, evt)
self.assertEqual("p", node.tagName)
evt, node = next(pd)
self.assertEqual(pulldom.END_ELEMENT, evt)
self.assertEqual("html", node.tagName)
evt, node = next(pd)
self.assertEqual(pulldom.END_DOCUMENT, evt)
class SAXExerciser(object):
"""A fake sax parser that calls some of the harder-to-reach sax methods to
ensure it emits the correct events"""
def setContentHandler(self, handler):
self._handler = handler
def parse(self, _):
h = self._handler
h.startDocument()
# The next two items ensure that items preceding the first
# start_element are properly stored and emitted:
h.comment("a comment")
h.processingInstruction("target", "data")
h.startElement("html", AttributesImpl({}))
h.comment("a comment")
h.processingInstruction("target", "data")
h.startElement("p", AttributesImpl({"class": "paraclass"}))
h.characters("text")
h.endElement("p")
h.endElement("html")
h.endDocument()
def stub(self, *args, **kwargs):
"""Stub method. Does nothing."""
pass
setProperty = stub
setFeature = stub
class SAX2DOMExerciser(SAXExerciser):
"""The same as SAXExerciser, but without the processing instruction and
comment before the root element, because S2D can"t handle it"""
def parse(self, _):
h = self._handler
h.startDocument()
h.startElement("html", AttributesImpl({}))
h.comment("a comment")
h.processingInstruction("target", "data")
h.startElement("p", AttributesImpl({"class": "paraclass"}))
h.characters("text")
h.endElement("p")
h.endElement("html")
h.endDocument()
class SAX2DOMTestHelper(pulldom.DOMEventStream):
"""Allows us to drive SAX2DOM from a DOMEventStream."""
def reset(self):
self.pulldom = pulldom.SAX2DOM()
# This content handler relies on namespace support
self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
self.parser.setContentHandler(self.pulldom)
class SAX2DOMTestCase(unittest.TestCase):
def confirm(self, test, testname="Test"):
self.assertTrue(test, testname)
def test_basic(self):
"""Ensure SAX2DOM can parse from a stream."""
with io.StringIO(SMALL_SAMPLE) as fin:
sd = SAX2DOMTestHelper(fin, xml.sax.make_parser(),
len(SMALL_SAMPLE))
for evt, node in sd:
if evt == pulldom.START_ELEMENT and node.tagName == "html":
break
# Because the buffer is the same length as the XML, all the
# nodes should have been parsed and added:
self.assertGreater(len(node.childNodes), 0)
def testSAX2DOM(self):
"""Ensure SAX2DOM expands nodes as expected."""
sax2dom = pulldom.SAX2DOM()
sax2dom.startDocument()
sax2dom.startElement("doc", {})
sax2dom.characters("text")
sax2dom.startElement("subelm", {})
sax2dom.characters("text")
sax2dom.endElement("subelm")
sax2dom.characters("text")
sax2dom.endElement("doc")
sax2dom.endDocument()
doc = sax2dom.document
root = doc.documentElement
(text1, elm1, text2) = root.childNodes
text3 = elm1.childNodes[0]
self.assertIsNone(text1.previousSibling)
self.assertIs(text1.nextSibling, elm1)
self.assertIs(elm1.previousSibling, text1)
self.assertIs(elm1.nextSibling, text2)
self.assertIs(text2.previousSibling, elm1)
self.assertIsNone(text2.nextSibling)
self.assertIsNone(text3.previousSibling)
self.assertIsNone(text3.nextSibling)
self.assertIs(root.parentNode, doc)
self.assertIs(text1.parentNode, root)
self.assertIs(elm1.parentNode, root)
self.assertIs(text2.parentNode, root)
self.assertIs(text3.parentNode, elm1)
doc.unlink()
if __name__ == "__main__":
unittest.main()
|