diff options
author | Benjamin Peterson <benjamin@python.org> | 2011-03-02 23:40:36 (GMT) |
---|---|---|
committer | Benjamin Peterson <benjamin@python.org> | 2011-03-02 23:40:36 (GMT) |
commit | 863a0c3f53765d24e4e99c11571278ae4322f288 (patch) | |
tree | 5afdcd6c425cc790e5fbee11ddb4b601f85f39c4 /Lib/test/test_pulldom.py | |
parent | 8eda5f7cd9007b6e60be6458f981b504c1442071 (diff) | |
download | cpython-863a0c3f53765d24e4e99c11571278ae4322f288.zip cpython-863a0c3f53765d24e4e99c11571278ae4322f288.tar.gz cpython-863a0c3f53765d24e4e99c11571278ae4322f288.tar.bz2 |
add tests for xml.pulldom #9373
Thanks to Mark Smith for the patch.
Diffstat (limited to 'Lib/test/test_pulldom.py')
-rw-r--r-- | Lib/test/test_pulldom.py | 345 |
1 files changed, 345 insertions, 0 deletions
diff --git a/Lib/test/test_pulldom.py b/Lib/test/test_pulldom.py new file mode 100644 index 0000000..4171526 --- /dev/null +++ b/Lib/test/test_pulldom.py @@ -0,0 +1,345 @@ +import io +import unittest +import sys +import xml.sax + +from xml.sax.xmlreader import AttributesImpl +from xml.dom import pulldom + +from test.support import run_unittest, findfile + + +tstfile = findfile("test.xml", subdir="xmltestdata") + +# A handy XML snippet, containing attributes, a namespace prefix, and a +# self-closing tag: +SMALL_SAMPLE = """<?xml version="1.0"?> +<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books"> +<!-- A comment --> +<title>Introduction to XSL</title> +<hr/> +<p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p> +</html>""" + + +class PullDOMTestCase(unittest.TestCase): + + def test_parse(self): + """Minimal test of DOMEventStream.parse()""" + + # This just tests that parsing from a stream works. Actual parser + # semantics are tested using parseString with a more focused XML + # fragment. + + # Test with a filename: + list(pulldom.parse(tstfile)) + + # Test with a file object: + with open(tstfile, "rb") as fin: + list(pulldom.parse(fin)) + + def test_parse_semantics(self): + """Test DOMEventStream parsing semantics.""" + + items = pulldom.parseString(SMALL_SAMPLE) + evt, node = next(items) + # Just check the node is a Document: + self.assertTrue(hasattr(node, "createElement")) + self.assertEqual(pulldom.START_DOCUMENT, evt) + evt, node = next(items) + self.assertEqual(pulldom.START_ELEMENT, evt) + self.assertEqual("html", node.tagName) + self.assertEqual(2, len(node.attributes)) + self.assertEqual(node.attributes.getNamedItem("xmlns:xdc").value, + "http://www.xml.com/books") + evt, node = next(items) + self.assertEqual(pulldom.CHARACTERS, evt) # Line break + evt, node = next(items) + # XXX - A comment should be reported here! + # self.assertEqual(pulldom.COMMENT, evt) + # Line break after swallowed comment: + self.assertEqual(pulldom.CHARACTERS, evt) + evt, node = next(items) + self.assertEqual("title", node.tagName) + title_node = node + evt, node = next(items) + self.assertEqual(pulldom.CHARACTERS, evt) + self.assertEqual("Introduction to XSL", node.data) + evt, node = next(items) + self.assertEqual(pulldom.END_ELEMENT, evt) + self.assertEqual("title", node.tagName) + self.assertTrue(title_node is node) + evt, node = next(items) + self.assertEqual(pulldom.CHARACTERS, evt) + evt, node = next(items) + self.assertEqual(pulldom.START_ELEMENT, evt) + self.assertEqual("hr", node.tagName) + evt, node = next(items) + self.assertEqual(pulldom.END_ELEMENT, evt) + self.assertEqual("hr", node.tagName) + evt, node = next(items) + self.assertEqual(pulldom.CHARACTERS, evt) + evt, node = next(items) + self.assertEqual(pulldom.START_ELEMENT, evt) + self.assertEqual("p", node.tagName) + evt, node = next(items) + self.assertEqual(pulldom.START_ELEMENT, evt) + self.assertEqual("xdc:author", node.tagName) + evt, node = next(items) + self.assertEqual(pulldom.CHARACTERS, evt) + evt, node = next(items) + self.assertEqual(pulldom.END_ELEMENT, evt) + self.assertEqual("xdc:author", node.tagName) + evt, node = next(items) + self.assertEqual(pulldom.END_ELEMENT, evt) + evt, node = next(items) + self.assertEqual(pulldom.CHARACTERS, evt) + evt, node = next(items) + self.assertEqual(pulldom.END_ELEMENT, evt) + # XXX No END_DOCUMENT item is ever obtained: + #evt, node = next(items) + #self.assertEqual(pulldom.END_DOCUMENT, evt) + + def test_expandItem(self): + """Ensure expandItem works as expected.""" + items = pulldom.parseString(SMALL_SAMPLE) + # Loop through the nodes until we get to a "title" start tag: + for evt, item in items: + if evt == pulldom.START_ELEMENT and item.tagName == "title": + items.expandNode(item) + self.assertEqual(1, len(item.childNodes)) + break + else: + self.fail("No \"title\" element detected in SMALL_SAMPLE!") + # Loop until we get to the next start-element: + for evt, node in items: + if evt == pulldom.START_ELEMENT: + break + self.assertEqual("hr", node.tagName, + "expandNode did not leave DOMEventStream in the correct state.") + # Attempt to expand a standalone element: + items.expandNode(node) + self.assertEqual(next(items)[0], pulldom.CHARACTERS) + evt, node = next(items) + self.assertEqual(node.tagName, "p") + items.expandNode(node) + next(items) # Skip character data + evt, node = next(items) + self.assertEqual(node.tagName, "html") + with self.assertRaises(StopIteration): + next(items) + items.clear() + self.assertIsNone(items.parser) + self.assertIsNone(items.stream) + + @unittest.expectedFailure + def test_comment(self): + """PullDOM does not receive "comment" events.""" + items = pulldom.parseString(SMALL_SAMPLE) + for evt, _ in items: + if evt == pulldom.COMMENT: + break + else: + self.fail("No comment was encountered") + + @unittest.expectedFailure + def test_end_document(self): + """PullDOM does not receive "end-document" events.""" + items = pulldom.parseString(SMALL_SAMPLE) + # Read all of the nodes up to and including </html>: + for evt, node in items: + if evt == pulldom.END_ELEMENT and node.tagName == "html": + break + try: + # Assert that the next node is END_DOCUMENT: + evt, node = next(items) + self.assertEqual(pulldom.END_DOCUMENT, evt) + except StopIteration: + self.fail( + "Ran out of events, but should have received END_DOCUMENT") + + +class ThoroughTestCase(unittest.TestCase): + """Test the hard-to-reach parts of pulldom.""" + + def test_thorough_parse(self): + """Test some of the hard-to-reach parts of PullDOM.""" + self._test_thorough(pulldom.parse(None, parser=SAXExerciser())) + + @unittest.expectedFailure + def test_sax2dom_fail(self): + """SAX2DOM can"t handle a PI before the root element.""" + pd = SAX2DOMTestHelper(None, SAXExerciser(), 12) + self._test_thorough(pd) + + def test_thorough_sax2dom(self): + """Test some of the hard-to-reach parts of SAX2DOM.""" + pd = SAX2DOMTestHelper(None, SAX2DOMExerciser(), 12) + self._test_thorough(pd, False) + + def _test_thorough(self, pd, before_root=True): + """Test some of the hard-to-reach parts of the parser, using a mock + parser.""" + + evt, node = next(pd) + self.assertEqual(pulldom.START_DOCUMENT, evt) + # Just check the node is a Document: + self.assertTrue(hasattr(node, "createElement")) + + if before_root: + evt, node = next(pd) + self.assertEqual(pulldom.COMMENT, evt) + self.assertEqual("a comment", node.data) + evt, node = next(pd) + self.assertEqual(pulldom.PROCESSING_INSTRUCTION, evt) + self.assertEqual("target", node.target) + self.assertEqual("data", node.data) + + evt, node = next(pd) + self.assertEqual(pulldom.START_ELEMENT, evt) + self.assertEqual("html", node.tagName) + + evt, node = next(pd) + self.assertEqual(pulldom.COMMENT, evt) + self.assertEqual("a comment", node.data) + evt, node = next(pd) + self.assertEqual(pulldom.PROCESSING_INSTRUCTION, evt) + self.assertEqual("target", node.target) + self.assertEqual("data", node.data) + + evt, node = next(pd) + self.assertEqual(pulldom.START_ELEMENT, evt) + self.assertEqual("p", node.tagName) + + evt, node = next(pd) + self.assertEqual(pulldom.CHARACTERS, evt) + self.assertEqual("text", node.data) + evt, node = next(pd) + self.assertEqual(pulldom.END_ELEMENT, evt) + self.assertEqual("p", node.tagName) + evt, node = next(pd) + self.assertEqual(pulldom.END_ELEMENT, evt) + self.assertEqual("html", node.tagName) + evt, node = next(pd) + self.assertEqual(pulldom.END_DOCUMENT, evt) + + +class SAXExerciser(object): + """A fake sax parser that calls some of the harder-to-reach sax methods to + ensure it emits the correct events""" + + def setContentHandler(self, handler): + self._handler = handler + + def parse(self, _): + h = self._handler + h.startDocument() + + # The next two items ensure that items preceding the first + # start_element are properly stored and emitted: + h.comment("a comment") + h.processingInstruction("target", "data") + + h.startElement("html", AttributesImpl({})) + + h.comment("a comment") + h.processingInstruction("target", "data") + + h.startElement("p", AttributesImpl({"class": "paraclass"})) + h.characters("text") + h.endElement("p") + h.endElement("html") + h.endDocument() + + def stub(self, *args, **kwargs): + """Stub method. Does nothing.""" + pass + setProperty = stub + setFeature = stub + + +class SAX2DOMExerciser(SAXExerciser): + """The same as SAXExerciser, but without the processing instruction and + comment before the root element, because S2D can"t handle it""" + + def parse(self, _): + h = self._handler + h.startDocument() + h.startElement("html", AttributesImpl({})) + h.comment("a comment") + h.processingInstruction("target", "data") + h.startElement("p", AttributesImpl({"class": "paraclass"})) + h.characters("text") + h.endElement("p") + h.endElement("html") + h.endDocument() + + +class SAX2DOMTestHelper(pulldom.DOMEventStream): + """Allows us to drive SAX2DOM from a DOMEventStream.""" + + def reset(self): + self.pulldom = pulldom.SAX2DOM() + # This content handler relies on namespace support + self.parser.setFeature(xml.sax.handler.feature_namespaces, 1) + self.parser.setContentHandler(self.pulldom) + + +class SAX2DOMTestCase(unittest.TestCase): + + def confirm(self, test, testname="Test"): + self.assertTrue(test, testname) + + def test_basic(self): + """Ensure SAX2DOM can parse from a stream.""" + with io.StringIO(SMALL_SAMPLE) as fin: + sd = SAX2DOMTestHelper(fin, xml.sax.make_parser(), + len(SMALL_SAMPLE)) + for evt, node in sd: + if evt == pulldom.START_ELEMENT and node.tagName == "html": + break + # Because the buffer is the same length as the XML, all the + # nodes should have been parsed and added: + self.assertGreater(len(node.childNodes), 0) + + def testSAX2DOM(self): + """Ensure SAX2DOM expands nodes as expected.""" + sax2dom = pulldom.SAX2DOM() + sax2dom.startDocument() + sax2dom.startElement("doc", {}) + sax2dom.characters("text") + sax2dom.startElement("subelm", {}) + sax2dom.characters("text") + sax2dom.endElement("subelm") + sax2dom.characters("text") + sax2dom.endElement("doc") + sax2dom.endDocument() + + doc = sax2dom.document + root = doc.documentElement + (text1, elm1, text2) = root.childNodes + text3 = elm1.childNodes[0] + + self.assertIsNone(text1.previousSibling) + self.assertIs(text1.nextSibling, elm1) + self.assertIs(elm1.previousSibling, text1) + self.assertIs(elm1.nextSibling, text2) + self.assertIs(text2.previousSibling, elm1) + self.assertIsNone(text2.nextSibling) + self.assertIsNone(text3.previousSibling) + self.assertIsNone(text3.nextSibling) + + self.assertIs(root.parentNode, doc) + self.assertIs(text1.parentNode, root) + self.assertIs(elm1.parentNode, root) + self.assertIs(text2.parentNode, root) + self.assertIs(text3.parentNode, elm1) + doc.unlink() + + +def test_main(): + run_unittest(PullDOMTestCase, ThoroughTestCase, SAX2DOMTestCase) + + +if __name__ == "__main__": + test_main() |