diff options
author | Eli Bendersky <eliben@gmail.com> | 2012-06-15 04:42:50 (GMT) |
---|---|---|
committer | Eli Bendersky <eliben@gmail.com> | 2012-06-15 04:42:50 (GMT) |
commit | 64d11e60f23f6b1435704adb87ebf818e5a4c0c1 (patch) | |
tree | ece3c4337e34bdb0408016b1eb38428343b75873 /Lib | |
parent | fedb04a37aff9f7a2cfe746f7fc4683e74e38bf0 (diff) | |
download | cpython-64d11e60f23f6b1435704adb87ebf818e5a4c0c1.zip cpython-64d11e60f23f6b1435704adb87ebf818e5a4c0c1.tar.gz cpython-64d11e60f23f6b1435704adb87ebf818e5a4c0c1.tar.bz2 |
Replace the iter/itertext methods of Element in _elementtree with true C implementations, instead of the bootstrapped Python code. In addition to being cleaner (removing the last remains of the bootstrapping code in _elementtree), this gives a 10x performance boost for iter() on large documents.
Also reorganize the tests a bit to make them more robust.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/test/test_xml_etree.py | 247 | ||||
-rw-r--r-- | Lib/test/test_xml_etree_c.py | 28 | ||||
-rw-r--r-- | Lib/xml/etree/ElementTree.py | 6 |
3 files changed, 142 insertions, 139 deletions
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 49a5633..bee6329 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -23,7 +23,8 @@ import weakref from test import support from test.support import findfile, import_fresh_module, gc_collect -pyET = import_fresh_module('xml.etree.ElementTree', blocked=['_elementtree']) +pyET = None +ET = None SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata") try: @@ -209,10 +210,8 @@ def interface(): These methods return an iterable. See bug 6472. - >>> check_method(element.iter("tag").__next__) >>> check_method(element.iterfind("tag").__next__) >>> check_method(element.iterfind("*").__next__) - >>> check_method(tree.iter("tag").__next__) >>> check_method(tree.iterfind("tag").__next__) >>> check_method(tree.iterfind("*").__next__) @@ -291,42 +290,6 @@ def cdata(): '<tag>hello</tag>' """ -# Only with Python implementation -def simplefind(): - """ - Test find methods using the elementpath fallback. - - >>> ElementTree = pyET - - >>> CurrentElementPath = ElementTree.ElementPath - >>> ElementTree.ElementPath = ElementTree._SimpleElementPath() - >>> elem = ElementTree.XML(SAMPLE_XML) - >>> elem.find("tag").tag - 'tag' - >>> ElementTree.ElementTree(elem).find("tag").tag - 'tag' - >>> elem.findtext("tag") - 'text' - >>> elem.findtext("tog") - >>> elem.findtext("tog", "default") - 'default' - >>> ElementTree.ElementTree(elem).findtext("tag") - 'text' - >>> summarize_list(elem.findall("tag")) - ['tag', 'tag'] - >>> summarize_list(elem.findall(".//tag")) - ['tag', 'tag', 'tag'] - - Path syntax doesn't work in this case. - - >>> elem.find("section/tag") - >>> elem.findtext("section/tag") - >>> summarize_list(elem.findall("section/tag")) - [] - - >>> ElementTree.ElementPath = CurrentElementPath - """ - def find(): """ Test find methods (including xpath syntax). @@ -1002,36 +965,6 @@ def methods(): '1 < 2\n' """ -def iterators(): - """ - Test iterators. 
- - >>> e = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>") - >>> summarize_list(e.iter()) - ['html', 'body', 'i'] - >>> summarize_list(e.find("body").iter()) - ['body', 'i'] - >>> summarize(next(e.iter())) - 'html' - >>> "".join(e.itertext()) - 'this is a paragraph...' - >>> "".join(e.find("body").itertext()) - 'this is a paragraph.' - >>> next(e.itertext()) - 'this is a ' - - Method iterparse should return an iterator. See bug 6472. - - >>> sourcefile = serialize(e, to_string=False) - >>> next(ET.iterparse(sourcefile)) # doctest: +ELLIPSIS - ('end', <Element 'i' at 0x...>) - - >>> tree = ET.ElementTree(None) - >>> tree.iter() - Traceback (most recent call last): - AttributeError: 'NoneType' object has no attribute 'iter' - """ - ENTITY_XML = """\ <!DOCTYPE points [ <!ENTITY % user-entities SYSTEM 'user-entities.xml'> @@ -1339,6 +1272,7 @@ XINCLUDE["default.xml"] = """\ </document> """.format(html.escape(SIMPLE_XMLFILE, True)) + def xinclude_loader(href, parse="xml", encoding=None): try: data = XINCLUDE[href] @@ -1411,22 +1345,6 @@ def xinclude(): >>> # print(serialize(document)) # C5 """ -def xinclude_default(): - """ - >>> from xml.etree import ElementInclude - - >>> document = xinclude_loader("default.xml") - >>> ElementInclude.include(document) - >>> print(serialize(document)) # default - <document> - <p>Example.</p> - <root> - <element key="value">text</element> - <element>text</element>tail - <empty-element /> - </root> - </document> - """ # # badly formatted xi:include tags @@ -1917,9 +1835,8 @@ class ElementTreeTest(unittest.TestCase): self.assertIsInstance(ET.QName, type) self.assertIsInstance(ET.ElementTree, type) self.assertIsInstance(ET.Element, type) - # XXX issue 14128 with C ElementTree - # self.assertIsInstance(ET.TreeBuilder, type) - # self.assertIsInstance(ET.XMLParser, type) + self.assertIsInstance(ET.TreeBuilder, type) + self.assertIsInstance(ET.XMLParser, type) def test_Element_subclass_trivial(self): class 
MyElement(ET.Element): @@ -1953,6 +1870,73 @@ class ElementTreeTest(unittest.TestCase): self.assertEqual(mye.newmethod(), 'joe') +class ElementIterTest(unittest.TestCase): + def _ilist(self, elem, tag=None): + return summarize_list(elem.iter(tag)) + + def test_basic(self): + doc = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>") + self.assertEqual(self._ilist(doc), ['html', 'body', 'i']) + self.assertEqual(self._ilist(doc.find('body')), ['body', 'i']) + self.assertEqual(next(doc.iter()).tag, 'html') + self.assertEqual(''.join(doc.itertext()), 'this is a paragraph...') + self.assertEqual(''.join(doc.find('body').itertext()), + 'this is a paragraph.') + self.assertEqual(next(doc.itertext()), 'this is a ') + + # iterparse should return an iterator + sourcefile = serialize(doc, to_string=False) + self.assertEqual(next(ET.iterparse(sourcefile))[0], 'end') + + tree = ET.ElementTree(None) + self.assertRaises(AttributeError, tree.iter) + + def test_corners(self): + # single root, no subelements + a = ET.Element('a') + self.assertEqual(self._ilist(a), ['a']) + + # one child + b = ET.SubElement(a, 'b') + self.assertEqual(self._ilist(a), ['a', 'b']) + + # one child and one grandchild + c = ET.SubElement(b, 'c') + self.assertEqual(self._ilist(a), ['a', 'b', 'c']) + + # two children, only first with grandchild + d = ET.SubElement(a, 'd') + self.assertEqual(self._ilist(a), ['a', 'b', 'c', 'd']) + + # replace first child by second + a[0] = a[1] + del a[1] + self.assertEqual(self._ilist(a), ['a', 'd']) + + def test_iter_by_tag(self): + doc = ET.XML(''' + <document> + <house> + <room>bedroom1</room> + <room>bedroom2</room> + </house> + <shed>nothing here + </shed> + <house> + <room>bedroom8</room> + </house> + </document>''') + + self.assertEqual(self._ilist(doc, 'room'), ['room'] * 3) + self.assertEqual(self._ilist(doc, 'house'), ['house'] * 2) + + # make sure both tag=None and tag='*' return all tags + all_tags = ['document', 'house', 'room', 'room', + 'shed', 
'house', 'room'] + self.assertEqual(self._ilist(doc), all_tags) + self.assertEqual(self._ilist(doc, '*'), all_tags) + + class TreeBuilderTest(unittest.TestCase): sample1 = ('<!DOCTYPE html PUBLIC' ' "-//W3C//DTD XHTML 1.0 Transitional//EN"' @@ -2026,7 +2010,20 @@ class TreeBuilderTest(unittest.TestCase): ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN', 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')) - +class XincludeTest(unittest.TestCase): + def test_xinclude_default(self): + from xml.etree import ElementInclude + doc = xinclude_loader('default.xml') + ElementInclude.include(doc) + s = serialize(doc) + self.assertEqual(s.strip(), '''<document> + <p>Example.</p> + <root> + <element key="value">text</element> + <element>text</element>tail + <empty-element /> +</root> +</document>''') class XMLParserTest(unittest.TestCase): sample1 = '<file><line>22</line></file>' sample2 = ('<!DOCTYPE html PUBLIC' @@ -2073,13 +2070,6 @@ class XMLParserTest(unittest.TestCase): 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')) -class NoAcceleratorTest(unittest.TestCase): - # Test that the C accelerator was not imported for pyET - def test_correct_import_pyET(self): - self.assertEqual(pyET.Element.__module__, 'xml.etree.ElementTree') - self.assertEqual(pyET.SubElement.__module__, 'xml.etree.ElementTree') - - class NamespaceParseTest(unittest.TestCase): def test_find_with_namespace(self): nsmap = {'h': 'hello', 'f': 'foo'} @@ -2090,7 +2080,6 @@ class NamespaceParseTest(unittest.TestCase): self.assertEqual(len(doc.findall('.//{foo}name', nsmap)), 1) - class ElementSlicingTest(unittest.TestCase): def _elem_tags(self, elemlist): return [e.tag for e in elemlist] @@ -2232,6 +2221,36 @@ class KeywordArgsTest(unittest.TestCase): with self.assertRaisesRegex(TypeError, 'must be dict, not str'): ET.Element('a', attrib="I'm not a dict") +# -------------------------------------------------------------------- + +@unittest.skipUnless(pyET, 'only for the Python version') +class 
NoAcceleratorTest(unittest.TestCase): + # Test that the C accelerator was not imported for pyET + def test_correct_import_pyET(self): + self.assertEqual(pyET.Element.__module__, 'xml.etree.ElementTree') + self.assertEqual(pyET.SubElement.__module__, 'xml.etree.ElementTree') + + +class ElementPathFallbackTest(unittest.TestCase): + def test_fallback(self): + current_ElementPath = ET.ElementPath + ET.ElementPath = ET._SimpleElementPath() + elem = ET.XML(SAMPLE_XML) + self.assertEqual(elem.find('tag').tag, 'tag') + self.assertEqual(ET.ElementTree(elem).find('tag').tag, 'tag') + self.assertEqual(elem.findtext('tag'), 'text') + self.assertIsNone(elem.findtext('tog')) + self.assertEqual(elem.findtext('tog', 'default'), 'default') + self.assertEqual(ET.ElementTree(elem).findtext('tag'), 'text') + self.assertEqual(summarize_list(elem.findall('tag')), ['tag', 'tag']) + self.assertEqual(summarize_list(elem.findall('.//tag')), + ['tag', 'tag', 'tag']) + + self.assertIsNone(elem.find('section/tag')) + self.assertIsNone(elem.findtext('section/tag')) + self.assertEqual(summarize_list(elem.findall('section/tag')), []) + + ET.ElementPath = current_ElementPath # -------------------------------------------------------------------- @@ -2276,31 +2295,43 @@ class CleanContext(object): self.checkwarnings.__exit__(*args) -def test_main(module=pyET): - from test import test_xml_etree +def test_main(module=None): + # When invoked without a module, runs the Python ET tests by loading pyET. + # Otherwise, uses the given module as the ET. 
+ if module is None: + global pyET + pyET = import_fresh_module('xml.etree.ElementTree', + blocked=['_elementtree']) + module = pyET - # The same doctests are used for both the Python and the C implementations - test_xml_etree.ET = module + global ET + ET = module test_classes = [ ElementSlicingTest, BasicElementTest, StringIOTest, ParseErrorTest, + XincludeTest, ElementTreeTest, - NamespaceParseTest, + ElementIterTest, TreeBuilderTest, - XMLParserTest, - KeywordArgsTest] - if module is pyET: - # Run the tests specific to the Python implementation - test_classes += [NoAcceleratorTest] + ] + + # These tests will only run for the pure-Python version that doesn't import + # _elementtree. We can't use skipUnless here, because pyET is filled in only + # after the module is loaded. + if pyET: + test_classes.extend([ + NoAcceleratorTest, + ElementPathFallbackTest, + ]) support.run_unittest(*test_classes) # XXX the C module should give the same warnings as the Python module with CleanContext(quiet=(module is not pyET)): - support.run_doctest(test_xml_etree, verbosity=True) + support.run_doctest(sys.modules[__name__], verbosity=True) if __name__ == '__main__': test_main() diff --git a/Lib/test/test_xml_etree_c.py b/Lib/test/test_xml_etree_c.py index 10416d2..142a22f 100644 --- a/Lib/test/test_xml_etree_c.py +++ b/Lib/test/test_xml_etree_c.py @@ -8,31 +8,6 @@ cET = import_fresh_module('xml.etree.ElementTree', fresh=['_elementtree']) cET_alias = import_fresh_module('xml.etree.cElementTree', fresh=['_elementtree', 'xml.etree']) -# cElementTree specific tests - -def sanity(): - r""" - Import sanity. - - Issue #6697. - - >>> cElementTree = cET - >>> e = cElementTree.Element('a') - >>> getattr(e, '\uD800') # doctest: +ELLIPSIS - Traceback (most recent call last): - ... - UnicodeEncodeError: ... - - >>> p = cElementTree.XMLParser() - >>> p.version.split()[0] - 'Expat' - >>> getattr(p, '\uD800') - Traceback (most recent call last): - ... 
- AttributeError: 'XMLParser' object has no attribute '\ud800' - """ - - class MiscTests(unittest.TestCase): # Issue #8651. @support.bigmemtest(size=support._2G + 100, memuse=1) @@ -46,6 +21,7 @@ class MiscTests(unittest.TestCase): finally: data = None + @unittest.skipUnless(cET, 'requires _elementtree') class TestAliasWorking(unittest.TestCase): # Test that the cET alias module is alive @@ -53,6 +29,7 @@ class TestAliasWorking(unittest.TestCase): e = cET_alias.Element('foo') self.assertEqual(e.tag, 'foo') + @unittest.skipUnless(cET, 'requires _elementtree') class TestAcceleratorImported(unittest.TestCase): # Test that the C accelerator was imported, as expected @@ -67,7 +44,6 @@ def test_main(): from test import test_xml_etree, test_xml_etree_c # Run the tests specific to the C implementation - support.run_doctest(test_xml_etree_c, verbosity=True) support.run_unittest( MiscTests, TestAliasWorking, diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index e068fc2..4776625 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -916,11 +916,7 @@ def _namespaces(elem, default_namespace=None): _raise_serialization_error(qname) # populate qname and namespaces table - try: - iterate = elem.iter - except AttributeError: - iterate = elem.getiterator # cET compatibility - for elem in iterate(): + for elem in elem.iter(): tag = elem.tag if isinstance(tag, QName): if tag.text not in qnames: |