summary refs log tree commit diff stats
path: root/Lib
diff options
context:
space:
mode:
author Eli Bendersky <eliben@gmail.com> 2012-06-15 04:42:50 (GMT)
committer Eli Bendersky <eliben@gmail.com> 2012-06-15 04:42:50 (GMT)
commit 64d11e60f23f6b1435704adb87ebf818e5a4c0c1 (patch)
tree ece3c4337e34bdb0408016b1eb38428343b75873 /Lib
parent fedb04a37aff9f7a2cfe746f7fc4683e74e38bf0 (diff)
download cpython-64d11e60f23f6b1435704adb87ebf818e5a4c0c1.zip
cpython-64d11e60f23f6b1435704adb87ebf818e5a4c0c1.tar.gz
cpython-64d11e60f23f6b1435704adb87ebf818e5a4c0c1.tar.bz2
Replace the iter/itertext methods of Element in _elementtree with true C implementations, instead of the bootstrapped Python code. In addition to being cleaner (removing the last remains of the bootstrapping code in _elementtree), this gives a 10x performance boost for iter() on large documents.
Also reorganized the tests a bit to be more robust.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/test/test_xml_etree.py 247
-rw-r--r-- Lib/test/test_xml_etree_c.py 28
-rw-r--r-- Lib/xml/etree/ElementTree.py 6
3 files changed, 142 insertions, 139 deletions
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 49a5633..bee6329 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -23,7 +23,8 @@ import weakref
from test import support
from test.support import findfile, import_fresh_module, gc_collect
-pyET = import_fresh_module('xml.etree.ElementTree', blocked=['_elementtree'])
+pyET = None
+ET = None
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
try:
@@ -209,10 +210,8 @@ def interface():
These methods return an iterable. See bug 6472.
- >>> check_method(element.iter("tag").__next__)
>>> check_method(element.iterfind("tag").__next__)
>>> check_method(element.iterfind("*").__next__)
- >>> check_method(tree.iter("tag").__next__)
>>> check_method(tree.iterfind("tag").__next__)
>>> check_method(tree.iterfind("*").__next__)
@@ -291,42 +290,6 @@ def cdata():
'<tag>hello</tag>'
"""
-# Only with Python implementation
-def simplefind():
- """
- Test find methods using the elementpath fallback.
-
- >>> ElementTree = pyET
-
- >>> CurrentElementPath = ElementTree.ElementPath
- >>> ElementTree.ElementPath = ElementTree._SimpleElementPath()
- >>> elem = ElementTree.XML(SAMPLE_XML)
- >>> elem.find("tag").tag
- 'tag'
- >>> ElementTree.ElementTree(elem).find("tag").tag
- 'tag'
- >>> elem.findtext("tag")
- 'text'
- >>> elem.findtext("tog")
- >>> elem.findtext("tog", "default")
- 'default'
- >>> ElementTree.ElementTree(elem).findtext("tag")
- 'text'
- >>> summarize_list(elem.findall("tag"))
- ['tag', 'tag']
- >>> summarize_list(elem.findall(".//tag"))
- ['tag', 'tag', 'tag']
-
- Path syntax doesn't work in this case.
-
- >>> elem.find("section/tag")
- >>> elem.findtext("section/tag")
- >>> summarize_list(elem.findall("section/tag"))
- []
-
- >>> ElementTree.ElementPath = CurrentElementPath
- """
-
def find():
"""
Test find methods (including xpath syntax).
@@ -1002,36 +965,6 @@ def methods():
'1 < 2\n'
"""
-def iterators():
- """
- Test iterators.
-
- >>> e = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
- >>> summarize_list(e.iter())
- ['html', 'body', 'i']
- >>> summarize_list(e.find("body").iter())
- ['body', 'i']
- >>> summarize(next(e.iter()))
- 'html'
- >>> "".join(e.itertext())
- 'this is a paragraph...'
- >>> "".join(e.find("body").itertext())
- 'this is a paragraph.'
- >>> next(e.itertext())
- 'this is a '
-
- Method iterparse should return an iterator. See bug 6472.
-
- >>> sourcefile = serialize(e, to_string=False)
- >>> next(ET.iterparse(sourcefile)) # doctest: +ELLIPSIS
- ('end', <Element 'i' at 0x...>)
-
- >>> tree = ET.ElementTree(None)
- >>> tree.iter()
- Traceback (most recent call last):
- AttributeError: 'NoneType' object has no attribute 'iter'
- """
-
ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
@@ -1339,6 +1272,7 @@ XINCLUDE["default.xml"] = """\
</document>
""".format(html.escape(SIMPLE_XMLFILE, True))
+
def xinclude_loader(href, parse="xml", encoding=None):
try:
data = XINCLUDE[href]
@@ -1411,22 +1345,6 @@ def xinclude():
>>> # print(serialize(document)) # C5
"""
-def xinclude_default():
- """
- >>> from xml.etree import ElementInclude
-
- >>> document = xinclude_loader("default.xml")
- >>> ElementInclude.include(document)
- >>> print(serialize(document)) # default
- <document>
- <p>Example.</p>
- <root>
- <element key="value">text</element>
- <element>text</element>tail
- <empty-element />
- </root>
- </document>
- """
#
# badly formatted xi:include tags
@@ -1917,9 +1835,8 @@ class ElementTreeTest(unittest.TestCase):
self.assertIsInstance(ET.QName, type)
self.assertIsInstance(ET.ElementTree, type)
self.assertIsInstance(ET.Element, type)
- # XXX issue 14128 with C ElementTree
- # self.assertIsInstance(ET.TreeBuilder, type)
- # self.assertIsInstance(ET.XMLParser, type)
+ self.assertIsInstance(ET.TreeBuilder, type)
+ self.assertIsInstance(ET.XMLParser, type)
def test_Element_subclass_trivial(self):
class MyElement(ET.Element):
@@ -1953,6 +1870,73 @@ class ElementTreeTest(unittest.TestCase):
self.assertEqual(mye.newmethod(), 'joe')
+class ElementIterTest(unittest.TestCase):
+ def _ilist(self, elem, tag=None):
+ return summarize_list(elem.iter(tag))
+
+ def test_basic(self):
+ doc = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
+ self.assertEqual(self._ilist(doc), ['html', 'body', 'i'])
+ self.assertEqual(self._ilist(doc.find('body')), ['body', 'i'])
+ self.assertEqual(next(doc.iter()).tag, 'html')
+ self.assertEqual(''.join(doc.itertext()), 'this is a paragraph...')
+ self.assertEqual(''.join(doc.find('body').itertext()),
+ 'this is a paragraph.')
+ self.assertEqual(next(doc.itertext()), 'this is a ')
+
+ # iterparse should return an iterator
+ sourcefile = serialize(doc, to_string=False)
+ self.assertEqual(next(ET.iterparse(sourcefile))[0], 'end')
+
+ tree = ET.ElementTree(None)
+ self.assertRaises(AttributeError, tree.iter)
+
+ def test_corners(self):
+ # single root, no subelements
+ a = ET.Element('a')
+ self.assertEqual(self._ilist(a), ['a'])
+
+ # one child
+ b = ET.SubElement(a, 'b')
+ self.assertEqual(self._ilist(a), ['a', 'b'])
+
+ # one child and one grandchild
+ c = ET.SubElement(b, 'c')
+ self.assertEqual(self._ilist(a), ['a', 'b', 'c'])
+
+ # two children, only first with grandchild
+ d = ET.SubElement(a, 'd')
+ self.assertEqual(self._ilist(a), ['a', 'b', 'c', 'd'])
+
+ # replace first child by second
+ a[0] = a[1]
+ del a[1]
+ self.assertEqual(self._ilist(a), ['a', 'd'])
+
+ def test_iter_by_tag(self):
+ doc = ET.XML('''
+ <document>
+ <house>
+ <room>bedroom1</room>
+ <room>bedroom2</room>
+ </house>
+ <shed>nothing here
+ </shed>
+ <house>
+ <room>bedroom8</room>
+ </house>
+ </document>''')
+
+ self.assertEqual(self._ilist(doc, 'room'), ['room'] * 3)
+ self.assertEqual(self._ilist(doc, 'house'), ['house'] * 2)
+
+ # make sure both tag=None and tag='*' return all tags
+ all_tags = ['document', 'house', 'room', 'room',
+ 'shed', 'house', 'room']
+ self.assertEqual(self._ilist(doc), all_tags)
+ self.assertEqual(self._ilist(doc, '*'), all_tags)
+
+
class TreeBuilderTest(unittest.TestCase):
sample1 = ('<!DOCTYPE html PUBLIC'
' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
@@ -2026,7 +2010,20 @@ class TreeBuilderTest(unittest.TestCase):
('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
-
+class XincludeTest(unittest.TestCase):
+ def test_xinclude_default(self):
+ from xml.etree import ElementInclude
+ doc = xinclude_loader('default.xml')
+ ElementInclude.include(doc)
+ s = serialize(doc)
+ self.assertEqual(s.strip(), '''<document>
+ <p>Example.</p>
+ <root>
+ <element key="value">text</element>
+ <element>text</element>tail
+ <empty-element />
+</root>
+</document>''')
class XMLParserTest(unittest.TestCase):
sample1 = '<file><line>22</line></file>'
sample2 = ('<!DOCTYPE html PUBLIC'
@@ -2073,13 +2070,6 @@ class XMLParserTest(unittest.TestCase):
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
-class NoAcceleratorTest(unittest.TestCase):
- # Test that the C accelerator was not imported for pyET
- def test_correct_import_pyET(self):
- self.assertEqual(pyET.Element.__module__, 'xml.etree.ElementTree')
- self.assertEqual(pyET.SubElement.__module__, 'xml.etree.ElementTree')
-
-
class NamespaceParseTest(unittest.TestCase):
def test_find_with_namespace(self):
nsmap = {'h': 'hello', 'f': 'foo'}
@@ -2090,7 +2080,6 @@ class NamespaceParseTest(unittest.TestCase):
self.assertEqual(len(doc.findall('.//{foo}name', nsmap)), 1)
-
class ElementSlicingTest(unittest.TestCase):
def _elem_tags(self, elemlist):
return [e.tag for e in elemlist]
@@ -2232,6 +2221,36 @@ class KeywordArgsTest(unittest.TestCase):
with self.assertRaisesRegex(TypeError, 'must be dict, not str'):
ET.Element('a', attrib="I'm not a dict")
+# --------------------------------------------------------------------
+
+@unittest.skipUnless(pyET, 'only for the Python version')
+class NoAcceleratorTest(unittest.TestCase):
+ # Test that the C accelerator was not imported for pyET
+ def test_correct_import_pyET(self):
+ self.assertEqual(pyET.Element.__module__, 'xml.etree.ElementTree')
+ self.assertEqual(pyET.SubElement.__module__, 'xml.etree.ElementTree')
+
+
+class ElementPathFallbackTest(unittest.TestCase):
+ def test_fallback(self):
+ current_ElementPath = ET.ElementPath
+ ET.ElementPath = ET._SimpleElementPath()
+ elem = ET.XML(SAMPLE_XML)
+ self.assertEqual(elem.find('tag').tag, 'tag')
+ self.assertEqual(ET.ElementTree(elem).find('tag').tag, 'tag')
+ self.assertEqual(elem.findtext('tag'), 'text')
+ self.assertIsNone(elem.findtext('tog'))
+ self.assertEqual(elem.findtext('tog', 'default'), 'default')
+ self.assertEqual(ET.ElementTree(elem).findtext('tag'), 'text')
+ self.assertEqual(summarize_list(elem.findall('tag')), ['tag', 'tag'])
+ self.assertEqual(summarize_list(elem.findall('.//tag')),
+ ['tag', 'tag', 'tag'])
+
+ self.assertIsNone(elem.find('section/tag'))
+ self.assertIsNone(elem.findtext('section/tag'))
+ self.assertEqual(summarize_list(elem.findall('section/tag')), [])
+
+ ET.ElementPath = current_ElementPath
# --------------------------------------------------------------------
@@ -2276,31 +2295,43 @@ class CleanContext(object):
self.checkwarnings.__exit__(*args)
-def test_main(module=pyET):
- from test import test_xml_etree
+def test_main(module=None):
+ # When invoked without a module, runs the Python ET tests by loading pyET.
+ # Otherwise, uses the given module as the ET.
+ if module is None:
+ global pyET
+ pyET = import_fresh_module('xml.etree.ElementTree',
+ blocked=['_elementtree'])
+ module = pyET
- # The same doctests are used for both the Python and the C implementations
- test_xml_etree.ET = module
+ global ET
+ ET = module
test_classes = [
ElementSlicingTest,
BasicElementTest,
StringIOTest,
ParseErrorTest,
+ XincludeTest,
ElementTreeTest,
- NamespaceParseTest,
+ ElementIterTest,
TreeBuilderTest,
- XMLParserTest,
- KeywordArgsTest]
- if module is pyET:
- # Run the tests specific to the Python implementation
- test_classes += [NoAcceleratorTest]
+ ]
+
+ # These tests will only run for the pure-Python version that doesn't import
+ # _elementtree. We can't use skipUnless here, because pyET is filled in only
+ # after the module is loaded.
+ if pyET:
+ test_classes.extend([
+ NoAcceleratorTest,
+ ElementPathFallbackTest,
+ ])
support.run_unittest(*test_classes)
# XXX the C module should give the same warnings as the Python module
with CleanContext(quiet=(module is not pyET)):
- support.run_doctest(test_xml_etree, verbosity=True)
+ support.run_doctest(sys.modules[__name__], verbosity=True)
if __name__ == '__main__':
test_main()
diff --git a/Lib/test/test_xml_etree_c.py b/Lib/test/test_xml_etree_c.py
index 10416d2..142a22f 100644
--- a/Lib/test/test_xml_etree_c.py
+++ b/Lib/test/test_xml_etree_c.py
@@ -8,31 +8,6 @@ cET = import_fresh_module('xml.etree.ElementTree', fresh=['_elementtree'])
cET_alias = import_fresh_module('xml.etree.cElementTree', fresh=['_elementtree', 'xml.etree'])
-# cElementTree specific tests
-
-def sanity():
- r"""
- Import sanity.
-
- Issue #6697.
-
- >>> cElementTree = cET
- >>> e = cElementTree.Element('a')
- >>> getattr(e, '\uD800') # doctest: +ELLIPSIS
- Traceback (most recent call last):
- ...
- UnicodeEncodeError: ...
-
- >>> p = cElementTree.XMLParser()
- >>> p.version.split()[0]
- 'Expat'
- >>> getattr(p, '\uD800')
- Traceback (most recent call last):
- ...
- AttributeError: 'XMLParser' object has no attribute '\ud800'
- """
-
-
class MiscTests(unittest.TestCase):
# Issue #8651.
@support.bigmemtest(size=support._2G + 100, memuse=1)
@@ -46,6 +21,7 @@ class MiscTests(unittest.TestCase):
finally:
data = None
+
@unittest.skipUnless(cET, 'requires _elementtree')
class TestAliasWorking(unittest.TestCase):
# Test that the cET alias module is alive
@@ -53,6 +29,7 @@ class TestAliasWorking(unittest.TestCase):
e = cET_alias.Element('foo')
self.assertEqual(e.tag, 'foo')
+
@unittest.skipUnless(cET, 'requires _elementtree')
class TestAcceleratorImported(unittest.TestCase):
# Test that the C accelerator was imported, as expected
@@ -67,7 +44,6 @@ def test_main():
from test import test_xml_etree, test_xml_etree_c
# Run the tests specific to the C implementation
- support.run_doctest(test_xml_etree_c, verbosity=True)
support.run_unittest(
MiscTests,
TestAliasWorking,
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index e068fc2..4776625 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -916,11 +916,7 @@ def _namespaces(elem, default_namespace=None):
_raise_serialization_error(qname)
# populate qname and namespaces table
- try:
- iterate = elem.iter
- except AttributeError:
- iterate = elem.getiterator # cET compatibility
- for elem in iterate():
+ for elem in elem.iter():
tag = elem.tag
if isinstance(tag, QName):
if tag.text not in qnames: