From 4cc2afa0ec54910d60cdc4ca57d886f66c88dc18 Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Sat, 28 Sep 2013 23:50:35 +1000 Subject: Close #18990: remove root attribute from XMLPullParser - this was an internal implementation detail for iterparse - this has been changed to use a new private method instead - XMLPullParser.close docs are now more explicit about not returning a root element and instead direct users towards read_events - also added missing docstrings and clarified some details related to exactly *when* events are consumed from the internal queue (Initial patch by Stefan Behnel) --- Doc/library/xml.etree.elementtree.rst | 13 ++++++++++--- Lib/test/test_xml_etree.py | 19 +++++-------------- Lib/xml/etree/ElementTree.py | 32 ++++++++++++++++++++++---------- 3 files changed, 37 insertions(+), 27 deletions(-) diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index 97550ed..c15041f 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -1031,15 +1031,22 @@ XMLPullParser Objects .. method:: close() - Signal the parser that the data stream is terminated. + Signal the parser that the data stream is terminated. Unlike + :meth:`XMLParser.close`, this method always returns :const:`None`. + Any events not yet retrieved when the parser is closed can still be + read with :meth:`read_events`. .. method:: read_events() Iterate over the events which have been encountered in the data fed to the parser. This method yields ``(event, elem)`` pairs, where *event* is a string representing the type of event (e.g. ``"end"``) and *elem* is the - encountered :class:`Element` object. Events provided in a previous call - to :meth:`read_events` will not be yielded again. + encountered :class:`Element` object. + + Events provided in a previous call to :meth:`read_events` will not be + yielded again. 
As events are consumed from the internal queue only as + they are retrieved from the iterator, multiple readers calling + :meth:`read_events` in parallel will have unpredictable results. .. note:: diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index a888fe5..614e598 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -985,10 +985,7 @@ class XMLPullParserTest(unittest.TestCase): ]) self._feed(parser, "\n", chunk_size) self.assert_event_tags(parser, [('end', 'root')]) - # Closing sets the `root` attribute - self.assertIs(parser.root, None) - parser.close() - self.assertEqual(parser.root.tag, 'root') + self.assertIsNone(parser.close()) def test_feed_while_iterating(self): parser = ET.XMLPullParser() @@ -1021,10 +1018,7 @@ class XMLPullParserTest(unittest.TestCase): ]) self._feed(parser, "\n") self.assert_event_tags(parser, [('end', '{namespace}root')]) - # Closing sets the `root` attribute - self.assertIs(parser.root, None) - parser.close() - self.assertEqual(parser.root.tag, '{namespace}root') + self.assertIsNone(parser.close()) def test_ns_events(self): parser = ET.XMLPullParser(events=('start-ns', 'end-ns')) @@ -1039,7 +1033,7 @@ class XMLPullParserTest(unittest.TestCase): self._feed(parser, "\n") self._feed(parser, "\n") self.assertEqual(list(parser.read_events()), [('end-ns', None)]) - parser.close() + self.assertIsNone(parser.close()) def test_events(self): parser = ET.XMLPullParser(events=()) @@ -1064,10 +1058,8 @@ class XMLPullParserTest(unittest.TestCase): ('end', '{foo}element'), ]) self._feed(parser, "") - parser.close() - self.assertIs(parser.root, None) + self.assertIsNone(parser.close()) self.assert_event_tags(parser, [('end', 'root')]) - self.assertEqual(parser.root.tag, 'root') parser = ET.XMLPullParser(events=('start',)) self._feed(parser, "\n") @@ -1085,8 +1077,7 @@ class XMLPullParserTest(unittest.TestCase): ('start', '{foo}empty-element'), ]) self._feed(parser, "") - parser.close() - 
self.assertEqual(parser.root.tag, 'root') + self.assertIsNone(parser.close()) def test_events_sequence(self): # Test that events can be some sequence that's not just a tuple or list diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 6526b3e..cab415c 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -1220,7 +1220,6 @@ class XMLPullParser: # _elementtree.c expects a list, not a deque self._events_queue = [] self._index = 0 - self.root = self._root = None self._parser = _parser or XMLParser(target=TreeBuilder()) # wire up the parser for event reporting if events is None: @@ -1228,6 +1227,7 @@ class XMLPullParser: self._parser._setevents(self._events_queue, events) def feed(self, data): + """Feed encoded data to parser.""" if self._parser is None: raise ValueError("feed() called after end of stream") if data: @@ -1236,13 +1236,26 @@ class XMLPullParser: except SyntaxError as exc: self._events_queue.append(exc) - def close(self): - self._root = self._parser.close() + def _close_and_return_root(self): + # iterparse needs this to set its root attribute properly :( + root = self._parser.close() self._parser = None - if self._index >= len(self._events_queue): - self.root = self._root + return root + + def close(self): + """Finish feeding data to parser. + + Unlike XMLParser, does not return the root element. Use + read_events() to consume elements from XMLPullParser. + """ + self._close_and_return_root() def read_events(self): + """Iterate over currently available (event, elem) pairs. + + Events are consumed from the internal event queue as they are + retrieved from the iterator. 
+ """ events = self._events_queue while True: index = self._index @@ -1254,6 +1267,7 @@ class XMLPullParser: break index += 1 # Compact the list in a O(1) amortized fashion + # As noted above, _elementtree.c needs a list, not a deque if index * 2 >= len(events): events[:index] = [] self._index = 0 @@ -1263,8 +1277,6 @@ class XMLPullParser: raise event else: yield event - if self._parser is None: - self.root = self._root class _IterParseIterator: @@ -1275,14 +1287,14 @@ class _IterParseIterator: self._parser = XMLPullParser(events=events, _parser=parser) self._file = source self._close_file = close_source - self.root = None + self.root = self._root = None def __next__(self): while 1: for event in self._parser.read_events(): return event if self._parser._parser is None: - self.root = self._parser.root + self.root = self._root if self._close_file: self._file.close() raise StopIteration @@ -1291,7 +1303,7 @@ class _IterParseIterator: if data: self._parser.feed(data) else: - self._parser.close() + self._root = self._parser._close_and_return_root() def __iter__(self): return self -- cgit v0.12