diff options
author | Florent Xicluna <florent.xicluna@gmail.com> | 2011-11-01 22:31:09 (GMT) |
---|---|---|
committer | Florent Xicluna <florent.xicluna@gmail.com> | 2011-11-01 22:31:09 (GMT) |
commit | 91d5193b3ad208f359107512ff12a416c9dbec3b (patch) | |
tree | 3868c3e9f202850439d6ec606b00eed3db499530 | |
parent | c1e73c30e98c9b59460d0f963a391a08156286e5 (diff) | |
download | cpython-91d5193b3ad208f359107512ff12a416c9dbec3b.zip cpython-91d5193b3ad208f359107512ff12a416c9dbec3b.tar.gz cpython-91d5193b3ad208f359107512ff12a416c9dbec3b.tar.bz2 |
Closes #2892: preserve iterparse events in case of SyntaxError.
-rw-r--r-- | Lib/test/test_xml_etree.py | 1 |
-rw-r--r-- | Lib/xml/etree/ElementTree.py | 38 |
-rw-r--r-- | Misc/NEWS | 2 |
-rw-r--r-- | Modules/_elementtree.c | 38 |
4 files changed, 49 insertions, 30 deletions
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 22fafa9..b7a996c 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -754,6 +754,7 @@ def iterparse():
     ...     print(action, elem.tag)
     ... except ET.ParseError as v:
     ...   print(v)
+    end document
     junk after document element: line 1, column 12
     """
 
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index ba33879..f94c48c 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -1250,6 +1250,7 @@ class _IterParseIterator:
         self._close_file = close_source
         self._events = []
         self._index = 0
+        self._error = None
         self.root = self._root = None
         self._parser = parser
         # wire up the parser for event reporting
@@ -1291,24 +1292,31 @@ class _IterParseIterator:
         while 1:
             try:
                 item = self._events[self._index]
+                self._index += 1
+                return item
             except IndexError:
-                if self._parser is None:
-                    self.root = self._root
-                    if self._close_file:
-                        self._file.close()
-                    raise StopIteration
-                # load event buffer
-                del self._events[:]
-                self._index = 0
-                data = self._file.read(16384)
-                if data:
+                pass
+            if self._error:
+                e = self._error
+                self._error = None
+                raise e
+            if self._parser is None:
+                self.root = self._root
+                if self._close_file:
+                    self._file.close()
+                raise StopIteration
+            # load event buffer
+            del self._events[:]
+            self._index = 0
+            data = self._file.read(16384)
+            if data:
+                try:
                     self._parser.feed(data)
-                else:
-                    self._root = self._parser.close()
-                    self._parser = None
+                except SyntaxError as exc:
+                    self._error = exc
             else:
-                self._index = self._index + 1
-                return item
+                self._root = self._parser.close()
+                self._parser = None
 
     def __iter__(self):
         return self
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -66,6 +66,8 @@ Core and Builtins
 Library
 -------
 
+- Issue #2892: preserve iterparse events in case of SyntaxError.
+
 - Issue #670664: Fix HTMLParser to correctly handle the content of
   ``<script>...</script>`` and ``<style>...</style>``.
 
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
index 3061d8e..7887721 100644
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -3000,6 +3000,7 @@ PyInit__elementtree(void)
         "  self._file = file\n"
         "  self._events = []\n"
         "  self._index = 0\n"
+        "  self._error = None\n"
         "  self.root = self._root = None\n"
         "  b = cElementTree.TreeBuilder()\n"
         "  self._parser = cElementTree.XMLParser(b)\n"
@@ -3008,24 +3009,31 @@ PyInit__elementtree(void)
         "  while 1:\n"
         "    try:\n"
         "      item = self._events[self._index]\n"
+        "      self._index += 1\n"
+        "      return item\n"
         "    except IndexError:\n"
-        "      if self._parser is None:\n"
-        "        self.root = self._root\n"
-        "        if self._close_file:\n"
-        "          self._file.close()\n"
-        "        raise StopIteration\n"
-        "      # load event buffer\n"
-        "      del self._events[:]\n"
-        "      self._index = 0\n"
-        "      data = self._file.read(16384)\n"
-        "      if data:\n"
+        "      pass\n"
+        "    if self._error:\n"
+        "      e = self._error\n"
+        "      self._error = None\n"
+        "      raise e\n"
+        "    if self._parser is None:\n"
+        "      self.root = self._root\n"
+        "      if self._close_file:\n"
+        "        self._file.close()\n"
+        "      raise StopIteration\n"
+        "    # load event buffer\n"
+        "    del self._events[:]\n"
+        "    self._index = 0\n"
+        "    data = self._file.read(16384)\n"
+        "    if data:\n"
+        "      try:\n"
         "        self._parser.feed(data)\n"
-        "      else:\n"
-        "        self._root = self._parser.close()\n"
-        "        self._parser = None\n"
+        "      except SyntaxError as exc:\n"
+        "        self._error = exc\n"
         "    else:\n"
-        "      self._index = self._index + 1\n"
-        "      return item\n"
+        "      self._root = self._parser.close()\n"
+        "      self._parser = None\n"
         " def __iter__(self):\n"
         "  return self\n"
         "cElementTree.iterparse = iterparse\n"