summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> 2024-01-23 20:49:13 (GMT)
committer: GitHub <noreply@github.com> 2024-01-23 20:49:13 (GMT)
commit: 9344edeb75e299baeee51dbc26172977475827ef (patch)
tree: 6b30f2881d3cf88ba36d82bb6c90c86c43ba6af1
parent: e85f4c6fa96d9b6f7d4a1d0a9ac07c12ba7f33db (diff)
download: cpython-9344edeb75e299baeee51dbc26172977475827ef.zip
          cpython-9344edeb75e299baeee51dbc26172977475827ef.tar.gz
          cpython-9344edeb75e299baeee51dbc26172977475827ef.tar.bz2
[3.11] gh-101438: Avoid reference cycle in ElementTree.iterparse. (GH-114269) (GH-114500)
The iterator returned by ElementTree.iterparse() may hold on to a file
descriptor. The reference cycle prevented prompt clean-up of the file
descriptor if the returned iterator was not exhausted.

(cherry picked from commit ce01ab536f22a3cf095d621f3b3579c1e3567859)

Co-authored-by: Sam Gross <colesbury@gmail.com>
-rw-r--r--  Lib/xml/etree/ElementTree.py  27
-rw-r--r--  Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst  4
2 files changed, 21 insertions, 10 deletions
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index 1dc8035..d4b259e 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -99,6 +99,7 @@ import io
import collections
import collections.abc
import contextlib
+import weakref
from . import ElementPath
@@ -1238,13 +1239,14 @@ def iterparse(source, events=None, parser=None):
# parser argument of iterparse is removed, this can be killed.
pullparser = XMLPullParser(events=events, _parser=parser)
- def iterator(source):
+ if not hasattr(source, "read"):
+ source = open(source, "rb")
+ close_source = True
+ else:
close_source = False
+
+ def iterator(source):
try:
- if not hasattr(source, "read"):
- source = open(source, "rb")
- close_source = True
- yield None
while True:
yield from pullparser.read_events()
# load event buffer
@@ -1254,18 +1256,23 @@ def iterparse(source, events=None, parser=None):
pullparser.feed(data)
root = pullparser._close_and_return_root()
yield from pullparser.read_events()
- it.root = root
+ it = wr()
+ if it is not None:
+ it.root = root
finally:
if close_source:
source.close()
class IterParseIterator(collections.abc.Iterator):
__next__ = iterator(source).__next__
- it = IterParseIterator()
- it.root = None
- del iterator, IterParseIterator
- next(it)
+ def __del__(self):
+ if close_source:
+ source.close()
+
+ it = IterParseIterator()
+ wr = weakref.ref(it)
+ del IterParseIterator
return it
diff --git a/Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst b/Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst
new file mode 100644
index 0000000..9b69b5d
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst
@@ -0,0 +1,4 @@
+Avoid reference cycle in ElementTree.iterparse. The iterator returned by
+``ElementTree.iterparse`` may hold on to a file descriptor. The reference
+cycle prevented prompt clean-up of the file descriptor if the returned
+iterator was not exhausted.