diff options
author | Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> | 2024-01-23 20:49:13 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-23 20:49:13 (GMT) |
commit | 9344edeb75e299baeee51dbc26172977475827ef (patch) | |
tree | 6b30f2881d3cf88ba36d82bb6c90c86c43ba6af1 | |
parent | e85f4c6fa96d9b6f7d4a1d0a9ac07c12ba7f33db (diff) | |
download | cpython-9344edeb75e299baeee51dbc26172977475827ef.zip cpython-9344edeb75e299baeee51dbc26172977475827ef.tar.gz cpython-9344edeb75e299baeee51dbc26172977475827ef.tar.bz2 |
[3.11] gh-101438: Avoid reference cycle in ElementTree.iterparse. (GH-114269) (GH-114500)
The iterator returned by ElementTree.iterparse() may hold on to a file
descriptor. The reference cycle prevented prompt clean-up of the file
descriptor if the returned iterator was not exhausted.
(cherry picked from commit ce01ab536f22a3cf095d621f3b3579c1e3567859)
Co-authored-by: Sam Gross <colesbury@gmail.com>
-rw-r--r-- | Lib/xml/etree/ElementTree.py | 27 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst | 4 |
2 files changed, 21 insertions, 10 deletions
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 1dc8035..d4b259e 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -99,6 +99,7 @@ import io import collections import collections.abc import contextlib +import weakref from . import ElementPath @@ -1238,13 +1239,14 @@ def iterparse(source, events=None, parser=None): # parser argument of iterparse is removed, this can be killed. pullparser = XMLPullParser(events=events, _parser=parser) - def iterator(source): + if not hasattr(source, "read"): + source = open(source, "rb") + close_source = True + else: close_source = False + + def iterator(source): try: - if not hasattr(source, "read"): - source = open(source, "rb") - close_source = True - yield None while True: yield from pullparser.read_events() # load event buffer @@ -1254,18 +1256,23 @@ def iterparse(source, events=None, parser=None): pullparser.feed(data) root = pullparser._close_and_return_root() yield from pullparser.read_events() - it.root = root + it = wr() + if it is not None: + it.root = root finally: if close_source: source.close() class IterParseIterator(collections.abc.Iterator): __next__ = iterator(source).__next__ - it = IterParseIterator() - it.root = None - del iterator, IterParseIterator - next(it) + def __del__(self): + if close_source: + source.close() + + it = IterParseIterator() + wr = weakref.ref(it) + del IterParseIterator return it diff --git a/Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst b/Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst new file mode 100644 index 0000000..9b69b5d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst @@ -0,0 +1,4 @@ +Avoid reference cycle in ElementTree.iterparse. The iterator returned by +``ElementTree.iterparse`` may hold on to a file descriptor. The reference +cycle prevented prompt clean-up of the file descriptor if the returned +iterator was not exhausted. |