summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBarney Gale <barney.gale@gmail.com>2024-05-30 03:05:36 (GMT)
committerGitHub <noreply@github.com>2024-05-30 03:05:36 (GMT)
commit3c890b503c740767d0eb9a0e74b47f17a1e69452 (patch)
treea29c69300686ec574c8c6de350fd38699b9b1c01
parent2cc3502f98bb9aea386ab55443fc077ddcdde91d (diff)
downloadcpython-3c890b503c740767d0eb9a0e74b47f17a1e69452.zip
cpython-3c890b503c740767d0eb9a0e74b47f17a1e69452.tar.gz
cpython-3c890b503c740767d0eb9a0e74b47f17a1e69452.tar.bz2
GH-89727: Fix `os.fwalk()` recursion error on deep trees (#119638)
Implement `os.fwalk()` using a list as a stack to avoid emitting recursion errors on deeply nested trees.
-rw-r--r--Lib/os.py91
-rw-r--r--Lib/test/test_os.py2
-rw-r--r--Misc/NEWS.d/next/Library/2024-05-28-00-56-59.gh-issue-89727._bxoL3.rst3
3 files changed, 56 insertions, 40 deletions
diff --git a/Lib/os.py b/Lib/os.py
index ae9e646..cef5d90 100644
--- a/Lib/os.py
+++ b/Lib/os.py
@@ -478,24 +478,52 @@ if {open, stat} <= supports_dir_fd and {scandir, stat} <= supports_fd:
"""
sys.audit("os.fwalk", top, topdown, onerror, follow_symlinks, dir_fd)
top = fspath(top)
- # Note: To guard against symlink races, we use the standard
- # lstat()/open()/fstat() trick.
- if not follow_symlinks:
- orig_st = stat(top, follow_symlinks=False, dir_fd=dir_fd)
- topfd = open(top, O_RDONLY | O_NONBLOCK, dir_fd=dir_fd)
- try:
- if (follow_symlinks or (st.S_ISDIR(orig_st.st_mode) and
- path.samestat(orig_st, stat(topfd)))):
- yield from _fwalk(topfd, top, isinstance(top, bytes),
- topdown, onerror, follow_symlinks)
- finally:
- close(topfd)
-
- def _fwalk(topfd, toppath, isbytes, topdown, onerror, follow_symlinks):
+ stack = [(_fwalk_walk, (True, dir_fd, top, top, None))]
+ isbytes = isinstance(top, bytes)
+ while stack:
+ yield from _fwalk(stack, isbytes, topdown, onerror, follow_symlinks)
+
+ # Each item in the _fwalk() stack is a pair (action, args).
+ _fwalk_walk = 0 # args: (isroot, dirfd, toppath, topname, entry)
+ _fwalk_yield = 1 # args: (toppath, dirnames, filenames, topfd)
+ _fwalk_close = 2 # args: dirfd
+
+ def _fwalk(stack, isbytes, topdown, onerror, follow_symlinks):
# Note: This uses O(depth of the directory tree) file descriptors: if
# necessary, it can be adapted to only require O(1) FDs, see issue
# #13734.
+ action, value = stack.pop()
+ if action == _fwalk_close:
+ close(value)
+ return
+ elif action == _fwalk_yield:
+ yield value
+ return
+ assert action == _fwalk_walk
+ isroot, dirfd, toppath, topname, entry = value
+ try:
+ if not follow_symlinks:
+ # Note: To guard against symlink races, we use the standard
+ # lstat()/open()/fstat() trick.
+ if entry is None:
+ orig_st = stat(topname, follow_symlinks=False, dir_fd=dirfd)
+ else:
+ orig_st = entry.stat(follow_symlinks=False)
+ topfd = open(topname, O_RDONLY | O_NONBLOCK, dir_fd=dirfd)
+ except OSError as err:
+ if isroot:
+ raise
+ if onerror is not None:
+ onerror(err)
+ return
+ stack.append((_fwalk_close, topfd))
+ if not follow_symlinks:
+ if isroot and not st.S_ISDIR(orig_st.st_mode):
+ return
+ if not path.samestat(orig_st, stat(topfd)):
+ return
+
scandir_it = scandir(topfd)
dirs = []
nondirs = []
@@ -521,31 +549,18 @@ if {open, stat} <= supports_dir_fd and {scandir, stat} <= supports_fd:
if topdown:
yield toppath, dirs, nondirs, topfd
+ else:
+ stack.append((_fwalk_yield, (toppath, dirs, nondirs, topfd)))
- for name in dirs if entries is None else zip(dirs, entries):
- try:
- if not follow_symlinks:
- if topdown:
- orig_st = stat(name, dir_fd=topfd, follow_symlinks=False)
- else:
- assert entries is not None
- name, entry = name
- orig_st = entry.stat(follow_symlinks=False)
- dirfd = open(name, O_RDONLY | O_NONBLOCK, dir_fd=topfd)
- except OSError as err:
- if onerror is not None:
- onerror(err)
- continue
- try:
- if follow_symlinks or path.samestat(orig_st, stat(dirfd)):
- dirpath = path.join(toppath, name)
- yield from _fwalk(dirfd, dirpath, isbytes,
- topdown, onerror, follow_symlinks)
- finally:
- close(dirfd)
-
- if not topdown:
- yield toppath, dirs, nondirs, topfd
+ toppath = path.join(toppath, toppath[:0]) # Add trailing slash.
+ if entries is None:
+ stack.extend(
+ (_fwalk_walk, (False, topfd, toppath + name, name, None))
+ for name in dirs[::-1])
+ else:
+ stack.extend(
+ (_fwalk_walk, (False, topfd, toppath + name, name, entry))
+ for name, entry in zip(dirs[::-1], entries[::-1]))
__all__.append("fwalk")
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py
index 941fa2b..7dc5784 100644
--- a/Lib/test/test_os.py
+++ b/Lib/test/test_os.py
@@ -1687,8 +1687,6 @@ class FwalkTests(WalkTests):
# fwalk() keeps file descriptors open
test_walk_many_open_files = None
- # fwalk() still uses recursion
- test_walk_above_recursion_limit = None
class BytesWalkTests(WalkTests):
diff --git a/Misc/NEWS.d/next/Library/2024-05-28-00-56-59.gh-issue-89727._bxoL3.rst b/Misc/NEWS.d/next/Library/2024-05-28-00-56-59.gh-issue-89727._bxoL3.rst
new file mode 100644
index 0000000..92222bc
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-05-28-00-56-59.gh-issue-89727._bxoL3.rst
@@ -0,0 +1,3 @@
+Fix issue with :func:`os.fwalk` where a :exc:`RecursionError` was raised on
+deep directory trees by adjusting the implementation to be iterative instead
+of recursive.