diff options
author | Barney Gale <barney.gale@gmail.com> | 2024-05-30 03:05:36 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-30 03:05:36 (GMT) |
commit | 3c890b503c740767d0eb9a0e74b47f17a1e69452 (patch) | |
tree | a29c69300686ec574c8c6de350fd38699b9b1c01 | |
parent | 2cc3502f98bb9aea386ab55443fc077ddcdde91d (diff) | |
download | cpython-3c890b503c740767d0eb9a0e74b47f17a1e69452.zip cpython-3c890b503c740767d0eb9a0e74b47f17a1e69452.tar.gz cpython-3c890b503c740767d0eb9a0e74b47f17a1e69452.tar.bz2 |
GH-89727: Fix `os.fwalk()` recursion error on deep trees (#119638)
Implement `os.fwalk()` using a list as a stack to avoid emitting recursion
errors on deeply nested trees.
-rw-r--r-- | Lib/os.py | 91 | ||||
-rw-r--r-- | Lib/test/test_os.py | 2 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2024-05-28-00-56-59.gh-issue-89727._bxoL3.rst | 3 |
3 files changed, 56 insertions, 40 deletions
@@ -478,24 +478,52 @@ if {open, stat} <= supports_dir_fd and {scandir, stat} <= supports_fd: """ sys.audit("os.fwalk", top, topdown, onerror, follow_symlinks, dir_fd) top = fspath(top) - # Note: To guard against symlink races, we use the standard - # lstat()/open()/fstat() trick. - if not follow_symlinks: - orig_st = stat(top, follow_symlinks=False, dir_fd=dir_fd) - topfd = open(top, O_RDONLY | O_NONBLOCK, dir_fd=dir_fd) - try: - if (follow_symlinks or (st.S_ISDIR(orig_st.st_mode) and - path.samestat(orig_st, stat(topfd)))): - yield from _fwalk(topfd, top, isinstance(top, bytes), - topdown, onerror, follow_symlinks) - finally: - close(topfd) - - def _fwalk(topfd, toppath, isbytes, topdown, onerror, follow_symlinks): + stack = [(_fwalk_walk, (True, dir_fd, top, top, None))] + isbytes = isinstance(top, bytes) + while stack: + yield from _fwalk(stack, isbytes, topdown, onerror, follow_symlinks) + + # Each item in the _fwalk() stack is a pair (action, args). + _fwalk_walk = 0 # args: (isroot, dirfd, toppath, topname, entry) + _fwalk_yield = 1 # args: (toppath, dirnames, filenames, topfd) + _fwalk_close = 2 # args: dirfd + + def _fwalk(stack, isbytes, topdown, onerror, follow_symlinks): # Note: This uses O(depth of the directory tree) file descriptors: if # necessary, it can be adapted to only require O(1) FDs, see issue # #13734. + action, value = stack.pop() + if action == _fwalk_close: + close(value) + return + elif action == _fwalk_yield: + yield value + return + assert action == _fwalk_walk + isroot, dirfd, toppath, topname, entry = value + try: + if not follow_symlinks: + # Note: To guard against symlink races, we use the standard + # lstat()/open()/fstat() trick. + if entry is None: + orig_st = stat(topname, follow_symlinks=False, dir_fd=dirfd) + else: + orig_st = entry.stat(follow_symlinks=False) + topfd = open(topname, O_RDONLY | O_NONBLOCK, dir_fd=dirfd) + except OSError as err: + if isroot: + raise + if onerror is not None: + onerror(err) + return + stack.append((_fwalk_close, topfd)) + if not follow_symlinks: + if isroot and not st.S_ISDIR(orig_st.st_mode): + return + if not path.samestat(orig_st, stat(topfd)): + return + scandir_it = scandir(topfd) dirs = [] nondirs = [] @@ -521,31 +549,18 @@ if {open, stat} <= supports_dir_fd and {scandir, stat} <= supports_fd: if topdown: yield toppath, dirs, nondirs, topfd + else: + stack.append((_fwalk_yield, (toppath, dirs, nondirs, topfd))) - for name in dirs if entries is None else zip(dirs, entries): - try: - if not follow_symlinks: - if topdown: - orig_st = stat(name, dir_fd=topfd, follow_symlinks=False) - else: - assert entries is not None - name, entry = name - orig_st = entry.stat(follow_symlinks=False) - dirfd = open(name, O_RDONLY | O_NONBLOCK, dir_fd=topfd) - except OSError as err: - if onerror is not None: - onerror(err) - continue - try: - if follow_symlinks or path.samestat(orig_st, stat(dirfd)): - dirpath = path.join(toppath, name) - yield from _fwalk(dirfd, dirpath, isbytes, - topdown, onerror, follow_symlinks) - finally: - close(dirfd) - - if not topdown: - yield toppath, dirs, nondirs, topfd + toppath = path.join(toppath, toppath[:0]) # Add trailing slash. + if entries is None: + stack.extend( + (_fwalk_walk, (False, topfd, toppath + name, name, None)) + for name in dirs[::-1]) + else: + stack.extend( + (_fwalk_walk, (False, topfd, toppath + name, name, entry)) + for name, entry in zip(dirs[::-1], entries[::-1])) __all__.append("fwalk") diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 941fa2b..7dc5784 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -1687,8 +1687,6 @@ class FwalkTests(WalkTests): # fwalk() keeps file descriptors open test_walk_many_open_files = None - # fwalk() still uses recursion - test_walk_above_recursion_limit = None class BytesWalkTests(WalkTests): diff --git a/Misc/NEWS.d/next/Library/2024-05-28-00-56-59.gh-issue-89727._bxoL3.rst b/Misc/NEWS.d/next/Library/2024-05-28-00-56-59.gh-issue-89727._bxoL3.rst new file mode 100644 index 0000000..92222bc --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-28-00-56-59.gh-issue-89727._bxoL3.rst @@ -0,0 +1,3 @@ +Fix issue with :func:`os.fwalk` where a :exc:`RecursionError` was raised on +deep directory trees by adjusting the implementation to be iterative instead +of recursive. |