summary refs log tree commit diff stats
path: root/Lib/os.py
diff options
context:
space:
mode:
author Jon Burdo <jon@jonburdo.com> 2022-12-19 18:59:01 (GMT)
committer GitHub <noreply@github.com> 2022-12-19 18:59:01 (GMT)
commit 797edb28c3dd02a5727f0374e937e906a389ab77 (patch)
tree ee1150ec0127c660ec8074fc9d7ba99241d41e51 /Lib/os.py
parent 702a5bc4637c82dc011e98b84f0cede98eb08dda (diff)
download cpython-797edb28c3dd02a5727f0374e937e906a389ab77.zip
cpython-797edb28c3dd02a5727f0374e937e906a389ab77.tar.gz
cpython-797edb28c3dd02a5727f0374e937e906a389ab77.tar.bz2
gh-89727: Fix os.walk RecursionError on deep trees (#99803)
Use a stack to implement os.walk iteratively instead of recursively to avoid hitting recursion limits on deeply nested trees.
Diffstat (limited to 'Lib/os.py')
-rw-r--r-- Lib/os.py 160
1 files changed, 83 insertions, 77 deletions
diff --git a/Lib/os.py b/Lib/os.py
index fd1e774..73a5442 100644
--- a/Lib/os.py
+++ b/Lib/os.py
@@ -340,89 +340,95 @@ def walk(top, topdown=True, onerror=None, followlinks=False):
"""
sys.audit("os.walk", top, topdown, onerror, followlinks)
- return _walk(fspath(top), topdown, onerror, followlinks)
-
-def _walk(top, topdown, onerror, followlinks):
- dirs = []
- nondirs = []
- walk_dirs = []
-
- # We may not have read permission for top, in which case we can't
- # get a list of the files the directory contains. os.walk
- # always suppressed the exception then, rather than blow up for a
- # minor reason when (say) a thousand readable directories are still
- # left to visit. That logic is copied here.
- try:
- # Note that scandir is global in this module due
- # to earlier import-*.
- scandir_it = scandir(top)
- except OSError as error:
- if onerror is not None:
- onerror(error)
- return
- with scandir_it:
- while True:
- try:
+ stack = [(False, fspath(top))]
+ islink, join = path.islink, path.join
+ while stack:
+ must_yield, top = stack.pop()
+ if must_yield:
+ yield top
+ continue
+
+ dirs = []
+ nondirs = []
+ walk_dirs = []
+
+ # We may not have read permission for top, in which case we can't
+ # get a list of the files the directory contains.
+ # We suppress the exception here, rather than blow up for a
+ # minor reason when (say) a thousand readable directories are still
+ # left to visit.
+ try:
+ scandir_it = scandir(top)
+ except OSError as error:
+ if onerror is not None:
+ onerror(error)
+ continue
+
+ cont = False
+ with scandir_it:
+ while True:
try:
- entry = next(scandir_it)
- except StopIteration:
+ try:
+ entry = next(scandir_it)
+ except StopIteration:
+ break
+ except OSError as error:
+ if onerror is not None:
+ onerror(error)
+ cont = True
break
- except OSError as error:
- if onerror is not None:
- onerror(error)
- return
- try:
- is_dir = entry.is_dir()
- except OSError:
- # If is_dir() raises an OSError, consider that the entry is not
- # a directory, same behaviour than os.path.isdir().
- is_dir = False
-
- if is_dir:
- dirs.append(entry.name)
- else:
- nondirs.append(entry.name)
+ try:
+ is_dir = entry.is_dir()
+ except OSError:
+ # If is_dir() raises an OSError, consider the entry not to
+ # be a directory, same behaviour as os.path.isdir().
+ is_dir = False
- if not topdown and is_dir:
- # Bottom-up: recurse into sub-directory, but exclude symlinks to
- # directories if followlinks is False
- if followlinks:
- walk_into = True
+ if is_dir:
+ dirs.append(entry.name)
else:
- try:
- is_symlink = entry.is_symlink()
- except OSError:
- # If is_symlink() raises an OSError, consider that the
- # entry is not a symbolic link, same behaviour than
- # os.path.islink().
- is_symlink = False
- walk_into = not is_symlink
-
- if walk_into:
- walk_dirs.append(entry.path)
-
- # Yield before recursion if going top down
- if topdown:
- yield top, dirs, nondirs
-
- # Recurse into sub-directories
- islink, join = path.islink, path.join
- for dirname in dirs:
- new_path = join(top, dirname)
- # Issue #23605: os.path.islink() is used instead of caching
- # entry.is_symlink() result during the loop on os.scandir() because
- # the caller can replace the directory entry during the "yield"
- # above.
- if followlinks or not islink(new_path):
- yield from _walk(new_path, topdown, onerror, followlinks)
- else:
- # Recurse into sub-directories
- for new_path in walk_dirs:
- yield from _walk(new_path, topdown, onerror, followlinks)
- # Yield after recursion if going bottom up
- yield top, dirs, nondirs
+ nondirs.append(entry.name)
+
+ if not topdown and is_dir:
+ # Bottom-up: traverse into sub-directory, but exclude
+ # symlinks to directories if followlinks is False
+ if followlinks:
+ walk_into = True
+ else:
+ try:
+ is_symlink = entry.is_symlink()
+ except OSError:
+ # If is_symlink() raises an OSError, consider the
+ # entry not to be a symbolic link, same behaviour
+ # as os.path.islink().
+ is_symlink = False
+ walk_into = not is_symlink
+
+ if walk_into:
+ walk_dirs.append(entry.path)
+ if cont:
+ continue
+
+ if topdown:
+ # Yield before sub-directory traversal if going top down
+ yield top, dirs, nondirs
+ # Traverse into sub-directories
+ for dirname in reversed(dirs):
+ new_path = join(top, dirname)
+ # bpo-23605: os.path.islink() is used instead of caching
+ # entry.is_symlink() result during the loop on os.scandir() because
+ # the caller can replace the directory entry during the "yield"
+ # above.
+ if followlinks or not islink(new_path):
+ stack.append((False, new_path))
+ else:
+ # Yield after sub-directory traversal if going bottom up
+ stack.append((True, (top, dirs, nondirs)))
+ # Traverse into sub-directories
+ for new_path in reversed(walk_dirs):
+ stack.append((False, new_path))
__all__.append("walk")