diff options
author | Jon Burdo <jon@jonburdo.com> | 2022-12-19 18:59:01 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-12-19 18:59:01 (GMT) |
commit | 797edb28c3dd02a5727f0374e937e906a389ab77 (patch) | |
tree | ee1150ec0127c660ec8074fc9d7ba99241d41e51 /Lib/os.py | |
parent | 702a5bc4637c82dc011e98b84f0cede98eb08dda (diff) | |
download | cpython-797edb28c3dd02a5727f0374e937e906a389ab77.zip cpython-797edb28c3dd02a5727f0374e937e906a389ab77.tar.gz cpython-797edb28c3dd02a5727f0374e937e906a389ab77.tar.bz2 |
gh-89727: Fix os.walk RecursionError on deep trees (#99803)
Use a stack to implement os.walk iteratively instead of recursively to
avoid hitting recursion limits on deeply nested trees.
Diffstat (limited to 'Lib/os.py')
-rw-r--r-- | Lib/os.py | 160 |
1 files changed, 83 insertions, 77 deletions
@@ -340,89 +340,95 @@ def walk(top, topdown=True, onerror=None, followlinks=False): """ sys.audit("os.walk", top, topdown, onerror, followlinks) - return _walk(fspath(top), topdown, onerror, followlinks) - -def _walk(top, topdown, onerror, followlinks): - dirs = [] - nondirs = [] - walk_dirs = [] - - # We may not have read permission for top, in which case we can't - # get a list of the files the directory contains. os.walk - # always suppressed the exception then, rather than blow up for a - # minor reason when (say) a thousand readable directories are still - # left to visit. That logic is copied here. - try: - # Note that scandir is global in this module due - # to earlier import-*. - scandir_it = scandir(top) - except OSError as error: - if onerror is not None: - onerror(error) - return - with scandir_it: - while True: - try: + stack = [(False, fspath(top))] + islink, join = path.islink, path.join + while stack: + must_yield, top = stack.pop() + if must_yield: + yield top + continue + + dirs = [] + nondirs = [] + walk_dirs = [] + + # We may not have read permission for top, in which case we can't + # get a list of the files the directory contains. + # We suppress the exception here, rather than blow up for a + # minor reason when (say) a thousand readable directories are still + # left to visit. + try: + scandir_it = scandir(top) + except OSError as error: + if onerror is not None: + onerror(error) + continue + + cont = False + with scandir_it: + while True: try: - entry = next(scandir_it) - except StopIteration: + try: + entry = next(scandir_it) + except StopIteration: + break + except OSError as error: + if onerror is not None: + onerror(error) + cont = True break - except OSError as error: - if onerror is not None: - onerror(error) - return - try: - is_dir = entry.is_dir() - except OSError: - # If is_dir() raises an OSError, consider that the entry is not - # a directory, same behaviour than os.path.isdir(). - is_dir = False - - if is_dir: - dirs.append(entry.name) - else: - nondirs.append(entry.name) + try: + is_dir = entry.is_dir() + except OSError: + # If is_dir() raises an OSError, consider the entry not to + # be a directory, same behaviour as os.path.isdir(). + is_dir = False - if not topdown and is_dir: - # Bottom-up: recurse into sub-directory, but exclude symlinks to - # directories if followlinks is False - if followlinks: - walk_into = True + if is_dir: + dirs.append(entry.name) else: - try: - is_symlink = entry.is_symlink() - except OSError: - # If is_symlink() raises an OSError, consider that the - # entry is not a symbolic link, same behaviour than - # os.path.islink(). - is_symlink = False - walk_into = not is_symlink - - if walk_into: - walk_dirs.append(entry.path) - - # Yield before recursion if going top down - if topdown: - yield top, dirs, nondirs - - # Recurse into sub-directories - islink, join = path.islink, path.join - for dirname in dirs: - new_path = join(top, dirname) - # Issue #23605: os.path.islink() is used instead of caching - # entry.is_symlink() result during the loop on os.scandir() because - # the caller can replace the directory entry during the "yield" - # above. - if followlinks or not islink(new_path): - yield from _walk(new_path, topdown, onerror, followlinks) - else: - # Recurse into sub-directories - for new_path in walk_dirs: - yield from _walk(new_path, topdown, onerror, followlinks) - # Yield after recursion if going bottom up - yield top, dirs, nondirs + nondirs.append(entry.name) + + if not topdown and is_dir: + # Bottom-up: traverse into sub-directory, but exclude + # symlinks to directories if followlinks is False + if followlinks: + walk_into = True + else: + try: + is_symlink = entry.is_symlink() + except OSError: + # If is_symlink() raises an OSError, consider the + # entry not to be a symbolic link, same behaviour + # as os.path.islink(). + is_symlink = False + walk_into = not is_symlink + + if walk_into: + walk_dirs.append(entry.path) + if cont: + continue + + if topdown: + # Yield before sub-directory traversal if going top down + yield top, dirs, nondirs + # Traverse into sub-directories + for dirname in reversed(dirs): + new_path = join(top, dirname) + # bpo-23605: os.path.islink() is used instead of caching + # entry.is_symlink() result during the loop on os.scandir() because + # the caller can replace the directory entry during the "yield" + # above. + if followlinks or not islink(new_path): + stack.append((False, new_path)) + else: + # Yield after sub-directory traversal if going bottom up + stack.append((True, (top, dirs, nondirs))) + # Traverse into sub-directories + for new_path in reversed(walk_dirs): + stack.append((False, new_path)) __all__.append("walk") |