diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2015-03-18 10:29:47 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@gmail.com> | 2015-03-18 10:29:47 (GMT) |
commit | 7fea974baf7ba69824a511ddaf7776634ea0737a (patch) | |
tree | 5c3699090ef73e78ebdfac4a8b39ff2549d4da70 | |
parent | a47fc5c2ddcf86eb3cb1ad628d04bf3764cfce59 (diff) | |
download | cpython-7fea974baf7ba69824a511ddaf7776634ea0737a.zip cpython-7fea974baf7ba69824a511ddaf7776634ea0737a.tar.gz cpython-7fea974baf7ba69824a511ddaf7776634ea0737a.tar.bz2 |
Issue #23605: Fix os.walk(topdown=True), don't cache entry.is_symlink() because
the caller can replace the directory with a different file kind.
The bottom-up way, os.walk(topdown=False), still uses entry.is_symlink(), and
so can be faster than Python 3.4.
-rw-r--r-- | Lib/os.py | 80 |
1 files changed, 51 insertions, 29 deletions
@@ -354,7 +354,6 @@ def walk(top, topdown=True, onerror=None, followlinks=False): dirs = [] nondirs = [] - symlinks = set() # We may not have read permission for top, in which case we can't # get a list of the files the directory contains. os.walk @@ -364,46 +363,69 @@ def walk(top, topdown=True, onerror=None, followlinks=False): try: # Note that scandir is global in this module due # to earlier import-*. - for entry in scandir(top): + scandir_it = scandir(top) + except OSError as error: + if onerror is not None: + onerror(error) + return + + while True: + try: try: - is_dir = entry.is_dir() - except OSError: - # If is_dir() raises an OSError, consider that the entry is not - # a directory, same behaviour than os.path.isdir(). - is_dir = False - - if is_dir: - dirs.append(entry.name) - else: - nondirs.append(entry.name) + entry = next(scandir_it) + except StopIteration: + break + except OSError as error: + if onerror is not None: + onerror(error) + return + + try: + is_dir = entry.is_dir() + except OSError: + # If is_dir() raises an OSError, consider that the entry is not + # a directory, same behaviour than os.path.isdir(). + is_dir = False - if is_dir and not followlinks: + if is_dir: + dirs.append(entry.name) + else: + nondirs.append(entry.name) + + if not topdown and is_dir: + # Bottom-up: recurse into sub-directory, but exclude symlinks to + # directories if followlinks is False + if followlinks: + walk_into = True + else: try: - if entry.is_symlink(): - symlinks.add(entry.name) + is_symlink = entry.is_symlink() except OSError: # If is_symlink() raises an OSError, consider that the # entry is not a symbolik link, same behaviour than # os.path.islink(). - pass - except OSError as error: - # scandir() or iterating into scandir() iterator raised an OSError - if onerror is not None: - onerror(error) - return + is_symlink = False + walk_into = not is_symlink + + if walk_into: + yield from walk(entry.path, topdown, onerror, followlinks) # Yield before recursion if going top down if topdown: yield top, dirs, nondirs - # Recurse into sub-directories - for name in dirs: - if followlinks or name not in symlinks: - new_path = path.join(top, name) - yield from walk(new_path, topdown, onerror, followlinks) - - # Yield after recursion if going bottom up - if not topdown: + # Recurse into sub-directories + islink, join = path.islink, path.join + for name in dirs: + new_path = join(top, name) + # Issue #23605: os.path.islink() is used instead of caching + # entry.is_symlink() result during the loop on os.scandir() because + # the caller can replace the directory entry during the "yield" + # above. + if followlinks or not islink(new_path): + yield from walk(new_path, topdown, onerror, followlinks) + else: + # Yield after recursion if going bottom up yield top, dirs, nondirs __all__.append("walk") |