summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@gmail.com>2015-03-18 10:29:47 (GMT)
committerVictor Stinner <victor.stinner@gmail.com>2015-03-18 10:29:47 (GMT)
commit7fea974baf7ba69824a511ddaf7776634ea0737a (patch)
tree5c3699090ef73e78ebdfac4a8b39ff2549d4da70
parenta47fc5c2ddcf86eb3cb1ad628d04bf3764cfce59 (diff)
downloadcpython-7fea974baf7ba69824a511ddaf7776634ea0737a.zip
cpython-7fea974baf7ba69824a511ddaf7776634ea0737a.tar.gz
cpython-7fea974baf7ba69824a511ddaf7776634ea0737a.tar.bz2
Issue #23605: Fix os.walk(topdown=True), don't cache entry.is_symlink() because
the caller can replace the directory with a different file kind. The bottom-up way, os.walk(topdown=False), still uses entry.is_symlink(), and so can be faster than Python 3.4.
-rw-r--r--Lib/os.py80
1 files changed, 51 insertions, 29 deletions
diff --git a/Lib/os.py b/Lib/os.py
index a9a57e0..e5934da 100644
--- a/Lib/os.py
+++ b/Lib/os.py
@@ -354,7 +354,6 @@ def walk(top, topdown=True, onerror=None, followlinks=False):
dirs = []
nondirs = []
- symlinks = set()
# We may not have read permission for top, in which case we can't
# get a list of the files the directory contains. os.walk
@@ -364,46 +363,69 @@ def walk(top, topdown=True, onerror=None, followlinks=False):
try:
# Note that scandir is global in this module due
# to earlier import-*.
- for entry in scandir(top):
+ scandir_it = scandir(top)
+ except OSError as error:
+ if onerror is not None:
+ onerror(error)
+ return
+
+ while True:
+ try:
try:
- is_dir = entry.is_dir()
- except OSError:
- # If is_dir() raises an OSError, consider that the entry is not
- # a directory, same behaviour than os.path.isdir().
- is_dir = False
-
- if is_dir:
- dirs.append(entry.name)
- else:
- nondirs.append(entry.name)
+ entry = next(scandir_it)
+ except StopIteration:
+ break
+ except OSError as error:
+ if onerror is not None:
+ onerror(error)
+ return
+
+ try:
+ is_dir = entry.is_dir()
+ except OSError:
+ # If is_dir() raises an OSError, consider that the entry is not
+ # a directory, same behaviour than os.path.isdir().
+ is_dir = False
- if is_dir and not followlinks:
+ if is_dir:
+ dirs.append(entry.name)
+ else:
+ nondirs.append(entry.name)
+
+ if not topdown and is_dir:
+ # Bottom-up: recurse into sub-directory, but exclude symlinks to
+ # directories if followlinks is False
+ if followlinks:
+ walk_into = True
+ else:
try:
- if entry.is_symlink():
- symlinks.add(entry.name)
+ is_symlink = entry.is_symlink()
except OSError:
# If is_symlink() raises an OSError, consider that the
# entry is not a symbolik link, same behaviour than
# os.path.islink().
- pass
- except OSError as error:
- # scandir() or iterating into scandir() iterator raised an OSError
- if onerror is not None:
- onerror(error)
- return
+ is_symlink = False
+ walk_into = not is_symlink
+
+ if walk_into:
+ yield from walk(entry.path, topdown, onerror, followlinks)
# Yield before recursion if going top down
if topdown:
yield top, dirs, nondirs
- # Recurse into sub-directories
- for name in dirs:
- if followlinks or name not in symlinks:
- new_path = path.join(top, name)
- yield from walk(new_path, topdown, onerror, followlinks)
-
- # Yield after recursion if going bottom up
- if not topdown:
+ # Recurse into sub-directories
+ islink, join = path.islink, path.join
+ for name in dirs:
+ new_path = join(top, name)
+ # Issue #23605: os.path.islink() is used instead of caching
+ # entry.is_symlink() result during the loop on os.scandir() because
+ # the caller can replace the directory entry during the "yield"
+ # above.
+ if followlinks or not islink(new_path):
+ yield from walk(new_path, topdown, onerror, followlinks)
+ else:
+ # Yield after recursion if going bottom up
yield top, dirs, nondirs
__all__.append("walk")