summaryrefslogtreecommitdiffstats
path: root/Lib/os.py
diff options
context:
space:
mode:
authorCharles-François Natali <neologix@free.fr>2012-02-05 14:15:38 (GMT)
committerCharles-François Natali <neologix@free.fr>2012-02-05 14:15:38 (GMT)
commit7372b06cd7b6b1e88482215ffa27eec8e4b7861a (patch)
tree14d59be233b09255f3ded9fcf171d6769380bb40 /Lib/os.py
parentfb39063202a020371c464627dd8b9d563c2dbf76 (diff)
downloadcpython-7372b06cd7b6b1e88482215ffa27eec8e4b7861a.zip
cpython-7372b06cd7b6b1e88482215ffa27eec8e4b7861a.tar.gz
cpython-7372b06cd7b6b1e88482215ffa27eec8e4b7861a.tar.bz2
Issue #13734: Add os.fwalk(), a directory walking function yielding file
descriptors.
Diffstat (limited to 'Lib/os.py')
-rw-r--r--Lib/os.py101
1 files changed, 96 insertions, 5 deletions
diff --git a/Lib/os.py b/Lib/os.py
index 28979bf..ad5d538 100644
--- a/Lib/os.py
+++ b/Lib/os.py
@@ -24,6 +24,7 @@ and opendir), and leave all pathname manipulation to os.path
#'
import sys, errno
+import stat as st
_names = sys.builtin_module_names
@@ -32,6 +33,9 @@ __all__ = ["altsep", "curdir", "pardir", "sep", "pathsep", "linesep",
"defpath", "name", "path", "devnull",
"SEEK_SET", "SEEK_CUR", "SEEK_END"]
+def _exists(name):
+ return name in globals()
+
def _get_exports_list(module):
try:
return list(module.__all__)
@@ -120,7 +124,12 @@ def _get_masked_mode(mode):
umask(mask)
return mode & ~mask
-#'
+def _are_same_file(stat1, stat2):
+ """Helper function that checks whether two stat results refer to the same
+ file.
+ """
+ return (stat1.st_ino == stat2.st_ino and stat1.st_dev == stat2.st_dev)
+#
# Super directory utilities.
# (Inspired by Eric Raymond; the doc strings are mostly his)
@@ -151,7 +160,6 @@ def makedirs(name, mode=0o777, exist_ok=False):
try:
mkdir(name, mode)
except OSError as e:
- import stat as st
if not (e.errno == errno.EEXIST and exist_ok and path.isdir(name) and
st.S_IMODE(lstat(name).st_mode) == _get_masked_mode(mode)):
raise
@@ -298,6 +306,92 @@ def walk(top, topdown=True, onerror=None, followlinks=False):
__all__.append("walk")
+if _exists("openat"):
+
+ def fwalk(top, topdown=True, onerror=None, followlinks=False):
+ """Directory tree generator.
+
+ This behaves exactly like walk(), except that it yields a 4-tuple
+
+ dirpath, dirnames, filenames, dirfd
+
+ `dirpath`, `dirnames` and `filenames` are identical to walk() output,
+ and `dirfd` is a file descriptor referring to the directory `dirpath`.
+
+ The advantage of walkfd() over walk() is that it's safe against symlink
+ races (when followlinks is False).
+
+ Caution:
+ Since fwalk() yields file descriptors, those are only valid until the
+ next iteration step, so you should dup() them if you want to keep them
+ for a longer period.
+
+ Example:
+
+ import os
+ for root, dirs, files, rootfd in os.fwalk('python/Lib/email'):
+ print(root, "consumes", end="")
+ print(sum([os.fstatat(rootfd, name).st_size for name in files]),
+ end="")
+ print("bytes in", len(files), "non-directory files")
+ if 'CVS' in dirs:
+ dirs.remove('CVS') # don't visit CVS directories
+ """
+ # Note: To guard against symlink races, we use the standard
+ # lstat()/open()/fstat() trick.
+ orig_st = lstat(top)
+ topfd = open(top, O_RDONLY)
+ try:
+ if (followlinks or (st.S_ISDIR(orig_st.st_mode) and
+ _are_same_file(orig_st, fstat(topfd)))):
+ for x in _fwalk(topfd, top, topdown, onerror, followlinks):
+ yield x
+ finally:
+ close(topfd)
+
+ def _fwalk(topfd, toppath, topdown, onerror, followlinks):
+ # Note: This uses O(depth of the directory tree) file descriptors: if
+ # necessary, it can be adapted to only require O(1) FDs, see issue
+ # #13734.
+
+ # whether to follow symlinks
+ flag = 0 if followlinks else AT_SYMLINK_NOFOLLOW
+
+ names = fdlistdir(topfd)
+ dirs, nondirs = [], []
+ for name in names:
+ # Here, we don't use AT_SYMLINK_NOFOLLOW to be consistent with
+ # walk() which reports symlinks to directories as directories. We do
+ # however check for symlinks before recursing into a subdirectory.
+ if st.S_ISDIR(fstatat(topfd, name).st_mode):
+ dirs.append(name)
+ else:
+ nondirs.append(name)
+
+ if topdown:
+ yield toppath, dirs, nondirs, topfd
+
+ for name in dirs:
+ try:
+ orig_st = fstatat(topfd, name, flag)
+ dirfd = openat(topfd, name, O_RDONLY)
+ except error as err:
+ if onerror is not None:
+ onerror(err)
+ return
+ try:
+ if followlinks or _are_same_file(orig_st, fstat(dirfd)):
+ dirpath = path.join(toppath, name)
+ for x in _fwalk(dirfd, dirpath, topdown, onerror, followlinks):
+ yield x
+ finally:
+ close(dirfd)
+
+ if not topdown:
+ yield toppath, dirs, nondirs, topfd
+
+ __all__.append("fwalk")
+
# Make sure os.environ exists, at least
try:
environ
@@ -598,9 +692,6 @@ def _fscodec():
fsencode, fsdecode = _fscodec()
del _fscodec
-def _exists(name):
- return name in globals()
-
# Supply spawn*() (probably only for Unix)
if _exists("fork") and not _exists("spawnv") and _exists("execv"):