diff options
author | Charles-François Natali <neologix@free.fr> | 2012-02-05 14:15:38 (GMT) |
---|---|---|
committer | Charles-François Natali <neologix@free.fr> | 2012-02-05 14:15:38 (GMT) |
commit | 7372b06cd7b6b1e88482215ffa27eec8e4b7861a (patch) | |
tree | 14d59be233b09255f3ded9fcf171d6769380bb40 /Lib | |
parent | fb39063202a020371c464627dd8b9d563c2dbf76 (diff) | |
download | cpython-7372b06cd7b6b1e88482215ffa27eec8e4b7861a.zip cpython-7372b06cd7b6b1e88482215ffa27eec8e4b7861a.tar.gz cpython-7372b06cd7b6b1e88482215ffa27eec8e4b7861a.tar.bz2 |
Issue #13734: Add os.fwalk(), a directory walking function yielding file
descriptors.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/os.py | 101 | ||||
-rw-r--r-- | Lib/test/test_os.py | 60 |
2 files changed, 154 insertions, 7 deletions
@@ -24,6 +24,7 @@ and opendir), and leave all pathname manipulation to os.path #' import sys, errno +import stat as st _names = sys.builtin_module_names @@ -32,6 +33,9 @@ __all__ = ["altsep", "curdir", "pardir", "sep", "pathsep", "linesep", "defpath", "name", "path", "devnull", "SEEK_SET", "SEEK_CUR", "SEEK_END"] +def _exists(name): + return name in globals() + def _get_exports_list(module): try: return list(module.__all__) @@ -120,7 +124,12 @@ def _get_masked_mode(mode): umask(mask) return mode & ~mask -#' +def _are_same_file(stat1, stat2): + """Helper function that checks whether two stat results refer to the same + file. + """ + return (stat1.st_ino == stat2.st_ino and stat1.st_dev == stat2.st_dev) +# # Super directory utilities. # (Inspired by Eric Raymond; the doc strings are mostly his) @@ -151,7 +160,6 @@ def makedirs(name, mode=0o777, exist_ok=False): try: mkdir(name, mode) except OSError as e: - import stat as st if not (e.errno == errno.EEXIST and exist_ok and path.isdir(name) and st.S_IMODE(lstat(name).st_mode) == _get_masked_mode(mode)): raise @@ -298,6 +306,92 @@ def walk(top, topdown=True, onerror=None, followlinks=False): __all__.append("walk") +if _exists("openat"): + + def fwalk(top, topdown=True, onerror=None, followlinks=False): + """Directory tree generator. + + This behaves exactly like walk(), except that it yields a 4-tuple + + dirpath, dirnames, filenames, dirfd + + `dirpath`, `dirnames` and `filenames` are identical to walk() output, + and `dirfd` is a file descriptor referring to the directory `dirpath`. + + The advantage of walkfd() over walk() is that it's safe against symlink + races (when followlinks is False). + + Caution: + Since fwalk() yields file descriptors, those are only valid until the + next iteration step, so you should dup() them if you want to keep them + for a longer period. + + Example: + + import os + for root, dirs, files, rootfd in os.fwalk('python/Lib/email'): + print(root, "consumes", end="") + print(sum([os.fstatat(rootfd, name).st_size for name in files]), + end="") + print("bytes in", len(files), "non-directory files") + if 'CVS' in dirs: + dirs.remove('CVS') # don't visit CVS directories + """ + # Note: To guard against symlink races, we use the standard + # lstat()/open()/fstat() trick. + orig_st = lstat(top) + topfd = open(top, O_RDONLY) + try: + if (followlinks or (st.S_ISDIR(orig_st.st_mode) and + _are_same_file(orig_st, fstat(topfd)))): + for x in _fwalk(topfd, top, topdown, onerror, followlinks): + yield x + finally: + close(topfd) + + def _fwalk(topfd, toppath, topdown, onerror, followlinks): + # Note: This uses O(depth of the directory tree) file descriptors: if + # necessary, it can be adapted to only require O(1) FDs, see issue + # #13734. + + # whether to follow symlinks + flag = 0 if followlinks else AT_SYMLINK_NOFOLLOW + + names = fdlistdir(topfd) + dirs, nondirs = [], [] + for name in names: + # Here, we don't use AT_SYMLINK_NOFOLLOW to be consistent with + # walk() which reports symlinks to directories as directories. We do + # however check for symlinks before recursing into a subdirectory. + if st.S_ISDIR(fstatat(topfd, name).st_mode): + dirs.append(name) + else: + nondirs.append(name) + + if topdown: + yield toppath, dirs, nondirs, topfd + + for name in dirs: + try: + orig_st = fstatat(topfd, name, flag) + dirfd = openat(topfd, name, O_RDONLY) + except error as err: + if onerror is not None: + onerror(err) + return + try: + if followlinks or _are_same_file(orig_st, fstat(dirfd)): + dirpath = path.join(toppath, name) + for x in _fwalk(dirfd, dirpath, topdown, onerror, followlinks): + yield x + finally: + close(dirfd) + + if not topdown: + yield toppath, dirs, nondirs, topfd + + __all__.append("fwalk") + # Make sure os.environ exists, at least try: environ @@ -598,9 +692,6 @@ def _fscodec(): fsencode, fsdecode = _fscodec() del _fscodec -def _exists(name): - return name in globals() - # Supply spawn*() (probably only for Unix) if _exists("fork") and not _exists("spawnv") and _exists("execv"): diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 293005b..7f955d1 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -20,6 +20,8 @@ import uuid import asyncore import asynchat import socket +import itertools +import stat try: import threading except ImportError: @@ -159,7 +161,6 @@ class StatAttributeTests(unittest.TestCase): if not hasattr(os, "stat"): return - import stat result = os.stat(fname) # Make sure direct access works @@ -476,7 +477,7 @@ class EnvironTests(mapping_tests.BasicTestMappingProtocol): class WalkTests(unittest.TestCase): """Tests for os.walk().""" - def test_traversal(self): + def setUp(self): import os from os.path import join @@ -586,6 +587,60 @@ class WalkTests(unittest.TestCase): os.remove(dirname) os.rmdir(support.TESTFN) + +@unittest.skipUnless(hasattr(os, 'fwalk'), "Test needs os.fwalk()") +class FwalkTests(WalkTests): + """Tests for os.fwalk().""" + + def test_compare_to_walk(self): + # compare with walk() results + for topdown, followlinks in itertools.product((True, False), repeat=2): + args = support.TESTFN, topdown, None, followlinks + expected = {} + for root, dirs, files in os.walk(*args): + expected[root] = (set(dirs), set(files)) + + for root, dirs, files, rootfd in os.fwalk(*args): + self.assertIn(root, expected) + self.assertEqual(expected[root], (set(dirs), set(files))) + + def test_dir_fd(self): + # check returned file descriptors + for topdown, followlinks in itertools.product((True, False), repeat=2): + args = support.TESTFN, topdown, None, followlinks + for root, dirs, files, rootfd in os.fwalk(*args): + # check that the FD is valid + os.fstat(rootfd) + # check that fdlistdir() returns consistent information + self.assertEqual(set(os.fdlistdir(rootfd)), set(dirs) | set(files)) + + def test_fd_leak(self): + # Since we're opening a lot of FDs, we must be careful to avoid leaks: + # we both check that calling fwalk() a large number of times doesn't + # yield EMFILE, and that the minimum allocated FD hasn't changed. + minfd = os.dup(1) + os.close(minfd) + for i in range(256): + for x in os.fwalk(support.TESTFN): + pass + newfd = os.dup(1) + self.addCleanup(os.close, newfd) + self.assertEqual(newfd, minfd) + + def tearDown(self): + # cleanup + for root, dirs, files, rootfd in os.fwalk(support.TESTFN, topdown=False): + for name in files: + os.unlinkat(rootfd, name) + for name in dirs: + st = os.fstatat(rootfd, name, os.AT_SYMLINK_NOFOLLOW) + if stat.S_ISDIR(st.st_mode): + os.unlinkat(rootfd, name, os.AT_REMOVEDIR) + else: + os.unlinkat(rootfd, name) + os.rmdir(support.TESTFN) + + class MakedirTests(unittest.TestCase): def setUp(self): os.mkdir(support.TESTFN) @@ -1700,6 +1755,7 @@ def test_main(): StatAttributeTests, EnvironTests, WalkTests, + FwalkTests, MakedirTests, DevNullTests, URandomTests, |