summary refs log tree commit diff stats
path: root/Lib
diff options
context:
space:
mode:
author Tim Peters <tim.peters@gmail.com> 2003-04-25 07:11:48 (GMT)
committer Tim Peters <tim.peters@gmail.com> 2003-04-25 07:11:48 (GMT)
commit c4e09400422487857a665a5c69a4e2d07a909aed (patch)
tree 3b31be0701fea9d0131c84929e53eb1ae0713b49 /Lib
parent e7adda903500e3467f8d892c6ad46a73a19164b4 (diff)
download cpython-c4e09400422487857a665a5c69a4e2d07a909aed.zip
cpython-c4e09400422487857a665a5c69a4e2d07a909aed.tar.gz
cpython-c4e09400422487857a665a5c69a4e2d07a909aed.tar.bz2
New generator os.walk() does a bit more than os.path.walk() does, and
seems much easier to use. Code, docs, NEWS, and additions to test_os.py (testing this sucker is a bitch!).
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/os.py 81
-rw-r--r-- Lib/test/test_os.py 88
2 files changed, 165 insertions, 4 deletions
diff --git a/Lib/os.py b/Lib/os.py
index 358c8c6..69d1a44 100644
--- a/Lib/os.py
+++ b/Lib/os.py
@@ -26,6 +26,7 @@ import sys
_names = sys.builtin_module_names
+# Note: more names are added to __all__ later.
__all__ = ["altsep", "curdir", "pardir", "sep", "pathsep", "linesep",
"defpath", "name", "path"]
@@ -158,7 +159,7 @@ def removedirs(name):
Super-rmdir; remove a leaf directory and empty all intermediate
ones. Works like rmdir except that, if the leaf directory is
successfully removed, directories corresponding to rightmost path
- segments will be pruned way until either the whole path is
+ segments will be pruned away until either the whole path is
consumed or an error occurs. Errors during this latter phase are
ignored -- they generally mean that a directory was not empty.
@@ -202,6 +203,84 @@ def renames(old, new):
__all__.extend(["makedirs", "removedirs", "renames"])
+def walk(top, topdown=True, onerror=None):
+    """Directory tree generator.
+
+    For each directory in the directory tree rooted at top (including top
+    itself, but excluding '.' and '..'), yields a 3-tuple
+
+        dirpath, dirnames, filenames
+
+    dirpath is a string, the path to the directory.  dirnames is a list of
+    the names of the subdirectories in dirpath (excluding '.' and '..').
+    filenames is a list of the names of the non-directory files in dirpath.
+    Note that the names in the lists are just names, with no path components.
+    To get a full path (which begins with top) to a file or directory in
+    dirpath, do os.path.join(dirpath, name).
+
+    If optional arg 'topdown' is true or not specified, the triple for a
+    directory is generated before the triples for any of its subdirectories
+    (directories are generated top down).  If topdown is false, the triple
+    for a directory is generated after the triples for all of its
+    subdirectories (directories are generated bottom up).
+
+    When topdown is true, the caller can modify the dirnames list in-place
+    (e.g., via del or slice assignment), and walk will only recurse into the
+    subdirectories whose names remain in dirnames; this can be used to prune
+    the search, or to impose a specific order of visiting.  Modifying
+    dirnames when topdown is false is ineffective, since the directories in
+    dirnames have already been generated by the time dirnames itself is
+    generated.
+
+    By default, a directory that cannot be listed is skipped silently.  If
+    optional arg 'onerror' is given, it must be a callable; it is called
+    with one argument, the exception raised by listdir, before that
+    directory is skipped.  The callable may record the error and return
+    (the walk continues) or raise (the walk is aborted).
+
+    Directories reached through a symbolic link are listed in dirnames
+    but are not descended into.
+
+    Caution:  if you pass a relative pathname for top, don't change the
+    current working directory between resumptions of walk.  walk never
+    changes the current directory, and assumes that the client doesn't
+    either.
+
+    Example:
+
+    from os.path import join, getsize
+    for root, dirs, files in walk('python/Lib/email'):
+        print root, "consumes",
+        print sum([getsize(join(root, name)) for name in files]),
+        print "bytes in", len(files), "non-directory files"
+        if 'CVS' in dirs:
+            dirs.remove('CVS')  # don't visit CVS directories
+    """
+
+    from os.path import join, isdir, islink
+
+    # We may not have read permission for top, in which case we can't
+    # get a list of the files the directory contains.  os.path.walk
+    # always suppressed the exception then, rather than blow up for a
+    # minor reason when (say) a thousand readable directories are still
+    # left to visit.  That logic is copied here, except that the caller
+    # may now ask to be told about the failure via onerror.
+    try:
+        # Note that listdir and error are globals in this module due
+        # to earlier import-*.
+        names = listdir(top)
+    except error:
+        if onerror is not None:
+            # sys.exc_info() is used instead of binding the exception in
+            # the except clause so no version-specific syntax is needed.
+            onerror(sys.exc_info()[1])
+        return
+
+    # Partition the entries.  isdir() follows symlinks, so a link to a
+    # directory is reported in dirs even though it is not recursed into.
+    dirs, nondirs = [], []
+    for name in names:
+        if isdir(join(top, name)):
+            dirs.append(name)
+        else:
+            nondirs.append(name)
+
+    if topdown:
+        yield top, dirs, nondirs
+    # dirs is deliberately re-read after the yield above: in top-down mode
+    # the caller may have pruned or reordered it in place.
+    for name in dirs:
+        path = join(top, name)
+        if not islink(path):
+            for x in walk(path, topdown, onerror):
+                yield x
+    if not topdown:
+        yield top, dirs, nondirs
+
+__all__.append("walk")
+
# Make sure os.environ exists, at least
try:
environ
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py
index 2956d73..cf67ef8 100644
--- a/Lib/test/test_os.py
+++ b/Lib/test/test_os.py
@@ -202,11 +202,93 @@ class EnvironTests(TestMappingProtocol):
os.environ.clear()
os.environ.update(self.__save)
+class WalkTests(unittest.TestCase):
+    """Tests for os.walk()."""
+
+    def test_traversal(self):
+        """Check top-down, pruned and bottom-up walks over a small tree.
+
+        The tree is built under TESTFN and is always removed again, even
+        when an assertion fails, so one failing run cannot break the next.
+        """
+        import os
+        from os.path import join
+
+        # Build:
+        #     TESTFN/               a file kid and two directory kids
+        #         tmp1
+        #         SUB1/             a file kid and a directory kid
+        #             tmp2
+        #             SUB11/        no kids
+        #         SUB2/             just a file kid
+        #             tmp3
+        sub1_path = join(TESTFN, "SUB1")
+        sub11_path = join(sub1_path, "SUB11")
+        sub2_path = join(TESTFN, "SUB2")
+        tmp1_path = join(TESTFN, "tmp1")
+        tmp2_path = join(sub1_path, "tmp2")
+        tmp3_path = join(sub2_path, "tmp3")
+
+        # Create stuff.
+        os.makedirs(sub11_path)
+        os.makedirs(sub2_path)
+        try:
+            for path in tmp1_path, tmp2_path, tmp3_path:
+                f = open(path, "w")
+                f.write("I'm " + path + " and proud of it.  Blame test_os.\n")
+                f.close()
+
+            # Walk top-down.
+            walked = list(os.walk(TESTFN))
+            self.assertEqual(len(walked), 4)
+            # We can't know which order SUB1 and SUB2 will appear in.
+            # Not flipped:  TESTFN, SUB1, SUB11, SUB2
+            #     flipped:  TESTFN, SUB2, SUB1, SUB11
+            # flipped is a bool used as 0/1 in the index arithmetic below.
+            flipped = walked[0][1][0] != "SUB1"
+            walked[0][1].sort()
+            self.assertEqual(walked[0], (TESTFN, ["SUB1", "SUB2"], ["tmp1"]))
+            self.assertEqual(walked[1 + flipped],
+                             (sub1_path, ["SUB11"], ["tmp2"]))
+            self.assertEqual(walked[2 + flipped], (sub11_path, [], []))
+            self.assertEqual(walked[3 - 2 * flipped],
+                             (sub2_path, [], ["tmp3"]))
+
+            # Prune the search.
+            walked = []
+            for root, dirs, files in os.walk(TESTFN):
+                walked.append((root, dirs, files))
+                # Don't descend into SUB1.
+                if 'SUB1' in dirs:
+                    # Note that this also mutates the dirs we appended
+                    # to walked!
+                    dirs.remove('SUB1')
+            self.assertEqual(len(walked), 2)
+            self.assertEqual(walked[0], (TESTFN, ["SUB2"], ["tmp1"]))
+            self.assertEqual(walked[1], (sub2_path, [], ["tmp3"]))
+
+            # Walk bottom-up.
+            walked = list(os.walk(TESTFN, topdown=False))
+            self.assertEqual(len(walked), 4)
+            # We can't know which order SUB1 and SUB2 will appear in.
+            # Not flipped:  SUB11, SUB1, SUB2, TESTFN
+            #     flipped:  SUB2, SUB11, SUB1, TESTFN
+            flipped = walked[3][1][0] != "SUB1"
+            walked[3][1].sort()
+            self.assertEqual(walked[3], (TESTFN, ["SUB1", "SUB2"], ["tmp1"]))
+            self.assertEqual(walked[flipped], (sub11_path, [], []))
+            self.assertEqual(walked[flipped + 1],
+                             (sub1_path, ["SUB11"], ["tmp2"]))
+            self.assertEqual(walked[2 - 2 * flipped],
+                             (sub2_path, [], ["tmp3"]))
+        finally:
+            # Tear everything down.  This is a decent use for bottom-up on
+            # Windows, which doesn't have a recursive delete command.  The
+            # (not so) subtlety is that rmdir will fail unless the dir's
+            # kids are removed first, so bottom up is essential.
+            for root, dirs, files in os.walk(TESTFN, topdown=False):
+                for name in files:
+                    os.remove(join(root, name))
+                for name in dirs:
+                    os.rmdir(join(root, name))
+            os.rmdir(TESTFN)
+
def test_main():
suite = unittest.TestSuite()
- suite.addTest(unittest.makeSuite(TemporaryFileTests))
- suite.addTest(unittest.makeSuite(StatAttributeTests))
- suite.addTest(unittest.makeSuite(EnvironTests))
+ # Add the tests of each listed TestCase class to the suite.
+ for cls in (TemporaryFileTests,
+ StatAttributeTests,
+ EnvironTests,
+ WalkTests,
+ ):
+ suite.addTest(unittest.makeSuite(cls))
run_suite(suite)
if __name__ == "__main__":