diff options
author | Barney Gale <barney.gale@gmail.com> | 2023-05-29 15:59:52 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-29 15:59:52 (GMT) |
commit | ace676e2c2121f94a55effc6a30b3b0e987ae7da (patch) | |
tree | 45446ea82eda9526217298b32cbd49e51d91151a /Lib | |
parent | 1668b41dc477bc9562e4c50ab36a232839b4621b (diff) | |
download | cpython-ace676e2c2121f94a55effc6a30b3b0e987ae7da.zip cpython-ace676e2c2121f94a55effc6a30b3b0e987ae7da.tar.gz cpython-ace676e2c2121f94a55effc6a30b3b0e987ae7da.tar.bz2 |
GH-77609: Add follow_symlinks argument to `pathlib.Path.glob()` (GH-102616)
Add a keyword-only *follow_symlinks* parameter to `pathlib.Path.glob()` and `rglob()`.
When *follow_symlinks* is `None` (the default), these methods follow symlinks except when evaluating "`**`" wildcards. When set to true or false, symlinks are always or never followed, respectively.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/pathlib.py | 40 | ||||
-rw-r--r-- | Lib/test/test_pathlib.py | 83 |
2 files changed, 104 insertions, 19 deletions
diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 8cb5279..87c2e97 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -105,19 +105,19 @@ class _Selector: self.successor = _TerminatingSelector() self.dironly = False - def select_from(self, parent_path): + def select_from(self, parent_path, follow_symlinks): """Iterate over all child paths of `parent_path` matched by this selector. This can contain parent_path itself.""" path_cls = type(parent_path) scandir = path_cls._scandir if not parent_path.is_dir(): return iter([]) - return self._select_from(parent_path, scandir) + return self._select_from(parent_path, scandir, follow_symlinks) class _TerminatingSelector: - def _select_from(self, parent_path, scandir): + def _select_from(self, parent_path, scandir, follow_symlinks): yield parent_path @@ -126,9 +126,9 @@ class _ParentSelector(_Selector): def __init__(self, name, child_parts, flavour, case_sensitive): _Selector.__init__(self, child_parts, flavour, case_sensitive) - def _select_from(self, parent_path, scandir): + def _select_from(self, parent_path, scandir, follow_symlinks): path = parent_path._make_child_relpath('..') - for p in self.successor._select_from(path, scandir): + for p in self.successor._select_from(path, scandir, follow_symlinks): yield p @@ -141,7 +141,8 @@ class _WildcardSelector(_Selector): case_sensitive = _is_case_sensitive(flavour) self.match = _compile_pattern(pat, case_sensitive) - def _select_from(self, parent_path, scandir): + def _select_from(self, parent_path, scandir, follow_symlinks): + follow_dirlinks = True if follow_symlinks is None else follow_symlinks try: # We must close the scandir() object before proceeding to # avoid exhausting file descriptors when globbing deep trees. 
@@ -153,14 +154,14 @@ class _WildcardSelector(_Selector): for entry in entries: if self.dironly: try: - if not entry.is_dir(): + if not entry.is_dir(follow_symlinks=follow_dirlinks): continue except OSError: continue name = entry.name if self.match(name): path = parent_path._make_child_relpath(name) - for p in self.successor._select_from(path, scandir): + for p in self.successor._select_from(path, scandir, follow_symlinks): yield p @@ -169,16 +170,17 @@ class _RecursiveWildcardSelector(_Selector): def __init__(self, pat, child_parts, flavour, case_sensitive): _Selector.__init__(self, child_parts, flavour, case_sensitive) - def _iterate_directories(self, parent_path): + def _iterate_directories(self, parent_path, follow_symlinks): yield parent_path - for dirpath, dirnames, _ in parent_path.walk(): + for dirpath, dirnames, _ in parent_path.walk(follow_symlinks=follow_symlinks): for dirname in dirnames: yield dirpath._make_child_relpath(dirname) - def _select_from(self, parent_path, scandir): + def _select_from(self, parent_path, scandir, follow_symlinks): + follow_dirlinks = False if follow_symlinks is None else follow_symlinks successor_select = self.successor._select_from - for starting_point in self._iterate_directories(parent_path): - for p in successor_select(starting_point, scandir): + for starting_point in self._iterate_directories(parent_path, follow_dirlinks): + for p in successor_select(starting_point, scandir, follow_symlinks): yield p @@ -189,10 +191,10 @@ class _DoubleRecursiveWildcardSelector(_RecursiveWildcardSelector): multiple non-adjacent '**' segments. 
""" - def _select_from(self, parent_path, scandir): + def _select_from(self, parent_path, scandir, follow_symlinks): yielded = set() try: - for p in super()._select_from(parent_path, scandir): + for p in super()._select_from(parent_path, scandir, follow_symlinks): if p not in yielded: yield p yielded.add(p) @@ -994,7 +996,7 @@ class Path(PurePath): path._tail_cached = tail + [name] return path - def glob(self, pattern, *, case_sensitive=None): + def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None): """Iterate over this subtree and yield all existing files (of any kind, including directories) matching the given relative pattern. """ @@ -1007,10 +1009,10 @@ class Path(PurePath): if pattern[-1] in (self._flavour.sep, self._flavour.altsep): pattern_parts.append('') selector = _make_selector(tuple(pattern_parts), self._flavour, case_sensitive) - for p in selector.select_from(self): + for p in selector.select_from(self, follow_symlinks): yield p - def rglob(self, pattern, *, case_sensitive=None): + def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None): """Recursively yield all existing files (of any kind, including directories) matching the given relative pattern, anywhere in this subtree. 
@@ -1022,7 +1024,7 @@ class Path(PurePath): if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep): pattern_parts.append('') selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour, case_sensitive) - for p in selector.select_from(self): + for p in selector.select_from(self, follow_symlinks): yield p def walk(self, top_down=True, on_error=None, follow_symlinks=False): diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 01615e2..4391d68 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1863,6 +1863,35 @@ class _BasePathTest(object): _check(path, "dirb/file*", True, []) _check(path, "dirb/file*", False, ["dirB/fileB"]) + @os_helper.skip_unless_symlink + def test_glob_follow_symlinks_common(self): + def _check(path, glob, expected): + actual = {path for path in path.glob(glob, follow_symlinks=True) + if "linkD" not in path.parent.parts} # exclude symlink loop. + self.assertEqual(actual, { P(BASE, q) for q in expected }) + P = self.cls + p = P(BASE) + _check(p, "fileB", []) + _check(p, "dir*/file*", ["dirB/fileB", "dirC/fileC"]) + _check(p, "*A", ["dirA", "fileA", "linkA"]) + _check(p, "*B/*", ["dirB/fileB", "dirB/linkD", "linkB/fileB", "linkB/linkD"]) + _check(p, "*/fileB", ["dirB/fileB", "linkB/fileB"]) + _check(p, "*/", ["dirA", "dirB", "dirC", "dirE", "linkB"]) + + @os_helper.skip_unless_symlink + def test_glob_no_follow_symlinks_common(self): + def _check(path, glob, expected): + actual = {path for path in path.glob(glob, follow_symlinks=False)} + self.assertEqual(actual, { P(BASE, q) for q in expected }) + P = self.cls + p = P(BASE) + _check(p, "fileB", []) + _check(p, "dir*/file*", ["dirB/fileB", "dirC/fileC"]) + _check(p, "*A", ["dirA", "fileA", "linkA"]) + _check(p, "*B/*", ["dirB/fileB", "dirB/linkD"]) + _check(p, "*/fileB", ["dirB/fileB"]) + _check(p, "*/", ["dirA", "dirB", "dirC", "dirE"]) + def test_rglob_common(self): def _check(glob, expected): self.assertEqual(sorted(glob), 
sorted(P(BASE, q) for q in expected)) @@ -1907,6 +1936,60 @@ class _BasePathTest(object): _check(p.rglob("*.*"), ["dirC/novel.txt"]) @os_helper.skip_unless_symlink + def test_rglob_follow_symlinks_common(self): + def _check(path, glob, expected): + actual = {path for path in path.rglob(glob, follow_symlinks=True) + if 'linkD' not in path.parent.parts} # exclude symlink loop. + self.assertEqual(actual, { P(BASE, q) for q in expected }) + P = self.cls + p = P(BASE) + _check(p, "fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB"]) + _check(p, "*/fileA", []) + _check(p, "*/fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB"]) + _check(p, "file*", ["fileA", "dirA/linkC/fileB", "dirB/fileB", + "dirC/fileC", "dirC/dirD/fileD", "linkB/fileB"]) + _check(p, "*/", ["dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD", + "dirC", "dirC/dirD", "dirE", "linkB", "linkB/linkD"]) + _check(p, "", ["", "dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD", + "dirC", "dirE", "dirC/dirD", "linkB", "linkB/linkD"]) + + p = P(BASE, "dirC") + _check(p, "*", ["dirC/fileC", "dirC/novel.txt", + "dirC/dirD", "dirC/dirD/fileD"]) + _check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"]) + _check(p, "*/*", ["dirC/dirD/fileD"]) + _check(p, "*/", ["dirC/dirD"]) + _check(p, "", ["dirC", "dirC/dirD"]) + # gh-91616, a re module regression + _check(p, "*.txt", ["dirC/novel.txt"]) + _check(p, "*.*", ["dirC/novel.txt"]) + + @os_helper.skip_unless_symlink + def test_rglob_no_follow_symlinks_common(self): + def _check(path, glob, expected): + actual = {path for path in path.rglob(glob, follow_symlinks=False)} + self.assertEqual(actual, { P(BASE, q) for q in expected }) + P = self.cls + p = P(BASE) + _check(p, "fileB", ["dirB/fileB"]) + _check(p, "*/fileA", []) + _check(p, "*/fileB", ["dirB/fileB"]) + _check(p, "file*", ["fileA", "dirB/fileB", "dirC/fileC", "dirC/dirD/fileD", ]) + _check(p, "*/", ["dirA", "dirB", "dirC", "dirC/dirD", "dirE"]) + _check(p, "", ["", "dirA", "dirB", 
"dirC", "dirE", "dirC/dirD"]) + + p = P(BASE, "dirC") + _check(p, "*", ["dirC/fileC", "dirC/novel.txt", + "dirC/dirD", "dirC/dirD/fileD"]) + _check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"]) + _check(p, "*/*", ["dirC/dirD/fileD"]) + _check(p, "*/", ["dirC/dirD"]) + _check(p, "", ["dirC", "dirC/dirD"]) + # gh-91616, a re module regression + _check(p, "*.txt", ["dirC/novel.txt"]) + _check(p, "*.*", ["dirC/novel.txt"]) + + @os_helper.skip_unless_symlink def test_rglob_symlink_loop(self): # Don't get fooled by symlink loops (Issue #26012). P = self.cls |