summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Barney Gale <barney.gale@gmail.com> 2023-05-29 15:59:52 (GMT)
committer: GitHub <noreply@github.com> 2023-05-29 15:59:52 (GMT)
commit: ace676e2c2121f94a55effc6a30b3b0e987ae7da (patch)
tree: 45446ea82eda9526217298b32cbd49e51d91151a /Lib
parent: 1668b41dc477bc9562e4c50ab36a232839b4621b (diff)
download: cpython-ace676e2c2121f94a55effc6a30b3b0e987ae7da.zip
cpython-ace676e2c2121f94a55effc6a30b3b0e987ae7da.tar.gz
cpython-ace676e2c2121f94a55effc6a30b3b0e987ae7da.tar.bz2
GH-77609: Add follow_symlinks argument to `pathlib.Path.glob()` (GH-102616)
Add a keyword-only *follow_symlinks* parameter to `pathlib.Path.glob()` and `rglob()`. When *follow_symlinks* is `None` (the default), these methods follow symlinks except when evaluating "`**`" wildcards. When set to true or false, symlinks are always or never followed, respectively.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/pathlib.py 40
-rw-r--r-- Lib/test/test_pathlib.py 83
2 files changed, 104 insertions, 19 deletions
diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index 8cb5279..87c2e97 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -105,19 +105,19 @@ class _Selector:
self.successor = _TerminatingSelector()
self.dironly = False
- def select_from(self, parent_path):
+ def select_from(self, parent_path, follow_symlinks):
"""Iterate over all child paths of `parent_path` matched by this
selector. This can contain parent_path itself."""
path_cls = type(parent_path)
scandir = path_cls._scandir
if not parent_path.is_dir():
return iter([])
- return self._select_from(parent_path, scandir)
+ return self._select_from(parent_path, scandir, follow_symlinks)
class _TerminatingSelector:
- def _select_from(self, parent_path, scandir):
+ def _select_from(self, parent_path, scandir, follow_symlinks):
yield parent_path
@@ -126,9 +126,9 @@ class _ParentSelector(_Selector):
def __init__(self, name, child_parts, flavour, case_sensitive):
_Selector.__init__(self, child_parts, flavour, case_sensitive)
- def _select_from(self, parent_path, scandir):
+ def _select_from(self, parent_path, scandir, follow_symlinks):
path = parent_path._make_child_relpath('..')
- for p in self.successor._select_from(path, scandir):
+ for p in self.successor._select_from(path, scandir, follow_symlinks):
yield p
@@ -141,7 +141,8 @@ class _WildcardSelector(_Selector):
case_sensitive = _is_case_sensitive(flavour)
self.match = _compile_pattern(pat, case_sensitive)
- def _select_from(self, parent_path, scandir):
+ def _select_from(self, parent_path, scandir, follow_symlinks):
+ follow_dirlinks = True if follow_symlinks is None else follow_symlinks
try:
# We must close the scandir() object before proceeding to
# avoid exhausting file descriptors when globbing deep trees.
@@ -153,14 +154,14 @@ class _WildcardSelector(_Selector):
for entry in entries:
if self.dironly:
try:
- if not entry.is_dir():
+ if not entry.is_dir(follow_symlinks=follow_dirlinks):
continue
except OSError:
continue
name = entry.name
if self.match(name):
path = parent_path._make_child_relpath(name)
- for p in self.successor._select_from(path, scandir):
+ for p in self.successor._select_from(path, scandir, follow_symlinks):
yield p
@@ -169,16 +170,17 @@ class _RecursiveWildcardSelector(_Selector):
def __init__(self, pat, child_parts, flavour, case_sensitive):
_Selector.__init__(self, child_parts, flavour, case_sensitive)
- def _iterate_directories(self, parent_path):
+ def _iterate_directories(self, parent_path, follow_symlinks):
yield parent_path
- for dirpath, dirnames, _ in parent_path.walk():
+ for dirpath, dirnames, _ in parent_path.walk(follow_symlinks=follow_symlinks):
for dirname in dirnames:
yield dirpath._make_child_relpath(dirname)
- def _select_from(self, parent_path, scandir):
+ def _select_from(self, parent_path, scandir, follow_symlinks):
+ follow_dirlinks = False if follow_symlinks is None else follow_symlinks
successor_select = self.successor._select_from
- for starting_point in self._iterate_directories(parent_path):
- for p in successor_select(starting_point, scandir):
+ for starting_point in self._iterate_directories(parent_path, follow_dirlinks):
+ for p in successor_select(starting_point, scandir, follow_symlinks):
yield p
@@ -189,10 +191,10 @@ class _DoubleRecursiveWildcardSelector(_RecursiveWildcardSelector):
multiple non-adjacent '**' segments.
"""
- def _select_from(self, parent_path, scandir):
+ def _select_from(self, parent_path, scandir, follow_symlinks):
yielded = set()
try:
- for p in super()._select_from(parent_path, scandir):
+ for p in super()._select_from(parent_path, scandir, follow_symlinks):
if p not in yielded:
yield p
yielded.add(p)
@@ -994,7 +996,7 @@ class Path(PurePath):
path._tail_cached = tail + [name]
return path
- def glob(self, pattern, *, case_sensitive=None):
+ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
"""Iterate over this subtree and yield all existing files (of any
kind, including directories) matching the given relative pattern.
"""
@@ -1007,10 +1009,10 @@ class Path(PurePath):
if pattern[-1] in (self._flavour.sep, self._flavour.altsep):
pattern_parts.append('')
selector = _make_selector(tuple(pattern_parts), self._flavour, case_sensitive)
- for p in selector.select_from(self):
+ for p in selector.select_from(self, follow_symlinks):
yield p
- def rglob(self, pattern, *, case_sensitive=None):
+ def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
"""Recursively yield all existing files (of any kind, including
directories) matching the given relative pattern, anywhere in
this subtree.
@@ -1022,7 +1024,7 @@ class Path(PurePath):
if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep):
pattern_parts.append('')
selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour, case_sensitive)
- for p in selector.select_from(self):
+ for p in selector.select_from(self, follow_symlinks):
yield p
def walk(self, top_down=True, on_error=None, follow_symlinks=False):
diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py
index 01615e2..4391d68 100644
--- a/Lib/test/test_pathlib.py
+++ b/Lib/test/test_pathlib.py
@@ -1863,6 +1863,35 @@ class _BasePathTest(object):
_check(path, "dirb/file*", True, [])
_check(path, "dirb/file*", False, ["dirB/fileB"])
+ @os_helper.skip_unless_symlink
+ def test_glob_follow_symlinks_common(self):
+ def _check(path, glob, expected):
+ actual = {path for path in path.glob(glob, follow_symlinks=True)
+ if "linkD" not in path.parent.parts} # exclude symlink loop.
+ self.assertEqual(actual, { P(BASE, q) for q in expected })
+ P = self.cls
+ p = P(BASE)
+ _check(p, "fileB", [])
+ _check(p, "dir*/file*", ["dirB/fileB", "dirC/fileC"])
+ _check(p, "*A", ["dirA", "fileA", "linkA"])
+ _check(p, "*B/*", ["dirB/fileB", "dirB/linkD", "linkB/fileB", "linkB/linkD"])
+ _check(p, "*/fileB", ["dirB/fileB", "linkB/fileB"])
+ _check(p, "*/", ["dirA", "dirB", "dirC", "dirE", "linkB"])
+
+ @os_helper.skip_unless_symlink
+ def test_glob_no_follow_symlinks_common(self):
+ def _check(path, glob, expected):
+ actual = {path for path in path.glob(glob, follow_symlinks=False)}
+ self.assertEqual(actual, { P(BASE, q) for q in expected })
+ P = self.cls
+ p = P(BASE)
+ _check(p, "fileB", [])
+ _check(p, "dir*/file*", ["dirB/fileB", "dirC/fileC"])
+ _check(p, "*A", ["dirA", "fileA", "linkA"])
+ _check(p, "*B/*", ["dirB/fileB", "dirB/linkD"])
+ _check(p, "*/fileB", ["dirB/fileB"])
+ _check(p, "*/", ["dirA", "dirB", "dirC", "dirE"])
+
def test_rglob_common(self):
def _check(glob, expected):
self.assertEqual(sorted(glob), sorted(P(BASE, q) for q in expected))
@@ -1907,6 +1936,60 @@ class _BasePathTest(object):
_check(p.rglob("*.*"), ["dirC/novel.txt"])
@os_helper.skip_unless_symlink
+ def test_rglob_follow_symlinks_common(self):
+ def _check(path, glob, expected):
+ actual = {path for path in path.rglob(glob, follow_symlinks=True)
+ if 'linkD' not in path.parent.parts} # exclude symlink loop.
+ self.assertEqual(actual, { P(BASE, q) for q in expected })
+ P = self.cls
+ p = P(BASE)
+ _check(p, "fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB"])
+ _check(p, "*/fileA", [])
+ _check(p, "*/fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB"])
+ _check(p, "file*", ["fileA", "dirA/linkC/fileB", "dirB/fileB",
+ "dirC/fileC", "dirC/dirD/fileD", "linkB/fileB"])
+ _check(p, "*/", ["dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD",
+ "dirC", "dirC/dirD", "dirE", "linkB", "linkB/linkD"])
+ _check(p, "", ["", "dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD",
+ "dirC", "dirE", "dirC/dirD", "linkB", "linkB/linkD"])
+
+ p = P(BASE, "dirC")
+ _check(p, "*", ["dirC/fileC", "dirC/novel.txt",
+ "dirC/dirD", "dirC/dirD/fileD"])
+ _check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"])
+ _check(p, "*/*", ["dirC/dirD/fileD"])
+ _check(p, "*/", ["dirC/dirD"])
+ _check(p, "", ["dirC", "dirC/dirD"])
+ # gh-91616, a re module regression
+ _check(p, "*.txt", ["dirC/novel.txt"])
+ _check(p, "*.*", ["dirC/novel.txt"])
+
+ @os_helper.skip_unless_symlink
+ def test_rglob_no_follow_symlinks_common(self):
+ def _check(path, glob, expected):
+ actual = {path for path in path.rglob(glob, follow_symlinks=False)}
+ self.assertEqual(actual, { P(BASE, q) for q in expected })
+ P = self.cls
+ p = P(BASE)
+ _check(p, "fileB", ["dirB/fileB"])
+ _check(p, "*/fileA", [])
+ _check(p, "*/fileB", ["dirB/fileB"])
+ _check(p, "file*", ["fileA", "dirB/fileB", "dirC/fileC", "dirC/dirD/fileD", ])
+ _check(p, "*/", ["dirA", "dirB", "dirC", "dirC/dirD", "dirE"])
+ _check(p, "", ["", "dirA", "dirB", "dirC", "dirE", "dirC/dirD"])
+
+ p = P(BASE, "dirC")
+ _check(p, "*", ["dirC/fileC", "dirC/novel.txt",
+ "dirC/dirD", "dirC/dirD/fileD"])
+ _check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"])
+ _check(p, "*/*", ["dirC/dirD/fileD"])
+ _check(p, "*/", ["dirC/dirD"])
+ _check(p, "", ["dirC", "dirC/dirD"])
+ # gh-91616, a re module regression
+ _check(p, "*.txt", ["dirC/novel.txt"])
+ _check(p, "*.*", ["dirC/novel.txt"])
+
+ @os_helper.skip_unless_symlink
def test_rglob_symlink_loop(self):
# Don't get fooled by symlink loops (Issue #26012).
P = self.cls