summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--  Lib/pathlib.py  54
-rw-r--r--  Lib/test/test_pathlib.py  6
-rw-r--r--  Misc/NEWS.d/next/Library/2023-05-06-20-37-46.gh-issue-102613.QZG9iX.rst  3
3 files changed, 45 insertions, 18 deletions
diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index 68255aa..20ec1ce 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -64,17 +64,25 @@ def _is_case_sensitive(flavour):
@functools.lru_cache()
def _make_selector(pattern_parts, flavour, case_sensitive):
pat = pattern_parts[0]
- child_parts = pattern_parts[1:]
if not pat:
return _TerminatingSelector()
if pat == '**':
- cls = _RecursiveWildcardSelector
- elif pat == '..':
- cls = _ParentSelector
- elif '**' in pat:
- raise ValueError("Invalid pattern: '**' can only be an entire path component")
+ child_parts_idx = 1
+ while child_parts_idx < len(pattern_parts) and pattern_parts[child_parts_idx] == '**':
+ child_parts_idx += 1
+ child_parts = pattern_parts[child_parts_idx:]
+ if '**' in child_parts:
+ cls = _DoubleRecursiveWildcardSelector
+ else:
+ cls = _RecursiveWildcardSelector
else:
- cls = _WildcardSelector
+ child_parts = pattern_parts[1:]
+ if pat == '..':
+ cls = _ParentSelector
+ elif '**' in pat:
+ raise ValueError("Invalid pattern: '**' can only be an entire path component")
+ else:
+ cls = _WildcardSelector
return cls(pat, child_parts, flavour, case_sensitive)
@@ -183,20 +191,32 @@ class _RecursiveWildcardSelector(_Selector):
def _select_from(self, parent_path, scandir):
try:
- yielded = set()
- try:
- successor_select = self.successor._select_from
- for starting_point in self._iterate_directories(parent_path, scandir):
- for p in successor_select(starting_point, scandir):
- if p not in yielded:
- yield p
- yielded.add(p)
- finally:
- yielded.clear()
+ successor_select = self.successor._select_from
+ for starting_point in self._iterate_directories(parent_path, scandir):
+ for p in successor_select(starting_point, scandir):
+ yield p
except PermissionError:
return
+class _DoubleRecursiveWildcardSelector(_RecursiveWildcardSelector):
+ """
+ Like _RecursiveWildcardSelector, but also de-duplicates results from
+ successive selectors. This is necessary if the pattern contains
+ multiple non-adjacent '**' segments.
+ """
+
+ def _select_from(self, parent_path, scandir):
+ yielded = set()
+ try:
+ for p in super()._select_from(parent_path, scandir):
+ if p not in yielded:
+ yield p
+ yielded.add(p)
+ finally:
+ yielded.clear()
+
+
#
# Public API
#
diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py
index e25c77f..ee0ef9a 100644
--- a/Lib/test/test_pathlib.py
+++ b/Lib/test/test_pathlib.py
@@ -1853,13 +1853,14 @@ class _BasePathTest(object):
def test_rglob_common(self):
def _check(glob, expected):
- self.assertEqual(set(glob), { P(BASE, q) for q in expected })
+ self.assertEqual(sorted(glob), sorted(P(BASE, q) for q in expected))
P = self.cls
p = P(BASE)
it = p.rglob("fileA")
self.assertIsInstance(it, collections.abc.Iterator)
_check(it, ["fileA"])
_check(p.rglob("fileB"), ["dirB/fileB"])
+ _check(p.rglob("**/fileB"), ["dirB/fileB"])
_check(p.rglob("*/fileA"), [])
if not os_helper.can_symlink():
_check(p.rglob("*/fileB"), ["dirB/fileB"])
@@ -1883,9 +1884,12 @@ class _BasePathTest(object):
_check(p.rglob("*"), ["dirC/fileC", "dirC/novel.txt",
"dirC/dirD", "dirC/dirD/fileD"])
_check(p.rglob("file*"), ["dirC/fileC", "dirC/dirD/fileD"])
+ _check(p.rglob("**/file*"), ["dirC/fileC", "dirC/dirD/fileD"])
+ _check(p.rglob("dir*/**"), ["dirC/dirD"])
_check(p.rglob("*/*"), ["dirC/dirD/fileD"])
_check(p.rglob("*/"), ["dirC/dirD"])
_check(p.rglob(""), ["dirC", "dirC/dirD"])
+ _check(p.rglob("**"), ["dirC", "dirC/dirD"])
# gh-91616, a re module regression
_check(p.rglob("*.txt"), ["dirC/novel.txt"])
_check(p.rglob("*.*"), ["dirC/novel.txt"])
diff --git a/Misc/NEWS.d/next/Library/2023-05-06-20-37-46.gh-issue-102613.QZG9iX.rst b/Misc/NEWS.d/next/Library/2023-05-06-20-37-46.gh-issue-102613.QZG9iX.rst
new file mode 100644
index 0000000..01f8b94
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-05-06-20-37-46.gh-issue-102613.QZG9iX.rst
@@ -0,0 +1,3 @@
+Improve performance of :meth:`pathlib.Path.glob` when expanding recursive
+wildcards ("``**``") by merging adjacent wildcards and de-duplicating
+results only when necessary.