summary | refs | log | tree | commit | diff | stats
path: root/Lib/test
diff options
context:
space:
mode:
author: Barney Gale <barney.gale@gmail.com> 2023-05-07 21:12:50 (GMT)
committer: GitHub <noreply@github.com> 2023-05-07 21:12:50 (GMT)
commit: c0ece3dc9791694e960952ba74070efaaa79a676 (patch)
tree: 17d6baebf2f6a39e129b7e74da2ab56a029e8d16 /Lib/test
parent: 8d95012c95988dc517db6e09348aab996868699c (diff)
download: cpython-c0ece3dc9791694e960952ba74070efaaa79a676.zip
cpython-c0ece3dc9791694e960952ba74070efaaa79a676.tar.gz
cpython-c0ece3dc9791694e960952ba74070efaaa79a676.tar.bz2
GH-102613: Improve performance of `pathlib.Path.rglob()` (GH-104244)
Stop de-duplicating results in `_RecursiveWildcardSelector`. A new `_DoubleRecursiveWildcardSelector` class is introduced which performs de-duplication, but this is used _only_ for patterns with multiple non-adjacent `**` segments, such as `path.glob('**/foo/**')`. By avoiding the use of a set, `PurePath.__hash__()` is not called, and so paths do not need to be stringified and case-normalised. Also merge adjacent `**` segments in patterns.
Diffstat (limited to 'Lib/test')
-rw-r--r-- Lib/test/test_pathlib.py 6
1 file changed, 5 insertions, 1 deletion
diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py
index e25c77f..ee0ef9a 100644
--- a/Lib/test/test_pathlib.py
+++ b/Lib/test/test_pathlib.py
@@ -1853,13 +1853,14 @@ class _BasePathTest(object):
def test_rglob_common(self):
def _check(glob, expected):
- self.assertEqual(set(glob), { P(BASE, q) for q in expected })
+ self.assertEqual(sorted(glob), sorted(P(BASE, q) for q in expected))
P = self.cls
p = P(BASE)
it = p.rglob("fileA")
self.assertIsInstance(it, collections.abc.Iterator)
_check(it, ["fileA"])
_check(p.rglob("fileB"), ["dirB/fileB"])
+ _check(p.rglob("**/fileB"), ["dirB/fileB"])
_check(p.rglob("*/fileA"), [])
if not os_helper.can_symlink():
_check(p.rglob("*/fileB"), ["dirB/fileB"])
@@ -1883,9 +1884,12 @@ class _BasePathTest(object):
_check(p.rglob("*"), ["dirC/fileC", "dirC/novel.txt",
"dirC/dirD", "dirC/dirD/fileD"])
_check(p.rglob("file*"), ["dirC/fileC", "dirC/dirD/fileD"])
+ _check(p.rglob("**/file*"), ["dirC/fileC", "dirC/dirD/fileD"])
+ _check(p.rglob("dir*/**"), ["dirC/dirD"])
_check(p.rglob("*/*"), ["dirC/dirD/fileD"])
_check(p.rglob("*/"), ["dirC/dirD"])
_check(p.rglob(""), ["dirC", "dirC/dirD"])
+ _check(p.rglob("**"), ["dirC", "dirC/dirD"])
# gh-91616, a re module regression
_check(p.rglob("*.txt"), ["dirC/novel.txt"])
_check(p.rglob("*.*"), ["dirC/novel.txt"])