diff options
author | Barney Gale <barney.gale@gmail.com> | 2023-05-07 21:12:50 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-07 21:12:50 (GMT) |
commit | c0ece3dc9791694e960952ba74070efaaa79a676 (patch) | |
tree | 17d6baebf2f6a39e129b7e74da2ab56a029e8d16 /Lib/test | |
parent | 8d95012c95988dc517db6e09348aab996868699c (diff) | |
download | cpython-c0ece3dc9791694e960952ba74070efaaa79a676.zip cpython-c0ece3dc9791694e960952ba74070efaaa79a676.tar.gz cpython-c0ece3dc9791694e960952ba74070efaaa79a676.tar.bz2 |
GH-102613: Improve performance of `pathlib.Path.rglob()` (GH-104244)
Stop de-duplicating results in `_RecursiveWildcardSelector`. A new
`_DoubleRecursiveWildcardSelector` class is introduced which performs
de-duplication, but this is used _only_ for patterns with multiple
non-adjacent `**` segments, such as `path.glob('**/foo/**')`. By avoiding
the use of a set, `PurePath.__hash__()` is not called, and so paths do not
need to be stringified and case-normalised.
Also merge adjacent `**` segments in patterns.
Diffstat (limited to 'Lib/test')
-rw-r--r-- | Lib/test/test_pathlib.py | 6 |
1 file changed, 5 insertions, 1 deletion
```diff
diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py
index e25c77f..ee0ef9a 100644
--- a/Lib/test/test_pathlib.py
+++ b/Lib/test/test_pathlib.py
@@ -1853,13 +1853,14 @@ class _BasePathTest(object):
 
     def test_rglob_common(self):
         def _check(glob, expected):
-            self.assertEqual(set(glob), { P(BASE, q) for q in expected })
+            self.assertEqual(sorted(glob), sorted(P(BASE, q) for q in expected))
         P = self.cls
         p = P(BASE)
         it = p.rglob("fileA")
         self.assertIsInstance(it, collections.abc.Iterator)
         _check(it, ["fileA"])
         _check(p.rglob("fileB"), ["dirB/fileB"])
+        _check(p.rglob("**/fileB"), ["dirB/fileB"])
         _check(p.rglob("*/fileA"), [])
         if not os_helper.can_symlink():
             _check(p.rglob("*/fileB"), ["dirB/fileB"])
@@ -1883,9 +1884,12 @@ class _BasePathTest(object):
         _check(p.rglob("*"), ["dirC/fileC", "dirC/novel.txt",
                               "dirC/dirD", "dirC/dirD/fileD"])
         _check(p.rglob("file*"), ["dirC/fileC", "dirC/dirD/fileD"])
+        _check(p.rglob("**/file*"), ["dirC/fileC", "dirC/dirD/fileD"])
+        _check(p.rglob("dir*/**"), ["dirC/dirD"])
         _check(p.rglob("*/*"), ["dirC/dirD/fileD"])
         _check(p.rglob("*/"), ["dirC/dirD"])
         _check(p.rglob(""), ["dirC", "dirC/dirD"])
+        _check(p.rglob("**"), ["dirC", "dirC/dirD"])
         # gh-91616, a re module regression
         _check(p.rglob("*.txt"), ["dirC/novel.txt"])
         _check(p.rglob("*.*"), ["dirC/novel.txt"])
```