diff options
author | Barney Gale <barney.gale@gmail.com> | 2024-11-04 19:29:57 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-04 19:29:57 (GMT) |
commit | 9b7294c3a560f43f1e26a0f48c258829076d6464 (patch) | |
tree | bc6936818bae49242cebd431545cd380e853149a | |
parent | 2e95c5ba3bf7e5004c7e2304afda4a8f8e2443a7 (diff) | |
download | cpython-9b7294c3a560f43f1e26a0f48c258829076d6464.zip cpython-9b7294c3a560f43f1e26a0f48c258829076d6464.tar.gz cpython-9b7294c3a560f43f1e26a0f48c258829076d6464.tar.bz2 |
GH-126363: Speed up pattern parsing in `pathlib.Path.glob()` (#126364)

The implementation of `Path.glob()` does rather a hacky thing: it calls
`self.with_segments()` to convert the given pattern to a `Path` object, and
then peeks at the private `_raw_path` attribute to see if pathlib removed a
trailing slash from the pattern.

In this patch, we make `glob()` use a new `_parse_pattern()` classmethod
that splits the pattern into parts while preserving information about any
trailing slash. This skips the cost of creating a `Path` object, and avoids
some path anchor normalization, which makes `Path.glob()` slightly faster.
But mostly it's about making the code less naughty.

Co-authored-by: Tomas R. <tomas.roun8@gmail.com>
-rw-r--r-- | Lib/pathlib/_local.py | 41 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2024-11-03-14-43-51.gh-issue-126363.Xus7vU.rst | 2 |
2 files changed, 29 insertions, 14 deletions
diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py
index ef072b8..99474e1 100644
--- a/Lib/pathlib/_local.py
+++ b/Lib/pathlib/_local.py
@@ -274,6 +274,31 @@ class PurePath(PurePathBase):
                 root = sep
         return drv, root, [x for x in rel.split(sep) if x and x != '.']
 
+    @classmethod
+    def _parse_pattern(cls, pattern):
+        """Parse a glob pattern to a list of parts. This is much like
+        _parse_path, except:
+
+        - Rather than normalizing and returning the drive and root, we raise
+          NotImplementedError if either are present.
+        - If the path has no real parts, we raise ValueError.
+        - If the path ends in a slash, then a final empty part is added.
+        """
+        drv, root, rel = cls.parser.splitroot(pattern)
+        if root or drv:
+            raise NotImplementedError("Non-relative patterns are unsupported")
+        sep = cls.parser.sep
+        altsep = cls.parser.altsep
+        if altsep:
+            rel = rel.replace(altsep, sep)
+        parts = [x for x in rel.split(sep) if x and x != '.']
+        if not parts:
+            raise ValueError(f"Unacceptable pattern: {str(pattern)!r}")
+        elif rel.endswith(sep):
+            # GH-65238: preserve trailing slash in glob patterns.
+            parts.append('')
+        return parts
+
     @property
     def _raw_path(self):
         """The joined but unnormalized path."""
@@ -641,17 +666,7 @@ class Path(PathBase, PurePath):
         kind, including directories) matching the given relative pattern.
         """
         sys.audit("pathlib.Path.glob", self, pattern)
-        if not isinstance(pattern, PurePath):
-            pattern = self.with_segments(pattern)
-        if pattern.anchor:
-            raise NotImplementedError("Non-relative patterns are unsupported")
-        parts = pattern._tail.copy()
-        if not parts:
-            raise ValueError("Unacceptable pattern: {!r}".format(pattern))
-        raw = pattern._raw_path
-        if raw[-1] in (self.parser.sep, self.parser.altsep):
-            # GH-65238: pathlib doesn't preserve trailing slash. Add it back.
-            parts.append('')
+        parts = self._parse_pattern(pattern)
         select = self._glob_selector(parts[::-1], case_sensitive, recurse_symlinks)
         root = str(self)
         paths = select(root)
@@ -672,9 +687,7 @@ class Path(PathBase, PurePath):
         this subtree.
         """
         sys.audit("pathlib.Path.rglob", self, pattern)
-        if not isinstance(pattern, PurePath):
-            pattern = self.with_segments(pattern)
-        pattern = '**' / pattern
+        pattern = self.parser.join('**', pattern)
         return self.glob(pattern, case_sensitive=case_sensitive, recurse_symlinks=recurse_symlinks)
 
     def walk(self, top_down=True, on_error=None, follow_symlinks=False):
diff --git a/Misc/NEWS.d/next/Library/2024-11-03-14-43-51.gh-issue-126363.Xus7vU.rst b/Misc/NEWS.d/next/Library/2024-11-03-14-43-51.gh-issue-126363.Xus7vU.rst
new file mode 100644
index 0000000..20fea9b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-11-03-14-43-51.gh-issue-126363.Xus7vU.rst
@@ -0,0 +1,2 @@
+Speed up pattern parsing in :meth:`pathlib.Path.glob` by skipping creation
+of a :class:`pathlib.Path` object for the pattern.