summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Barney Gale <barney.gale@gmail.com> 2024-11-04 19:29:57 (GMT)
committer: GitHub <noreply@github.com> 2024-11-04 19:29:57 (GMT)
commit: 9b7294c3a560f43f1e26a0f48c258829076d6464 (patch)
tree: bc6936818bae49242cebd431545cd380e853149a
parent: 2e95c5ba3bf7e5004c7e2304afda4a8f8e2443a7 (diff)
download: cpython-9b7294c3a560f43f1e26a0f48c258829076d6464.zip
cpython-9b7294c3a560f43f1e26a0f48c258829076d6464.tar.gz
cpython-9b7294c3a560f43f1e26a0f48c258829076d6464.tar.bz2
GH-126363: Speed up pattern parsing in `pathlib.Path.glob()` (#126364)
The implementation of `Path.glob()` does rather a hacky thing: it calls `self.with_segments()` to convert the given pattern to a `Path` object, and then peeks at the private `_raw_path` attribute to see if pathlib removed a trailing slash from the pattern.

In this patch, we make `glob()` use a new `_parse_pattern()` classmethod that splits the pattern into parts while preserving information about any trailing slash. This skips the cost of creating a `Path` object, and avoids some path anchor normalization, which makes `Path.glob()` slightly faster. But mostly it's about making the code less naughty.

Co-authored-by: Tomas R. <tomas.roun8@gmail.com>
-rw-r--r-- Lib/pathlib/_local.py | 41
-rw-r--r-- Misc/NEWS.d/next/Library/2024-11-03-14-43-51.gh-issue-126363.Xus7vU.rst | 2
2 files changed, 29 insertions, 14 deletions
diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py
index ef072b8..99474e1 100644
--- a/Lib/pathlib/_local.py
+++ b/Lib/pathlib/_local.py
@@ -274,6 +274,31 @@ class PurePath(PurePathBase):
root = sep
return drv, root, [x for x in rel.split(sep) if x and x != '.']
+ @classmethod
+ def _parse_pattern(cls, pattern):
+ """Parse a glob pattern to a list of parts. This is much like
+ _parse_path, except:
+
+ - Rather than normalizing and returning the drive and root, we raise
+ NotImplementedError if either are present.
+ - If the path has no real parts, we raise ValueError.
+ - If the path ends in a slash, then a final empty part is added.
+ """
+ drv, root, rel = cls.parser.splitroot(pattern)
+ if root or drv:
+ raise NotImplementedError("Non-relative patterns are unsupported")
+ sep = cls.parser.sep
+ altsep = cls.parser.altsep
+ if altsep:
+ rel = rel.replace(altsep, sep)
+ parts = [x for x in rel.split(sep) if x and x != '.']
+ if not parts:
+ raise ValueError(f"Unacceptable pattern: {str(pattern)!r}")
+ elif rel.endswith(sep):
+ # GH-65238: preserve trailing slash in glob patterns.
+ parts.append('')
+ return parts
+
@property
def _raw_path(self):
"""The joined but unnormalized path."""
@@ -641,17 +666,7 @@ class Path(PathBase, PurePath):
kind, including directories) matching the given relative pattern.
"""
sys.audit("pathlib.Path.glob", self, pattern)
- if not isinstance(pattern, PurePath):
- pattern = self.with_segments(pattern)
- if pattern.anchor:
- raise NotImplementedError("Non-relative patterns are unsupported")
- parts = pattern._tail.copy()
- if not parts:
- raise ValueError("Unacceptable pattern: {!r}".format(pattern))
- raw = pattern._raw_path
- if raw[-1] in (self.parser.sep, self.parser.altsep):
- # GH-65238: pathlib doesn't preserve trailing slash. Add it back.
- parts.append('')
+ parts = self._parse_pattern(pattern)
select = self._glob_selector(parts[::-1], case_sensitive, recurse_symlinks)
root = str(self)
paths = select(root)
@@ -672,9 +687,7 @@ class Path(PathBase, PurePath):
this subtree.
"""
sys.audit("pathlib.Path.rglob", self, pattern)
- if not isinstance(pattern, PurePath):
- pattern = self.with_segments(pattern)
- pattern = '**' / pattern
+ pattern = self.parser.join('**', pattern)
return self.glob(pattern, case_sensitive=case_sensitive, recurse_symlinks=recurse_symlinks)
def walk(self, top_down=True, on_error=None, follow_symlinks=False):
diff --git a/Misc/NEWS.d/next/Library/2024-11-03-14-43-51.gh-issue-126363.Xus7vU.rst b/Misc/NEWS.d/next/Library/2024-11-03-14-43-51.gh-issue-126363.Xus7vU.rst
new file mode 100644
index 0000000..20fea9b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-11-03-14-43-51.gh-issue-126363.Xus7vU.rst
@@ -0,0 +1,2 @@
+Speed up pattern parsing in :meth:`pathlib.Path.glob` by skipping creation
+of a :class:`pathlib.Path` object for the pattern.