summary | refs | log | tree | commit | diff | stats
path: root/Lib/glob.py
diff options
context:
space:
mode:
author: Barney Gale <barney.gale@gmail.com> 2024-04-12 21:19:21 (GMT)
committer: GitHub <noreply@github.com> 2024-04-12 21:19:21 (GMT)
commit: 0eb52f5f266d9e0a662f28a4d2dfef8c746cf96e (patch)
tree: 2cccc9ca7a407f88e57b1bf1b7406a1c73005ce2 /Lib/glob.py
parent: 069de14cb948f56b37e507f367b99c5563d3685e (diff)
download: cpython-0eb52f5f266d9e0a662f28a4d2dfef8c746cf96e.zip
cpython-0eb52f5f266d9e0a662f28a4d2dfef8c746cf96e.tar.gz
cpython-0eb52f5f266d9e0a662f28a4d2dfef8c746cf96e.tar.bz2
GH-115060: Speed up `pathlib.Path.glob()` by not scanning literal parts (#117732)
Don't bother calling `os.scandir()` to scan for literal pattern segments, like `foo` in `foo/*.py`. Instead, append the segment(s) as-is and call through to the next selector with `exists=False`, which signals that the path might not exist. Subsequent selectors will call `os.scandir()` or `os.lstat()` to filter out missing paths as needed.
Diffstat (limited to 'Lib/glob.py')
-rw-r--r-- Lib/glob.py 22
1 file changed, 21 insertions, 1 deletion
diff --git a/Lib/glob.py b/Lib/glob.py
index b1d2681..72cf222 100644
--- a/Lib/glob.py
+++ b/Lib/glob.py
@@ -331,9 +331,10 @@ class _Globber:
"""Class providing shell-style pattern matching and globbing.
"""
- def __init__(self, sep, case_sensitive, recursive=False):
+ def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False):  # NOTE(review): case_pedantic is inserted before recursive, so a positional 3rd argument now binds to case_pedantic -- confirm callers pass recursive by keyword
self.sep = sep
self.case_sensitive = case_sensitive
+ self.case_pedantic = case_pedantic  # when true, selector() never takes the literal_selector fast path
self.recursive = recursive
# Low-level methods
@@ -373,6 +374,8 @@ class _Globber:
selector = self.recursive_selector
elif part in _special_parts:
selector = self.special_selector
+ elif not self.case_pedantic and magic_check.search(part) is None:
+ selector = self.literal_selector
else:
selector = self.wildcard_selector
return selector(part, parts)
@@ -387,6 +390,23 @@ class _Globber:
return select_next(path, exists)
return select_special
+ def literal_selector(self, part, parts):
+ """Returns a function that selects a literal descendant of a path.
+ """
+
+ # Optimization: consume and join any subsequent literal parts here,
+ # rather than leaving them for the next selector. This reduces the
+ # number of string concatenation operations and calls to add_slash().
+ while parts and magic_check.search(parts[-1]) is None:
+ part += self.sep + parts.pop()  # parts is consumed from its end; part grows into a multi-segment literal
+
+ select_next = self.selector(parts)  # built once here, outside the per-path closure below
+
+ def select_literal(path, exists=False):  # the incoming `exists` value is accepted but not used
+ path = self.concat_path(self.add_slash(path), part)  # pure string join; no os.scandir() call for literal parts
+ return select_next(path, exists=False)  # joined path is unverified, so tell the next selector it might not exist
+ return select_literal
+
def wildcard_selector(self, part, parts):
"""Returns a function that selects direct children of a given path,
filtering by pattern.