diff options
author | Barney Gale <barney.gale@gmail.com> | 2024-04-12 21:19:21 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-12 21:19:21 (GMT) |
commit | 0eb52f5f266d9e0a662f28a4d2dfef8c746cf96e (patch) | |
tree | 2cccc9ca7a407f88e57b1bf1b7406a1c73005ce2 /Lib/glob.py | |
parent | 069de14cb948f56b37e507f367b99c5563d3685e (diff) | |
download | cpython-0eb52f5f266d9e0a662f28a4d2dfef8c746cf96e.zip cpython-0eb52f5f266d9e0a662f28a4d2dfef8c746cf96e.tar.gz cpython-0eb52f5f266d9e0a662f28a4d2dfef8c746cf96e.tar.bz2 |
GH-115060: Speed up `pathlib.Path.glob()` by not scanning literal parts (#117732)
Don't bother calling `os.scandir()` to scan for literal pattern segments,
like `foo` in `foo/*.py`. Instead, append the segment(s) as-is and call
through to the next selector with `exists=False`, which signals that the
path might not exist. Subsequent selectors will call `os.scandir()` or
`os.lstat()` to filter out missing paths as needed.
Diffstat (limited to 'Lib/glob.py')
-rw-r--r-- | Lib/glob.py | 22 |
1 file changed, 21 insertions, 1 deletion
```diff
diff --git a/Lib/glob.py b/Lib/glob.py
index b1d2681..72cf222 100644
--- a/Lib/glob.py
+++ b/Lib/glob.py
@@ -331,9 +331,10 @@ class _Globber:
     """Class providing shell-style pattern matching and globbing.
     """
 
-    def __init__(self, sep, case_sensitive, recursive=False):
+    def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False):
         self.sep = sep
         self.case_sensitive = case_sensitive
+        self.case_pedantic = case_pedantic
         self.recursive = recursive
 
     # Low-level methods
@@ -373,6 +374,8 @@ class _Globber:
             selector = self.recursive_selector
         elif part in _special_parts:
             selector = self.special_selector
+        elif not self.case_pedantic and magic_check.search(part) is None:
+            selector = self.literal_selector
         else:
             selector = self.wildcard_selector
         return selector(part, parts)
@@ -387,6 +390,23 @@ class _Globber:
             return select_next(path, exists)
         return select_special
 
+    def literal_selector(self, part, parts):
+        """Returns a function that selects a literal descendant of a path.
+        """
+
+        # Optimization: consume and join any subsequent literal parts here,
+        # rather than leaving them for the next selector. This reduces the
+        # number of string concatenation operations and calls to add_slash().
+        while parts and magic_check.search(parts[-1]) is None:
+            part += self.sep + parts.pop()
+
+        select_next = self.selector(parts)
+
+        def select_literal(path, exists=False):
+            path = self.concat_path(self.add_slash(path), part)
+            return select_next(path, exists=False)
+        return select_literal
+
     def wildcard_selector(self, part, parts):
         """Returns a function that selects direct children of a given path,
         filtering by pattern.
```