summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Serhiy Storchaka <storchaka@gmail.com> 2023-10-26 15:07:06 (GMT)
committer GitHub <noreply@github.com> 2023-10-26 15:07:06 (GMT)
commit 309efb39dc005a834bb67e9a6f27b6689f00ec9d (patch)
tree 05e3828927ad56ad914c08c40b28b653fa280315
parent 67a91f78e4395148afcc33e5cd6f3f0a9623e63a (diff)
download cpython-309efb39dc005a834bb67e9a6f27b6689f00ec9d.zip
cpython-309efb39dc005a834bb67e9a6f27b6689f00ec9d.tar.gz
cpython-309efb39dc005a834bb67e9a6f27b6689f00ec9d.tar.bz2
gh-111259: Optimize recursive wildcards in pathlib (GH-111303)
Regular expression pattern `(?s:.)` is much faster than `[\s\S]`.
-rw-r--r-- Lib/pathlib.py 6
-rw-r--r-- Misc/NEWS.d/next/Library/2023-10-25-11-13-35.gh-issue-111259.z7ndeA.rst 1
2 files changed, 4 insertions, 3 deletions
diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index 5c1c71e..e3eecc3 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -124,13 +124,13 @@ def _compile_pattern_lines(pattern_lines, case_sensitive):
elif part == '*':
part = r'.+'
elif part == '**\n':
- # '**/' component: we use '[\s\S]' rather than '.' so that path
+ # '**/' component: we use '(?s:.)' rather than '.' so that path
# separators (i.e. newlines) are matched. The trailing '^' ensures
# we terminate after a path separator (i.e. on a new line).
- part = r'[\s\S]*^'
+ part = r'(?s:.)*^'
elif part == '**':
# '**' component.
- part = r'[\s\S]*'
+ part = r'(?s:.)*'
elif '**' in part:
raise ValueError("Invalid pattern: '**' can only be an entire path component")
else:
diff --git a/Misc/NEWS.d/next/Library/2023-10-25-11-13-35.gh-issue-111259.z7ndeA.rst b/Misc/NEWS.d/next/Library/2023-10-25-11-13-35.gh-issue-111259.z7ndeA.rst
new file mode 100644
index 0000000..4b597f5
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-10-25-11-13-35.gh-issue-111259.z7ndeA.rst
@@ -0,0 +1 @@
+Optimize recursive wildcards in :mod:`pathlib`.