From 309efb39dc005a834bb67e9a6f27b6689f00ec9d Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka
Date: Thu, 26 Oct 2023 18:07:06 +0300
Subject: gh-111259: Optimize recursive wildcards in pathlib (GH-111303)

Regular expression pattern `(?s:.)` is much faster than `[\s\S]`.
---
 Lib/pathlib.py                                                  | 6 +++---
 .../next/Library/2023-10-25-11-13-35.gh-issue-111259.z7ndeA.rst | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2023-10-25-11-13-35.gh-issue-111259.z7ndeA.rst

diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index 5c1c71e..e3eecc3 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -124,13 +124,13 @@ def _compile_pattern_lines(pattern_lines, case_sensitive):
         elif part == '*':
             part = r'.+'
         elif part == '**\n':
-            # '**/' component: we use '[\s\S]' rather than '.' so that path
+            # '**/' component: we use '(?s:.)' rather than '.' so that path
             # separators (i.e. newlines) are matched. The trailing '^' ensures
             # we terminate after a path separator (i.e. on a new line).
-            part = r'[\s\S]*^'
+            part = r'(?s:.)*^'
         elif part == '**':
             # '**' component.
-            part = r'[\s\S]*'
+            part = r'(?s:.)*'
         elif '**' in part:
             raise ValueError("Invalid pattern: '**' can only be an entire path component")
         else:
diff --git a/Misc/NEWS.d/next/Library/2023-10-25-11-13-35.gh-issue-111259.z7ndeA.rst b/Misc/NEWS.d/next/Library/2023-10-25-11-13-35.gh-issue-111259.z7ndeA.rst
new file mode 100644
index 0000000..4b597f5
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-10-25-11-13-35.gh-issue-111259.z7ndeA.rst
@@ -0,0 +1 @@
+Optimize recursive wildcards in :mod:`pathlib`.
-- 
cgit v0.12