summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2022-03-22 15:27:55 (GMT)
committerGitHub <noreply@github.com>2022-03-22 15:27:55 (GMT)
commit492d4109f4d953c478cb48f17aa32adbb912623b (patch)
treec912b908f3e247d161901c0794bb28753759b796
parent32e77154ddfc514a3144d5912bffdd957246fd6c (diff)
downloadcpython-492d4109f4d953c478cb48f17aa32adbb912623b.zip
cpython-492d4109f4d953c478cb48f17aa32adbb912623b.tar.gz
cpython-492d4109f4d953c478cb48f17aa32adbb912623b.tar.bz2
bpo-42885: Optimize search for regular expressions starting with "\A" or "^" (GH-32021)
Affected functions are re.search(), re.split(), re.findall(), re.finditer() and re.sub().
-rw-r--r--Lib/test/test_re.py15
-rw-r--r--Misc/NEWS.d/next/Library/2022-03-21-08-32-19.bpo-42885.LCnTTp.rst3
-rw-r--r--Modules/sre_lib.h7
3 files changed, 25 insertions, 0 deletions
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index da827ca..fd6db6a 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -5,6 +5,7 @@ import locale
import re
import sre_compile
import string
+import time
import unittest
import warnings
from re import Scanner
@@ -2038,6 +2039,20 @@ class ReTests(unittest.TestCase):
with self.assertRaisesRegex(TypeError, "got 'type'"):
re.search("x*", type)
+ def test_search_anchor_at_beginning(self):
+ s = 'x'*10**7
+ start = time.perf_counter()
+ for p in r'\Ay', r'^y':
+ self.assertIsNone(re.search(p, s))
+ self.assertEqual(re.split(p, s), [s])
+ self.assertEqual(re.findall(p, s), [])
+ self.assertEqual(list(re.finditer(p, s)), [])
+ self.assertEqual(re.sub(p, '', s), s)
+ t = time.perf_counter() - start
+ # Without optimization it takes 1 second on my computer.
+ # With optimization -- 0.0003 seconds.
+ self.assertLess(t, 0.1)
+
def test_possessive_quantifiers(self):
"""Test Possessive Quantifiers
Test quantifiers of the form @+ for some repetition operator @,
diff --git a/Misc/NEWS.d/next/Library/2022-03-21-08-32-19.bpo-42885.LCnTTp.rst b/Misc/NEWS.d/next/Library/2022-03-21-08-32-19.bpo-42885.LCnTTp.rst
new file mode 100644
index 0000000..5f9c1a1
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-03-21-08-32-19.bpo-42885.LCnTTp.rst
@@ -0,0 +1,3 @@
+Optimize :func:`re.search`, :func:`re.split`, :func:`re.findall`,
+:func:`re.finditer` and :func:`re.sub` for regular expressions starting with
+``\A`` or ``^``.
diff --git a/Modules/sre_lib.h b/Modules/sre_lib.h
index 956fd3f..a82210f 100644
--- a/Modules/sre_lib.h
+++ b/Modules/sre_lib.h
@@ -1693,6 +1693,13 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
state->start = state->ptr = ptr;
status = SRE(match)(state, pattern, 1);
state->must_advance = 0;
+ if (status == 0 && pattern[0] == SRE_OP_AT &&
+ (pattern[1] == SRE_AT_BEGINNING ||
+ pattern[1] == SRE_AT_BEGINNING_STRING))
+ {
+ state->start = state->ptr = ptr = end;
+ return 0;
+ }
while (status == 0 && ptr < end) {
ptr++;
RESET_CAPTURE_GROUP();