diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2022-03-22 15:27:55 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-03-22 15:27:55 (GMT) |
commit | 492d4109f4d953c478cb48f17aa32adbb912623b (patch) | |
tree | c912b908f3e247d161901c0794bb28753759b796 | |
parent | 32e77154ddfc514a3144d5912bffdd957246fd6c (diff) | |
download | cpython-492d4109f4d953c478cb48f17aa32adbb912623b.zip cpython-492d4109f4d953c478cb48f17aa32adbb912623b.tar.gz cpython-492d4109f4d953c478cb48f17aa32adbb912623b.tar.bz2 |
bpo-42885: Optimize search for regular expressions starting with "\A" or "^" (GH-32021)
Affected functions are re.search(), re.split(), re.findall(), re.finditer()
and re.sub().
-rw-r--r-- | Lib/test/test_re.py | 15 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2022-03-21-08-32-19.bpo-42885.LCnTTp.rst | 3 | ||||
-rw-r--r-- | Modules/sre_lib.h | 7 |
3 files changed, 25 insertions, 0 deletions
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index da827ca..fd6db6a 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -5,6 +5,7 @@ import locale import re import sre_compile import string +import time import unittest import warnings from re import Scanner @@ -2038,6 +2039,20 @@ class ReTests(unittest.TestCase): with self.assertRaisesRegex(TypeError, "got 'type'"): re.search("x*", type) + def test_search_anchor_at_beginning(self): + s = 'x'*10**7 + start = time.perf_counter() + for p in r'\Ay', r'^y': + self.assertIsNone(re.search(p, s)) + self.assertEqual(re.split(p, s), [s]) + self.assertEqual(re.findall(p, s), []) + self.assertEqual(list(re.finditer(p, s)), []) + self.assertEqual(re.sub(p, '', s), s) + t = time.perf_counter() - start + # Without optimization it takes 1 second on my computer. + # With optimization -- 0.0003 seconds. + self.assertLess(t, 0.1) + def test_possessive_quantifiers(self): """Test Possessive Quantifiers Test quantifiers of the form @+ for some repetition operator @, diff --git a/Misc/NEWS.d/next/Library/2022-03-21-08-32-19.bpo-42885.LCnTTp.rst b/Misc/NEWS.d/next/Library/2022-03-21-08-32-19.bpo-42885.LCnTTp.rst new file mode 100644 index 0000000..5f9c1a1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-03-21-08-32-19.bpo-42885.LCnTTp.rst @@ -0,0 +1,3 @@ +Optimize :func:`re.search`, :func:`re.split`, :func:`re.findall`, +:func:`re.finditer` and :func:`re.sub` for regular expressions starting with +``\A`` or ``^``. diff --git a/Modules/sre_lib.h b/Modules/sre_lib.h index 956fd3f..a82210f 100644 --- a/Modules/sre_lib.h +++ b/Modules/sre_lib.h @@ -1693,6 +1693,13 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern) state->start = state->ptr = ptr; status = SRE(match)(state, pattern, 1); state->must_advance = 0; + if (status == 0 && pattern[0] == SRE_OP_AT && + (pattern[1] == SRE_AT_BEGINNING || + pattern[1] == SRE_AT_BEGINNING_STRING)) + { + state->start = state->ptr = ptr = end; + return 0; + } while (status == 0 && ptr < end) { ptr++; RESET_CAPTURE_GROUP(); |