summaryrefslogtreecommitdiffstats
path: root/Modules/sre_lib.h
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2015-06-21 11:06:55 (GMT)
committerSerhiy Storchaka <storchaka@gmail.com>2015-06-21 11:06:55 (GMT)
commit66dc4648fcca725bc48b0c8d7030c107dfeda709 (patch)
tree188c8a43a54c79340c11cc27997cb4e189eb6922 /Modules/sre_lib.h
parent6ee588f14e23206db3c927653956fd35f6ca857a (diff)
downloadcpython-66dc4648fcca725bc48b0c8d7030c107dfeda709.zip
cpython-66dc4648fcca725bc48b0c8d7030c107dfeda709.tar.gz
cpython-66dc4648fcca725bc48b0c8d7030c107dfeda709.tar.bz2
Issue #24426: Fast searching optimization in regular expressions now works
for patterns that start with capturing groups. The fast searching optimization can no longer be disabled at compile time.
Diffstat (limited to 'Modules/sre_lib.h')
-rw-r--r--Modules/sre_lib.h53
1 files changed, 27 insertions, 26 deletions
diff --git a/Modules/sre_lib.h b/Modules/sre_lib.h
index 463a908..422f168 100644
--- a/Modules/sre_lib.h
+++ b/Modules/sre_lib.h
@@ -1248,7 +1248,32 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
prefix, prefix_len, prefix_skip));
TRACE(("charset = %p\n", charset));
-#if defined(USE_FAST_SEARCH)
+ if (prefix_len == 1) {
+ /* pattern starts with a literal character */
+ SRE_CHAR c = (SRE_CHAR) prefix[0];
+#if SIZEOF_SRE_CHAR < 4
+ if ((SRE_CODE) c != prefix[0])
+ return 0; /* literal can't match: doesn't fit in char width */
+#endif
+ end = (SRE_CHAR *)state->end;
+ while (ptr < end) {
+ while (*ptr != c) {
+ if (++ptr >= end)
+ return 0;
+ }
+ TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
+ state->start = ptr;
+ state->ptr = ptr + prefix_skip;
+ if (flags & SRE_INFO_LITERAL)
+ return 1; /* we got all of it */
+ status = SRE(match)(state, pattern + 2*prefix_skip, 0);
+ if (status != 0)
+ return status;
+ ++ptr;
+ }
+ return 0;
+ }
+
if (prefix_len > 1) {
/* pattern starts with a known prefix. use the overlap
table to skip forward as fast as we possibly can */
@@ -1297,32 +1322,8 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
}
return 0;
}
-#endif
- if (pattern[0] == SRE_OP_LITERAL) {
- /* pattern starts with a literal character. this is used
- for short prefixes, and if fast search is disabled */
- SRE_CHAR c = (SRE_CHAR) pattern[1];
-#if SIZEOF_SRE_CHAR < 4
- if ((SRE_CODE) c != pattern[1])
- return 0; /* literal can't match: doesn't fit in char width */
-#endif
- end = (SRE_CHAR *)state->end;
- while (ptr < end) {
- while (*ptr != c) {
- if (++ptr >= end)
- return 0;
- }
- TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
- state->start = ptr;
- state->ptr = ++ptr;
- if (flags & SRE_INFO_LITERAL)
- return 1; /* we got all of it */
- status = SRE(match)(state, pattern + 2, 0);
- if (status != 0)
- break;
- }
- } else if (charset) {
+ if (charset) {
/* pattern starts with a character from a known set */
end = (SRE_CHAR *)state->end;
for (;;) {