diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2015-06-21 11:06:55 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2015-06-21 11:06:55 (GMT) |
commit | 66dc4648fcca725bc48b0c8d7030c107dfeda709 (patch) | |
tree | 188c8a43a54c79340c11cc27997cb4e189eb6922 /Modules/sre_lib.h | |
parent | 6ee588f14e23206db3c927653956fd35f6ca857a (diff) | |
download | cpython-66dc4648fcca725bc48b0c8d7030c107dfeda709.zip cpython-66dc4648fcca725bc48b0c8d7030c107dfeda709.tar.gz cpython-66dc4648fcca725bc48b0c8d7030c107dfeda709.tar.bz2 |
Issue #24426: Fast searching optimization in regular expressions now works
for patterns that starts with capturing groups. Fast searching optimization
now can't be disabled at compile time.
Diffstat (limited to 'Modules/sre_lib.h')
-rw-r--r-- | Modules/sre_lib.h | 53 |
1 files changed, 27 insertions, 26 deletions
diff --git a/Modules/sre_lib.h b/Modules/sre_lib.h index 463a908..422f168 100644 --- a/Modules/sre_lib.h +++ b/Modules/sre_lib.h @@ -1248,7 +1248,32 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern) prefix, prefix_len, prefix_skip)); TRACE(("charset = %p\n", charset)); -#if defined(USE_FAST_SEARCH) + if (prefix_len == 1) { + /* pattern starts with a literal character */ + SRE_CHAR c = (SRE_CHAR) prefix[0]; +#if SIZEOF_SRE_CHAR < 4 + if ((SRE_CODE) c != prefix[0]) + return 0; /* literal can't match: doesn't fit in char width */ +#endif + end = (SRE_CHAR *)state->end; + while (ptr < end) { + while (*ptr != c) { + if (++ptr >= end) + return 0; + } + TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr)); + state->start = ptr; + state->ptr = ptr + prefix_skip; + if (flags & SRE_INFO_LITERAL) + return 1; /* we got all of it */ + status = SRE(match)(state, pattern + 2*prefix_skip, 0); + if (status != 0) + return status; + ++ptr; + } + return 0; + } + if (prefix_len > 1) { /* pattern starts with a known prefix. use the overlap table to skip forward as fast as we possibly can */ @@ -1297,32 +1322,8 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern) } return 0; } -#endif - if (pattern[0] == SRE_OP_LITERAL) { - /* pattern starts with a literal character. this is used - for short prefixes, and if fast search is disabled */ - SRE_CHAR c = (SRE_CHAR) pattern[1]; -#if SIZEOF_SRE_CHAR < 4 - if ((SRE_CODE) c != pattern[1]) - return 0; /* literal can't match: doesn't fit in char width */ -#endif - end = (SRE_CHAR *)state->end; - while (ptr < end) { - while (*ptr != c) { - if (++ptr >= end) - return 0; - } - TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr)); - state->start = ptr; - state->ptr = ++ptr; - if (flags & SRE_INFO_LITERAL) - return 1; /* we got all of it */ - status = SRE(match)(state, pattern + 2, 0); - if (status != 0) - break; - } - } else if (charset) { + if (charset) { /* pattern starts with a character from a known set */ end = (SRE_CHAR *)state->end; for (;;) { |