summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBrandt Bucher <brandtbucher@microsoft.com>2022-04-15 16:26:44 (GMT)
committerGitHub <noreply@github.com>2022-04-15 16:26:44 (GMT)
commit1b34b5687b20a54cff2158c8660201e7377dec21 (patch)
tree44790f7a643402046501c277d704266ca50e847b
parentd104f4d21f735693ea93fe65ea4b4e1aa1779343 (diff)
downloadcpython-1b34b5687b20a54cff2158c8660201e7377dec21.zip
cpython-1b34b5687b20a54cff2158c8660201e7377dec21.tar.gz
cpython-1b34b5687b20a54cff2158c8660201e7377dec21.tar.bz2
gh-91404: Use computed gotos and reduce indirection in re (#91495)
-rw-r--r--Doc/whatsnew/3.11.rst6
-rw-r--r--Makefile.pre.in7
-rw-r--r--Misc/NEWS.d/next/Library/2022-04-12-19-08-13.gh-issue-91404.zjqYHo.rst3
-rw-r--r--Modules/_sre/sre_lib.h736
-rw-r--r--Modules/_sre/sre_targets.h59
-rwxr-xr-xTools/scripts/generate_sre_constants.py20
6 files changed, 483 insertions, 348 deletions
diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst
index 9f7f6f5..a5a5268 100644
--- a/Doc/whatsnew/3.11.rst
+++ b/Doc/whatsnew/3.11.rst
@@ -520,6 +520,12 @@ Optimizations
becomes 272 bytes from 352 bytes on 64-bit platforms.
(Contributed by Inada Naoki in :issue:`46845`.)
+* :mod:`re`'s regular expression matching engine has been partially refactored,
+ and now uses computed gotos (or "threaded code") on supported platforms. As a
+ result, Python 3.11 executes the `pyperformance regular expression benchmarks
+ <https://pyperformance.readthedocs.io/benchmarks.html#regex-dna>`_ up to 10%
+ faster than Python 3.10.
+
Faster CPython
==============
diff --git a/Makefile.pre.in b/Makefile.pre.in
index d89886d..f6c8c72 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -1351,11 +1351,12 @@ regen-stdlib-module-names: build_all Programs/_testembed
$(UPDATE_FILE) $(srcdir)/Python/stdlib_module_names.h $(srcdir)/Python/stdlib_module_names.h.new
regen-sre:
- # Regenerate Modules/_sre/sre_constants.h from Lib/re/_constants.py
- # using Tools/scripts/generate_sre_constants.py
+ # Regenerate Modules/_sre/sre_constants.h and Modules/_sre/sre_targets.h
+ # from Lib/re/_constants.py using Tools/scripts/generate_sre_constants.py
$(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_sre_constants.py \
$(srcdir)/Lib/re/_constants.py \
- $(srcdir)/Modules/_sre/sre_constants.h
+ $(srcdir)/Modules/_sre/sre_constants.h \
+ $(srcdir)/Modules/_sre/sre_targets.h
Python/compile.o Python/symtable.o Python/ast_unparse.o Python/ast.o Python/future.o: $(srcdir)/Include/internal/pycore_ast.h
diff --git a/Misc/NEWS.d/next/Library/2022-04-12-19-08-13.gh-issue-91404.zjqYHo.rst b/Misc/NEWS.d/next/Library/2022-04-12-19-08-13.gh-issue-91404.zjqYHo.rst
new file mode 100644
index 0000000..58464fc
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-04-12-19-08-13.gh-issue-91404.zjqYHo.rst
@@ -0,0 +1,3 @@
+Improve the performance of :mod:`re` matching by using computed gotos (or
+"threaded code") on supported platforms and removing expensive pointer
+indirections.
diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h
index 34cd055..3472e65 100644
--- a/Modules/_sre/sre_lib.h
+++ b/Modules/_sre/sre_lib.h
@@ -485,16 +485,20 @@ do { \
#define JUMP_ATOMIC_GROUP 16
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
+ ctx->pattern = pattern; \
+ ctx->ptr = ptr; \
DATA_ALLOC(SRE(match_context), nextctx); \
nextctx->last_ctx_pos = ctx_pos; \
nextctx->jump = jumpvalue; \
nextctx->pattern = nextpattern; \
nextctx->toplevel = toplevel_; \
+ pattern = nextpattern; \
ctx_pos = alloc_pos; \
ctx = nextctx; \
goto entrance; \
jumplabel: \
- while (0) /* gcc doesn't like labels at end of scopes */ \
+ pattern = ctx->pattern; \
+ ptr = ctx->ptr;
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
@@ -517,6 +521,36 @@ typedef struct {
int toplevel;
} SRE(match_context);
+#define MAYBE_CHECK_SIGNALS \
+ do { \
+ if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
+ RETURN_ERROR(SRE_ERROR_INTERRUPTED); \
+ } \
+ } while (0)
+
+#ifdef HAVE_COMPUTED_GOTOS
+ #ifndef USE_COMPUTED_GOTOS
+ #define USE_COMPUTED_GOTOS 1
+ #endif
+#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
+ #error "Computed gotos are not supported on this compiler."
+#else
+ #undef USE_COMPUTED_GOTOS
+ #define USE_COMPUTED_GOTOS 0
+#endif
+
+#if USE_COMPUTED_GOTOS
+ #define TARGET(OP) TARGET_ ## OP
+ #define DISPATCH \
+ do { \
+ MAYBE_CHECK_SIGNALS; \
+ goto *sre_targets[*pattern++]; \
+ } while (0)
+#else
+ #define TARGET(OP) case OP
+ #define DISPATCH goto dispatch
+#endif
+
/* check if string matches the given pattern. returns <0 for
error, 0 for failure, and 1 for success */
LOCAL(Py_ssize_t)
@@ -536,38 +570,44 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
DATA_ALLOC(SRE(match_context), ctx);
ctx->last_ctx_pos = -1;
ctx->jump = JUMP_NONE;
- ctx->pattern = pattern;
ctx->toplevel = toplevel;
ctx_pos = alloc_pos;
+#if USE_COMPUTED_GOTOS
+#include "sre_targets.h"
+#endif
+
entrance:
- ctx->ptr = (SRE_CHAR *)state->ptr;
+ ; // Fashion statement.
+ const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
- if (ctx->pattern[0] == SRE_OP_INFO) {
+ if (pattern[0] == SRE_OP_INFO) {
/* optimization info block */
/* <INFO> <1=skip> <2=flags> <3=min> ... */
- if (ctx->pattern[3] && (uintptr_t)(end - ctx->ptr) < ctx->pattern[3]) {
+ if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
TRACE(("reject (got %zd chars, need %zd)\n",
- end - ctx->ptr, (Py_ssize_t) ctx->pattern[3]));
+ end - ptr, (Py_ssize_t) pattern[3]));
RETURN_FAILURE;
}
- ctx->pattern += ctx->pattern[1] + 1;
+ pattern += pattern[1] + 1;
}
- for (;;) {
- ++sigcount;
- if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
- RETURN_ERROR(SRE_ERROR_INTERRUPTED);
-
- switch (*ctx->pattern++) {
+#if USE_COMPUTED_GOTOS
+ DISPATCH;
+#else
+dispatch:
+ MAYBE_CHECK_SIGNALS;
+ switch (*pattern++)
+#endif
+ {
- case SRE_OP_MARK:
+ TARGET(SRE_OP_MARK):
/* set mark */
/* <MARK> <gid> */
- TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
- ctx->ptr, ctx->pattern[0]));
- i = ctx->pattern[0];
+ TRACE(("|%p|%p|MARK %d\n", pattern,
+ ptr, pattern[0]));
+ i = pattern[0];
if (i & 1)
state->lastindex = i/2 + 1;
if (i > state->lastmark) {
@@ -580,210 +620,210 @@ entrance:
state->mark[j++] = NULL;
state->lastmark = i;
}
- state->mark[i] = ctx->ptr;
- ctx->pattern++;
- break;
+ state->mark[i] = ptr;
+ pattern++;
+ DISPATCH;
- case SRE_OP_LITERAL:
+ TARGET(SRE_OP_LITERAL):
/* match literal string */
/* <LITERAL> <code> */
- TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
- ctx->ptr, *ctx->pattern));
- if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
+ TRACE(("|%p|%p|LITERAL %d\n", pattern,
+ ptr, *pattern));
+ if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
RETURN_FAILURE;
- ctx->pattern++;
- ctx->ptr++;
- break;
+ pattern++;
+ ptr++;
+ DISPATCH;
- case SRE_OP_NOT_LITERAL:
+ TARGET(SRE_OP_NOT_LITERAL):
/* match anything that is not literal character */
/* <NOT_LITERAL> <code> */
- TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
- ctx->ptr, *ctx->pattern));
- if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
+ TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
+ ptr, *pattern));
+ if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
RETURN_FAILURE;
- ctx->pattern++;
- ctx->ptr++;
- break;
+ pattern++;
+ ptr++;
+ DISPATCH;
- case SRE_OP_SUCCESS:
+ TARGET(SRE_OP_SUCCESS):
/* end of pattern */
- TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
+ TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
if (ctx->toplevel &&
- ((state->match_all && ctx->ptr != state->end) ||
- (state->must_advance && ctx->ptr == state->start)))
+ ((state->match_all && ptr != state->end) ||
+ (state->must_advance && ptr == state->start)))
{
RETURN_FAILURE;
}
- state->ptr = ctx->ptr;
+ state->ptr = ptr;
RETURN_SUCCESS;
- case SRE_OP_AT:
+ TARGET(SRE_OP_AT):
/* match at given position */
/* <AT> <code> */
- TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
- if (!SRE(at)(state, ctx->ptr, *ctx->pattern))
+ TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
+ if (!SRE(at)(state, ptr, *pattern))
RETURN_FAILURE;
- ctx->pattern++;
- break;
+ pattern++;
+ DISPATCH;
- case SRE_OP_CATEGORY:
+ TARGET(SRE_OP_CATEGORY):
/* match at given category */
/* <CATEGORY> <code> */
- TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
- ctx->ptr, *ctx->pattern));
- if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
+ TRACE(("|%p|%p|CATEGORY %d\n", pattern,
+ ptr, *pattern));
+ if (ptr >= end || !sre_category(pattern[0], ptr[0]))
RETURN_FAILURE;
- ctx->pattern++;
- ctx->ptr++;
- break;
+ pattern++;
+ ptr++;
+ DISPATCH;
- case SRE_OP_ANY:
+ TARGET(SRE_OP_ANY):
/* match anything (except a newline) */
/* <ANY> */
- TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
- if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
+ TRACE(("|%p|%p|ANY\n", pattern, ptr));
+ if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
RETURN_FAILURE;
- ctx->ptr++;
- break;
+ ptr++;
+ DISPATCH;
- case SRE_OP_ANY_ALL:
+ TARGET(SRE_OP_ANY_ALL):
/* match anything */
/* <ANY_ALL> */
- TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
- if (ctx->ptr >= end)
+ TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
+ if (ptr >= end)
RETURN_FAILURE;
- ctx->ptr++;
- break;
+ ptr++;
+ DISPATCH;
- case SRE_OP_IN:
+ TARGET(SRE_OP_IN):
/* match set member (or non_member) */
/* <IN> <skip> <set> */
- TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
- if (ctx->ptr >= end ||
- !SRE(charset)(state, ctx->pattern + 1, *ctx->ptr))
+ TRACE(("|%p|%p|IN\n", pattern, ptr));
+ if (ptr >= end ||
+ !SRE(charset)(state, pattern + 1, *ptr))
RETURN_FAILURE;
- ctx->pattern += ctx->pattern[0];
- ctx->ptr++;
- break;
+ pattern += pattern[0];
+ ptr++;
+ DISPATCH;
- case SRE_OP_LITERAL_IGNORE:
+ TARGET(SRE_OP_LITERAL_IGNORE):
TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
- ctx->pattern, ctx->ptr, ctx->pattern[0]));
- if (ctx->ptr >= end ||
- sre_lower_ascii(*ctx->ptr) != *ctx->pattern)
+ pattern, ptr, pattern[0]));
+ if (ptr >= end ||
+ sre_lower_ascii(*ptr) != *pattern)
RETURN_FAILURE;
- ctx->pattern++;
- ctx->ptr++;
- break;
+ pattern++;
+ ptr++;
+ DISPATCH;
- case SRE_OP_LITERAL_UNI_IGNORE:
+ TARGET(SRE_OP_LITERAL_UNI_IGNORE):
TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
- ctx->pattern, ctx->ptr, ctx->pattern[0]));
- if (ctx->ptr >= end ||
- sre_lower_unicode(*ctx->ptr) != *ctx->pattern)
+ pattern, ptr, pattern[0]));
+ if (ptr >= end ||
+ sre_lower_unicode(*ptr) != *pattern)
RETURN_FAILURE;
- ctx->pattern++;
- ctx->ptr++;
- break;
+ pattern++;
+ ptr++;
+ DISPATCH;
- case SRE_OP_LITERAL_LOC_IGNORE:
+ TARGET(SRE_OP_LITERAL_LOC_IGNORE):
TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
- ctx->pattern, ctx->ptr, ctx->pattern[0]));
- if (ctx->ptr >= end
- || !char_loc_ignore(*ctx->pattern, *ctx->ptr))
+ pattern, ptr, pattern[0]));
+ if (ptr >= end
+ || !char_loc_ignore(*pattern, *ptr))
RETURN_FAILURE;
- ctx->pattern++;
- ctx->ptr++;
- break;
+ pattern++;
+ ptr++;
+ DISPATCH;
- case SRE_OP_NOT_LITERAL_IGNORE:
+ TARGET(SRE_OP_NOT_LITERAL_IGNORE):
TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
- ctx->pattern, ctx->ptr, *ctx->pattern));
- if (ctx->ptr >= end ||
- sre_lower_ascii(*ctx->ptr) == *ctx->pattern)
+ pattern, ptr, *pattern));
+ if (ptr >= end ||
+ sre_lower_ascii(*ptr) == *pattern)
RETURN_FAILURE;
- ctx->pattern++;
- ctx->ptr++;
- break;
+ pattern++;
+ ptr++;
+ DISPATCH;
- case SRE_OP_NOT_LITERAL_UNI_IGNORE:
+ TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
- ctx->pattern, ctx->ptr, *ctx->pattern));
- if (ctx->ptr >= end ||
- sre_lower_unicode(*ctx->ptr) == *ctx->pattern)
+ pattern, ptr, *pattern));
+ if (ptr >= end ||
+ sre_lower_unicode(*ptr) == *pattern)
RETURN_FAILURE;
- ctx->pattern++;
- ctx->ptr++;
- break;
+ pattern++;
+ ptr++;
+ DISPATCH;
- case SRE_OP_NOT_LITERAL_LOC_IGNORE:
+ TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
- ctx->pattern, ctx->ptr, *ctx->pattern));
- if (ctx->ptr >= end
- || char_loc_ignore(*ctx->pattern, *ctx->ptr))
+ pattern, ptr, *pattern));
+ if (ptr >= end
+ || char_loc_ignore(*pattern, *ptr))
RETURN_FAILURE;
- ctx->pattern++;
- ctx->ptr++;
- break;
+ pattern++;
+ ptr++;
+ DISPATCH;
- case SRE_OP_IN_IGNORE:
- TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
- if (ctx->ptr >= end
- || !SRE(charset)(state, ctx->pattern+1,
- (SRE_CODE)sre_lower_ascii(*ctx->ptr)))
+ TARGET(SRE_OP_IN_IGNORE):
+ TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
+ if (ptr >= end
+ || !SRE(charset)(state, pattern+1,
+ (SRE_CODE)sre_lower_ascii(*ptr)))
RETURN_FAILURE;
- ctx->pattern += ctx->pattern[0];
- ctx->ptr++;
- break;
+ pattern += pattern[0];
+ ptr++;
+ DISPATCH;
- case SRE_OP_IN_UNI_IGNORE:
- TRACE(("|%p|%p|IN_UNI_IGNORE\n", ctx->pattern, ctx->ptr));
- if (ctx->ptr >= end
- || !SRE(charset)(state, ctx->pattern+1,
- (SRE_CODE)sre_lower_unicode(*ctx->ptr)))
+ TARGET(SRE_OP_IN_UNI_IGNORE):
+ TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
+ if (ptr >= end
+ || !SRE(charset)(state, pattern+1,
+ (SRE_CODE)sre_lower_unicode(*ptr)))
RETURN_FAILURE;
- ctx->pattern += ctx->pattern[0];
- ctx->ptr++;
- break;
+ pattern += pattern[0];
+ ptr++;
+ DISPATCH;
- case SRE_OP_IN_LOC_IGNORE:
- TRACE(("|%p|%p|IN_LOC_IGNORE\n", ctx->pattern, ctx->ptr));
- if (ctx->ptr >= end
- || !SRE(charset_loc_ignore)(state, ctx->pattern+1, *ctx->ptr))
+ TARGET(SRE_OP_IN_LOC_IGNORE):
+ TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
+ if (ptr >= end
+ || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
RETURN_FAILURE;
- ctx->pattern += ctx->pattern[0];
- ctx->ptr++;
- break;
+ pattern += pattern[0];
+ ptr++;
+ DISPATCH;
- case SRE_OP_JUMP:
- case SRE_OP_INFO:
+ TARGET(SRE_OP_JUMP):
+ TARGET(SRE_OP_INFO):
/* jump forward */
/* <JUMP> <offset> */
- TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
- ctx->ptr, ctx->pattern[0]));
- ctx->pattern += ctx->pattern[0];
- break;
+ TRACE(("|%p|%p|JUMP %d\n", pattern,
+ ptr, pattern[0]));
+ pattern += pattern[0];
+ DISPATCH;
- case SRE_OP_BRANCH:
+ TARGET(SRE_OP_BRANCH):
/* alternation */
/* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
- TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
+ TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
LASTMARK_SAVE();
if (state->repeat)
MARK_PUSH(ctx->lastmark);
- for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
- if (ctx->pattern[1] == SRE_OP_LITERAL &&
- (ctx->ptr >= end ||
- (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
+ for (; pattern[0]; pattern += pattern[0]) {
+ if (pattern[1] == SRE_OP_LITERAL &&
+ (ptr >= end ||
+ (SRE_CODE) *ptr != pattern[2]))
continue;
- if (ctx->pattern[1] == SRE_OP_IN &&
- (ctx->ptr >= end ||
- !SRE(charset)(state, ctx->pattern + 3,
- (SRE_CODE) *ctx->ptr)))
+ if (pattern[1] == SRE_OP_IN &&
+ (ptr >= end ||
+ !SRE(charset)(state, pattern + 3,
+ (SRE_CODE) *ptr)))
continue;
- state->ptr = ctx->ptr;
- DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
+ state->ptr = ptr;
+ DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
if (ret) {
if (state->repeat)
MARK_POP_DISCARD(ctx->lastmark);
@@ -798,7 +838,7 @@ entrance:
MARK_POP_DISCARD(ctx->lastmark);
RETURN_FAILURE;
- case SRE_OP_REPEAT_ONE:
+ TARGET(SRE_OP_REPEAT_ONE):
/* match repeated sequence (maximizing regexp) */
/* this operator only works if the repeated item is
@@ -808,34 +848,34 @@ entrance:
/* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
- TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
- ctx->pattern[1], ctx->pattern[2]));
+ TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
+ pattern[1], pattern[2]));
- if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
+ if ((Py_ssize_t) pattern[1] > end - ptr)
RETURN_FAILURE; /* cannot match */
- state->ptr = ctx->ptr;
+ state->ptr = ptr;
- ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[2]);
+ ret = SRE(count)(state, pattern+3, pattern[2]);
RETURN_ON_ERROR(ret);
DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
ctx->count = ret;
- ctx->ptr += ctx->count;
+ ptr += ctx->count;
/* when we arrive here, count contains the number of
- matches, and ctx->ptr points to the tail of the target
+ matches, and ptr points to the tail of the target
string. check if the rest of the pattern matches,
and backtrack if not. */
- if (ctx->count < (Py_ssize_t) ctx->pattern[1])
+ if (ctx->count < (Py_ssize_t) pattern[1])
RETURN_FAILURE;
- if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
- ctx->ptr == state->end &&
- !(ctx->toplevel && state->must_advance && ctx->ptr == state->start))
+ if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
+ ptr == state->end &&
+ !(ctx->toplevel && state->must_advance && ptr == state->start))
{
/* tail is empty. we're finished */
- state->ptr = ctx->ptr;
+ state->ptr = ptr;
RETURN_SUCCESS;
}
@@ -843,21 +883,21 @@ entrance:
if (state->repeat)
MARK_PUSH(ctx->lastmark);
- if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
+ if (pattern[pattern[0]] == SRE_OP_LITERAL) {
/* tail starts with a literal. skip positions where
the rest of the pattern cannot possibly match */
- ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
+ ctx->u.chr = pattern[pattern[0]+1];
for (;;) {
- while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
- (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
- ctx->ptr--;
+ while (ctx->count >= (Py_ssize_t) pattern[1] &&
+ (ptr >= end || *ptr != ctx->u.chr)) {
+ ptr--;
ctx->count--;
}
- if (ctx->count < (Py_ssize_t) ctx->pattern[1])
+ if (ctx->count < (Py_ssize_t) pattern[1])
break;
- state->ptr = ctx->ptr;
+ state->ptr = ptr;
DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
- ctx->pattern+ctx->pattern[0]);
+ pattern+pattern[0]);
if (ret) {
if (state->repeat)
MARK_POP_DISCARD(ctx->lastmark);
@@ -868,17 +908,17 @@ entrance:
MARK_POP_KEEP(ctx->lastmark);
LASTMARK_RESTORE();
- ctx->ptr--;
+ ptr--;
ctx->count--;
}
if (state->repeat)
MARK_POP_DISCARD(ctx->lastmark);
} else {
/* general case */
- while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
- state->ptr = ctx->ptr;
+ while (ctx->count >= (Py_ssize_t) pattern[1]) {
+ state->ptr = ptr;
DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
- ctx->pattern+ctx->pattern[0]);
+ pattern+pattern[0]);
if (ret) {
if (state->repeat)
MARK_POP_DISCARD(ctx->lastmark);
@@ -889,7 +929,7 @@ entrance:
MARK_POP_KEEP(ctx->lastmark);
LASTMARK_RESTORE();
- ctx->ptr--;
+ ptr--;
ctx->count--;
}
if (state->repeat)
@@ -897,7 +937,7 @@ entrance:
}
RETURN_FAILURE;
- case SRE_OP_MIN_REPEAT_ONE:
+ TARGET(SRE_OP_MIN_REPEAT_ONE):
/* match repeated sequence (minimizing regexp) */
/* this operator only works if the repeated item is
@@ -907,36 +947,36 @@ entrance:
/* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
- TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
- ctx->pattern[1], ctx->pattern[2]));
+ TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
+ pattern[1], pattern[2]));
- if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
+ if ((Py_ssize_t) pattern[1] > end - ptr)
RETURN_FAILURE; /* cannot match */
- state->ptr = ctx->ptr;
+ state->ptr = ptr;
- if (ctx->pattern[1] == 0)
+ if (pattern[1] == 0)
ctx->count = 0;
else {
/* count using pattern min as the maximum */
- ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[1]);
+ ret = SRE(count)(state, pattern+3, pattern[1]);
RETURN_ON_ERROR(ret);
DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
- if (ret < (Py_ssize_t) ctx->pattern[1])
+ if (ret < (Py_ssize_t) pattern[1])
/* didn't match minimum number of times */
RETURN_FAILURE;
/* advance past minimum matches of repeat */
ctx->count = ret;
- ctx->ptr += ctx->count;
+ ptr += ctx->count;
}
- if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
+ if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
!(ctx->toplevel &&
- ((state->match_all && ctx->ptr != state->end) ||
- (state->must_advance && ctx->ptr == state->start))))
+ ((state->match_all && ptr != state->end) ||
+ (state->must_advance && ptr == state->start))))
{
/* tail is empty. we're finished */
- state->ptr = ctx->ptr;
+ state->ptr = ptr;
RETURN_SUCCESS;
} else {
@@ -945,11 +985,11 @@ entrance:
if (state->repeat)
MARK_PUSH(ctx->lastmark);
- while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
- || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
- state->ptr = ctx->ptr;
+ while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
+ || ctx->count <= (Py_ssize_t)pattern[2]) {
+ state->ptr = ptr;
DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
- ctx->pattern+ctx->pattern[0]);
+ pattern+pattern[0]);
if (ret) {
if (state->repeat)
MARK_POP_DISCARD(ctx->lastmark);
@@ -960,14 +1000,14 @@ entrance:
MARK_POP_KEEP(ctx->lastmark);
LASTMARK_RESTORE();
- state->ptr = ctx->ptr;
- ret = SRE(count)(state, ctx->pattern+3, 1);
+ state->ptr = ptr;
+ ret = SRE(count)(state, pattern+3, 1);
RETURN_ON_ERROR(ret);
DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
if (ret == 0)
break;
assert(ret == 1);
- ctx->ptr++;
+ ptr++;
ctx->count++;
}
if (state->repeat)
@@ -975,7 +1015,7 @@ entrance:
}
RETURN_FAILURE;
- case SRE_OP_POSSESSIVE_REPEAT_ONE:
+ TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
/* match repeated sequence (maximizing regexp) without
backtracking */
@@ -987,67 +1027,67 @@ entrance:
/* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
tail */
- TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", ctx->pattern,
- ctx->ptr, ctx->pattern[1], ctx->pattern[2]));
+ TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
+ ptr, pattern[1], pattern[2]));
- if (ctx->ptr + ctx->pattern[1] > end) {
+ if (ptr + pattern[1] > end) {
RETURN_FAILURE; /* cannot match */
}
- state->ptr = ctx->ptr;
+ state->ptr = ptr;
- ret = SRE(count)(state, ctx->pattern + 3, ctx->pattern[2]);
+ ret = SRE(count)(state, pattern + 3, pattern[2]);
RETURN_ON_ERROR(ret);
DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
ctx->count = ret;
- ctx->ptr += ctx->count;
+ ptr += ctx->count;
/* when we arrive here, count contains the number of
- matches, and ctx->ptr points to the tail of the target
+ matches, and ptr points to the tail of the target
string. check if the rest of the pattern matches,
and fail if not. */
/* Test for not enough repetitions in match */
- if (ctx->count < (Py_ssize_t) ctx->pattern[1]) {
+ if (ctx->count < (Py_ssize_t) pattern[1]) {
RETURN_FAILURE;
}
/* Update the pattern to point to the next op code */
- ctx->pattern += ctx->pattern[0];
+ pattern += pattern[0];
/* Let the tail be evaluated separately and consider this
match successful. */
- if (*ctx->pattern == SRE_OP_SUCCESS &&
- ctx->ptr == state->end &&
- !(ctx->toplevel && state->must_advance && ctx->ptr == state->start))
+ if (*pattern == SRE_OP_SUCCESS &&
+ ptr == state->end &&
+ !(ctx->toplevel && state->must_advance && ptr == state->start))
{
/* tail is empty. we're finished */
- state->ptr = ctx->ptr;
+ state->ptr = ptr;
RETURN_SUCCESS;
}
/* Attempt to match the rest of the string */
- break;
+ DISPATCH;
- case SRE_OP_REPEAT:
+ TARGET(SRE_OP_REPEAT):
/* create repeat context. all the hard work is done
by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
/* <REPEAT> <skip> <1=min> <2=max>
<3=repeat_index> item <UNTIL> tail */
- TRACE(("|%p|%p|REPEAT %d %d %d\n", ctx->pattern, ctx->ptr,
- ctx->pattern[1], ctx->pattern[2], ctx->pattern[3]));
+ TRACE(("|%p|%p|REPEAT %d %d %d\n", pattern, ptr,
+ pattern[1], pattern[2], pattern[3]));
/* install repeat context */
- ctx->u.rep = &state->repeats_array[ctx->pattern[3]];
+ ctx->u.rep = &state->repeats_array[pattern[3]];
ctx->u.rep->count = -1;
- ctx->u.rep->pattern = ctx->pattern;
+ ctx->u.rep->pattern = pattern;
ctx->u.rep->prev = state->repeat;
ctx->u.rep->last_ptr = NULL;
state->repeat = ctx->u.rep;
- state->ptr = ctx->ptr;
- DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
+ state->ptr = ptr;
+ DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
state->repeat = ctx->u.rep->prev;
if (ret) {
@@ -1056,7 +1096,7 @@ entrance:
}
RETURN_FAILURE;
- case SRE_OP_MAX_UNTIL:
+ TARGET(SRE_OP_MAX_UNTIL):
/* maximizing repeat */
/* <REPEAT> <skip> <1=min> <2=max>
<3=repeat_index> item <MAX_UNTIL> tail */
@@ -1068,12 +1108,12 @@ entrance:
if (!ctx->u.rep)
RETURN_ERROR(SRE_ERROR_STATE);
- state->ptr = ctx->ptr;
+ state->ptr = ptr;
ctx->count = ctx->u.rep->count+1;
- TRACE(("|%p|%p|MAX_UNTIL %zd\n", ctx->pattern,
- ctx->ptr, ctx->count));
+ TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
+ ptr, ctx->count));
if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
/* not enough matches */
@@ -1085,7 +1125,7 @@ entrance:
RETURN_SUCCESS;
}
ctx->u.rep->count = ctx->count-1;
- state->ptr = ctx->ptr;
+ state->ptr = ptr;
RETURN_FAILURE;
}
@@ -1111,20 +1151,20 @@ entrance:
MARK_POP(ctx->lastmark);
LASTMARK_RESTORE();
ctx->u.rep->count = ctx->count-1;
- state->ptr = ctx->ptr;
+ state->ptr = ptr;
}
/* cannot match more repeated items here. make sure the
tail matches */
state->repeat = ctx->u.rep->prev;
- DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
+ DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
state->repeat = ctx->u.rep; // restore repeat before return
RETURN_ON_SUCCESS(ret);
- state->ptr = ctx->ptr;
+ state->ptr = ptr;
RETURN_FAILURE;
- case SRE_OP_MIN_UNTIL:
+ TARGET(SRE_OP_MIN_UNTIL):
/* minimizing repeat */
/* <REPEAT> <skip> <1=min> <2=max>
<3=repeat_index> item <MIN_UNTIL> tail */
@@ -1133,12 +1173,12 @@ entrance:
if (!ctx->u.rep)
RETURN_ERROR(SRE_ERROR_STATE);
- state->ptr = ctx->ptr;
+ state->ptr = ptr;
ctx->count = ctx->u.rep->count+1;
- TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", ctx->pattern,
- ctx->ptr, ctx->count, ctx->u.rep->pattern));
+ TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
+ ptr, ctx->count, ctx->u.rep->pattern));
if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
/* not enough matches */
@@ -1150,7 +1190,7 @@ entrance:
RETURN_SUCCESS;
}
ctx->u.rep->count = ctx->count-1;
- state->ptr = ctx->ptr;
+ state->ptr = ptr;
RETURN_FAILURE;
}
@@ -1161,7 +1201,7 @@ entrance:
if (state->repeat)
MARK_PUSH(ctx->lastmark);
- DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
+ DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
SRE_REPEAT *repeat_of_tail = state->repeat;
state->repeat = ctx->u.rep; // restore repeat before return
@@ -1175,7 +1215,7 @@ entrance:
MARK_POP(ctx->lastmark);
LASTMARK_RESTORE();
- state->ptr = ctx->ptr;
+ state->ptr = ptr;
if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
&& ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
@@ -1194,34 +1234,34 @@ entrance:
RETURN_SUCCESS;
}
ctx->u.rep->count = ctx->count-1;
- state->ptr = ctx->ptr;
+ state->ptr = ptr;
RETURN_FAILURE;
- case SRE_OP_POSSESSIVE_REPEAT:
+ TARGET(SRE_OP_POSSESSIVE_REPEAT):
/* create possessive repeat contexts. */
/* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
<SUCCESS> tail */
- TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", ctx->pattern,
- ctx->ptr, ctx->pattern[1], ctx->pattern[2]));
+ TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
+ ptr, pattern[1], pattern[2]));
/* Set the global Input pointer to this context's Input
pointer */
- state->ptr = ctx->ptr;
+ state->ptr = ptr;
/* Initialize Count to 0 */
ctx->count = 0;
/* Check for minimum required matches. */
- while (ctx->count < (Py_ssize_t)ctx->pattern[1]) {
+ while (ctx->count < (Py_ssize_t)pattern[1]) {
/* not enough matches */
DO_JUMP(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
- &ctx->pattern[3]);
+ &pattern[3]);
if (ret) {
RETURN_ON_ERROR(ret);
ctx->count++;
}
else {
- state->ptr = ctx->ptr;
+ state->ptr = ptr;
RETURN_FAILURE;
}
}
@@ -1229,13 +1269,13 @@ entrance:
/* Clear the context's Input stream pointer so that it
doesn't match the global state so that the while loop can
be entered. */
- ctx->ptr = NULL;
+ ptr = NULL;
/* Keep trying to parse the <pattern> sub-pattern until the
end is reached, creating a new context each time. */
- while ((ctx->count < (Py_ssize_t)ctx->pattern[2] ||
- (Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT) &&
- state->ptr != ctx->ptr) {
+ while ((ctx->count < (Py_ssize_t)pattern[2] ||
+ (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
+ state->ptr != ptr) {
/* Save the Capture Group Marker state into the current
Context and back up the current highest number
Capture Group marker. */
@@ -1257,12 +1297,12 @@ entrance:
maximum number of matches are counted, and because
of this, we could immediately stop at that point and
consider this match successful. */
- ctx->ptr = state->ptr;
+ ptr = state->ptr;
/* We have not reached the maximum matches, so try to
match once more. */
DO_JUMP(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
- &ctx->pattern[3]);
+ &pattern[3]);
/* Check to see if the last attempted match
succeeded. */
@@ -1293,47 +1333,47 @@ entrance:
/* Evaluate Tail */
/* Jump to end of pattern indicated by skip, and then skip
the SUCCESS op code that follows it. */
- ctx->pattern += ctx->pattern[0] + 1;
- ctx->ptr = state->ptr;
- break;
+ pattern += pattern[0] + 1;
+ ptr = state->ptr;
+ DISPATCH;
- case SRE_OP_ATOMIC_GROUP:
+ TARGET(SRE_OP_ATOMIC_GROUP):
/* Atomic Group Sub Pattern */
/* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
- TRACE(("|%p|%p|ATOMIC_GROUP\n", ctx->pattern, ctx->ptr));
+ TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
/* Set the global Input pointer to this context's Input
pointer */
- state->ptr = ctx->ptr;
+ state->ptr = ptr;
/* Evaluate the Atomic Group in a new context, terminating
when the end of the group, represented by a SUCCESS op
code, is reached. */
/* Group Pattern begins at an offset of 1 code. */
DO_JUMP(JUMP_ATOMIC_GROUP, jump_atomic_group,
- &ctx->pattern[1]);
+ &pattern[1]);
/* Test Exit Condition */
RETURN_ON_ERROR(ret);
if (ret == 0) {
/* Atomic Group failed to Match. */
- state->ptr = ctx->ptr;
+ state->ptr = ptr;
RETURN_FAILURE;
}
/* Evaluate Tail */
/* Jump to end of pattern indicated by skip, and then skip
the SUCCESS op code that follows it. */
- ctx->pattern += ctx->pattern[0];
- ctx->ptr = state->ptr;
- break;
+ pattern += pattern[0];
+ ptr = state->ptr;
+ DISPATCH;
- case SRE_OP_GROUPREF:
+ TARGET(SRE_OP_GROUPREF):
/* match backreference */
- TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
- ctx->ptr, ctx->pattern[0]));
- i = ctx->pattern[0];
+ TRACE(("|%p|%p|GROUPREF %d\n", pattern,
+ ptr, pattern[0]));
+ i = pattern[0];
{
Py_ssize_t groupref = i+i;
if (groupref >= state->lastmark) {
@@ -1344,21 +1384,21 @@ entrance:
if (!p || !e || e < p)
RETURN_FAILURE;
while (p < e) {
- if (ctx->ptr >= end || *ctx->ptr != *p)
+ if (ptr >= end || *ptr != *p)
RETURN_FAILURE;
p++;
- ctx->ptr++;
+ ptr++;
}
}
}
- ctx->pattern++;
- break;
+ pattern++;
+ DISPATCH;
- case SRE_OP_GROUPREF_IGNORE:
+ TARGET(SRE_OP_GROUPREF_IGNORE):
/* match backreference */
- TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
- ctx->ptr, ctx->pattern[0]));
- i = ctx->pattern[0];
+ TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
+ ptr, pattern[0]));
+ i = pattern[0];
{
Py_ssize_t groupref = i+i;
if (groupref >= state->lastmark) {
@@ -1369,22 +1409,22 @@ entrance:
if (!p || !e || e < p)
RETURN_FAILURE;
while (p < e) {
- if (ctx->ptr >= end ||
- sre_lower_ascii(*ctx->ptr) != sre_lower_ascii(*p))
+ if (ptr >= end ||
+ sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
RETURN_FAILURE;
p++;
- ctx->ptr++;
+ ptr++;
}
}
}
- ctx->pattern++;
- break;
+ pattern++;
+ DISPATCH;
- case SRE_OP_GROUPREF_UNI_IGNORE:
+ TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
/* match backreference */
- TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", ctx->pattern,
- ctx->ptr, ctx->pattern[0]));
- i = ctx->pattern[0];
+ TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
+ ptr, pattern[0]));
+ i = pattern[0];
{
Py_ssize_t groupref = i+i;
if (groupref >= state->lastmark) {
@@ -1395,22 +1435,22 @@ entrance:
if (!p || !e || e < p)
RETURN_FAILURE;
while (p < e) {
- if (ctx->ptr >= end ||
- sre_lower_unicode(*ctx->ptr) != sre_lower_unicode(*p))
+ if (ptr >= end ||
+ sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
RETURN_FAILURE;
p++;
- ctx->ptr++;
+ ptr++;
}
}
}
- ctx->pattern++;
- break;
+ pattern++;
+ DISPATCH;
- case SRE_OP_GROUPREF_LOC_IGNORE:
+ TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
/* match backreference */
- TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", ctx->pattern,
- ctx->ptr, ctx->pattern[0]));
- i = ctx->pattern[0];
+ TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
+ ptr, pattern[0]));
+ i = pattern[0];
{
Py_ssize_t groupref = i+i;
if (groupref >= state->lastmark) {
@@ -1421,64 +1461,64 @@ entrance:
if (!p || !e || e < p)
RETURN_FAILURE;
while (p < e) {
- if (ctx->ptr >= end ||
- sre_lower_locale(*ctx->ptr) != sre_lower_locale(*p))
+ if (ptr >= end ||
+ sre_lower_locale(*ptr) != sre_lower_locale(*p))
RETURN_FAILURE;
p++;
- ctx->ptr++;
+ ptr++;
}
}
}
- ctx->pattern++;
- break;
+ pattern++;
+ DISPATCH;
- case SRE_OP_GROUPREF_EXISTS:
- TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
- ctx->ptr, ctx->pattern[0]));
+ TARGET(SRE_OP_GROUPREF_EXISTS):
+ TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
+ ptr, pattern[0]));
/* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
- i = ctx->pattern[0];
+ i = pattern[0];
{
Py_ssize_t groupref = i+i;
if (groupref >= state->lastmark) {
- ctx->pattern += ctx->pattern[1];
- break;
+ pattern += pattern[1];
+ DISPATCH;
} else {
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
if (!p || !e || e < p) {
- ctx->pattern += ctx->pattern[1];
- break;
+ pattern += pattern[1];
+ DISPATCH;
}
}
}
- ctx->pattern += 2;
- break;
+ pattern += 2;
+ DISPATCH;
- case SRE_OP_ASSERT:
+ TARGET(SRE_OP_ASSERT):
/* assert subpattern */
/* <ASSERT> <skip> <back> <pattern> */
- TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
- ctx->ptr, ctx->pattern[1]));
- if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1])
+ TRACE(("|%p|%p|ASSERT %d\n", pattern,
+ ptr, pattern[1]));
+ if (ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)pattern[1])
RETURN_FAILURE;
- state->ptr = ctx->ptr - ctx->pattern[1];
- DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2);
+ state->ptr = ptr - pattern[1];
+ DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
RETURN_ON_FAILURE(ret);
- ctx->pattern += ctx->pattern[0];
- break;
+ pattern += pattern[0];
+ DISPATCH;
- case SRE_OP_ASSERT_NOT:
+ TARGET(SRE_OP_ASSERT_NOT):
/* assert not subpattern */
/* <ASSERT_NOT> <skip> <back> <pattern> */
- TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
- ctx->ptr, ctx->pattern[1]));
- if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) {
- state->ptr = ctx->ptr - ctx->pattern[1];
+ TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
+ ptr, pattern[1]));
+ if (ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)pattern[1]) {
+ state->ptr = ptr - pattern[1];
LASTMARK_SAVE();
if (state->repeat)
MARK_PUSH(ctx->lastmark);
- DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
+ DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
if (ret) {
if (state->repeat)
MARK_POP_DISCARD(ctx->lastmark);
@@ -1489,19 +1529,29 @@ entrance:
MARK_POP(ctx->lastmark);
LASTMARK_RESTORE();
}
- ctx->pattern += ctx->pattern[0];
- break;
+ pattern += pattern[0];
+ DISPATCH;
- case SRE_OP_FAILURE:
+ TARGET(SRE_OP_FAILURE):
/* immediate failure */
- TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
+ TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
RETURN_FAILURE;
+#if !USE_COMPUTED_GOTOS
default:
- TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
- ctx->pattern[-1]));
+#endif
+ // Also any unused opcodes:
+ TARGET(SRE_OP_RANGE_UNI_IGNORE):
+ TARGET(SRE_OP_SUBPATTERN):
+ TARGET(SRE_OP_RANGE):
+ TARGET(SRE_OP_NEGATE):
+ TARGET(SRE_OP_BIGCHARSET):
+ TARGET(SRE_OP_CHARSET):
+ TARGET(SRE_OP_CALL):
+ TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
+ pattern[-1]));
RETURN_ERROR(SRE_ERROR_ILLEGAL);
- }
+
}
exit:
@@ -1514,56 +1564,56 @@ exit:
switch (jump) {
case JUMP_MAX_UNTIL_2:
- TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
+ TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
goto jump_max_until_2;
case JUMP_MAX_UNTIL_3:
- TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
+ TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
goto jump_max_until_3;
case JUMP_MIN_UNTIL_2:
- TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
+ TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
goto jump_min_until_2;
case JUMP_MIN_UNTIL_3:
- TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
+ TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
goto jump_min_until_3;
case JUMP_BRANCH:
- TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
+ TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
goto jump_branch;
case JUMP_MAX_UNTIL_1:
- TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
+ TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
goto jump_max_until_1;
case JUMP_MIN_UNTIL_1:
- TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
+ TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
goto jump_min_until_1;
case JUMP_POSS_REPEAT_1:
- TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", ctx->pattern, ctx->ptr));
+ TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
goto jump_poss_repeat_1;
case JUMP_POSS_REPEAT_2:
- TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", ctx->pattern, ctx->ptr));
+ TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
goto jump_poss_repeat_2;
case JUMP_REPEAT:
- TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
+ TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
goto jump_repeat;
case JUMP_REPEAT_ONE_1:
- TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
+ TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
goto jump_repeat_one_1;
case JUMP_REPEAT_ONE_2:
- TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
+ TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
goto jump_repeat_one_2;
case JUMP_MIN_REPEAT_ONE:
- TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
+ TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
goto jump_min_repeat_one;
case JUMP_ATOMIC_GROUP:
- TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", ctx->pattern, ctx->ptr));
+ TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
goto jump_atomic_group;
case JUMP_ASSERT:
- TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
+ TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
goto jump_assert;
case JUMP_ASSERT_NOT:
- TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
+ TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
goto jump_assert_not;
case JUMP_NONE:
- TRACE(("|%p|%p|RETURN %zd\n", ctx->pattern,
- ctx->ptr, ret));
+ TRACE(("|%p|%p|RETURN %zd\n", pattern,
+ ptr, ret));
break;
}
diff --git a/Modules/_sre/sre_targets.h b/Modules/_sre/sre_targets.h
new file mode 100644
index 0000000..389e7d7
--- /dev/null
+++ b/Modules/_sre/sre_targets.h
@@ -0,0 +1,59 @@
+/*
+ * Secret Labs' Regular Expression Engine
+ *
+ * regular expression matching engine
+ *
+ * Auto-generated by Tools/scripts/generate_sre_constants.py from
+ * Lib/re/_constants.py.
+ *
+ * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
+ *
+ * See the sre.c file for information on usage and redistribution.
+ */
+
+static void *sre_targets[44] = {
+ &&TARGET_SRE_OP_FAILURE,
+ &&TARGET_SRE_OP_SUCCESS,
+ &&TARGET_SRE_OP_ANY,
+ &&TARGET_SRE_OP_ANY_ALL,
+ &&TARGET_SRE_OP_ASSERT,
+ &&TARGET_SRE_OP_ASSERT_NOT,
+ &&TARGET_SRE_OP_AT,
+ &&TARGET_SRE_OP_BRANCH,
+ &&TARGET_SRE_OP_CALL,
+ &&TARGET_SRE_OP_CATEGORY,
+ &&TARGET_SRE_OP_CHARSET,
+ &&TARGET_SRE_OP_BIGCHARSET,
+ &&TARGET_SRE_OP_GROUPREF,
+ &&TARGET_SRE_OP_GROUPREF_EXISTS,
+ &&TARGET_SRE_OP_IN,
+ &&TARGET_SRE_OP_INFO,
+ &&TARGET_SRE_OP_JUMP,
+ &&TARGET_SRE_OP_LITERAL,
+ &&TARGET_SRE_OP_MARK,
+ &&TARGET_SRE_OP_MAX_UNTIL,
+ &&TARGET_SRE_OP_MIN_UNTIL,
+ &&TARGET_SRE_OP_NOT_LITERAL,
+ &&TARGET_SRE_OP_NEGATE,
+ &&TARGET_SRE_OP_RANGE,
+ &&TARGET_SRE_OP_REPEAT,
+ &&TARGET_SRE_OP_REPEAT_ONE,
+ &&TARGET_SRE_OP_SUBPATTERN,
+ &&TARGET_SRE_OP_MIN_REPEAT_ONE,
+ &&TARGET_SRE_OP_ATOMIC_GROUP,
+ &&TARGET_SRE_OP_POSSESSIVE_REPEAT,
+ &&TARGET_SRE_OP_POSSESSIVE_REPEAT_ONE,
+ &&TARGET_SRE_OP_GROUPREF_IGNORE,
+ &&TARGET_SRE_OP_IN_IGNORE,
+ &&TARGET_SRE_OP_LITERAL_IGNORE,
+ &&TARGET_SRE_OP_NOT_LITERAL_IGNORE,
+ &&TARGET_SRE_OP_GROUPREF_LOC_IGNORE,
+ &&TARGET_SRE_OP_IN_LOC_IGNORE,
+ &&TARGET_SRE_OP_LITERAL_LOC_IGNORE,
+ &&TARGET_SRE_OP_NOT_LITERAL_LOC_IGNORE,
+ &&TARGET_SRE_OP_GROUPREF_UNI_IGNORE,
+ &&TARGET_SRE_OP_IN_UNI_IGNORE,
+ &&TARGET_SRE_OP_LITERAL_UNI_IGNORE,
+ &&TARGET_SRE_OP_NOT_LITERAL_UNI_IGNORE,
+ &&TARGET_SRE_OP_RANGE_UNI_IGNORE,
+};
diff --git a/Tools/scripts/generate_sre_constants.py b/Tools/scripts/generate_sre_constants.py
index b8f0df9..7271507 100755
--- a/Tools/scripts/generate_sre_constants.py
+++ b/Tools/scripts/generate_sre_constants.py
@@ -29,7 +29,11 @@ sre_constants_header = """\
"""
-def main(infile='Lib/re/_constants.py', outfile='Modules/_sre/sre_constants.h'):
+def main(
+ infile="Lib/re/_constants.py",
+ outfile_constants="Modules/_sre/sre_constants.h",
+ outfile_targets="Modules/_sre/sre_targets.h",
+):
ns = {}
with open(infile) as fp:
code = fp.read()
@@ -46,6 +50,11 @@ def main(infile='Lib/re/_constants.py', outfile='Modules/_sre/sre_constants.h'):
for value, name in sorted(items):
yield "#define %s %d\n" % (name, value)
+ def dump_gotos(d, prefix):
+ for i, item in enumerate(sorted(d)):
+ assert i == item
+ yield f" &&{prefix}_{item},\n"
+
content = [sre_constants_header]
content.append("#define SRE_MAGIC %d\n" % ns["MAGIC"])
content.extend(dump(ns["OPCODES"], "SRE_OP"))
@@ -54,7 +63,14 @@ def main(infile='Lib/re/_constants.py', outfile='Modules/_sre/sre_constants.h'):
content.extend(dump2(ns, "SRE_FLAG_"))
content.extend(dump2(ns, "SRE_INFO_"))
- update_file(outfile, ''.join(content))
+ update_file(outfile_constants, ''.join(content))
+
+ content = [sre_constants_header]
+ content.append(f"static void *sre_targets[{len(ns['OPCODES'])}] = {{\n")
+ content.extend(dump_gotos(ns["OPCODES"], "TARGET_SRE_OP"))
+ content.append("};\n")
+
+ update_file(outfile_targets, ''.join(content))
if __name__ == '__main__':