diff options
author | dkf <donal.k.fellows@manchester.ac.uk> | 2010-02-01 00:27:53 (GMT) |
---|---|---|
committer | dkf <donal.k.fellows@manchester.ac.uk> | 2010-02-01 00:27:53 (GMT) |
commit | c6ba244014f32c930a14f48a9dfdcd463b986167 (patch) | |
tree | 2d754bccfd261f65180e2cf630619b864301c1b6 | |
parent | e9307add3ad324ccd88106208eede2efec27fa83 (diff) | |
download | tcl-c6ba244014f32c930a14f48a9dfdcd463b986167.zip tcl-c6ba244014f32c930a14f48a9dfdcd463b986167.tar.gz tcl-c6ba244014f32c930a14f48a9dfdcd463b986167.tar.bz2 |
[Bug 2942697]: Rework the RE engine so that certain pathological patterns are
matched much more rapidly. Many thanks to Tom Lane for dianosing this issue
and providing an initial patch.
-rw-r--r-- | ChangeLog | 7 | ||||
-rw-r--r-- | generic/regexec.c | 124 |
2 files changed, 74 insertions, 57 deletions
@@ -1,3 +1,10 @@ +2010-02-01 Donal K. Fellows <dkf@users.sf.net> + + * generic/regexec.c (ccondissect, crevdissect): [Bug 2942697]: Rework + these functions so that certain pathological patterns are matched much + more rapidly. Many thanks to Tom Lane for dianosing this issue and + providing an initial patch. + 2010-01-30 Donal K. Fellows <dkf@users.sf.net> * generic/tclCompile.c (tclInstructionTable): Bytecode instructions diff --git a/generic/regexec.c b/generic/regexec.c index ed6ceec..b369d2b 100644 --- a/generic/regexec.c +++ b/generic/regexec.c @@ -135,7 +135,8 @@ static void subset(struct vars *, struct subre *, chr *, chr *); static int dissect(struct vars *, struct subre *, chr *, chr *); static int condissect(struct vars *, struct subre *, chr *, chr *); static int altdissect(struct vars *, struct subre *, chr *, chr *); -static int cdissect(struct vars *, struct subre *, chr *, chr *); +static inline int cdissect(struct vars *, struct subre *, chr *, chr *); +static int ccaptdissect(struct vars *, struct subre *, chr *, chr *); static int ccondissect(struct vars *, struct subre *, chr *, chr *); static int crevdissect(struct vars *, struct subre *, chr *, chr *); static int cbrdissect(struct vars *, struct subre *, chr *, chr *); @@ -617,6 +618,9 @@ dissect( chr *begin, /* beginning of relevant substring */ chr *end) /* end of same */ { +#ifndef COMPILER_DOES_TAILCALL_OPTIMIZATION + restart: +#endif assert(t != NULL); MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end))); @@ -624,27 +628,26 @@ dissect( case '=': /* terminal node */ assert(t->left == NULL && t->right == NULL); return REG_OKAY; /* no action, parent did the work */ - break; case '|': /* alternation */ assert(t->left != NULL); return altdissect(v, t, begin, end); - break; case 'b': /* back ref -- shouldn't be calling us! */ return REG_ASSERT; - break; case '.': /* concatenation */ assert(t->left != NULL && t->right != NULL); return condissect(v, t, begin, end); - break; case '(': /* capturing */ assert(t->left != NULL && t->right == NULL); assert(t->subno > 0); subset(v, t, begin, end); +#ifndef COMPILER_DOES_TAILCALL_OPTIMIZATION + t = t->left; + goto restart; +#else return dissect(v, t->left, begin, end); - break; +#endif default: return REG_ASSERT; - break; } } @@ -786,15 +789,13 @@ altdissect( * so that 0 uniquely means "clean slate". ^ static int cdissect(struct vars *, struct subre *, chr *, chr *); */ -static int /* regexec return code */ +static inline int /* regexec return code */ cdissect( struct vars *v, struct subre *t, chr *begin, /* beginning of relevant substring */ chr *end) /* end of same */ { - int er; - assert(t != NULL); MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op)); @@ -802,33 +803,38 @@ cdissect( case '=': /* terminal node */ assert(t->left == NULL && t->right == NULL); return REG_OKAY; /* no action, parent did the work */ - break; case '|': /* alternation */ assert(t->left != NULL); return caltdissect(v, t, begin, end); - break; case 'b': /* back ref -- shouldn't be calling us! */ assert(t->left == NULL && t->right == NULL); return cbrdissect(v, t, begin, end); - break; case '.': /* concatenation */ assert(t->left != NULL && t->right != NULL); return ccondissect(v, t, begin, end); - break; case '(': /* capturing */ assert(t->left != NULL && t->right == NULL); assert(t->subno > 0); - er = cdissect(v, t->left, begin, end); - if (er == REG_OKAY) { - subset(v, t, begin, end); - } - return er; - break; + return ccaptdissect(v, t, begin, end); default: return REG_ASSERT; - break; } } + +static int /* regexec return code */ +ccaptdissect( + struct vars *v, + struct subre *t, + chr *begin, /* beginning of relevant substring */ + chr *end) /* end of same */ +{ + int er = cdissect(v, t->left, begin, end); + + if (er == REG_OKAY) { + subset(v, t, begin, end); + } + return er; +} /* - ccondissect - concatenation subexpression matches (with complications) @@ -894,15 +900,26 @@ ccondissect( * Try this midpoint on for size. */ - er = cdissect(v, t->left, begin, mid); - if ((er == REG_OKAY) && (longest(v, d2, mid, end, NULL) == end) - && (er = cdissect(v, t->right, mid, end)) == REG_OKAY) { - break; /* NOTE BREAK OUT */ - } - if ((er != REG_OKAY) && (er != REG_NOMATCH)) { - freedfa(d); - freedfa(d2); - return er; + if (longest(v, d2, mid, end, NULL) == end) { + er = cdissect(v, t->left, begin, mid); + if (er == REG_OKAY) { + er = cdissect(v, t->right, mid, end); + if (er == REG_OKAY) { + /* + * Satisfaction. + */ + + MDEBUG(("successful\n")); + freedfa(d); + freedfa(d2); + return REG_OKAY; + } + } + if ((er != REG_OKAY) && (er != REG_NOMATCH)) { + freedfa(d); + freedfa(d2); + return er; + } } /* @@ -935,15 +952,6 @@ ccondissect( zapmem(v, t->left); zapmem(v, t->right); } - - /* - * Satisfaction. - */ - - MDEBUG(("successful\n")); - freedfa(d); - freedfa(d2); - return REG_OKAY; } /* @@ -1011,15 +1019,26 @@ crevdissect( * Try this midpoint on for size. */ - er = cdissect(v, t->left, begin, mid); - if ((er == REG_OKAY) && (longest(v, d2, mid, end, NULL) == end) - && (er = cdissect(v, t->right, mid, end)) == REG_OKAY) { - break; /* NOTE BREAK OUT */ - } - if (er != REG_OKAY && er != REG_NOMATCH) { - freedfa(d); - freedfa(d2); - return er; + if (longest(v, d2, mid, end, NULL) == end) { + er = cdissect(v, t->left, begin, mid); + if (er == REG_OKAY) { + er = cdissect(v, t->right, mid, end); + if (er == REG_OKAY) { + /* + * Satisfaction. + */ + + MDEBUG(("successful\n")); + freedfa(d); + freedfa(d2); + return REG_OKAY; + } + } + if (er != REG_OKAY && er != REG_NOMATCH) { + freedfa(d); + freedfa(d2); + return er; + } } /* @@ -1052,15 +1071,6 @@ crevdissect( zapmem(v, t->left); zapmem(v, t->right); } - - /* - * Satisfaction. - */ - - MDEBUG(("successful\n")); - freedfa(d); - freedfa(d2); - return REG_OKAY; } /* |