summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordkf <donal.k.fellows@manchester.ac.uk>2010-02-01 00:27:53 (GMT)
committerdkf <donal.k.fellows@manchester.ac.uk>2010-02-01 00:27:53 (GMT)
commitc6ba244014f32c930a14f48a9dfdcd463b986167 (patch)
tree2d754bccfd261f65180e2cf630619b864301c1b6
parente9307add3ad324ccd88106208eede2efec27fa83 (diff)
downloadtcl-c6ba244014f32c930a14f48a9dfdcd463b986167.zip
tcl-c6ba244014f32c930a14f48a9dfdcd463b986167.tar.gz
tcl-c6ba244014f32c930a14f48a9dfdcd463b986167.tar.bz2
[Bug 2942697]: Rework the RE engine so that certain pathological patterns are
matched much more rapidly. Many thanks to Tom Lane for dianosing this issue and providing an initial patch.
-rw-r--r--ChangeLog7
-rw-r--r--generic/regexec.c124
2 files changed, 74 insertions, 57 deletions
diff --git a/ChangeLog b/ChangeLog
index 0f7713d..527339e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2010-02-01 Donal K. Fellows <dkf@users.sf.net>
+
+ * generic/regexec.c (ccondissect, crevdissect): [Bug 2942697]: Rework
+ these functions so that certain pathological patterns are matched much
+ more rapidly. Many thanks to Tom Lane for dianosing this issue and
+ providing an initial patch.
+
2010-01-30 Donal K. Fellows <dkf@users.sf.net>
* generic/tclCompile.c (tclInstructionTable): Bytecode instructions
diff --git a/generic/regexec.c b/generic/regexec.c
index ed6ceec..b369d2b 100644
--- a/generic/regexec.c
+++ b/generic/regexec.c
@@ -135,7 +135,8 @@ static void subset(struct vars *, struct subre *, chr *, chr *);
static int dissect(struct vars *, struct subre *, chr *, chr *);
static int condissect(struct vars *, struct subre *, chr *, chr *);
static int altdissect(struct vars *, struct subre *, chr *, chr *);
-static int cdissect(struct vars *, struct subre *, chr *, chr *);
+static inline int cdissect(struct vars *, struct subre *, chr *, chr *);
+static int ccaptdissect(struct vars *, struct subre *, chr *, chr *);
static int ccondissect(struct vars *, struct subre *, chr *, chr *);
static int crevdissect(struct vars *, struct subre *, chr *, chr *);
static int cbrdissect(struct vars *, struct subre *, chr *, chr *);
@@ -617,6 +618,9 @@ dissect(
chr *begin, /* beginning of relevant substring */
chr *end) /* end of same */
{
+#ifndef COMPILER_DOES_TAILCALL_OPTIMIZATION
+ restart:
+#endif
assert(t != NULL);
MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end)));
@@ -624,27 +628,26 @@ dissect(
case '=': /* terminal node */
assert(t->left == NULL && t->right == NULL);
return REG_OKAY; /* no action, parent did the work */
- break;
case '|': /* alternation */
assert(t->left != NULL);
return altdissect(v, t, begin, end);
- break;
case 'b': /* back ref -- shouldn't be calling us! */
return REG_ASSERT;
- break;
case '.': /* concatenation */
assert(t->left != NULL && t->right != NULL);
return condissect(v, t, begin, end);
- break;
case '(': /* capturing */
assert(t->left != NULL && t->right == NULL);
assert(t->subno > 0);
subset(v, t, begin, end);
+#ifndef COMPILER_DOES_TAILCALL_OPTIMIZATION
+ t = t->left;
+ goto restart;
+#else
return dissect(v, t->left, begin, end);
- break;
+#endif
default:
return REG_ASSERT;
- break;
}
}
@@ -786,15 +789,13 @@ altdissect(
* so that 0 uniquely means "clean slate".
^ static int cdissect(struct vars *, struct subre *, chr *, chr *);
*/
-static int /* regexec return code */
+static inline int /* regexec return code */
cdissect(
struct vars *v,
struct subre *t,
chr *begin, /* beginning of relevant substring */
chr *end) /* end of same */
{
- int er;
-
assert(t != NULL);
MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));
@@ -802,33 +803,38 @@ cdissect(
case '=': /* terminal node */
assert(t->left == NULL && t->right == NULL);
return REG_OKAY; /* no action, parent did the work */
- break;
case '|': /* alternation */
assert(t->left != NULL);
return caltdissect(v, t, begin, end);
- break;
case 'b': /* back ref -- shouldn't be calling us! */
assert(t->left == NULL && t->right == NULL);
return cbrdissect(v, t, begin, end);
- break;
case '.': /* concatenation */
assert(t->left != NULL && t->right != NULL);
return ccondissect(v, t, begin, end);
- break;
case '(': /* capturing */
assert(t->left != NULL && t->right == NULL);
assert(t->subno > 0);
- er = cdissect(v, t->left, begin, end);
- if (er == REG_OKAY) {
- subset(v, t, begin, end);
- }
- return er;
- break;
+ return ccaptdissect(v, t, begin, end);
default:
return REG_ASSERT;
- break;
}
}
+
+static int /* regexec return code */
+ccaptdissect(
+ struct vars *v,
+ struct subre *t,
+ chr *begin, /* beginning of relevant substring */
+ chr *end) /* end of same */
+{
+ int er = cdissect(v, t->left, begin, end);
+
+ if (er == REG_OKAY) {
+ subset(v, t, begin, end);
+ }
+ return er;
+}
/*
- ccondissect - concatenation subexpression matches (with complications)
@@ -894,15 +900,26 @@ ccondissect(
* Try this midpoint on for size.
*/
- er = cdissect(v, t->left, begin, mid);
- if ((er == REG_OKAY) && (longest(v, d2, mid, end, NULL) == end)
- && (er = cdissect(v, t->right, mid, end)) == REG_OKAY) {
- break; /* NOTE BREAK OUT */
- }
- if ((er != REG_OKAY) && (er != REG_NOMATCH)) {
- freedfa(d);
- freedfa(d2);
- return er;
+ if (longest(v, d2, mid, end, NULL) == end) {
+ er = cdissect(v, t->left, begin, mid);
+ if (er == REG_OKAY) {
+ er = cdissect(v, t->right, mid, end);
+ if (er == REG_OKAY) {
+ /*
+ * Satisfaction.
+ */
+
+ MDEBUG(("successful\n"));
+ freedfa(d);
+ freedfa(d2);
+ return REG_OKAY;
+ }
+ }
+ if ((er != REG_OKAY) && (er != REG_NOMATCH)) {
+ freedfa(d);
+ freedfa(d2);
+ return er;
+ }
}
/*
@@ -935,15 +952,6 @@ ccondissect(
zapmem(v, t->left);
zapmem(v, t->right);
}
-
- /*
- * Satisfaction.
- */
-
- MDEBUG(("successful\n"));
- freedfa(d);
- freedfa(d2);
- return REG_OKAY;
}
/*
@@ -1011,15 +1019,26 @@ crevdissect(
* Try this midpoint on for size.
*/
- er = cdissect(v, t->left, begin, mid);
- if ((er == REG_OKAY) && (longest(v, d2, mid, end, NULL) == end)
- && (er = cdissect(v, t->right, mid, end)) == REG_OKAY) {
- break; /* NOTE BREAK OUT */
- }
- if (er != REG_OKAY && er != REG_NOMATCH) {
- freedfa(d);
- freedfa(d2);
- return er;
+ if (longest(v, d2, mid, end, NULL) == end) {
+ er = cdissect(v, t->left, begin, mid);
+ if (er == REG_OKAY) {
+ er = cdissect(v, t->right, mid, end);
+ if (er == REG_OKAY) {
+ /*
+ * Satisfaction.
+ */
+
+ MDEBUG(("successful\n"));
+ freedfa(d);
+ freedfa(d2);
+ return REG_OKAY;
+ }
+ }
+ if (er != REG_OKAY && er != REG_NOMATCH) {
+ freedfa(d);
+ freedfa(d2);
+ return er;
+ }
}
/*
@@ -1052,15 +1071,6 @@ crevdissect(
zapmem(v, t->left);
zapmem(v, t->right);
}
-
- /*
- * Satisfaction.
- */
-
- MDEBUG(("successful\n"));
- freedfa(d);
- freedfa(d2);
- return REG_OKAY;
}
/*