summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordgp <dgp@users.sourceforge.net>2013-02-15 15:32:30 (GMT)
committerdgp <dgp@users.sourceforge.net>2013-02-15 15:32:30 (GMT)
commit02a26d7bcd80d7c7f7a2733ebecdbfb89d1212c5 (patch)
tree722bb5b087351a5a88a8ddd7a8872c13aeb97257
parente1bfb4b3a06ef2a5199b7587116f35242d185e98 (diff)
parentd14317916c9328450a7afd6cfa2168a6c5a7224f (diff)
downloadtcl-02a26d7bcd80d7c7f7a2733ebecdbfb89d1212c5.zip
tcl-02a26d7bcd80d7c7f7a2733ebecdbfb89d1212c5.tar.gz
tcl-02a26d7bcd80d7c7f7a2733ebecdbfb89d1212c5.tar.bz2
3604074 Fix regexp optimization to stop hanging on the expression
((((((((a)*)*)*)*)*)*)*)* . Thanks to Bjørn Grathwohl for discovery.
-rw-r--r--ChangeLog6
-rw-r--r--generic/regc_nfa.c96
-rw-r--r--tests/regexp.test4
3 files changed, 67 insertions, 39 deletions
diff --git a/ChangeLog b/ChangeLog
index 97fcc85..5993378 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2013-02-15 Don Porter <dgp@users.sourceforge.net>
+
+ * generic/regc_nfa.c: [Bug 3604074] Fix regexp optimization to
+ * tests/regexp.test: stop hanging on the expression
+ ((((((((a)*)*)*)*)*)*)*)* . Thanks to Bjørn Grathwohl for discovery.
+
2013-02-14 Harald Oehlmann <oehhar@users.sf.net>
* library/msgcat/msgcat.tcl: [Bug 3604576]: Catch missing registry entry
diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c
index 2c2397f..65147d4 100644
--- a/generic/regc_nfa.c
+++ b/generic/regc_nfa.c
@@ -1248,6 +1248,7 @@ fixempties(
{
struct state *s;
struct state *nexts;
+ struct state *to;
struct arc *a;
struct arc *nexta;
int progress;
@@ -1258,15 +1259,50 @@ fixempties(
do {
progress = 0;
- for (s = nfa->states; s != NULL && !NISERR()
- && s->no != FREESTATE; s = nexts) {
+ for (s = nfa->states; s != NULL && !NISERR(); s = nexts) {
nexts = s->next;
- for (a = s->outs; a != NULL && !NISERR(); a = nexta) {
+ for (a = s->outs; a != NULL && !NISERR(); a = a->outchain) {
+ if (a->type == EMPTY) {
+
+ /*
+ * Mark a for deletion; copy arcs to preserve graph
+ * connectivity after it is gone.
+ */
+
+ unempty(nfa, a);
+ }
+ }
+
+ /*
+ * Now pass through and delete the marked arcs. Doing all the
+ * deletion after all the marking prevents arc copying from
+ * resurrecting deleted arcs which can cause failure to converge.
+ * [Tcl Bug 3604074]
+ */
+
+ for (a = s->outs; a != NULL; a = nexta) {
nexta = a->outchain;
- if (a->type == EMPTY && unempty(nfa, a)) {
+ if (a->from == NULL) {
progress = 1;
+ to = a->to;
+ a->from = s;
+ freearc(nfa, a);
+ if (to->nins == 0) {
+ while ((a = to->outs)) {
+ freearc(nfa, a);
+ }
+ if (nexts == to) {
+ nexts = to->next;
+ }
+ freestate(nfa, to);
+ }
+ if (s->nouts == 0) {
+ while ((a = s->ins)) {
+ freearc(nfa, a);
+ }
+ freestate(nfa, s);
+ }
}
- assert(nexta == NULL || s->no != FREESTATE);
}
}
if (progress && f != NULL) {
@@ -1288,7 +1324,6 @@ unempty(
{
struct state *from = a->from;
struct state *to = a->to;
- int usefrom; /* work on from, as opposed to to? */
assert(a->type == EMPTY);
assert(from != nfa->pre && to != nfa->post);
@@ -1299,47 +1334,30 @@ unempty(
}
/*
- * Decide which end to work on.
+ * Mark arc for deletion.
*/
- usefrom = 1; /* default: attack from */
+ a->from = NULL;
+
if (from->nouts > to->nins) {
- usefrom = 0;
- } else if (from->nouts == to->nins) {
- /*
- * Decide on secondary issue: move/copy fewest arcs.
- */
-
- if (from->nins > to->nouts) {
- usefrom = 0;
- }
+ copyouts(nfa, to, from);
+ return 1;
+ }
+ if (from->nouts < to->nins) {
+ copyins(nfa, from, to);
+ return 1;
}
- freearc(nfa, a);
- if (usefrom) {
- if (from->nouts == 0) {
- /*
- * Was the state's only outarc.
- */
-
- moveins(nfa, from, to);
- freestate(nfa, from);
- } else {
- copyins(nfa, from, to);
- }
- } else {
- if (to->nins == 0) {
- /*
- * Was the state's only inarc.
- */
+ /*
+ * from->nouts == to->nins . decide on secondary issue: copy fewest arcs
+ */
- moveouts(nfa, to, from);
- freestate(nfa, to);
- } else {
- copyouts(nfa, to, from);
- }
+ if (from->nins > to->nouts) {
+ copyouts(nfa, to, from);
+ return 1;
}
+ copyins(nfa, from, to);
return 1;
}
diff --git a/tests/regexp.test b/tests/regexp.test
index 7cafd1b..21f5720 100644
--- a/tests/regexp.test
+++ b/tests/regexp.test
@@ -819,6 +819,10 @@ test regexp-22.1 {Bug 1810038} {
test regexp-22.2 {regexp compile and backrefs, Bug 1857126} {
regexp -- {([bc])\1} bb
} 1
+test regexp-22.3 {Bug 3604074} {
+ # This will hang in interps where the bug is not fixed
+ regexp ((((((((a)*)*)*)*)*)*)*)* a
+} 1
test regexp-23.1 {regexp -all and -line} {
set string ""