summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordgp <dgp@users.sourceforge.net>2013-02-15 14:30:44 (GMT)
committerdgp <dgp@users.sourceforge.net>2013-02-15 14:30:44 (GMT)
commitb12908345e9ef71275b47409f0cec91af99c2530 (patch)
treecd68e19b8f1450a2d30e5b79eb5d2103a675c402
parent00e736695a8c441fd7613f99a6a67fc101302477 (diff)
parent46df7f7566778f6e653d25f063fc6a57649e36a6 (diff)
downloadtcl-b12908345e9ef71275b47409f0cec91af99c2530.zip
tcl-b12908345e9ef71275b47409f0cec91af99c2530.tar.gz
tcl-b12908345e9ef71275b47409f0cec91af99c2530.tar.bz2
3604074 Fix regexp optimization to stop hanging on the expression
((((((((a)*)*)*)*)*)*)*)* . Thanks to Bjørn Grathwohl for discovery.
-rw-r--r--ChangeLog6
-rw-r--r--generic/regc_nfa.c80
-rw-r--r--tests/regexp.test5
3 files changed, 60 insertions, 31 deletions
diff --git a/ChangeLog b/ChangeLog
index 27c09e2..81d2616 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2013-02-15 Don Porter <dgp@users.sourceforge.net>
+
+ * generic/regc_nfa.c: [Bug 3604074] Fix regexp optimization to
+ * tests/regexp.test: stop hanging on the expression
+ ((((((((a)*)*)*)*)*)*)*)* . Thanks to Bjørn Grathwohl for discovery.
+
2013-02-05 Don Porter <dgp@users.sourceforge.net>
* win/tclWinFile.c: [Bug 3603434] Make sure TclpObjNormalizePath()
diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c
index 48f56a9..459968a 100644
--- a/generic/regc_nfa.c
+++ b/generic/regc_nfa.c
@@ -1139,6 +1139,7 @@ FILE *f; /* for debug output; NULL none */
{
struct state *s;
struct state *nexts;
+ struct state *to;
struct arc *a;
struct arc *nexta;
int progress;
@@ -1146,14 +1147,41 @@ FILE *f; /* for debug output; NULL none */
/* find and eliminate empties until there are no more */
do {
progress = 0;
- for (s = nfa->states; s != NULL && !NISERR()
- && s->no != FREESTATE; s = nexts) {
+ for (s = nfa->states; s != NULL && !NISERR(); s = nexts) {
nexts = s->next;
- for (a = s->outs; a != NULL && !NISERR(); a = nexta) {
+ for (a = s->outs; a != NULL && !NISERR();
+ a = a->outchain)
+ if (a->type == EMPTY)
+ /* Mark a for deletion; copy arcs
+ * to preserve graph connectivity
+ * after it is gone. */
+ unempty(nfa, a);
+
+ /* Now pass through and delete the marked arcs.
+ * Doing all the deletion after all the marking
+ * prevents arc copying from resurrecting deleted
+ * arcs which can cause failure to converge.
+ * [Tcl Bug 3604074] */
+ for (a = s->outs; a != NULL; a = nexta) {
nexta = a->outchain;
- if (a->type == EMPTY && unempty(nfa, a))
+ if (a->from == NULL) {
progress = 1;
- assert(nexta == NULL || s->no != FREESTATE);
+ to = a->to;
+ a->from = s;
+ freearc(nfa, a);
+ if (to->nins == 0) {
+ while ((a = to->outs))
+ freearc(nfa, a);
+ if (nexts == to)
+ nexts = to->next;
+ freestate(nfa, to);
+ }
+ if (s->nouts == 0) {
+ while ((a = s->ins))
+ freearc(nfa, a);
+ freestate(nfa, s);
+ }
+ }
}
}
if (progress && f != NULL)
@@ -1174,7 +1202,6 @@ struct arc *a;
{
struct state *from = a->from;
struct state *to = a->to;
- int usefrom; /* work on from, as opposed to to? */
assert(a->type == EMPTY);
assert(from != nfa->pre && to != nfa->post);
@@ -1184,33 +1211,26 @@ struct arc *a;
return 1;
}
- /* decide which end to work on */
- usefrom = 1; /* default: attack from */
- if (from->nouts > to->nins)
- usefrom = 0;
- else if (from->nouts == to->nins) {
- /* decide on secondary issue: move/copy fewest arcs */
- if (from->nins > to->nouts)
- usefrom = 0;
+ /* Mark arc for deletion */
+ a->from = NULL;
+
+ if (from->nouts > to->nins) {
+ copyouts(nfa, to, from);
+ return 1;
}
-
- freearc(nfa, a);
- if (usefrom) {
- if (from->nouts == 0) {
- /* was the state's only outarc */
- moveins(nfa, from, to);
- freestate(nfa, from);
- } else
- copyins(nfa, from, to);
- } else {
- if (to->nins == 0) {
- /* was the state's only inarc */
- moveouts(nfa, to, from);
- freestate(nfa, to);
- } else
- copyouts(nfa, to, from);
+ if (from->nouts < to->nins) {
+ copyins(nfa, from, to);
+ return 1;
+ }
+
+ /* from->nouts == to->nins */
+ /* decide on secondary issue: move/copy fewest arcs */
+ if (from->nins > to->nouts) {
+ copyouts(nfa, to, from);
+ return 1;
}
+ copyins(nfa, from, to);
return 1;
}
diff --git a/tests/regexp.test b/tests/regexp.test
index 70ee47e..a7b81b2 100644
--- a/tests/regexp.test
+++ b/tests/regexp.test
@@ -630,7 +630,10 @@ test regexp-21.13 {multiple matches handle newlines} {
test regexp-22.1 {Bug 1810038} {
regexp ($|^X)* {}
} 1
-
+test regexp-22.2 {Bug 3604074} {
+ # This will hang in interps where the bug is not fixed
+ regexp ((((((((a)*)*)*)*)*)*)*)* a
+} 1
# cleanup
::tcltest::cleanupTests