diff options
author | dgp <dgp@users.sourceforge.net> | 2013-02-15 14:30:44 (GMT) |
---|---|---|
committer | dgp <dgp@users.sourceforge.net> | 2013-02-15 14:30:44 (GMT) |
commit | b12908345e9ef71275b47409f0cec91af99c2530 (patch) | |
tree | cd68e19b8f1450a2d30e5b79eb5d2103a675c402 | |
parent | 00e736695a8c441fd7613f99a6a67fc101302477 (diff) | |
parent | 46df7f7566778f6e653d25f063fc6a57649e36a6 (diff) | |
download | tcl-b12908345e9ef71275b47409f0cec91af99c2530.zip tcl-b12908345e9ef71275b47409f0cec91af99c2530.tar.gz tcl-b12908345e9ef71275b47409f0cec91af99c2530.tar.bz2 |
3604074 Fix regexp optimization to stop hanging on the expression
((((((((a)*)*)*)*)*)*)*)* . Thanks to Bjørn Grathwohl for discovery.
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | generic/regc_nfa.c | 80 | ||||
-rw-r--r-- | tests/regexp.test | 5 |
3 files changed, 60 insertions, 31 deletions
@@ -1,3 +1,9 @@ +2013-02-15 Don Porter <dgp@users.sourceforge.net> + + * generic/regc_nfa.c: [Bug 3604074] Fix regexp optimization to + * tests/regexp.test: stop hanging on the expression + ((((((((a)*)*)*)*)*)*)*)* . Thanks to Bjørn Grathwohl for discovery. + 2013-02-05 Don Porter <dgp@users.sourceforge.net> * win/tclWinFile.c: [Bug 3603434] Make sure TclpObjNormalizePath() diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c index 48f56a9..459968a 100644 --- a/generic/regc_nfa.c +++ b/generic/regc_nfa.c @@ -1139,6 +1139,7 @@ FILE *f; /* for debug output; NULL none */ { struct state *s; struct state *nexts; + struct state *to; struct arc *a; struct arc *nexta; int progress; @@ -1146,14 +1147,41 @@ FILE *f; /* for debug output; NULL none */ /* find and eliminate empties until there are no more */ do { progress = 0; - for (s = nfa->states; s != NULL && !NISERR() - && s->no != FREESTATE; s = nexts) { + for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { nexts = s->next; - for (a = s->outs; a != NULL && !NISERR(); a = nexta) { + for (a = s->outs; a != NULL && !NISERR(); + a = a->outchain) + if (a->type == EMPTY) + /* Mark a for deletion; copy arcs + * to preserve graph connectivity + * after it is gone. */ + unempty(nfa, a); + + /* Now pass through and delete the marked arcs. + * Doing all the deletion after all the marking + * prevents arc copying from resurrecting deleted + * arcs which can cause failure to converge. + * [Tcl Bug 3604074] */ + for (a = s->outs; a != NULL; a = nexta) { nexta = a->outchain; - if (a->type == EMPTY && unempty(nfa, a)) + if (a->from == NULL) { progress = 1; - assert(nexta == NULL || s->no != FREESTATE); + to = a->to; + a->from = s; + freearc(nfa, a); + if (to->nins == 0) { + while ((a = to->outs)) + freearc(nfa, a); + if (nexts == to) + nexts = to->next; + freestate(nfa, to); + } + if (s->nouts == 0) { + while ((a = s->ins)) + freearc(nfa, a); + freestate(nfa, s); + } + } } } if (progress && f != NULL) @@ -1174,7 +1202,6 @@ struct arc *a; { struct state *from = a->from; struct state *to = a->to; - int usefrom; /* work on from, as opposed to to? */ assert(a->type == EMPTY); assert(from != nfa->pre && to != nfa->post); @@ -1184,33 +1211,26 @@ struct arc *a; return 1; } - /* decide which end to work on */ - usefrom = 1; /* default: attack from */ - if (from->nouts > to->nins) - usefrom = 0; - else if (from->nouts == to->nins) { - /* decide on secondary issue: move/copy fewest arcs */ - if (from->nins > to->nouts) - usefrom = 0; + /* Mark arc for deletion */ + a->from = NULL; + + if (from->nouts > to->nins) { + copyouts(nfa, to, from); + return 1; } - - freearc(nfa, a); - if (usefrom) { - if (from->nouts == 0) { - /* was the state's only outarc */ - moveins(nfa, from, to); - freestate(nfa, from); - } else - copyins(nfa, from, to); - } else { - if (to->nins == 0) { - /* was the state's only inarc */ - moveouts(nfa, to, from); - freestate(nfa, to); - } else - copyouts(nfa, to, from); + if (from->nouts < to->nins) { + copyins(nfa, from, to); + return 1; + } + + /* from->nouts == to->nins */ + /* decide on secondary issue: move/copy fewest arcs */ + if (from->nins > to->nouts) { + copyouts(nfa, to, from); + return 1; } + copyins(nfa, from, to); return 1; } diff --git a/tests/regexp.test b/tests/regexp.test index 70ee47e..a7b81b2 100644 --- a/tests/regexp.test +++ b/tests/regexp.test @@ -630,7 +630,10 @@ test regexp-21.13 {multiple matches handle newlines} { test regexp-22.1 {Bug 1810038} { regexp ($|^X)* {} } 1 - +test regexp-22.2 {Bug 3604074} { + # This will hang in interps where the bug is not fixed + regexp ((((((((a)*)*)*)*)*)*)*)* a +} 1 # cleanup ::tcltest::cleanupTests |