diff options
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | generic/regc_nfa.c | 96 | ||||
-rw-r--r-- | tests/regexp.test | 4 |
3 files changed, 67 insertions, 39 deletions
@@ -1,3 +1,9 @@ +2013-02-15 Don Porter <dgp@users.sourceforge.net> + + * generic/regc_nfa.c: [Bug 3604074] Fix regexp optimization to + * tests/regexp.test: stop hanging on the expression + ((((((((a)*)*)*)*)*)*)*)* . Thanks to Bjørn Grathwohl for discovery. + 2013-02-14 Harald Oehlmann <oehhar@users.sf.net> * library/msgcat/msgcat.tcl: [Bug 3604576]: Catch missing registry entry diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c index 19dbe63..65ca7a7 100644 --- a/generic/regc_nfa.c +++ b/generic/regc_nfa.c @@ -1235,6 +1235,7 @@ fixempties( { struct state *s; struct state *nexts; + struct state *to; struct arc *a; struct arc *nexta; int progress; @@ -1245,15 +1246,50 @@ fixempties( do { progress = 0; - for (s = nfa->states; s != NULL && !NISERR() - && s->no != FREESTATE; s = nexts) { + for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { nexts = s->next; - for (a = s->outs; a != NULL && !NISERR(); a = nexta) { + for (a = s->outs; a != NULL && !NISERR(); a = a->outchain) { + if (a->type == EMPTY) { + + /* + * Mark a for deletion; copy arcs to preserve graph + * connectivity after it is gone. + */ + + unempty(nfa, a); + } + } + + /* + * Now pass through and delete the marked arcs. Doing all the + * deletion after all the marking prevents arc copying from + * resurrecting deleted arcs which can cause failure to converge. + * [Tcl Bug 3604074] + */ + + for (a = s->outs; a != NULL; a = nexta) { nexta = a->outchain; - if (a->type == EMPTY && unempty(nfa, a)) { + if (a->from == NULL) { progress = 1; + to = a->to; + a->from = s; + freearc(nfa, a); + if (to->nins == 0) { + while ((a = to->outs)) { + freearc(nfa, a); + } + if (nexts == to) { + nexts = to->next; + } + freestate(nfa, to); + } + if (s->nouts == 0) { + while ((a = s->ins)) { + freearc(nfa, a); + } + freestate(nfa, s); + } } - assert(nexta == NULL || s->no != FREESTATE); } } if (progress && f != NULL) { @@ -1275,7 +1311,6 @@ unempty( { struct state *from = a->from; struct state *to = a->to; - int usefrom; /* work on from, as opposed to to? */ assert(a->type == EMPTY); assert(from != nfa->pre && to != nfa->post); @@ -1286,47 +1321,30 @@ unempty( } /* - * Decide which end to work on. + * Mark arc for deletion. */ - usefrom = 1; /* default: attack from */ + a->from = NULL; + if (from->nouts > to->nins) { - usefrom = 0; - } else if (from->nouts == to->nins) { - /* - * Decide on secondary issue: move/copy fewest arcs. - */ - - if (from->nins > to->nouts) { - usefrom = 0; - } + copyouts(nfa, to, from); + return 1; + } + if (from->nouts < to->nins) { + copyins(nfa, from, to); + return 1; } - freearc(nfa, a); - if (usefrom) { - if (from->nouts == 0) { - /* - * Was the state's only outarc. - */ - - moveins(nfa, from, to); - freestate(nfa, from); - } else { - copyins(nfa, from, to); - } - } else { - if (to->nins == 0) { - /* - * Was the state's only inarc. - */ + /* + * from->nouts == to->nins . decide on secondary issue: copy fewest arcs + */ - moveouts(nfa, to, from); - freestate(nfa, to); - } else { - copyouts(nfa, to, from); - } + if (from->nins > to->nouts) { + copyouts(nfa, to, from); + return 1; } + copyins(nfa, from, to); return 1; } diff --git a/tests/regexp.test b/tests/regexp.test index b20349f..7366a8c 100644 --- a/tests/regexp.test +++ b/tests/regexp.test @@ -671,6 +671,10 @@ test regexp-22.1 {Bug 1810038} { test regexp-22.2 {regexp compile and backrefs, Bug 1857126} { regexp -- {([bc])\1} bb } 1 +test regexp-22.3 {Bug 3604074} { + # This will hang in interps where the bug is not fixed + regexp ((((((((a)*)*)*)*)*)*)*)* a +} 1 test regexp-23.1 {regexp -all and -line} { set string "" |