diff options
Diffstat (limited to 'generic')
-rw-r--r-- | generic/regc_nfa.c | 351 |
1 files changed, 177 insertions, 174 deletions
diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c index b125062..107e466 100644 --- a/generic/regc_nfa.c +++ b/generic/regc_nfa.c @@ -474,7 +474,7 @@ struct state *s; return 1; return 0; } - + /* - nonemptyouts - count non-EMPTY out arcs of a state ^ static int nonemptyouts(struct state *); @@ -483,16 +483,15 @@ static int nonemptyouts(s) struct state *s; { - int n = 0; - struct arc *a; - - for (a = s->outs; a != NULL; a = a->outchain) { - if (a->type != EMPTY) - n++; - } - return n; + int n = 0; + struct arc *a; + + for (a = s->outs; a != NULL; a = a->outchain) + if (a->type != EMPTY) + n++; + return n; } - + /* - nonemptyins - count non-EMPTY in arcs of a state ^ static int nonemptyins(struct state *); @@ -501,16 +500,15 @@ static int nonemptyins(s) struct state *s; { - int n = 0; - struct arc *a; - - for (a = s->ins; a != NULL; a = a->inchain) { - if (a->type != EMPTY) - n++; - } - return n; + int n = 0; + struct arc *a; + + for (a = s->ins; a != NULL; a = a->inchain) + if (a->type != EMPTY) + n++; + return n; } - + /* - findarc - find arc, if any, from given source with given type and color * If there is more than one such arc, the result is random. @@ -1195,108 +1193,114 @@ fixempties(nfa, f) struct nfa *nfa; FILE *f; /* for debug output; NULL none */ { - struct state *s; - struct state *s2; - struct state *nexts; - struct arc *a; - struct arc *nexta; - - /* - * First, get rid of any states whose sole out-arc is an EMPTY, since - * they're basically just aliases for their successor. The parsing - * algorithm creates enough of these that it's worth special-casing this. - */ - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { - nexts = s->next; - if (s->flag || s->nouts != 1) - continue; - a = s->outs; - assert(a != NULL && a->outchain == NULL); - if (a->type != EMPTY) - continue; - if (s != a->to) - moveins(nfa, s, a->to); - dropstate(nfa, s); - } - - /* - * Similarly, get rid of any state with a single EMPTY in-arc, by folding - * it into its predecessor. - */ - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { - nexts = s->next; - /* while we're at it, ensure tmp fields are clear for next step */ - assert(s->tmp = NULL); - if (s->flag || s->nins != 1) - continue; - a = s->ins; - assert(a != NULL && a->inchain == NULL); - if (a->type != EMPTY) - continue; - if (s != a->from) - moveouts(nfa, s, a->from); - dropstate(nfa, s); - } - - /* - * For each remaining NFA state, find all other states that are reachable - * from it by a chain of one or more EMPTY arcs. Then generate new arcs - * that eliminate the need for each such chain. - * - * If we just do this straightforwardly, the algorithm gets slow in - * complex graphs, because the same arcs get copied to all intermediate - * states of an EMPTY chain, and then uselessly pushed repeatedly to the - * chain's final state; we waste a lot of time in newarc's duplicate - * checking. To improve matters, we decree that any state with only EMPTY - * out-arcs is "doomed" and will not be part of the final NFA. That can be - * ensured by not adding any new out-arcs to such a state. Having ensured - * that, we need not update the state's in-arcs list either; all arcs that - * might have gotten pushed forward to it will just get pushed directly to - * successor states. This eliminates most of the useless duplicate arcs. - */ - for (s = nfa->states; s != NULL && !NISERR(); s = s->next) { - for (s2 = emptyreachable(s, s); s2 != s && !NISERR(); s2 = nexts) { - /* - * If s2 is doomed, we decide that (1) we will always push arcs - * forward to it, not pull them back to s; and (2) we can optimize - * away the push-forward, per comment above. So do nothing. - */ - if (s2->flag || hasnonemptyout(s2)) - replaceempty(nfa, s, s2); - - /* Reset the tmp fields as we walk back */ - nexts = s2->tmp; - s2->tmp = NULL; + struct state *s; + struct state *s2; + struct state *nexts; + struct arc *a; + struct arc *nexta; + + /* + * First, get rid of any states whose sole out-arc is an EMPTY, + * since they're basically just aliases for their successor. + * The parsing algorithm creates enough of these that it's worth + * special-casing this. + */ + for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { + nexts = s->next; + if (s->flag || s->nouts != 1) + continue; + a = s->outs; + assert(a != NULL && a->outchain == NULL); + if (a->type != EMPTY) + continue; + if (s != a->to) + moveins(nfa, s, a->to); + dropstate(nfa, s); } - s->tmp = NULL; - } - - /* - * Now remove all the EMPTY arcs, since we don't need them anymore. - */ - for (s = nfa->states; s != NULL && !NISERR(); s = s->next) { - for (a = s->outs; a != NULL; a = nexta) { - nexta = a->outchain; - if (a->type == EMPTY) - freearc(nfa, a); + + /* + * Similarly, get rid of any state with a single EMPTY in-arc, + * by folding it into its predecessor. + */ + for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { + nexts = s->next; + /* Ensure tmp fields are clear for next step */ + assert(s->tmp = NULL); + if (s->flag || s->nins != 1) + continue; + a = s->ins; + assert(a != NULL && a->inchain == NULL); + if (a->type != EMPTY) + continue; + if (s != a->from) + moveouts(nfa, s, a->from); + dropstate(nfa, s); + } + + /* + * For each remaining NFA state, find all other states that are + * reachable from it by a chain of one or more EMPTY arcs. Then + * generate new arcs that eliminate the need for each such chain. + * + * If we just do this straightforwardly, the algorithm gets slow + * in complex graphs, because the same arcs get copied to all + * intermediate states of an EMPTY chain, and then uselessly + * pushed repeatedly to the chain's final state; we waste a lot + * of time in newarc's duplicate checking. To improve matters, + * we decree that any state with only EMPTY out-arcs is "doomed" + * and will not be part of the final NFA. That can be ensured by + * not adding any new out-arcs to such a state. Having ensured + * that, we need not update the state's in-arcs list either; all + * arcs that might have gotten pushed forward to it will just get + * pushed directly to successor states. This eliminates most of + * the useless duplicate arcs. + */ + for (s = nfa->states; s != NULL && !NISERR(); s = s->next) { + for (s2 = emptyreachable(s, s); s2 != s && !NISERR(); + s2 = nexts) { + /* + * If s2 is doomed, we decide that (1) we will + * always push arcs forward to it, not pull them + * back to s; and (2) we can optimize away the + * push-forward, per comment above. + * So do nothing. + */ + if (s2->flag || hasnonemptyout(s2)) + replaceempty(nfa, s, s2); + + /* Reset the tmp fields as we walk back */ + nexts = s2->tmp; + s2->tmp = NULL; + } + s->tmp = NULL; + } + + /* + * Remove all the EMPTY arcs, since we don't need them anymore. + */ + for (s = nfa->states; s != NULL; s = s->next) + for (a = s->outs; a != NULL; a = nexta) { + nexta = a->outchain; + if (a->type == EMPTY) + freearc(nfa, a); + } + + /* + * And remove any states that have become useless. (This + * cleanup is not very thorough, and would be even less so if we + * tried to combine it with the previous step; but cleanup() + * will take care of anything we miss.) + */ + for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { + nexts = s->next; + if ((s->nins == 0 || s->nouts == 0) && !s->flag) + dropstate(nfa, s); } - } - - /* - * And remove any states that have become useless. (This cleanup is not - * very thorough, and would be even less so if we tried to combine it with - * the previous step; but cleanup() will take care of anything we miss.) - */ - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { - nexts = s->next; - if ((s->nins == 0 || s->nouts == 0) && !s->flag) - dropstate(nfa, s); - } - - if (f != NULL && !NISERR()) - dumpnfa(nfa, f); + + if (f != NULL && !NISERR()) + dumpnfa(nfa, f); } - + /* - emptyreachable - recursively find all states reachable from s by EMPTY arcs * The return value is the last such state found. Its tmp field links back @@ -1312,17 +1316,16 @@ emptyreachable(s, lastfound) struct state *s; struct state *lastfound; { - struct arc *a; - - s->tmp = lastfound; - lastfound = s; - for (a = s->outs; a != NULL; a = a->outchain) { - if (a->type == EMPTY && a->to->tmp == NULL) - lastfound = emptyreachable(a->to, lastfound); - } - return lastfound; + struct arc *a; + + s->tmp = lastfound; + lastfound = s; + for (a = s->outs; a != NULL; a = a->outchain) + if (a->type == EMPTY && a->to->tmp == NULL) + lastfound = emptyreachable(a->to, lastfound); + return lastfound; } - + /* - replaceempty - replace an EMPTY arc chain with some non-empty arcs * The EMPTY arc(s) should be deleted later, but we can't do it here because @@ -1335,54 +1338,54 @@ struct nfa *nfa; struct state *from; struct state *to; { - int fromouts; - int toins; - - assert(from != to); - - /* - * Create replacement arcs that bypass the need for the EMPTY chain. We - * can do this either by pushing arcs forward (linking directly from - * "from"'s predecessors to "to") or by pulling them back (linking - * directly from "from" to "to"'s successors). In general, we choose - * whichever way creates greater fan-out or fan-in, so as to improve the - * odds of reducing the other state to zero in-arcs or out-arcs and - * thereby being able to delete it. However, if "from" is doomed (has no - * non-EMPTY out-arcs), we must keep it so, so always push forward in that - * case. - * - * The fan-out/fan-in comparison should count only non-EMPTY arcs. If - * "from" is doomed, we can skip counting "to"'s arcs, since we want to - * force taking the copynonemptyins path in that case. - */ - fromouts = nonemptyouts(from); - toins = (fromouts == 0) ? 1 : nonemptyins(to); - - if (fromouts > toins) { - copyouts(nfa, to, from, 0); - return; - } - if (fromouts < toins) { - copyins(nfa, from, to, 0); - return; - } - - /* - * fromouts == toins. Decide on secondary issue: copy fewest arcs. - * - * Doesn't seem to be worth the trouble to exclude empties from these - * comparisons; that takes extra time and doesn't seem to improve the - * resulting graph much. - */ - if (from->nins > to->nouts) { - copyouts(nfa, to, from, 0); - return; - } else { + int fromouts; + int toins; + + assert(from != to); + + /* + * Create replacement arcs that bypass the need for the EMPTY + * chain. We can do this either by pushing arcs forward + * (linking directly from predecessors of "from" to "to") or by + * pulling them back (linking directly from "from" to the + * successors of "to"). In general, we choose whichever way + * creates greater fan-out or fan-in, so as to improve the odds + * of reducing the other state to zero in-arcs or out-arcs and + * thereby being able to delete it. However, if "from" is + * doomed (has no non-EMPTY out-arcs), we must keep it so, so + * always push forward in that case. + * + * The fan-out/fan-in comparison should count only non-EMPTY + * arcs. If "from" is doomed, we can skip counting "to"'s arcs, + * since we want to force taking the copyins path in that case. + */ + fromouts = nonemptyouts(from); + toins = (fromouts == 0) ? 1 : nonemptyins(to); + + if (fromouts > toins) { + copyouts(nfa, to, from, 0); + return; + } + if (fromouts < toins) { + copyins(nfa, from, to, 0); + return; + } + + /* + * fromouts == toins. Secondary decision: copy fewest arcs. + * + * Doesn't seem to be worth the trouble to exclude empties from + * these comparisons; that takes extra time and doesn't seem to + * improve the resulting graph much. + */ + if (from->nins > to->nouts) { + copyouts(nfa, to, from, 0); + return; + } + copyins(nfa, from, to, 0); - return; - } } - + /* - cleanup - clean up NFA after optimizations ^ static VOID cleanup(struct nfa *); |