From fe7e82dfed8be6f8a8c99cfd68d8e1119faee568 Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 5 Mar 2013 14:04:17 +0000 Subject: Contributed regexp engine patch from Tom Lane. Backports clean from trunk. --- generic/regc_nfa.c | 309 +++++++++++++++++++++++++++++++++++++++-------------- generic/regcomp.c | 7 +- 2 files changed, 235 insertions(+), 81 deletions(-) diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c index 65ca7a7..5dab7bc 100644 --- a/generic/regc_nfa.c +++ b/generic/regc_nfa.c @@ -497,6 +497,42 @@ freearc( } /* + - nonemptyouts - count non-EMPTY out arcs of a state + ^ static int nonemptyouts(struct state *); + */ +static int +nonemptyouts( + struct state *s) +{ + int n = 0; + struct arc *a; + + for (a = s->outs; a != NULL; a = a->outchain) { + if (a->type != EMPTY) + n++; + } + return n; +} + +/* + - nonemptyins - count non-EMPTY in arcs of a state + ^ static int nonemptyins(struct state *); + */ +static int +nonemptyins( + struct state *s) +{ + int n = 0; + struct arc *a; + + for (a = s->ins; a != NULL; a = a->inchain) { + if (a->type != EMPTY) + n++; + } + return n; +} + +/* - findarc - find arc, if any, from given source with given type and color * If there is more than one such arc, the result is random. ^ static struct arc *findarc(struct state *, int, pcolor); @@ -578,6 +614,26 @@ copyins( } /* + - copynonemptyins - as above, but ignore empty arcs + ^ static void copynonemptyins(struct nfa *, struct state *, struct state *); + */ +static void +copynonemptyins( + struct nfa *nfa, + struct state *oldState, + struct state *newState) +{ + struct arc *a; + + assert(oldState != newState); + + for (a=oldState->ins ; a!=NULL ; a=a->inchain) { + if (a->type != EMPTY) + cparc(nfa, a, a->from, newState); + } +} + +/* - moveouts - move all out arcs of a state to another state ^ static VOID moveouts(struct nfa *, struct state *, struct state *); */ @@ -617,6 +673,26 @@ copyouts( } /* + - copynonemptyouts - as above, but ignore empty arcs + ^ static void copynonemptyouts(struct nfa *, struct state *, struct state *); + */ +static void +copynonemptyouts( + struct nfa *nfa, + struct state *oldState, + struct state *newState) +{ + struct arc *a; + + assert(oldState != newState); + + for (a=oldState->outs ; a!=NULL ; a=a->outchain) { + if (a->type != EMPTY) + cparc(nfa, a, newState, a->to); + } +} + +/* - cloneouts - copy out arcs of a state to another state pair, modifying type ^ static VOID cloneouts(struct nfa *, struct state *, struct state *, ^ struct state *, int); @@ -1234,118 +1310,191 @@ fixempties( FILE *f) /* for debug output; NULL none */ { struct state *s; + struct state *s2; struct state *nexts; - struct state *to; struct arc *a; struct arc *nexta; - int progress; /* - * Find and eliminate empties until there are no more. + * First, get rid of any states whose sole out-arc is an EMPTY, since + * they're basically just aliases for their successor. The parsing + * algorithm creates enough of these that it's worth special-casing this. */ + for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { + nexts = s->next; + if (s->nouts == 1 && !s->flag) { + a = s->outs; + assert(a != NULL && a->outchain == NULL); + if (a->type == EMPTY) { + if (s != a->to) + moveins(nfa, s, a->to); + dropstate(nfa, s); + } + } + } - do { - progress = 0; - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { - nexts = s->next; - for (a = s->outs; a != NULL && !NISERR(); a = a->outchain) { - if (a->type == EMPTY) { - - /* - * Mark a for deletion; copy arcs to preserve graph - * connectivity after it is gone. - */ - - unempty(nfa, a); - } + /* + * Similarly, get rid of any state with a single EMPTY in-arc, by folding + * it into its predecessor. + */ + for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { + nexts = s->next; + /* while we're at it, ensure tmp fields are clear for next step */ + s->tmp = NULL; + if (s->nins == 1 && !s->flag) { + a = s->ins; + assert(a != NULL && a->inchain == NULL); + if (a->type == EMPTY) { + if (s != a->from) + moveouts(nfa, s, a->from); + dropstate(nfa, s); } + } + } + /* + * For each remaining NFA state, find all other states that are reachable + * from it by a chain of one or more EMPTY arcs. Then generate new arcs + * that eliminate the need for each such chain. + * + * If we just do this straightforwardly, the algorithm gets slow in + * complex graphs, because the same arcs get copied to all intermediate + * states of an EMPTY chain, and then uselessly pushed repeatedly to the + * chain's final state; we waste a lot of time in newarc's duplicate + * checking. To improve matters, we decree that any state with only EMPTY + * out-arcs is "doomed" and will not be part of the final NFA. That can be + * ensured by not adding any new out-arcs to such a state. Having ensured + * that, we need not update the state's in-arcs list either; all arcs that + * might have gotten pushed forward to it will just get pushed directly to + * successor states. This eliminates most of the useless duplicate arcs. + */ + for (s = nfa->states; s != NULL && !NISERR(); s = s->next) { + for (s2 = emptyreachable(s, s); s2 != s && !NISERR(); s2 = nexts) { /* - * Now pass through and delete the marked arcs. Doing all the - * deletion after all the marking prevents arc copying from - * resurrecting deleted arcs which can cause failure to converge. - * [Tcl Bug 3604074] + * If s2 is doomed, we decide that (1) we will always push arcs + * forward to it, not pull them back to s; and (2) we can optimize + * away the push-forward, per comment above. So do nothing. */ + if (s2->flag || nonemptyouts(s2) > 0) + replaceempty(nfa, s, s2); - for (a = s->outs; a != NULL; a = nexta) { - nexta = a->outchain; - if (a->from == NULL) { - progress = 1; - to = a->to; - a->from = s; - freearc(nfa, a); - if (to->nins == 0) { - while ((a = to->outs)) { - freearc(nfa, a); - } - if (nexts == to) { - nexts = to->next; - } - freestate(nfa, to); - } - if (s->nouts == 0) { - while ((a = s->ins)) { - freearc(nfa, a); - } - freestate(nfa, s); - } - } - } + /* Reset the tmp fields as we walk back */ + nexts = s2->tmp; + s2->tmp = NULL; } - if (progress && f != NULL) { - dumpnfa(nfa, f); + s->tmp = NULL; + } + + /* + * Now remove all the EMPTY arcs, since we don't need them anymore. + */ + for (s = nfa->states; s != NULL && !NISERR(); s = s->next) { + for (a = s->outs; a != NULL; a = nexta) { + nexta = a->outchain; + if (a->type == EMPTY) + freearc(nfa, a); } - } while (progress && !NISERR()); + } + + /* + * And remove any states that have become useless. (This cleanup is not + * very thorough, and would be even less so if we tried to combine it with + * the previous step; but cleanup() will take care of anything we miss.) + */ + for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { + nexts = s->next; + if ((s->nins == 0 || s->nouts == 0) && !s->flag) + dropstate(nfa, s); + } + + if (f != NULL && !NISERR()) + dumpnfa(nfa, f); } /* - - unempty - optimize out an EMPTY arc, if possible - * Actually, as it stands this function always succeeds, but the return value - * is kept with an eye on possible future changes. - ^ static int unempty(struct nfa *, struct arc *); + - emptyreachable - recursively find all states reachable from s by EMPTY arcs + * The return value is the last such state found. Its tmp field links back + * to the next-to-last such state, and so on back to s, so that all these + * states can be located without searching the whole NFA. + * The maximum recursion depth here is equal to the length of the longest + * loop-free chain of EMPTY arcs, which is surely no more than the size of + * the NFA, and in practice will be a lot less than that. + ^ static struct state *emptyreachable(struct state *, struct state *); */ -static int /* 0 couldn't, 1 could */ -unempty( - struct nfa *nfa, - struct arc *a) +static struct state * +emptyreachable( + struct state *s, + struct state *lastfound) { - struct state *from = a->from; - struct state *to = a->to; - - assert(a->type == EMPTY); - assert(from != nfa->pre && to != nfa->post); + struct arc *a; - if (from == to) { /* vacuous loop */ - freearc(nfa, a); - return 1; + s->tmp = lastfound; + lastfound = s; + for (a = s->outs; a != NULL; a = a->outchain) { + if (a->type == EMPTY && a->to->tmp == NULL) + lastfound = emptyreachable(a->to, lastfound); } + return lastfound; +} + +/* + - replaceempty - replace an EMPTY arc chain with some non-empty arcs + * The EMPTY arc(s) should be deleted later, but we can't do it here because + * they may still be needed to identify other arc chains during fixempties(). + ^ static void replaceempty(struct nfa *, struct state *, struct state *); + */ +static void +replaceempty( + struct nfa *nfa, + struct state *from, + struct state *to) +{ + int fromouts; + int toins; + + assert(from != to); /* - * Mark arc for deletion. + * Create replacement arcs that bypass the need for the EMPTY chain. We + * can do this either by pushing arcs forward (linking directly from + * "from"'s predecessors to "to") or by pulling them back (linking + * directly from "from" to "to"'s successors). In general, we choose + * whichever way creates greater fan-out or fan-in, so as to improve the + * odds of reducing the other state to zero in-arcs or out-arcs and + * thereby being able to delete it. However, if "from" is doomed (has no + * non-EMPTY out-arcs), we must keep it so, so always push forward in that + * case. + * + * The fan-out/fan-in comparison should count only non-EMPTY arcs. If + * "from" is doomed, we can skip counting "to"'s arcs, since we want to + * force taking the copynonemptyins path in that case. */ + fromouts = nonemptyouts(from); + toins = (fromouts == 0) ? 1 : nonemptyins(to); - a->from = NULL; - - if (from->nouts > to->nins) { - copyouts(nfa, to, from); - return 1; + if (fromouts > toins) { + copynonemptyouts(nfa, to, from); + return; } - if (from->nouts < to->nins) { - copyins(nfa, from, to); - return 1; + if (fromouts < toins) { + copynonemptyins(nfa, from, to); + return; } /* - * from->nouts == to->nins . decide on secondary issue: copy fewest arcs + * fromouts == toins. Decide on secondary issue: copy fewest arcs. + * + * Doesn't seem to be worth the trouble to exclude empties from these + * comparisons; that takes extra time and doesn't seem to improve the + * resulting graph much. */ - if (from->nins > to->nouts) { - copyouts(nfa, to, from); - return 1; + copynonemptyouts(nfa, to, from); + return; + } else { + copynonemptyins(nfa, from, to); + return; } - - copyins(nfa, from, to); - return 1; } /* diff --git a/generic/regcomp.c b/generic/regcomp.c index ca4fc01..7116d82 100644 --- a/generic/regcomp.c +++ b/generic/regcomp.c @@ -121,12 +121,16 @@ static void destroystate(struct nfa *, struct state *); static void newarc(struct nfa *, int, pcolor, struct state *, struct state *); static struct arc *allocarc(struct nfa *, struct state *); static void freearc(struct nfa *, struct arc *); +static int nonemptyouts(struct state *); +static int nonemptyins(struct state *); static struct arc *findarc(struct state *, int, pcolor); static void cparc(struct nfa *, struct arc *, struct state *, struct state *); static void moveins(struct nfa *, struct state *, struct state *); static void copyins(struct nfa *, struct state *, struct state *); +static void copynonemptyins(struct nfa *, struct state *, struct state *); static void moveouts(struct nfa *, struct state *, struct state *); static void copyouts(struct nfa *, struct state *, struct state *); +static void copynonemptyouts(struct nfa *, struct state *, struct state *); static void cloneouts(struct nfa *, struct state *, struct state *, struct state *, int); static void delsub(struct nfa *, struct state *, struct state *); static void deltraverse(struct nfa *, struct state *, struct state *); @@ -144,7 +148,8 @@ static int push(struct nfa *, struct arc *); #define COMPATIBLE 3 /* compatible but not satisfied yet */ static int combine(struct arc *, struct arc *); static void fixempties(struct nfa *, FILE *); -static int unempty(struct nfa *, struct arc *); +static struct state *emptyreachable(struct state *, struct state *); +static void replaceempty(struct nfa *, struct state *, struct state *); static void cleanup(struct nfa *); static void markreachable(struct nfa *, struct state *, struct state *, struct state *); static void markcanreach(struct nfa *, struct state *, struct state *, struct state *); -- cgit v0.12 From ec527807539d4bdc9433636ae0c055558d85222c Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 5 Mar 2013 14:38:10 +0000 Subject: Contributed patch from Tom Lane . Merge conflicts due to different coding style and lingering obsolete compiler support resolved. --- generic/regc_nfa.c | 349 +++++++++++++++++++++++++++++++++++++++-------------- generic/regcomp.c | 7 +- 2 files changed, 264 insertions(+), 92 deletions(-) diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c index 459968a..11fd49b 100644 --- a/generic/regc_nfa.c +++ b/generic/regc_nfa.c @@ -460,6 +460,42 @@ struct arc *victim; } /* + - nonemptyouts - count non-EMPTY out arcs of a state + ^ static int nonemptyouts(struct state *); + */ +static int +nonemptyouts(s) +struct state *s; +{ + int n = 0; + struct arc *a; + + for (a = s->outs; a != NULL; a = a->outchain) { + if (a->type != EMPTY) + n++; + } + return n; +} + +/* + - nonemptyins - count non-EMPTY in arcs of a state + ^ static int nonemptyins(struct state *); + */ +static int +nonemptyins(s) +struct state *s; +{ + int n = 0; + struct arc *a; + + for (a = s->ins; a != NULL; a = a->inchain) { + if (a->type != EMPTY) + n++; + } + return n; +} + +/* - findarc - find arc, if any, from given source with given type and color * If there is more than one such arc, the result is random. ^ static struct arc *findarc(struct state *, int, pcolor); @@ -538,6 +574,26 @@ struct state *new; } /* + - copynonemptyins - as above, but ignore empty arcs + ^ static void copynonemptyins(struct nfa *, struct state *, struct state *); + */ +static VOID +copynonemptyins(nfa, oldState, newState) +struct nfa *nfa; +struct state *oldState; +struct state *newState; +{ + struct arc *a; + + assert(oldState != newState); + + for (a=oldState->ins ; a!=NULL ; a=a->inchain) { + if (a->type != EMPTY) + cparc(nfa, a, a->from, newState); + } +} + +/* - moveouts - move all out arcs of a state to another state ^ static VOID moveouts(struct nfa *, struct state *, struct state *); */ @@ -574,6 +630,26 @@ struct state *new; for (a = old->outs; a != NULL; a = a->outchain) cparc(nfa, a, new, a->to); } + +/* + - copynonemptyouts - as above, but ignore empty arcs + ^ static void copynonemptyouts(struct nfa *, struct state *, struct state *); + */ +static VOID +copynonemptyouts(nfa, oldState, newState) +struct nfa *nfa; +struct state *oldState; +struct state *newState; +{ + struct arc *a; + + assert(oldState != newState); + + for (a=oldState->outs ; a!=NULL ; a=a->outchain) { + if (a->type != EMPTY) + cparc(nfa, a, newState, a->to); + } +} /* - cloneouts - copy out arcs of a state to another state pair, modifying type @@ -1137,103 +1213,194 @@ fixempties(nfa, f) struct nfa *nfa; FILE *f; /* for debug output; NULL none */ { - struct state *s; - struct state *nexts; - struct state *to; - struct arc *a; - struct arc *nexta; - int progress; - - /* find and eliminate empties until there are no more */ - do { - progress = 0; - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { - nexts = s->next; - for (a = s->outs; a != NULL && !NISERR(); - a = a->outchain) - if (a->type == EMPTY) - /* Mark a for deletion; copy arcs - * to preserve graph connectivity - * after it is gone. */ - unempty(nfa, a); - - /* Now pass through and delete the marked arcs. - * Doing all the deletion after all the marking - * prevents arc copying from resurrecting deleted - * arcs which can cause failure to converge. - * [Tcl Bug 3604074] */ - for (a = s->outs; a != NULL; a = nexta) { - nexta = a->outchain; - if (a->from == NULL) { - progress = 1; - to = a->to; - a->from = s; - freearc(nfa, a); - if (to->nins == 0) { - while ((a = to->outs)) - freearc(nfa, a); - if (nexts == to) - nexts = to->next; - freestate(nfa, to); - } - if (s->nouts == 0) { - while ((a = s->ins)) - freearc(nfa, a); - freestate(nfa, s); - } - } - } - } - if (progress && f != NULL) - dumpnfa(nfa, f); - } while (progress && !NISERR()); + struct state *s; + struct state *s2; + struct state *nexts; + struct arc *a; + struct arc *nexta; + + /* + * First, get rid of any states whose sole out-arc is an EMPTY, since + * they're basically just aliases for their successor. The parsing + * algorithm creates enough of these that it's worth special-casing this. + */ + for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { + nexts = s->next; + if (s->nouts == 1 && !s->flag) { + a = s->outs; + assert(a != NULL && a->outchain == NULL); + if (a->type == EMPTY) { + if (s != a->to) + moveins(nfa, s, a->to); + dropstate(nfa, s); + } + } + } + + /* + * Similarly, get rid of any state with a single EMPTY in-arc, by folding + * it into its predecessor. + */ + for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { + nexts = s->next; + /* while we're at it, ensure tmp fields are clear for next step */ + s->tmp = NULL; + if (s->nins == 1 && !s->flag) { + a = s->ins; + assert(a != NULL && a->inchain == NULL); + if (a->type == EMPTY) { + if (s != a->from) + moveouts(nfa, s, a->from); + dropstate(nfa, s); + } + } + } + + /* + * For each remaining NFA state, find all other states that are reachable + * from it by a chain of one or more EMPTY arcs. Then generate new arcs + * that eliminate the need for each such chain. + * + * If we just do this straightforwardly, the algorithm gets slow in + * complex graphs, because the same arcs get copied to all intermediate + * states of an EMPTY chain, and then uselessly pushed repeatedly to the + * chain's final state; we waste a lot of time in newarc's duplicate + * checking. To improve matters, we decree that any state with only EMPTY + * out-arcs is "doomed" and will not be part of the final NFA. That can be + * ensured by not adding any new out-arcs to such a state. Having ensured + * that, we need not update the state's in-arcs list either; all arcs that + * might have gotten pushed forward to it will just get pushed directly to + * successor states. This eliminates most of the useless duplicate arcs. + */ + for (s = nfa->states; s != NULL && !NISERR(); s = s->next) { + for (s2 = emptyreachable(s, s); s2 != s && !NISERR(); s2 = nexts) { + /* + * If s2 is doomed, we decide that (1) we will always push arcs + * forward to it, not pull them back to s; and (2) we can optimize + * away the push-forward, per comment above. So do nothing. + */ + if (s2->flag || nonemptyouts(s2) > 0) + replaceempty(nfa, s, s2); + + /* Reset the tmp fields as we walk back */ + nexts = s2->tmp; + s2->tmp = NULL; + } + s->tmp = NULL; + } + + /* + * Now remove all the EMPTY arcs, since we don't need them anymore. + */ + for (s = nfa->states; s != NULL && !NISERR(); s = s->next) { + for (a = s->outs; a != NULL; a = nexta) { + nexta = a->outchain; + if (a->type == EMPTY) + freearc(nfa, a); + } + } + + /* + * And remove any states that have become useless. (This cleanup is not + * very thorough, and would be even less so if we tried to combine it with + * the previous step; but cleanup() will take care of anything we miss.) + */ + for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { + nexts = s->next; + if ((s->nins == 0 || s->nouts == 0) && !s->flag) + dropstate(nfa, s); + } + + if (f != NULL && !NISERR()) + dumpnfa(nfa, f); } - + /* - - unempty - optimize out an EMPTY arc, if possible - * Actually, as it stands this function always succeeds, but the return - * value is kept with an eye on possible future changes. - ^ static int unempty(struct nfa *, struct arc *); + - emptyreachable - recursively find all states reachable from s by EMPTY arcs + * The return value is the last such state found. Its tmp field links back + * to the next-to-last such state, and so on back to s, so that all these + * states can be located without searching the whole NFA. + * The maximum recursion depth here is equal to the length of the longest + * loop-free chain of EMPTY arcs, which is surely no more than the size of + * the NFA, and in practice will be a lot less than that. + ^ static struct state *emptyreachable(struct state *, struct state *); */ -static int /* 0 couldn't, 1 could */ -unempty(nfa, a) +static struct state * +emptyreachable(s, lastfound) +struct state *s; +struct state *lastfound; +{ + struct arc *a; + + s->tmp = lastfound; + lastfound = s; + for (a = s->outs; a != NULL; a = a->outchain) { + if (a->type == EMPTY && a->to->tmp == NULL) + lastfound = emptyreachable(a->to, lastfound); + } + return lastfound; +} + +/* + - replaceempty - replace an EMPTY arc chain with some non-empty arcs + * The EMPTY arc(s) should be deleted later, but we can't do it here because + * they may still be needed to identify other arc chains during fixempties(). + ^ static void replaceempty(struct nfa *, struct state *, struct state *); + */ +static VOID +replaceempty(nfa, from, to) struct nfa *nfa; -struct arc *a; +struct state *from; +struct state *to; { - struct state *from = a->from; - struct state *to = a->to; - - assert(a->type == EMPTY); - assert(from != nfa->pre && to != nfa->post); - - if (from == to) { /* vacuous loop */ - freearc(nfa, a); - return 1; - } - - /* Mark arc for deletion */ - a->from = NULL; - - if (from->nouts > to->nins) { - copyouts(nfa, to, from); - return 1; - } - if (from->nouts < to->nins) { - copyins(nfa, from, to); - return 1; - } - - /* from->nouts == to->nins */ - /* decide on secondary issue: move/copy fewest arcs */ - if (from->nins > to->nouts) { - copyouts(nfa, to, from); - return 1; - } - - copyins(nfa, from, to); - return 1; + int fromouts; + int toins; + + assert(from != to); + + /* + * Create replacement arcs that bypass the need for the EMPTY chain. We + * can do this either by pushing arcs forward (linking directly from + * "from"'s predecessors to "to") or by pulling them back (linking + * directly from "from" to "to"'s successors). In general, we choose + * whichever way creates greater fan-out or fan-in, so as to improve the + * odds of reducing the other state to zero in-arcs or out-arcs and + * thereby being able to delete it. However, if "from" is doomed (has no + * non-EMPTY out-arcs), we must keep it so, so always push forward in that + * case. + * + * The fan-out/fan-in comparison should count only non-EMPTY arcs. If + * "from" is doomed, we can skip counting "to"'s arcs, since we want to + * force taking the copynonemptyins path in that case. + */ + fromouts = nonemptyouts(from); + toins = (fromouts == 0) ? 1 : nonemptyins(to); + + if (fromouts > toins) { + copynonemptyouts(nfa, to, from); + return; + } + if (fromouts < toins) { + copynonemptyins(nfa, from, to); + return; + } + + /* + * fromouts == toins. Decide on secondary issue: copy fewest arcs. + * + * Doesn't seem to be worth the trouble to exclude empties from these + * comparisons; that takes extra time and doesn't seem to improve the + * resulting graph much. + */ + if (from->nins > to->nouts) { + copynonemptyouts(nfa, to, from); + return; + } else { + copynonemptyins(nfa, from, to); + return; + } } - + /* - cleanup - clean up NFA after optimizations ^ static VOID cleanup(struct nfa *); diff --git a/generic/regcomp.c b/generic/regcomp.c index 307877a..6fd9c81 100644 --- a/generic/regcomp.c +++ b/generic/regcomp.c @@ -123,12 +123,16 @@ static VOID destroystate _ANSI_ARGS_((struct nfa *, struct state *)); static VOID newarc _ANSI_ARGS_((struct nfa *, int, pcolor, struct state *, struct state *)); static struct arc *allocarc _ANSI_ARGS_((struct nfa *, struct state *)); static VOID freearc _ANSI_ARGS_((struct nfa *, struct arc *)); +static int nonemptyouts _ANSI_ARGS_((struct state *)); +static int nonemptyins _ANSI_ARGS_((struct state *)); static struct arc *findarc _ANSI_ARGS_((struct state *, int, pcolor)); static VOID cparc _ANSI_ARGS_((struct nfa *, struct arc *, struct state *, struct state *)); static VOID moveins _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); static VOID copyins _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); +static VOID copynonemptyins _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); static VOID moveouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); static VOID copyouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); +static VOID copynonemptyouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); static VOID cloneouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *, int)); static VOID delsub _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); static VOID deltraverse _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); @@ -146,7 +150,8 @@ static int push _ANSI_ARGS_((struct nfa *, struct arc *)); #define COMPATIBLE 3 /* compatible but not satisfied yet */ static int combine _ANSI_ARGS_((struct arc *, struct arc *)); static VOID fixempties _ANSI_ARGS_((struct nfa *, FILE *)); -static int unempty _ANSI_ARGS_((struct nfa *, struct arc *)); +static struct state *emptyreachable _ANSI_ARGS_((struct state *, struct state *)); +static VOID replaceempty _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); static VOID cleanup _ANSI_ARGS_((struct nfa *)); static VOID markreachable _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *)); static VOID markcanreach _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *)); -- cgit v0.12 From 92e5e02234ad7151efec211f580998994de8d392 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 6 Mar 2013 16:07:14 +0000 Subject: New routine hasnonemptyout() for minor improvement to new fixempties(). --- generic/regc_nfa.c | 18 +++++++++++++++++- generic/regcomp.c | 1 + 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c index 11fd49b..852a676 100644 --- a/generic/regc_nfa.c +++ b/generic/regc_nfa.c @@ -460,6 +460,22 @@ struct arc *victim; } /* + - hasnonemptyout - Does state have a non-EMPTY out arc? + ^ static int hasnonemptyout(struct state *); + */ +static int +hasnonemptyout(s) +struct state *s; +{ + struct arc *a; + + for (a = s->outs; a != NULL; a = a->outchain) + if (a->type != EMPTY) + return 1; + return 0; +} + +/* - nonemptyouts - count non-EMPTY out arcs of a state ^ static int nonemptyouts(struct state *); */ @@ -1279,7 +1295,7 @@ FILE *f; /* for debug output; NULL none */ * forward to it, not pull them back to s; and (2) we can optimize * away the push-forward, per comment above. So do nothing. */ - if (s2->flag || nonemptyouts(s2) > 0) + if (s2->flag || hasnonemptyout(s2)) replaceempty(nfa, s, s2); /* Reset the tmp fields as we walk back */ diff --git a/generic/regcomp.c b/generic/regcomp.c index 6fd9c81..083e9b1 100644 --- a/generic/regcomp.c +++ b/generic/regcomp.c @@ -123,6 +123,7 @@ static VOID destroystate _ANSI_ARGS_((struct nfa *, struct state *)); static VOID newarc _ANSI_ARGS_((struct nfa *, int, pcolor, struct state *, struct state *)); static struct arc *allocarc _ANSI_ARGS_((struct nfa *, struct state *)); static VOID freearc _ANSI_ARGS_((struct nfa *, struct arc *)); +static int hasnonemptyout _ANSI_ARGS_((struct state *)); static int nonemptyouts _ANSI_ARGS_((struct state *)); static int nonemptyins _ANSI_ARGS_((struct state *)); static struct arc *findarc _ANSI_ARGS_((struct state *, int, pcolor)); -- cgit v0.12 From 596e86d26f839d7ffb40012c2c511e33b6de0b12 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 6 Mar 2013 16:26:18 +0000 Subject: New routine hasnonemptyout() for minor improvement to new fixempties(). --- generic/regc_nfa.c | 18 +++++++++++++++++- generic/regcomp.c | 1 + 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c index 5dab7bc..f072985 100644 --- a/generic/regc_nfa.c +++ b/generic/regc_nfa.c @@ -497,6 +497,22 @@ freearc( } /* + - hasnonemptyout - Does state have a non-EMPTY out arc? + ^ static int hasnonemptyout(struct state *); + */ +static int +hasnonemptyout(s) +struct state *s; +{ + struct arc *a; + + for (a = s->outs; a != NULL; a = a->outchain) + if (a->type != EMPTY) + return 1; + return 0; +} + +/* - nonemptyouts - count non-EMPTY out arcs of a state ^ static int nonemptyouts(struct state *); */ @@ -1375,7 +1391,7 @@ fixempties( * forward to it, not pull them back to s; and (2) we can optimize * away the push-forward, per comment above. So do nothing. */ - if (s2->flag || nonemptyouts(s2) > 0) + if (s2->flag || hasnonemptyout(s2)) replaceempty(nfa, s, s2); /* Reset the tmp fields as we walk back */ diff --git a/generic/regcomp.c b/generic/regcomp.c index 7116d82..68bfb30 100644 --- a/generic/regcomp.c +++ b/generic/regcomp.c @@ -121,6 +121,7 @@ static void destroystate(struct nfa *, struct state *); static void newarc(struct nfa *, int, pcolor, struct state *, struct state *); static struct arc *allocarc(struct nfa *, struct state *); static void freearc(struct nfa *, struct arc *); +static int hasnonemptyout(struct state *); static int nonemptyouts(struct state *); static int nonemptyins(struct state *); static struct arc *findarc(struct state *, int, pcolor); -- cgit v0.12 From 37745421ae8d82481007aca3632c76c589a1ff63 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 6 Mar 2013 16:57:41 +0000 Subject: Use flag argument to combine copy(nonempty)* routines into copy* routines. --- generic/regc_nfa.c | 74 +++++++++++++++--------------------------------------- generic/regcomp.c | 8 +++--- 2 files changed, 23 insertions(+), 59 deletions(-) diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c index 852a676..6e32cc9 100644 --- a/generic/regc_nfa.c +++ b/generic/regc_nfa.c @@ -572,44 +572,27 @@ struct state *new; } /* - - copyins - copy all in arcs of a state to another state - ^ static VOID copyins(struct nfa *, struct state *, struct state *); + - copyins - copy in arcs of a state to another state + * Either all arcs, or only non-empty ones as determined by all value. + ^ static VOID copyins(struct nfa *, struct state *, struct state *, int); */ static VOID -copyins(nfa, old, new) +copyins(nfa, old, new, all) struct nfa *nfa; struct state *old; struct state *new; +int all; { struct arc *a; assert(old != new); for (a = old->ins; a != NULL; a = a->inchain) - cparc(nfa, a, a->from, new); + if (all || a->type != EMPTY) + cparc(nfa, a, a->from, new); } /* - - copynonemptyins - as above, but ignore empty arcs - ^ static void copynonemptyins(struct nfa *, struct state *, struct state *); - */ -static VOID -copynonemptyins(nfa, oldState, newState) -struct nfa *nfa; -struct state *oldState; -struct state *newState; -{ - struct arc *a; - - assert(oldState != newState); - - for (a=oldState->ins ; a!=NULL ; a=a->inchain) { - if (a->type != EMPTY) - cparc(nfa, a, a->from, newState); - } -} - -/* - moveouts - move all out arcs of a state to another state ^ static VOID moveouts(struct nfa *, struct state *, struct state *); */ @@ -630,41 +613,24 @@ struct state *new; } /* - - copyouts - copy all out arcs of a state to another state - ^ static VOID copyouts(struct nfa *, struct state *, struct state *); + - copyouts - copy out arcs of a state to another state + * Either all arcs, or only non-empty ones as determined by all value. + ^ static VOID copyouts(struct nfa *, struct state *, struct state *, int); */ static VOID -copyouts(nfa, old, new) +copyouts(nfa, old, new, all) struct nfa *nfa; struct state *old; struct state *new; +int all; { struct arc *a; assert(old != new); for (a = old->outs; a != NULL; a = a->outchain) - cparc(nfa, a, new, a->to); -} - -/* - - copynonemptyouts - as above, but ignore empty arcs - ^ static void copynonemptyouts(struct nfa *, struct state *, struct state *); - */ -static VOID -copynonemptyouts(nfa, oldState, newState) -struct nfa *nfa; -struct state *oldState; -struct state *newState; -{ - struct arc *a; - - assert(oldState != newState); - - for (a=oldState->outs ; a!=NULL ; a=a->outchain) { - if (a->type != EMPTY) - cparc(nfa, a, newState, a->to); - } + if (all || a->type != EMPTY) + cparc(nfa, a, new, a->to); } /* @@ -983,7 +949,7 @@ struct arc *con; if (NISERR()) return 0; assert(to != from); /* con is not an inarc */ - copyins(nfa, from, s); /* duplicate inarcs */ + copyins(nfa, from, s, 1); /* duplicate inarcs */ cparc(nfa, con, s, to); /* move constraint arc */ freearc(nfa, con); from = s; @@ -1123,7 +1089,7 @@ struct arc *con; s = newstate(nfa); if (NISERR()) return 0; - copyouts(nfa, to, s); /* duplicate outarcs */ + copyouts(nfa, to, s, 1); /* duplicate outarcs */ cparc(nfa, con, from, s); /* move constraint */ freearc(nfa, con); to = s; @@ -1393,11 +1359,11 @@ struct state *to; toins = (fromouts == 0) ? 1 : nonemptyins(to); if (fromouts > toins) { - copynonemptyouts(nfa, to, from); + copyouts(nfa, to, from, 0); return; } if (fromouts < toins) { - copynonemptyins(nfa, from, to); + copyins(nfa, from, to, 0); return; } @@ -1409,10 +1375,10 @@ struct state *to; * resulting graph much. */ if (from->nins > to->nouts) { - copynonemptyouts(nfa, to, from); + copyouts(nfa, to, from, 0); return; } else { - copynonemptyins(nfa, from, to); + copyins(nfa, from, to, 0); return; } } diff --git a/generic/regcomp.c b/generic/regcomp.c index 083e9b1..b44ef8f 100644 --- a/generic/regcomp.c +++ b/generic/regcomp.c @@ -129,11 +129,9 @@ static int nonemptyins _ANSI_ARGS_((struct state *)); static struct arc *findarc _ANSI_ARGS_((struct state *, int, pcolor)); static VOID cparc _ANSI_ARGS_((struct nfa *, struct arc *, struct state *, struct state *)); static VOID moveins _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); -static VOID copyins _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); -static VOID copynonemptyins _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); +static VOID copyins _ANSI_ARGS_((struct nfa *, struct state *, struct state *, int)); static VOID moveouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); -static VOID copyouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); -static VOID copynonemptyouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); +static VOID copyouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *, int)); static VOID cloneouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *, int)); static VOID delsub _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); static VOID deltraverse _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); @@ -558,7 +556,7 @@ struct nfa *nfa; /* do the splits */ for (s = slist; s != NULL; s = s2) { s2 = newstate(nfa); - copyouts(nfa, s, s2); + copyouts(nfa, s, s2, 1); for (a = s->ins; a != NULL; a = b) { b = a->inchain; if (a->from != pre) { -- cgit v0.12 From 0ea582d8785e76223d06415791097fc3400c7bbf Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 6 Mar 2013 17:30:14 +0000 Subject: Use flag argument to combine copy(nonempty)* routines into copy* routines. --- generic/regc_nfa.c | 76 ++++++++++++++++-------------------------------------- generic/regcomp.c | 8 +++--- 2 files changed, 25 insertions(+), 59 deletions(-) diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c index f072985..5e2ad3a 100644 --- a/generic/regc_nfa.c +++ b/generic/regc_nfa.c @@ -611,41 +611,25 @@ moveins( } /* - - copyins - copy all in arcs of a state to another state - ^ static VOID copyins(struct nfa *, struct state *, struct state *); + - copyins - copy in arcs of a state to another state + * Either all arcs, or only non-empty ones as determined by all value. + ^ static VOID copyins(struct nfa *, struct state *, struct state *, int); */ static void copyins( struct nfa *nfa, struct state *oldState, - struct state *newState) + struct state *newState, + int all) { struct arc *a; assert(oldState != newState); for (a=oldState->ins ; a!=NULL ; a=a->inchain) { - cparc(nfa, a, a->from, newState); - } -} - -/* - - copynonemptyins - as above, but ignore empty arcs - ^ static void copynonemptyins(struct nfa *, struct state *, struct state *); - */ -static void -copynonemptyins( - struct nfa *nfa, - struct state *oldState, - struct state *newState) -{ - struct arc *a; - - assert(oldState != newState); - - for (a=oldState->ins ; a!=NULL ; a=a->inchain) { - if (a->type != EMPTY) + if (all || a->type != EMPTY) { cparc(nfa, a, a->from, newState); + } } } @@ -670,41 +654,25 @@ moveouts( } /* - - copyouts - copy all out arcs of a state to another state - ^ static VOID copyouts(struct nfa *, struct state *, struct state *); + - copyouts - copy out arcs of a state to another state + * Either all arcs, or only non-empty ones as determined by all value. + ^ static VOID copyouts(struct nfa *, struct state *, struct state *, int); */ static void copyouts( struct nfa *nfa, struct state *oldState, - struct state *newState) + struct state *newState, + int all) { struct arc *a; assert(oldState != newState); for (a=oldState->outs ; a!=NULL ; a=a->outchain) { - cparc(nfa, a, newState, a->to); - } -} - -/* - - copynonemptyouts - as above, but ignore empty arcs - ^ static void copynonemptyouts(struct nfa *, struct state *, struct state *); - */ -static void -copynonemptyouts( - struct nfa *nfa, - struct state *oldState, - struct state *newState) -{ - struct arc *a; - - assert(oldState != newState); - - for (a=oldState->outs ; a!=NULL ; a=a->outchain) { - if (a->type != EMPTY) + if (all || a->type != EMPTY) { cparc(nfa, a, newState, a->to); + } } } @@ -1049,9 +1017,9 @@ pull( if (NISERR()) { return 0; } - assert(to != from); /* con is not an inarc */ - copyins(nfa, from, s); /* duplicate inarcs */ - cparc(nfa, con, s, to); /* move constraint arc */ + assert(to != from); /* con is not an inarc */ + copyins(nfa, from, s, 1); /* duplicate inarcs */ + cparc(nfa, con, s, to); /* move constraint arc */ freearc(nfa, con); from = s; con = from->outs; @@ -1209,7 +1177,7 @@ push( if (NISERR()) { return 0; } - copyouts(nfa, to, s); /* duplicate outarcs */ + copyouts(nfa, to, s, 1); /* duplicate outarcs */ cparc(nfa, con, from, s); /* move constraint */ freearc(nfa, con); to = s; @@ -1489,11 +1457,11 @@ replaceempty( toins = (fromouts == 0) ? 1 : nonemptyins(to); if (fromouts > toins) { - copynonemptyouts(nfa, to, from); + copyouts(nfa, to, from, 0); return; } if (fromouts < toins) { - copynonemptyins(nfa, from, to); + copyins(nfa, from, to, 0); return; } @@ -1505,10 +1473,10 @@ replaceempty( * resulting graph much. */ if (from->nins > to->nouts) { - copynonemptyouts(nfa, to, from); + copyouts(nfa, to, from, 0); return; } else { - copynonemptyins(nfa, from, to); + copyins(nfa, from, to, 0); return; } } diff --git a/generic/regcomp.c b/generic/regcomp.c index 68bfb30..8880318 100644 --- a/generic/regcomp.c +++ b/generic/regcomp.c @@ -127,11 +127,9 @@ static int nonemptyins(struct state *); static struct arc *findarc(struct state *, int, pcolor); static void cparc(struct nfa *, struct arc *, struct state *, struct state *); static void moveins(struct nfa *, struct state *, struct state *); -static void copyins(struct nfa *, struct state *, struct state *); -static void copynonemptyins(struct nfa *, struct state *, struct state *); +static void copyins(struct nfa *, struct state *, struct state *, int); static void moveouts(struct nfa *, struct state *, struct state *); -static void copyouts(struct nfa *, struct state *, struct state *); -static void copynonemptyouts(struct nfa *, struct state *, struct state *); +static void copyouts(struct nfa *, struct state *, struct state *, int); static void cloneouts(struct nfa *, struct state *, struct state *, struct state *, int); static void delsub(struct nfa *, struct state *, struct state *); static void deltraverse(struct nfa *, struct state *, struct state *); @@ -613,7 +611,7 @@ makesearch( for (s=slist ; s!=NULL ; s=s2) { s2 = newstate(nfa); - copyouts(nfa, s, s2); + copyouts(nfa, s, s2, 1); for (a=s->ins ; a!=NULL ; a=b) { b = a->inchain; -- cgit v0.12 From 24abdb72019b174cb20e0d4d29e0a76b689c4459 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 6 Mar 2013 18:00:21 +0000 Subject: Indent reduction in fixempties(). --- generic/regc_nfa.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c index 6e32cc9..b125062 100644 --- a/generic/regc_nfa.c +++ b/generic/regc_nfa.c @@ -1208,15 +1208,15 @@ FILE *f; /* for debug output; NULL none */ */ for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { nexts = s->next; - if (s->nouts == 1 && !s->flag) { - a = s->outs; - assert(a != NULL && a->outchain == NULL); - if (a->type == EMPTY) { - if (s != a->to) - moveins(nfa, s, a->to); - dropstate(nfa, s); - } - } + if (s->flag || s->nouts != 1) + continue; + a = s->outs; + assert(a != NULL && a->outchain == NULL); + if (a->type != EMPTY) + continue; + if (s != a->to) + moveins(nfa, s, a->to); + dropstate(nfa, s); } /* @@ -1226,16 +1226,16 @@ FILE *f; /* for debug output; NULL none */ for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { nexts = s->next; /* while we're at it, ensure tmp fields are clear for next step */ - s->tmp = NULL; - if (s->nins == 1 && !s->flag) { - a = s->ins; - assert(a != NULL && a->inchain == NULL); - if (a->type == EMPTY) { - if (s != a->from) - moveouts(nfa, s, a->from); - dropstate(nfa, s); - } - } + assert(s->tmp = NULL); + if (s->flag || s->nins != 1) + continue; + a = s->ins; + assert(a != NULL && a->inchain == NULL); + if (a->type != EMPTY) + continue; + if (s != a->from) + moveouts(nfa, s, a->from); + dropstate(nfa, s); } /* -- cgit v0.12 From 681a763b1526e58746e769676959e61013073f93 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 6 Mar 2013 18:01:24 +0000 Subject: Indent reduction in fixempties() --- generic/regc_nfa.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c index 5e2ad3a..baf4592 100644 --- a/generic/regc_nfa.c +++ b/generic/regc_nfa.c @@ -1306,15 +1306,15 @@ fixempties( */ for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { nexts = s->next; - if (s->nouts == 1 && !s->flag) { - a = s->outs; - assert(a != NULL && a->outchain == NULL); - if (a->type == EMPTY) { - if (s != a->to) - moveins(nfa, s, a->to); - dropstate(nfa, s); - } - } + if (s->flag || s->nouts != 1) + continue; + a = s->outs; + assert(a != NULL && a->outchain == NULL); + if (a->type != EMPTY) + continue; + if (s != a->to) + moveins(nfa, s, a->to); + dropstate(nfa, s); } /* @@ -1324,16 +1324,16 @@ fixempties( for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { nexts = s->next; /* while we're at it, ensure tmp fields are clear for next step */ - s->tmp = NULL; - if (s->nins == 1 && !s->flag) { - a = s->ins; - assert(a != NULL && a->inchain == NULL); - if (a->type == EMPTY) { - if (s != a->from) - moveouts(nfa, s, a->from); - dropstate(nfa, s); - } - } + assert(s->tmp = NULL); + if (s->flag || s->nins != 1) + continue; + a = s->ins; + assert(a != NULL && a->inchain == NULL); + if (a->type != EMPTY) + continue; + if (s != a->from) + moveouts(nfa, s, a->from); + dropstate(nfa, s); } /* -- cgit v0.12 From b5ed01c2d8765e2f5bd622dfbfbf4c85215cd03a Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 6 Mar 2013 19:11:44 +0000 Subject: Rework into Tcl 8.4 coding style (closer to original Spencer). --- generic/regc_nfa.c | 351 +++++++++++++++++++++++++++-------------------------- 1 file changed, 177 insertions(+), 174 deletions(-) diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c index b125062..107e466 100644 --- a/generic/regc_nfa.c +++ b/generic/regc_nfa.c @@ -474,7 +474,7 @@ struct state *s; return 1; return 0; } - + /* - nonemptyouts - count non-EMPTY out arcs of a state ^ static int nonemptyouts(struct state *); @@ -483,16 +483,15 @@ static int nonemptyouts(s) struct state *s; { - int n = 0; - struct arc *a; - - for (a = s->outs; a != NULL; a = a->outchain) { - if (a->type != EMPTY) - n++; - } - return n; + int n = 0; + struct arc *a; + + for (a = s->outs; a != NULL; a = a->outchain) + if (a->type != EMPTY) + n++; + return n; } - + /* - nonemptyins - count non-EMPTY in arcs of a state ^ static int nonemptyins(struct state *); @@ -501,16 +500,15 @@ static int nonemptyins(s) struct state *s; { - int n = 0; - struct arc *a; - - for (a = s->ins; a != NULL; a = a->inchain) { - if (a->type != EMPTY) - n++; - } - return n; + int n = 0; + struct arc *a; + + for (a = s->ins; a != NULL; a = a->inchain) + if (a->type != EMPTY) + n++; + return n; } - + /* - findarc - find arc, if any, from given source with given type and color * If there is more than one such arc, the result is random. @@ -1195,108 +1193,114 @@ fixempties(nfa, f) struct nfa *nfa; FILE *f; /* for debug output; NULL none */ { - struct state *s; - struct state *s2; - struct state *nexts; - struct arc *a; - struct arc *nexta; - - /* - * First, get rid of any states whose sole out-arc is an EMPTY, since - * they're basically just aliases for their successor. The parsing - * algorithm creates enough of these that it's worth special-casing this. - */ - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { - nexts = s->next; - if (s->flag || s->nouts != 1) - continue; - a = s->outs; - assert(a != NULL && a->outchain == NULL); - if (a->type != EMPTY) - continue; - if (s != a->to) - moveins(nfa, s, a->to); - dropstate(nfa, s); - } - - /* - * Similarly, get rid of any state with a single EMPTY in-arc, by folding - * it into its predecessor. - */ - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { - nexts = s->next; - /* while we're at it, ensure tmp fields are clear for next step */ - assert(s->tmp = NULL); - if (s->flag || s->nins != 1) - continue; - a = s->ins; - assert(a != NULL && a->inchain == NULL); - if (a->type != EMPTY) - continue; - if (s != a->from) - moveouts(nfa, s, a->from); - dropstate(nfa, s); - } - - /* - * For each remaining NFA state, find all other states that are reachable - * from it by a chain of one or more EMPTY arcs. Then generate new arcs - * that eliminate the need for each such chain. - * - * If we just do this straightforwardly, the algorithm gets slow in - * complex graphs, because the same arcs get copied to all intermediate - * states of an EMPTY chain, and then uselessly pushed repeatedly to the - * chain's final state; we waste a lot of time in newarc's duplicate - * checking. To improve matters, we decree that any state with only EMPTY - * out-arcs is "doomed" and will not be part of the final NFA. That can be - * ensured by not adding any new out-arcs to such a state. Having ensured - * that, we need not update the state's in-arcs list either; all arcs that - * might have gotten pushed forward to it will just get pushed directly to - * successor states. This eliminates most of the useless duplicate arcs. - */ - for (s = nfa->states; s != NULL && !NISERR(); s = s->next) { - for (s2 = emptyreachable(s, s); s2 != s && !NISERR(); s2 = nexts) { - /* - * If s2 is doomed, we decide that (1) we will always push arcs - * forward to it, not pull them back to s; and (2) we can optimize - * away the push-forward, per comment above. So do nothing. - */ - if (s2->flag || hasnonemptyout(s2)) - replaceempty(nfa, s, s2); - - /* Reset the tmp fields as we walk back */ - nexts = s2->tmp; - s2->tmp = NULL; + struct state *s; + struct state *s2; + struct state *nexts; + struct arc *a; + struct arc *nexta; + + /* + * First, get rid of any states whose sole out-arc is an EMPTY, + * since they're basically just aliases for their successor. + * The parsing algorithm creates enough of these that it's worth + * special-casing this. + */ + for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { + nexts = s->next; + if (s->flag || s->nouts != 1) + continue; + a = s->outs; + assert(a != NULL && a->outchain == NULL); + if (a->type != EMPTY) + continue; + if (s != a->to) + moveins(nfa, s, a->to); + dropstate(nfa, s); } - s->tmp = NULL; - } - - /* - * Now remove all the EMPTY arcs, since we don't need them anymore. - */ - for (s = nfa->states; s != NULL && !NISERR(); s = s->next) { - for (a = s->outs; a != NULL; a = nexta) { - nexta = a->outchain; - if (a->type == EMPTY) - freearc(nfa, a); + + /* + * Similarly, get rid of any state with a single EMPTY in-arc, + * by folding it into its predecessor. + */ + for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { + nexts = s->next; + /* Ensure tmp fields are clear for next step */ + assert(s->tmp = NULL); + if (s->flag || s->nins != 1) + continue; + a = s->ins; + assert(a != NULL && a->inchain == NULL); + if (a->type != EMPTY) + continue; + if (s != a->from) + moveouts(nfa, s, a->from); + dropstate(nfa, s); + } + + /* + * For each remaining NFA state, find all other states that are + * reachable from it by a chain of one or more EMPTY arcs. Then + * generate new arcs that eliminate the need for each such chain. + * + * If we just do this straightforwardly, the algorithm gets slow + * in complex graphs, because the same arcs get copied to all + * intermediate states of an EMPTY chain, and then uselessly + * pushed repeatedly to the chain's final state; we waste a lot + * of time in newarc's duplicate checking. To improve matters, + * we decree that any state with only EMPTY out-arcs is "doomed" + * and will not be part of the final NFA. That can be ensured by + * not adding any new out-arcs to such a state. Having ensured + * that, we need not update the state's in-arcs list either; all + * arcs that might have gotten pushed forward to it will just get + * pushed directly to successor states. This eliminates most of + * the useless duplicate arcs. + */ + for (s = nfa->states; s != NULL && !NISERR(); s = s->next) { + for (s2 = emptyreachable(s, s); s2 != s && !NISERR(); + s2 = nexts) { + /* + * If s2 is doomed, we decide that (1) we will + * always push arcs forward to it, not pull them + * back to s; and (2) we can optimize away the + * push-forward, per comment above. + * So do nothing. + */ + if (s2->flag || hasnonemptyout(s2)) + replaceempty(nfa, s, s2); + + /* Reset the tmp fields as we walk back */ + nexts = s2->tmp; + s2->tmp = NULL; + } + s->tmp = NULL; + } + + /* + * Remove all the EMPTY arcs, since we don't need them anymore. + */ + for (s = nfa->states; s != NULL; s = s->next) + for (a = s->outs; a != NULL; a = nexta) { + nexta = a->outchain; + if (a->type == EMPTY) + freearc(nfa, a); + } + + /* + * And remove any states that have become useless. (This + * cleanup is not very thorough, and would be even less so if we + * tried to combine it with the previous step; but cleanup() + * will take care of anything we miss.) + */ + for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { + nexts = s->next; + if ((s->nins == 0 || s->nouts == 0) && !s->flag) + dropstate(nfa, s); } - } - - /* - * And remove any states that have become useless. (This cleanup is not - * very thorough, and would be even less so if we tried to combine it with - * the previous step; but cleanup() will take care of anything we miss.) - */ - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { - nexts = s->next; - if ((s->nins == 0 || s->nouts == 0) && !s->flag) - dropstate(nfa, s); - } - - if (f != NULL && !NISERR()) - dumpnfa(nfa, f); + + if (f != NULL && !NISERR()) + dumpnfa(nfa, f); } - + /* - emptyreachable - recursively find all states reachable from s by EMPTY arcs * The return value is the last such state found. Its tmp field links back @@ -1312,17 +1316,16 @@ emptyreachable(s, lastfound) struct state *s; struct state *lastfound; { - struct arc *a; - - s->tmp = lastfound; - lastfound = s; - for (a = s->outs; a != NULL; a = a->outchain) { - if (a->type == EMPTY && a->to->tmp == NULL) - lastfound = emptyreachable(a->to, lastfound); - } - return lastfound; + struct arc *a; + + s->tmp = lastfound; + lastfound = s; + for (a = s->outs; a != NULL; a = a->outchain) + if (a->type == EMPTY && a->to->tmp == NULL) + lastfound = emptyreachable(a->to, lastfound); + return lastfound; } - + /* - replaceempty - replace an EMPTY arc chain with some non-empty arcs * The EMPTY arc(s) should be deleted later, but we can't do it here because @@ -1335,54 +1338,54 @@ struct nfa *nfa; struct state *from; struct state *to; { - int fromouts; - int toins; - - assert(from != to); - - /* - * Create replacement arcs that bypass the need for the EMPTY chain. We - * can do this either by pushing arcs forward (linking directly from - * "from"'s predecessors to "to") or by pulling them back (linking - * directly from "from" to "to"'s successors). In general, we choose - * whichever way creates greater fan-out or fan-in, so as to improve the - * odds of reducing the other state to zero in-arcs or out-arcs and - * thereby being able to delete it. However, if "from" is doomed (has no - * non-EMPTY out-arcs), we must keep it so, so always push forward in that - * case. - * - * The fan-out/fan-in comparison should count only non-EMPTY arcs. If - * "from" is doomed, we can skip counting "to"'s arcs, since we want to - * force taking the copynonemptyins path in that case. - */ - fromouts = nonemptyouts(from); - toins = (fromouts == 0) ? 1 : nonemptyins(to); - - if (fromouts > toins) { - copyouts(nfa, to, from, 0); - return; - } - if (fromouts < toins) { - copyins(nfa, from, to, 0); - return; - } - - /* - * fromouts == toins. Decide on secondary issue: copy fewest arcs. - * - * Doesn't seem to be worth the trouble to exclude empties from these - * comparisons; that takes extra time and doesn't seem to improve the - * resulting graph much. - */ - if (from->nins > to->nouts) { - copyouts(nfa, to, from, 0); - return; - } else { + int fromouts; + int toins; + + assert(from != to); + + /* + * Create replacement arcs that bypass the need for the EMPTY + * chain. We can do this either by pushing arcs forward + * (linking directly from predecessors of "from" to "to") or by + * pulling them back (linking directly from "from" to the + * successors of "to"). In general, we choose whichever way + * creates greater fan-out or fan-in, so as to improve the odds + * of reducing the other state to zero in-arcs or out-arcs and + * thereby being able to delete it. However, if "from" is + * doomed (has no non-EMPTY out-arcs), we must keep it so, so + * always push forward in that case. + * + * The fan-out/fan-in comparison should count only non-EMPTY + * arcs. If "from" is doomed, we can skip counting "to"'s arcs, + * since we want to force taking the copyins path in that case. + */ + fromouts = nonemptyouts(from); + toins = (fromouts == 0) ? 1 : nonemptyins(to); + + if (fromouts > toins) { + copyouts(nfa, to, from, 0); + return; + } + if (fromouts < toins) { + copyins(nfa, from, to, 0); + return; + } + + /* + * fromouts == toins. Secondary decision: copy fewest arcs. + * + * Doesn't seem to be worth the trouble to exclude empties from + * these comparisons; that takes extra time and doesn't seem to + * improve the resulting graph much. + */ + if (from->nins > to->nouts) { + copyouts(nfa, to, from, 0); + return; + } + copyins(nfa, from, to, 0); - return; - } } - + /* - cleanup - clean up NFA after optimizations ^ static VOID cleanup(struct nfa *); -- cgit v0.12 From 92cae185d9294329ca088e078717aa662760bb72 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 6 Mar 2013 19:53:42 +0000 Subject: Rework into Tcl 8.5+ coding style. --- generic/regc_nfa.c | 119 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 69 insertions(+), 50 deletions(-) diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c index baf4592..af0bf3f 100644 --- a/generic/regc_nfa.c +++ b/generic/regc_nfa.c @@ -501,15 +501,17 @@ freearc( ^ static int hasnonemptyout(struct state *); */ static int -hasnonemptyout(s) -struct state *s; +hasnonemptyout( + struct state *s) { - struct arc *a; + struct arc *a; - for (a = s->outs; a != NULL; a = a->outchain) - if (a->type != EMPTY) - return 1; - return 0; + for (a = s->outs; a != NULL; a = a->outchain) { + if (a->type != EMPTY) { + return 1; + } + } + return 0; } /* @@ -524,8 +526,9 @@ nonemptyouts( struct arc *a; for (a = s->outs; a != NULL; a = a->outchain) { - if (a->type != EMPTY) + if (a->type != EMPTY) { n++; + } } return n; } @@ -542,8 +545,9 @@ nonemptyins( struct arc *a; for (a = s->ins; a != NULL; a = a->inchain) { - if (a->type != EMPTY) + if (a->type != EMPTY) { n++; + } } return n; } @@ -1300,67 +1304,78 @@ fixempties( struct arc *nexta; /* - * First, get rid of any states whose sole out-arc is an EMPTY, since - * they're basically just aliases for their successor. The parsing - * algorithm creates enough of these that it's worth special-casing this. + * First, get rid of any states whose sole out-arc is an EMPTY, + * since they're basically just aliases for their successor. The + * parsing algorithm creates enough of these that it's worth + * special-casing this. */ for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { nexts = s->next; - if (s->flag || s->nouts != 1) + if (s->flag || s->nouts != 1) { continue; + } a = s->outs; assert(a != NULL && a->outchain == NULL); - if (a->type != EMPTY) + if (a->type != EMPTY) { continue; - if (s != a->to) + } + if (s != a->to) { moveins(nfa, s, a->to); + } dropstate(nfa, s); } /* - * Similarly, get rid of any state with a single EMPTY in-arc, by folding - * it into its predecessor. + * Similarly, get rid of any state with a single EMPTY in-arc, by + * folding it into its predecessor. */ for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { nexts = s->next; - /* while we're at it, ensure tmp fields are clear for next step */ + /* Ensure tmp fields are clear for next step */ assert(s->tmp = NULL); - if (s->flag || s->nins != 1) + if (s->flag || s->nins != 1) { continue; + } a = s->ins; assert(a != NULL && a->inchain == NULL); - if (a->type != EMPTY) + if (a->type != EMPTY) { continue; - if (s != a->from) + } + if (s != a->from) { moveouts(nfa, s, a->from); + } dropstate(nfa, s); } /* - * For each remaining NFA state, find all other states that are reachable - * from it by a chain of one or more EMPTY arcs. Then generate new arcs - * that eliminate the need for each such chain. + * For each remaining NFA state, find all other states that are + * reachable from it by a chain of one or more EMPTY arcs. Then + * generate new arcs that eliminate the need for each such chain. * * If we just do this straightforwardly, the algorithm gets slow in - * complex graphs, because the same arcs get copied to all intermediate - * states of an EMPTY chain, and then uselessly pushed repeatedly to the - * chain's final state; we waste a lot of time in newarc's duplicate - * checking. To improve matters, we decree that any state with only EMPTY - * out-arcs is "doomed" and will not be part of the final NFA. That can be - * ensured by not adding any new out-arcs to such a state. Having ensured - * that, we need not update the state's in-arcs list either; all arcs that - * might have gotten pushed forward to it will just get pushed directly to - * successor states. This eliminates most of the useless duplicate arcs. + * complex graphs, because the same arcs get copied to all + * intermediate states of an EMPTY chain, and then uselessly pushed + * repeatedly to the chain's final state; we waste a lot of time in + * newarc's duplicate checking. To improve matters, we decree that + * any state with only EMPTY out-arcs is "doomed" and will not be + * part of the final NFA. That can be ensured by not adding any new + * out-arcs to such a state. Having ensured that, we need not update + * the state's in-arcs list either; all arcs that might have gotten + * pushed forward to it will just get pushed directly to successor + * states. This eliminates most of the useless duplicate arcs. */ for (s = nfa->states; s != NULL && !NISERR(); s = s->next) { - for (s2 = emptyreachable(s, s); s2 != s && !NISERR(); s2 = nexts) { + for (s2 = emptyreachable(s, s); s2 != s && !NISERR(); + s2 = nexts) { /* - * If s2 is doomed, we decide that (1) we will always push arcs - * forward to it, not pull them back to s; and (2) we can optimize - * away the push-forward, per comment above. So do nothing. + * If s2 is doomed, we decide that (1) we will always push + * arcs forward to it, not pull them back to s; and (2) we + * can optimize away the push-forward, per comment above. + * So do nothing. */ - if (s2->flag || hasnonemptyout(s2)) + if (s2->flag || hasnonemptyout(s2)) { replaceempty(nfa, s, s2); + } /* Reset the tmp fields as we walk back */ nexts = s2->tmp; @@ -1370,29 +1385,33 @@ fixempties( } /* - * Now remove all the EMPTY arcs, since we don't need them anymore. + * Remove all the EMPTY arcs, since we don't need them anymore. */ - for (s = nfa->states; s != NULL && !NISERR(); s = s->next) { + for (s = nfa->states; s != NULL; s = s->next) { for (a = s->outs; a != NULL; a = nexta) { nexta = a->outchain; - if (a->type == EMPTY) + if (a->type == EMPTY) { freearc(nfa, a); + } } } /* - * And remove any states that have become useless. (This cleanup is not - * very thorough, and would be even less so if we tried to combine it with - * the previous step; but cleanup() will take care of anything we miss.) + * And remove any states that have become useless. (This cleanup is + * not very thorough, and would be even less so if we tried to + * combine it with the previous step; but cleanup() will take care + * of anything we miss.) */ for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { nexts = s->next; - if ((s->nins == 0 || s->nouts == 0) && !s->flag) + if ((s->nins == 0 || s->nouts == 0) && !s->flag) { dropstate(nfa, s); + } } - if (f != NULL && !NISERR()) + if (f != NULL && !NISERR()) { dumpnfa(nfa, f); + } } /* @@ -1415,8 +1434,9 @@ emptyreachable( s->tmp = lastfound; lastfound = s; for (a = s->outs; a != NULL; a = a->outchain) { - if (a->type == EMPTY && a->to->tmp == NULL) + if (a->type == EMPTY && a->to->tmp == NULL) { lastfound = emptyreachable(a->to, lastfound); + } } return lastfound; } @@ -1475,10 +1495,9 @@ replaceempty( if (from->nins > to->nouts) { copyouts(nfa, to, from, 0); return; - } else { - copyins(nfa, from, to, 0); - return; } + + copyins(nfa, from, to, 0); } /* -- cgit v0.12