diff options
Diffstat (limited to 'generic/regc_nfa.c')
-rw-r--r-- | generic/regc_nfa.c | 416 |
1 files changed, 301 insertions, 115 deletions
diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c index 19dbe63..42489dd 100644 --- a/generic/regc_nfa.c +++ b/generic/regc_nfa.c @@ -142,7 +142,7 @@ DecrementSize( /* - freenfa - free an entire NFA - ^ static VOID freenfa(struct nfa *); + ^ static void freenfa(struct nfa *); */ static void freenfa( @@ -242,7 +242,7 @@ newfstate( /* - dropstate - delete a state's inarcs and outarcs and free it - ^ static VOID dropstate(struct nfa *, struct state *); + ^ static void dropstate(struct nfa *, struct state *); */ static void dropstate( @@ -262,7 +262,7 @@ dropstate( /* - freestate - free a state, which has no in-arcs or out-arcs - ^ static VOID freestate(struct nfa *, struct state *); + ^ static void freestate(struct nfa *, struct state *); */ static void freestate( @@ -294,7 +294,7 @@ freestate( /* - destroystate - really get rid of an already-freed state - ^ static VOID destroystate(struct nfa *, struct state *); + ^ static void destroystate(struct nfa *, struct state *); */ static void destroystate( @@ -317,7 +317,7 @@ destroystate( /* - newarc - set up a new arc within an NFA - ^ static VOID newarc(struct nfa *, int, pcolor, struct state *, + ^ static void newarc(struct nfa *, int, pcolor, struct state *, ^ struct state *); */ static void @@ -426,7 +426,7 @@ allocarc( /* - freearc - free an arc - ^ static VOID freearc(struct nfa *, struct arc *); + ^ static void freearc(struct nfa *, struct arc *); */ static void freearc( @@ -497,6 +497,62 @@ freearc( } /* + - hasnonemptyout - Does state have a non-EMPTY out arc? + ^ static int hasnonemptyout(struct state *); + */ +static int +hasnonemptyout( + struct state *s) +{ + struct arc *a; + + for (a = s->outs; a != NULL; a = a->outchain) { + if (a->type != EMPTY) { + return 1; + } + } + return 0; +} + +/* + - nonemptyouts - count non-EMPTY out arcs of a state + ^ static int nonemptyouts(struct state *); + */ +static int +nonemptyouts( + struct state *s) +{ + int n = 0; + struct arc *a; + + for (a = s->outs; a != NULL; a = a->outchain) { + if (a->type != EMPTY) { + n++; + } + } + return n; +} + +/* + - nonemptyins - count non-EMPTY in arcs of a state + ^ static int nonemptyins(struct state *); + */ +static int +nonemptyins( + struct state *s) +{ + int n = 0; + struct arc *a; + + for (a = s->ins; a != NULL; a = a->inchain) { + if (a->type != EMPTY) { + n++; + } + } + return n; +} + +/* - findarc - find arc, if any, from given source with given type and color * If there is more than one such arc, the result is random. ^ static struct arc *findarc(struct state *, int, pcolor); @@ -519,7 +575,7 @@ findarc( /* - cparc - allocate a new arc within an NFA, copying details from old one - ^ static VOID cparc(struct nfa *, struct arc *, struct state *, + ^ static void cparc(struct nfa *, struct arc *, struct state *, ^ struct state *); */ static void @@ -538,7 +594,7 @@ cparc( * existing arcs, and you would be right if it weren't for the desire * for duplicate suppression, which makes it easier to just make new * ones to exploit the suppression built into newarc. - ^ static VOID moveins(struct nfa *, struct state *, struct state *); + ^ static void moveins(struct nfa *, struct state *, struct state *); */ static void moveins( @@ -559,27 +615,31 @@ moveins( } /* - - copyins - copy all in arcs of a state to another state - ^ static VOID copyins(struct nfa *, struct state *, struct state *); + - copyins - copy in arcs of a state to another state + * Either all arcs, or only non-empty ones as determined by all value. + ^ static VOID copyins(struct nfa *, struct state *, struct state *, int); */ static void copyins( struct nfa *nfa, struct state *oldState, - struct state *newState) + struct state *newState, + int all) { struct arc *a; assert(oldState != newState); for (a=oldState->ins ; a!=NULL ; a=a->inchain) { - cparc(nfa, a, a->from, newState); + if (all || a->type != EMPTY) { + cparc(nfa, a, a->from, newState); + } } } /* - moveouts - move all out arcs of a state to another state - ^ static VOID moveouts(struct nfa *, struct state *, struct state *); + ^ static void moveouts(struct nfa *, struct state *, struct state *); */ static void moveouts( @@ -598,27 +658,31 @@ moveouts( } /* - - copyouts - copy all out arcs of a state to another state - ^ static VOID copyouts(struct nfa *, struct state *, struct state *); + - copyouts - copy out arcs of a state to another state + * Either all arcs, or only non-empty ones as determined by all value. + ^ static VOID copyouts(struct nfa *, struct state *, struct state *, int); */ static void copyouts( struct nfa *nfa, struct state *oldState, - struct state *newState) + struct state *newState, + int all) { struct arc *a; assert(oldState != newState); for (a=oldState->outs ; a!=NULL ; a=a->outchain) { - cparc(nfa, a, newState, a->to); + if (all || a->type != EMPTY) { + cparc(nfa, a, newState, a->to); + } } } /* - cloneouts - copy out arcs of a state to another state pair, modifying type - ^ static VOID cloneouts(struct nfa *, struct state *, struct state *, + ^ static void cloneouts(struct nfa *, struct state *, struct state *, ^ struct state *, int); */ static void @@ -642,7 +706,7 @@ cloneouts( - delsub - delete a sub-NFA, updating subre pointers if necessary * This uses a recursive traversal of the sub-NFA, marking already-seen * states using their tmp pointer. - ^ static VOID delsub(struct nfa *, struct state *, struct state *); + ^ static void delsub(struct nfa *, struct state *, struct state *); */ static void delsub( @@ -665,7 +729,7 @@ delsub( /* - deltraverse - the recursive heart of delsub * This routine's basic job is to destroy all out-arcs of the state. - ^ static VOID deltraverse(struct nfa *, struct state *, struct state *); + ^ static void deltraverse(struct nfa *, struct state *, struct state *); */ static void deltraverse( @@ -708,7 +772,7 @@ deltraverse( * Another recursive traversal, this time using tmp to point to duplicates as * well as mark already-seen states. (You knew there was a reason why it's a * state pointer, didn't you? :-)) - ^ static VOID dupnfa(struct nfa *, struct state *, struct state *, + ^ static void dupnfa(struct nfa *, struct state *, struct state *, ^ struct state *, struct state *); */ static void @@ -725,7 +789,7 @@ dupnfa( } stop->tmp = to; - duptraverse(nfa, start, from); + duptraverse(nfa, start, from, 0); /* done, except for clearing out the tmp pointers */ stop->tmp = NULL; @@ -734,13 +798,14 @@ dupnfa( /* - duptraverse - recursive heart of dupnfa - ^ static VOID duptraverse(struct nfa *, struct state *, struct state *); + ^ static void duptraverse(struct nfa *, struct state *, struct state *); */ static void duptraverse( struct nfa *nfa, struct state *s, - struct state *stmp) /* s's duplicate, or NULL */ + struct state *stmp, /* s's duplicate, or NULL */ + int depth) { struct arc *a; @@ -754,8 +819,20 @@ duptraverse( return; } + /* + * Arbitrary depth limit. Needs tuning, but this value is sufficient to + * make all normal tests (not reg-33.14) pass. + */ +#ifndef DUPTRAVERSE_MAX_DEPTH +#define DUPTRAVERSE_MAX_DEPTH 15000 +#endif + + if (depth++ > DUPTRAVERSE_MAX_DEPTH) { + NERR(REG_ESPACE); + } + for (a=s->outs ; a!=NULL && !NISERR() ; a=a->outchain) { - duptraverse(nfa, a->to, NULL); + duptraverse(nfa, a->to, NULL, depth); if (NISERR()) { break; } @@ -766,7 +843,7 @@ duptraverse( /* - cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set - ^ static VOID cleartraverse(struct nfa *, struct state *); + ^ static void cleartraverse(struct nfa *, struct state *); */ static void cleartraverse( @@ -787,7 +864,7 @@ cleartraverse( /* - specialcolors - fill in special colors for an NFA - ^ static VOID specialcolors(struct nfa *); + ^ static void specialcolors(struct nfa *); */ static void specialcolors( @@ -850,7 +927,7 @@ optimize( /* - pullback - pull back constraints backward to (with luck) eliminate them - ^ static VOID pullback(struct nfa *, FILE *); + ^ static void pullback(struct nfa *, FILE *); */ static void pullback( @@ -957,9 +1034,9 @@ pull( if (NISERR()) { return 0; } - assert(to != from); /* con is not an inarc */ - copyins(nfa, from, s); /* duplicate inarcs */ - cparc(nfa, con, s, to); /* move constraint arc */ + assert(to != from); /* con is not an inarc */ + copyins(nfa, from, s, 1); /* duplicate inarcs */ + cparc(nfa, con, s, to); /* move constraint arc */ freearc(nfa, con); from = s; con = from->outs; @@ -1007,7 +1084,7 @@ pull( /* - pushfwd - push forward constraints forward to (with luck) eliminate them - ^ static VOID pushfwd(struct nfa *, FILE *); + ^ static void pushfwd(struct nfa *, FILE *); */ static void pushfwd( @@ -1117,7 +1194,7 @@ push( if (NISERR()) { return 0; } - copyouts(nfa, to, s); /* duplicate outarcs */ + copyouts(nfa, to, s, 1); /* duplicate outarcs */ cparc(nfa, con, from, s); /* move constraint */ freearc(nfa, con); to = s; @@ -1226,7 +1303,7 @@ combine( /* - fixempties - get rid of EMPTY arcs - ^ static VOID fixempties(struct nfa *, FILE *); + ^ static void fixempties(struct nfa *, FILE *); */ static void fixempties( @@ -1234,105 +1311,214 @@ fixempties( FILE *f) /* for debug output; NULL none */ { struct state *s; + struct state *s2; struct state *nexts; struct arc *a; struct arc *nexta; - int progress; /* - * Find and eliminate empties until there are no more. + * First, get rid of any states whose sole out-arc is an EMPTY, + * since they're basically just aliases for their successor. The + * parsing algorithm creates enough of these that it's worth + * special-casing this. */ + for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { + nexts = s->next; + if (s->flag || s->nouts != 1) { + continue; + } + a = s->outs; + assert(a != NULL && a->outchain == NULL); + if (a->type != EMPTY) { + continue; + } + if (s != a->to) { + moveins(nfa, s, a->to); + } + dropstate(nfa, s); + } - do { - progress = 0; - for (s = nfa->states; s != NULL && !NISERR() - && s->no != FREESTATE; s = nexts) { - nexts = s->next; - for (a = s->outs; a != NULL && !NISERR(); a = nexta) { - nexta = a->outchain; - if (a->type == EMPTY && unempty(nfa, a)) { - progress = 1; - } - assert(nexta == NULL || s->no != FREESTATE); + /* + * Similarly, get rid of any state with a single EMPTY in-arc, by + * folding it into its predecessor. + */ + for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { + nexts = s->next; + /* Ensure tmp fields are clear for next step */ + assert(s->tmp = NULL); + if (s->flag || s->nins != 1) { + continue; + } + a = s->ins; + assert(a != NULL && a->inchain == NULL); + if (a->type != EMPTY) { + continue; + } + if (s != a->from) { + moveouts(nfa, s, a->from); + } + dropstate(nfa, s); + } + + /* + * For each remaining NFA state, find all other states that are + * reachable from it by a chain of one or more EMPTY arcs. Then + * generate new arcs that eliminate the need for each such chain. + * + * If we just do this straightforwardly, the algorithm gets slow in + * complex graphs, because the same arcs get copied to all + * intermediate states of an EMPTY chain, and then uselessly pushed + * repeatedly to the chain's final state; we waste a lot of time in + * newarc's duplicate checking. To improve matters, we decree that + * any state with only EMPTY out-arcs is "doomed" and will not be + * part of the final NFA. That can be ensured by not adding any new + * out-arcs to such a state. Having ensured that, we need not update + * the state's in-arcs list either; all arcs that might have gotten + * pushed forward to it will just get pushed directly to successor + * states. This eliminates most of the useless duplicate arcs. + */ + for (s = nfa->states; s != NULL && !NISERR(); s = s->next) { + for (s2 = emptyreachable(s, s); s2 != s && !NISERR(); + s2 = nexts) { + /* + * If s2 is doomed, we decide that (1) we will always push + * arcs forward to it, not pull them back to s; and (2) we + * can optimize away the push-forward, per comment above. + * So do nothing. + */ + if (s2->flag || hasnonemptyout(s2)) { + replaceempty(nfa, s, s2); } + + /* Reset the tmp fields as we walk back */ + nexts = s2->tmp; + s2->tmp = NULL; } - if (progress && f != NULL) { - dumpnfa(nfa, f); + s->tmp = NULL; + } + if (NISERR()) { + return; + } + + /* + * Remove all the EMPTY arcs, since we don't need them anymore. + */ + for (s = nfa->states; s != NULL; s = s->next) { + for (a = s->outs; a != NULL; a = nexta) { + nexta = a->outchain; + if (a->type == EMPTY) { + freearc(nfa, a); + } } - } while (progress && !NISERR()); + } + + /* + * And remove any states that have become useless. (This cleanup is + * not very thorough, and would be even less so if we tried to + * combine it with the previous step; but cleanup() will take care + * of anything we miss.) + */ + for (s = nfa->states; s != NULL; s = nexts) { + nexts = s->next; + if ((s->nins == 0 || s->nouts == 0) && !s->flag) { + dropstate(nfa, s); + } + } + + if (f != NULL) { + dumpnfa(nfa, f); + } } /* - - unempty - optimize out an EMPTY arc, if possible - * Actually, as it stands this function always succeeds, but the return value - * is kept with an eye on possible future changes. - ^ static int unempty(struct nfa *, struct arc *); + - emptyreachable - recursively find all states reachable from s by EMPTY arcs + * The return value is the last such state found. Its tmp field links back + * to the next-to-last such state, and so on back to s, so that all these + * states can be located without searching the whole NFA. + * The maximum recursion depth here is equal to the length of the longest + * loop-free chain of EMPTY arcs, which is surely no more than the size of + * the NFA, and in practice will be a lot less than that. + ^ static struct state *emptyreachable(struct state *, struct state *); */ -static int /* 0 couldn't, 1 could */ -unempty( - struct nfa *nfa, - struct arc *a) +static struct state * +emptyreachable( + struct state *s, + struct state *lastfound) { - struct state *from = a->from; - struct state *to = a->to; - int usefrom; /* work on from, as opposed to to? */ - - assert(a->type == EMPTY); - assert(from != nfa->pre && to != nfa->post); + struct arc *a; - if (from == to) { /* vacuous loop */ - freearc(nfa, a); - return 1; + s->tmp = lastfound; + lastfound = s; + for (a = s->outs; a != NULL; a = a->outchain) { + if (a->type == EMPTY && a->to->tmp == NULL) { + lastfound = emptyreachable(a->to, lastfound); + } } + return lastfound; +} + +/* + - replaceempty - replace an EMPTY arc chain with some non-empty arcs + * The EMPTY arc(s) should be deleted later, but we can't do it here because + * they may still be needed to identify other arc chains during fixempties(). + ^ static void replaceempty(struct nfa *, struct state *, struct state *); + */ +static void +replaceempty( + struct nfa *nfa, + struct state *from, + struct state *to) +{ + int fromouts; + int toins; + + assert(from != to); /* - * Decide which end to work on. + * Create replacement arcs that bypass the need for the EMPTY chain. We + * can do this either by pushing arcs forward (linking directly from + * "from"'s predecessors to "to") or by pulling them back (linking + * directly from "from" to "to"'s successors). In general, we choose + * whichever way creates greater fan-out or fan-in, so as to improve the + * odds of reducing the other state to zero in-arcs or out-arcs and + * thereby being able to delete it. However, if "from" is doomed (has no + * non-EMPTY out-arcs), we must keep it so, so always push forward in that + * case. + * + * The fan-out/fan-in comparison should count only non-EMPTY arcs. If + * "from" is doomed, we can skip counting "to"'s arcs, since we want to + * force taking the copynonemptyins path in that case. */ + fromouts = nonemptyouts(from); + toins = (fromouts == 0) ? 1 : nonemptyins(to); - usefrom = 1; /* default: attack from */ - if (from->nouts > to->nins) { - usefrom = 0; - } else if (from->nouts == to->nins) { - /* - * Decide on secondary issue: move/copy fewest arcs. - */ - - if (from->nins > to->nouts) { - usefrom = 0; - } + if (fromouts > toins) { + copyouts(nfa, to, from, 0); + return; + } + if (fromouts < toins) { + copyins(nfa, from, to, 0); + return; } - freearc(nfa, a); - if (usefrom) { - if (from->nouts == 0) { - /* - * Was the state's only outarc. - */ - - moveins(nfa, from, to); - freestate(nfa, from); - } else { - copyins(nfa, from, to); - } - } else { - if (to->nins == 0) { - /* - * Was the state's only inarc. - */ - - moveouts(nfa, to, from); - freestate(nfa, to); - } else { - copyouts(nfa, to, from); - } + /* + * fromouts == toins. Decide on secondary issue: copy fewest arcs. + * + * Doesn't seem to be worth the trouble to exclude empties from these + * comparisons; that takes extra time and doesn't seem to improve the + * resulting graph much. + */ + if (from->nins > to->nouts) { + copyouts(nfa, to, from, 0); + return; } - return 1; + copyins(nfa, from, to, 0); } /* - cleanup - clean up NFA after optimizations - ^ static VOID cleanup(struct nfa *); + ^ static void cleanup(struct nfa *); */ static void cleanup( @@ -1373,7 +1559,7 @@ cleanup( /* - markreachable - recursive marking of reachable states - ^ static VOID markreachable(struct nfa *, struct state *, struct state *, + ^ static void markreachable(struct nfa *, struct state *, struct state *, ^ struct state *); */ static void @@ -1397,7 +1583,7 @@ markreachable( /* - markcanreach - recursive marking of states which can reach here - ^ static VOID markcanreach(struct nfa *, struct state *, struct state *, + ^ static void markcanreach(struct nfa *, struct state *, struct state *, ^ struct state *); */ static void @@ -1445,7 +1631,7 @@ analyze( /* - compact - compact an NFA - ^ static VOID compact(struct nfa *, struct cnfa *); + ^ static void compact(struct nfa *, struct cnfa *); */ static void compact( @@ -1539,7 +1725,7 @@ compact( - carcsort - sort compacted-NFA arcs by color * Really dumb algorithm, but if the list is long enough for that to matter, * you're in real trouble anyway. - ^ static VOID carcsort(struct carc *, struct carc *); + ^ static void carcsort(struct carc *, struct carc *); */ static void carcsort( @@ -1568,7 +1754,7 @@ carcsort( /* - freecnfa - free a compacted NFA - ^ static VOID freecnfa(struct cnfa *); + ^ static void freecnfa(struct cnfa *); */ static void freecnfa( @@ -1582,7 +1768,7 @@ freecnfa( /* - dumpnfa - dump an NFA in human-readable form - ^ static VOID dumpnfa(struct nfa *, FILE *); + ^ static void dumpnfa(struct nfa *, FILE *); */ static void dumpnfa( @@ -1623,7 +1809,7 @@ dumpnfa( /* - dumpstate - dump an NFA state in human-readable form - ^ static VOID dumpstate(struct state *, FILE *); + ^ static void dumpstate(struct state *, FILE *); */ static void dumpstate( @@ -1653,7 +1839,7 @@ dumpstate( /* - dumparcs - dump out-arcs in human-readable form - ^ static VOID dumparcs(struct state *, FILE *); + ^ static void dumparcs(struct state *, FILE *); */ static void dumparcs( @@ -1696,7 +1882,7 @@ dumprarcs( /* - dumparc - dump one outarc in readable form, including prefixing tab - ^ static VOID dumparc(struct arc *, struct state *, FILE *); + ^ static void dumparc(struct arc *, struct state *, FILE *); */ static void dumparc( @@ -1770,7 +1956,7 @@ dumparc( /* - dumpcnfa - dump a compacted NFA in human-readable form - ^ static VOID dumpcnfa(struct cnfa *, FILE *); + ^ static void dumpcnfa(struct cnfa *, FILE *); */ static void dumpcnfa( @@ -1811,7 +1997,7 @@ dumpcnfa( /* - dumpcstate - dump a compacted-NFA state in human-readable form - ^ static VOID dumpcstate(int, struct carc *, struct cnfa *, FILE *); + ^ static void dumpcstate(int, struct carc *, struct cnfa *, FILE *); */ static void dumpcstate( |