summaryrefslogtreecommitdiffstats
path: root/generic
diff options
context:
space:
mode:
Diffstat (limited to 'generic')
-rw-r--r--generic/regc_nfa.c351
1 files changed, 177 insertions, 174 deletions
diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c
index b125062..107e466 100644
--- a/generic/regc_nfa.c
+++ b/generic/regc_nfa.c
@@ -474,7 +474,7 @@ struct state *s;
return 1;
return 0;
}
-
+
/*
- nonemptyouts - count non-EMPTY out arcs of a state
^ static int nonemptyouts(struct state *);
@@ -483,16 +483,15 @@ static int
nonemptyouts(s)
struct state *s;
{
- int n = 0;
- struct arc *a;
-
- for (a = s->outs; a != NULL; a = a->outchain) {
- if (a->type != EMPTY)
- n++;
- }
- return n;
+ int n = 0;
+ struct arc *a;
+
+ for (a = s->outs; a != NULL; a = a->outchain)
+ if (a->type != EMPTY)
+ n++;
+ return n;
}
-
+
/*
- nonemptyins - count non-EMPTY in arcs of a state
^ static int nonemptyins(struct state *);
@@ -501,16 +500,15 @@ static int
nonemptyins(s)
struct state *s;
{
- int n = 0;
- struct arc *a;
-
- for (a = s->ins; a != NULL; a = a->inchain) {
- if (a->type != EMPTY)
- n++;
- }
- return n;
+ int n = 0;
+ struct arc *a;
+
+ for (a = s->ins; a != NULL; a = a->inchain)
+ if (a->type != EMPTY)
+ n++;
+ return n;
}
-
+
/*
- findarc - find arc, if any, from given source with given type and color
* If there is more than one such arc, the result is random.
@@ -1195,108 +1193,114 @@ fixempties(nfa, f)
struct nfa *nfa;
FILE *f; /* for debug output; NULL none */
{
- struct state *s;
- struct state *s2;
- struct state *nexts;
- struct arc *a;
- struct arc *nexta;
-
- /*
- * First, get rid of any states whose sole out-arc is an EMPTY, since
- * they're basically just aliases for their successor. The parsing
- * algorithm creates enough of these that it's worth special-casing this.
- */
- for (s = nfa->states; s != NULL && !NISERR(); s = nexts) {
- nexts = s->next;
- if (s->flag || s->nouts != 1)
- continue;
- a = s->outs;
- assert(a != NULL && a->outchain == NULL);
- if (a->type != EMPTY)
- continue;
- if (s != a->to)
- moveins(nfa, s, a->to);
- dropstate(nfa, s);
- }
-
- /*
- * Similarly, get rid of any state with a single EMPTY in-arc, by folding
- * it into its predecessor.
- */
- for (s = nfa->states; s != NULL && !NISERR(); s = nexts) {
- nexts = s->next;
- /* while we're at it, ensure tmp fields are clear for next step */
- assert(s->tmp = NULL);
- if (s->flag || s->nins != 1)
- continue;
- a = s->ins;
- assert(a != NULL && a->inchain == NULL);
- if (a->type != EMPTY)
- continue;
- if (s != a->from)
- moveouts(nfa, s, a->from);
- dropstate(nfa, s);
- }
-
- /*
- * For each remaining NFA state, find all other states that are reachable
- * from it by a chain of one or more EMPTY arcs. Then generate new arcs
- * that eliminate the need for each such chain.
- *
- * If we just do this straightforwardly, the algorithm gets slow in
- * complex graphs, because the same arcs get copied to all intermediate
- * states of an EMPTY chain, and then uselessly pushed repeatedly to the
- * chain's final state; we waste a lot of time in newarc's duplicate
- * checking. To improve matters, we decree that any state with only EMPTY
- * out-arcs is "doomed" and will not be part of the final NFA. That can be
- * ensured by not adding any new out-arcs to such a state. Having ensured
- * that, we need not update the state's in-arcs list either; all arcs that
- * might have gotten pushed forward to it will just get pushed directly to
- * successor states. This eliminates most of the useless duplicate arcs.
- */
- for (s = nfa->states; s != NULL && !NISERR(); s = s->next) {
- for (s2 = emptyreachable(s, s); s2 != s && !NISERR(); s2 = nexts) {
- /*
- * If s2 is doomed, we decide that (1) we will always push arcs
- * forward to it, not pull them back to s; and (2) we can optimize
- * away the push-forward, per comment above. So do nothing.
- */
- if (s2->flag || hasnonemptyout(s2))
- replaceempty(nfa, s, s2);
-
- /* Reset the tmp fields as we walk back */
- nexts = s2->tmp;
- s2->tmp = NULL;
+ struct state *s;
+ struct state *s2;
+ struct state *nexts;
+ struct arc *a;
+ struct arc *nexta;
+
+ /*
+ * First, get rid of any states whose sole out-arc is an EMPTY,
+ * since they're basically just aliases for their successor.
+ * The parsing algorithm creates enough of these that it's worth
+ * special-casing this.
+ */
+ for (s = nfa->states; s != NULL && !NISERR(); s = nexts) {
+ nexts = s->next;
+ if (s->flag || s->nouts != 1)
+ continue;
+ a = s->outs;
+ assert(a != NULL && a->outchain == NULL);
+ if (a->type != EMPTY)
+ continue;
+ if (s != a->to)
+ moveins(nfa, s, a->to);
+ dropstate(nfa, s);
}
- s->tmp = NULL;
- }
-
- /*
- * Now remove all the EMPTY arcs, since we don't need them anymore.
- */
- for (s = nfa->states; s != NULL && !NISERR(); s = s->next) {
- for (a = s->outs; a != NULL; a = nexta) {
- nexta = a->outchain;
- if (a->type == EMPTY)
- freearc(nfa, a);
+
+ /*
+ * Similarly, get rid of any state with a single EMPTY in-arc,
+ * by folding it into its predecessor.
+ */
+ for (s = nfa->states; s != NULL && !NISERR(); s = nexts) {
+ nexts = s->next;
+ /* Ensure tmp fields are clear for next step */
+ assert(s->tmp = NULL);
+ if (s->flag || s->nins != 1)
+ continue;
+ a = s->ins;
+ assert(a != NULL && a->inchain == NULL);
+ if (a->type != EMPTY)
+ continue;
+ if (s != a->from)
+ moveouts(nfa, s, a->from);
+ dropstate(nfa, s);
+ }
+
+ /*
+ * For each remaining NFA state, find all other states that are
+ * reachable from it by a chain of one or more EMPTY arcs. Then
+ * generate new arcs that eliminate the need for each such chain.
+ *
+ * If we just do this straightforwardly, the algorithm gets slow
+ * in complex graphs, because the same arcs get copied to all
+ * intermediate states of an EMPTY chain, and then uselessly
+ * pushed repeatedly to the chain's final state; we waste a lot
+ * of time in newarc's duplicate checking. To improve matters,
+ * we decree that any state with only EMPTY out-arcs is "doomed"
+ * and will not be part of the final NFA. That can be ensured by
+ * not adding any new out-arcs to such a state. Having ensured
+ * that, we need not update the state's in-arcs list either; all
+ * arcs that might have gotten pushed forward to it will just get
+ * pushed directly to successor states. This eliminates most of
+ * the useless duplicate arcs.
+ */
+ for (s = nfa->states; s != NULL && !NISERR(); s = s->next) {
+ for (s2 = emptyreachable(s, s); s2 != s && !NISERR();
+ s2 = nexts) {
+ /*
+ * If s2 is doomed, we decide that (1) we will
+ * always push arcs forward to it, not pull them
+ * back to s; and (2) we can optimize away the
+ * push-forward, per comment above.
+ * So do nothing.
+ */
+ if (s2->flag || hasnonemptyout(s2))
+ replaceempty(nfa, s, s2);
+
+ /* Reset the tmp fields as we walk back */
+ nexts = s2->tmp;
+ s2->tmp = NULL;
+ }
+ s->tmp = NULL;
+ }
+
+ /*
+ * Remove all the EMPTY arcs, since we don't need them anymore.
+ */
+ for (s = nfa->states; s != NULL; s = s->next)
+ for (a = s->outs; a != NULL; a = nexta) {
+ nexta = a->outchain;
+ if (a->type == EMPTY)
+ freearc(nfa, a);
+ }
+
+ /*
+ * And remove any states that have become useless. (This
+ * cleanup is not very thorough, and would be even less so if we
+ * tried to combine it with the previous step; but cleanup()
+ * will take care of anything we miss.)
+ */
+ for (s = nfa->states; s != NULL && !NISERR(); s = nexts) {
+ nexts = s->next;
+ if ((s->nins == 0 || s->nouts == 0) && !s->flag)
+ dropstate(nfa, s);
}
- }
-
- /*
- * And remove any states that have become useless. (This cleanup is not
- * very thorough, and would be even less so if we tried to combine it with
- * the previous step; but cleanup() will take care of anything we miss.)
- */
- for (s = nfa->states; s != NULL && !NISERR(); s = nexts) {
- nexts = s->next;
- if ((s->nins == 0 || s->nouts == 0) && !s->flag)
- dropstate(nfa, s);
- }
-
- if (f != NULL && !NISERR())
- dumpnfa(nfa, f);
+
+ if (f != NULL && !NISERR())
+ dumpnfa(nfa, f);
}
-
+
/*
- emptyreachable - recursively find all states reachable from s by EMPTY arcs
* The return value is the last such state found. Its tmp field links back
@@ -1312,17 +1316,16 @@ emptyreachable(s, lastfound)
struct state *s;
struct state *lastfound;
{
- struct arc *a;
-
- s->tmp = lastfound;
- lastfound = s;
- for (a = s->outs; a != NULL; a = a->outchain) {
- if (a->type == EMPTY && a->to->tmp == NULL)
- lastfound = emptyreachable(a->to, lastfound);
- }
- return lastfound;
+ struct arc *a;
+
+ s->tmp = lastfound;
+ lastfound = s;
+ for (a = s->outs; a != NULL; a = a->outchain)
+ if (a->type == EMPTY && a->to->tmp == NULL)
+ lastfound = emptyreachable(a->to, lastfound);
+ return lastfound;
}
-
+
/*
- replaceempty - replace an EMPTY arc chain with some non-empty arcs
* The EMPTY arc(s) should be deleted later, but we can't do it here because
@@ -1335,54 +1338,54 @@ struct nfa *nfa;
struct state *from;
struct state *to;
{
- int fromouts;
- int toins;
-
- assert(from != to);
-
- /*
- * Create replacement arcs that bypass the need for the EMPTY chain. We
- * can do this either by pushing arcs forward (linking directly from
- * "from"'s predecessors to "to") or by pulling them back (linking
- * directly from "from" to "to"'s successors). In general, we choose
- * whichever way creates greater fan-out or fan-in, so as to improve the
- * odds of reducing the other state to zero in-arcs or out-arcs and
- * thereby being able to delete it. However, if "from" is doomed (has no
- * non-EMPTY out-arcs), we must keep it so, so always push forward in that
- * case.
- *
- * The fan-out/fan-in comparison should count only non-EMPTY arcs. If
- * "from" is doomed, we can skip counting "to"'s arcs, since we want to
- * force taking the copynonemptyins path in that case.
- */
- fromouts = nonemptyouts(from);
- toins = (fromouts == 0) ? 1 : nonemptyins(to);
-
- if (fromouts > toins) {
- copyouts(nfa, to, from, 0);
- return;
- }
- if (fromouts < toins) {
- copyins(nfa, from, to, 0);
- return;
- }
-
- /*
- * fromouts == toins. Decide on secondary issue: copy fewest arcs.
- *
- * Doesn't seem to be worth the trouble to exclude empties from these
- * comparisons; that takes extra time and doesn't seem to improve the
- * resulting graph much.
- */
- if (from->nins > to->nouts) {
- copyouts(nfa, to, from, 0);
- return;
- } else {
+ int fromouts;
+ int toins;
+
+ assert(from != to);
+
+ /*
+ * Create replacement arcs that bypass the need for the EMPTY
+ * chain. We can do this either by pushing arcs forward
+ * (linking directly from predecessors of "from" to "to") or by
+ * pulling them back (linking directly from "from" to the
+ * successors of "to"). In general, we choose whichever way
+ * creates greater fan-out or fan-in, so as to improve the odds
+ * of reducing the other state to zero in-arcs or out-arcs and
+ * thereby being able to delete it. However, if "from" is
+ * doomed (has no non-EMPTY out-arcs), we must keep it so, so
+ * always push forward in that case.
+ *
+ * The fan-out/fan-in comparison should count only non-EMPTY
+ * arcs. If "from" is doomed, we can skip counting "to"'s arcs,
+ * since we want to force taking the copyins path in that case.
+ */
+ fromouts = nonemptyouts(from);
+ toins = (fromouts == 0) ? 1 : nonemptyins(to);
+
+ if (fromouts > toins) {
+ copyouts(nfa, to, from, 0);
+ return;
+ }
+ if (fromouts < toins) {
+ copyins(nfa, from, to, 0);
+ return;
+ }
+
+ /*
+ * fromouts == toins. Secondary decision: copy fewest arcs.
+ *
+ * Doesn't seem to be worth the trouble to exclude empties from
+ * these comparisons; that takes extra time and doesn't seem to
+ * improve the resulting graph much.
+ */
+ if (from->nins > to->nouts) {
+ copyouts(nfa, to, from, 0);
+ return;
+ }
+
copyins(nfa, from, to, 0);
- return;
- }
}
-
+
/*
- cleanup - clean up NFA after optimizations
^ static VOID cleanup(struct nfa *);