From ec527807539d4bdc9433636ae0c055558d85222c Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Tue, 5 Mar 2013 14:38:10 +0000
Subject: Contributed patch from Tom Lane <tgl@users.sf.net>.  Merge conflicts
 due to different coding style and lingering obsolete compiler support
 resolved.

---
 generic/regc_nfa.c | 349 +++++++++++++++++++++++++++++++++++++++--------------
 generic/regcomp.c  |   7 +-
 2 files changed, 264 insertions(+), 92 deletions(-)

diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c
index 459968a..11fd49b 100644
--- a/generic/regc_nfa.c
+++ b/generic/regc_nfa.c
@@ -460,6 +460,42 @@ struct arc *victim;
 }
 
 /*
+ - nonemptyouts - count non-EMPTY out arcs of a state
+ ^ static int nonemptyouts(struct state *);
+ */
+static int
+nonemptyouts(s)
+struct state *s;
+{
+    int n = 0;
+    struct arc *a;
+
+    for (a = s->outs; a != NULL; a = a->outchain) {
+	if (a->type != EMPTY)
+	    n++;
+    }
+    return n;
+}
+
+/*
+ - nonemptyins - count non-EMPTY in arcs of a state
+ ^ static int nonemptyins(struct state *);
+ */
+static int
+nonemptyins(s)
+struct state *s;
+{
+    int n = 0;
+    struct arc *a;
+
+    for (a = s->ins; a != NULL; a = a->inchain) {
+	if (a->type != EMPTY)
+	    n++;
+    }
+    return n;
+}
+
+/*
  - findarc - find arc, if any, from given source with given type and color
  * If there is more than one such arc, the result is random.
  ^ static struct arc *findarc(struct state *, int, pcolor);
@@ -538,6 +574,26 @@ struct state *new;
 }
 
 /*
+ - copynonemptyins - as above, but ignore empty arcs
+ ^ static void copynonemptyins(struct nfa *, struct state *, struct state *);
+ */
+static VOID
+copynonemptyins(nfa, oldState, newState)
+struct nfa *nfa;
+struct state *oldState;
+struct state *newState;
+{
+    struct arc *a;
+
+    assert(oldState != newState);
+
+    for (a=oldState->ins ; a!=NULL ; a=a->inchain) {
+	if (a->type != EMPTY)
+	    cparc(nfa, a, a->from, newState);
+    }
+}
+
+/*
  - moveouts - move all out arcs of a state to another state
  ^ static VOID moveouts(struct nfa *, struct state *, struct state *);
  */
@@ -574,6 +630,26 @@ struct state *new;
 	for (a = old->outs; a != NULL; a = a->outchain)
 		cparc(nfa, a, new, a->to);
 }
+
+/*
+ - copynonemptyouts - as above, but ignore empty arcs
+ ^ static void copynonemptyouts(struct nfa *, struct state *, struct state *);
+ */
+static VOID 
+copynonemptyouts(nfa, oldState, newState)
+struct nfa *nfa;
+struct state *oldState;
+struct state *newState;
+{
+    struct arc *a;
+
+    assert(oldState != newState);
+
+    for (a=oldState->outs ; a!=NULL ; a=a->outchain) {
+	if (a->type != EMPTY)
+	    cparc(nfa, a, newState, a->to);
+    }
+}
 
 /*
  - cloneouts - copy out arcs of a state to another state pair, modifying type
@@ -1137,103 +1213,194 @@ fixempties(nfa, f)
 struct nfa *nfa;
 FILE *f;			/* for debug output; NULL none */
 {
-	struct state *s;
-	struct state *nexts;
-	struct state *to;
-	struct arc *a;
-	struct arc *nexta;
-	int progress;
-
-	/* find and eliminate empties until there are no more */
-	do {
-		progress = 0;
-		for (s = nfa->states; s != NULL && !NISERR(); s = nexts) {
-			nexts = s->next;
-			for (a = s->outs; a != NULL && !NISERR();
-				a = a->outchain)
-				if (a->type == EMPTY)
-					/* Mark a for deletion; copy arcs
-					 * to preserve graph connectivity
-					 * after it is gone. */
-					unempty(nfa, a);
-
-			/* Now pass through and delete the marked arcs.
-			 * Doing all the deletion after all the marking
-			 * prevents arc copying from resurrecting deleted
-			 * arcs which can cause failure to converge.
-			 * [Tcl Bug 3604074] */
-			for (a = s->outs; a != NULL; a = nexta) {
-				nexta = a->outchain;
-				if (a->from == NULL) {
-					progress = 1;
-		    			to = a->to;
-					a->from = s;
-					freearc(nfa, a);
-					if (to->nins == 0) {
-						while ((a = to->outs))
-							freearc(nfa, a);
-						if (nexts == to)
-							nexts = to->next;
-						freestate(nfa, to);
-					}
-					if (s->nouts == 0) {
-						while ((a = s->ins))
-							freearc(nfa, a);
-						freestate(nfa, s);
-					}
-				}
-			}
-		}
-		if (progress && f != NULL)
-			dumpnfa(nfa, f);
-	} while (progress && !NISERR());
+    struct state *s;
+    struct state *s2;
+    struct state *nexts;
+    struct arc *a;
+    struct arc *nexta;
+
+    /*
+     * First, get rid of any states whose sole out-arc is an EMPTY, since
+     * they're basically just aliases for their successor.  The parsing
+     * algorithm creates enough of these that it's worth special-casing this.
+     */
+    for (s = nfa->states; s != NULL && !NISERR(); s = nexts) {
+	nexts = s->next;
+	if (s->nouts == 1 && !s->flag) {
+	    a = s->outs;
+	    assert(a != NULL && a->outchain == NULL);
+	    if (a->type == EMPTY) {
+		if (s != a->to)
+		    moveins(nfa, s, a->to);
+		dropstate(nfa, s);
+	    }
+	}
+    }
+
+    /*
+     * Similarly, get rid of any state with a single EMPTY in-arc, by folding
+     * it into its predecessor.
+     */
+    for (s = nfa->states; s != NULL && !NISERR(); s = nexts) {
+	nexts = s->next;
+	/* while we're at it, ensure tmp fields are clear for next step */
+	s->tmp = NULL;
+	if (s->nins == 1 && !s->flag) {
+	    a = s->ins;
+	    assert(a != NULL && a->inchain == NULL);
+	    if (a->type == EMPTY) {
+		if (s != a->from)
+		    moveouts(nfa, s, a->from);
+		dropstate(nfa, s);
+	    }
+	}
+    }
+
+    /*
+     * For each remaining NFA state, find all other states that are reachable
+     * from it by a chain of one or more EMPTY arcs.  Then generate new arcs
+     * that eliminate the need for each such chain.
+     *
+     * If we just do this straightforwardly, the algorithm gets slow in
+     * complex graphs, because the same arcs get copied to all intermediate
+     * states of an EMPTY chain, and then uselessly pushed repeatedly to the
+     * chain's final state; we waste a lot of time in newarc's duplicate
+     * checking.  To improve matters, we decree that any state with only EMPTY
+     * out-arcs is "doomed" and will not be part of the final NFA. That can be
+     * ensured by not adding any new out-arcs to such a state. Having ensured
+     * that, we need not update the state's in-arcs list either; all arcs that
+     * might have gotten pushed forward to it will just get pushed directly to
+     * successor states.  This eliminates most of the useless duplicate arcs.
+     */
+    for (s = nfa->states; s != NULL && !NISERR(); s = s->next) {
+	for (s2 = emptyreachable(s, s); s2 != s && !NISERR(); s2 = nexts) {
+	    /*
+	     * If s2 is doomed, we decide that (1) we will always push arcs
+	     * forward to it, not pull them back to s; and (2) we can optimize
+	     * away the push-forward, per comment above.  So do nothing.
+	     */
+	    if (s2->flag || nonemptyouts(s2) > 0)
+		replaceempty(nfa, s, s2);
+
+	    /* Reset the tmp fields as we walk back */
+	    nexts = s2->tmp;
+	    s2->tmp = NULL;
+	}
+	s->tmp = NULL;
+    }
+
+    /*
+     * Now remove all the EMPTY arcs, since we don't need them anymore.
+     */
+    for (s = nfa->states; s != NULL && !NISERR(); s = s->next) {
+	for (a = s->outs; a != NULL; a = nexta) {
+	    nexta = a->outchain;
+	    if (a->type == EMPTY)
+		freearc(nfa, a);
+	}
+    }
+
+    /*
+     * And remove any states that have become useless.  (This cleanup is not
+     * very thorough, and would be even less so if we tried to combine it with
+     * the previous step; but cleanup() will take care of anything we miss.)
+     */
+    for (s = nfa->states; s != NULL && !NISERR(); s = nexts) {
+	nexts = s->next;
+	if ((s->nins == 0 || s->nouts == 0) && !s->flag)
+	    dropstate(nfa, s);
+    }
+
+    if (f != NULL && !NISERR())
+	dumpnfa(nfa, f);
 }
-
+
 /*
- - unempty - optimize out an EMPTY arc, if possible
- * Actually, as it stands this function always succeeds, but the return
- * value is kept with an eye on possible future changes.
- ^ static int unempty(struct nfa *, struct arc *);
+ - emptyreachable - recursively find all states reachable from s by EMPTY arcs
+ * The return value is the last such state found.  Its tmp field links back
+ * to the next-to-last such state, and so on back to s, so that all these
+ * states can be located without searching the whole NFA.
+ * The maximum recursion depth here is equal to the length of the longest
+ * loop-free chain of EMPTY arcs, which is surely no more than the size of
+ * the NFA, and in practice will be a lot less than that.
+ ^ static struct state *emptyreachable(struct state *, struct state *);
  */
-static int			/* 0 couldn't, 1 could */
-unempty(nfa, a)
+static struct state *
+emptyreachable(s, lastfound)
+struct state *s;
+struct state *lastfound;
+{
+    struct arc *a;
+
+    s->tmp = lastfound;
+    lastfound = s;
+    for (a = s->outs; a != NULL; a = a->outchain) {
+	if (a->type == EMPTY && a->to->tmp == NULL)
+	    lastfound = emptyreachable(a->to, lastfound);
+    }
+    return lastfound;
+}
+
+/*
+ - replaceempty - replace an EMPTY arc chain with some non-empty arcs
+ * The EMPTY arc(s) should be deleted later, but we can't do it here because
+ * they may still be needed to identify other arc chains during fixempties().
+ ^ static void replaceempty(struct nfa *, struct state *, struct state *);
+ */
+static VOID
+replaceempty(nfa, from, to)
 struct nfa *nfa;
-struct arc *a;
+struct state *from;
+struct state *to;
 {
-	struct state *from = a->from;
-	struct state *to = a->to;
-
-	assert(a->type == EMPTY);
-	assert(from != nfa->pre && to != nfa->post);
-
-	if (from == to) {		/* vacuous loop */
-		freearc(nfa, a);
-		return 1;
-	}
-
-	/* Mark arc for deletion */
-	a->from = NULL;
-
-	if (from->nouts > to->nins) {
-		copyouts(nfa, to, from);
-		return 1;
-	}
-	if (from->nouts < to->nins) {
-		copyins(nfa, from, to);
-		return 1;
-	}
-
-	/* from->nouts == to->nins */
-	/* decide on secondary issue:  move/copy fewest arcs */
-	if (from->nins > to->nouts) {
-		copyouts(nfa, to, from);
-		return 1;
-	}
-
-	copyins(nfa, from, to);
-	return 1;
+    int fromouts;
+    int toins;
+
+    assert(from != to);
+
+    /*
+     * Create replacement arcs that bypass the need for the EMPTY chain.  We
+     * can do this either by pushing arcs forward (linking directly from
+     * "from"'s predecessors to "to") or by pulling them back (linking
+     * directly from "from" to "to"'s successors).  In general, we choose
+     * whichever way creates greater fan-out or fan-in, so as to improve the
+     * odds of reducing the other state to zero in-arcs or out-arcs and
+     * thereby being able to delete it.  However, if "from" is doomed (has no
+     * non-EMPTY out-arcs), we must keep it so, so always push forward in that
+     * case.
+     *
+     * The fan-out/fan-in comparison should count only non-EMPTY arcs.  If
+     * "from" is doomed, we can skip counting "to"'s arcs, since we want to
+     * force taking the copynonemptyins path in that case.
+     */
+    fromouts = nonemptyouts(from);
+    toins = (fromouts == 0) ? 1 : nonemptyins(to);
+
+    if (fromouts > toins) {
+	copynonemptyouts(nfa, to, from);
+	return;
+    }
+    if (fromouts < toins) {
+	copynonemptyins(nfa, from, to);
+	return;
+    }
+
+    /*
+     * fromouts == toins.  Decide on secondary issue: copy fewest arcs.
+     *
+     * Doesn't seem to be worth the trouble to exclude empties from these
+     * comparisons; that takes extra time and doesn't seem to improve the
+     * resulting graph much.
+     */
+    if (from->nins > to->nouts) {
+	copynonemptyouts(nfa, to, from);
+	return;
+    } else {
+	copynonemptyins(nfa, from, to);
+	return;
+    }
 }
-
+
 /*
  - cleanup - clean up NFA after optimizations
  ^ static VOID cleanup(struct nfa *);
diff --git a/generic/regcomp.c b/generic/regcomp.c
index 307877a..6fd9c81 100644
--- a/generic/regcomp.c
+++ b/generic/regcomp.c
@@ -123,12 +123,16 @@ static VOID destroystate _ANSI_ARGS_((struct nfa *, struct state *));
 static VOID newarc _ANSI_ARGS_((struct nfa *, int, pcolor, struct state *, struct state *));
 static struct arc *allocarc _ANSI_ARGS_((struct nfa *, struct state *));
 static VOID freearc _ANSI_ARGS_((struct nfa *, struct arc *));
+static int nonemptyouts _ANSI_ARGS_((struct state *));
+static int nonemptyins _ANSI_ARGS_((struct state *));
 static struct arc *findarc _ANSI_ARGS_((struct state *, int, pcolor));
 static VOID cparc _ANSI_ARGS_((struct nfa *, struct arc *, struct state *, struct state *));
 static VOID moveins _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
 static VOID copyins _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
+static VOID copynonemptyins _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
 static VOID moveouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
 static VOID copyouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
+static VOID copynonemptyouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
 static VOID cloneouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *, int));
 static VOID delsub _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
 static VOID deltraverse _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
@@ -146,7 +150,8 @@ static int push _ANSI_ARGS_((struct nfa *, struct arc *));
 #define	COMPATIBLE	3	/* compatible but not satisfied yet */
 static int combine _ANSI_ARGS_((struct arc *, struct arc *));
 static VOID fixempties _ANSI_ARGS_((struct nfa *, FILE *));
-static int unempty _ANSI_ARGS_((struct nfa *, struct arc *));
+static struct state *emptyreachable _ANSI_ARGS_((struct state *, struct state *));
+static VOID replaceempty _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
 static VOID cleanup _ANSI_ARGS_((struct nfa *));
 static VOID markreachable _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *));
 static VOID markcanreach _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *));
-- 
cgit v0.12


From 92e5e02234ad7151efec211f580998994de8d392 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Wed, 6 Mar 2013 16:07:14 +0000
Subject: New routine hasnonemptyout() for minor improvement to new
 fixempties().

---
 generic/regc_nfa.c | 18 +++++++++++++++++-
 generic/regcomp.c  |  1 +
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c
index 11fd49b..852a676 100644
--- a/generic/regc_nfa.c
+++ b/generic/regc_nfa.c
@@ -460,6 +460,22 @@ struct arc *victim;
 }
 
 /*
+ - hasnonemptyout - Does state have a non-EMPTY out arc?
+ ^ static int hasnonemptyout(struct state *);
+ */
+static int
+hasnonemptyout(s)
+struct state *s;
+{
+	struct arc *a;
+
+	for (a = s->outs; a != NULL; a = a->outchain)
+		if (a->type != EMPTY)
+			return 1;
+	return 0;
+}
+
+/*
  - nonemptyouts - count non-EMPTY out arcs of a state
  ^ static int nonemptyouts(struct state *);
  */
@@ -1279,7 +1295,7 @@ FILE *f;			/* for debug output; NULL none */
 	     * forward to it, not pull them back to s; and (2) we can optimize
 	     * away the push-forward, per comment above.  So do nothing.
 	     */
-	    if (s2->flag || nonemptyouts(s2) > 0)
+	    if (s2->flag || hasnonemptyout(s2))
 		replaceempty(nfa, s, s2);
 
 	    /* Reset the tmp fields as we walk back */
diff --git a/generic/regcomp.c b/generic/regcomp.c
index 6fd9c81..083e9b1 100644
--- a/generic/regcomp.c
+++ b/generic/regcomp.c
@@ -123,6 +123,7 @@ static VOID destroystate _ANSI_ARGS_((struct nfa *, struct state *));
 static VOID newarc _ANSI_ARGS_((struct nfa *, int, pcolor, struct state *, struct state *));
 static struct arc *allocarc _ANSI_ARGS_((struct nfa *, struct state *));
 static VOID freearc _ANSI_ARGS_((struct nfa *, struct arc *));
+static int hasnonemptyout _ANSI_ARGS_((struct state *));
 static int nonemptyouts _ANSI_ARGS_((struct state *));
 static int nonemptyins _ANSI_ARGS_((struct state *));
 static struct arc *findarc _ANSI_ARGS_((struct state *, int, pcolor));
-- 
cgit v0.12


From 37745421ae8d82481007aca3632c76c589a1ff63 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Wed, 6 Mar 2013 16:57:41 +0000
Subject: Use flag argument to combine copy(nonempty)* routines into copy*
 routines.

---
 generic/regc_nfa.c | 74 +++++++++++++++---------------------------------------
 generic/regcomp.c  |  8 +++---
 2 files changed, 23 insertions(+), 59 deletions(-)

diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c
index 852a676..6e32cc9 100644
--- a/generic/regc_nfa.c
+++ b/generic/regc_nfa.c
@@ -572,44 +572,27 @@ struct state *new;
 }
 
 /*
- - copyins - copy all in arcs of a state to another state
- ^ static VOID copyins(struct nfa *, struct state *, struct state *);
+ - copyins - copy in arcs of a state to another state
+ * Either all arcs, or only non-empty ones as determined by all value.
+ ^ static VOID copyins(struct nfa *, struct state *, struct state *, int);
  */
 static VOID
-copyins(nfa, old, new)
+copyins(nfa, old, new, all)
 struct nfa *nfa;
 struct state *old;
 struct state *new;
+int all;
 {
 	struct arc *a;
 
 	assert(old != new);
 
 	for (a = old->ins; a != NULL; a = a->inchain)
-		cparc(nfa, a, a->from, new);
+		if (all || a->type != EMPTY)
+			cparc(nfa, a, a->from, new);
 }
 
 /*
- - copynonemptyins - as above, but ignore empty arcs
- ^ static void copynonemptyins(struct nfa *, struct state *, struct state *);
- */
-static VOID
-copynonemptyins(nfa, oldState, newState)
-struct nfa *nfa;
-struct state *oldState;
-struct state *newState;
-{
-    struct arc *a;
-
-    assert(oldState != newState);
-
-    for (a=oldState->ins ; a!=NULL ; a=a->inchain) {
-	if (a->type != EMPTY)
-	    cparc(nfa, a, a->from, newState);
-    }
-}
-
-/*
  - moveouts - move all out arcs of a state to another state
  ^ static VOID moveouts(struct nfa *, struct state *, struct state *);
  */
@@ -630,41 +613,24 @@ struct state *new;
 }
 
 /*
- - copyouts - copy all out arcs of a state to another state
- ^ static VOID copyouts(struct nfa *, struct state *, struct state *);
+ - copyouts - copy out arcs of a state to another state
+ * Either all arcs, or only non-empty ones as determined by all value.
+ ^ static VOID copyouts(struct nfa *, struct state *, struct state *, int);
  */
 static VOID
-copyouts(nfa, old, new)
+copyouts(nfa, old, new, all)
 struct nfa *nfa;
 struct state *old;
 struct state *new;
+int all;
 {
 	struct arc *a;
 
 	assert(old != new);
 
 	for (a = old->outs; a != NULL; a = a->outchain)
-		cparc(nfa, a, new, a->to);
-}
-
-/*
- - copynonemptyouts - as above, but ignore empty arcs
- ^ static void copynonemptyouts(struct nfa *, struct state *, struct state *);
- */
-static VOID 
-copynonemptyouts(nfa, oldState, newState)
-struct nfa *nfa;
-struct state *oldState;
-struct state *newState;
-{
-    struct arc *a;
-
-    assert(oldState != newState);
-
-    for (a=oldState->outs ; a!=NULL ; a=a->outchain) {
-	if (a->type != EMPTY)
-	    cparc(nfa, a, newState, a->to);
-    }
+		if (all || a->type != EMPTY)
+			cparc(nfa, a, new, a->to);
 }
 
 /*
@@ -983,7 +949,7 @@ struct arc *con;
 		if (NISERR())
 			return 0;
 		assert(to != from);		/* con is not an inarc */
-		copyins(nfa, from, s);		/* duplicate inarcs */
+		copyins(nfa, from, s, 1);	/* duplicate inarcs */
 		cparc(nfa, con, s, to);		/* move constraint arc */
 		freearc(nfa, con);
 		from = s;
@@ -1123,7 +1089,7 @@ struct arc *con;
 		s = newstate(nfa);
 		if (NISERR())
 			return 0;
-		copyouts(nfa, to, s);		/* duplicate outarcs */
+		copyouts(nfa, to, s, 1);	/* duplicate outarcs */
 		cparc(nfa, con, from, s);	/* move constraint */
 		freearc(nfa, con);
 		to = s;
@@ -1393,11 +1359,11 @@ struct state *to;
     toins = (fromouts == 0) ? 1 : nonemptyins(to);
 
     if (fromouts > toins) {
-	copynonemptyouts(nfa, to, from);
+	copyouts(nfa, to, from, 0);
 	return;
     }
     if (fromouts < toins) {
-	copynonemptyins(nfa, from, to);
+	copyins(nfa, from, to, 0);
 	return;
     }
 
@@ -1409,10 +1375,10 @@ struct state *to;
      * resulting graph much.
      */
     if (from->nins > to->nouts) {
-	copynonemptyouts(nfa, to, from);
+	copyouts(nfa, to, from, 0);
 	return;
     } else {
-	copynonemptyins(nfa, from, to);
+	copyins(nfa, from, to, 0);
 	return;
     }
 }
diff --git a/generic/regcomp.c b/generic/regcomp.c
index 083e9b1..b44ef8f 100644
--- a/generic/regcomp.c
+++ b/generic/regcomp.c
@@ -129,11 +129,9 @@ static int nonemptyins _ANSI_ARGS_((struct state *));
 static struct arc *findarc _ANSI_ARGS_((struct state *, int, pcolor));
 static VOID cparc _ANSI_ARGS_((struct nfa *, struct arc *, struct state *, struct state *));
 static VOID moveins _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
-static VOID copyins _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
-static VOID copynonemptyins _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
+static VOID copyins _ANSI_ARGS_((struct nfa *, struct state *, struct state *, int));
 static VOID moveouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
-static VOID copyouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
-static VOID copynonemptyouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
+static VOID copyouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *, int));
 static VOID cloneouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *, int));
 static VOID delsub _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
 static VOID deltraverse _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
@@ -558,7 +556,7 @@ struct nfa *nfa;
 	/* do the splits */
 	for (s = slist; s != NULL; s = s2) {
 		s2 = newstate(nfa);
-		copyouts(nfa, s, s2);
+		copyouts(nfa, s, s2, 1);
 		for (a = s->ins; a != NULL; a = b) {
 			b = a->inchain;
 			if (a->from != pre) {
-- 
cgit v0.12


From 24abdb72019b174cb20e0d4d29e0a76b689c4459 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Wed, 6 Mar 2013 18:00:21 +0000
Subject: Indent reduction in fixempties().

---
 generic/regc_nfa.c | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c
index 6e32cc9..b125062 100644
--- a/generic/regc_nfa.c
+++ b/generic/regc_nfa.c
@@ -1208,15 +1208,15 @@ FILE *f;			/* for debug output; NULL none */
      */
     for (s = nfa->states; s != NULL && !NISERR(); s = nexts) {
 	nexts = s->next;
-	if (s->nouts == 1 && !s->flag) {
-	    a = s->outs;
-	    assert(a != NULL && a->outchain == NULL);
-	    if (a->type == EMPTY) {
-		if (s != a->to)
-		    moveins(nfa, s, a->to);
-		dropstate(nfa, s);
-	    }
-	}
+	if (s->flag || s->nouts != 1)
+	    continue;
+	a = s->outs;
+	assert(a != NULL && a->outchain == NULL);
+	if (a->type != EMPTY)
+	    continue;
+	if (s != a->to)
+	    moveins(nfa, s, a->to);
+	dropstate(nfa, s);
     }
 
     /*
@@ -1226,16 +1226,16 @@ FILE *f;			/* for debug output; NULL none */
     for (s = nfa->states; s != NULL && !NISERR(); s = nexts) {
 	nexts = s->next;
 	/* while we're at it, ensure tmp fields are clear for next step */
-	s->tmp = NULL;
-	if (s->nins == 1 && !s->flag) {
-	    a = s->ins;
-	    assert(a != NULL && a->inchain == NULL);
-	    if (a->type == EMPTY) {
-		if (s != a->from)
-		    moveouts(nfa, s, a->from);
-		dropstate(nfa, s);
-	    }
-	}
+	assert(s->tmp = NULL);
+	if (s->flag || s->nins != 1)
+	    continue;
+	a = s->ins;
+	assert(a != NULL && a->inchain == NULL);
+	if (a->type != EMPTY)
+	    continue;
+	if (s != a->from)
+	    moveouts(nfa, s, a->from);
+	dropstate(nfa, s);
     }
 
     /*
-- 
cgit v0.12


From b5ed01c2d8765e2f5bd622dfbfbf4c85215cd03a Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Wed, 6 Mar 2013 19:11:44 +0000
Subject: Rework into Tcl 8.4 coding style (closer to original Spencer).

---
 generic/regc_nfa.c | 351 +++++++++++++++++++++++++++--------------------------
 1 file changed, 177 insertions(+), 174 deletions(-)

diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c
index b125062..107e466 100644
--- a/generic/regc_nfa.c
+++ b/generic/regc_nfa.c
@@ -474,7 +474,7 @@ struct state *s;
 			return 1;
 	return 0;
 }
-
+
 /*
  - nonemptyouts - count non-EMPTY out arcs of a state
  ^ static int nonemptyouts(struct state *);
@@ -483,16 +483,15 @@ static int
 nonemptyouts(s)
 struct state *s;
 {
-    int n = 0;
-    struct arc *a;
-
-    for (a = s->outs; a != NULL; a = a->outchain) {
-	if (a->type != EMPTY)
-	    n++;
-    }
-    return n;
+	int n = 0;
+	struct arc *a;
+
+	for (a = s->outs; a != NULL; a = a->outchain)
+		if (a->type != EMPTY)
+			n++;
+	return n;
 }
-
+
 /*
  - nonemptyins - count non-EMPTY in arcs of a state
  ^ static int nonemptyins(struct state *);
@@ -501,16 +500,15 @@ static int
 nonemptyins(s)
 struct state *s;
 {
-    int n = 0;
-    struct arc *a;
-
-    for (a = s->ins; a != NULL; a = a->inchain) {
-	if (a->type != EMPTY)
-	    n++;
-    }
-    return n;
+	int n = 0;
+	struct arc *a;
+
+	for (a = s->ins; a != NULL; a = a->inchain)
+		if (a->type != EMPTY)
+			n++;
+	return n;
 }
-
+
 /*
  - findarc - find arc, if any, from given source with given type and color
  * If there is more than one such arc, the result is random.
@@ -1195,108 +1193,114 @@ fixempties(nfa, f)
 struct nfa *nfa;
 FILE *f;			/* for debug output; NULL none */
 {
-    struct state *s;
-    struct state *s2;
-    struct state *nexts;
-    struct arc *a;
-    struct arc *nexta;
-
-    /*
-     * First, get rid of any states whose sole out-arc is an EMPTY, since
-     * they're basically just aliases for their successor.  The parsing
-     * algorithm creates enough of these that it's worth special-casing this.
-     */
-    for (s = nfa->states; s != NULL && !NISERR(); s = nexts) {
-	nexts = s->next;
-	if (s->flag || s->nouts != 1)
-	    continue;
-	a = s->outs;
-	assert(a != NULL && a->outchain == NULL);
-	if (a->type != EMPTY)
-	    continue;
-	if (s != a->to)
-	    moveins(nfa, s, a->to);
-	dropstate(nfa, s);
-    }
-
-    /*
-     * Similarly, get rid of any state with a single EMPTY in-arc, by folding
-     * it into its predecessor.
-     */
-    for (s = nfa->states; s != NULL && !NISERR(); s = nexts) {
-	nexts = s->next;
-	/* while we're at it, ensure tmp fields are clear for next step */
-	assert(s->tmp = NULL);
-	if (s->flag || s->nins != 1)
-	    continue;
-	a = s->ins;
-	assert(a != NULL && a->inchain == NULL);
-	if (a->type != EMPTY)
-	    continue;
-	if (s != a->from)
-	    moveouts(nfa, s, a->from);
-	dropstate(nfa, s);
-    }
-
-    /*
-     * For each remaining NFA state, find all other states that are reachable
-     * from it by a chain of one or more EMPTY arcs.  Then generate new arcs
-     * that eliminate the need for each such chain.
-     *
-     * If we just do this straightforwardly, the algorithm gets slow in
-     * complex graphs, because the same arcs get copied to all intermediate
-     * states of an EMPTY chain, and then uselessly pushed repeatedly to the
-     * chain's final state; we waste a lot of time in newarc's duplicate
-     * checking.  To improve matters, we decree that any state with only EMPTY
-     * out-arcs is "doomed" and will not be part of the final NFA. That can be
-     * ensured by not adding any new out-arcs to such a state. Having ensured
-     * that, we need not update the state's in-arcs list either; all arcs that
-     * might have gotten pushed forward to it will just get pushed directly to
-     * successor states.  This eliminates most of the useless duplicate arcs.
-     */
-    for (s = nfa->states; s != NULL && !NISERR(); s = s->next) {
-	for (s2 = emptyreachable(s, s); s2 != s && !NISERR(); s2 = nexts) {
-	    /*
-	     * If s2 is doomed, we decide that (1) we will always push arcs
-	     * forward to it, not pull them back to s; and (2) we can optimize
-	     * away the push-forward, per comment above.  So do nothing.
-	     */
-	    if (s2->flag || hasnonemptyout(s2))
-		replaceempty(nfa, s, s2);
-
-	    /* Reset the tmp fields as we walk back */
-	    nexts = s2->tmp;
-	    s2->tmp = NULL;
+	struct state *s;
+	struct state *s2;
+	struct state *nexts;
+	struct arc *a;
+	struct arc *nexta;
+
+	/*
+	 * First, get rid of any states whose sole out-arc is an EMPTY,
+	 * since they're basically just aliases for their successor.
+	 * The parsing algorithm creates enough of these that it's worth
+	 * special-casing this.
+	 */
+	for (s = nfa->states; s != NULL && !NISERR(); s = nexts) {
+		nexts = s->next;
+		if (s->flag || s->nouts != 1)
+			continue;
+		a = s->outs;
+		assert(a != NULL && a->outchain == NULL);
+		if (a->type != EMPTY)
+			continue;
+		if (s != a->to)
+			moveins(nfa, s, a->to);
+		dropstate(nfa, s);
 	}
-	s->tmp = NULL;
-    }
-
-    /*
-     * Now remove all the EMPTY arcs, since we don't need them anymore.
-     */
-    for (s = nfa->states; s != NULL && !NISERR(); s = s->next) {
-	for (a = s->outs; a != NULL; a = nexta) {
-	    nexta = a->outchain;
-	    if (a->type == EMPTY)
-		freearc(nfa, a);
+
+	/*
+	 * Similarly, get rid of any state with a single EMPTY in-arc,
+	 * by folding it into its predecessor.
+	 */
+	for (s = nfa->states; s != NULL && !NISERR(); s = nexts) {
+		nexts = s->next;
+		/* Ensure tmp fields are clear for next step */
+		assert(s->tmp = NULL);
+		if (s->flag || s->nins != 1)
+			continue;
+		a = s->ins;
+		assert(a != NULL && a->inchain == NULL);
+		if (a->type != EMPTY)
+			continue;
+		if (s != a->from)
+			moveouts(nfa, s, a->from);
+		dropstate(nfa, s);
+	}
+
+	/*
+	 * For each remaining NFA state, find all other states that are
+	 * reachable from it by a chain of one or more EMPTY arcs.  Then
+	 * generate new arcs that eliminate the need for each such chain.
+	 *
+	 * If we just do this straightforwardly, the algorithm gets slow
+	 * in complex graphs, because the same arcs get copied to all
+	 * intermediate states of an EMPTY chain, and then uselessly
+	 * pushed repeatedly to the chain's final state; we waste a lot
+	 * of time in newarc's duplicate checking.  To improve matters,
+	 * we decree that any state with only EMPTY out-arcs is "doomed"
+	 * and will not be part of the final NFA. That can be ensured by
+	 * not adding any new out-arcs to such a state. Having ensured
+	 * that, we need not update the state's in-arcs list either; all
+	 * arcs that might have gotten pushed forward to it will just get
+	 * pushed directly to successor states.  This eliminates most of
+	 * the useless duplicate arcs.
+	 */
+	for (s = nfa->states; s != NULL && !NISERR(); s = s->next) {
+		for (s2 = emptyreachable(s, s); s2 != s && !NISERR();
+				s2 = nexts) {
+			/*
+			 * If s2 is doomed, we decide that (1) we will
+			 * always push arcs forward to it, not pull them
+			 * back to s; and (2) we can optimize away the
+			 * push-forward, per comment above.
+			 * So do nothing.
+			 */
+			if (s2->flag || hasnonemptyout(s2))
+				replaceempty(nfa, s, s2);
+
+			/* Reset the tmp fields as we walk back */
+			nexts = s2->tmp;
+			s2->tmp = NULL;
+		}
+		s->tmp = NULL;
+	}
+
+	/*
+	 * Remove all the EMPTY arcs, since we don't need them anymore.
+	 */
+	for (s = nfa->states; s != NULL; s = s->next)
+		for (a = s->outs; a != NULL; a = nexta) {
+			nexta = a->outchain;
+			if (a->type == EMPTY)
+				freearc(nfa, a);
+		}
+
+	/*
+	 * And remove any states that have become useless.  (This
+	 * cleanup is not very thorough, and would be even less so if we
+	 * tried to combine it with the previous step; but cleanup()
+	 * will take care of anything we miss.)
+	 */
+	for (s = nfa->states; s != NULL && !NISERR(); s = nexts) {
+		nexts = s->next;
+		if ((s->nins == 0 || s->nouts == 0) && !s->flag)
+			dropstate(nfa, s);
 	}
-    }
-
-    /*
-     * And remove any states that have become useless.  (This cleanup is not
-     * very thorough, and would be even less so if we tried to combine it with
-     * the previous step; but cleanup() will take care of anything we miss.)
-     */
-    for (s = nfa->states; s != NULL && !NISERR(); s = nexts) {
-	nexts = s->next;
-	if ((s->nins == 0 || s->nouts == 0) && !s->flag)
-	    dropstate(nfa, s);
-    }
-
-    if (f != NULL && !NISERR())
-	dumpnfa(nfa, f);
+
+	if (f != NULL && !NISERR())
+		dumpnfa(nfa, f);
 }
-
+
 /*
  - emptyreachable - recursively find all states reachable from s by EMPTY arcs
  * The return value is the last such state found.  Its tmp field links back
@@ -1312,17 +1316,16 @@ emptyreachable(s, lastfound)
 struct state *s;
 struct state *lastfound;
 {
-    struct arc *a;
-
-    s->tmp = lastfound;
-    lastfound = s;
-    for (a = s->outs; a != NULL; a = a->outchain) {
-	if (a->type == EMPTY && a->to->tmp == NULL)
-	    lastfound = emptyreachable(a->to, lastfound);
-    }
-    return lastfound;
+	struct arc *a;
+
+	s->tmp = lastfound;
+	lastfound = s;
+	for (a = s->outs; a != NULL; a = a->outchain)
+		if (a->type == EMPTY && a->to->tmp == NULL)
+			lastfound = emptyreachable(a->to, lastfound);
+	return lastfound;
 }
-
+
 /*
  - replaceempty - replace an EMPTY arc chain with some non-empty arcs
  * The EMPTY arc(s) should be deleted later, but we can't do it here because
@@ -1335,54 +1338,54 @@ struct nfa *nfa;
 struct state *from;
 struct state *to;
 {
-    int fromouts;
-    int toins;
-
-    assert(from != to);
-
-    /*
-     * Create replacement arcs that bypass the need for the EMPTY chain.  We
-     * can do this either by pushing arcs forward (linking directly from
-     * "from"'s predecessors to "to") or by pulling them back (linking
-     * directly from "from" to "to"'s successors).  In general, we choose
-     * whichever way creates greater fan-out or fan-in, so as to improve the
-     * odds of reducing the other state to zero in-arcs or out-arcs and
-     * thereby being able to delete it.  However, if "from" is doomed (has no
-     * non-EMPTY out-arcs), we must keep it so, so always push forward in that
-     * case.
-     *
-     * The fan-out/fan-in comparison should count only non-EMPTY arcs.  If
-     * "from" is doomed, we can skip counting "to"'s arcs, since we want to
-     * force taking the copynonemptyins path in that case.
-     */
-    fromouts = nonemptyouts(from);
-    toins = (fromouts == 0) ? 1 : nonemptyins(to);
-
-    if (fromouts > toins) {
-	copyouts(nfa, to, from, 0);
-	return;
-    }
-    if (fromouts < toins) {
-	copyins(nfa, from, to, 0);
-	return;
-    }
-
-    /*
-     * fromouts == toins.  Decide on secondary issue: copy fewest arcs.
-     *
-     * Doesn't seem to be worth the trouble to exclude empties from these
-     * comparisons; that takes extra time and doesn't seem to improve the
-     * resulting graph much.
-     */
-    if (from->nins > to->nouts) {
-	copyouts(nfa, to, from, 0);
-	return;
-    } else {
+	int fromouts;
+	int toins;
+
+	assert(from != to);
+
+	/*
+	 * Create replacement arcs that bypass the need for the EMPTY
+	 * chain.  We can do this either by pushing arcs forward
+	 * (linking directly from predecessors of "from" to "to") or by
+	 * pulling them back (linking directly from "from" to the
+	 * successors of "to").  In general, we choose whichever way
+	 * creates greater fan-out or fan-in, so as to improve the odds
+	 * of reducing the other state to zero in-arcs or out-arcs and
+	 * thereby being able to delete it.  However, if "from" is
+	 * doomed (has no non-EMPTY out-arcs), we must keep it so, so
+	 * always push forward in that case.
+	 *
+	 * The fan-out/fan-in comparison should count only non-EMPTY
+	 * arcs.  If "from" is doomed, we can skip counting "to"'s arcs,
+	 * since we want to force taking the copyins path in that case.
+	 */
+	fromouts = nonemptyouts(from);
+	toins = (fromouts == 0) ? 1 : nonemptyins(to);
+
+	if (fromouts > toins) {
+		copyouts(nfa, to, from, 0);
+		return;
+	}
+	if (fromouts < toins) {
+		copyins(nfa, from, to, 0);
+		return;
+	}
+
+	/*
+	 * fromouts == toins.  Secondary decision: copy fewest arcs.
+	 *
+	 * Doesn't seem to be worth the trouble to exclude empties from
+	 * these comparisons; that takes extra time and doesn't seem to
+	 * improve the resulting graph much.
+	 */
+	if (from->nins > to->nouts) {
+		copyouts(nfa, to, from, 0);
+		return;
+	}
+
 	copyins(nfa, from, to, 0);
-	return;
-    }
 }
-
+
 /*
  - cleanup - clean up NFA after optimizations
  ^ static VOID cleanup(struct nfa *);
-- 
cgit v0.12