Adaptation of re-oNsquared.patch from Tom Lane @ postgres.

author: dgp <dgp@users.sourceforge.net> 2015-10-19 19:32:04 (GMT)
committer: dgp <dgp@users.sourceforge.net> 2015-10-19 19:32:04 (GMT)
commit: 8ed7672ab71b54afa94e164e73fdc274b0b39771 (patch)
tree: 4da041e80ad7a2639abcff0694fe8ed6d2e77d3b /generic/regc_nfa.c
parent: b8c0c06fdf9f099f27f114fc9c92b7786dce13a5 (diff)
download: tcl-8ed7672ab71b54afa94e164e73fdc274b0b39771.zip
tcl-8ed7672ab71b54afa94e164e73fdc274b0b39771.tar.gz
tcl-8ed7672ab71b54afa94e164e73fdc274b0b39771.tar.bz2
1 files changed, 708 insertions, 97 deletions
diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c
index 20eb3ba..0f572b8 100644
--- a/generic/regc_nfa.c
+++ b/generic/regc_nfa.c
@@ -35,6 +35,8 @@
 #define	NISERR()	VISERR(nfa->v)
 #define	NERR(e)		VERR(nfa->v, (e))
 #define STACK_TOO_DEEP(x) (0)
+#define CANCEL_REQUESTED(x) (0)
+#define REG_CANCEL 777
 
 /*
  - newnfa - set up an NFA
@@ -322,6 +324,10 @@ destroystate(
  ^ static VOID newarc(struct nfa *, int, pcolor, struct state *,
  ^	struct state *);
  */
+/*
+ * This function checks to make sure that no duplicate arcs are created.
+ * In general we never want duplicates.
+ */
 static void
 newarc(
     struct nfa *nfa,
@@ -334,16 +340,42 @@ newarc(
 
     assert(from != NULL && to != NULL);
 
-    /*
-     * Check for duplicates.
-     */
-
-    for (a=from->outs ; a!=NULL ; a=a->outchain) {
-	if (a->to == to && a->co == co && a->type == t) {
-	    return;
+    /* check for duplicate arc, using whichever chain is shorter */
+    if (from->nouts <= to->nins) {
+	for (a = from->outs; a != NULL; a = a->outchain) {
+	    if (a->to == to && a->co == co && a->type == t) {
+		return;
+	    }
+	}
+    } else {
+	for (a = to->ins; a != NULL; a = a->inchain) {
+	    if (a->from == from && a->co == co && a->type == t) {
+		return;
+	    }
 	}
     }
+  
+    /* no dup, so create the arc */
+    createarc(nfa, t, co, from, to);
+}
 
+/*
+ * createarc - create a new arc within an NFA
+ *
+ * This function must *only* be used after verifying that there is no existing
+ * identical arc (same type/color/from/to).
+ */
+static void
+createarc(
+    struct nfa * nfa,
+    int t,
+    pcolor co,
+    struct state * from,
+    struct state * to)
+{
+    struct arc *a;
+
+    /* the arc is physically allocated within its from-state */
     a = allocarc(nfa, from);
     if (NISERR()) {
 	return;
@@ -356,15 +388,21 @@ newarc(
     a->from = from;
 
     /*
-     * Put the new arc on the beginning, not the end, of the chains. Not only
-     * is this easier, it has the very useful side effect that deleting the
-     * most-recently-added arc is the cheapest case rather than the most
-     * expensive one.
+     * Put the new arc on the beginning, not the end, of the chains; it's
+     * simpler here, and freearc() is the same cost either way.  See also the
+     * logic in moveins() and its cohorts, as well as fixempties().
      */
-
     a->inchain = to->ins;
+    a->inchainRev = NULL;
+    if (to->ins) {
+	to->ins->inchainRev = a;
+    }
     to->ins = a;
     a->outchain = from->outs;
+    a->outchainRev = NULL;
+    if (from->outs) {
+	from->outs->outchainRev = a;
+    }
     from->outs = a;
 
     from->nouts++;
@@ -437,7 +475,7 @@ freearc(
 {
     struct state *from = victim->from;
     struct state *to = victim->to;
-    struct arc *a;
+    struct arc *predecessor;
 
     assert(victim->type != 0);
 
@@ -454,16 +492,17 @@ freearc(
      */
 
     assert(from != NULL);
-    assert(from->outs != NULL);
-    a = from->outs;
-    if (a == victim) {		/* simple case: first in chain */
+    predecessor = victim->outchainRev;
+    if (predecessor == NULL) {
+	assert(from->outs == victim);
 	from->outs = victim->outchain;
     } else {
-	for (; a!=NULL && a->outchain!=victim ; a=a->outchain) {
-	    continue;
-	}
-	assert(a != NULL);
-	a->outchain = victim->outchain;
+	assert(predecessor->outchain == victim);
+	predecessor->outchain = victim->outchain;
+    }
+    if (victim->outchain != NULL) {
+	assert(victim->outchain->outchainRev == victim);
+	victim->outchain->outchainRev = predecessor;
     }
     from->nouts--;
 
@@ -472,31 +511,78 @@ freearc(
      */
 
     assert(to != NULL);
-    assert(to->ins != NULL);
-    a = to->ins;
-    if (a == victim) {		/* simple case: first in chain */
+    predecessor = victim->inchainRev;
+    if (predecessor == NULL) {
+	assert(to->ins == victim);
 	to->ins = victim->inchain;
     } else {
-	for (; a->inchain!=victim ; a=a->inchain) {
-	    assert(a->inchain != NULL);
-	    continue;
-	}
-	a->inchain = victim->inchain;
+	assert(predecessor->inchain == victim);
+	predecessor->inchain = victim->inchain;
+    }
+    if (victim->inchain != NULL) {
+	assert(victim->inchain->inchainRev == victim);
+	victim->inchain->inchainRev = predecessor;
     }
     to->nins--;
 
     /*
-     * Clean up and place on free list.
+     * Clean up and place on from-state's free list.
      */
 
     victim->type = 0;
     victim->from = NULL;	/* precautions... */
     victim->to = NULL;
     victim->inchain = NULL;
+    victim->inchainRev = NULL;
     victim->outchain = NULL;
+    victim->outchainRev = NULL;
     victim->freechain = from->free;
     from->free = victim;
 }
+
+/*
+ * changearctarget - flip an arc to have a different to state
+ *
+ * Caller must have verified that there is no pre-existing duplicate arc.
+ *
+ * Note that because we store arcs in their from state, we can't easily have
+ * a similar changearcsource function.
+ */
+static void
+changearctarget(struct arc * a, struct state * newto)
+{
+    struct state *oldto = a->to;
+    struct arc *predecessor;
+
+    assert(oldto != newto);
+
+    /* take it off old target's in-chain */
+    assert(oldto != NULL);
+    predecessor = a->inchainRev;
+    if (predecessor == NULL) {
+	assert(oldto->ins == a);
+	oldto->ins = a->inchain;
+    } else {
+	assert(predecessor->inchain == a);
+	predecessor->inchain = a->inchain;
+    }
+    if (a->inchain != NULL) {
+	assert(a->inchain->inchainRev == a);
+	a->inchain->inchainRev = predecessor;
+    }
+    oldto->nins--;
+
+    a->to = newto;
+
+    /* prepend it to new target's in-chain */
+    a->inchain = newto->ins;
+    a->inchainRev = NULL;
+    if (newto->ins) {
+	newto->ins->inchainRev = a;
+    }
+    newto->ins = a;
+    newto->nins++;
+}
 
 /*
  - hasnonemptyout - Does state have a non-EMPTY out arc?
@@ -589,13 +675,181 @@ cparc(
 {
     newarc(nfa, oa->type, oa->co, from, to);
 }
+
+/*
+ * sortins - sort the in arcs of a state by from/color/type
+ */
+static void
+sortins(
+    struct nfa * nfa,
+    struct state * s)
+{
+    struct arc **sortarray;
+    struct arc *a;
+    int n = s->nins;
+    int i;
+
+    if (n <= 1) {
+	return;		/* nothing to do */
+    }
+    /* make an array of arc pointers ... */
+    sortarray = (struct arc **) MALLOC(n * sizeof(struct arc *));
+    if (sortarray == NULL) {
+	NERR(REG_ESPACE);
+	return;
+    }
+    i = 0;
+    for (a = s->ins; a != NULL; a = a->inchain) {
+	sortarray[i++] = a;
+    }
+    assert(i == n);
+    /* ... sort the array */
+    qsort(sortarray, n, sizeof(struct arc *), sortins_cmp);
+    /* ... and rebuild arc list in order */
+    /* it seems worth special-casing first and last items to simplify loop */
+    a = sortarray[0];
+    s->ins = a;
+    a->inchain = sortarray[1];
+    a->inchainRev = NULL;
+    for (i = 1; i < n - 1; i++) {
+	a = sortarray[i];
+	a->inchain = sortarray[i + 1];
+	a->inchainRev = sortarray[i - 1];
+    }
+    a = sortarray[i];
+    a->inchain = NULL;
+    a->inchainRev = sortarray[i - 1];
+    FREE(sortarray);
+}
+
+static int
+sortins_cmp(
+    const void *a,
+    const void *b)
+{
+    const struct arc *aa = *((const struct arc * const *) a);
+    const struct arc *bb = *((const struct arc * const *) b);
+
+    /* we check the fields in the order they are most likely to be different */
+    if (aa->from->no < bb->from->no) {
+	return -1;
+    }
+    if (aa->from->no > bb->from->no) {
+ 	return 1;
+    }
+    if (aa->co < bb->co) {
+ 	return -1;
+    }
+    if (aa->co > bb->co) {
+ 	return 1;
+    }
+    if (aa->type < bb->type) {
+ 	return -1;
+    }
+    if (aa->type > bb->type) {
+ 	return 1;
+    }
+    return 0;
+}
+ 
+/*
+ * sortouts - sort the out arcs of a state by to/color/type
+ */
+static void
+sortouts(
+    struct nfa * nfa,
+    struct state * s)
+{
+    struct arc **sortarray;
+    struct arc *a;
+    int	n = s->nouts;
+    int	i;
+
+    if (n <= 1) {
+	return;					/* nothing to do */
+    }
+    /* make an array of arc pointers ... */
+    sortarray = (struct arc **) MALLOC(n * sizeof(struct arc *));
+    if (sortarray == NULL) {
+	NERR(REG_ESPACE);
+	return;
+    }
+    i = 0;
+    for (a = s->outs; a != NULL; a = a->outchain) {
+	sortarray[i++] = a;
+    }
+    assert(i == n);
+    /* ... sort the array */
+    qsort(sortarray, n, sizeof(struct arc *), sortouts_cmp);
+    /* ... and rebuild arc list in order */
+    /* it seems worth special-casing first and last items to simplify loop */
+    a = sortarray[0];
+    s->outs = a;
+    a->outchain = sortarray[1];
+    a->outchainRev = NULL;
+    for (i = 1; i < n - 1; i++) {
+	a = sortarray[i];
+	a->outchain = sortarray[i + 1];
+	a->outchainRev = sortarray[i - 1];
+    }
+    a = sortarray[i];
+    a->outchain = NULL;
+    a->outchainRev = sortarray[i - 1];
+    FREE(sortarray);
+}
+
+static int
+sortouts_cmp(
+    const void *a,
+    const void *b)
+{
+    const struct arc *aa = *((const struct arc * const *) a);
+    const struct arc *bb = *((const struct arc * const *) b);
+
+    /* we check the fields in the order they are most likely to be different */
+    if (aa->to->no < bb->to->no) {
+	return -1;
+    }
+    if (aa->to->no > bb->to->no) {
+	return 1;
+    }
+    if (aa->co < bb->co) {
+	return -1;
+    }
+    if (aa->co > bb->co) {
+	return 1;
+    }
+    if (aa->type < bb->type) {
+	return -1;
+    }
+    if (aa->type > bb->type) {
+	return 1;
+    }
+    return 0;
+}
+
+/*
+ * Common decision logic about whether to use arc-by-arc operations or
+ * sort/merge.  If there's just a few source arcs we cannot recoup the
+ * cost of sorting the destination arc list, no matter how large it is.
+ * Otherwise, limit the number of arc-by-arc comparisons to about 1000
+ * (a somewhat arbitrary choice, but the breakeven point would probably
+ * be machine dependent anyway).
+ */
+#define BULK_ARC_OP_USE_SORT(nsrcarcs, ndestarcs) \
+	((nsrcarcs) < 4 ? 0 : ((nsrcarcs) > 32 || (ndestarcs) > 32))
 
 /*
  - moveins - move all in arcs of a state to another state
  * You might think this could be done better by just updating the
- * existing arcs, and you would be right if it weren't for the desire
+ * existing arcs, and you would be right if it weren't for the need
  * for duplicate suppression, which makes it easier to just make new
  * ones to exploit the suppression built into newarc.
+ *
+ * However, if we have a whole lot of arcs to deal with, retail duplicate
+ * checks become too slow.  In that case we proceed by sorting and merging
+ * the arc lists, and then we can indeed just update the arcs in-place.
+ *
  ^ static VOID moveins(struct nfa *, struct state *, struct state *);
  */
 static void
@@ -604,14 +858,79 @@ moveins(
     struct state *oldState,
     struct state *newState)
 {
-    struct arc *a;
-
     assert(oldState != newState);
 
-    while ((a = oldState->ins) != NULL) {
-	cparc(nfa, a, a->from, newState);
-	freearc(nfa, a);
+    if (!BULK_ARC_OP_USE_SORT(oldState->nins, newState->nins)) {
+	/* With not too many arcs, just do them one at a time */
+	struct arc *a;
+
+	while ((a = oldState->ins) != NULL) {
+	    cparc(nfa, a, a->from, newState);
+	    freearc(nfa, a);
+	}
+    } else {
+	/*
+	 * With many arcs, use a sort-merge approach.  Note changearctarget()
+	 * will put the arc onto the front of newState's chain, so it does not
+	 * break our walk through the sorted part of the chain.
+	 */
+	struct arc *oa;
+	struct arc *na;
+
+	/*
+	 * Because we bypass newarc() in this code path, we'd better include a
+	 * cancel check.
+	 */
+	if (CANCEL_REQUESTED(nfa->v->re)) {
+	    NERR(REG_CANCEL);
+	    return;
+	}
+
+	sortins(nfa, oldState);
+	sortins(nfa, newState);
+	if (NISERR()) {
+	    return;		/* might have failed to sort */
+	}
+	oa = oldState->ins;
+	na = newState->ins;
+	while (oa != NULL && na != NULL) {
+	    struct arc *a = oa;
+
+	    switch (sortins_cmp(&oa, &na)) {
+		case -1:
+		    /* newState does not have anything matching oa */
+		    oa = oa->inchain;
+
+		    /*
+		     * Rather than doing createarc+freearc, we can just unlink
+		     * and relink the existing arc struct.
+		     */
+		    changearctarget(a, newState);
+		    break;
+		case 0:
+		    /* match, advance in both lists */
+		    oa = oa->inchain;
+		    na = na->inchain;
+		    /* ... and drop duplicate arc from oldState */
+		    freearc(nfa, a);
+		    break;
+		case +1:
+		    /* advance only na; oa might have a match later */
+		    na = na->inchain;
+		    break;
+		default:
+		    assert(NOTREACHED);
+	    }
+	}
+	while (oa != NULL) {
+	    /* newState does not have anything matching oa */
+	    struct arc *a = oa;
+
+	    oa = oa->inchain;
+	    changearctarget(a, newState);
+	}
     }
+
     assert(oldState->nins == 0);
     assert(oldState->ins == NULL);
 }
@@ -628,14 +947,178 @@ copyins(
     struct state *newState,
     int all)
 {
-    struct arc *a;
-
     assert(oldState != newState);
 
-    for (a=oldState->ins ; a!=NULL ; a=a->inchain) {
-	if (all || a->type != EMPTY) {
-	    cparc(nfa, a, a->from, newState);
+    if (!BULK_ARC_OP_USE_SORT(oldState->nins, newState->nins)) {
+	/* With not too many arcs, just do them one at a time */
+	struct arc *a;
+
+	for (a = oldState->ins; a != NULL; a = a->inchain) {
+	    if (all || a->type != EMPTY) {
+		cparc(nfa, a, a->from, newState);
+	    }
+	}
+    } else {
+	/*
+	 * With many arcs, use a sort-merge approach.  Note that createarc()
+	 * will put new arcs onto the front of newState's chain, so it does
+	 * not break our walk through the sorted part of the chain.
+	 */
+	struct arc *oa;
+	struct arc *na;
+
+	/*
+	 * Because we bypass newarc() in this code path, we'd better include a
+	 * cancel check.
+	 */
+	if (CANCEL_REQUESTED(nfa->v->re)) {
+	    NERR(REG_CANCEL);
+	    return;
 	}
+
+	sortins(nfa, oldState);
+	sortins(nfa, newState);
+	if (NISERR()) {
+	    return;		/* might have failed to sort */
+	}
+	oa = oldState->ins;
+	na = newState->ins;
+	while (oa != NULL && na != NULL) {
+	    struct arc *a = oa;
+
+	    if (!all && a->type == EMPTY) {
+		oa = oa->inchain;
+		continue;
+	    }
+
+	    switch (sortins_cmp(&oa, &na)) {
+		case -1:
+		    /* newState does not have anything matching oa */
+		    oa = oa->inchain;
+		    createarc(nfa, a->type, a->co, a->from, newState);
+		    break;
+		case 0:
+		    /* match, advance in both lists */
+		    oa = oa->inchain;
+		    na = na->inchain;
+		    break;
+		case +1:
+		    /* advance only na; oa might have a match later */
+		    na = na->inchain;
+		    break;
+		default:
+		    assert(NOTREACHED);
+	    }
+	}
+	while (oa != NULL) {
+	    /* newState does not have anything matching oa */
+	    struct arc *a = oa;
+
+	    if (!all && a->type == EMPTY) {
+		oa = oa->inchain;
+		continue;
+	    }
+
+	    oa = oa->inchain;
+	    createarc(nfa, a->type, a->co, a->from, newState);
+	}
+    }
+}
+
+/*
+ * mergeins - merge a list of inarcs into a state
+ *
+ * This is much like copyins, but the source arcs are listed in an array,
+ * and are not guaranteed unique.  It's okay to clobber the array contents.
+ */
+static void
+mergeins(
+    struct nfa * nfa,
+    struct state * s,
+    struct arc ** arcarray,
+    int arccount)
+{
+    struct arc *na;
+    int	i;
+    int	j;
+
+    if (arccount <= 0) {
+	return;
+    }
+
+    /*
+     * Because we bypass newarc() in this code path, we'd better include a
+     * cancel check.
+     */
+    if (CANCEL_REQUESTED(nfa->v->re)) {
+	NERR(REG_CANCEL);
+	return;
+    }
+
+    /* Sort existing inarcs as well as proposed new ones */
+    sortins(nfa, s);
+    if (NISERR()) {
+	return;			/* might have failed to sort */
+    }
+
+    qsort(arcarray, arccount, sizeof(struct arc *), sortins_cmp);
+
+    /*
+     * arcarray very likely includes dups, so we must eliminate them.  (This
+     * could be folded into the next loop, but it's not worth the trouble.)
+     */
+    j = 0;
+    for (i = 1; i < arccount; i++) {
+	switch (sortins_cmp(&arcarray[j], &arcarray[i])) {
+	    case -1:
+		/* non-dup */
+		arcarray[++j] = arcarray[i];
+		break;
+	    case 0:
+		/* dup */
+		break;
+	    default:
+		/* trouble */
+		assert(NOTREACHED);
+	}
+    }
+    arccount = j + 1;
+
+    /*
+     * Now merge into s' inchain.  Note that createarc() will put new arcs
+     * onto the front of s's chain, so it does not break our walk through the
+     * sorted part of the chain.
+     */
+    i = 0;
+    na = s->ins;
+    while (i < arccount && na != NULL) {
+	struct arc *a = arcarray[i];
+
+	switch (sortins_cmp(&a, &na)) {
+	    case -1:
+		/* s does not have anything matching a */
+		createarc(nfa, a->type, a->co, a->from, s);
+		i++;
+		break;
+	    case 0:
+		/* match, advance in both lists */
+		i++;
+		na = na->inchain;
+		break;
+	    case +1:
+		/* advance only na; array might have a match later */
+		na = na->inchain;
+		break;
+	    default:
+		assert(NOTREACHED);
+	}
+    }
+    while (i < arccount) {
+	/* s does not have anything matching a */
+	struct arc *a = arcarray[i];
+
+	createarc(nfa, a->type, a->co, a->from, s);
+	i++;
     }
 }
 
@@ -649,14 +1132,78 @@ moveouts(
     struct state *oldState,
     struct state *newState)
 {
-    struct arc *a;
-
     assert(oldState != newState);
 
-    while ((a = oldState->outs) != NULL) {
-	cparc(nfa, a, newState, a->to);
-	freearc(nfa, a);
+    if (!BULK_ARC_OP_USE_SORT(oldState->nouts, newState->nouts)) {
+	/* With not too many arcs, just do them one at a time */
+	struct arc *a;
+
+	while ((a = oldState->outs) != NULL) {
+	    cparc(nfa, a, newState, a->to);
+	    freearc(nfa, a);
+	}
+    } else {
+	/*
+	 * With many arcs, use a sort-merge approach.  Note that createarc()
+	 * will put new arcs onto the front of newState's chain, so it does
+	 * not break our walk through the sorted part of the chain.
+	 */
+	struct arc *oa;
+	struct arc *na;
+
+	/*
+	 * Because we bypass newarc() in this code path, we'd better include a
+	 * cancel check.
+	 */
+	if (CANCEL_REQUESTED(nfa->v->re)) {
+	    NERR(REG_CANCEL);
+	    return;
+	}
+
+	sortouts(nfa, oldState);
+	sortouts(nfa, newState);
+	if (NISERR()) {
+	    return;	/* might have failed to sort */
+	}
+	oa = oldState->outs;
+	na = newState->outs;
+	while (oa != NULL && na != NULL) {
+	    struct arc *a = oa;
+
+	    switch (sortouts_cmp(&oa, &na)) {
+		case -1:
+		    /* newState does not have anything matching oa */
+		    oa = oa->outchain;
+		    createarc(nfa, a->type, a->co, newState, a->to);
+		    freearc(nfa, a);
+		    break;
+		case 0:
+		    /* match, advance in both lists */
+		    oa = oa->outchain;
+		    na = na->outchain;
+		    /* ... and drop duplicate arc from oldState */
+		    freearc(nfa, a);
+		    break;
+		case +1:
+		    /* advance only na; oa might have a match later */
+		    na = na->outchain;
+		    break;
+		default:
+		    assert(NOTREACHED);
+	    }
+	}
+	while (oa != NULL) {
+	    /* newState does not have anything matching oa */
+	    struct arc *a = oa;
+
+	    oa = oa->outchain;
+	    createarc(nfa, a->type, a->co, newState, a->to);
+	    freearc(nfa, a);
+	}
     }
+
+    assert(oldState->nouts == 0);
+    assert(oldState->outs == NULL);
 }
 
 /*
@@ -671,13 +1218,80 @@ copyouts(
     struct state *newState,
     int all)
 {
-    struct arc *a;
-
     assert(oldState != newState);
 
-    for (a=oldState->outs ; a!=NULL ; a=a->outchain) {
-	if (all || a->type != EMPTY) {
-	    cparc(nfa, a, newState, a->to);
+    if (!BULK_ARC_OP_USE_SORT(oldState->nouts, newState->nouts)) {
+	/* With not too many arcs, just do them one at a time */
+	struct arc *a;
+
+	for (a = oldState->outs; a != NULL; a = a->outchain) {
+	    if (all || a->type != EMPTY) {
+		cparc(nfa, a, newState, a->to);
+	    }
+	}
+    } else {
+ 	/*
+	 * With many arcs, use a sort-merge approach.  Note that createarc()
+	 * will put new arcs onto the front of newState's chain, so it does
+	 * not break our walk through the sorted part of the chain.
+	 */
+	struct arc *oa;
+	struct arc *na;
+
+	/*
+	 * Because we bypass newarc() in this code path, we'd better include a
+	 * cancel check.
+	 */
+	if (CANCEL_REQUESTED(nfa->v->re)) {
+	    NERR(REG_CANCEL);
+	    return;
+	}
+
+	sortouts(nfa, oldState);
+	sortouts(nfa, newState);
+	if (NISERR()) {
+	    return;		/* might have failed to sort */
+	}
+	oa = oldState->outs;
+	na = newState->outs;
+	while (oa != NULL && na != NULL) {
+	    struct arc *a = oa;
+
+	    if (!all && a->type == EMPTY) {
+		oa = oa->outchain;
+		continue;
+	    }
+
+	    switch (sortouts_cmp(&oa, &na)) {
+		case -1:
+		    /* newState does not have anything matching oa */
+		    oa = oa->outchain;
+		    createarc(nfa, a->type, a->co, newState, a->to);
+		    break;
+		case 0:
+		    /* match, advance in both lists */
+		    oa = oa->outchain;
+		    na = na->outchain;
+		    break;
+		case +1:
+		    /* advance only na; oa might have a match later */
+		    na = na->outchain;
+		    break;
+		default:
+		    assert(NOTREACHED);
+	    }
+	}
+	while (oa != NULL) {
+	    /* newState does not have anything matching oa */
+	    struct arc *a = oa;
+
+	    if (!all && a->type == EMPTY){
+		oa = oa->outchain;
+		continue;
+	    }
+
+	    oa = oa->outchain;
+	    createarc(nfa, a->type, a->co, newState, a->to);
 	}
     }
 }
@@ -2238,7 +2852,7 @@ compact(
 		break;
 	    }
 	}
-	carcsort(first, ca-1);
+	carcsort(first, ca - first);
 	ca->co = COLORLESS;
 	ca->to = 0;
 	ca++;
@@ -2258,33 +2872,39 @@ compact(
 
 /*
  - carcsort - sort compacted-NFA arcs by color
- * Really dumb algorithm, but if the list is long enough for that to matter,
- * you're in real trouble anyway.
  ^ static VOID carcsort(struct carc *, struct carc *);
  */
 static void
 carcsort(
     struct carc *first,
-    struct carc *last)
+    size_t n)
 {
-    struct carc *p;
-    struct carc *q;
-    struct carc tmp;
-
-    if (last - first <= 1) {
-	return;
+    if (n > 1) {
+	qsort(first, n, sizeof(struct carc), carc_cmp);
     }
+}
 
-    for (p = first; p <= last; p++) {
-	for (q = p; q <= last; q++) {
-	    if (p->co > q->co || (p->co == q->co && p->to > q->to)) {
-		assert(p != q);
-		tmp = *p;
-		*p = *q;
-		*q = tmp;
-	    }
-	}
+static int
+carc_cmp(
+    const void *a,
+    const void *b)
+{
+    const struct carc *aa = (const struct carc *) a;
+    const struct carc *bb = (const struct carc *) b;
+  
+    if (aa->co < bb->co) {
+	return -1;
+    }
+    if (aa->co > bb->co) {
+	return +1;
+    }
+    if (aa->to < bb->to) {
+	return -1;
     }
+    if (aa->to > bb->to) {
+	return +1;
+    }
+    return 0;
 }
 
 /*
@@ -2382,37 +3002,28 @@ dumparcs(
     FILE *f)
 {
     int pos;
+    struct arc *a;
 
-    assert(s->nouts > 0);
-    /* printing arcs in reverse order is usually clearer */
-    pos = dumprarcs(s->outs, s, f, 1);
-    if (pos != 1) {
-	fprintf(f, "\n");
-    }
-}
-
-/*
- - dumprarcs - dump remaining outarcs, recursively, in reverse order
- ^ static int dumprarcs(struct arc *, struct state *, FILE *, int);
- */
-static int			/* resulting print position */
-dumprarcs(
-    struct arc *a,
-    struct state *s,
-    FILE *f,
-    int pos)			/* initial print position */
-{
-    if (a->outchain != NULL) {
-	pos = dumprarcs(a->outchain, s, f, pos);
+    /* printing oldest arcs first is usually clearer */
+    a = s->outs;
+    assert(a != NULL);
+    while (a->outchain != NULL) {
+	a = a->outchain;
     }
-    dumparc(a, s, f);
-    if (pos == 5) {
+    pos = 1;
+    do {
+	dumparc(a, s, f);
+	if (pos == 5) {
+	    fprintf(f, "\n");
+	    pos = 1;
+	} else {
+	    pos++;
+	}
+	a = a->outchainRev;
+    } while (a != NULL);
+    if (pos != 1) {
 	fprintf(f, "\n");
-	pos = 1;
-    } else {
-	pos++;
     }
-    return pos;
 }
 
 /*
author	dgp <dgp@users.sourceforge.net>	2015-10-19 19:32:04 (GMT)
committer	dgp <dgp@users.sourceforge.net>	2015-10-19 19:32:04 (GMT)
commit	8ed7672ab71b54afa94e164e73fdc274b0b39771 (patch)
tree	4da041e80ad7a2639abcff0694fe8ed6d2e77d3b /generic/regc_nfa.c
parent	b8c0c06fdf9f099f27f114fc9c92b7786dce13a5 (diff)
download	tcl-8ed7672ab71b54afa94e164e73fdc274b0b39771.zip tcl-8ed7672ab71b54afa94e164e73fdc274b0b39771.tar.gz tcl-8ed7672ab71b54afa94e164e73fdc274b0b39771.tar.bz2