From 0a41c61107c36da0a8e4ca0fc259149e3bc1956d Mon Sep 17 00:00:00 2001
From: stanton <stanton>
Date: Wed, 11 Nov 1998 01:44:46 +0000
Subject: integrated latest regexp updates from Henry Spencer, includes new
 regexp switches "-line", "-lineanchor", and "-linestop" for controlling
 newline behavior

---
 changes               |   10 +-
 generic/regc_color.c  |  499 +++++++++------
 generic/regc_lex.c    |   25 +-
 generic/regc_locale.c |   16 +-
 generic/regc_nfa.c    |   35 +-
 generic/regcomp.c     | 1705 +++++++++++++++++++++++--------------------------
 generic/regcustom.h   |   20 +-
 generic/rege_dfa.c    |  583 +++++++++++++++++
 generic/regerror.c    |    2 +-
 generic/regerrs.h     |   37 +-
 generic/regex.h       |   25 +-
 generic/regexec.c     | 1349 ++++++++------------------------------
 generic/regguts.h     |   90 ++-
 generic/tclCmdMZ.c    |   38 +-
 generic/tclRegexp.c   |  206 ++----
 generic/tclRegexp.h   |    6 +-
 generic/tclTest.c     |  321 +++++++++-
 tests/regexp.test     |    4 +-
 tests/regtests.test   |   37 +-
 unix/Makefile.in      |    4 +-
 win/makefile.vc       |   17 +-
 21 files changed, 2525 insertions(+), 2504 deletions(-)
 create mode 100644 generic/rege_dfa.c
diff --git a/changes b/changes
index 7d7851b..5e8f335 100644
--- a/changes
+++ b/changes
@@ -1,6 +1,6 @@
 Recent user-visible changes to Tcl:
 
-RCS: @(#) $Id: changes,v 1.1.2.8 1998/11/04 04:47:50 stanton Exp $
+RCS: @(#) $Id: changes,v 1.1.2.9 1998/11/11 01:44:46 stanton Exp $
 
 1. No more [command1] [command2] construct for grouping multiple
 commands on a single command line.
@@ -3933,3 +3933,11 @@ the encoding.n manual entry for more details. (stanton)
 11/3/98 (bug fix) The regular expression character classification
 syntax now includes Unicode characters in the supported
 classes. (stanton)
+
+11/9/98 (bug fix) "format" now correctly handles multibyte characters
+in %s format strings. (stanton)
+
+11/10/98 (new feature) "regexp" now accepts three new switches
+("-line", "-lineanchor", and "-linestop") that control how regular
+expressions treat line breaks. See the regexp manual entry for more
+details. (stanton)
diff --git a/generic/regc_color.c b/generic/regc_color.c
index 4a8a87c..843c05e 100644
--- a/generic/regc_color.c
+++ b/generic/regc_color.c
@@ -8,90 +8,75 @@
 
 
 
-/*
- * If this declaration draws a complaint about a negative array size,
- * then CHRBITS is defined incorrectly for the chr type.
- */
-static char isCHRBITSright[NEGIFNOT(sizeof(chr)*CHAR_BIT == CHRBITS)];
-
-
-
 #define	CISERR()	VISERR(cm->v)
 #define	CERR(e)		VERR(cm->v, (e))
 
 
 
 /*
- - newcm - get new colormap
- ^ static struct colormap *newcm(struct vars *);
+ - initcm - set up new colormap
+ ^ static VOID initcm(struct vars *, struct colormap *);
  */
-static struct colormap *	/* NULL for allocation failure */
-newcm(v)
+static VOID
+initcm(v, cm)
 struct vars *v;
+struct colormap *cm;
 {
-	struct colormap *cm;
 	int i;
 	int j;
 	union tree *t;
 	union tree *nextt;
 	struct colordesc *cd;
 
-	cm = (struct colormap *)MALLOC(sizeof(struct colormap));
-	if (cm == NULL) {
-		ERR(REG_ESPACE);
-		return NULL;
-	}
 	cm->magic = CMMAGIC;
 	cm->v = v;
-	cm->rest = WHITE;
-	cm->filled = 0;
 
 	cm->ncds = NINLINECDS;
-	cm->cd = cm->cds;
-	for (cd = cm->cd; cd < CDEND(cm); cd++) {
-		cd->nchrs = 0;
-		cd->sub = NOSUB;
-		cd->arcs = NULL;
-		cd->flags = 0;
-	}
-	cm->cd[WHITE].nchrs = CHR_MAX - CHR_MIN + 1;
-
-	/* treetop starts as NULLs if there are lower levels */
-	t = cm->tree;
-	if (NBYTS > 1)
-		for (i = BYTTAB-1; i >= 0; i--)
-			t->tptr[i] = NULL;
-	/* if no lower levels, treetop and last fill block are the same */
-
-	/* fill blocks point to next fill block... */
-	for (t = &cm->tree[1], j = NBYTS-2; j > 0; t = nextt, j--) {
+	cm->cd = cm->cdspace;
+	cm->max = 0;
+	cm->free = 0;
+
+	cd = cm->cd;			/* cm->cd[WHITE] */
+	cd->sub = NOSUB;
+	cd->arcs = NULL;
+	cd->flags = 0;
+	cd->nchrs = CHR_MAX - CHR_MIN + 1;
+
+	/* upper levels of tree */
+	for (t = &cm->tree[0], j = NBYTS-1; j > 0; t = nextt, j--) {
 		nextt = t + 1;
 		for (i = BYTTAB-1; i >= 0; i--)
-			t->tptr[i] = t + 1;
+			t->tptr[i] = nextt;
 	}
-	/* ...except last which is solid white */
+	/* bottom level is solid white */
 	t = &cm->tree[NBYTS-1];
 	for (i = BYTTAB-1; i >= 0; i--)
 		t->tcolor[i] = WHITE;
-
-
-	return cm;
+	cd->block = t;
 }
 
 /*
- - freecm - free a colormap
+ - freecm - free dynamically-allocated things in a colormap
  ^ static VOID freecm(struct colormap *);
  */
 static VOID
 freecm(cm)
 struct colormap *cm;
 {
+	size_t i;
+	union tree *cb;
+
 	cm->magic = 0;
 	if (NBYTS > 1)
 		cmtreefree(cm, cm->tree, 0);
-	if (cm->cd != cm->cds)
+	for (i = 1; i < cm->max; i++)		/* skip WHITE */
+		if (!UNUSEDCOLOR(&cm->cd[i])) {
+			cb = cm->cd[i].block;
+			if (cb != NULL)
+				FREE(cb);
+		}
+	if (cm->cd != cm->cdspace)
 		FREE(cm->cd);
-	FREE(cm);
 }
 
 /*
@@ -107,92 +92,26 @@ int level;			/* level number (top == 0) of this block */
 	int i;
 	union tree *t;
 	union tree *fillt = &cm->tree[level+1];
+	union tree *cb;
 
 	assert(level < NBYTS-1);	/* this level has pointers */
 	for (i = BYTTAB-1; i >= 0; i--) {
 		t = tree->tptr[i];
-		if (t != NULL && t != fillt) {
-			if (level < NBYTS-2)	/* more pointer blocks below */
+		assert(t != NULL);
+		if (t != fillt) {
+			if (level < NBYTS-2) {	/* more pointer blocks below */
 				cmtreefree(cm, t, level+1);
-			FREE(t);
+				FREE(t);
+			} else {		/* color block below */
+				cb = cm->cd[t->tcolor[0]].block;
+				if (t != cb)	/* not a solid block */
+					FREE(t);
+			}
 		}
 	}
 }
 
 /*
- - fillcm - fill in a colormap, so no NULLs remain
- * The point of this is that the tree traversal can then be a fixed set
- * of table lookups with no conditional branching.  It might be better
- * to do reallocation for a more compacted structure, on the order of
- * what's done for NFAs, but the colormap can be quite large and a total
- * rebuild of it could be costly.
- ^ static VOID fillcm(struct colormap *);
- */
-static VOID
-fillcm(cm)
-struct colormap *cm;
-{
-	if (!cm->filled && NBYTS > 1)
-		cmtreefill(cm, cm->tree, 0);
-	cm->filled = 1;
-}
-
-/*
- - cmtreefill - fill a non-terminal part of a colormap tree
- ^ static VOID cmtreefill(struct colormap *, union tree *, int);
- */
-static VOID
-cmtreefill(cm, tree, level)
-struct colormap *cm;
-union tree *tree;
-int level;			/* level number (top == 0) of this block */
-{
-	int i;
-	union tree *t;
-	union tree *fillt = &cm->tree[level+1];
-
-	assert(level < NBYTS-1);	/* this level has pointers */
-	for (i = BYTTAB-1; i >= 0; i--) {
-		t = tree->tptr[i];
-		if (t == fillt)			/* oops */
-			{}
-		else if (t == NULL)
-			tree->tptr[i] = fillt;
-		else if (level < NBYTS-2)	/* more pointer blocks below */
-			cmtreefill(cm, t, level+1);
-	}
-}
-
-/*
- - getcolor - get the color of a character from a colormap
- ^ static color getcolor(struct colormap *, pchr);
- */
-static color
-getcolor(cm, c)
-struct colormap *cm;
-pchr c;
-{
-	uchr uc = c;
-	int shift;
-	int b;
-	union tree *t;
-
-	assert(cm->magic == CMMAGIC);
-
-	t = cm->tree;
-	for (shift = BYTBITS * (NBYTS - 1); t != NULL; shift -= BYTBITS) {
-		b = (uc >> shift) & BYTMASK;
-		if (shift == 0)		/* reached the bottom */
-			return t->tcolor[b];
-		t = t->tptr[b];
-	}
-
-	/* we fell off an incomplete part of the tree */
-	assert(!cm->filled);
-	return cm->rest;
-}
-
-/*
  - setcolor - set the color of a character in a colormap
  ^ static color setcolor(struct colormap *, pchr, pcolor);
  */
@@ -204,11 +123,14 @@ pcolor co;
 {
 	uchr uc = c;
 	int shift;
-	int i;
+	int level;
 	int b;
 	int bottom;
 	union tree *t;
+	union tree *newt;
+	union tree *fillt;
 	union tree *lastt;
+	union tree *cb;
 	color prev;
 
 	assert(cm->magic == CMMAGIC);
@@ -216,28 +138,32 @@ pcolor co;
 		return COLORLESS;
 
 	t = cm->tree;
-	for (shift = BYTBITS * (NBYTS - 1); shift > 0; shift -= BYTBITS) {
+	for (level = 0, shift = BYTBITS * (NBYTS - 1); shift > 0;
+						level++, shift -= BYTBITS) {
 		b = (uc >> shift) & BYTMASK;
 		lastt = t;
-		t = t->tptr[b];
-		if (t == NULL) {	/* fell off an incomplete part */
-			bottom = (shift <= BYTBITS) ? 1 : 0;
-			t = (union tree *)MALLOC((bottom) ?
+		t = lastt->tptr[b];
+		assert(t != NULL);
+		fillt = &cm->tree[level+1];
+		bottom = (shift <= BYTBITS) ? 1 : 0;
+		cb = (bottom) ? cm->cd[t->tcolor[0]].block : fillt;
+		if (t == fillt || t == cb) {	/* must allocate a new block */
+			newt = (union tree *)MALLOC((bottom) ?
 				sizeof(struct colors) : sizeof(struct ptrs));
-			if (t == NULL) {
+			if (newt == NULL) {
 				CERR(REG_ESPACE);
 				return COLORLESS;
 			}
 			if (bottom)
-				for (i = BYTTAB-1; i >= 0; i--)
-					t->tcolor[i] = cm->rest;
+				memcpy(VS(newt->tcolor), VS(t->tcolor),
+							BYTTAB*sizeof(color));
 			else
-				for (i = BYTTAB-1; i >= 0; i--)
-					t->tptr[i] = NULL;
+				memcpy(VS(newt->tptr), VS(t->tptr),
+						BYTTAB*sizeof(union tree *));
+			t = newt;
 			lastt->tptr[b] = t;
 		}
 	}
-	assert(shift == 0 && t != NULL);	/* we hit bottom; it's there */
 
 	b = uc & BYTMASK;
 	prev = t->tcolor[b];
@@ -253,20 +179,10 @@ static color
 maxcolor(cm)
 struct colormap *cm;
 {
-	struct colordesc *cd;
-	struct colordesc *end;
-	struct colordesc *lastused;
-
 	if (CISERR())
 		return COLORLESS;
 
-	lastused = NULL;
-	end = CDEND(cm);
-	for (cd = cm->cd; cd < end; cd++)
-		if (!UNUSEDCOLOR(cd))
-			lastused = cd;
-	assert(lastused != NULL);
-	return (color)(lastused - cm->cd);
+	return (color)cm->max;
 }
 
 /*
@@ -279,47 +195,85 @@ newcolor(cm)
 struct colormap *cm;
 {
 	struct colordesc *cd;
-	struct colordesc *end;
-	struct colordesc *firstnew;
+	struct colordesc *new;
 	size_t n;
 
 	if (CISERR())
 		return COLORLESS;
 
-	end = CDEND(cm);
-	for (cd = cm->cd; cd < end; cd++)
-		if (UNUSEDCOLOR(cd)) {
-			assert(cd->arcs == NULL);
-			return (color)(cd - cm->cd);
-		}
-
-	/* oops, must allocate more */
-	n = cm->ncds * 2;
-	if (cm->cd == cm->cds) {
-		cd = (struct colordesc *)MALLOC(sizeof(struct colordesc) * n);
-		if (cd != NULL)
-			memcpy(VS(cd), VS(cm->cds), cm->ncds *
-						sizeof(struct colordesc));
+	if (cm->free != 0) {
+		assert(cm->free > 0);
+		assert((size_t)cm->free < cm->ncds);
+		cd = &cm->cd[cm->free];
+		assert(UNUSEDCOLOR(cd));
+		assert(cd->arcs == NULL);
+		cm->free = cd->sub;
+	} else if (cm->max < cm->ncds - 1) {
+		cm->max++;
+		cd = &cm->cd[cm->max];
 	} else {
-		cd = (struct colordesc *)REALLOC(cm->cd,
+		/* oops, must allocate more */
+		n = cm->ncds * 2;
+		if (cm->cd == cm->cdspace) {
+			new = (struct colordesc *)MALLOC(n *
+						sizeof(struct colordesc));
+			if (new != NULL)
+				memcpy(VS(new), VS(cm->cdspace), cm->ncds *
+						sizeof(struct colordesc));
+		} else
+			new = (struct colordesc *)REALLOC(cm->cd,
 						n * sizeof(struct colordesc));
+		if (new == NULL) {
+			CERR(REG_ESPACE);
+			return COLORLESS;
+		}
+		cm->cd = new;
+		cm->ncds = n;
+		assert(cm->max < cm->ncds - 1);
+		cm->max++;
+		cd = &cm->cd[cm->max];
 	}
-	if (cd == NULL) {
-		CERR(REG_ESPACE);
-		return COLORLESS;
+
+	cd->nchrs = 0;
+	cd->sub = NOSUB;
+	cd->arcs = NULL;
+	cd->flags = 0;
+	cd->block = NULL;
+
+	return (color)(cd - cm->cd);
+}
+
+/*
+ - freecolor - free a color (must have no arcs or subcolor)
+ ^ static VOID freecolor(struct colormap *, pcolor);
+ */
+static VOID
+freecolor(cm, co)
+struct colormap *cm;
+pcolor co;
+{
+	struct colordesc *cd = &cm->cd[co];
+
+	assert(co >= 0);
+	if (co == WHITE)
+		return;
+
+	assert(cd->arcs == NULL);
+	assert(cd->sub == NOSUB);
+	assert(cd->nchrs == 0);
+	cd->flags = FREECOL;
+	if (cd->block != NULL) {
+		FREE(cd->block);
+		cd->block = NULL;	/* just paranoia */
 	}
-	cm->cd = cd;
-	firstnew = CDEND(cm);
-	cm->ncds = n;
-	end = CDEND(cm);
-	for (cd = firstnew; cd < end; cd++) {
-		cd->nchrs = 0;
-		cd->sub = NOSUB;
-		cd->arcs = NULL;
-		cd->flags = 0;
+
+	if ((size_t)co == cm->max)
+		while (cm->max > WHITE && UNUSEDCOLOR(&cm->cd[cm->max]))
+				cm->max--;
+	else {
+		cd->sub = cm->free;
+		cm->free = (color)(cd - cm->cd);
 	}
-	assert(firstnew < CDEND(cm) && UNUSEDCOLOR(firstnew));
-	return (color)(firstnew - cm->cd);
 }
 
 /*
@@ -352,27 +306,170 @@ pchr c;
 	color co;			/* current color of c */
 	color sco;			/* new subcolor */
 
-	co = getcolor(cm, c);
+	co = GETCOLOR(cm, c);
+	sco = newsub(cm, co);
+
+	if (co == sco)		/* already in an open subcolor */
+		return co;	/* rest is redundant */
+	cm->cd[co].nchrs--;
+	cm->cd[sco].nchrs++;
+	setcolor(cm, c, sco);
+	return sco;
+}
+
+/*
+ - newsub - allocate a new subcolor (if necessary) for a color
+ ^ static color newsub(struct colormap *, pcolor);
+ */
+static color
+newsub(cm, co)
+struct colormap *cm;
+pcolor co;
+{
+	color sco;			/* new subcolor */
+
 	sco = cm->cd[co].sub;
-	if (sco == NOSUB) {		/* must create subcolor */
-		if (cm->cd[co].nchrs == 1)	/* shortcut */
+	if (sco == NOSUB) {		/* color has no open subcolor */
+		if (cm->cd[co].nchrs == 1)	/* optimization */
 			return co;
-		sco = newcolor(cm);
+		sco = newcolor(cm);	/* must create subcolor */
 		if (sco == COLORLESS)
 			return COLORLESS;
 		cm->cd[co].sub = sco;
-		cm->cd[sco].sub = sco;	/* self-referential subcolor ptr */
+		cm->cd[sco].sub = sco;	/* open subcolor points to self */
 	}
+	assert(sco != NOSUB);
 
-	if (co == sco)		/* repeated character */
-		return co;	/* no further action needed */
-	cm->cd[co].nchrs--;
-	cm->cd[sco].nchrs++;
-	setcolor(cm, c, sco);
 	return sco;
 }
 
 /*
+ - subrange - allocate new subcolors to this range of chrs, fill in arcs
+ ^ static VOID subrange(struct vars *, pchr, pchr, struct state *,
+ ^ 	struct state *);
+ */
+static VOID
+subrange(v, from, to, lp, rp)
+struct vars *v;
+pchr from;
+pchr to;
+struct state *lp;
+struct state *rp;
+{
+	uchr uf;
+	int i;
+
+	assert(from <= to);
+
+	/* first, align "from" on a tree-block boundary */
+	uf = (uchr)from;
+	i = (int)( ((uf + BYTTAB-1) & (uchr)~BYTMASK) - uf );
+	for (; from <= to && i > 0; i--, from++)
+		newarc(v->nfa, PLAIN, subcolor(v->cm, from), lp, rp);
+	if (from > to)			/* didn't reach a boundary */
+		return;
+
+	/* deal with whole blocks */
+	for (; to - from >= BYTTAB; from += BYTTAB)
+		subblock(v, from, lp, rp);
+
+	/* clean up any remaining partial table */
+	for (; from <= to; from++)
+		newarc(v->nfa, PLAIN, subcolor(v->cm, from), lp, rp);
+}
+
+/*
+ - subblock - allocate new subcolors for one tree block of chrs, fill in arcs
+ ^ static VOID subblock(struct vars *, pchr, struct state *, struct state *);
+ */
+static VOID
+subblock(v, start, lp, rp)
+struct vars *v;
+pchr start;			/* first of BYTTAB chrs */
+struct state *lp;
+struct state *rp;
+{
+	uchr uc = start;
+	struct colormap *cm = v->cm;
+	int shift;
+	int level;
+	int i;
+	int b;
+	union tree *t;
+	union tree *cb;
+	union tree *fillt;
+	union tree *lastt;
+	int previ;
+	int ndone;
+	color co;
+	color sco;
+
+	assert((uc & BYTMASK) == 0);
+
+	/* find its color block, making new pointer blocks as needed */
+	t = cm->tree;
+	fillt = NULL;
+	for (level = 0, shift = BYTBITS * (NBYTS - 1); shift > 0;
+						level++, shift -= BYTBITS) {
+		b = (uc >> shift) & BYTMASK;
+		lastt = t;
+		t = lastt->tptr[b];
+		assert(t != NULL);
+		fillt = &cm->tree[level+1];
+		if (t == fillt && shift > BYTBITS) {	/* need new ptr block */
+			t = (union tree *)MALLOC(sizeof(struct ptrs));
+			if (t == NULL) {
+				CERR(REG_ESPACE);
+				return;
+			}
+			memcpy(VS(t->tptr), VS(fillt->tptr),
+						BYTTAB*sizeof(union tree *));
+			lastt->tptr[b] = t;
+		}
+	}
+
+	/* special cases:  fill block or solid block */
+	co = t->tcolor[0];
+	cb = cm->cd[co].block;
+	if (t == fillt || t == cb) {
+		/* either way, we want a subcolor solid block */
+		sco = newsub(cm, co);
+		t = cm->cd[sco].block;
+		if (t == NULL) {	/* must set it up */
+			t = (union tree *)MALLOC(sizeof(struct colors));
+			if (t == NULL) {
+				CERR(REG_ESPACE);
+				return;
+			}
+			for (i = 0; i < BYTTAB; i++)
+				t->tcolor[i] = sco;
+			cm->cd[sco].block = t;
+		}
+		/* find loop must have run at least once */
+		lastt->tptr[b] = t;
+		newarc(v->nfa, PLAIN, sco, lp, rp);
+		cm->cd[co].nchrs -= BYTTAB;
+		cm->cd[sco].nchrs += BYTTAB;
+		return;
+	}
+
+	/* general case, a mixed block to be altered */
+	i = 0;
+	while (i < BYTTAB) {
+		co = t->tcolor[i];
+		sco = newsub(cm, co);
+		newarc(v->nfa, PLAIN, sco, lp, rp);
+		previ = i;
+		do {
+			t->tcolor[i++] = sco;
+		} while (i < BYTTAB && t->tcolor[i] == co);
+		ndone = i - previ;
+		cm->cd[co].nchrs -= ndone;
+		cm->cd[sco].nchrs += ndone;
+	}
+}
+
+/*
  - okcolors - promote subcolors to full colors
  ^ static VOID okcolors(struct nfa *, struct colormap *);
  */
@@ -390,7 +487,7 @@ struct colormap *cm;
 
 	for (cd = cm->cd, co = 0; cd < end; cd++, co++) {
 		sco = cd->sub;
-		if (sco == NOSUB) {
+		if (UNUSEDCOLOR(cd) || sco == NOSUB) {
 			/* has no subcolor, no further action */
 		} else if (sco == co) {
 			/* is subcolor, let parent deal with it */
@@ -410,6 +507,7 @@ struct colormap *cm;
 				a->colorchain = scd->arcs;
 				scd->arcs = a;
 			}
+			freecolor(cm, co);
 		} else {
 			/* parent's arcs must gain parallel subcolor arcs */
 			cd->sub = NOSUB;
@@ -475,7 +573,7 @@ pchr c;
 {
 	color co;			/* color of c */
 
-	co = getcolor(cm, c);
+	co = GETCOLOR(cm, c);
 	if (cm->cd[co].nchrs == 1 && cm->cd[co].sub == NOSUB)
 		return 1;
 	return 0;
@@ -548,24 +646,27 @@ FILE *f;
 	struct colordesc *end;
 	color co;
 	chr c;
+	char *has;
 
-	if (cm->filled) {
-		fprintf(f, "filled\n");
-		if (NBYTS > 1)
-			fillcheck(cm, cm->tree, 0, f);
-	}
+	fprintf(f, "max %ld\n", (long)cm->max);
+	if (NBYTS > 1)
+		fillcheck(cm, cm->tree, 0, f);
 	end = CDEND(cm);
 	for (cd = cm->cd + 1, co = 1; cd < end; cd++, co++)	/* skip 0 */
-		if (cd->nchrs > 0) {
+		if (!UNUSEDCOLOR(cd)) {
+			assert(cd->nchrs > 0);
+			has = (cd->block != NULL) ? "#" : "";
 			if (cd->flags&PSEUDO)
-				fprintf(f, "#%2ld(ps): ", (long)co);
+				fprintf(f, "#%2ld%s(ps): ", (long)co, has);
 			else
-				fprintf(f, "#%2ld(%2d): ", (long)co, cd->nchrs);
+				fprintf(f, "#%2ld%s(%2d): ", (long)co,
+							has, cd->nchrs);
+			/* it's hard to do this more efficiently */
 			for (c = CHR_MIN; c < CHR_MAX; c++)
-				if (getcolor(cm, c) == co)
+				if (GETCOLOR(cm, c) == co)
 					dumpchr(c, f);
 			assert(c == CHR_MAX);
-			if (getcolor(cm, c) == co)
+			if (GETCOLOR(cm, c) == co)
 				dumpchr(c, f);
 			fprintf(f, "\n");
 		}
@@ -613,7 +714,7 @@ FILE *f;
 	else if (c > ' ' && c <= '~')
 		putc((char)c, f);
 	else
-		fprintf(f, "\\0%lo", (long)c);
+		fprintf(f, "\\u%04lx", (long)c);
 }
 
 #endif				/* ifdef REG_DEBUG */
diff --git a/generic/regc_lex.c b/generic/regc_lex.c
index 820b404..0eb6c5d 100644
--- a/generic/regc_lex.c
+++ b/generic/regc_lex.c
@@ -364,7 +364,7 @@ struct vars *v;
 			NOTE(REG_UNONPOSIX);
 			if (ATEOS())
 				FAILW(REG_EESCAPE);
-			(DISCARD) lexescape(v);
+			(DISCARD)lexescape(v);
 			switch (v->nexttype) {	/* not all escapes okay here */
 			case PLAIN:
 				return 1;
@@ -459,10 +459,10 @@ struct vars *v;
 		break;
 	}
 
-	/* that got rid of everything except EREs */
+	/* that got rid of everything except EREs and AREs */
 	assert(INCON(L_ERE));
 
-	/* deal with EREs, except for backslashes */
+	/* deal with EREs and AREs, except for backslashes */
 	switch (c) {
 	case CHR('|'):
 		RET('|');
@@ -529,12 +529,6 @@ struct vars *v;
 				NOTE(REG_ULOOKAHEAD);
 				RETV(LACON, 0);
 				break;
-			case CHR('<'):		/* prefer short */
-				RETV(PREFER, 0);
-				break;
-			case CHR('>'):		/* prefer long */
-				RETV(PREFER, 1);
-				break;
 			default:
 				FAILW(REG_BADRPT);
 				break;
@@ -547,8 +541,9 @@ struct vars *v;
 			RETV('(', 1);
 		break;
 	case CHR(')'):
-		if (LASTTYPE('('))
+		if (LASTTYPE('(')) {
 			NOTE(REG_UUNSPEC);
+		}
 		RETV(')', c);
 		break;
 	case CHR('['):		/* easy except for [[:<:]] and [[:>:]] */
@@ -589,7 +584,7 @@ struct vars *v;
 		break;
 	}
 
-	/* ERE backslash handling; backslash already eaten */
+	/* ERE/ARE backslash handling; backslash already eaten */
 	assert(!ATEOS());
 	if (!(v->cflags&REG_ADVF)) {	/* only AREs have non-trivial escapes */
 		if (iscalnum(*v->now)) {
@@ -598,7 +593,7 @@ struct vars *v;
 		}
 		RETV(PLAIN, *v->now++);
 	}
-	(DISCARD) lexescape(v);
+	(DISCARD)lexescape(v);
 	if (ISERR())
 		FAILW(REG_EESCAPE);
 	if (v->nexttype == CCLASS) {	/* fudge at lexical level */
@@ -682,6 +677,12 @@ struct vars *v;
 	case CHR('f'):
 		RETV(PLAIN, CHR('\f'));
 		break;
+	case CHR('m'):
+		RET('<');
+		break;
+	case CHR('M'):
+		RET('>');
+		break;
 	case CHR('n'):
 		RETV(PLAIN, CHR('\n'));
 		break;
diff --git a/generic/regc_locale.c b/generic/regc_locale.c
index 7d504c8..5930378 100644
--- a/generic/regc_locale.c
+++ b/generic/regc_locale.c
@@ -1,9 +1,15 @@
-/*
- * locale-specific stuff, including MCCE handling
- * This file is #included by regcomp.c.
+/* 
+ * regc_locale.c --
+ *
+ *	This file contains the Unicode locale specific regexp routines.
+ *	This file is #included by regcomp.c.
+ *
+ * Copyright (c) 1998 by Scriptics Corporation.
+ *
+ * See the file "license.terms" for information on usage and redistribution
+ * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * No MCCEs for Tcl.  The handling of character names and classes is
- * still ASCII-centric, and needs to be extended to handle full Unicode.
+ * RCS: @(#) $Id: regc_locale.c,v 1.1.2.3 1998/11/11 01:44:49 stanton Exp $
  */
 
 /* ASCII character-name table */
diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c
index 14ee077..8143181 100644
--- a/generic/regc_nfa.c
+++ b/generic/regc_nfa.c
@@ -575,7 +575,7 @@ struct state *s;
 static VOID
 dupnfa(nfa, start, stop, from, to)
 struct nfa *nfa;
-struct state *start;		/* duplicate starting here */
+struct state *start;		/* duplicate of subNFA starting here */
 struct state *stop;		/* and stopping here */
 struct state *from;		/* stringing duplicate from here */
 struct state *to;		/* to here */
@@ -1161,7 +1161,8 @@ struct nfa *nfa;
 	struct arc *a;
 	struct arc *aa;
 
-	/* can the NFA match the empty string? */
+	if (nfa->pre->outs == NULL)
+		return REG_UIMPOSSIBLE;
 	for (a = nfa->pre->outs; a != NULL; a = a->outchain)
 		for (aa = a->to->outs; aa != NULL; aa = aa->outchain)
 			if (aa->to == nfa->post)
@@ -1170,29 +1171,6 @@ struct nfa *nfa;
 }
 
 /*
- - isempty - is a sub-NFA composed only of EMPTY arcs?
- * Somewhat limited implementation, makes assumptions.
- ^ static int isempty(struct state *, struct state *);
- */
-static int
-isempty(begin, end)
-struct state *begin;
-struct state *end;
-{
-	struct state *s;
-
-	for (s = begin; s != end; s = s->outs->to) {
-		if (s->nouts != 1)
-			return 0;
-		assert(s->outs != NULL);
-		if (s->outs->type != EMPTY)
-			return 0;
-	}
-
-	return 1;
-}
-
-/*
  - compact - compact an NFA
  ^ static VOID compact(struct nfa *, struct cnfa *);
  */
@@ -1305,19 +1283,16 @@ struct carc *last;
 
 /*
  - freecnfa - free a compacted NFA
- ^ static VOID freecnfa(struct cnfa *, int);
+ ^ static VOID freecnfa(struct cnfa *);
  */
 static VOID
-freecnfa(cnfa, dynalloc)
+freecnfa(cnfa)
 struct cnfa *cnfa;
-int dynalloc;			/* is the cnfa struct itself dynamic? */
 {
 	assert(cnfa->nstates != 0);	/* not empty already */
 	cnfa->nstates = 0;
 	FREE(cnfa->states);
 	FREE(cnfa->arcs);
-	if (dynalloc)
-		FREE(cnfa);
 }
 
 /*
diff --git a/generic/regcomp.c b/generic/regcomp.c
index b8c2119..620dc1c 100644
--- a/generic/regcomp.c
+++ b/generic/regcomp.c
@@ -14,7 +14,11 @@
 int compile _ANSI_ARGS_((regex_t *, CONST chr *, size_t, int));
 static VOID moresubs _ANSI_ARGS_((struct vars *, int));
 static int freev _ANSI_ARGS_((struct vars *, int));
-static struct rtree *parse _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *, int));
+static struct subre *parse _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *));
+static struct subre *parsebranch _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *, int));
+static VOID parseqatom _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *, struct subre *));
+static VOID nonword _ANSI_ARGS_((struct vars *, int, struct state *, struct state *));
+static VOID word _ANSI_ARGS_((struct vars *, int, struct state *, struct state *));
 static int scannum _ANSI_ARGS_((struct vars *));
 static VOID repeat _ANSI_ARGS_((struct vars *, struct state *, struct state *, int, int));
 static VOID bracket _ANSI_ARGS_((struct vars *, struct state *, struct state *));
@@ -24,24 +28,23 @@ static chr *scanplain _ANSI_ARGS_((struct vars *));
 static VOID leaders _ANSI_ARGS_((struct vars *, struct cvec *));
 static VOID onechr _ANSI_ARGS_((struct vars *, pchr, struct state *, struct state *));
 static VOID dovec _ANSI_ARGS_((struct vars *, struct cvec *, struct state *, struct state *));
-static color nlcolor _ANSI_ARGS_((struct vars *));
+static celt nextleader _ANSI_ARGS_((struct vars *, pchr, pchr));
 static VOID wordchrs _ANSI_ARGS_((struct vars *));
-static struct subre subre _ANSI_ARGS_((struct state *, struct state *, int, int, struct rtree *));
-static struct rtree *newrt _ANSI_ARGS_((struct vars *));
-static VOID freert _ANSI_ARGS_((struct vars *, struct rtree *));
-static VOID freertnode _ANSI_ARGS_((struct vars *, struct rtree *));
-static VOID optrt _ANSI_ARGS_((struct vars *, struct rtree *));
-static int numrt _ANSI_ARGS_((struct rtree *, int));
-static VOID markrt _ANSI_ARGS_((struct rtree *));
-static VOID cleanrt _ANSI_ARGS_((struct vars *));
-static VOID nfatree _ANSI_ARGS_((struct vars *, struct rtree *, FILE *));
-static VOID nfanode _ANSI_ARGS_((struct vars *, struct subre *, FILE *));
+static struct subre *subre _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *));
+static VOID freesubre _ANSI_ARGS_((struct vars *, struct subre *));
+static VOID freesrnode _ANSI_ARGS_((struct vars *, struct subre *));
+static VOID optst _ANSI_ARGS_((struct vars *, struct subre *));
+static int numst _ANSI_ARGS_((struct subre *, int));
+static VOID markst _ANSI_ARGS_((struct subre *));
+static VOID cleanst _ANSI_ARGS_((struct vars *));
+static int nfatree _ANSI_ARGS_((struct vars *, struct subre *, FILE *));
+static int nfanode _ANSI_ARGS_((struct vars *, struct subre *, FILE *));
 static int newlacon _ANSI_ARGS_((struct vars *, struct state *, struct state *, int));
 static VOID freelacons _ANSI_ARGS_((struct subre *, int));
 static VOID rfree _ANSI_ARGS_((regex_t *));
 static VOID dump _ANSI_ARGS_((regex_t *, FILE *));
-static VOID dumprt _ANSI_ARGS_((struct rtree *, FILE *, int));
-static VOID rtdump _ANSI_ARGS_((struct rtree *, FILE *, int, int));
+static VOID dumpst _ANSI_ARGS_((struct subre *, FILE *, int));
+static VOID stdump _ANSI_ARGS_((struct subre *, FILE *, int, int));
 /* === regc_lex.c === */
 static VOID lexstart _ANSI_ARGS_((struct vars *));
 static VOID prefixes _ANSI_ARGS_((struct vars *));
@@ -56,17 +59,18 @@ static chr newline _ANSI_ARGS_((NOPARMS));
 static chr *ch _ANSI_ARGS_((NOPARMS));
 static chr chrnamed _ANSI_ARGS_((struct vars *, chr *, chr *, pchr));
 /* === regc_color.c === */
-static struct colormap *newcm _ANSI_ARGS_((struct vars *));
+static VOID initcm _ANSI_ARGS_((struct vars *, struct colormap *));
 static VOID freecm _ANSI_ARGS_((struct colormap *));
 static VOID cmtreefree _ANSI_ARGS_((struct colormap *, union tree *, int));
-static VOID fillcm _ANSI_ARGS_((struct colormap *));
-static VOID cmtreefill _ANSI_ARGS_((struct colormap *, union tree *, int));
-static color getcolor _ANSI_ARGS_((struct colormap *, pchr));
 static color setcolor _ANSI_ARGS_((struct colormap *, pchr, pcolor));
 static color maxcolor _ANSI_ARGS_((struct colormap *));
 static color newcolor _ANSI_ARGS_((struct colormap *));
+static VOID freecolor _ANSI_ARGS_((struct colormap *, pcolor));
 static color pseudocolor _ANSI_ARGS_((struct colormap *));
 static color subcolor _ANSI_ARGS_((struct colormap *, pchr c));
+static color newsub _ANSI_ARGS_((struct colormap *, pcolor));
+static VOID subrange _ANSI_ARGS_((struct vars *, pchr, pchr, struct state *, struct state *));
+static VOID subblock _ANSI_ARGS_((struct vars *, pchr, struct state *, struct state *));
 static VOID okcolors _ANSI_ARGS_((struct nfa *, struct colormap *));
 static VOID colorchain _ANSI_ARGS_((struct colormap *, struct arc *));
 static VOID uncolorchain _ANSI_ARGS_((struct colormap *, struct arc *));
@@ -115,10 +119,9 @@ static VOID cleanup _ANSI_ARGS_((struct nfa *));
 static VOID markreachable _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *));
 static VOID markcanreach _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *));
 static int analyze _ANSI_ARGS_((struct nfa *));
-static int isempty _ANSI_ARGS_((struct state *, struct state *));
 static VOID compact _ANSI_ARGS_((struct nfa *, struct cnfa *));
 static VOID carcsort _ANSI_ARGS_((struct carc *, struct carc *));
-static VOID freecnfa _ANSI_ARGS_((struct cnfa *, int));
+static VOID freecnfa _ANSI_ARGS_((struct cnfa *));
 static VOID dumpnfa _ANSI_ARGS_((struct nfa *, FILE *));
 static VOID dumpstate _ANSI_ARGS_((struct state *, FILE *));
 static VOID dumparcs _ANSI_ARGS_((struct state *, FILE *));
@@ -173,9 +176,9 @@ struct vars {
 	struct colormap *cm;	/* character color map */
 	color nlcolor;		/* color of newline */
 	struct state *wordchrs;	/* state in nfa holding word-char outarcs */
-	struct rtree *tree;	/* subexpression tree */
-	struct rtree *treechain;	/* all tree nodes allocated */
-	struct rtree *treefree;		/* any free tree nodes */
+	struct subre *tree;	/* subexpression tree */
+	struct subre *treechain;	/* all tree nodes allocated */
+	struct subre *treefree;		/* any free tree nodes */
 	int ntree;		/* number of tree nodes */
 	struct cvec *cv;	/* interface cvec */
 	struct cvec *cv2;	/* utility cvec */
@@ -186,6 +189,7 @@ struct vars {
 	struct subre *lacons;	/* lookahead-constraint vector */
 	int nlacons;		/* size of lacons */
 	int usedshorter;	/* used short-preferring quantifiers */
+	int unmatchable;	/* can never match */
 };
 
 /* parsing macros; most know that `v' is the struct vars pointer */
@@ -199,6 +203,7 @@ struct vars {
 #define	ERR(e)	VERR(v, e)		/* record an error */
 #define	NOERR()	{if (ISERR()) return;}	/* if error seen, return */
 #define	NOERRN()	{if (ISERR()) return NULL;}	/* NOERR with retval */
+#define	NOERRZ()	{if (ISERR()) return 0;}	/* NOERR with retval */
 #define	INSIST(c, e)	((c) ? 0 : ERR(e))	/* if condition false, error */
 #define	NOTE(b)	(v->re->re_info |= (b))		/* note visible condition */
 #define	EMPTYARC(x, y)	newarc(v->nfa, EMPTY, 0, x, y)
@@ -259,7 +264,6 @@ int flags;
 
 	if (re == NULL || string == NULL)
 		return REG_INVARG;
-	assert(REG_ADVANCED == (REG_EXTENDED|REG_ADVF));
 	if ((flags&REG_QUOTE) &&
 			(flags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE)))
 		return REG_INVARG;
@@ -297,19 +301,18 @@ int flags;
 	re->re_fns = VS(&functions);
 
 	/* more complex setup, malloced things */
-	v->cm = newcm(v);
-	CNOERR();
-	v->nfa = newnfa(v, v->cm, (struct nfa *)NULL);
-	CNOERR();
 	re->re_guts = VS(MALLOC(sizeof(struct guts)));
 	if (re->re_guts == NULL)
 		return freev(v, REG_ESPACE);
 	g = (struct guts *)re->re_guts;
-	ZAPCNFA(g->cnfa);
 	g->tree = NULL;
-	g->cm = NULL;
+	initcm(v, &g->cmap);
+	v->cm = &g->cmap;
 	g->lacons = NULL;
 	g->nlacons = 0;
+	ZAPCNFA(g->search);
+	v->nfa = newnfa(v, v->cm, (struct nfa *)NULL);
+	CNOERR();
 	v->cv = newcvec(100, 20, 10);
 	if (v->cv == NULL)
 		return freev(v, REG_ESPACE);
@@ -324,53 +327,56 @@ int flags;
 
 	/* parsing */
 	lexstart(v);			/* also handles prefixes */
-	v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final, NONEYET);
+	if ((v->cflags&REG_NLSTOP) || (v->cflags&REG_NLANCH)) {
+		/* assign newline a unique color */
+		v->nlcolor = subcolor(v->cm, newline());
+		okcolors(v->nfa, v->cm);
+	}
+	CNOERR();
+	v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final);
 	assert(SEE(EOS));		/* even if error; ISERR() => SEE(EOS) */
 	CNOERR();
+	assert(v->tree != NULL);
 
 	/* finish setup of nfa and its subre tree */
 	specialcolors(v->nfa);
 	CNOERR();
 	if (debug != NULL) {
 		dumpnfa(v->nfa, debug);
-		dumprt(v->tree, debug, 1);
+		dumpst(v->tree, debug, 1);
 	}
 	v->usedshorter = 0;
-	optrt(v, v->tree);
-	if (v->tree != NULL) {
-		v->ntree = numrt(v->tree, 1);
-		markrt(v->tree);
-	} else
-		v->ntree = 0;
-	cleanrt(v);
+	v->unmatchable = 0;
+	optst(v, v->tree);
+	v->ntree = numst(v->tree, 1);
+	markst(v->tree);
+	cleanst(v);
 	if (debug != NULL) {
 		fprintf(debug, "-->\n");
-		dumprt(v->tree, debug, 1);
+		dumpst(v->tree, debug, 1);
 	}
 
-	/* build compacted NFAs for tree, lacons, main nfa */
-	nfatree(v, v->tree, debug);
+	/* build compacted NFAs for tree, lacons, fast search */
+	re->re_info |= nfatree(v, v->tree, debug);
 	if (debug != NULL) {
 		fprintf(debug, "---->\n");
-		dumprt(v->tree, debug, 1);
+		dumpst(v->tree, debug, 1);
 	}
 	CNOERR();
+	if (re->re_info&REG_UIMPOSSIBLE)
+		v->unmatchable = 1;
 	assert(v->nlacons == 0 || v->lacons != NULL);
 	for (i = 1; i < v->nlacons; i++)
 		nfanode(v, &v->lacons[i], debug);
 	CNOERR();
-	re->re_info |= optimize(v->nfa, debug);
+	rainbow(v->nfa, v->cm, PLAIN, COLORLESS, v->nfa->pre, v->nfa->pre);
+	newarc(v->nfa, PLAIN, v->nfa->bos[0], v->nfa->pre, v->nfa->pre);
+	newarc(v->nfa, PLAIN, v->nfa->bos[1], v->nfa->pre, v->nfa->pre);
+	newarc(v->nfa, PLAIN, v->nfa->eos[0], v->nfa->pre, v->nfa->pre);
+	newarc(v->nfa, PLAIN, v->nfa->eos[1], v->nfa->pre, v->nfa->pre);
+	(DISCARD)optimize(v->nfa, debug);
 	CNOERR();
-	if (v->nfa->post->nins <= 0)	
-		return freev(v, REG_IMPOSS);	/* end unreachable! */
-	assert(v->nfa->pre->nouts > 0);
-	compact(v->nfa, &g->cnfa);
-	CNOERR();
-	freenfa(v->nfa);
-	v->nfa = NULL;
-
-	/* fill color map */
-	fillcm(v->cm);
+	compact(v->nfa, &g->search);
 	CNOERR();
 
 	/* looks okay, package it up */
@@ -380,8 +386,6 @@ int flags;
 	g->cflags = v->cflags;
 	g->info = re->re_info;
 	g->nsub = re->re_nsub;
-	g->cm = v->cm;
-	v->cm = NULL;
 	g->tree = v->tree;
 	v->tree = NULL;
 	g->ntree = v->ntree;
@@ -390,6 +394,7 @@ int flags;
 	v->lacons = NULL;
 	g->nlacons = v->nlacons;
 	g->usedshorter = v->usedshorter;
+	g->unmatchable = v->unmatchable;
 
 	if (flags&REG_DUMP)
 		dump(re, stdout);
@@ -447,12 +452,10 @@ int err;
 		FREE(v->subs);
 	if (v->nfa != NULL)
 		freenfa(v->nfa);
-	if (v->cm != NULL)
-		freecm(v->cm);
 	if (v->tree != NULL)
-		freert(v, v->tree);
+		freesubre(v, v->tree);
 	if (v->treechain != NULL)
-		cleanrt(v);
+		cleanst(v);
 	if (v->cv != NULL)
 		freecvec(v->cv);
 	if (v->cv2 != NULL)
@@ -468,541 +471,562 @@ int err;
 
 /*
  - parse - parse an RE
- * Arguably this is too big and too complex and ought to be divided up.
- * However, the code is somewhat intertwined...
- *
- * Note that it is no longer necessary to be rigorous about freeing tree
- * nodes on error exits, as the tree machinery keeps track of them.
- ^ static struct rtree *parse(struct vars *, int, int, struct state *,
- ^ 	struct state *, int);
+ * This is actually just the top level, which parses a bunch of branches
+ * tied together with '|'.  They appear in the tree as the left children
+ * of a chain of '|' subres.
+ ^ static struct subre *parse(struct vars *, int, int, struct state *,
+ ^ 	struct state *);
  */
-static struct rtree *		/* NULL if no interesting substructure */
-parse(v, stopper, type, init, final, pprefer)
+static struct subre *
+parse(v, stopper, type, init, final)
 struct vars *v;
 int stopper;			/* EOS or ')' */
 int type;			/* LACON (lookahead subRE) or PLAIN */
 struct state *init;		/* initial state */
 struct state *final;		/* final state */
-int pprefer;			/* parent's short/long preference */
 {
 	struct state *left;	/* scaffolding for branch */
 	struct state *right;
-	struct state *lp;	/* scaffolding for current construct */
-	struct state *rp;
-	struct state *s;	/* temporaries for new states */
-	struct state *s2;
-#	define	ARCV(t, val)	newarc(v->nfa, t, val, lp, rp)
-	int m, n;
-	int emptybranch;	/* is there anything in this branch yet? */
-	struct rtree *branches;	/* top level */
-	struct rtree *branch;	/* current branch */
-	struct subre *now;	/* current subtree's top */
-	struct subre sub;	/* communication variable */
-	struct rtree *rt1;	/* temporaries */
-	struct rtree *rt2;
-	struct subre *t;	/* work pointer, top of interesting subtree */
+	struct subre *branches;	/* top level */
+	struct subre *branch;	/* current branch */
+	struct subre *t;	/* temporary */
 	int firstbranch;	/* is this the first branch? */
-	int capture;		/* any capturing parens within this? */
-	int constraint;		/* is the current atom a constraint? */
 
 	assert(stopper == ')' || stopper == EOS);
 
-	capture = 0;
-	branches = newrt(v);
+	branches = subre(v, '|', LONGER, init, final);
+	NOERRN();
 	branch = branches;
-	rt1 = NULL;		/* shut up lint */
 	firstbranch = 1;
-	NOERRN();
-	do {
-		/* a branch */
-		emptybranch = 1;	/* tentatively */
-		left = newstate(v->nfa);
-		right = newstate(v->nfa);
-		NOERRN();
+	do {	/* a branch */
 		if (!firstbranch) {
-			rt1 = newrt(v);
+			/* need a place to hang it */
+			branch->right = subre(v, '|', LONGER, init, final);
 			NOERRN();
-			branch->next = rt1;
-			branch = rt1;
+			branch = branch->right;
 		}
+		firstbranch = 0;
+		left = newstate(v->nfa);
+		right = newstate(v->nfa);
+		NOERRN();
 		EMPTYARC(init, left);
 		EMPTYARC(right, final);
-		lp = left;
-		rp = right;
-		branch->op = '|';
-		now = &branch->left;
-		*now = subre(left, right, NONEYET, 0, (struct rtree *)NULL);
-		firstbranch = 0;
 		NOERRN();
+		branch->left = parsebranch(v, stopper, type, left, right, 0);
+		NOERRN();
+		branch->flags |= UP(branch->flags | branch->left->flags);
+		if ((branch->flags &~ branches->flags) != 0)	/* new flags */
+			for (t = branches; t != branch; t = t->right)
+				t->flags |= branch->flags;
+	} while (EAT('|'));
+	assert(SEE(stopper) || SEE(EOS));
 
-		while (!SEE('|') && !SEE(stopper) && !SEE(EOS)) {
-			/* initial bookkeeping */
-			sub.begin = NULL;	/* no substructure seen yet */
-			sub.subno = 0;
-			sub.prefer = NONEYET;
-			constraint = 0;
-			if (emptybranch)	/* first of the branch */
-				emptybranch = 0;
-			else {			/* implicit concat operator */
-				lp = newstate(v->nfa);
-				NOERRN();
-				moveins(v->nfa, rp, lp);
-			}
-			assert(lp->nouts == 0);	/* must string new code */
-			assert(rp->nins == 0);	/*  between lp and rp */
-
-			/* an atom... */
-			switch (v->nexttype) {
-			case '(':	/* value flags as capturing or non */
-				m = (type == LACON) ? 0 : v->nextvalue;
-				if (m) {
-					v->nsubexp++;
-					sub.subno = v->nsubexp;
-					if ((size_t)sub.subno >= v->nsubs)
-						moresubs(v, sub.subno);
-					assert((size_t)sub.subno < v->nsubs);
-				} else
-					sub.subno = 0;
-				NEXT();
-				sub.begin = lp;	/* NB, substructure seen */
-				sub.end = rp;
-				/* use now->tree as temporary, so */
-				/*  things get freed on error returns */
-				assert(now->tree == NULL);
-				now->tree = parse(v, ')', PLAIN, lp, rp,
-								now->prefer);
-				assert(SEE(')') || ISERR());
-				NEXT();
-				NOERRN();
-				if (!m && now->tree == NULL) {
-					/* actually no relevant substructure */
-					sub.begin = NULL;
-				}
-				if (now->tree != NULL) {
-					if (now->tree->op == '|')
-						sub.prefer = LONGER;
-					else
-						sub.prefer =
-							now->tree->left.prefer;
-				}
-				/* must postpone other processing until we */
-				/*  know about any {0,0} quantifier */
-				break;
-			case BACKREF:	/* the Feature From The Black Lagoon */
-				INSIST(type != LACON, REG_ESUBREG);
-				INSIST(v->nextvalue < v->nsubs, REG_ESUBREG);
-				INSIST(v->subs[v->nextvalue] != NULL,
-								REG_ESUBREG);
-				NOERRN();
-				assert(v->nextvalue > 0);
-				sub.subno = -v->nextvalue;
-				sub.begin = lp;	/* NB, substructure seen */
-				sub.end = rp;
-				EMPTYARC(lp, rp);	/* temporarily */
-				assert(now->tree == NULL);
-				NEXT();
-				break;
-			case LACON:	/* lookahead constraint */
-				m = v->nextvalue;	/* is positive? */
-				NEXT();
-				s = newstate(v->nfa);
-				s2 = newstate(v->nfa);
-				NOERRN();
-				rt1 = parse(v, ')', LACON, s, s2, NONEYET);
-				assert(SEE(')') || ISERR());
-				NEXT();
-				m = newlacon(v, s, s2, m);
-				freert(v, rt1);
-				NOERRN();
-				ARCV(LACON, m);
-				constraint = 1;
-				break;
-			case PREFER:	/* length preference */
-				sub.prefer = (v->nextvalue) ? LONGER : SHORTER;
-				NEXT();
-				sub.begin = lp;	/* NB, substructure seen */
-				sub.end = rp;
-				/* use now->tree as temporary, so */
-				/*  things get freed on error returns */
-				assert(now->tree == NULL);
-				now->tree = parse(v, ')', PLAIN, lp, rp,
-								sub.prefer);
-				assert(SEE(')') || ISERR());
-				NEXT();
-				NOERRN();
-				if (now->prefer == NONEYET)
-					now->prefer = sub.prefer;
-				if (sub.prefer == now->prefer &&
-							now->tree == NULL) {
-					/* actually no relevant substructure */
-					sub.begin = NULL;
-				}
-				break;
-			case '[':
-				if (v->nextvalue == 1)
-					bracket(v, lp, rp);
-				else
-					cbracket(v, lp, rp);
-				assert(SEE(']') || ISERR());
-				NEXT();
-				break;
-			case '.':
-				rainbow(v->nfa, v->cm, PLAIN,
-					(v->cflags&REG_NLSTOP) ?
-							nlcolor(v) : COLORLESS,
-					lp, rp);
-				NEXT();
-				break;
-			case '^':
-				ARCV('^', 1);
-				if (v->cflags&REG_NLANCH)
-					ARCV(BEHIND, nlcolor(v));
-				NEXT();
-				constraint = 1;
-				break;
-			case '$':
-				ARCV('$', 1);
-				if (v->cflags&REG_NLANCH)
-					ARCV(AHEAD, nlcolor(v));
-				NEXT();
-				constraint = 1;
-				break;
-			case SBEGIN:
-				ARCV('^', 1);	/* BOL */
-				ARCV('^', 0);	/* or BOS */
-				NEXT();
-				constraint = 1;
-				break;
-			case SEND:
-				ARCV('$', 1);	/* EOL */
-				ARCV('$', 0);	/* or EOS */
-				NEXT();
-				constraint = 1;
-				break;
-			case '<':
-				wordchrs(v);	/* does NEXT() */
-				s = newstate(v->nfa);
-				NOERRN();
-				/* needs BOL, BOS, or nonword to left... */
-				newarc(v->nfa, '^', 1, lp, s);
-				newarc(v->nfa, '^', 0, lp, s);
-				colorcomplement(v->nfa, v->cm, BEHIND,
-							v->wordchrs, lp, s);
-				/* ... and word to right */
-				cloneouts(v->nfa, v->wordchrs, s, rp, AHEAD);
-				/* (no need for special attention to \n) */
-				constraint = 1;
-				break;
-			case '>':
-				wordchrs(v);	/* does NEXT() */
-				s = newstate(v->nfa);
-				NOERRN();
-				/* needs word to left... */
-				cloneouts(v->nfa, v->wordchrs, lp, s, BEHIND);
-				/* ... and EOL, EOS, or nonword to right */
-				newarc(v->nfa, '$', 1, s, rp);
-				newarc(v->nfa, '$', 0, s, rp);
-				colorcomplement(v->nfa, v->cm, AHEAD,
-							v->wordchrs, s, rp);
-				/* (no need for special attention to \n) */
-				constraint = 1;
-				break;
-			case WBDRY:
-				wordchrs(v);	/* does NEXT() */
-				s = newstate(v->nfa);
-				NOERRN();
-				/* needs BOL, BOS, or nonword to left... */
-				newarc(v->nfa, '^', 1, lp, s);
-				newarc(v->nfa, '^', 0, lp, s);
-				colorcomplement(v->nfa, v->cm, BEHIND,
-							v->wordchrs, lp, s);
-				/* ... and word to right... */
-				cloneouts(v->nfa, v->wordchrs, s, rp, AHEAD);
-				/* ...or... */
-				s = newstate(v->nfa);
-				NOERRN();
-				/* ...needs word to left... */
-				cloneouts(v->nfa, v->wordchrs, lp, s, BEHIND);
-				/* ... and EOL, EOS, or nonword to right */
-				newarc(v->nfa, '$', 1, s, rp);
-				newarc(v->nfa, '$', 0, s, rp);
-				colorcomplement(v->nfa, v->cm, AHEAD,
-							v->wordchrs, s, rp);
-				/* (no need for special attention to \n) */
-				constraint = 1;
-				break;
-			case NWBDRY:
-				wordchrs(v);	/* does NEXT() */
-				s = newstate(v->nfa);
-				NOERRN();
-				/* needs word to both left and right... */
-				cloneouts(v->nfa, v->wordchrs, lp, s, BEHIND);
-				cloneouts(v->nfa, v->wordchrs, s, rp, AHEAD);
-				/* ...or... */
-				s = newstate(v->nfa);
-				NOERRN();
-				/* ...BOL, BOS, or nonword to left... */
-				newarc(v->nfa, '^', 1, lp, s);
-				newarc(v->nfa, '^', 0, lp, s);
-				colorcomplement(v->nfa, v->cm, BEHIND,
-							v->wordchrs, lp, s);
-				/* ... and EOL, EOS, or nonword to right */
-				newarc(v->nfa, '$', 1, s, rp);
-				newarc(v->nfa, '$', 0, s, rp);
-				colorcomplement(v->nfa, v->cm, AHEAD,
-							v->wordchrs, s, rp);
-				/* (no need for special attention to \n) */
-				constraint = 1;
-				break;
-			case ')':		/* unbalanced paren */
-#ifdef POSIX_MISTAKE
-				if (!(v->cflags&REG_EXTENDED) ||
-							(v->cflags&REG_ADVF)) {
-					ERR(REG_EPAREN);
-					return NULL;
-				}
-				NOTE(REG_UPBOTCH);
-				/* fallthrough into case PLAIN */
-#else
-				ERR(REG_EPAREN);
-				return NULL;
-				break;
-#endif
-			case PLAIN:
-				onechr(v, v->nextvalue, lp, rp);
-				okcolors(v->nfa, v->cm);
-				NOERRN();
-				NEXT();
-				break;
-			case '*':
-			case '+':
-			case '?':
-			case '{':
-				ERR(REG_BADRPT);
-				return NULL;
-				break;
-			default:
-				ERR(REG_ASSERT);
-				return NULL;
-				break;
-			}
-
-			/* ...possibly followed by a quantifier */
-			switch (v->nexttype) {
-			case '*':
-				m = 0;
-				n = INFINITY;
-				sub.prefer = (v->nextvalue) ? LONGER : SHORTER;
-				NEXT();
-				break;
-			case '+':
-				m = 1;
-				n = INFINITY;
-				sub.prefer = (v->nextvalue) ? LONGER : SHORTER;
-				NEXT();
-				break;
-			case '?':
-				m = 0;
-				n = 1;
-				sub.prefer = (v->nextvalue) ? LONGER : SHORTER;
-				NEXT();
-				break;
-			case '{':
-				NEXT();
-				m = scannum(v);
-				if (EAT(',')) {
-					if (SEE(DIGIT))
-						n = scannum(v);
-					else
-						n = INFINITY;
-					if (m > n) {
-						ERR(REG_BADBR);
-						return NULL;
-					}
-				} else
-					n = m;
-				if (!SEE('}')) {	/* gets errors too */
-					ERR(REG_BADBR);
-					return NULL;
-				}
-				if (m != n)
-					sub.prefer = (v->nextvalue) ? LONGER :
-									SHORTER;
-				NEXT();
-				break;
-			default:		/* no quantifier */
-				m = n = 1;
-				constraint = 0;
-				break;
-			}
+	if (!SEE(stopper)) {
+		assert(stopper == ')' && SEE(EOS));
+		ERR(REG_EPAREN);
+	}
 
-			/* constraints may not be quantified */
-			if (constraint) {
-				ERR(REG_BADRPT);
-				return NULL;
-			}
+	/* optimize out simple cases */
+	if (branch == branches) {	/* only one branch */
+		assert(branch->right == NULL);
+		t = branch->left;
+		branch->left = NULL;
+		freesubre(v, branches);
+		branches = t;
+	} else if (!MESSY(branches->flags)) {	/* no interesting innards */
+		freesubre(v, branches->left);
+		branches->left = NULL;
+		freesubre(v, branches->right);
+		branches->right = NULL;
+		branches->op = '=';
+	}
 
-			/* annoying special case:  {0,0} cancels everything */
-			if (m == 0 && n == 0 && sub.begin != NULL) {
-				freert(v, now->tree);
-				now->tree = NULL;
-				sub.begin = NULL;	/* no substructure */
-				sub.prefer = NONEYET;
-				/* the repeat() below will do the rest */
-			}
+	return branches;
+}
 
-			/* if no substructure, avoid hard part */
-			if (now->prefer == NONEYET)
-				now->prefer = sub.prefer;
-			if (sub.begin == NULL && (sub.prefer == NONEYET ||
-						sub.prefer == now->prefer)) {
-				assert(sub.subno >= 0 || (m == 0 && n == 0));
-				if (!(m == 1 && n == 1))
-					repeat(v, lp, rp, m, n);
-				continue;		/* NOTE CONTINUE */
-			}
+/*
+ - parsebranch - parse one branch of an RE
+ * This mostly manages concatenation, working closely with parseqatom().
+ * Concatenated things are bundled up as much as possible, with separate
+ * ',' nodes introduced only when necessary due to substructure.
+ ^ static struct subre *parsebranch(struct vars *, int, int, struct state *,
+ ^ 	struct state *, int);
+ */
+static struct subre *
+parsebranch(v, stopper, type, left, right, partial)
+struct vars *v;
+int stopper;			/* EOS or ')' */
+int type;			/* LACON (lookahead subRE) or PLAIN */
+struct state *left;		/* leftmost state */
+struct state *right;		/* rightmost state */
+int partial;			/* is this only part of a branch? */
+{
+	struct state *lp;	/* left end of current construct */
+	int seencontent;	/* is there anything in this branch yet? */
+	struct subre *t;
 
-			/* hard part:  something messy seen */
-			/* break subRE into pre, x{...}, post-to-be */
-			capture = 1;		/* upper levels will care */
-			rt1 = newrt(v);
-			rt2 = newrt(v);
-			s = newstate(v->nfa);	/* between x and post-to-be */
+	lp = left;
+	seencontent = 0;
+	t = subre(v, '=', 0, left, right);	/* op '=' is tentative */
+	NOERRN();
+	while (!SEE('|') && !SEE(stopper) && !SEE(EOS)) {
+		if (seencontent) {	/* implicit concat operator */
+			lp = newstate(v->nfa);
 			NOERRN();
-			moveins(v->nfa, rp, s);
-			EMPTYARC(s, rp);
-			rt1->op = ',';
-			rt1->left = subre(now->begin, lp, now->prefer, 0,
-							(struct rtree *)NULL);
-			assert(now->end == rp);
-			rt1->right = subre(lp, rp, sub.prefer, 0, rt2);
-			rt2->op = ',';
-			rt2->left = subre(lp, s, sub.prefer, 0, now->tree);
-			rt2->right = subre(s, rp, NONEYET, 0,
-							(struct rtree *)NULL);
-			now->tree = rt1;
-			now = &rt2->right;	/* future elaborations here */
-			t = &rt2->left;		/* current activity here */
-
-			/* if it's a backref, time to replicate the subNFA */
-			if (sub.subno < 0) {
-				assert(lp->nouts == 1);	/* just the EMPTY */
-				delsub(v->nfa, lp, s);
-				assert(v->subs[-sub.subno] != NULL);
-				dupnfa(v->nfa, v->subs[-sub.subno]->begin,
-					v->subs[-sub.subno]->end, lp, s);
-				NOERRN();
-			}
+			moveins(v->nfa, right, lp);
+		}
+		seencontent = 1;
 
-			/* if no/vacuous quantifier and not backref, done */
-			if (m == 1 && n == 1 && sub.subno >= 0) {
-				t->subno = sub.subno;
-				if (sub.subno > 0)
-					v->subs[sub.subno] = t;
-				continue;		/* NOTE CONTINUE */
-			}
+		/* NB, recursion in parseqatom() may swallow rest of branch */
+		parseqatom(v, stopper, type, lp, right, t);
+	}
 
-			/* really sticky part, quantified capturer/backref */
-			/* first, turn x{0,...} into x{1,...}| */
-			if (m == 0) {
-				s = newstate(v->nfa);
-				s2 = newstate(v->nfa);
-				rt1 = newrt(v);
-				rt2 = newrt(v);
-				NOERRN();
-				moveouts(v->nfa, t->begin, s);
-				EMPTYARC(t->begin, s);
-				EMPTYARC(t->begin, s2);
-				EMPTYARC(s2, t->end);
-				rt1->op = rt2->op = '|';
-				rt1->left = subre(s, t->end, sub.prefer, 0,
-								t->tree);
-				rt1->next = rt2;
-				rt2->left = subre(s2, t->end, sub.prefer, 0,
-							(struct rtree *)NULL);
-				t->tree = rt1;
-				t = &rt1->left;
-				m = 1;
-			}
+	if (!seencontent) {		/* empty branch */
+		if (!partial)
+			NOTE(REG_UUNSPEC);
+		assert(lp == left);
+		EMPTYARC(left, right);
+	}
 
-			/* second, x{1,1} is just x */
-			if (m == 1 && n == 1 && sub.subno >= 0) {
-				t->subno = sub.subno;
-				if (sub.subno > 0)
-					v->subs[sub.subno] = t;
-				continue;		/* NOTE CONTINUE */
-			}
+	return t;
+}
 
-			/* backrefs get special treatment */
-			if (sub.subno < 0) {
-				repeat(v, t->begin, t->end, m, n);
-				rt1 = newrt(v);
-				NOERRN();
-				assert(t->tree == NULL);
-				t->tree = rt1;
-				rt1->op = 'b';
-				rt1->left.subno = sub.subno;
-				rt1->left.min = (short)m;
-				rt1->left.max = (short)n;
-				rt1->left.prefer = sub.prefer;
-				continue;		/* NOTE CONTINUE */
-			}
+/*
+ - parseqatom - parse one quantified atom or constraint of an RE
+ * The bookkeeping near the end cooperates very closely with parsebranch();
+ * in particular, it contains a recursion that can involve parsing the rest
+ * of the branch, making this function's name somewhat inaccurate.
+ ^ static VOID parseqatom(struct vars *, int, int, struct state *,
+ ^ 	struct state *, struct subre *);
+ */
+static VOID
+parseqatom(v, stopper, type, lp, rp, top)
+struct vars *v;
+int stopper;			/* EOS or ')' */
+int type;			/* LACON (lookahead subRE) or PLAIN */
+struct state *lp;		/* left state to hang it on */
+struct state *rp;		/* right state to hang it on */
+struct subre *top;		/* subtree top */
+{
+	struct state *s;	/* temporaries for new states */
+	struct state *s2;
+#	define	ARCV(t, val)	newarc(v->nfa, t, val, lp, rp)
+	int m, n;
+	struct subre *atom;	/* atom's subtree */
+	struct subre *t;
+	int cap;		/* capturing parens? */
+	int pos;		/* positive lookahead? */
+	int subno;		/* capturing-parens or backref number */
+	int atomtype;
+	int qprefer;		/* quantifier short/long preference */
+	int f;
+	struct subre **atomp;	/* where the pointer to atom is */
+
+	/* initial bookkeeping */
+	atom = NULL;
+	assert(lp->nouts == 0);	/* must string new code */
+	assert(rp->nins == 0);	/*  between lp and rp */
+	subno = 0;		/* just to shut lint up */
+
+	/* an atom or constraint... */
+	atomtype = v->nexttype;
+	switch (atomtype) {
+	/* first, constraints, which end by returning */
+	case '^':
+		ARCV('^', 1);
+		if (v->cflags&REG_NLANCH)
+			ARCV(BEHIND, v->nlcolor);
+		NEXT();
+		return;
+		break;
+	case '$':
+		ARCV('$', 1);
+		if (v->cflags&REG_NLANCH)
+			ARCV(AHEAD, v->nlcolor);
+		NEXT();
+		return;
+		break;
+	case SBEGIN:
+		ARCV('^', 1);	/* BOL */
+		ARCV('^', 0);	/* or BOS */
+		NEXT();
+		return;
+		break;
+	case SEND:
+		ARCV('$', 1);	/* EOL */
+		ARCV('$', 0);	/* or EOS */
+		NEXT();
+		return;
+		break;
+	case '<':
+		wordchrs(v);	/* does NEXT() */
+		s = newstate(v->nfa);
+		NOERR();
+		nonword(v, BEHIND, lp, s);
+		word(v, AHEAD, s, rp);
+		return;
+		break;
+	case '>':
+		wordchrs(v);	/* does NEXT() */
+		s = newstate(v->nfa);
+		NOERR();
+		word(v, BEHIND, lp, s);
+		nonword(v, AHEAD, s, rp);
+		return;
+		break;
+	case WBDRY:
+		wordchrs(v);	/* does NEXT() */
+		s = newstate(v->nfa);
+		NOERR();
+		nonword(v, BEHIND, lp, s);
+		word(v, AHEAD, s, rp);
+		s = newstate(v->nfa);
+		NOERR();
+		word(v, BEHIND, lp, s);
+		nonword(v, AHEAD, s, rp);
+		return;
+		break;
+	case NWBDRY:
+		wordchrs(v);	/* does NEXT() */
+		s = newstate(v->nfa);
+		NOERR();
+		word(v, BEHIND, lp, s);
+		word(v, AHEAD, s, rp);
+		s = newstate(v->nfa);
+		NOERR();
+		nonword(v, BEHIND, lp, s);
+		nonword(v, AHEAD, s, rp);
+		return;
+		break;
+	case LACON:	/* lookahead constraint */
+		pos = v->nextvalue;
+		NEXT();
+		s = newstate(v->nfa);
+		s2 = newstate(v->nfa);
+		NOERR();
+		t = parse(v, ')', LACON, s, s2);
+		freesubre(v, t);	/* internal structure irrelevant */
+		assert(SEE(')') || ISERR());
+		NEXT();
+		n = newlacon(v, s, s2, pos);
+		NOERR();
+		ARCV(LACON, n);
+		return;
+		break;
+	/* then errors, to get them out of the way */
+	case '*':
+	case '+':
+	case '?':
+	case '{':
+		ERR(REG_BADRPT);
+		return;
+		break;
+	default:
+		ERR(REG_ASSERT);
+		return;
+		break;
+	/* then plain characters, and minor variants on that theme */
+	case ')':		/* unbalanced paren */
+		if ((v->cflags&REG_ADVANCED) != REG_EXTENDED) {
+			ERR(REG_EPAREN);
+			return;
+		}
+		/* legal in EREs due to specification botch */
+		NOTE(REG_UPBOTCH);
+		/* fallthrough into case PLAIN */
+	case PLAIN:
+		onechr(v, v->nextvalue, lp, rp);
+		okcolors(v->nfa, v->cm);
+		NOERR();
+		NEXT();
+		break;
+	case '[':
+		if (v->nextvalue == 1)
+			bracket(v, lp, rp);
+		else
+			cbracket(v, lp, rp);
+		assert(SEE(']') || ISERR());
+		NEXT();
+		break;
+	case '.':
+		rainbow(v->nfa, v->cm, PLAIN,
+				(v->cflags&REG_NLSTOP) ? v->nlcolor : COLORLESS,
+				lp, rp);
+		NEXT();
+		break;
+	/* and finally the ugly stuff */
+	case '(':	/* value flags as capturing or non */
+		cap = (type == LACON) ? 0 : v->nextvalue;
+		if (cap) {
+			v->nsubexp++;
+			subno = v->nsubexp;
+			if ((size_t)subno >= v->nsubs)
+				moresubs(v, subno);
+			assert((size_t)subno < v->nsubs);
+		} else
+			atomtype = PLAIN;	/* something that's not '(' */
+		NEXT();
+		/* need new endpoints because tree will contain pointers */
+		s = newstate(v->nfa);
+		s2 = newstate(v->nfa);
+		NOERR();
+		EMPTYARC(lp, s);
+		EMPTYARC(s2, rp);
+		NOERR();
+		atom = parse(v, ')', PLAIN, s, s2);
+		assert(SEE(')') || ISERR());
+		NEXT();
+		NOERR();
+		if (cap) {
+			v->subs[subno] = atom;
+			t = subre(v, '(', atom->flags|CAP, lp, rp);
+			NOERR();
+			t->subno = subno;
+			t->left = atom;
+			atom = t;
+		}
+		/* postpone everything else pending possible {0} */
+		break;
+	case BACKREF:	/* the Feature From The Black Lagoon */
+		INSIST(type != LACON, REG_ESUBREG);
+		INSIST(v->nextvalue < v->nsubs, REG_ESUBREG);
+		INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG);
+		NOERR();
+		assert(v->nextvalue > 0);
+		atom = subre(v, 'b', BACKR, lp, rp);
+		subno = v->nextvalue;
+		atom->subno = subno;
+		EMPTYARC(lp, rp);	/* temporarily, so there's something */
+		NEXT();
+		break;
+	}
 
-			/* turn x{m,n} into x{m-1,n-1}x, with capturing */
-			/*  parens in only second x */
-			s = newstate(v->nfa);
-			NOERRN();
-			moveouts(v->nfa, t->begin, s);
-			dupnfa(v->nfa, s, t->end, t->begin, s);
-			assert(m >= 1 && m != INFINITY && n >= 1);
-			repeat(v, t->begin, s, m-1, (n == INFINITY) ? n : n-1);
-			rt1 = newrt(v);
-			NOERRN();
-			rt1->op = ',';
-			rt1->left = subre(t->begin, s, sub.prefer, 0,
-							(struct rtree *)NULL);
-			/* sub.prefer not really right, but doesn't matter */
-			rt1->right = subre(s, t->end, sub.prefer, sub.subno,
-								t->tree);
-			if (sub.subno > 0)
-				v->subs[sub.subno] = &rt1->right;
-			t->tree = rt1;
+	/* ...and an atom may be followed by a quantifier */
+	switch (v->nexttype) {
+	case '*':
+		m = 0;
+		n = INFINITY;
+		qprefer = (v->nextvalue) ? LONGER : SHORTER;
+		NEXT();
+		break;
+	case '+':
+		m = 1;
+		n = INFINITY;
+		qprefer = (v->nextvalue) ? LONGER : SHORTER;
+		NEXT();
+		break;
+	case '?':
+		m = 0;
+		n = 1;
+		qprefer = (v->nextvalue) ? LONGER : SHORTER;
+		NEXT();
+		break;
+	case '{':
+		NEXT();
+		m = scannum(v);
+		if (EAT(',')) {
+			if (SEE(DIGIT))
+				n = scannum(v);
+			else
+				n = INFINITY;
+			if (m > n) {
+				ERR(REG_BADBR);
+				return;
+			}
+			/* {m,n} exercises preference, even if it's {m,m} */
+			qprefer = (v->nextvalue) ? LONGER : SHORTER;
+		} else {
+			n = m;
+			/* {m} passes operand's preference through */
+			qprefer = 0;
 		}
-		if (emptybranch) {
-			NOTE(REG_UUNSPEC);
-			EMPTYARC(lp, rp);
+		if (!SEE('}')) {	/* catches errors too */
+			ERR(REG_BADBR);
+			return;
 		}
-	} while (EAT('|'));
-	assert(SEE(stopper) || SEE(EOS));
+		NEXT();
+		break;
+	default:		/* no quantifier */
+		m = n = 1;
+		qprefer = 0;
+		break;
+	}
 
-	if (!SEE(stopper)) {
-		assert(stopper == ')' && SEE(EOS));
-		ERR(REG_EPAREN);
+	/* annoying special case:  {0} or {0,0} cancels everything */
+	if (m == 0 && n == 0) {
+		if (atom != NULL)
+			freesubre(v, atom);
+		if (atomtype == '(')
+			v->subs[subno] = NULL;
+		delsub(v->nfa, lp, rp);
+		EMPTYARC(lp, rp);
+		return;
 	}
 
-	/* higher levels care about our preference in certain situations */
-	if (branch != branches) {	/* >1 branch */
-		if (pprefer != LONGER)
-			capture = 1;
-	} else if (branches->left.prefer != pprefer)
-		capture = 1;
-
-	/* optimize out vacuous alternation */
-	if (branch == branches) {
-		assert(branch->next == NULL && branch->right.begin == NULL);
-		assert(branch->left.subno == 0);
-		if (capture && branch->left.tree == NULL)
-			branch->op = ',';
-		else {
-			branches = branch->left.tree;	/* might be NULL */
-			freertnode(v, branch);
-		}
+	/* if not a messy case, avoid hard part */
+	assert(!MESSY(top->flags));
+	f = top->flags | qprefer | ((atom != NULL) ? atom->flags : 0);
+	if (atomtype != '(' && atomtype != BACKREF && !MESSY(UP(f))) {
+		if (!(m == 1 && n == 1))
+			repeat(v, lp, rp, m, n);
+		if (atom != NULL)
+			freesubre(v, atom);
+		top->flags = f;
+		return;
+	}
+
+	/*
+	 * hard part:  something messy
+	 * That is, capturing parens, back reference, short/long clash, or
+	 * an atom with substructure containing one of those.
+	 */
+
+	/* now we'll need a subre for the contents even if they're boring */
+	if (atom == NULL) {
+		atom = subre(v, '=', 0, lp, rp);
+		NOERR();
+	}
+
+	/*
+	 * prepare a general-purpose state skeleton
+	 *
+	 *    ---> [s] ---prefix---> [begin] ---atom---> [end] ----rest---> [rp]
+	 *   /                                            /
+	 * [lp] ----> [s2] ----bypass---------------------
+	 *
+	 * where bypass is an empty, and prefix is some repetitions of atom
+	 */
+	s = newstate(v->nfa);		/* first, new endpoints for the atom */
+	s2 = newstate(v->nfa);
+	NOERR();
+	moveouts(v->nfa, lp, s);
+	moveins(v->nfa, rp, s2);
+	NOERR();
+	atom->begin = s;
+	atom->end = s2;
+	s = newstate(v->nfa);		/* and spots for prefix and bypass */
+	s2 = newstate(v->nfa);
+	NOERR();
+	EMPTYARC(lp, s);
+	EMPTYARC(lp, s2);
+	NOERR();
+
+	/* break remaining subRE into x{...} and what follows */
+	t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp);
+	t->left = atom;
+	atomp = &t->left;
+	/* here we should recurse... but we must postpone that to the end */
+
+	/* split top into prefix and remaining */
+	assert(top->op == '=' && top->left == NULL && top->right == NULL);
+	top->left = subre(v, '=', top->flags, top->begin, lp);
+	top->op = '.';
+	top->right = t;
+
+	/* if it's a backref, now is the time to replicate the subNFA */
+	if (atomtype == BACKREF) {
+		assert(atom->begin->nouts == 1);	/* just the EMPTY */
+		delsub(v->nfa, atom->begin, atom->end);
+		assert(v->subs[subno] != NULL);
+		/* and here's why the recursion got postponed:  it must */
+		/* wait until the skeleton is filled in, because it may */
+		/* hit a backref that wants to copy the filled-in skeleton */
+		dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end,
+						atom->begin, atom->end);
+		NOERR();
+	}
+
+	/* it's quantifier time; first, turn x{0,...} into x{1,...}|empty */
+	if (m == 0) {
+		EMPTYARC(s2, atom->end);		/* the bypass */
+		assert(PREF(qprefer) != 0);
+		f = COMBINE(qprefer, atom->flags);
+		t = subre(v, '|', f, lp, atom->end);
+		NOERR();
+		t->left = atom;
+		t->right = subre(v, '|', PREF(f), s2, atom->end);
+		NOERR();
+		t->right->left = subre(v, '=', 0, s2, atom->end);
+		NOERR();
+		*atomp = t;
+		atomp = &t->left;
+		m = 1;
 	}
 
-	if (capture)			/* actually a catchall flag */
-		return branches;
-	freert(v, branches);
-	return NULL;
+	/* deal with the rest of the quantifier */
+	if (atomtype == BACKREF) {
+		/* special case:  backrefs have internal quantifiers */
+		EMPTYARC(s, atom->begin);	/* empty prefix */
+		/* just stuff everything into atom */
+		repeat(v, atom->begin, atom->end, m, n);
+		atom->min = (short)m;
+		atom->max = (short)n;
+		atom->flags |= COMBINE(qprefer, atom->flags);
+	} else if (m == 1 && n == 1) {
+		/* no/vacuous quantifier:  done */
+		EMPTYARC(s, atom->begin);	/* empty prefix */
+	} else {
+		/* turn x{m,n} into x{m-1,n-1}x, with capturing */
+		/*  parens in only second x */
+		dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin);
+		assert(m >= 1 && m != INFINITY && n >= 1);
+		repeat(v, s, atom->begin, m-1, (n == INFINITY) ? n : n-1);
+		f = COMBINE(qprefer, atom->flags);
+		t = subre(v, '.', f, s, atom->end);	/* prefix and atom */
+		NOERR();
+		t->left = subre(v, '=', PREF(f), s, atom->begin);
+		NOERR();
+		t->right = atom;
+		*atomp = t;
+	}
+
+	/* and finally, look after that postponed recursion */
+	t = top->right;
+	if (!(SEE('|') || SEE(stopper) || SEE(EOS)))
+		t->right = parsebranch(v, stopper, type, atom->end, rp, 1);
+	else {
+		EMPTYARC(atom->end, rp);
+		t->right = subre(v, '=', 0, atom->end, rp);
+	}
+	assert(SEE('|') || SEE(stopper) || SEE(EOS));
+	t->flags |= COMBINE(t->flags, t->right->flags);
+	top->flags |= COMBINE(top->flags, t->flags);
+}
+
+/*
+ - nonword - generate arcs for non-word-character ahead or behind
+ ^ static VOID nonword(struct vars *, int, struct state *, struct state *);
+ */
+static VOID
+nonword(v, dir, lp, rp)
+struct vars *v;
+int dir;			/* AHEAD or BEHIND */
+struct state *lp;
+struct state *rp;
+{
+	int anchor = (dir == AHEAD) ? '$' : '^';
+
+	assert(dir == AHEAD || dir == BEHIND);
+	newarc(v->nfa, anchor, 1, lp, rp);
+	newarc(v->nfa, anchor, 0, lp, rp);
+	colorcomplement(v->nfa, v->cm, dir, v->wordchrs, lp, rp);
+	/* (no need for special attention to \n) */
+}
+
+/*
+ - word - generate arcs for word character ahead or behind
+ ^ static VOID word(struct vars *, int, struct state *, struct state *);
+ */
+static VOID
+word(v, dir, lp, rp)
+struct vars *v;
+int dir;			/* AHEAD or BEHIND */
+struct state *lp;
+struct state *rp;
+{
+	assert(dir == AHEAD || dir == BEHIND);
+	cloneouts(v->nfa, v->wordchrs, lp, rp, dir);
+	/* (no need for special attention to \n) */
 }
 
 /*
@@ -1163,7 +1187,7 @@ struct state *rp;
 	NOERR();
 	bracket(v, left, right);
 	if (v->cflags&REG_NLSTOP)
-		newarc(v->nfa, PLAIN, nlcolor(v), left, right);
+		newarc(v->nfa, PLAIN, v->nlcolor, left, right);
 	NOERR();
 
 	assert(lp->nouts == 0);		/* all outarcs will be ours */
@@ -1181,7 +1205,7 @@ struct state *rp;
 	/* but complementing gets messy in the presence of MCCEs... */
 	NOTE(REG_ULOCALE);
 	for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) {
-		co = getcolor(v->cm, *p);
+		co = GETCOLOR(v->cm, *p);
 		a = findarc(lp, PLAIN, co);
 		ba = findarc(left, PLAIN, co);
 		if (ba == NULL) {
@@ -1391,7 +1415,7 @@ struct cvec *cv;
 			okcolors(v->nfa, v->cm);
 		} else {
 			a = findarc(v->mccepbegin, PLAIN,
-						getcolor(v->cm, leader));
+						GETCOLOR(v->cm, leader));
 			assert(a != NULL);
 			s = a->to;
 			assert(s != v->mccepend);
@@ -1438,6 +1462,7 @@ struct state *lp;
 struct state *rp;
 {
 	chr ch, from, to;
+	celt ce;
 	chr *p;
 	int i;
 	color co;
@@ -1478,16 +1503,17 @@ struct state *rp;
 	for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) {
 		from = *p;
 		to = *(p+1);
-		for (ch = from; ch <= to; ch++)
-			if (!ISCELEADER(v, ch))
-				newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp,
-									rp);
-			else {
-				assert(singleton(v->cm, ch));
-				assert(leads != NULL);
-				if (!haschr(leads, ch))
-					addchr(leads, ch);
-			}
+		while (from <= to && (ce = nextleader(v, from, to)) != NOCELT) {
+			if (from < ce)
+				subrange(v, from, ce - 1, lp, rp);
+			assert(singleton(v->cm, ce));
+			assert(leads != NULL);
+			if (!haschr(leads, ce))
+				addchr(leads, ce);
+			from = ce + 1;
+		}
+		if (from <= to)
+			subrange(v, from, to, lp, rp);
 	}
 
 	if ((leads == NULL || leads->nchrs == 0) && cv->nmcces == 0)
@@ -1496,7 +1522,7 @@ struct state *rp;
 	/* deal with the MCCE leaders */
 	NOTE(REG_ULOCALE);
 	for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--) {
-		co = getcolor(v->cm, *p);
+		co = GETCOLOR(v->cm, *p);
 		a = findarc(lp, PLAIN, co);
 		if (a != NULL)
 			s = a->to;
@@ -1519,7 +1545,8 @@ struct state *rp;
 	for (i = 0; i < cv->nmcces; i++) {
 		p = cv->mcces[i];
 		assert(singleton(v->cm, *p));
-		co = getcolor(v->cm, *p++);
+		ch = *p++;
+		co = GETCOLOR(v->cm, ch);
 		a = findarc(lp, PLAIN, co);
 		if (a != NULL)
 			s = a->to;
@@ -1531,7 +1558,8 @@ struct state *rp;
 		}
 		assert(*p != 0);	/* at least two chars */
 		assert(singleton(v->cm, *p));
-		co = getcolor(v->cm, *p++);
+		ch = *p++;
+		co = GETCOLOR(v->cm, ch);
 		assert(*p == 0);	/* and only two, for now */
 		newarc(v->nfa, PLAIN, co, s, rp);
 		NOERR();
@@ -1539,20 +1567,30 @@ struct state *rp;
 }
 
 /*
- - nlcolor - assign newline a unique color, if it doesn't have one already
- * Restriction:  can't be called when there are subcolors open.  (Maybe
- * this should be enforced...)
- ^ static color nlcolor(struct vars *);
+ - nextleader - find next MCCE leader within range
+ ^ static celt nextleader(struct vars *, pchr, pchr);
  */
-static color
-nlcolor(v)
+static celt			/* NOCELT means none */
+nextleader(v, from, to)
 struct vars *v;
+pchr from;
+pchr to;
 {
-	if (v->nlcolor == COLORLESS) {
-		v->nlcolor = subcolor(v->cm, newline());
-		okcolors(v->nfa, v->cm);
+	int i;
+	chr *p;
+	chr ch;
+	celt it = NOCELT;
+
+	if (v->mcces == NULL)
+		return it;
+
+	for (i = v->mcces->nchrs, p = v->mcces->chrs; i > 0; i--, p++) {
+		ch = *p;
+		if (from <= ch && ch <= to)
+			if (it == NOCELT || ch < it)
+				it = ch;
 	}
-	return v->nlcolor;
+	return it;
 }
 
 /*
@@ -1579,6 +1617,7 @@ struct vars *v;
 	left = newstate(v->nfa);
 	right = newstate(v->nfa);
 	NOERR();
+	/* fine point:  implemented with [::], and lexer will set REG_ULOCALE */
 	lexword(v);
 	NEXT();
 	assert(v->savenow != NULL && SEE('['));
@@ -1590,273 +1629,169 @@ struct vars *v;
 }
 
 /*
- - subre - construct a subre struct
- ^ static struct subre subre(struct state *, struct state *, int, int,
- ^ 	struct rtree *);
+ - subre - allocate a subre
+ ^ static struct subre *subre(struct vars *, int, int, struct state *,
+ ^	struct state *);
  */
-static struct subre
-subre(begin, end, prefer, subno, tree)
+static struct subre *
+subre(v, op, flags, begin, end)
+struct vars *v;
+int op;
+int flags;
 struct state *begin;
 struct state *end;
-int prefer;
-int subno;
-struct rtree *tree;
-{
-	struct subre ret;
-
-	ret.begin = begin;
-	ret.end = end;
-	ret.prefer = prefer;
-	ret.subno = subno;
-	ret.min = ret.max = 1;
-	ret.tree = tree;
-	ZAPCNFA(ret.cnfa);
-	return ret;
-}
-
-/*
- - newrt - allocate subRE-tree node
- ^ static struct rtree *newrt(struct vars *);
- */
-static struct rtree *
-newrt(v)
-struct vars *v;
 {
-	struct rtree *rt;
+	struct subre *ret;
 
-	rt = v->treefree;
-	if (rt != NULL)
-		v->treefree = rt->next;
+	ret = v->treefree;
+	if (ret != NULL)
+		v->treefree = ret->left;
 	else {
-		rt = (struct rtree *)MALLOC(sizeof(struct rtree));
-		if (rt == NULL) {
+		ret = (struct subre *)MALLOC(sizeof(struct subre));
+		if (ret == NULL) {
 			ERR(REG_ESPACE);
 			return NULL;
 		}
-		rt->chain = v->treechain;
-		v->treechain = rt;
+		ret->chain = v->treechain;
+		v->treechain = ret;
 	}
 
-	rt->op = '?';		/* invalid */
-	rt->flags = 0;
-	rt->no = 0;
-	rt->left.begin = NULL;
-	rt->left.end = NULL;
-	rt->left.prefer = NONEYET;
-	rt->left.subno = 0;
-	rt->left.min = rt->left.max = 1;
-	rt->left.tree = NULL;
-	ZAPCNFA(rt->left.cnfa);
-	rt->right.begin = NULL;
-	rt->right.end = NULL;
-	rt->right.prefer = NONEYET;
-	rt->right.subno = 0;
-	rt->right.min = rt->right.max = 1;
-	rt->right.tree = NULL;
-	ZAPCNFA(rt->right.cnfa);
-	rt->next = NULL;
-
-	return rt;
+	assert(strchr("|.b(=", op) != NULL);
+
+	ret->op = op;
+	ret->flags = flags;
+	ret->retry = 0;
+	ret->subno = 0;
+	ret->min = ret->max = 1;
+	ret->left = NULL;
+	ret->right = NULL;
+	ret->begin = begin;
+	ret->end = end;
+	ZAPCNFA(ret->cnfa);
+
+	return ret;
 }
 
 /*
- - freert - free a subRE subtree
- ^ static VOID freert(struct vars *, struct rtree *);
+ - freesubre - free a subRE subtree
+ ^ static VOID freesubre(struct vars *, struct subre *);
  */
 static VOID
-freert(v, rt)
+freesubre(v, sr)
 struct vars *v;			/* might be NULL */
-struct rtree *rt;
+struct subre *sr;
 {
-	if (rt == NULL)
+	if (sr == NULL)
 		return;
 
-	if (rt->left.tree != NULL)
-		freert(v, rt->left.tree);
-	if (rt->right.tree != NULL)
-		freert(v, rt->right.tree);
-	if (rt->next != NULL)
-		freert(v, rt->next);
+	if (sr->left != NULL)
+		freesubre(v, sr->left);
+	if (sr->right != NULL)
+		freesubre(v, sr->right);
 
-	freertnode(v, rt);
+	freesrnode(v, sr);
 }
 
 /*
- - freertnode - free one node in a subRE subtree
- ^ static VOID freertnode(struct vars *, struct rtree *);
+ - freesrnode - free one node in a subRE subtree
+ ^ static VOID freesrnode(struct vars *, struct subre *);
  */
 static VOID
-freertnode(v, rt)
+freesrnode(v, sr)
 struct vars *v;			/* might be NULL */
-struct rtree *rt;
+struct subre *sr;
 {
-	if (rt == NULL)
+	if (sr == NULL)
 		return;
 
-	if (!NULLCNFA(rt->left.cnfa))
-		freecnfa(&rt->left.cnfa, 0);
-	if (!NULLCNFA(rt->right.cnfa))
-		freecnfa(&rt->right.cnfa, 0);
-	rt->flags = 0;
+	if (!NULLCNFA(sr->cnfa))
+		freecnfa(&sr->cnfa);
+	sr->flags = 0;
 
 	if (v != NULL) {
-		rt->next = v->treefree;
-		v->treefree = rt;
+		sr->left = v->treefree;
+		v->treefree = sr;
 	} else
-		FREE(rt);
+		FREE(sr);
 }
 
 /*
- - optrt - optimize a subRE subtree
- ^ static VOID optrt(struct vars *, struct rtree *);
+ - optst - optimize a subRE subtree
+ ^ static VOID optst(struct vars *, struct subre *);
  */
 static VOID
-optrt(v, rt)
+optst(v, t)
 struct vars *v;
-struct rtree *rt;
+struct subre *t;
 {
-	struct rtree *t;
-	int subno;
-
-	if (rt == NULL)
+	if (t == NULL)
 		return;
-	assert(rt->op != 'b');
-
-	/* pull up subtrees if possible */
-	if (rt->left.begin != NULL && rt->left.tree != NULL &&
-						rt->left.tree->op != 'b') {
-		t = rt->left.tree;
-		optrt(v, t);
-		if (t->right.begin == NULL && t->next == NULL &&
-				(rt->left.prefer == NONEYET ||
-					t->left.prefer == rt->left.prefer) &&
-				(rt->left.subno == 0 || t->left.subno == 0)) {
-			subno = rt->left.subno;
-			rt->left = t->left;
-			assert(NULLCNFA(t->left.cnfa));
-			freertnode(v, t);
-			if (subno != 0) {
-				assert(rt->left.subno == 0 && subno > 0);
-				rt->left.subno = subno;
-			}
-		}
-	}
-	if (rt->right.begin != NULL && rt->right.tree != NULL &&
-						rt->right.tree->op != 'b') {
-		t = rt->right.tree;
-		optrt(v, t);
-		if (t->right.begin == NULL && t->next == NULL &&
-				(rt->right.prefer == NONEYET ||
-					t->right.prefer == rt->right.prefer) &&
-				(rt->right.subno == 0 || t->right.subno == 0)) {
-			subno = rt->right.subno;
-			rt->right = t->left;
-			assert(NULLCNFA(t->right.cnfa));
-			freertnode(v, t);
-			if (subno != 0) {
-				assert(rt->right.subno == 0 && subno > 0);
-				rt->right.subno = subno;
-			}
-		}
-	}
-
-	/* simplify empties */
-	if (rt->left.begin != NULL && isempty(rt->left.begin, rt->left.end))
-		rt->left.end = rt->left.begin;
-	if (rt->right.begin != NULL && isempty(rt->right.begin, rt->right.end))
-		rt->right.end = rt->right.begin;
-
-	/* if left subtree vacuous and right non-empty, move right over */
-	if (rt->left.begin != NULL && rt->left.begin == rt->left.end &&
-				rt->left.subno == 0 && rt->left.tree == NULL &&
-				rt->right.begin != NULL) {
-		rt->left = rt->right;
-		rt->right.begin = NULL;
-		rt->right.tree = NULL;
-	}
-
-	/* if right subtree vacuous, clear it out */
-	if (rt->right.begin != NULL && rt->right.begin == rt->right.end &&
-			rt->right.subno == 0 && rt->right.tree == NULL) {
-		rt->right.begin = NULL;
-		rt->right.tree = NULL;
-	}
 
 	/* preference cleanup and analysis */
-	if (rt->left.prefer == NONEYET)
-		rt->left.prefer = LONGER;
-	if (rt->left.prefer == SHORTER)
+	if (t->flags&SHORTER)
 		v->usedshorter = 1;
-	if (rt->right.begin != NULL) {
-		if (rt->right.prefer == NONEYET)
-			rt->right.prefer = LONGER;
-		if (rt->right.prefer == SHORTER)
-			v->usedshorter = 1;
-	}
 
-	/* recurse through alternatives */
-	if (rt->next != NULL)
-		optrt(v, rt->next);
+	/* recurse through children */
+	if (t->left != NULL)
+		optst(v, t->left);
+	if (t->right != NULL)
+		optst(v, t->right);
 }
 
 /*
- - numrt - number tree nodes
- ^ static int numrt(struct rtree *, int);
+ - numst - number tree nodes (assigning retry indexes)
+ ^ static int numst(struct subre *, int);
  */
 static int			/* next number */
-numrt(rt, start)
-struct rtree *rt;
+numst(t, start)
+struct subre *t;
 int start;			/* starting point for subtree numbers */
 {
 	int i;
 
-	assert(rt != NULL);
+	assert(t != NULL);
 
 	i = start;
-	rt->no = (short)i++;
-	if (rt->left.tree != NULL)
-		i = numrt(rt->left.tree, i);
-	if (rt->right.tree != NULL)
-		i = numrt(rt->right.tree, i);
-	if (rt->next != NULL)
-		i = numrt(rt->next, i);
+	t->retry = (short)i++;
+	if (t->left != NULL)
+		i = numst(t->left, i);
+	if (t->right != NULL)
+		i = numst(t->right, i);
 	return i;
 }
 
 /*
- - markrt - mark tree nodes as INUSE
- ^ static VOID markrt(struct rtree *);
+ - markst - mark tree nodes as INUSE
+ ^ static VOID markst(struct subre *);
  */
 static VOID
-markrt(rt)
-struct rtree *rt;
+markst(t)
+struct subre *t;
 {
-	assert(rt != NULL);
-
-	rt->flags |= INUSE;
-	if (rt->left.tree != NULL)
-		markrt(rt->left.tree);
-	if (rt->right.tree != NULL)
-		markrt(rt->right.tree);
-	if (rt->next != NULL)
-		markrt(rt->next);
+	assert(t != NULL);
+
+	t->flags |= INUSE;
+	if (t->left != NULL)
+		markst(t->left);
+	if (t->right != NULL)
+		markst(t->right);
 }
 
 /*
- - cleanrt - free any tree nodes not marked INUSE
- ^ static VOID cleanrt(struct vars *);
+ - cleanst - free any tree nodes not marked INUSE
+ ^ static VOID cleanst(struct vars *);
  */
 static VOID
-cleanrt(v)
+cleanst(v)
 struct vars *v;
 {
-	struct rtree *rt;
-	struct rtree *next;
+	struct subre *t;
+	struct subre *next;
 
-	for (rt = v->treechain; rt != NULL; rt = next) {
-		next = rt->next;
-		if (!(rt->flags&INUSE))
-			FREE(rt);
+	for (t = v->treechain; t != NULL; t = next) {
+		next = t->chain;
+		if (!(t->flags&INUSE))
+			FREE(t);
 	}
 	v->treechain = NULL;
 	v->treefree = NULL;		/* just on general principles */
@@ -1864,56 +1799,51 @@ struct vars *v;
 
 /*
  - nfatree - turn a subRE subtree into a tree of compacted NFAs
- ^ static VOID nfatree(struct vars *, struct rtree *, FILE *);
+ ^ static int nfatree(struct vars *, struct subre *, FILE *);
  */
-static VOID
-nfatree(v, rt, f)
+static int			/* optimize results from top node */
+nfatree(v, t, f)
 struct vars *v;
-struct rtree *rt;
+struct subre *t;
 FILE *f;			/* for debug output */
 {
-	if (rt == NULL)
-		return;
+	assert(t != NULL && t->begin != NULL);
 
-	if (rt->left.begin != NULL)
-		nfanode(v, &rt->left, f);
-	if (rt->left.tree != NULL)
-		nfatree(v, rt->left.tree, f);
+	if (t->left != NULL)
+		(DISCARD)nfatree(v, t->left, f);
+	if (t->right != NULL)
+		(DISCARD)nfatree(v, t->right, f);
 
-	if (rt->right.begin != NULL)
-		nfanode(v, &rt->right, f);
-	if (rt->right.tree != NULL)
-		nfatree(v, rt->right.tree, f);
-
-	if (rt->next != NULL)
-		nfatree(v, rt->next, f);
+	return nfanode(v, t, f);
 }
 
 /*
  - nfanode - do one NFA for nfatree
- ^ static VOID nfanode(struct vars *, struct subre *, FILE *);
+ ^ static int nfanode(struct vars *, struct subre *, FILE *);
  */
-static VOID
-nfanode(v, sub, f)
+static int			/* optimize results */
+nfanode(v, t, f)
 struct vars *v;
-struct subre *sub;
+struct subre *t;
 FILE *f;			/* for debug output */
 {
 	struct nfa *nfa;
+	int ret = 0;
 
-	if (sub->begin == NULL)
-		return;
+	assert(t->begin != NULL);
 
 	nfa = newnfa(v, v->cm, v->nfa);
-	NOERR();
-	dupnfa(nfa, sub->begin, sub->end, nfa->init, nfa->final);
+	NOERRZ();
+	dupnfa(nfa, t->begin, t->end, nfa->init, nfa->final);
 	if (!ISERR()) {
 		specialcolors(nfa);
-		(DISCARD) optimize(nfa, f);
+		ret = optimize(nfa, f);
 	}
 	if (!ISERR())
-		compact(nfa, &sub->cnfa);
+		compact(nfa, &t->cnfa);
+
 	freenfa(nfa);
+	return ret;
 }
 
 /*
@@ -1964,9 +1894,9 @@ int n;
 	int i;
 
 	assert(n > 0);
-	for (sub = subs + 1, i = n - 1; i > 0; sub++, i--)
+	for (sub = subs + 1, i = n - 1; i > 0; sub++, i--)	/* no 0th */
 		if (!NULLCNFA(sub->cnfa))
-			freecnfa(&sub->cnfa, 0);
+			freecnfa(&sub->cnfa);
 	FREE(subs);
 }
 
@@ -1988,14 +1918,13 @@ regex_t *re;
 	re->re_guts = NULL;
 	re->re_fns = NULL;
 	g->magic = 0;
-	if (!NULLCNFA(g->cnfa))
-		freecnfa(&g->cnfa, 0);
-	if (g->cm != NULL)
-		freecm(g->cm);
+	freecm(&g->cmap);
 	if (g->tree != NULL)
-		freert((struct vars *)NULL, g->tree);
+		freesubre((struct vars *)NULL, g->tree);
 	if (g->lacons != NULL)
 		freelacons(g->lacons, g->nlacons);
+	if (!NULLCNFA(g->search))
+		freecnfa(&g->search);
 	FREE(g);
 }
 
@@ -2028,41 +1957,44 @@ FILE *f;
 		re->re_nsub, re->re_info, re->re_csize, g->ntree,
 		g->usedshorter);
 
-	dumpcolors(g->cm, f);
-	dumpcnfa(&g->cnfa, f);
+	dumpcolors(&g->cmap, f);
+	if (!NULLCNFA(g->search)) {
+		printf("search:\n");
+		dumpcnfa(&g->search, f);
+	}
 	for (i = 1; i < g->nlacons; i++) {
 		fprintf(f, "la%d (%s):\n", i,
 				(g->lacons[i].subno) ? "positive" : "negative");
 		dumpcnfa(&g->lacons[i].cnfa, f);
 	}
-	dumprt(g->tree, f, 0);
+	dumpst(g->tree, f, 0);
 #endif
 }
 
 /*
- - dumprt - dump a subRE tree
- ^ static VOID dumprt(struct rtree *, FILE *, int);
+ - dumpst - dump a subRE tree
+ ^ static VOID dumpst(struct subre *, FILE *, int);
  */
 static VOID
-dumprt(rt, f, nfapresent)
-struct rtree *rt;
+dumpst(t, f, nfapresent)
+struct subre *t;
 FILE *f;
 int nfapresent;			/* is the original NFA still around? */
 {
-	if (rt == NULL)
+	if (t == NULL)
 		fprintf(f, "null tree\n");
 	else
-		rtdump(rt, f, nfapresent, 0);
+		stdump(t, f, nfapresent, 0);
 	fflush(f);
 }
 
 /*
- - rtdump - recursive guts of dumprt
- ^ static VOID rtdump(struct rtree *, FILE *, int, int);
+ - stdump - recursive guts of dumpst
+ ^ static VOID stdump(struct subre *, FILE *, int, int);
  */
 static VOID
-rtdump(rt, f, nfapresent, level)
-struct rtree *rt;
+stdump(t, f, nfapresent, level)
+struct subre *t;
 FILE *f;
 int nfapresent;			/* is the original NFA still around? */
 int level;
@@ -2072,102 +2004,41 @@ int level;
 
 	for (i = 0; i < level; i++)
 		fprintf(f, RTSEP);
-	fprintf(f, "%c (n%d) {\n", rt->op, rt->no);
-	if (rt->left.begin != NULL) {
-		for (i = 0; i < level+1; i++)
-			fprintf(f, RTSEP);
-		fprintf(f, "L");
-		fprintf(f, "%s", (rt->left.prefer == NONEYET) ? "-" :
-				((rt->left.prefer == LONGER) ? ">" : "<"));
-		if (nfapresent)
-			fprintf(f, "%ld-%ld", (long)rt->left.begin->no,
-							(long)rt->left.end->no);
-		if (rt->left.subno > 0)
-			fprintf(f, " (%d)", rt->left.subno);
-		else if (rt->left.subno < 0) {
-			fprintf(f, " \\%d", -rt->left.subno);
-			if (rt->left.min != 1 || rt->left.max != 1) {
-				fprintf(f, "{%d-", (int)rt->left.min);
-				if (rt->left.max != INFINITY)
-					fprintf(f, "%d", (int)rt->left.max);
-				fprintf(f, "}");
-			}
-			if (rt->left.tree != NULL)
-				fprintf(f, "(nonNULL tree!!)");
-		}
-		if (rt->left.tree != NULL || !NULLCNFA(rt->left.cnfa))
-			fprintf(f, ":");
-		fprintf(f, "\n");
-		if (!NULLCNFA(rt->left.cnfa))
-			dumpcnfa(&rt->left.cnfa, f);
-		if (rt->left.tree != NULL)
-			rtdump(rt->left.tree, f, nfapresent, level+1);
-	} else if (rt->op == 'b') {
-		for (i = 0; i < level+1; i++)
-			fprintf(f, RTSEP);
+	fprintf(f, "%c (", t->op);
+	if (t->flags&LONGER)
 		fprintf(f, "L");
-		fprintf(f, "%s", (rt->left.prefer == NONEYET) ? "-" :
-				((rt->left.prefer == LONGER) ? ">" : "<"));
-		assert(rt->left.subno < 0);
-		fprintf(f, " \\%d", -rt->left.subno);
-		if (rt->left.min != 1 || rt->left.max != 1) {
-			fprintf(f, "{%d-", (int)rt->left.min);
-			if (rt->left.max != INFINITY)
-				fprintf(f, "%d", (int)rt->left.max);
-			fprintf(f, "}");
-		}
-		if (rt->left.tree != NULL)
-			fprintf(f, "(nonNULL tree!!)");
-		fprintf(f, "\n");
-	}
-
-	if (rt->right.begin != NULL) {
-		if (rt->op != ',')
-			fprintf(f, "op %c has non-NULL right tree\n", rt->op);
-		for (i = 0; i < level+1; i++)
-			fprintf(f, RTSEP);
-		fprintf(f, "R");
-		fprintf(f, "%s", (rt->right.prefer == NONEYET) ? "-" :
-				((rt->right.prefer == LONGER) ? ">" : "<"));
-		if (nfapresent)
-			fprintf(f, "%ld-%ld", (long)rt->right.begin->no,
-						(long)rt->right.end->no);
-		if (rt->right.subno > 0)
-			fprintf(f, " (%d)", rt->right.subno);
-		else if (rt->right.subno < 0) {
-			fprintf(f, " \\%d", -rt->right.subno);
-			if (rt->right.min != 1 || rt->right.max != 1) {
-				fprintf(f, "{%d-", (int)rt->right.min);
-				if (rt->right.max != INFINITY)
-					fprintf(f, "%d", (int)rt->right.max);
-				fprintf(f, "}");
-			}
-			if (rt->right.tree != NULL)
-				fprintf(f, "(nonNULL tree!!)");
-		}
-		if (rt->right.tree != NULL || !NULLCNFA(rt->right.cnfa))
-			fprintf(f, ":");
-		fprintf(f, "\n");
-		if (!NULLCNFA(rt->right.cnfa))
-			dumpcnfa(&rt->right.cnfa, f);
-		if (rt->right.tree != NULL)
-			rtdump(rt->right.tree, f, nfapresent, level+1);
-	}
-	for (i = 0; i < level; i++)
-		fprintf(f, RTSEP);
-	fprintf(f, "}\n");
-
-	if (rt->next != NULL) {
-		if (rt->op != '|')
-			fprintf(f, "op %c has non-NULL next\n", rt->op);
-		if (rt->next->op != rt->op)
-			fprintf(f, "next op %c, expecting %c\n", rt->next->op,
-									rt->op);
-		rtdump(rt->next, f, nfapresent, level);
+	if (t->flags&SHORTER)
+		fprintf(f, "S");
+	if (t->flags&MIXED)
+		fprintf(f, "M");
+	if (t->flags&CAP)
+		fprintf(f, "c");
+	if (t->flags&BACKR)
+		fprintf(f, "b");
+	if (!(t->flags&INUSE))
+		fprintf(f, "!u");
+	fprintf(f, ") r%d", t->retry);
+	if (t->subno != 0)
+		fprintf(f, " #%d", t->subno);
+	if (t->min != 1 || t->max != 1) {
+		fprintf(f, "{%d,", t->min);
+		if (t->max != INFINITY)
+			fprintf(f, "%d", t->max);
+		fprintf(f, "}");
 	}
+	if (nfapresent)
+		fprintf(f, " %ld-%ld", (long)t->begin->no, (long)t->end->no);
+	if (!NULLCNFA(t->cnfa))
+		fprintf(f, ":");
+	fprintf(f, "\n");
+	if (t->left != NULL)
+		stdump(t->left, f, nfapresent, level+1);
+	if (!NULLCNFA(t->cnfa))
+		dumpcnfa(&t->cnfa, f);
+	if (t->right != NULL)
+		stdump(t->right, f, nfapresent, level+1);
 }
 
-#define	COMPILE	1
 #include "regc_lex.c"
 #include "regc_color.c"
 #include "regc_nfa.c"
diff --git a/generic/regcustom.h b/generic/regcustom.h
index 0fda25f..eeaafa0 100644
--- a/generic/regcustom.h
+++ b/generic/regcustom.h
@@ -37,17 +37,20 @@
 #endif
 /* interface types */
 #define	__REG_WIDE_T	Tcl_UniChar
-#define	__REG_WIDE_COMPILE	re_ucomp
-#define	__REG_WIDE_EXEC		re_uexec
 #define	__REG_REGOFF_T	long	/* not really right, but good enough... */
 #define	__REG_VOID_T	VOID
 #define	__REG_CONST	CONST
+/* names and declarations */
+#define	__REG_WIDE_COMPILE	re_ucomp
+#define	__REG_WIDE_EXEC		re_uexec
 #ifndef __REG_NOFRONT
 #define	__REG_NOFRONT		/* don't want regcomp() and regexec() */
 #endif
 #ifndef __REG_NOCHAR
 #define	__REG_NOCHAR		/* or the char versions */
 #endif
+#define	regfree		re_ufree
+#define	regerror	re_uerror
 /* --- end --- */
 
 
@@ -73,18 +76,11 @@ typedef int celt;		/* type to hold chr, MCCE number, or NOCELT */
 /* name the external functions */
 #define	compile		re_ucomp
 #define	exec		re_uexec
+
+/* enable/disable debugging code (by whether REG_DEBUG is defined or not) */
 #ifdef notdef
-#define	regfree		re_ufree
-#define	regerror	re_uerror
+#define	REG_DEBUG	/* */
 #endif
 
-/*
- * Implement a mistake in the original POSIX.2:  in EREs, and only in EREs
- * (AREs do not support this botch), an unbalanced right parenthesis is an
- * ordinary character rather than an error.  This was unintentional, and
- * will be fixed someday.
- */
-#define	POSIX_MISTAKE	/* sigh */
-
 /* and pick up the standard header */
 #include "regex.h"
diff --git a/generic/rege_dfa.c b/generic/rege_dfa.c
new file mode 100644
index 0000000..6e4b941
--- /dev/null
+++ b/generic/rege_dfa.c
@@ -0,0 +1,583 @@
+/*
+ * DFA routines
+ * This file is #included by regexec.c.
+ */
+
+/*
+ - longest - longest-preferred matching engine
+ ^ static chr *longest(struct vars *, struct dfa *, chr *, chr *);
+ */
+static chr *			/* endpoint, or NULL */
+longest(v, d, start, stop)
+struct vars *v;			/* used only for debug and exec flags */
+struct dfa *d;
+chr *start;			/* where the match should start */
+chr *stop;			/* match must end at or before here */
+{
+	chr *cp;
+	chr *realstop = (stop == v->stop) ? stop : stop + 1;
+	color co;
+	struct sset *css;
+	struct sset *ss;
+	chr *post;
+	int i;
+	struct colormap *cm = d->cm;
+
+	/* initialize */
+	css = initialize(v, d, start);
+	cp = start;
+
+	/* startup */
+	FDEBUG(("+++ startup +++\n"));
+	if (cp == v->start) {
+		co = d->cnfa->bos[(v->eflags&REG_NOTBOL) ? 0 : 1];
+		FDEBUG(("color %ld\n", (long)co));
+	} else {
+		co = GETCOLOR(cm, *(cp - 1));
+		FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co));
+	}
+	css = miss(v, d, css, co, cp, start);
+	if (css == NULL)
+		return NULL;
+	css->lastseen = cp;
+
+	/* main loop */
+	if (v->eflags&REG_FTRACE)
+		while (cp < realstop) {
+			FDEBUG(("+++ at c%d +++\n", css - d->ssets));
+			co = GETCOLOR(cm, *cp);
+			FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co));
+			ss = css->outs[co];
+			if (ss == NULL) {
+				ss = miss(v, d, css, co, cp+1, start);
+				if (ss == NULL)
+					break;	/* NOTE BREAK OUT */
+			}
+			cp++;
+			ss->lastseen = cp;
+			css = ss;
+		}
+	else
+		while (cp < realstop) {
+			co = GETCOLOR(cm, *cp);
+			ss = css->outs[co];
+			if (ss == NULL) {
+				ss = miss(v, d, css, co, cp+1, start);
+				if (ss == NULL)
+					break;	/* NOTE BREAK OUT */
+			}
+			cp++;
+			ss->lastseen = cp;
+			css = ss;
+		}
+
+	/* shutdown */
+	FDEBUG(("+++ shutdown at c%d +++\n", css - d->ssets));
+	if (cp == v->stop && stop == v->stop) {
+		co = d->cnfa->eos[(v->eflags&REG_NOTEOL) ? 0 : 1];
+		FDEBUG(("color %ld\n", (long)co));
+		ss = miss(v, d, css, co, cp, start);
+		/* special case:  match ended at eol? */
+		if (ss != NULL && (ss->flags&POSTSTATE))
+			return cp;
+		else if (ss != NULL)
+			ss->lastseen = cp;	/* to be tidy */
+	}
+
+	/* find last match, if any */
+	post = d->lastpost;
+	for (ss = d->ssets, i = 0; i < d->nssused; ss++, i++)
+		if ((ss->flags&POSTSTATE) && post != ss->lastseen &&
+					(post == NULL || post < ss->lastseen))
+			post = ss->lastseen;
+	if (post != NULL)		/* found one */
+		return post - 1;
+
+	return NULL;
+}
+
+/*
+ - shortest - shortest-preferred matching engine
+ ^ static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *,
+ ^ 	chr **);
+ */
+static chr *			/* endpoint, or NULL */
+shortest(v, d, start, min, max, coldp)
+struct vars *v;			/* used only for debug and exec flags */
+struct dfa *d;
+chr *start;			/* where the match should start */
+chr *min;			/* match must end at or after here */
+chr *max;			/* match must end at or before here */
+chr **coldp;			/* store coldstart pointer here, if nonNULL */
+{
+	chr *cp;
+	chr *realmin = (min == v->stop) ? min : min + 1;
+	chr *realmax = (max == v->stop) ? max : max + 1;
+	color co;
+	struct sset *css;
+	struct sset *firstss;
+	struct sset *ss;
+	struct colormap *cm = d->cm;
+
+	/* initialize */
+	css = initialize(v, d, start);
+	firstss = css;
+	cp = start;
+
+	/* startup */
+	FDEBUG(("--- startup ---\n"));
+	if (cp == v->start) {
+		co = d->cnfa->bos[(v->eflags&REG_NOTBOL) ? 0 : 1];
+		FDEBUG(("color %ld\n", (long)co));
+	} else {
+		co = GETCOLOR(cm, *(cp - 1));
+		FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co));
+	}
+	css = miss(v, d, css, co, cp, start);
+	if (css == NULL)
+		return NULL;
+	css->lastseen = cp;
+	ss = css;
+
+	/* main loop */
+	if (v->eflags&REG_FTRACE)
+		while (cp < realmax) {
+			FDEBUG(("--- at c%d ---\n", css - d->ssets));
+			co = GETCOLOR(cm, *cp);
+			FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co));
+			ss = css->outs[co];
+			if (ss == NULL) {
+				ss = miss(v, d, css, co, cp+1, start);
+				if (ss == NULL)
+					break;	/* NOTE BREAK OUT */
+			}
+			cp++;
+			ss->lastseen = cp;
+			css = ss;
+			if ((ss->flags&POSTSTATE) && cp >= realmin)
+				break;		/* NOTE BREAK OUT */
+		}
+	else
+		while (cp < realmax) {
+			co = GETCOLOR(cm, *cp);
+			ss = css->outs[co];
+			if (ss == NULL) {
+				ss = miss(v, d, css, co, cp+1, start);
+				if (ss == NULL)
+					break;	/* NOTE BREAK OUT */
+			}
+			cp++;
+			ss->lastseen = cp;
+			css = ss;
+			if ((ss->flags&POSTSTATE) && cp >= realmin)
+				break;		/* NOTE BREAK OUT */
+		}
+
+	if (ss == NULL)
+		return NULL;
+	if (ss->flags&POSTSTATE) {
+		assert(firstss->flags&STARTER);
+		assert(firstss->lastseen != NULL);
+		if (coldp != NULL)
+			*coldp = firstss->lastseen;
+		assert(cp >= realmin);
+		return cp - 1;
+	}
+
+	/* shutdown */
+	FDEBUG(("--- shutdown at c%d ---\n", css - d->ssets));
+	if (cp == v->stop && max == v->stop) {
+		co = d->cnfa->eos[(v->eflags&REG_NOTEOL) ? 0 : 1];
+		FDEBUG(("color %ld\n", (long)co));
+		ss = miss(v, d, css, co, cp, start);
+		/* special case:  match ended at eol? */
+		if (ss != NULL && (ss->flags&POSTSTATE)) {
+			assert(firstss->flags&STARTER);
+			assert(firstss->lastseen != NULL);
+			if (coldp != NULL)
+				*coldp = firstss->lastseen;
+			return cp;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ - newdfa - set up a fresh DFA
+ ^ static struct dfa *newdfa(struct vars *, struct cnfa *,
+ ^ 	struct colormap *);
+ */
+static struct dfa *
+newdfa(v, cnfa, cm)
+struct vars *v;
+struct cnfa *cnfa;
+struct colormap *cm;
+{
+	struct dfa *d = (struct dfa *)MALLOC(sizeof(struct dfa));
+	int wordsper = (cnfa->nstates + UBITS - 1) / UBITS;
+	struct sset *ss;
+	int i;
+
+	assert(cnfa != NULL && cnfa->nstates != 0);
+	if (d == NULL) {
+		ERR(REG_ESPACE);
+		return NULL;
+	}
+
+	d->ssets = (struct sset *)MALLOC(CACHE * sizeof(struct sset));
+	d->statesarea = (unsigned *)MALLOC((CACHE+WORK) * wordsper *
+							sizeof(unsigned));
+	d->work = &d->statesarea[CACHE * wordsper];
+	d->outsarea = (struct sset **)MALLOC(CACHE * cnfa->ncolors *
+							sizeof(struct sset *));
+	d->incarea = (struct arcp *)MALLOC(CACHE * cnfa->ncolors *
+							sizeof(struct arcp));
+	if (d->ssets == NULL || d->statesarea == NULL || d->outsarea == NULL ||
+							d->incarea == NULL) {
+		freedfa(d);
+		ERR(REG_ESPACE);
+		return NULL;
+	}
+
+	d->nssets = (v->eflags&REG_SMALL) ? 7 : CACHE;
+	d->nssused = 0;
+	d->nstates = cnfa->nstates;
+	d->ncolors = cnfa->ncolors;
+	d->wordsper = wordsper;
+	d->cnfa = cnfa;
+	d->cm = cm;
+	d->lastpost = NULL;
+	d->search = d->ssets;
+
+	for (ss = d->ssets, i = 0; i < d->nssets; ss++, i++) {
+		/* initialization of most fields is done as needed */
+		ss->states = &d->statesarea[i * d->wordsper];
+		ss->outs = &d->outsarea[i * d->ncolors];
+		ss->inchain = &d->incarea[i * d->ncolors];
+	}
+
+	return d;
+}
+
+/*
+ - freedfa - free a DFA
+ ^ static VOID freedfa(struct dfa *);
+ */
+static VOID
+freedfa(d)
+struct dfa *d;
+{
+	if (d->ssets != NULL)
+		FREE(d->ssets);
+	if (d->statesarea != NULL)
+		FREE(d->statesarea);
+	if (d->outsarea != NULL)
+		FREE(d->outsarea);
+	if (d->incarea != NULL)
+		FREE(d->incarea);
+	FREE(d);
+}
+
+/*
+ - hash - construct a hash code for a bitvector
+ * There are probably better ways, but they're more expensive.
+ ^ static unsigned hash(unsigned *, int);
+ */
+static unsigned
+hash(uv, n)
+unsigned *uv;
+int n;
+{
+	int i;
+	unsigned h;
+
+	h = 0;
+	for (i = 0; i < n; i++)
+		h ^= uv[i];
+	return h;
+}
+
+/*
+ - initialize - hand-craft a cache entry for startup, otherwise get ready
+ ^ static struct sset *initialize(struct vars *, struct dfa *, chr *);
+ */
+static struct sset *
+initialize(v, d, start)
+struct vars *v;			/* used only for debug flags */
+struct dfa *d;
+chr *start;
+{
+	struct sset *ss;
+	int i;
+
+	/* is previous one still there? */
+	if (d->nssused > 0 && (d->ssets[0].flags&STARTER))
+		ss = &d->ssets[0];
+	else {				/* no, must (re)build it */
+		ss = getvacant(v, d, start, start);
+		for (i = 0; i < d->wordsper; i++)
+			ss->states[i] = 0;
+		BSET(ss->states, d->cnfa->pre);
+		ss->hash = hash(ss->states, d->wordsper);
+		assert(d->cnfa->pre != d->cnfa->post);
+		ss->flags = STARTER|LOCKED;
+		/* lastseen dealt with below */
+	}
+
+	for (i = 0; i < d->nssused; i++)
+		d->ssets[i].lastseen = NULL;
+	ss->lastseen = start;		/* maybe untrue, but harmless */
+	d->lastpost = NULL;
+	return ss;
+}
+
+/*
+ - miss - handle a cache miss
+ ^ static struct sset *miss(struct vars *, struct dfa *, struct sset *,
+ ^ 	pcolor, chr *, chr *);
+ */
+static struct sset *		/* NULL if goes to empty set */
+miss(v, d, css, co, cp, start)
+struct vars *v;			/* used only for debug flags */
+struct dfa *d;
+struct sset *css;
+pcolor co;
+chr *cp;			/* next chr */
+chr *start;			/* where the attempt got started */
+{
+	struct cnfa *cnfa = d->cnfa;
+	int i;
+	unsigned h;
+	struct carc *ca;
+	struct sset *p;
+	int ispost;
+	int gotstate;
+	int dolacons;
+	int didlacons;
+
+	/* for convenience, we can be called even if it might not be a miss */
+	if (css->outs[co] != NULL) {
+		FDEBUG(("hit\n"));
+		return css->outs[co];
+	}
+	FDEBUG(("miss\n"));
+
+	/* first, what set of states would we end up in? */
+	for (i = 0; i < d->wordsper; i++)
+		d->work[i] = 0;
+	ispost = 0;
+	gotstate = 0;
+	for (i = 0; i < d->nstates; i++)
+		if (ISBSET(css->states, i))
+			for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
+				if (ca->co == co) {
+					BSET(d->work, ca->to);
+					gotstate = 1;
+					if (ca->to == cnfa->post)
+						ispost = 1;
+					FDEBUG(("%d -> %d\n", i, ca->to));
+				}
+	dolacons = (gotstate) ? (cnfa->flags&HASLACONS) : 0;
+	didlacons = 0;
+	while (dolacons) {		/* transitive closure */
+		dolacons = 0;
+		for (i = 0; i < d->nstates; i++)
+			if (ISBSET(d->work, i))
+				for (ca = cnfa->states[i]; ca->co != COLORLESS;
+									ca++)
+					if (ca->co > cnfa->ncolors &&
+						!ISBSET(d->work, ca->to) &&
+							lacon(v, cnfa, cp,
+								ca->co)) {
+						BSET(d->work, ca->to);
+						dolacons = 1;
+						didlacons = 1;
+						if (ca->to == cnfa->post)
+							ispost = 1;
+						FDEBUG(("%d :> %d\n",i,ca->to));
+					}
+	}
+	if (!gotstate)
+		return NULL;
+	h = hash(d->work, d->wordsper);
+
+	/* next, is that in the cache? */
+	for (p = d->ssets, i = d->nssused; i > 0; p++, i--)
+		if (p->hash == h && memcmp(VS(d->work), VS(p->states),
+					d->wordsper*sizeof(unsigned)) == 0) {
+			FDEBUG(("cached c%d\n", p - d->ssets));
+			break;			/* NOTE BREAK OUT */
+		}
+	if (i == 0) {		/* nope, need a new cache entry */
+		p = getvacant(v, d, cp, start);
+		assert(p != css);
+		for (i = 0; i < d->wordsper; i++)
+			p->states[i] = d->work[i];
+		p->hash = h;
+		p->flags = (ispost) ? POSTSTATE : 0;
+		/* lastseen to be dealt with by caller */
+	}
+
+	if (!didlacons) {		/* lookahead conds. always cache miss */
+		css->outs[co] = p;
+		css->inchain[co] = p->ins;
+		p->ins.ss = css;
+		p->ins.co = (color)co;
+	}
+	return p;
+}
+
+/*
+ - lacon - lookahead-constraint checker for miss()
+ ^ static int lacon(struct vars *, struct cnfa *, chr *, pcolor);
+ */
+static int			/* predicate:  constraint satisfied? */
+lacon(v, pcnfa, cp, co)
+struct vars *v;
+struct cnfa *pcnfa;		/* parent cnfa */
+chr *cp;
+pcolor co;			/* "color" of the lookahead constraint */
+{
+	int n;
+	struct subre *sub;
+	struct dfa *d;
+	chr *end;
+
+	n = co - pcnfa->ncolors;
+	assert(n < v->g->nlacons && v->g->lacons != NULL);
+	FDEBUG(("=== testing lacon %d\n", n));
+	sub = &v->g->lacons[n];
+	d = newdfa(v, &sub->cnfa, &v->g->cmap);
+	if (d == NULL) {
+		ERR(REG_ESPACE);
+		return 0;
+	}
+	end = longest(v, d, cp, v->stop);
+	freedfa(d);
+	FDEBUG(("=== lacon %d match %d\n", n, (end != NULL)));
+	return (sub->subno) ? (end != NULL) : (end == NULL);
+}
+
+/*
+ - getvacant - get a vacant state set
+ * This routine clears out the inarcs and outarcs, but does not otherwise
+ * clear the innards of the state set -- that's up to the caller.
+ ^ static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *);
+ */
+static struct sset *
+getvacant(v, d, cp, start)
+struct vars *v;			/* used only for debug flags */
+struct dfa *d;
+chr *cp;
+chr *start;
+{
+	int i;
+	struct sset *ss;
+	struct sset *p;
+	struct arcp ap;
+	struct arcp lastap;
+	color co;
+
+	ss = pickss(v, d, cp, start);
+	assert(!(ss->flags&LOCKED));
+
+	/* clear out its inarcs, including self-referential ones */
+	ap = ss->ins;
+	while ((p = ap.ss) != NULL) {
+		co = ap.co;
+		FDEBUG(("zapping c%d's %ld outarc\n", p - d->ssets, (long)co));
+		p->outs[co] = NULL;
+		ap = p->inchain[co];
+		p->inchain[co].ss = NULL;	/* paranoia */
+	}
+	ss->ins.ss = NULL;
+
+	/* take it off the inarc chains of the ssets reached by its outarcs */
+	for (i = 0; i < d->ncolors; i++) {
+		p = ss->outs[i];
+		assert(p != ss);		/* not self-referential */
+		if (p == NULL)
+			continue;		/* NOTE CONTINUE */
+		FDEBUG(("del outarc %d from c%d's in chn\n", i, p - d->ssets));
+		if (p->ins.ss == ss && p->ins.co == i)
+			p->ins = ss->inchain[i];
+		else {
+			assert(p->ins.ss != NULL);
+			for (ap = p->ins; ap.ss != NULL &&
+						!(ap.ss == ss && ap.co == i);
+						ap = ap.ss->inchain[ap.co])
+				lastap = ap;
+			assert(ap.ss != NULL);
+			lastap.ss->inchain[lastap.co] = ss->inchain[i];
+		}
+		ss->outs[i] = NULL;
+		ss->inchain[i].ss = NULL;
+	}
+
+	/* if ss was a success state, may need to remember location */
+	if ((ss->flags&POSTSTATE) && ss->lastseen != d->lastpost &&
+			(d->lastpost == NULL || d->lastpost < ss->lastseen))
+		d->lastpost = ss->lastseen;
+
+	return ss;
+}
+
+/*
+ - pickss - pick the next stateset to be used
+ ^ static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *);
+ */
+static struct sset *
+pickss(v, d, cp, start)
+struct vars *v;			/* used only for debug flags */
+struct dfa *d;
+chr *cp;
+chr *start;
+{
+	int i;
+	struct sset *ss;
+	struct sset *end;
+	chr *ancient;
+
+	/* shortcut for cases where cache isn't full */
+	if (d->nssused < d->nssets) {
+		ss = &d->ssets[d->nssused];
+		d->nssused++;
+		FDEBUG(("new c%d\n", ss - d->ssets));
+		/* must make innards consistent */
+		ss->ins.ss = NULL;
+		ss->ins.co = WHITE;		/* give it some value */
+		for (i = 0; i < d->ncolors; i++) {
+			ss->outs[i] = NULL;
+			ss->inchain[i].ss = NULL;
+		}
+		ss->flags = 0;
+		return ss;
+	}
+
+	/* look for oldest, or old enough anyway */
+	if (cp - start > d->nssets*2/3)		/* oldest 33% are expendable */
+		ancient = cp - d->nssets*2/3;
+	else
+		ancient = start;
+	for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++)
+		if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
+							!(ss->flags&LOCKED)) {
+			d->search = ss + 1;
+			FDEBUG(("replacing c%d\n", ss - d->ssets));
+			return ss;
+		}
+	for (ss = d->ssets, end = d->search; ss < end; ss++)
+		if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
+							!(ss->flags&LOCKED)) {
+			d->search = ss + 1;
+			FDEBUG(("replacing c%d\n", ss - d->ssets));
+			return ss;
+		}
+
+	/* nobody's old enough?!? -- something's really wrong */
+	FDEBUG(("can't find victim to replace!\n"));
+	assert(NOTREACHED);
+	ERR(REG_ASSERT);
+	return d->ssets;
+}
diff --git a/generic/regerror.c b/generic/regerror.c
index 5eb67a7..7fb2ef3 100644
--- a/generic/regerror.c
+++ b/generic/regerror.c
@@ -15,7 +15,7 @@ static struct rerr {
 } rerrs[] = {
 	/* the actual table is built from regex.h */
 #	include "regerrs.h"
-	-1,	"",	"oops",		/* explanation special-cased in code */
+	{ -1,	"",	"oops" },	/* explanation special-cased in code */
 };
 
 /*
diff --git a/generic/regerrs.h b/generic/regerrs.h
index 8298597..af2805c 100644
--- a/generic/regerrs.h
+++ b/generic/regerrs.h
@@ -1,19 +1,18 @@
-REG_OKAY,	"REG_OKAY",	"no errors detected",
-REG_NOMATCH,	"REG_NOMATCH",	"failed to match",
-REG_BADPAT,	"REG_BADPAT",	"invalid regexp (reg version 0.1)",
-REG_ECOLLATE,	"REG_ECOLLATE",	"invalid collating element",
-REG_ECTYPE,	"REG_ECTYPE",	"invalid character class",
-REG_EESCAPE,	"REG_EESCAPE",	"invalid escape \\ sequence",
-REG_ESUBREG,	"REG_ESUBREG",	"invalid backreference number",
-REG_EBRACK,	"REG_EBRACK",	"brackets [] not balanced",
-REG_EPAREN,	"REG_EPAREN",	"parentheses () not balanced",
-REG_EBRACE,	"REG_EBRACE",	"braces {} not balanced",
-REG_BADBR,	"REG_BADBR",	"invalid repetition count(s)",
-REG_ERANGE,	"REG_ERANGE",	"invalid character range",
-REG_ESPACE,	"REG_ESPACE",	"out of memory",
-REG_BADRPT,	"REG_BADRPT",	"quantifier operand invalid",
-REG_ASSERT,	"REG_ASSERT",	"\"can't happen\" -- you found a bug",
-REG_INVARG,	"REG_INVARG",	"invalid argument to regex function",
-REG_MIXED,	"REG_MIXED",	"character widths of regex and string differ",
-REG_BADOPT,	"REG_BADOPT",	"invalid embedded option",
-REG_IMPOSS,	"REG_IMPOSS",	"can never match",
+{ REG_OKAY,	"REG_OKAY",	"no errors detected" },
+{ REG_NOMATCH,	"REG_NOMATCH",	"failed to match" },
+{ REG_BADPAT,	"REG_BADPAT",	"invalid regexp (reg version 0.1)" },
+{ REG_ECOLLATE,	"REG_ECOLLATE",	"invalid collating element" },
+{ REG_ECTYPE,	"REG_ECTYPE",	"invalid character class" },
+{ REG_EESCAPE,	"REG_EESCAPE",	"invalid escape \\ sequence" },
+{ REG_ESUBREG,	"REG_ESUBREG",	"invalid backreference number" },
+{ REG_EBRACK,	"REG_EBRACK",	"brackets [] not balanced" },
+{ REG_EPAREN,	"REG_EPAREN",	"parentheses () not balanced" },
+{ REG_EBRACE,	"REG_EBRACE",	"braces {} not balanced" },
+{ REG_BADBR,	"REG_BADBR",	"invalid repetition count(s)" },
+{ REG_ERANGE,	"REG_ERANGE",	"invalid character range" },
+{ REG_ESPACE,	"REG_ESPACE",	"out of memory" },
+{ REG_BADRPT,	"REG_BADRPT",	"quantifier operand invalid" },
+{ REG_ASSERT,	"REG_ASSERT",	"\"can't happen\" -- you found a bug" },
+{ REG_INVARG,	"REG_INVARG",	"invalid argument to regex function" },
+{ REG_MIXED,	"REG_MIXED",	"character widths of regex and string differ" },
+{ REG_BADOPT,	"REG_BADOPT",	"invalid embedded option" },
diff --git a/generic/regex.h b/generic/regex.h
index 6f61dd3..7ca6ad0 100644
--- a/generic/regex.h
+++ b/generic/regex.h
@@ -16,7 +16,7 @@
  * suggestive of the wide type, e.g. re_ucomp and re_uexec for Unicode).
  * For cranky old compilers, it may be necessary to do something like:
  * #define	__REG_WIDE_COMPILE(a,b,c,d)	re_Xcomp(a,b,c,d)
- * #define	__REG_WIDE_EXEC(a,b,c,d,e,f)	re_Xexec(a,b,c,d,e,f)
+ * #define	__REG_WIDE_EXEC(a,b,c,d,e,f,g)	re_Xexec(a,b,c,d,e,f,g)
  * rather than just #defining the names as parameterless macros.
  *
  * For some specialized purposes, it may be desirable to suppress the
@@ -44,8 +44,14 @@ extern "C" {
 
 
 /*
- * Add your own defines, if needed, here.  The --- stuff is for automatic
- * generation of this file from regproto.h and regcustom.h.
+ * Add your own defines, if needed, here.
+ */
+
+
+
+/*
+ * Location where a chunk of regcustom.h is automatically spliced into
+ * this file (working from its prototype, regproto.h).
  */
 /* --- begin --- */
 /* ensure certain things don't sneak in from system headers */
@@ -69,17 +75,20 @@ extern "C" {
 #endif
 /* interface types */
 #define	__REG_WIDE_T	Tcl_UniChar
-#define	__REG_WIDE_COMPILE	re_ucomp
-#define	__REG_WIDE_EXEC		re_uexec
 #define	__REG_REGOFF_T	long	/* not really right, but good enough... */
 #define	__REG_VOID_T	VOID
 #define	__REG_CONST	CONST
+/* names and declarations */
+#define	__REG_WIDE_COMPILE	re_ucomp
+#define	__REG_WIDE_EXEC		re_uexec
 #ifndef __REG_NOFRONT
 #define	__REG_NOFRONT		/* don't want regcomp() and regexec() */
 #endif
 #ifndef __REG_NOCHAR
 #define	__REG_NOCHAR		/* or the char versions */
 #endif
+#define	regfree		re_ufree
+#define	regerror	re_uerror
 /* --- end --- */
 
 
@@ -139,6 +148,7 @@ typedef struct {
 #		define	REG_UUNPORT		001000
 #		define	REG_ULOCALE		002000
 #		define	REG_UEMPTYMATCH		004000
+#		define	REG_UIMPOSSIBLE		010000
 	int re_csize;		/* sizeof(character) */
 	char *re_endp;		/* backward compatibility kludge */
 	/* the rest is opaque pointers to hidden innards */
@@ -249,7 +259,6 @@ typedef struct {
 #define	REG_INVARG	16	/* invalid argument to regex function */
 #define	REG_MIXED	17	/* character widths of regex and string differ */
 #define	REG_BADOPT	18	/* invalid embedded option */
-#define	REG_IMPOSS	19	/* can never match */
 /* two specials for debugging and testing */
 #define	REG_ATOI	101	/* convert error-code name to number */
 #define	REG_ITOA	102	/* convert error-code number to name */
@@ -280,8 +289,8 @@ int regexec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, regmatch_t [], i
 #ifdef __REG_WIDE_T
 int __REG_WIDE_EXEC _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
 #endif
-re_void regfree _ANSI_ARGS_((regex_t *));
-extern size_t regerror _ANSI_ARGS_((int, __REG_CONST regex_t *, char *, size_t));
+re_void re_ufree _ANSI_ARGS_((regex_t *));
+extern size_t re_uerror _ANSI_ARGS_((int, __REG_CONST regex_t *, char *, size_t));
 /* automatically gathered by fwd; do not hand-edit */
 /* =====^!^===== end forwards =====^!^===== */
 
diff --git a/generic/regexec.c b/generic/regexec.c
index 4220062..1671866 100644
--- a/generic/regexec.c
+++ b/generic/regexec.c
@@ -17,8 +17,6 @@ struct vars {
 	chr *stop;		/* just past end of string */
 	int err;		/* error code if any (0 none) */
 	regoff_t *mem;		/* memory vector for backtracking */
-	regoff_t *mem1;		/* localizer vector */
-	regoff_t *mem2;		/* dissector vector */
 };
 #define	VISERR(vv)	((vv)->err != 0)	/* have we seen an error yet? */
 #define	ISERR()	VISERR(v)
@@ -79,21 +77,20 @@ struct dfa {
 int exec _ANSI_ARGS_((regex_t *, CONST chr *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
 static int find _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));
 static int cfind _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));
-static VOID zapmatches _ANSI_ARGS_((regmatch_t *, size_t));
-static VOID zapmem _ANSI_ARGS_((struct vars *, struct rtree *));
+static VOID zapsubs _ANSI_ARGS_((regmatch_t *, size_t));
+static VOID zapmem _ANSI_ARGS_((struct vars *, struct subre *));
 static VOID subset _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
-static int dissect _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *));
-static int altdissect _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *));
-static int cdissect _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *));
-static int crevdissect _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *));
-static int csindissect _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *));
-static int cbrdissect _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *));
-static int caltdissect _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *));
-static chr *dismatch _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *));
-static chr *dismrev _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *));
-static chr *dismsin _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *));
+static int dissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
+static int condissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
+static int altdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
+static int cdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
+static int ccondissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
+static int crevdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
+static int cbrdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
+static int caltdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
+/* === rege_dfa.c === */
 static chr *longest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));
-static chr *shortest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, chr *));
+static chr *shortest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, chr *, chr **));
 static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));
 static VOID freedfa _ANSI_ARGS_((struct dfa *));
 static unsigned hash _ANSI_ARGS_((unsigned *, int));
@@ -137,6 +134,8 @@ int flags;
 	/* setup */
 	v->re = re;
 	v->g = (struct guts *)re->re_guts;
+	if (v->g->unmatchable)
+		return REG_NOMATCH;
 	complications = (v->g->info&REG_UBACKREF) ? 1 : 0;
 	if (v->g->usedshorter)
 		complications = 1;
@@ -157,30 +156,30 @@ int flags;
 	v->stop = (chr *)string + len;
 	v->err = 0;
 	if (complications) {
-		v->mem1 = (regoff_t *)MALLOC(2*v->g->ntree*sizeof(regoff_t));
-		if (v->mem1 == NULL) {
+		v->mem = (regoff_t *)MALLOC(v->g->ntree*sizeof(regoff_t));
+		if (v->mem == NULL) {
 			if (v->pmatch != pmatch)
 				FREE(v->pmatch);
 			return REG_ESPACE;
 		}
-		v->mem2 = v->mem1 + v->g->ntree;
 	} else
-		v->mem1 = NULL;
+		v->mem = NULL;
 
 	/* do it */
+	assert(v->g->tree != NULL);
 	if (complications)
-		st = cfind(v, &v->g->cnfa, v->g->cm);
+		st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
 	else
-		st = find(v, &v->g->cnfa, v->g->cm);
+		st = find(v, &v->g->tree->cnfa, &v->g->cmap);
 	if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) {
-		zapmatches(pmatch, nmatch);
+		zapsubs(pmatch, nmatch);
 		n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
 		memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t));
 	}
 	if (v->pmatch != pmatch)
 		FREE(v->pmatch);
-	if (v->mem1 != NULL)
-		FREE(v->mem1);
+	if (v->mem != NULL)
+		FREE(v->mem);
 	return st;
 }
 
@@ -195,33 +194,57 @@ struct cnfa *cnfa;
 struct colormap *cm;
 {
 	struct dfa *d = newdfa(v, cnfa, cm);
+	struct dfa *s = newdfa(v, &v->g->search, cm);
 	chr *begin;
 	chr *end;
+#ifdef notdef
+	chr *open;		/* open and close of range of possible starts */
+	chr *close;
+#endif
 	chr *stop = (cnfa->flags&LEFTANCH) ? v->start : v->stop;
 
 	if (d == NULL)
 		return v->err;
+	if (s == NULL) {
+		freedfa(d);
+		return v->err;
+	}
 
-	for (begin = v->start; begin <= stop; begin++) {
-		MDEBUG(("\ntrying at %ld\n", (long)OFF(begin)));
-		end = longest(v, d, begin, v->stop);
-		if (end != NULL) {
-			if (v->nmatch > 0) {
-				v->pmatch[0].rm_so = OFF(begin);
-				v->pmatch[0].rm_eo = OFF(end);
-			}
-			freedfa(d);
-			if (v->nmatch > 1) {
-				zapmatches(v->pmatch, v->nmatch);
-				return dissect(v, v->g->tree, begin, end);
+#ifdef notdef
+	close = v->start;
+	while (close < stop) {
+		MDEBUG(("\nsearch at %ld\n", (long)OFF(close)));
+		close = shortest(v, s, close, close, v->stop, &open);
+		if (close == NULL)
+			break;				/* NOTE BREAK */
+		MDEBUG(("between %ld and %ld\n", (long)OFF(open),
+							(long)OFF(close)));
+#endif
+		for (begin = v->start; begin <= stop; begin++) {
+			end = longest(v, d, begin, v->stop);
+			if (end != NULL) {
+				if (v->nmatch > 0) {
+					v->pmatch[0].rm_so = OFF(begin);
+					v->pmatch[0].rm_eo = OFF(end);
+				}
+				freedfa(d);
+				freedfa(s);
+				if (v->nmatch > 1) {
+					zapsubs(v->pmatch, v->nmatch);
+					return dissect(v, v->g->tree, begin,
+									end);
+				}
+				if (ISERR())
+					return v->err;
+				return REG_OKAY;
 			}
-			if (ISERR())
-				return v->err;
-			return REG_OKAY;
 		}
+#ifdef notdef
 	}
+#endif
 
 	freedfa(d);
+	freedfa(s);
 	if (ISERR())
 		return v->err;
 	return REG_NOMATCH;
@@ -241,33 +264,28 @@ struct colormap *cm;
 	chr *begin;
 	chr *end;
 	chr *stop = (cnfa->flags&LEFTANCH) ? v->start : v->stop;
+	chr *estart;
 	chr *estop;
 	int er;
-	int usedis = (v->g->tree == NULL || v->g->tree->op == '|') ? 0 : 1;
+	int shorter = v->g->tree->flags&SHORTER;
 
 	if (d == NULL)
 		return v->err;
 
-	if (!v->g->usedshorter)
-		usedis = 0;
 	for (begin = v->start; begin <= stop; begin++) {
-		MDEBUG(("\ntrying at %ld\n", (long)OFF(begin)));
-		if (usedis) {
-			v->mem = v->mem1;
-			zapmem(v, v->g->tree);
-		}
+		MDEBUG(("\ncfind trying at %ld\n", (long)OFF(begin)));
+		estart = begin;
 		estop = v->stop;
 		for (;;) {
-			if (usedis) {
-				v->mem = v->mem1;
-				end = dismatch(v, v->g->tree, begin, v->stop);
-			} else
+			if (shorter)
+				end = shortest(v, d, begin, estart, estop,
+								(chr **)NULL);
+			else
 				end = longest(v, d, begin, estop);
 			if (end == NULL)
 				break;		/* NOTE BREAK OUT */
 			MDEBUG(("tentative end %ld\n", (long)OFF(end)));
-			zapmatches(v->pmatch, v->nmatch);
-			v->mem = v->mem2;
+			zapsubs(v->pmatch, v->nmatch);
 			zapmem(v, v->g->tree);
 			er = cdissect(v, v->g->tree, begin, end);
 			switch (er) {
@@ -283,14 +301,15 @@ struct colormap *cm;
 				break;
 			case REG_NOMATCH:
 				/* go around and try again */
-				if (!usedis) {
-					if (end == begin) {
-						/* no point in trying again */
-						freedfa(d);
-						return REG_NOMATCH;
-					}
-					estop = end - 1;
+				if ((shorter) ? end == estop : end == begin) {
+					/* no point in trying again */
+					freedfa(d);
+					return REG_NOMATCH;
 				}
+				if (shorter)
+					estart = end + 1;
+				else
+					estop = end - 1;
 				break;
 			default:
 				freedfa(d);
@@ -307,11 +326,11 @@ struct colormap *cm;
 }
 
 /*
- - zapmatches - initialize the subexpression matches to "no match"
- ^ static VOID zapmatches(regmatch_t *, size_t);
+ - zapsubs - initialize the subexpression matches to "no match"
+ ^ static VOID zapsubs(regmatch_t *, size_t);
  */
 static VOID
-zapmatches(p, n)
+zapsubs(p, n)
 regmatch_t *p;
 size_t n;
 {
@@ -325,33 +344,28 @@ size_t n;
 
 /*
  - zapmem - initialize the retry memory of a subtree to zeros
- ^ static VOID zapmem(struct vars *, struct rtree *);
+ ^ static VOID zapmem(struct vars *, struct subre *);
  */
 static VOID
-zapmem(v, rt)
+zapmem(v, t)
 struct vars *v;
-struct rtree *rt;
+struct subre *t;
 {
-	if (rt == NULL)
+	if (t == NULL)
 		return;
 
 	assert(v->mem != NULL);
-	v->mem[rt->no] = 0;
-
-	if (rt->left.tree != NULL)
-		zapmem(v, rt->left.tree);
-	if (rt->left.subno > 0) {
-		v->pmatch[rt->left.subno].rm_so = -1;
-		v->pmatch[rt->left.subno].rm_eo = -1;
+	v->mem[t->retry] = 0;
+	if (t->op == '(') {
+		assert(t->subno > 0);
+		v->pmatch[t->subno].rm_so = -1;
+		v->pmatch[t->subno].rm_eo = -1;
 	}
-	if (rt->right.tree != NULL)
-		zapmem(v, rt->right.tree);
-	if (rt->right.subno > 0) {
-		v->pmatch[rt->right.subno].rm_so = -1;
-		v->pmatch[rt->right.subno].rm_eo = -1;
-	}
-	if (rt->next != NULL)
-		zapmem(v, rt->next);
+
+	if (t->left != NULL)
+		zapmem(v, t->left);
+	if (t->right != NULL)
+		zapmem(v, t->right);
 }
 
 /*
@@ -367,8 +381,6 @@ chr *end;
 {
 	int n = sub->subno;
 
-	if (n == 0)
-		return;
 	assert(n > 0);
 	if ((size_t)n >= v->nmatch)
 		return;
@@ -380,12 +392,54 @@ chr *end;
 
 /*
  - dissect - determine subexpression matches (uncomplicated case)
- ^ static int dissect(struct vars *, struct rtree *, chr *, chr *);
+ ^ static int dissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int			/* regexec return code */
+dissect(v, t, begin, end)
+struct vars *v;
+struct subre *t;
+chr *begin;			/* beginning of relevant substring */
+chr *end;			/* end of same */
+{
+	assert(t != NULL);
+	MDEBUG(("dissect %ld-%ld\n", (long)OFF(begin), (long)OFF(end)));
+
+	switch (t->op) {
+	case '=':		/* terminal node */
+		assert(t->left == NULL && t->right == NULL);
+		return REG_OKAY;	/* no action, parent did the work */
+		break;
+	case '|':		/* alternation */
+		assert(t->left != NULL);
+		return altdissect(v, t, begin, end);
+		break;
+	case 'b':		/* back ref -- shouldn't be calling us! */
+		return REG_ASSERT;
+		break;
+	case '.':		/* concatenation */
+		assert(t->left != NULL && t->right != NULL);
+		return condissect(v, t, begin, end);
+		break;
+	case '(':		/* capturing */
+		assert(t->left != NULL && t->right == NULL);
+		assert(t->subno > 0);
+		subset(v, t, begin, end);
+		return dissect(v, t->left, begin, end);
+		break;
+	default:
+		return REG_ASSERT;
+		break;
+	}
+}
+
+/*
+ - condissect - determine concatenation subexpression matches (uncomplicated)
+ ^ static int condissect(struct vars *, struct subre *, chr *, chr *);
  */
 static int			/* regexec return code */
-dissect(v, rt, begin, end)
+condissect(v, t, begin, end)
 struct vars *v;
-struct rtree *rt;
+struct subre *t;
 chr *begin;			/* beginning of relevant substring */
 chr *end;			/* end of same */
 {
@@ -394,37 +448,14 @@ chr *end;			/* end of same */
 	chr *mid;
 	int i;
 
-	if (rt == NULL)
-		return REG_OKAY;
-	MDEBUG(("substring %ld-%ld\n", (long)OFF(begin), (long)OFF(end)));
-
-	/* alternatives -- punt to auxiliary */
-	if (rt->op == '|')
-		return altdissect(v, rt, begin, end);
+	assert(t->op == '.');
+	assert(t->left != NULL && t->left->cnfa.nstates > 0);
+	assert(t->right != NULL && t->right->cnfa.nstates > 0);
 
-	/* concatenation -- need to split the substring between parts */
-	assert(rt->op == ',');
-	assert(rt->left.cnfa.nstates > 0);
-	d = newdfa(v, &rt->left.cnfa, v->g->cm);
+	d = newdfa(v, &t->left->cnfa, &v->g->cmap);
 	if (ISERR())
 		return v->err;
-
-	/* in some cases, there may be no right side... */
-	if (rt->right.cnfa.nstates == 0) {
-		MDEBUG(("singleton\n"));
-		if (longest(v, d, begin, end) != end) {
-			freedfa(d);
-			return REG_ASSERT;
-		}
-		freedfa(d);
-		assert(rt->left.subno >= 0);
-		subset(v, &rt->left, begin, end);
-		return dissect(v, rt->left.tree, begin, end);
-	}
-
-	/* general case */
-	assert(rt->right.cnfa.nstates > 0);
-	d2 = newdfa(v, &rt->right.cnfa, v->g->cm);
+	d2 = newdfa(v, &t->right->cnfa, &v->g->cmap);
 	if (ISERR()) {
 		freedfa(d);
 		return v->err;
@@ -464,45 +495,39 @@ chr *end;			/* end of same */
 	MDEBUG(("successful\n"));
 	freedfa(d);
 	freedfa(d2);
-	assert(rt->left.subno >= 0);
-	subset(v, &rt->left, begin, mid);
-	assert(rt->right.subno >= 0);
-	subset(v, &rt->right, mid, end);
-	i = dissect(v, rt->left.tree, begin, mid);
+	i = dissect(v, t->left, begin, mid);
 	if (i != REG_OKAY)
 		return i;
-	return dissect(v, rt->right.tree, mid, end);
+	return dissect(v, t->right, mid, end);
 }
 
 /*
  - altdissect - determine alternative subexpression matches (uncomplicated)
- ^ static int altdissect(struct vars *, struct rtree *, chr *, chr *);
+ ^ static int altdissect(struct vars *, struct subre *, chr *, chr *);
  */
 static int			/* regexec return code */
-altdissect(v, rt, begin, end)
+altdissect(v, t, begin, end)
 struct vars *v;
-struct rtree *rt;
+struct subre *t;
 chr *begin;			/* beginning of relevant substring */
 chr *end;			/* end of same */
 {
 	struct dfa *d;
 	int i;
 
-	assert(rt != NULL);
-	assert(rt->op == '|');
+	assert(t != NULL);
+	assert(t->op == '|');
 
-	for (i = 0; rt != NULL; rt = rt->next, i++) {
+	for (i = 0; t != NULL; t = t->right, i++) {
 		MDEBUG(("trying %dth\n", i));
-		assert(rt->left.begin != NULL);
-		d = newdfa(v, &rt->left.cnfa, v->g->cm);
+		assert(t->left != NULL && t->left->cnfa.nstates > 0);
+		d = newdfa(v, &t->left->cnfa, &v->g->cmap);
 		if (ISERR())
 			return v->err;
 		if (longest(v, d, begin, end) == end) {
 			MDEBUG(("success\n"));
 			freedfa(d);
-			assert(rt->left.subno >= 0);
-			subset(v, &rt->left, begin, end);
-			return dissect(v, rt->left.tree, begin, end);
+			return dissect(v, t->left, begin, end);
 		}
 		freedfa(d);
 	}
@@ -513,12 +538,61 @@ chr *end;			/* end of same */
  - cdissect - determine subexpression matches (with complications)
  * The retry memory stores the offset of the trial midpoint from begin, 
  * plus 1 so that 0 uniquely means "clean slate".
- ^ static int cdissect(struct vars *, struct rtree *, chr *, chr *);
+ ^ static int cdissect(struct vars *, struct subre *, chr *, chr *);
  */
 static int			/* regexec return code */
-cdissect(v, rt, begin, end)
+cdissect(v, t, begin, end)
 struct vars *v;
-struct rtree *rt;
+struct subre *t;
+chr *begin;			/* beginning of relevant substring */
+chr *end;			/* end of same */
+{
+	int er;
+
+	assert(t != NULL);
+	MDEBUG(("cdissect %ld-%ld\n", (long)OFF(begin), (long)OFF(end)));
+
+	switch (t->op) {
+	case '=':		/* terminal node */
+		assert(t->left == NULL && t->right == NULL);
+		return REG_OKAY;	/* no action, parent did the work */
+		break;
+	case '|':		/* alternation */
+		assert(t->left != NULL);
+		return caltdissect(v, t, begin, end);
+		break;
+	case 'b':		/* back ref -- shouldn't be calling us! */
+		assert(t->left == NULL && t->right == NULL);
+		return cbrdissect(v, t, begin, end);
+		break;
+	case '.':		/* concatenation */
+		assert(t->left != NULL && t->right != NULL);
+		return ccondissect(v, t, begin, end);
+		break;
+	case '(':		/* capturing */
+		assert(t->left != NULL && t->right == NULL);
+		assert(t->subno > 0);
+		er = cdissect(v, t->left, begin, end);
+		if (er == REG_OKAY)
+			subset(v, t, begin, end);
+		return er;
+		break;
+	default:
+		return REG_ASSERT;
+		break;
+	}
+}
+
+/*
+ - ccondissect - concatenation subexpression matches (with complications)
+ * The retry memory stores the offset of the trial midpoint from begin, 
+ * plus 1 so that 0 uniquely means "clean slate".
+ ^ static int ccondissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int			/* regexec return code */
+ccondissect(v, t, begin, end)
+struct vars *v;
+struct subre *t;
 chr *begin;			/* beginning of relevant substring */
 chr *end;			/* end of same */
 {
@@ -527,36 +601,25 @@ chr *end;			/* end of same */
 	chr *mid;
 	int er;
 
-	if (rt == NULL)
-		return REG_OKAY;
-	MDEBUG(("csubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end)));
-
-	/* punt various cases to auxiliaries */
-	if (rt->op == '|')			/* alternatives */
-		return caltdissect(v, rt, begin, end);
-	if (rt->op == 'b')			/* backref */
-		return cbrdissect(v, rt, begin, end);
-	if (rt->right.cnfa.nstates == 0)	/* no RHS */
-		return csindissect(v, rt, begin, end);
-	if (rt->left.prefer == SHORTER)		/* reverse scan */
-		return crevdissect(v, rt, begin, end);
+	assert(t->op == '.');
+	assert(t->left != NULL && t->left->cnfa.nstates > 0);
+	assert(t->right != NULL && t->right->cnfa.nstates > 0);
 
-	/* concatenation -- need to split the substring between parts */
-	assert(rt->op == ',');
-	assert(rt->left.cnfa.nstates > 0);
-	assert(rt->right.cnfa.nstates > 0);
-	d = newdfa(v, &rt->left.cnfa, v->g->cm);
+	if (t->left->flags&SHORTER)		/* reverse scan */
+		return crevdissect(v, t, begin, end);
+
+	d = newdfa(v, &t->left->cnfa, &v->g->cmap);
 	if (ISERR())
 		return v->err;
-	d2 = newdfa(v, &rt->right.cnfa, v->g->cm);
+	d2 = newdfa(v, &t->right->cnfa, &v->g->cmap);
 	if (ISERR()) {
 		freedfa(d);
 		return v->err;
 	}
-	MDEBUG(("cconcat %d\n", rt->no));
+	MDEBUG(("cconcat %d\n", t->retry));
 
 	/* pick a tentative midpoint */
-	if (v->mem[rt->no] == 0) {
+	if (v->mem[t->retry] == 0) {
 		mid = longest(v, d, begin, end);
 		if (mid == NULL) {
 			freedfa(d);
@@ -564,19 +627,18 @@ chr *end;			/* end of same */
 			return REG_NOMATCH;
 		}
 		MDEBUG(("tentative midpoint %ld\n", (long)OFF(mid)));
-		subset(v, &rt->left, begin, mid);
-		v->mem[rt->no] = (mid - begin) + 1;
+		v->mem[t->retry] = (mid - begin) + 1;
 	} else {
-		mid = begin + (v->mem[rt->no] - 1);
+		mid = begin + (v->mem[t->retry] - 1);
 		MDEBUG(("working midpoint %ld\n", (long)OFF(mid)));
 	}
 
 	/* iterate until satisfaction or failure */
 	for (;;) {
 		/* try this midpoint on for size */
-		er = cdissect(v, rt->left.tree, begin, mid);
+		er = cdissect(v, t->left, begin, mid);
 		if (er == REG_OKAY && longest(v, d2, mid, end) == end &&
-				(er = cdissect(v, rt->right.tree, mid, end)) == 
+				(er = cdissect(v, t->right, mid, end)) == 
 								REG_OKAY)
 			break;			/* NOTE BREAK OUT */
 		if (er != REG_OKAY && er != REG_NOMATCH) {
@@ -588,7 +650,7 @@ chr *end;			/* end of same */
 		/* that midpoint didn't work, find a new one */
 		if (mid == begin) {
 			/* all possibilities exhausted */
-			MDEBUG(("%d no midpoint\n", rt->no));
+			MDEBUG(("%d no midpoint\n", t->retry));
 			freedfa(d);
 			freedfa(d2);
 			return REG_NOMATCH;
@@ -596,23 +658,21 @@ chr *end;			/* end of same */
 		mid = longest(v, d, begin, mid-1);
 		if (mid == NULL) {
 			/* failed to find a new one */
-			MDEBUG(("%d failed midpoint\n", rt->no));
+			MDEBUG(("%d failed midpoint\n", t->retry));
 			freedfa(d);
 			freedfa(d2);
 			return REG_NOMATCH;
 		}
-		MDEBUG(("%d: new midpoint %ld\n", rt->no, (long)OFF(mid)));
-		subset(v, &rt->left, begin, mid);
-		v->mem[rt->no] = (mid - begin) + 1;
-		zapmem(v, rt->left.tree);
-		zapmem(v, rt->right.tree);
+		MDEBUG(("%d: new midpoint %ld\n", t->retry, (long)OFF(mid)));
+		v->mem[t->retry] = (mid - begin) + 1;
+		zapmem(v, t->left);
+		zapmem(v, t->right);
 	}
 
 	/* satisfaction */
 	MDEBUG(("successful\n"));
 	freedfa(d);
 	freedfa(d2);
-	subset(v, &rt->right, mid, end);
 	return REG_OKAY;
 }
 
@@ -620,12 +680,12 @@ chr *end;			/* end of same */
  - crevdissect - determine shortest-first subexpression matches
  * The retry memory stores the offset of the trial midpoint from begin, 
  * plus 1 so that 0 uniquely means "clean slate".
- ^ static int crevdissect(struct vars *, struct rtree *, chr *, chr *);
+ ^ static int crevdissect(struct vars *, struct subre *, chr *, chr *);
  */
 static int			/* regexec return code */
-crevdissect(v, rt, begin, end)
+crevdissect(v, t, begin, end)
 struct vars *v;
-struct rtree *rt;
+struct subre *t;
 chr *begin;			/* beginning of relevant substring */
 chr *end;			/* end of same */
 {
@@ -634,45 +694,43 @@ chr *end;			/* end of same */
 	chr *mid;
 	int er;
 
-	if (rt == NULL)
-		return REG_OKAY;
-	assert(rt->op == ',' && rt->left.prefer == SHORTER);
+	assert(t->op == '.');
+	assert(t->left != NULL && t->left->cnfa.nstates > 0);
+	assert(t->right != NULL && t->right->cnfa.nstates > 0);
+	assert(t->left->flags&SHORTER);
 
 	/* concatenation -- need to split the substring between parts */
-	assert(rt->left.cnfa.nstates > 0);
-	assert(rt->right.cnfa.nstates > 0);
-	d = newdfa(v, &rt->left.cnfa, v->g->cm);
+	d = newdfa(v, &t->left->cnfa, &v->g->cmap);
 	if (ISERR())
 		return v->err;
-	d2 = newdfa(v, &rt->right.cnfa, v->g->cm);
+	d2 = newdfa(v, &t->right->cnfa, &v->g->cmap);
 	if (ISERR()) {
 		freedfa(d);
 		return v->err;
 	}
-	MDEBUG(("crev %d\n", rt->no));
+	MDEBUG(("crev %d\n", t->retry));
 
 	/* pick a tentative midpoint */
-	if (v->mem[rt->no] == 0) {
-		mid = shortest(v, d, begin, begin, end);
+	if (v->mem[t->retry] == 0) {
+		mid = shortest(v, d, begin, begin, end, (chr **)NULL);
 		if (mid == NULL) {
 			freedfa(d);
 			freedfa(d2);
 			return REG_NOMATCH;
 		}
 		MDEBUG(("tentative midpoint %ld\n", (long)OFF(mid)));
-		subset(v, &rt->left, begin, mid);
-		v->mem[rt->no] = (mid - begin) + 1;
+		v->mem[t->retry] = (mid - begin) + 1;
 	} else {
-		mid = begin + (v->mem[rt->no] - 1);
+		mid = begin + (v->mem[t->retry] - 1);
 		MDEBUG(("working midpoint %ld\n", (long)OFF(mid)));
 	}
 
 	/* iterate until satisfaction or failure */
 	for (;;) {
 		/* try this midpoint on for size */
-		er = cdissect(v, rt->left.tree, begin, mid);
+		er = cdissect(v, t->left, begin, mid);
 		if (er == REG_OKAY && longest(v, d2, mid, end) == end &&
-				(er = cdissect(v, rt->right.tree, mid, end)) == 
+				(er = cdissect(v, t->right, mid, end)) == 
 								REG_OKAY)
 			break;			/* NOTE BREAK OUT */
 		if (er != REG_OKAY && er != REG_NOMATCH) {
@@ -684,101 +742,58 @@ chr *end;			/* end of same */
 		/* that midpoint didn't work, find a new one */
 		if (mid == end) {
 			/* all possibilities exhausted */
-			MDEBUG(("%d no midpoint\n", rt->no));
+			MDEBUG(("%d no midpoint\n", t->retry));
 			freedfa(d);
 			freedfa(d2);
 			return REG_NOMATCH;
 		}
-		mid = shortest(v, d, begin, mid+1, end);
+		mid = shortest(v, d, begin, mid+1, end, (chr **)NULL);
 		if (mid == NULL) {
 			/* failed to find a new one */
-			MDEBUG(("%d failed midpoint\n", rt->no));
+			MDEBUG(("%d failed midpoint\n", t->retry));
 			freedfa(d);
 			freedfa(d2);
 			return REG_NOMATCH;
 		}
-		MDEBUG(("%d: new midpoint %ld\n", rt->no, (long)OFF(mid)));
-		subset(v, &rt->left, begin, mid);
-		v->mem[rt->no] = (mid - begin) + 1;
-		zapmem(v, rt->left.tree);
-		zapmem(v, rt->right.tree);
+		MDEBUG(("%d: new midpoint %ld\n", t->retry, (long)OFF(mid)));
+		v->mem[t->retry] = (mid - begin) + 1;
+		zapmem(v, t->left);
+		zapmem(v, t->right);
 	}
 
 	/* satisfaction */
 	MDEBUG(("successful\n"));
 	freedfa(d);
 	freedfa(d2);
-	subset(v, &rt->right, mid, end);
-	return REG_OKAY;
-}
-
-/*
- - csindissect - determine singleton subexpression matches (with complications)
- ^ static int csindissect(struct vars *, struct rtree *, chr *, chr *);
- */
-static int			/* regexec return code */
-csindissect(v, rt, begin, end)
-struct vars *v;
-struct rtree *rt;
-chr *begin;			/* beginning of relevant substring */
-chr *end;			/* end of same */
-{
-	struct dfa *d;
-	int er;
-
-	assert(rt != NULL);
-	assert(rt->op == ',');
-	assert(rt->right.cnfa.nstates == 0);
-	MDEBUG(("csingleton %d\n", rt->no));
-
-	assert(rt->left.cnfa.nstates > 0);
-
-	/* exploit memory only to suppress repeated work in retries */
-	if (!v->mem[rt->no]) {
-		d = newdfa(v, &rt->left.cnfa, v->g->cm);
-		if (longest(v, d, begin, end) != end) {
-			freedfa(d);
-			return REG_NOMATCH;
-		}
-		freedfa(d);
-		v->mem[rt->no] = 1;
-		MDEBUG(("csingleton matched\n"));
-	}
-
-	er = cdissect(v, rt->left.tree, begin, end);
-	if (er != REG_OKAY)
-		return er;
-	subset(v, &rt->left, begin, end);
 	return REG_OKAY;
 }
 
 /*
  - cbrdissect - determine backref subexpression matches
- ^ static int cbrdissect(struct vars *, struct rtree *, chr *, chr *);
+ ^ static int cbrdissect(struct vars *, struct subre *, chr *, chr *);
  */
 static int			/* regexec return code */
-cbrdissect(v, rt, begin, end)
+cbrdissect(v, t, begin, end)
 struct vars *v;
-struct rtree *rt;
+struct subre *t;
 chr *begin;			/* beginning of relevant substring */
 chr *end;			/* end of same */
 {
 	int i;
-	int n = -rt->left.subno;
+	int n = t->subno;
 	size_t len;
 	chr *paren;
 	chr *p;
 	chr *stop;
-	int min = rt->left.min;
-	int max = rt->left.max;
+	int min = t->min;
+	int max = t->max;
 
-	assert(rt != NULL);
-	assert(rt->op == 'b');
-	assert(rt->right.cnfa.nstates == 0);
+	assert(t != NULL);
+	assert(t->op == 'b');
 	assert(n >= 0);
 	assert((size_t)n < v->nmatch);
 
-	MDEBUG(("cbackref n%d %d{%d-%d}\n", rt->no, n, min, max));
+	MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max));
 
 	if (v->pmatch[n].rm_so == -1)
 		return REG_NOMATCH;
@@ -786,9 +801,9 @@ chr *end;			/* end of same */
 	len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
 
 	/* no room to maneuver -- retries are pointless */
-	if (v->mem[rt->no])
+	if (v->mem[t->retry])
 		return REG_NOMATCH;
-	v->mem[rt->no] = 1;
+	v->mem[t->retry] = 1;
 
 	/* special-case zero-length string */
 	if (len == 0) {
@@ -822,12 +837,12 @@ chr *end;			/* end of same */
 
 /*
  - caltdissect - determine alternative subexpression matches (w. complications)
- ^ static int caltdissect(struct vars *, struct rtree *, chr *, chr *);
+ ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *);
  */
 static int			/* regexec return code */
-caltdissect(v, rt, begin, end)
+caltdissect(v, t, begin, end)
 struct vars *v;
-struct rtree *rt;
+struct subre *t;
 chr *begin;			/* beginning of relevant substring */
 chr *end;			/* end of same */
 {
@@ -837,839 +852,37 @@ chr *end;			/* end of same */
 #	define	TRYING	1	/* top matched, trying submatches */
 #	define	TRIED	2	/* top didn't match or submatches exhausted */
 
-	if (rt == NULL)
+	if (t == NULL)
 		return REG_NOMATCH;
-	assert(rt->op == '|');
-	if (v->mem[rt->no] == TRIED)
-		return caltdissect(v, rt->next, begin, end);
+	assert(t->op == '|');
+	if (v->mem[t->retry] == TRIED)
+		return caltdissect(v, t->right, begin, end);
 
-	MDEBUG(("calt n%d\n", rt->no));
-	assert(rt->left.begin != NULL);
+	MDEBUG(("calt n%d\n", t->retry));
+	assert(t->left != NULL);
 
-	if (v->mem[rt->no] == UNTRIED) {
-		d = newdfa(v, &rt->left.cnfa, v->g->cm);
+	if (v->mem[t->retry] == UNTRIED) {
+		d = newdfa(v, &t->left->cnfa, &v->g->cmap);
 		if (ISERR())
 			return v->err;
 		if (longest(v, d, begin, end) != end) {
 			freedfa(d);
-			v->mem[rt->no] = TRIED;
-			return caltdissect(v, rt->next, begin, end);
+			v->mem[t->retry] = TRIED;
+			return caltdissect(v, t->right, begin, end);
 		}
 		freedfa(d);
 		MDEBUG(("calt matched\n"));
-		v->mem[rt->no] = TRYING;
+		v->mem[t->retry] = TRYING;
 	}
 
-	er = cdissect(v, rt->left.tree, begin, end);
-	if (er == REG_OKAY) {
-		subset(v, &rt->left, begin, end);
-		return REG_OKAY;
-	}
+	er = cdissect(v, t->left, begin, end);
 	if (er != REG_NOMATCH)
 		return er;
 
-	v->mem[rt->no] = TRIED;
-	return caltdissect(v, rt->next, begin, end);
-}
-
-/*
- - dismatch - determine overall match using top-level dissection
- * The retry memory stores the offset of the trial midpoint from begin, 
- * plus 1 so that 0 uniquely means "clean slate".
- ^ static chr *dismatch(struct vars *, struct rtree *, chr *, chr *);
- */
-static chr *			/* endpoint, or NULL */
-dismatch(v, rt, begin, end)
-struct vars *v;
-struct rtree *rt;
-chr *begin;			/* beginning of relevant substring */
-chr *end;			/* end of same */
-{
-	struct dfa *d;
-	struct dfa *d2;
-	chr *mid;
-	chr *ret;
-
-	if (rt == NULL)
-		return begin;
-	MDEBUG(("dsubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end)));
-
-	/* punt various cases to auxiliaries */
-	if (rt->right.cnfa.nstates == 0)	/* no RHS */
-		return dismsin(v, rt, begin, end);
-	if (rt->left.prefer == SHORTER)		/* reverse scan */
-		return dismrev(v, rt, begin, end);
-
-	/* concatenation -- need to split the substring between parts */
-	assert(rt->op == ',');
-	assert(rt->left.cnfa.nstates > 0);
-	assert(rt->right.cnfa.nstates > 0);
-	d = newdfa(v, &rt->left.cnfa, v->g->cm);
-	if (ISERR())
-		return NULL;
-	d2 = newdfa(v, &rt->right.cnfa, v->g->cm);
-	if (ISERR()) {
-		freedfa(d);
-		return NULL;
-	}
-	MDEBUG(("dconcat %d\n", rt->no));
-
-	/* pick a tentative midpoint */
-	if (v->mem[rt->no] == 0) {
-		mid = longest(v, d, begin, end);
-		if (mid == NULL) {
-			freedfa(d);
-			freedfa(d2);
-			return NULL;
-		}
-		MDEBUG(("tentative midpoint %ld\n", (long)OFF(mid)));
-		v->mem[rt->no] = (mid - begin) + 1;
-	} else {
-		mid = begin + (v->mem[rt->no] - 1);
-		MDEBUG(("working midpoint %ld\n", (long)OFF(mid)));
-	}
-
-	/* iterate until satisfaction or failure */
-	for (;;) {
-		/* try this midpoint on for size */
-		if (rt->right.tree == NULL || rt->right.tree->op == 'b') {
-			if (rt->right.prefer == LONGER)
-				ret = longest(v, d2, mid, end);
-			else
-				ret = shortest(v, d2, mid, mid, end);
-		} else {
-			if (longest(v, d2, mid, end) != NULL)
-				ret = dismatch(v, rt->right.tree, mid, end);
-			else
-				ret = NULL;
-		}
-		if (ret != NULL)
-			break;			/* NOTE BREAK OUT */
-
-		/* that midpoint didn't work, find a new one */
-		if (mid == begin) {
-			/* all possibilities exhausted */
-			MDEBUG(("%d no midpoint\n", rt->no));
-			freedfa(d);
-			freedfa(d2);
-			return NULL;
-		}
-		mid = longest(v, d, begin, mid-1);
-		if (mid == NULL) {
-			/* failed to find a new one */
-			MDEBUG(("%d failed midpoint\n", rt->no));
-			freedfa(d);
-			freedfa(d2);
-			return NULL;
-		}
-		MDEBUG(("%d: new midpoint %ld\n", rt->no, (long)OFF(mid)));
-		v->mem[rt->no] = (mid - begin) + 1;
-		zapmem(v, rt->right.tree);
-	}
-
-	/* satisfaction */
-	MDEBUG(("successful\n"));
-	freedfa(d);
-	freedfa(d2);
-	return ret;
-}
-
-/*
- - dismrev - determine overall match using top-level dissection
- * The retry memory stores the offset of the trial midpoint from begin, 
- * plus 1 so that 0 uniquely means "clean slate".
- ^ static chr *dismrev(struct vars *, struct rtree *, chr *, chr *);
- */
-static chr *			/* endpoint, or NULL */
-dismrev(v, rt, begin, end)
-struct vars *v;
-struct rtree *rt;
-chr *begin;			/* beginning of relevant substring */
-chr *end;			/* end of same */
-{
-	struct dfa *d;
-	struct dfa *d2;
-	chr *mid;
-	chr *ret;
-
-	if (rt == NULL)
-		return begin;
-	MDEBUG(("rsubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end)));
-
-	/* concatenation -- need to split the substring between parts */
-	assert(rt->op == ',');
-	assert(rt->left.cnfa.nstates > 0);
-	assert(rt->right.cnfa.nstates > 0);
-	d = newdfa(v, &rt->left.cnfa, v->g->cm);
-	if (ISERR())
-		return NULL;
-	d2 = newdfa(v, &rt->right.cnfa, v->g->cm);
-	if (ISERR()) {
-		freedfa(d);
-		return NULL;
-	}
-	MDEBUG(("dconcat %d\n", rt->no));
-
-	/* pick a tentative midpoint */
-	if (v->mem[rt->no] == 0) {
-		mid = shortest(v, d, begin, begin, end);
-		if (mid == NULL) {
-			freedfa(d);
-			freedfa(d2);
-			return NULL;
-		}
-		MDEBUG(("tentative midpoint %ld\n", (long)OFF(mid)));
-		v->mem[rt->no] = (mid - begin) + 1;
-	} else {
-		mid = begin + (v->mem[rt->no] - 1);
-		MDEBUG(("working midpoint %ld\n", (long)OFF(mid)));
-	}
-
-	/* iterate until satisfaction or failure */
-	for (;;) {
-		/* try this midpoint on for size */
-		if (rt->right.tree == NULL || rt->right.tree->op == 'b') {
-			if (rt->right.prefer == LONGER)
-				ret = longest(v, d2, mid, end);
-			else
-				ret = shortest(v, d2, mid, mid, end);
-		} else {
-			if (longest(v, d2, mid, end) != NULL)
-				ret = dismatch(v, rt->right.tree, mid, end);
-			else
-				ret = NULL;
-		}
-		if (ret != NULL)
-			break;			/* NOTE BREAK OUT */
-
-		/* that midpoint didn't work, find a new one */
-		if (mid == end) {
-			/* all possibilities exhausted */
-			MDEBUG(("%d no midpoint\n", rt->no));
-			freedfa(d);
-			freedfa(d2);
-			return NULL;
-		}
-		mid = shortest(v, d, begin, mid+1, end);
-		if (mid == NULL) {
-			/* failed to find a new one */
-			MDEBUG(("%d failed midpoint\n", rt->no));
-			freedfa(d);
-			freedfa(d2);
-			return NULL;
-		}
-		MDEBUG(("%d: new midpoint %ld\n", rt->no, (long)OFF(mid)));
-		v->mem[rt->no] = (mid - begin) + 1;
-		zapmem(v, rt->right.tree);
-	}
-
-	/* satisfaction */
-	MDEBUG(("successful\n"));
-	freedfa(d);
-	freedfa(d2);
-	return ret;
-}
-
-/*
- - dismsin - determine singleton subexpression matches (with complications)
- ^ static chr *dismsin(struct vars *, struct rtree *, chr *, chr *);
- */
-static chr *
-dismsin(v, rt, begin, end)
-struct vars *v;
-struct rtree *rt;
-chr *begin;			/* beginning of relevant substring */
-chr *end;			/* end of same */
-{
-	struct dfa *d;
-	chr *ret;
-
-	assert(rt != NULL);
-	assert(rt->op == ',');
-	assert(rt->right.cnfa.nstates == 0);
-	MDEBUG(("dsingleton %d\n", rt->no));
-
-	assert(rt->left.cnfa.nstates > 0);
-
-	/* retries are pointless */
-	if (v->mem[rt->no])
-		return NULL;
-	v->mem[rt->no] = 1;
-
-	d = newdfa(v, &rt->left.cnfa, v->g->cm);
-	if (d == NULL)
-		return NULL;
-	if (rt->left.prefer == LONGER)
-		ret = longest(v, d, begin, end);
-	else
-		ret = shortest(v, d, begin, begin, end);
-	freedfa(d);
-	if (ret != NULL)
-		MDEBUG(("dsingleton matched\n"));
-	return ret;
-}
-
-/*
- - longest - longest-preferred matching engine
- ^ static chr *longest(struct vars *, struct dfa *, chr *, chr *);
- */
-static chr *			/* endpoint, or NULL */
-longest(v, d, start, stop)
-struct vars *v;			/* used only for debug and exec flags */
-struct dfa *d;
-chr *start;			/* where the match should start */
-chr *stop;			/* match must end at or before here */
-{
-	chr *cp;
-	chr *realstop = (stop == v->stop) ? stop : stop + 1;
-	color co;
-	struct sset *css;
-	struct sset *ss;
-	chr *post;
-	int i;
-	struct colormap *cm = d->cm;
-
-	/* initialize */
-	css = initialize(v, d, start);
-	cp = start;
-
-	/* startup */
-	FDEBUG(("+++ startup +++\n"));
-	if (cp == v->start) {
-		co = d->cnfa->bos[(v->eflags&REG_NOTBOL) ? 0 : 1];
-		FDEBUG(("color %ld\n", (long)co));
-	} else {
-		co = GETCOLOR(cm, *(cp - 1));
-		FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co));
-	}
-	css = miss(v, d, css, co, cp, start);
-	if (css == NULL)
-		return NULL;
-	css->lastseen = cp;
-
-	/* main loop */
-	if (v->eflags&REG_FTRACE)
-		while (cp < realstop) {
-			FDEBUG(("+++ at c%d +++\n", css - d->ssets));
-			co = GETCOLOR(cm, *cp);
-			FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co));
-			ss = css->outs[co];
-			if (ss == NULL) {
-				ss = miss(v, d, css, co, cp+1, start);
-				if (ss == NULL)
-					break;	/* NOTE BREAK OUT */
-			}
-			cp++;
-			ss->lastseen = cp;
-			css = ss;
-		}
-	else
-		while (cp < realstop) {
-			co = GETCOLOR(cm, *cp);
-			ss = css->outs[co];
-			if (ss == NULL) {
-				ss = miss(v, d, css, co, cp+1, start);
-				if (ss == NULL)
-					break;	/* NOTE BREAK OUT */
-			}
-			cp++;
-			ss->lastseen = cp;
-			css = ss;
-		}
-
-	/* shutdown */
-	FDEBUG(("+++ shutdown at c%d +++\n", css - d->ssets));
-	if (cp == v->stop && stop == v->stop) {
-		co = d->cnfa->eos[(v->eflags&REG_NOTEOL) ? 0 : 1];
-		FDEBUG(("color %ld\n", (long)co));
-		ss = miss(v, d, css, co, cp, start);
-		/* special case:  match ended at eol? */
-		if (ss != NULL && (ss->flags&POSTSTATE))
-			return cp;
-		else if (ss != NULL)
-			ss->lastseen = cp;	/* to be tidy */
-	}
-
-	/* find last match, if any */
-	post = d->lastpost;
-	for (ss = d->ssets, i = 0; i < d->nssused; ss++, i++)
-		if ((ss->flags&POSTSTATE) && post != ss->lastseen &&
-					(post == NULL || post < ss->lastseen))
-			post = ss->lastseen;
-	if (post != NULL)		/* found one */
-		return post - 1;
-
-	return NULL;
-}
-
-/*
- - shortest - shortest-preferred matching engine
- ^ static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *);
- */
-static chr *			/* endpoint, or NULL */
-shortest(v, d, start, min, max)
-struct vars *v;			/* used only for debug and exec flags */
-struct dfa *d;
-chr *start;			/* where the match should start */
-chr *min;			/* match must end at or after here */
-chr *max;			/* match must end at or before here */
-{
-	chr *cp;
-	chr *realmin = (min == v->stop) ? min : min + 1;
-	chr *realmax = (max == v->stop) ? max : max + 1;
-	color co;
-	struct sset *css;
-	struct sset *ss;
-	struct colormap *cm = d->cm;
-
-	/* initialize */
-	css = initialize(v, d, start);
-	cp = start;
-
-	/* startup */
-	FDEBUG(("--- startup ---\n"));
-	if (cp == v->start) {
-		co = d->cnfa->bos[(v->eflags&REG_NOTBOL) ? 0 : 1];
-		FDEBUG(("color %ld\n", (long)co));
-	} else {
-		co = GETCOLOR(cm, *(cp - 1));
-		FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co));
-	}
-	css = miss(v, d, css, co, cp, start);
-	if (css == NULL)
-		return NULL;
-	css->lastseen = cp;
-	ss = css;
-
-	/* main loop */
-	if (v->eflags&REG_FTRACE)
-		while (cp < realmax) {
-			FDEBUG(("--- at c%d ---\n", css - d->ssets));
-			co = GETCOLOR(cm, *cp);
-			FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co));
-			ss = css->outs[co];
-			if (ss == NULL) {
-				ss = miss(v, d, css, co, cp+1, start);
-				if (ss == NULL)
-					break;	/* NOTE BREAK OUT */
-			}
-			cp++;
-			ss->lastseen = cp;
-			css = ss;
-			if ((ss->flags&POSTSTATE) && cp >= realmin)
-				break;		/* NOTE BREAK OUT */
-		}
-	else
-		while (cp < realmax) {
-			co = GETCOLOR(cm, *cp);
-			ss = css->outs[co];
-			if (ss == NULL) {
-				ss = miss(v, d, css, co, cp+1, start);
-				if (ss == NULL)
-					break;	/* NOTE BREAK OUT */
-			}
-			cp++;
-			ss->lastseen = cp;
-			css = ss;
-			if ((ss->flags&POSTSTATE) && cp >= realmin)
-				break;		/* NOTE BREAK OUT */
-		}
-
-	if (ss == NULL)
-		return NULL;
-	if (ss->flags&POSTSTATE) {
-		assert(cp >= realmin);
-		return cp - 1;
-	}
-
-	/* shutdown */
-	FDEBUG(("--- shutdown at c%d ---\n", css - d->ssets));
-	if (cp == v->stop && max == v->stop) {
-		co = d->cnfa->eos[(v->eflags&REG_NOTEOL) ? 0 : 1];
-		FDEBUG(("color %ld\n", (long)co));
-		ss = miss(v, d, css, co, cp, start);
-		/* special case:  match ended at eol? */
-		if (ss != NULL && (ss->flags&POSTSTATE))
-			return cp;
-	}
-
-	return NULL;
-}
-
-/*
- - newdfa - set up a fresh DFA
- ^ static struct dfa *newdfa(struct vars *, struct cnfa *,
- ^ 	struct colormap *);
- */
-static struct dfa *
-newdfa(v, cnfa, cm)
-struct vars *v;
-struct cnfa *cnfa;
-struct colormap *cm;
-{
-	struct dfa *d = (struct dfa *)MALLOC(sizeof(struct dfa));
-	int wordsper = (cnfa->nstates + UBITS - 1) / UBITS;
-	struct sset *ss;
-	int i;
-
-	assert(cnfa != NULL && cnfa->nstates != 0);
-	if (d == NULL) {
-		ERR(REG_ESPACE);
-		return NULL;
-	}
-
-	d->ssets = (struct sset *)MALLOC(CACHE * sizeof(struct sset));
-	d->statesarea = (unsigned *)MALLOC((CACHE+WORK) * wordsper *
-							sizeof(unsigned));
-	d->work = &d->statesarea[CACHE * wordsper];
-	d->outsarea = (struct sset **)MALLOC(CACHE * cnfa->ncolors *
-							sizeof(struct sset *));
-	d->incarea = (struct arcp *)MALLOC(CACHE * cnfa->ncolors *
-							sizeof(struct arcp));
-	if (d->ssets == NULL || d->statesarea == NULL || d->outsarea == NULL ||
-							d->incarea == NULL) {
-		freedfa(d);
-		ERR(REG_ESPACE);
-		return NULL;
-	}
-
-	d->nssets = (v->eflags&REG_SMALL) ? 5 : CACHE;
-	d->nssused = 0;
-	d->nstates = cnfa->nstates;
-	d->ncolors = cnfa->ncolors;
-	d->wordsper = wordsper;
-	d->cnfa = cnfa;
-	d->cm = cm;
-	d->lastpost = NULL;
-	d->search = d->ssets;
-
-	for (ss = d->ssets, i = 0; i < d->nssets; ss++, i++) {
-		/* initialization of most fields is done as needed */
-		ss->states = &d->statesarea[i * d->wordsper];
-		ss->outs = &d->outsarea[i * d->ncolors];
-		ss->inchain = &d->incarea[i * d->ncolors];
-	}
-
-	return d;
-}
-
-/*
- - freedfa - free a DFA
- ^ static VOID freedfa(struct dfa *);
- */
-static VOID
-freedfa(d)
-struct dfa *d;
-{
-	if (d->ssets != NULL)
-		FREE(d->ssets);
-	if (d->statesarea != NULL)
-		FREE(d->statesarea);
-	if (d->outsarea != NULL)
-		FREE(d->outsarea);
-	if (d->incarea != NULL)
-		FREE(d->incarea);
-	FREE(d);
-}
-
-/*
- - hash - construct a hash code for a bitvector
- * There are probably better ways, but they're more expensive.
- ^ static unsigned hash(unsigned *, int);
- */
-static unsigned
-hash(uv, n)
-unsigned *uv;
-int n;
-{
-	int i;
-	unsigned h;
-
-	h = 0;
-	for (i = 0; i < n; i++)
-		h ^= uv[i];
-	return h;
-}
-
-/*
- - initialize - hand-craft a cache entry for startup, otherwise get ready
- ^ static struct sset *initialize(struct vars *, struct dfa *, chr *);
- */
-static struct sset *
-initialize(v, d, start)
-struct vars *v;			/* used only for debug flags */
-struct dfa *d;
-chr *start;
-{
-	struct sset *ss;
-	int i;
-
-	/* is previous one still there? */
-	if (d->nssused > 0 && (d->ssets[0].flags&STARTER))
-		ss = &d->ssets[0];
-	else {				/* no, must (re)build it */
-		ss = getvacant(v, d, start, start);
-		for (i = 0; i < d->wordsper; i++)
-			ss->states[i] = 0;
-		BSET(ss->states, d->cnfa->pre);
-		ss->hash = hash(ss->states, d->wordsper);
-		assert(d->cnfa->pre != d->cnfa->post);
-		ss->flags = STARTER;
-		/* lastseen dealt with below */
-	}
-
-	for (i = 0; i < d->nssused; i++)
-		d->ssets[i].lastseen = NULL;
-	ss->lastseen = start;		/* maybe untrue, but harmless */
-	d->lastpost = NULL;
-	return ss;
-}
-
-/*
- - miss - handle a cache miss
- ^ static struct sset *miss(struct vars *, struct dfa *, struct sset *,
- ^ 	pcolor, chr *, chr *);
- */
-static struct sset *		/* NULL if goes to empty set */
-miss(v, d, css, co, cp, start)
-struct vars *v;			/* used only for debug flags */
-struct dfa *d;
-struct sset *css;
-pcolor co;
-chr *cp;			/* next chr */
-chr *start;			/* where the attempt got started */
-{
-	struct cnfa *cnfa = d->cnfa;
-	int i;
-	unsigned h;
-	struct carc *ca;
-	struct sset *p;
-	int ispost;
-	int gotstate;
-	int dolacons;
-	int didlacons;
-
-	/* for convenience, we can be called even if it might not be a miss */
-	if (css->outs[co] != NULL) {
-		FDEBUG(("hit\n"));
-		return css->outs[co];
-	}
-	FDEBUG(("miss\n"));
-
-	/* first, what set of states would we end up in? */
-	for (i = 0; i < d->wordsper; i++)
-		d->work[i] = 0;
-	ispost = 0;
-	gotstate = 0;
-	for (i = 0; i < d->nstates; i++)
-		if (ISBSET(css->states, i))
-			for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
-				if (ca->co == co) {
-					BSET(d->work, ca->to);
-					gotstate = 1;
-					if (ca->to == cnfa->post)
-						ispost = 1;
-					FDEBUG(("%d -> %d\n", i, ca->to));
-				}
-	dolacons = (gotstate) ? (cnfa->flags&HASLACONS) : 0;
-	didlacons = 0;
-	while (dolacons) {		/* transitive closure */
-		dolacons = 0;
-		for (i = 0; i < d->nstates; i++)
-			if (ISBSET(d->work, i))
-				for (ca = cnfa->states[i]; ca->co != COLORLESS;
-									ca++)
-					if (ca->co > cnfa->ncolors &&
-						!ISBSET(d->work, ca->to) &&
-							lacon(v, cnfa, cp,
-								ca->co)) {
-						BSET(d->work, ca->to);
-						dolacons = 1;
-						didlacons = 1;
-						if (ca->to == cnfa->post)
-							ispost = 1;
-						FDEBUG(("%d :> %d\n",i,ca->to));
-					}
-	}
-	if (!gotstate)
-		return NULL;
-	h = hash(d->work, d->wordsper);
-
-	/* next, is that in the cache? */
-	for (p = d->ssets, i = d->nssused; i > 0; p++, i--)
-		if (p->hash == h && memcmp(VS(d->work), VS(p->states),
-					d->wordsper*sizeof(unsigned)) == 0) {
-			FDEBUG(("cached c%d\n", p - d->ssets));
-			break;			/* NOTE BREAK OUT */
-		}
-	if (i == 0) {		/* nope, need a new cache entry */
-		p = getvacant(v, d, cp, start);
-		assert(p != css);
-		for (i = 0; i < d->wordsper; i++)
-			p->states[i] = d->work[i];
-		p->hash = h;
-		p->flags = (ispost) ? POSTSTATE : 0;
-		/* lastseen to be dealt with by caller */
-	}
-
-	if (!didlacons) {		/* lookahead conds. always cache miss */
-		css->outs[co] = p;
-		css->inchain[co] = p->ins;
-		p->ins.ss = css;
-		p->ins.co = (color)co;
-	}
-	return p;
+	v->mem[t->retry] = TRIED;
+	return caltdissect(v, t->right, begin, end);
 }
 
-/*
- - lacon - lookahead-constraint checker for miss()
- ^ static int lacon(struct vars *, struct cnfa *, chr *, pcolor);
- */
-static int			/* predicate:  constraint satisfied? */
-lacon(v, pcnfa, cp, co)
-struct vars *v;
-struct cnfa *pcnfa;		/* parent cnfa */
-chr *cp;
-pcolor co;			/* "color" of the lookahead constraint */
-{
-	int n;
-	struct subre *sub;
-	struct dfa *d;
-	chr *end;
 
-	n = co - pcnfa->ncolors;
-	assert(n < v->g->nlacons && v->g->lacons != NULL);
-	FDEBUG(("=== testing lacon %d\n", n));
-	sub = &v->g->lacons[n];
-	d = newdfa(v, &sub->cnfa, v->g->cm);
-	if (d == NULL) {
-		ERR(REG_ESPACE);
-		return 0;
-	}
-	end = longest(v, d, cp, v->stop);
-	freedfa(d);
-	FDEBUG(("=== lacon %d match %d\n", n, (end != NULL)));
-	return (sub->subno) ? (end != NULL) : (end == NULL);
-}
 
-/*
- - getvacant - get a vacant state set
- * This routine clears out the inarcs and outarcs, but does not otherwise
- * clear the innards of the state set -- that's up to the caller.
- ^ static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *);
- */
-static struct sset *
-getvacant(v, d, cp, start)
-struct vars *v;			/* used only for debug flags */
-struct dfa *d;
-chr *cp;
-chr *start;
-{
-	int i;
-	struct sset *ss;
-	struct sset *p;
-	struct arcp ap;
-	struct arcp lastap;
-	color co;
-
-	ss = pickss(v, d, cp, start);
-	assert(!(ss->flags&LOCKED));
-
-	/* clear out its inarcs, including self-referential ones */
-	ap = ss->ins;
-	while ((p = ap.ss) != NULL) {
-		co = ap.co;
-		FDEBUG(("zapping c%d's %ld outarc\n", p - d->ssets, (long)co));
-		p->outs[co] = NULL;
-		ap = p->inchain[co];
-		p->inchain[co].ss = NULL;	/* paranoia */
-	}
-	ss->ins.ss = NULL;
-
-	/* take it off the inarc chains of the ssets reached by its outarcs */
-	for (i = 0; i < d->ncolors; i++) {
-		p = ss->outs[i];
-		assert(p != ss);		/* not self-referential */
-		if (p == NULL)
-			continue;		/* NOTE CONTINUE */
-		FDEBUG(("del outarc %d from c%d's in chn\n", i, p - d->ssets));
-		if (p->ins.ss == ss && p->ins.co == i)
-			p->ins = ss->inchain[i];
-		else {
-			assert(p->ins.ss != NULL);
-			for (ap = p->ins; ap.ss != NULL &&
-						!(ap.ss == ss && ap.co == i);
-						ap = ap.ss->inchain[ap.co])
-				lastap = ap;
-			assert(ap.ss != NULL);
-			lastap.ss->inchain[lastap.co] = ss->inchain[i];
-		}
-		ss->outs[i] = NULL;
-		ss->inchain[i].ss = NULL;
-	}
-
-	/* if ss was a success state, may need to remember location */
-	if ((ss->flags&POSTSTATE) && ss->lastseen != d->lastpost &&
-			(d->lastpost == NULL || d->lastpost < ss->lastseen))
-		d->lastpost = ss->lastseen;
-
-	return ss;
-}
-
-/*
- - pickss - pick the next stateset to be used
- ^ static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *);
- */
-static struct sset *
-pickss(v, d, cp, start)
-struct vars *v;			/* used only for debug flags */
-struct dfa *d;
-chr *cp;
-chr *start;
-{
-	int i;
-	struct sset *ss;
-	struct sset *end;
-	chr *ancient;
-
-	/* shortcut for cases where cache isn't full */
-	if (d->nssused < d->nssets) {
-		ss = &d->ssets[d->nssused];
-		d->nssused++;
-		FDEBUG(("new c%d\n", ss - d->ssets));
-		/* must make innards consistent */
-		ss->ins.ss = NULL;
-		for (i = 0; i < d->ncolors; i++) {
-			ss->outs[i] = NULL;
-			ss->inchain[i].ss = NULL;
-		}
-		ss->flags = 0;
-		return ss;
-	}
-
-	/* look for oldest, or old enough anyway */
-	if (cp - start > d->nssets*3/4)		/* oldest 25% are expendable */
-		ancient = cp - d->nssets*3/4;
-	else
-		ancient = start;
-	for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++)
-		if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
-							!(ss->flags&LOCKED)) {
-			d->search = ss + 1;
-			FDEBUG(("replacing c%d\n", ss - d->ssets));
-			return ss;
-		}
-	for (ss = d->ssets, end = d->search; ss < end; ss++)
-		if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
-							!(ss->flags&LOCKED)) {
-			d->search = ss + 1;
-			FDEBUG(("replacing c%d\n", ss - d->ssets));
-			return ss;
-		}
-
-	/* nobody's old enough?!? -- something's really wrong */
-	FDEBUG(("can't find victim to replace!\n"));
-	assert(NOTREACHED);
-	ERR(REG_ASSERT);
-	return d->ssets;
-}
+#include "rege_dfa.c"
diff --git a/generic/regguts.h b/generic/regguts.h
index 93d0eec..5f9a0ab 100644
--- a/generic/regguts.h
+++ b/generic/regguts.h
@@ -96,7 +96,9 @@
  * debugging facilities
  */
 #ifdef REG_DEBUG
+/* FDEBUG does finite-state tracing */
 #define	FDEBUG(arglist)	{ if (v->eflags&REG_FTRACE) printf arglist; }
+/* MDEBUG does higher-level tracing */
 #define	MDEBUG(arglist)	{ if (v->eflags&REG_MTRACE) printf arglist; }
 #else
 #define	FDEBUG(arglist)	{}
@@ -115,15 +117,6 @@
 
 
 /*
- * Map a truth value into -1 for false, 1 for true.  This is so it is
- * possible to write compile-time assertions by declaring a dummy array
- * of this size.  (Why not #if?  Because sizeof is not available there.)
- */
-#define	NEGIFNOT(x)	(2*!!(x) - 1)		/* !! ensures 0 or 1 */
-
-
-
-/*
  * We dissect a chr into byts for colormap table indexing.  Here we define
  * a byt, which will be the same as a byte on most machines...  The exact
  * size of a byt is not critical, but about 8 bits is good, and extraction
@@ -154,10 +147,10 @@ typedef int pcolor;		/* what color promotes to */
  * A colormap is a tree -- more precisely, a DAG -- indexed at each level
  * by a byt of the chr, to map the chr to a color efficiently.  Because
  * lower sections of the tree can be shared, it can exploit the usual
- * sparseness of such a mapping table.  The final tree is always NBYTS
- * levels deep (at present it may be shallower during construction, but
- * it is always "filled" to full depth at the end of that, using pointers
- * to "fill blocks" which are entirely WHITE in color).
+ * sparseness of such a mapping table.  The tree is always NBYTS levels
+ * deep (in the past it was shallower during construction but was "filled"
+ * to full depth at the end of that); areas that are unaltered as yet point
+ * to "fill blocks" which are entirely WHITE in color.
  */
 
 /* the tree itself */
@@ -177,12 +170,14 @@ union tree {
 /* internal per-color structure for the color machinery */
 struct colordesc {
 	uchr nchrs;		/* number of chars of this color */
-	color sub;		/* open subcolor of this one, or NOSUB */
+	color sub;		/* open subcolor (if any); free chain ptr */
 #		define	NOSUB	COLORLESS
 	struct arc *arcs;	/* color chain */
-#	define	UNUSEDCOLOR(cd)	((cd)->nchrs == 0 && (cd)->sub == NOSUB)
 	int flags;
-#		define	PSEUDO	1	/* pseudocolor, no real chars */
+#		define	FREECOL	01	/* currently free */
+#		define	PSEUDO	02	/* pseudocolor, no real chars */
+#	define	UNUSEDCOLOR(cd)	((cd)->flags&FREECOL)
+	union tree *block;	/* block of solid color, if any */
 };
 
 /* the color map itself */
@@ -190,13 +185,13 @@ struct colormap {
 	int magic;
 #		define	CMMAGIC	0x876
 	struct vars *v;			/* for compile error reporting */
-	color rest;
-	int filled;			/* has it been filled? */
 	size_t ncds;			/* number of colordescs */
+	size_t max;			/* highest in use */
+	color free;			/* beginning of free chain (if non-0) */
 	struct colordesc *cd;
-#	define	CDEND(cm)	(&(cm)->cd[(cm)->ncds])
+#	define	CDEND(cm)	(&(cm)->cd[(cm)->max + 1])
 #		define	NINLINECDS	((size_t)10)
-	struct colordesc cds[NINLINECDS];
+	struct colordesc cdspace[NINLINECDS];
 	union tree tree[NBYTS];		/* tree top, plus fill blocks */
 };
 
@@ -208,6 +203,7 @@ struct colormap {
 #if NBYTS == 1
 #define	GETCOLOR(cm, c)	((cm)->tree->tcolor[B0(c)])
 #endif
+/* beware, for NBYTS>1, GETCOLOR() is unsafe -- 2nd arg used repeatedly */
 #if NBYTS == 2
 #define	GETCOLOR(cm, c)	((cm)->tree->tptr[B1(c)]->tcolor[B0(c)])
 #endif
@@ -326,34 +322,35 @@ struct cnfa {
 
 
 /*
- * definitions for subexpression tree
- * The intrepid code-reader is hereby warned that the subexpression tree
- * is kludge piled upon kludge, and is badly in need of rethinking.  Do
- * not expect it to look clean and sensible.
+ * subexpression tree
  */
 struct subre {
-	struct state *begin;	/* outarcs from here... */
-	struct state *end;	/* ...ending in inarcs here */
-	int prefer;		/* match preference */
-#		define	NONEYET		00
-#		define	LONGER		01
-#		define	SHORTER		02
-	int subno;		/* subexpression number (0 none, <0 backref) */
+	char op;		/* '|', '.' (concat), 'b' (backref), '(', '=' */
+	char flags;
+#		define	LONGER	01	/* prefers longer match */
+#		define	SHORTER	02	/* prefers shorter match */
+#		define	MIXED	04	/* mixed preference below */
+#		define	CAP	010	/* capturing parens below */
+#		define	BACKR	020	/* back reference below */
+#		define	INUSE	0100	/* in use in final tree */
+#		define	LOCAL	03	/* bits which may not propagate up */
+#		define	LMIX(f)	((f)<<2)	/* LONGER -> MIXED */
+#		define	SMIX(f)	((f)<<1)	/* SHORTER -> MIXED */
+#		define	UP(f)	(((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED))
+#		define	MESSY(f)	((f)&(MIXED|CAP|BACKR))
+#		define	PREF(f)	((f)&LOCAL)
+#		define	PREF2(f1, f2)	((PREF(f1) != 0) ? PREF(f1) : PREF(f2))
+#		define	COMBINE(f1, f2)	(UP((f1)|(f2)) | PREF2(f1, f2))
+	short retry;		/* index into retry memory */
+	int subno;		/* subexpression number (for 'b' and '(') */
 	short min;		/* min repetitions, for backref only */
 	short max;		/* max repetitions, for backref only */
-	struct rtree *tree;	/* substructure, if any */
+	struct subre *left;	/* left child, if any (also freelist chain) */
+	struct subre *right;	/* right child, if any */
+	struct state *begin;	/* outarcs from here... */
+	struct state *end;	/* ...ending in inarcs here */
 	struct cnfa cnfa;	/* compacted NFA, if any */
-};
-
-struct rtree {
-	char op;		/* operator:  '|', ',' */
-	char flags;
-#		define	INUSE	01	/* in use in the tree */
-	short no;		/* index into retry memory */
-	struct subre left;
-	struct rtree *next;	/* for '|' */
-	struct subre right;	/* for ',' */
-	struct rtree *chain;	/* for bookkeeping and error cleanup */
+	struct subre *chain;	/* for bookkeeping and error cleanup */
 };
 
 
@@ -377,12 +374,13 @@ struct guts {
 	int cflags;		/* copy of compile flags */
 	int info;		/* copy of re_info */
 	size_t nsub;		/* copy of re_nsub */
-	struct cnfa cnfa;
-	struct rtree *tree;
+	struct subre *tree;
+	struct cnfa search;	/* for fast preliminary search */
 	int ntree;
-	struct colormap *cm;
+	struct colormap cmap;
 	int FUNCPTR(compare, (CONST chr *, CONST chr *, size_t));
 	struct subre *lacons;	/* lookahead-constraint vector */
 	int nlacons;		/* size of lacons */
 	int usedshorter;	/* used non-greedy quantifiers? */
+	int unmatchable;	/* cannot match anything? */
 };
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 9f46efc..1a1cf2a 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -12,7 +12,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.1.2.4 1998/10/21 20:40:05 stanton Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.1.2.5 1998/11/11 01:44:52 stanton Exp $
  */
 
 #include "tclInt.h"
@@ -90,7 +90,9 @@ Tcl_PwdObjCmd(dummy, interp, objc, objv)
  * Tcl_RegexpObjCmd --
  *
  *	This procedure is invoked to process the "regexp" Tcl command.
- *	See the user documentation for details on what it does.
+ *	See the user documentation for details on what it does.  The
+ *	REGEXP_TEST stuff is to minimize code differences between this
+ *	and the "testregexp" command.
  *
  * Results:
  *	A standard Tcl result.
@@ -117,12 +119,24 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
     Tcl_UniChar *wStart;
     static char *options[] = {
 	"-indices",	"-nocase",	"-about",	"-expanded",
-	"-unsupported0",	"--",		(char *) NULL
+	"-line",	"-linestop",	"-lineanchor",
+#ifdef REGEXP_TEST
+	"-xflags",
+#endif
+	"--",		(char *) NULL
     };
     enum options {
 	REGEXP_INDICES, REGEXP_NOCASE,	REGEXP_ABOUT,	REGEXP_EXPANDED,
-	REGEXP_XFLAGS,		REGEXP_LAST
+	REGEXP_MULTI,	REGEXP_NOCROSS,	REGEXP_NEWL,
+#ifdef REGEXP_TEST
+	REGEXP_XFLAGS,
+#endif
+	REGEXP_LAST
     };
+#ifndef REGEXP_TEST
+#   define REGEXP_XFLAGS	-1	/* impossible value */
+#   define TclRegXflags(a,b,c,d)	/* do nothing */
+#endif
 
     indices = 0;
     about = 0;
@@ -159,6 +173,18 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
 		cflags |= REG_EXPANDED;
 		break;
 	    }
+	    case REGEXP_MULTI: {
+		cflags |= REG_NEWLINE;
+		break;
+	    }
+	    case REGEXP_NOCROSS: {
+		cflags |= REG_NLSTOP;
+		break;
+	    }
+	    case REGEXP_NEWL: {
+		cflags |= REG_NLANCH;
+		break;
+	    }
 	    case REGEXP_XFLAGS: {
 		hasxflags = 1;
 		break;
@@ -207,7 +233,7 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
     wStart = TclUtfToUniCharDString(string, stringLength, &stringBuffer);
     wLen = Tcl_DStringLength(&stringBuffer) / sizeof(Tcl_UniChar);
 
-    match = TclRegExpExecUniChar(interp, regExpr, wStart, wLen, eflags);
+    match = TclRegExpExecUniChar(interp, regExpr, wStart, wLen, objc-2, eflags);
     if (match < 0) {
 	result = TCL_ERROR;
 	goto done;
@@ -384,7 +410,7 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
 	 * so that "^" won't match.
 	 */
 
-	match = TclRegExpExecUniChar(interp, regExpr, w, wEnd - w,
+	match = TclRegExpExecUniChar(interp, regExpr, w, wEnd - w, 10,
 		((w > wStart) ? REG_NOTBOL : 0));
 	if (match < 0) {
 	    result = TCL_ERROR;
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c
index d65b19a..a4c9c37 100644
--- a/generic/tclRegexp.c
+++ b/generic/tclRegexp.c
@@ -10,7 +10,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclRegexp.c,v 1.1.2.3 1998/10/21 20:40:06 stanton Exp $
+ * RCS: @(#) $Id: tclRegexp.c,v 1.1.2.4 1998/11/11 01:44:53 stanton Exp $
  */
 
 #include "tclInt.h"
@@ -22,60 +22,42 @@
  * The routines in this file use Henry Spencer's regular expression
  * package contained in the following additional source files:
  *
- *	chr.h		tclRegexp.h	lex.c
- *	guts.h		color.c		locale.c
- *	wchar.h		compile.c	nfa.c
- *	wctype.h	exec.c
- *
- *	Copyright (c) 1986 by University of Toronto.
- *	Written by Henry Spencer.  Not derived from licensed software.
- *
- *	Permission is granted to anyone to use this software for any
- *	purpose on any computer system, and to redistribute it freely,
- *	subject to the following restrictions:
- *
- *	1. The author is not responsible for the consequences of use of
- *		this software, no matter how awful, even if they arise
- *		from defects in it.
- *
- *	2. The origin of this software must not be misrepresented, either
- *		by explicit claim or by omission.
- *
- *	3. Altered versions must be plainly marked as such, and must not
- *		be misrepresented as being the original software.
- *
- * Beware that some of this code is subtly aware of the way operator
- * precedence is structured in regular expressions.  Serious changes in
- * regular-expression syntax might require a total rethink.
+ *	regc_color.c	regc_cvec.c	regc_lex.c
+ *	regc_nfa.c	regcomp.c	regcustom.h
+ *	rege_dfa.c	regerror.c	regerrs.h
+ *	regex.h		regexec.c	regfree.c
+ *	regfronts.c	regguts.h
+ *
+ * Copyright (c) 1998 Henry Spencer.  All rights reserved.
+ * 
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results.  The author
+ * thanks all of them. 
+ * 
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ * 
+ * I'd appreciate being given credit for this package in the documentation
+ * of software which uses it, but that is not a requirement.
+ * 
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * *** NOTE: this code has been altered slightly for use in Tcl: ***
- * *** 1. Use ckalloc, ckfree, and ckrealloc instead of malloc,	 ***
- * ***    free, and realloc.					 ***
- * *** 2. Add extra argument to regexp to specify the real	 ***
- * ***    start of the string separately from the start of the	 ***
- * ***    current search. This is needed to search for multiple	 ***
- * ***    matches within a string.				 ***
- * *** 3. Names have been changed, e.g. from re_comp to		 ***
+ * *** 1. Names have been changed, e.g. from re_comp to		 ***
  * ***    TclRegComp, to avoid clashes with other 		 ***
  * ***    regexp implementations used by applications. 		 ***
- * *** 4. Various lint-like things, such as casting arguments	 ***
- * ***	  in procedure calls, removing debugging code and	 ***
- * ***	  unused vars and procs.	 	 	 	 ***
- * *** 5. Removed "backward-compatibility kludges" such as	 ***
- * ***	  REG_PEND and REG_STARTEND flags, the re_endp field in	 ***
- * ***	  the regex_t struct, and the fronts.c layer.		 ***
- * *** 6. Changed char to Tcl_UniChar.				 ***
- * *** 7. Removed -DPOSIX_MISTAKE compile-time flag.		 ***
- * ***	  This is now the default.				 ***
- * *** 8. For performance considerations, created new Unicode    ***
- * ***    interfaces to avoid having to convert between UTF and  ***
- * ***	  Unicode when we already had the Unicode string.  For   ***
- * ***    example, in a "regsub -all" we were converting the     ***
- * ***    match string to Unicode N times, where N is the number ***
- * ***    of bytes in the source string.                         ***
- * *** 9. Changed/widened some of the interfaces to allow	 ***
- * ***	  explicit passing of flags so that tclTest.c could      ***
- * ***    test the full range of acceptable flags.               ***
  */
 
 /*
@@ -181,7 +163,7 @@ Tcl_RegExpCompile(interp, string)
 
     if (iPtr->patterns[NUM_REGEXPS-1] != NULL) {
 	ckfree(iPtr->patterns[NUM_REGEXPS-1]);
-	regfree(&(iPtr->regexps[NUM_REGEXPS-1]->re));
+	re_ufree(&(iPtr->regexps[NUM_REGEXPS-1]->re));
 	ckfree((char *) iPtr->regexps[NUM_REGEXPS-1]);
     }
     for (i = NUM_REGEXPS - 2; i >= 0; i--) {
@@ -249,7 +231,7 @@ Tcl_RegExpExec(interp, re, string, start)
      * Perform the regexp match.
      */
 
-    result = TclRegExpExecUniChar(interp, re, uniString, numChars,
+    result = TclRegExpExecUniChar(interp, re, uniString, numChars, -1,
 	    ((string > start) ? REG_NOTBOL : 0));
 
     Tcl_DStringFree(&stringBuffer);
@@ -324,7 +306,7 @@ Tcl_RegExpRange(re, index, startPtr, endPtr)
  */
 
 int
-TclRegExpExecUniChar(interp, re, wString, numChars, flags)
+TclRegExpExecUniChar(interp, re, wString, numChars, nmatches, flags)
     Tcl_Interp *interp;		/* Interpreter to use for error reporting. */
     Tcl_RegExp re;		/* Compiled regular expression; returned by
 				 * a previous call to Tcl_RegExpCompile() or
@@ -332,14 +314,20 @@ TclRegExpExecUniChar(interp, re, wString, numChars, flags)
     CONST Tcl_UniChar *wString;	/* String against which to match re. */
     int numChars;		/* Length of string in Tcl_UniChars (must
 				 * be >= 0). */
+    int nmatches;		/* How many subexpression matches (counting
+				 * the whole match as subexpression 0) are
+				 * of interest.  -1 means "don't know". */
     int flags;			/* Regular expression flags. */
 {
     int status;
     TclRegexp *regexpPtr = (TclRegexp *) re;
+    size_t nm = regexpPtr->re.re_nsub + 1;
+
+    if (nmatches >= 0 && (size_t) nmatches < nm)
+	nm = (size_t) nmatches;
 
     status = re_uexec(&regexpPtr->re, wString, (size_t) numChars,
-	    (rm_detail_t *)NULL,
-	    regexpPtr->re.re_nsub + 1, regexpPtr->matches, flags);
+	    (rm_detail_t *)NULL, nm, regexpPtr->matches, flags);
 
     /*
      * Check for errors.
@@ -569,6 +557,7 @@ TclRegAbout(interp, re)
 	REG_UUNPORT,		"REG_UUNPORT",
 	REG_ULOCALE,		"REG_ULOCALE",
 	REG_UEMPTYMATCH,	"REG_UEMPTYMATCH",
+	REG_UIMPOSSIBLE,	"REG_UIMPOSSIBLE",
 	0,			"",
     };
     struct infoname *inf;
@@ -632,12 +621,12 @@ TclRegError(interp, msg, status)
     char *p;
 
     Tcl_ResetResult(interp);
-    n = regerror(status, (regex_t *)NULL, buf, sizeof(buf));
+    n = re_uerror(status, (regex_t *)NULL, buf, sizeof(buf));
     p = (n > sizeof(buf)) ? "..." : "";
     Tcl_AppendResult(interp, msg, buf, p, NULL);
 
     sprintf(cbuf, "%d", status);
-    (VOID) regerror(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf));
+    (VOID) re_uerror(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf));
     Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL);
 }
 
@@ -665,7 +654,7 @@ FreeRegexpInternalRep(objPtr)
 {
     TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
 
-    regfree(&regexpRepPtr->re);
+    re_ufree(&regexpRepPtr->re);
     if (regexpRepPtr->matches) {
 	ckfree((char *) regexpRepPtr->matches);
     }
@@ -677,7 +666,7 @@ FreeRegexpInternalRep(objPtr)
  *
  * DupRegexpInternalRep --
  *
- *	It is way to hairy to copy a regular expression, so we punt
+ *	It is way too hairy to copy a regular expression, so we punt
  *	and revert the object back to a vanilla string. 
  *
  * Results:
@@ -802,100 +791,3 @@ CompileRegexp(interp, string, length, flags)
 
     return regexpPtr;
 }
-
-/*
- *---------------------------------------------------------------------------
- *
- * TclRegXflags --
- *
- *	Parse a string of extended regexp flag letters, for testing.
- *
- * Results:
- *	No return value (you're on your own for errors here).
- *
- * Side effects:
- *	Modifies *cflagsPtr, a regcomp flags word, and *eflagsPtr, a
- *	regexec flags word, as appropriate.
- *
- *----------------------------------------------------------------------
- */
-
-VOID
-TclRegXflags(string, length, cflagsPtr, eflagsPtr)
-    char *string;		/* The string of flags. */
-    int length;			/* The length of the string in bytes. */
-    int *cflagsPtr;		/* compile flags word */
-    int *eflagsPtr;		/* exec flags word */
-{
-    int i;
-    int cflags;
-    int eflags;
-
-    cflags = *cflagsPtr;
-    eflags = *eflagsPtr;
-    for (i = 0; i < length; i++) {
-	switch (string[i]) {
-	    case 'a': {
-		cflags |= REG_ADVF;
-		break;
-	    }
-	    case 'b': {
-		cflags &= ~REG_ADVANCED;
-		break;
-	    }
-	    case 'e': {
-		cflags &= ~REG_ADVANCED;
-		cflags |= REG_EXTENDED;
-		break;
-	    }
-	    case 'q': {
-		cflags &= ~REG_ADVANCED;
-		cflags |= REG_QUOTE;
-		break;
-	    }
-	    case 'i': {
-		cflags |= REG_ICASE;
-		break;
-	    }
-	    case 'o': {			/* o for opaque */
-		cflags |= REG_NOSUB;
-		break;
-	    }
-	    case 'x': {
-		cflags |= REG_EXPANDED;
-		break;
-	    }
-	    case 'p': {
-		cflags |= REG_NLSTOP;
-		break;
-	    }
-	    case 'w': {
-		cflags |= REG_NLANCH;
-		break;
-	    }
-	    case 'n': {
-		cflags |= REG_NEWLINE;
-		break;
-	    }
-	    case '+': {
-		cflags |= REG_FAKEEC;
-		break;
-	    }
-	    case '^': {
-		eflags |= REG_NOTBOL;
-		break;
-	    }
-	    case '$': {
-		eflags |= REG_NOTEOL;
-		break;
-	    }
-	    case '%': {
-		eflags |= REG_SMALL;
-		break;
-	    }
-	}
-    }
-
-    *cflagsPtr = cflags;
-    *eflagsPtr = eflags;
-}
diff --git a/generic/tclRegexp.h b/generic/tclRegexp.h
index 9e56730..cd275f2 100644
--- a/generic/tclRegexp.h
+++ b/generic/tclRegexp.h
@@ -33,13 +33,13 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclRegexp.h,v 1.1.2.3 1998/10/21 20:40:06 stanton Exp $
+ * RCS: @(#) $Id: tclRegexp.h,v 1.1.2.4 1998/11/11 01:44:54 stanton Exp $
  */
 
 #ifndef _TCLREGEXP
 #define _TCLREGEXP
 
-#include "regcustom.h"
+#include "regex.h"
 
 #ifdef BUILD_tcl
 # undef TCL_STORAGE_CLASS
@@ -78,7 +78,7 @@ EXTERN VOID		TclRegXflags _ANSI_ARGS_((char *string, int length,
 			    int *cflagsPtr, int *eflagsPtr));
 EXTERN int		TclRegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp,
 			    Tcl_RegExp re, CONST Tcl_UniChar *uniString,
-			    int numChars, int flags));
+			    int numChars, int nmatches, int flags));
 EXTERN int		TclRegExpMatchObj _ANSI_ARGS_((Tcl_Interp *interp,
 			    char *string, Tcl_Obj *patObj));
 EXTERN void		TclRegExpRangeUniChar _ANSI_ARGS_((Tcl_RegExp re,
diff --git a/generic/tclTest.c b/generic/tclTest.c
index 2136b7c..b408fee 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -12,13 +12,14 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclTest.c,v 1.1.2.3 1998/10/21 20:40:07 stanton Exp $
+ * RCS: @(#) $Id: tclTest.c,v 1.1.2.4 1998/11/11 01:44:54 stanton Exp $
  */
 
 #define TCL_TEST
 
 #include "tclInt.h"
 #include "tclPort.h"
+#include "tclRegexp.h"
 #include <locale.h>
 
 /*
@@ -244,6 +245,11 @@ static int		TestparsevarObjCmd _ANSI_ARGS_((ClientData dummy,
 static int		TestparsevarnameObjCmd _ANSI_ARGS_((ClientData dummy,
 			    Tcl_Interp *interp, int objc,
 			    Tcl_Obj *CONST objv[]));
+static int		TestregexpObjCmd _ANSI_ARGS_((ClientData dummy,
+			    Tcl_Interp *interp, int objc,
+			    Tcl_Obj *CONST objv[]));
+static void		TestregexpXflags _ANSI_ARGS_((char *string,
+			    int length, int *cflagsPtr, int *eflagsPtr));
 static int		TestsaveresultCmd _ANSI_ARGS_((ClientData dummy,
 			    Tcl_Interp *interp, int objc,
 			    Tcl_Obj *CONST objv[]));
@@ -394,6 +400,8 @@ Tcltest_Init(interp)
 	    (ClientData) 0, (Tcl_CmdDeleteProc *) NULL);
     Tcl_CreateObjCommand(interp, "testparsevarname", TestparsevarnameObjCmd,
 	    (ClientData) 0, (Tcl_CmdDeleteProc *) NULL);
+    Tcl_CreateObjCommand(interp, "testregexp", TestregexpObjCmd,
+	    (ClientData) 0, (Tcl_CmdDeleteProc *) NULL);
     Tcl_CreateObjCommand(interp, "testsaveresult", TestsaveresultCmd,
 	    (ClientData) 0, (Tcl_CmdDeleteProc *) NULL);
     Tcl_CreateCommand(interp, "testsetassocdata", TestsetassocdataCmd,
@@ -2501,6 +2509,317 @@ TestparsevarnameObjCmd(clientData, interp, objc, objv)
 /*
  *----------------------------------------------------------------------
  *
+ * TestregexpObjCmd --
+ *
+ *	This procedure implements the "testregexp" command. It is
+ *	used to give a direct interface for regexp flags.  It's identical
+ *	to Tcl_RegexpObjCmd except for the REGEXP_TEST define, which
+ *	enables the -xflags option.
+ *
+ * Results:
+ *	A standard Tcl result.
+ *
+ * Side effects:
+ *	See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+	/* ARGSUSED */
+int
+TestregexpObjCmd(dummy, interp, objc, objv)
+    ClientData dummy;			/* Not used. */
+    Tcl_Interp *interp;			/* Current interpreter. */
+    int objc;				/* Number of arguments. */
+    Tcl_Obj *CONST objv[];		/* Argument objects. */
+{
+    int i, result, indices, stringLength, wLen, match, about;
+    int hasxflags, cflags, eflags;
+    Tcl_RegExp regExpr;
+    char *string;
+    Tcl_DString stringBuffer, valueBuffer;
+    Tcl_UniChar *wStart;
+#   define	REGEXP_TEST	/* yes */
+    static char *options[] = {
+	"-indices",	"-nocase",	"-about",	"-expanded",
+	"-line",	"-linestop",	"-lineanchor",
+#ifdef REGEXP_TEST
+	"-xflags",
+#endif
+	"--",		(char *) NULL
+    };
+    enum options {
+	REGEXP_INDICES, REGEXP_NOCASE,	REGEXP_ABOUT,	REGEXP_EXPANDED,
+	REGEXP_MULTI,	REGEXP_NOCROSS,	REGEXP_NEWL,
+#ifdef REGEXP_TEST
+	REGEXP_XFLAGS,
+#endif
+	REGEXP_LAST
+    };
+#ifndef REGEXP_TEST
+#   define REGEXP_XFLAGS	-1	/* impossible value */
+#   define TestregexpXflags(a,b,c,d)	/* do nothing */
+#endif
+
+    indices = 0;
+    about = 0;
+    cflags = REG_ADVANCED;
+    eflags = 0;
+    hasxflags = 0;
+    
+    for (i = 1; i < objc; i++) {
+	char *name;
+	int index;
+
+	name = Tcl_GetString(objv[i]);
+	if (name[0] != '-') {
+	    break;
+	}
+	if (Tcl_GetIndexFromObj(interp, objv[i], options, "switch", TCL_EXACT,
+		&index) != TCL_OK) {
+	    return TCL_ERROR;
+	}
+	switch ((enum options) index) {
+	    case REGEXP_INDICES: {
+		indices = 1;
+		break;
+	    }
+	    case REGEXP_NOCASE: {
+		cflags |= REG_ICASE;
+		break;
+	    }
+	    case REGEXP_ABOUT: {
+		about = 1;
+		break;
+	    }
+	    case REGEXP_EXPANDED: {
+		cflags |= REG_EXPANDED;
+		break;
+	    }
+	    case REGEXP_MULTI: {
+		cflags |= REG_NEWLINE;
+		break;
+	    }
+	    case REGEXP_NOCROSS: {
+		cflags |= REG_NLSTOP;
+		break;
+	    }
+	    case REGEXP_NEWL: {
+		cflags |= REG_NLANCH;
+		break;
+	    }
+	    case REGEXP_XFLAGS: {
+		hasxflags = 1;
+		break;
+	    }
+	    case REGEXP_LAST: {
+		i++;
+		goto endOfForLoop;
+	    }
+	}
+    }
+
+    endOfForLoop:
+    if (objc - i < hasxflags + 2 - about) {
+	Tcl_WrongNumArgs(interp, 1, objv,
+		"?switches? exp string ?matchVar? ?subMatchVar subMatchVar ...?");
+	return TCL_ERROR;
+    }
+    objc -= i;
+    objv += i;
+
+    if (hasxflags) {
+	string = Tcl_GetStringFromObj(objv[0], &stringLength);
+	TestregexpXflags(string, stringLength, &cflags, &eflags);
+	objc--;
+	objv++;
+    }
+
+    regExpr = TclRegCompObj(interp, objv[0], cflags);
+    if (regExpr == NULL) {
+	return TCL_ERROR;
+    }
+
+    if (about) {
+	if (TclRegAbout(interp, regExpr) < 0) {
+	    return TCL_ERROR;
+	}
+	return TCL_OK;
+    }
+
+    result = TCL_OK;
+    string = Tcl_GetStringFromObj(objv[1], &stringLength);
+
+    Tcl_DStringInit(&valueBuffer);
+    
+    Tcl_DStringInit(&stringBuffer);
+    wStart = TclUtfToUniCharDString(string, stringLength, &stringBuffer);
+    wLen = Tcl_DStringLength(&stringBuffer) / sizeof(Tcl_UniChar);
+
+    match = TclRegExpExecUniChar(interp, regExpr, wStart, wLen, objc-2, eflags);
+    if (match < 0) {
+	result = TCL_ERROR;
+	goto done;
+    }
+    if (match == 0) {
+	/*
+	 * Set the interpreter's object result to an integer object w/ value 0. 
+	 */
+	
+	Tcl_SetIntObj(Tcl_GetObjResult(interp), 0);
+	goto done;
+    }
+
+    /*
+     * If additional variable names have been specified, return
+     * index information in those variables.
+     */
+
+    objc -= 2;
+    objv += 2;
+
+    for (i = 0; i < objc; i++) {
+	char *varName, *value;
+	int start, end;
+	
+	varName = Tcl_GetString(objv[i]);
+
+	TclRegExpRangeUniChar(regExpr, i, &start, &end);
+	if (start < 0) {
+	    if (indices) {
+		value = Tcl_SetVar(interp, varName, "-1 -1", 0);
+	    } else {
+		value = Tcl_SetVar(interp, varName, "", 0);
+	    }
+	} else {
+	    if (indices) {
+		char info[TCL_INTEGER_SPACE * 2];
+		
+		sprintf(info, "%d %d", start, end - 1);
+		value = Tcl_SetVar(interp, varName, info, 0);
+	    } else {
+		value = TclUniCharToUtfDString(wStart + start, end - start,
+			&valueBuffer);
+		value = Tcl_SetVar(interp, varName, value, 0);
+		Tcl_DStringSetLength(&valueBuffer, 0);
+	    }
+	}
+	if (value == NULL) {
+	    Tcl_AppendResult(interp, "couldn't set variable \"",
+		    varName, "\"", (char *) NULL);
+	    result = TCL_ERROR;
+	    goto done;
+	}
+    }
+
+    /*
+     * Set the interpreter's object result to an integer object w/ value 1. 
+     */
+	
+    Tcl_SetIntObj(Tcl_GetObjResult(interp), 1);
+
+    done:
+    Tcl_DStringFree(&stringBuffer);
+    Tcl_DStringFree(&valueBuffer);
+    return result;
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * TestregexpXflags --
+ *
+ *	Parse a string of extended regexp flag letters, for testing.
+ *
+ * Results:
+ *	No return value (you're on your own for errors here).
+ *
+ * Side effects:
+ *	Modifies *cflagsPtr, a regcomp flags word, and *eflagsPtr, a
+ *	regexec flags word, as appropriate.
+ *
+ *----------------------------------------------------------------------
+ */
+
+VOID
+TestregexpXflags(string, length, cflagsPtr, eflagsPtr)
+    char *string;		/* The string of flags. */
+    int length;			/* The length of the string in bytes. */
+    int *cflagsPtr;		/* compile flags word */
+    int *eflagsPtr;		/* exec flags word */
+{
+    int i;
+    int cflags;
+    int eflags;
+
+    cflags = *cflagsPtr;
+    eflags = *eflagsPtr;
+    for (i = 0; i < length; i++) {
+	switch (string[i]) {
+	    case 'a': {
+		cflags |= REG_ADVF;
+		break;
+	    }
+	    case 'b': {
+		cflags &= ~REG_ADVANCED;
+		break;
+	    }
+	    case 'e': {
+		cflags &= ~REG_ADVANCED;
+		cflags |= REG_EXTENDED;
+		break;
+	    }
+	    case 'q': {
+		cflags &= ~REG_ADVANCED;
+		cflags |= REG_QUOTE;
+		break;
+	    }
+	    case 'o': {			/* o for opaque */
+		cflags |= REG_NOSUB;
+		break;
+	    }
+	    case '+': {
+		cflags |= REG_FAKEEC;
+		break;
+	    }
+	    case ',': {
+		cflags |= REG_PROGRESS;
+		break;
+	    }
+	    case '.': {
+		cflags |= REG_DUMP;
+		break;
+	    }
+	    case ':': {
+		eflags |= REG_MTRACE;
+		break;
+	    }
+	    case ';': {
+		eflags |= REG_FTRACE;
+		break;
+	    }
+	    case '^': {
+		eflags |= REG_NOTBOL;
+		break;
+	    }
+	    case '$': {
+		eflags |= REG_NOTEOL;
+		break;
+	    }
+	    case '%': {
+		eflags |= REG_SMALL;
+		break;
+	    }
+	}
+    }
+
+    *cflagsPtr = cflags;
+    *eflagsPtr = eflags;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
  * TestsetassocdataCmd --
  *
  *	This procedure implements the "testsetassocdata" command. It is used
diff --git a/tests/regexp.test b/tests/regexp.test
index a97a258..8e52abf 100644
--- a/tests/regexp.test
+++ b/tests/regexp.test
@@ -10,7 +10,7 @@
 # See the file "license.terms" for information on usage and redistribution
 # of this file, and for a DISCLAIMER OF ALL WARRANTIES.
 #
-# RCS: @(#) $Id: regexp.test,v 1.1.2.3 1998/10/21 20:40:08 stanton Exp $
+# RCS: @(#) $Id: regexp.test,v 1.1.2.4 1998/11/11 01:44:56 stanton Exp $
 
 if {[string compare test [info procs test]] == 1} then {source defs}
 
@@ -183,7 +183,7 @@ test regexp-6.2 {regexp errors} {
 } {1 {wrong # args: should be "regexp ?switches? exp string ?matchVar? ?subMatchVar subMatchVar ...?"}}
 test regexp-6.3 {regexp errors} {
     list [catch {regexp -gorp a} msg] $msg
-} {1 {bad switch "-gorp": must be -indices, -nocase, -about, -expanded, or --}}
+} {1 {bad switch "-gorp": must be -indices, -nocase, -about, -expanded, -line, -linestop, -lineanchor, or --}}
 test regexp-6.4 {regexp errors} {
     list [catch {regexp a( b} msg] $msg
 } {1 {couldn't compile regular expression pattern: parentheses () not balanced}}
diff --git a/tests/regtests.test b/tests/regtests.test
index 02efa79..d4b93e2 100644
--- a/tests/regtests.test
+++ b/tests/regtests.test
@@ -56,6 +56,7 @@ if {[string compare test [info procs test]] == 1} then {source defs}
 #	B	ERE/ARE literal-_b_race heuristic used
 #	E	backslash (_e_scape) seen within []
 #	H	looka_h_ead constraint seen
+#	I	_i_mpossible to match
 #	L	_l_ocale-specific construct seen
 #	M	unportable (_m_achine-specific) construct seen
 #	N	RE can match empty (_n_ull) string
@@ -74,7 +75,7 @@ if {[string compare test [info procs test]] == 1} then {source defs}
 # test procedures and related
 
 set ask "about"
-set xflags "unsupported0"
+set xflags "xflags"
 set testbypassed 0
 
 # re_info abbreviation mapping table
@@ -82,6 +83,7 @@ set infonames(A) "REG_UBSALNUM"
 set infonames(B) "REG_UBRACES"
 set infonames(E) "REG_UBBS"
 set infonames(H) "REG_ULOOKAHEAD"
+set infonames(I) "REG_UIMPOSSIBLE"
 set infonames(L) "REG_ULOCALE"
 set infonames(M) "REG_UUNPORT"
 set infonames(N) "REG_UEMPTYMATCH"
@@ -90,7 +92,7 @@ set infonames(Q) "REG_UBOUNDS"
 set infonames(R) "REG_UBACKREF"
 set infonames(S) "REG_UUNSPEC"
 set infonames(U) "REG_UPBOTCH"
-set infonameorder "RHQBAUEPSMLN"	;# must match bit order, lsb first
+set infonameorder "RHQBAUEPSMLNI"	;# must match bit order, lsb first
 
 # set major test number and description
 proc doing {major desc} {
@@ -132,6 +134,9 @@ proc flags {fl} {
 		switch -exact -- $f {
 		"i" { lappend args "-nocase" }
 		"x" { lappend args "-expanded" }
+		"n" { lappend args "-line" }
+		"p" { lappend args "-linestop" }
+		"w" { lappend args "-lineanchor" }
 		"-" { }
 		default { append flags $f }
 		}
@@ -169,7 +174,7 @@ proc e {testid flags re err} {
 		return
 	}
 
-	set cmd [concat [list regexp -$ask] [flags $flags] [list $re]]
+	set cmd [concat [list testregexp -$ask] [flags $flags] [list $re]]
 	set run "list \[catch \{$cmd\}\] \[lindex \$errorCode 1\]"
 	test $prefix.[tno $testid] [desc $testid] $run [list 1 REG_$err]
 }
@@ -192,7 +197,7 @@ proc f {testid flags re target args} {
 
 	set f [flags $flags]
 	set infoflags [infoflags $flags]
-	set ccmd [concat [list regexp -$ask] $f [list $re]]
+	set ccmd [concat [list testregexp -$ask] $f [list $re]]
 	set nsub [expr [llength $args] - 1]
 	if {$nsub == -1} {
 		# didn't tell us number of subexps
@@ -205,7 +210,7 @@ proc f {testid flags re target args} {
 	test $prefix.[tno $testid] [desc $testid] $ccmd $info
 
 	set testid [lreplace $testid end end "execute"]
-	set ecmd [concat [list regexp] $f [list $re $target]]
+	set ecmd [concat [list testregexp] $f [list $re $target]]
 	test $prefix.[tno $testid] [desc $testid] $ecmd 0
 }
 
@@ -229,8 +234,8 @@ proc matchexpected {opts testid flags re target args} {
 
 	set f [flags $flags]
 	set infoflags [infoflags $flags]
-	set ccmd [concat [list regexp -$ask] $f [list $re]]
-	set ecmd [concat [list regexp] $opts $f [list $re $target]]
+	set ccmd [concat [list testregexp -$ask] $f [list $re]]
+	set ecmd [concat [list testregexp] $opts $f [list $re $target]]
 
 	set nsub [expr [llength $args] - 1]
 	set names [list]
@@ -364,7 +369,7 @@ m  5	b	^*		*	*
 e  6	-	^*		BADRPT
 f  7	&	^b		^b
 m  8	b	x^		x^	x^
-e  9	-	x^		IMPOSS
+f  9	I	x^		x
 m 10	n	"\n^"		"x\nb"	"\n"
 f 11	bS	{\(^b\)}	^b
 m 12	-	(^b)		b	b	b
@@ -372,7 +377,7 @@ m 13	&	{x$}		x	x
 m 14	bS	{\(x$\)}	x	x	x
 m 15	-	{(x$)}		x	x	x
 m 16	b	{x$y}		"x\$y"	"x\$y"
-e 17	-	{x$y}		IMPOSS
+f 17	I	{x$y}		xy
 m 18	n	"x\$\n"		"x\n"	"x\n"
 e 19	-	+		BADRPT
 e 20	-	?		BADRPT
@@ -473,6 +478,8 @@ m 40	eE	{a[\\]b}	"a\\b"	"a\\b"
 m 41	bE	{a[\\]b}	"a\\b"	"a\\b"
 e 42	-	{a[\Z]b}	EESCAPE
 m 43	&	{a[[b]c}	"a\[c"	"a\[c"
+m 44	EMP	{a[\u00fe-\u0507][\u00ff-\u0300]b} \
+			"a\u0102\u02ffb"	"a\u0102\u02ffb"
 
 
 
@@ -539,6 +546,12 @@ e 23	b	{\<*}		BADRPT
 e 24	b	{\>*}		BADRPT
 e 25	-	{\y*}		BADRPT
 e 26	-	{\Y*}		BADRPT
+m 27	LP	{\ma}		a	a
+f 28	LP	{\ma}		ba
+m 29	LP	{a\M}		a	a
+f 30	LP	{a\M}		ab
+f 31	ILP	{\Ma}		a
+f 32	ILP	{a\m}		a
 
 
 
@@ -587,7 +600,7 @@ m 19	P	"a\\U00000008x"	"a\bx"	"a\bx"
 e 20	-	{a\U0000008x}	EESCAPE
 m 21	P	"a\\vb"		"a\vb"	"a\vb"
 m 22	MP	"a\\x08x"	"a\bx"	"a\bx"
-e 23	-	{a\xx}		EESCAPE
+e 23	-	{a\xq}		EESCAPE
 m 24	MP	"a\\x0008x"	"a\bx"	"a\bx"
 e 25	-	{a\z}		EESCAPE
 m 26	MP	"a\\010b"	"a\bb"	"a\bb"
@@ -663,7 +676,7 @@ m  1	P	a(?#comment)b	ab	ab
 
 
 doing 18 "unmatchable REs"
-e  1	-	a^b		IMPOSS
+f  1	I	a^b		ab
 
 
 
@@ -757,7 +770,7 @@ doing 22 "multicharacter collating elements"
 # again ugh
 # currently disabled because the fake MCCE we use for testing is unavailable
 xx m  1	&+L	{a[c]e}		ace	ace
-xx e  2	&+	{a[c]h}		IMPOSS
+xx f  2	&+I	{a[c]h}		ach
 xx m  3	&+L	{a[[.ch.]]}	ach	ach
 xx f  4	&+L	{a[[.ch.]]}	ace
 xx m  5	&+L	{a[c[.ch.]]}	ac	ac
diff --git a/unix/Makefile.in b/unix/Makefile.in
index fd2bdbc..cbcd510 100644
--- a/unix/Makefile.in
+++ b/unix/Makefile.in
@@ -5,7 +5,7 @@
 # "autoconf" program (constructs like "@foo@" will get replaced in the
 # actual Makefile.
 #
-# RCS: @(#) $Id: Makefile.in,v 1.1.2.4 1998/10/21 20:40:12 stanton Exp $
+# RCS: @(#) $Id: Makefile.in,v 1.1.2.5 1998/11/11 01:44:57 stanton Exp $
 
 # Current Tcl version;  used in various names.
 
@@ -618,7 +618,7 @@ regcomp.o: $(REGHDRS) $(GENERIC_DIR)/regcomp.c $(GENERIC_DIR)/regc_lex.c \
 		$(GENERIC_DIR)/regc_nfa.c $(GENERIC_DIR)/regc_cvec.c
 	$(CC) -c $(CC_SWITCHES) $(GENERIC_DIR)/regcomp.c
 
-regexec.o: $(REGHDRS) $(GENERIC_DIR)/regexec.c
+regexec.o: $(REGHDRS) $(GENERIC_DIR)/regexec.c $(GENERIC_DIR)/rege_dfa.c
 	$(CC) -c $(CC_SWITCHES) $(GENERIC_DIR)/regexec.c
 
 regfree.o: $(REGHDRS) $(GENERIC_DIR)/regfree.c
diff --git a/win/makefile.vc b/win/makefile.vc
index ac73808..de9f892 100644
--- a/win/makefile.vc
+++ b/win/makefile.vc
@@ -4,7 +4,7 @@
 # of this file, and for a DISCLAIMER OF ALL WARRANTIES.
 # 
 # Copyright (c) 1995-1996 Sun Microsystems, Inc.
-# RCS: @(#) $Id: makefile.vc,v 1.1.2.7 1998/11/04 04:41:26 stanton Exp $
+# RCS: @(#) $Id: makefile.vc,v 1.1.2.8 1998/11/11 01:44:57 stanton Exp $
 
 # Does not depend on the presence of any environment variables in
 # order to compile tcl; all needed information is derived from 
@@ -466,12 +466,23 @@ $(TMPDIR)\tclAppInit.obj : $(WINDIR)\tclAppInit.c
 # Dedependency rules
 
 $(GENERICDIR)\regcomp.c: \
-	$(GENERICDIR)\regc_locale.c \
 	$(GENERICDIR)\regguts.h \
 	$(GENERICDIR)\regc_lex.c \
 	$(GENERICDIR)\regc_color.c \
 	$(GENERICDIR)\regc_nfa.c \
-	$(GENERICDIR)\regc_cvec.c
+	$(GENERICDIR)\regc_cvec.c \
+	$(GENERICDIR)\regc_locale.c
+$(GENERICDIR)\regcustom.h: \
+	$(GENERICDIR)\tclInt.h \
+	$(GENERICDIR)\tclPort.h \
+	$(GENERICDIR)\regex.h
+$(GENERICDIR)\regexec.c: \
+	$(GENERICDIR)\rege_dfa.c \
+	$(GENERICDIR)\regguts.h
+$(GENERICDIR)\regerror.c: $(GENERICDIR)\regguts.h
+$(GENERICDIR)\regfree.c: $(GENERICDIR)\regguts.h
+$(GENERICDIR)\regfronts.c: $(GENERICDIR)\regguts.h
+$(GENERICDIR)\regguts.h: $(GENERICDIR)\regcustom.h
 
 #
 # Implicit rules
-- 
cgit v0.12