From 96c82d7028795dedc3360764dc6bfd03782a626f Mon Sep 17 00:00:00 2001
From: stanton <stanton>
Date: Tue, 17 Nov 1998 21:38:37 +0000
Subject: integrated latest changes from Henry Spencer, mostly performance
 improvements renamed re_u* interfaces to TclRe* to avoid conflicts with other
 software that uses the regular expression library

---
 generic/regc_nfa.c  |  21 +-
 generic/regcomp.c   |  80 ++++-
 generic/regcustom.h |  13 +-
 generic/rege_dfa.c  | 180 +++++++----
 generic/regerrs.h   |   2 +-
 generic/regex.h     |  12 +-
 generic/regexec.c   | 229 +++++++++-----
 generic/regguts.h   |   1 -
 generic/tclRegexp.c |  14 +-
 tests/reg.test      | 881 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 10 files changed, 1243 insertions(+), 190 deletions(-)
 create mode 100644 tests/reg.test
diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c
index 8143181..80e9ac7 100644
--- a/generic/regc_nfa.c
+++ b/generic/regc_nfa.c
@@ -1192,7 +1192,8 @@ struct cnfa *cnfa;
 	narcs = 0;
 	for (s = nfa->states; s != NULL; s = s->next) {
 		nstates++;
-		narcs += s->nouts + 1;
+		narcs += 1 + s->nouts + 1;
+		/* 1 as a fake for flags, nouts for arcs, 1 as endmarker */
 	}
 
 	cnfa->states = (struct carc **)MALLOC(nstates * sizeof(struct carc *));
@@ -1213,12 +1214,14 @@ struct cnfa *cnfa;
 	cnfa->eos[0] = nfa->eos[0];
 	cnfa->eos[1] = nfa->eos[1];
 	cnfa->ncolors = maxcolor(nfa->cm) + 1;
-	cnfa->flags = LEFTANCH;		/* tentatively */
+	cnfa->flags = 0;
 
 	ca = cnfa->arcs;
 	for (s = nfa->states; s != NULL; s = s->next) {
 		assert((size_t)s->no < nstates);
 		cnfa->states[s->no] = ca;
+		ca->co = 0;		/* clear and skip flags "arc" */
+		ca++;
 		first = ca;
 		for (a = s->outs; a != NULL; a = a->outchain)
 			switch (a->type) {
@@ -1246,10 +1249,10 @@ struct cnfa *cnfa;
 	assert(ca == &cnfa->arcs[narcs]);
 	assert(cnfa->nstates != 0);
 
+	/* mark no-progress states */
 	for (a = nfa->pre->outs; a != NULL; a = a->outchain)
-		if (a->type == PLAIN && a->co != nfa->bos[0] &&
-							a->co != nfa->bos[1])
-			cnfa->flags &= ~LEFTANCH;
+		cnfa->states[a->to->no]->co = 1;
+	cnfa->states[nfa->pre->no]->co = 1;
 }
 
 /*
@@ -1480,8 +1483,6 @@ FILE *f;
 		fprintf(f, ", eol [%ld]", (long)cnfa->eos[1]);
 	if (cnfa->flags&HASLACONS)
 		fprintf(f, ", haslacons");
-	if (cnfa->flags&LEFTANCH)
-		fprintf(f, ", leftanch");
 	fprintf(f, "\n");
 	for (st = 0; st < cnfa->nstates; st++)
 		dumpcstate(st, cnfa->states[st], cnfa, f);
@@ -1505,9 +1506,9 @@ FILE *f;
 	int i;
 	int pos;
 
-	fprintf(f, "%d.", st);
+	fprintf(f, "%d%s", st, (ca[0].co) ? ":" : ".");
 	pos = 1;
-	for (i = 0; ca[i].co != COLORLESS; i++) {
+	for (i = 1; ca[i].co != COLORLESS; i++) {
 		if (ca[i].co < cnfa->ncolors)
 			fprintf(f, "\t[%ld]->%d", (long)ca[i].co, ca[i].to);
 		else
@@ -1519,7 +1520,7 @@ FILE *f;
 		} else
 			pos++;
 	}
-	if (i == 0 || pos != 1)
+	if (i == 1 || pos != 1)
 		fprintf(f, "\n");
 	fflush(f);
 }
diff --git a/generic/regcomp.c b/generic/regcomp.c
index 620dc1c..7a4ae34 100644
--- a/generic/regcomp.c
+++ b/generic/regcomp.c
@@ -14,6 +14,7 @@
 int compile _ANSI_ARGS_((regex_t *, CONST chr *, size_t, int));
 static VOID moresubs _ANSI_ARGS_((struct vars *, int));
 static int freev _ANSI_ARGS_((struct vars *, int));
+static VOID makescan _ANSI_ARGS_((struct vars *, struct nfa *));
 static struct subre *parse _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *));
 static struct subre *parsebranch _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *, int));
 static VOID parseqatom _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *, struct subre *));
@@ -369,13 +370,10 @@ int flags;
 	for (i = 1; i < v->nlacons; i++)
 		nfanode(v, &v->lacons[i], debug);
 	CNOERR();
-	rainbow(v->nfa, v->cm, PLAIN, COLORLESS, v->nfa->pre, v->nfa->pre);
-	newarc(v->nfa, PLAIN, v->nfa->bos[0], v->nfa->pre, v->nfa->pre);
-	newarc(v->nfa, PLAIN, v->nfa->bos[1], v->nfa->pre, v->nfa->pre);
-	newarc(v->nfa, PLAIN, v->nfa->eos[0], v->nfa->pre, v->nfa->pre);
-	newarc(v->nfa, PLAIN, v->nfa->eos[1], v->nfa->pre, v->nfa->pre);
 	(DISCARD)optimize(v->nfa, debug);
 	CNOERR();
+	makescan(v, v->nfa);
+	CNOERR();
 	compact(v->nfa, &g->search);
 	CNOERR();
 
@@ -470,6 +468,78 @@ int err;
 }
 
 /*
+ - makescan - turn an NFA into a fast-scan NFA (implicit prepend of .*?)
+ * NFA must have been optimize()d already.
+ ^ static VOID makescan(struct vars *, struct nfa *);
+ */
+static VOID
+makescan(v, nfa)
+struct vars *v;
+struct nfa *nfa;
+{
+	struct arc *a;
+	struct arc *b;
+	struct state *pre = nfa->pre;
+	struct state *s;
+	struct state *s2;
+	struct state *slist;
+
+	/* no changes are needed if it's anchored */
+	for (a = pre->outs; a != NULL; a = a->outchain) {
+		assert(a->type == PLAIN);
+		if (a->co != nfa->bos[0] && a->co != nfa->bos[1])
+			break;
+	}
+	if (a == NULL)
+		return;
+
+	/* add implicit .* in front */
+	rainbow(nfa, v->cm, PLAIN, COLORLESS, pre, pre);
+
+	/* and ^* and \Z* too -- not always necessary, but harmless */
+	newarc(nfa, PLAIN, nfa->bos[0], pre, pre);
+	newarc(nfa, PLAIN, nfa->bos[1], pre, pre);
+
+	/*
+	 * Now here's the subtle part.  Because many REs have no lookback
+	 * constraints, often knowing when you were in the pre state tells
+	 * you little; it's the next state(s) that are informative.  But
+	 * some of them may have other inarcs, i.e. it may be possible to
+	 * make actual progress and then return to one of them.  We must
+	 * de-optimize such cases, splitting each such state into progress
+	 * and no-progress states.
+	 */
+
+	/* first, make a list of the states */
+	slist = NULL;
+	for (a = pre->outs; a != NULL; a = a->outchain) {
+		s = a->to;
+		for (b = s->ins; b != NULL; b = b->inchain)
+			if (b->from != pre)
+				break;
+		if (b != NULL) {		/* must be split */
+			s->tmp = slist;
+			slist = s;
+		}
+	}
+
+	/* do the splits */
+	for (s = slist; s != NULL; s = s2) {
+		s2 = newstate(nfa);
+		copyouts(nfa, s, s2);
+		for (a = s->ins; a != NULL; a = b) {
+			b = a->inchain;
+			if (a->from != pre) {
+				cparc(nfa, a, a->from, s2);
+				freearc(nfa, a);
+			}
+		}
+		s2 = s->tmp;
+		s->tmp = NULL;		/* clean up while we're at it */
+	}
+}
+
+/*
  - parse - parse an RE
  * This is actually just the top level, which parses a bunch of branches
  * tied together with '|'.  They appear in the tree as the left children
diff --git a/generic/regcustom.h b/generic/regcustom.h
index eeaafa0..9ba4a2a 100644
--- a/generic/regcustom.h
+++ b/generic/regcustom.h
@@ -1,6 +1,5 @@
 /* headers (which also pick up the standard ones, or equivalents) */
 #include "tclInt.h"
-#include "tclPort.h"
 
 /* overrides for regguts.h definitions */
 /* function-pointer declarations */
@@ -41,16 +40,16 @@
 #define	__REG_VOID_T	VOID
 #define	__REG_CONST	CONST
 /* names and declarations */
-#define	__REG_WIDE_COMPILE	re_ucomp
-#define	__REG_WIDE_EXEC		re_uexec
+#define	__REG_WIDE_COMPILE	TclReComp
+#define	__REG_WIDE_EXEC		TclReExec
 #ifndef __REG_NOFRONT
 #define	__REG_NOFRONT		/* don't want regcomp() and regexec() */
 #endif
 #ifndef __REG_NOCHAR
 #define	__REG_NOCHAR		/* or the char versions */
 #endif
-#define	regfree		re_ufree
-#define	regerror	re_uerror
+#define	regfree		TclReFree
+#define	regerror	TclReError
 /* --- end --- */
 
 
@@ -74,8 +73,8 @@ typedef int celt;		/* type to hold chr, MCCE number, or NOCELT */
 #define	iscspace(x)	TclUniCharIsSpace(x)
 
 /* name the external functions */
-#define	compile		re_ucomp
-#define	exec		re_uexec
+#define	compile		TclReComp
+#define	exec		TclReExec
 
 /* enable/disable debugging code (by whether REG_DEBUG is defined or not) */
 #ifdef notdef
diff --git a/generic/rege_dfa.c b/generic/rege_dfa.c
index 6e4b941..eb31ffc6 100644
--- a/generic/rege_dfa.c
+++ b/generic/rege_dfa.c
@@ -115,13 +115,13 @@ chr **coldp;			/* store coldstart pointer here, if nonNULL */
 	chr *realmax = (max == v->stop) ? max : max + 1;
 	color co;
 	struct sset *css;
-	struct sset *firstss;
 	struct sset *ss;
 	struct colormap *cm = d->cm;
+	chr *nopr;
+	int i;
 
 	/* initialize */
 	css = initialize(v, d, start);
-	firstss = css;
 	cp = start;
 
 	/* startup */
@@ -175,72 +175,93 @@ chr **coldp;			/* store coldstart pointer here, if nonNULL */
 
 	if (ss == NULL)
 		return NULL;
-	if (ss->flags&POSTSTATE) {
-		assert(firstss->flags&STARTER);
-		assert(firstss->lastseen != NULL);
-		if (coldp != NULL)
-			*coldp = firstss->lastseen;
+	else if (ss->flags&POSTSTATE) {
 		assert(cp >= realmin);
-		return cp - 1;
-	}
-
-	/* shutdown */
-	FDEBUG(("--- shutdown at c%d ---\n", css - d->ssets));
-	if (cp == v->stop && max == v->stop) {
+		cp--;
+	} else if (cp == v->stop && max == v->stop) {
 		co = d->cnfa->eos[(v->eflags&REG_NOTEOL) ? 0 : 1];
 		FDEBUG(("color %ld\n", (long)co));
 		ss = miss(v, d, css, co, cp, start);
-		/* special case:  match ended at eol? */
-		if (ss != NULL && (ss->flags&POSTSTATE)) {
-			assert(firstss->flags&STARTER);
-			assert(firstss->lastseen != NULL);
-			if (coldp != NULL)
-				*coldp = firstss->lastseen;
-			return cp;
-		}
+		/* match might have ended at eol */
 	}
 
-	return NULL;
+	if (ss == NULL || !(ss->flags&POSTSTATE))
+		return NULL;
+
+	/* find last no-progress state set, if any */
+	nopr = d->lastnopr;
+	for (ss = d->ssets, i = 0; i < d->nssused; ss++, i++)
+		if ((ss->flags&NOPROGRESS) && nopr != ss->lastseen &&
+					(nopr == NULL || nopr < ss->lastseen))
+			nopr = ss->lastseen;
+	assert(nopr != NULL);
+	if (coldp != NULL)
+		*coldp = (nopr == v->start) ? nopr : nopr-1;
+	return cp;
 }
 
 /*
  - newdfa - set up a fresh DFA
  ^ static struct dfa *newdfa(struct vars *, struct cnfa *,
- ^ 	struct colormap *);
+ ^ 	struct colormap *, struct smalldfa *);
  */
 static struct dfa *
-newdfa(v, cnfa, cm)
+newdfa(v, cnfa, cm, small)
 struct vars *v;
 struct cnfa *cnfa;
 struct colormap *cm;
+struct smalldfa *small;		/* preallocated space, may be NULL */
 {
-	struct dfa *d = (struct dfa *)MALLOC(sizeof(struct dfa));
+	struct dfa *d;
+	size_t nss = cnfa->nstates * 2;
 	int wordsper = (cnfa->nstates + UBITS - 1) / UBITS;
-	struct sset *ss;
-	int i;
+	struct smalldfa *smallwas = small;
 
 	assert(cnfa != NULL && cnfa->nstates != 0);
-	if (d == NULL) {
-		ERR(REG_ESPACE);
-		return NULL;
-	}
 
-	d->ssets = (struct sset *)MALLOC(CACHE * sizeof(struct sset));
-	d->statesarea = (unsigned *)MALLOC((CACHE+WORK) * wordsper *
+	if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS) {
+		assert(wordsper == 1);
+		if (small == NULL) {
+			small = (struct smalldfa *)MALLOC(
+						sizeof(struct smalldfa));
+			if (small == NULL) {
+				ERR(REG_ESPACE);
+				return NULL;
+			}
+		}
+		d = &small->dfa;
+		d->ssets = small->ssets;
+		d->statesarea = small->statesarea;
+		d->work = &d->statesarea[nss];
+		d->outsarea = small->outsarea;
+		d->incarea = small->incarea;
+		d->cptsmalloced = 0;
+		d->mallocarea = (smallwas == NULL) ? (char *)small : NULL;
+	} else {
+		d = (struct dfa *)MALLOC(sizeof(struct dfa));
+		if (d == NULL) {
+			ERR(REG_ESPACE);
+			return NULL;
+		}
+		d->ssets = (struct sset *)MALLOC(nss * sizeof(struct sset));
+		d->statesarea = (unsigned *)MALLOC((nss+WORK) * wordsper *
 							sizeof(unsigned));
-	d->work = &d->statesarea[CACHE * wordsper];
-	d->outsarea = (struct sset **)MALLOC(CACHE * cnfa->ncolors *
+		d->work = &d->statesarea[nss * wordsper];
+		d->outsarea = (struct sset **)MALLOC(nss * cnfa->ncolors *
 							sizeof(struct sset *));
-	d->incarea = (struct arcp *)MALLOC(CACHE * cnfa->ncolors *
+		d->incarea = (struct arcp *)MALLOC(nss * cnfa->ncolors *
 							sizeof(struct arcp));
-	if (d->ssets == NULL || d->statesarea == NULL || d->outsarea == NULL ||
-							d->incarea == NULL) {
-		freedfa(d);
-		ERR(REG_ESPACE);
-		return NULL;
+		d->cptsmalloced = 1;
+		d->mallocarea = (char *)d;
+		if (d->ssets == NULL || d->statesarea == NULL ||
+				d->outsarea == NULL || d->incarea == NULL) {
+			freedfa(d);
+			ERR(REG_ESPACE);
+			return NULL;
+		}
 	}
 
-	d->nssets = (v->eflags&REG_SMALL) ? 7 : CACHE;
+	d->nssets = (v->eflags&REG_SMALL) ? 7 : nss;
 	d->nssused = 0;
 	d->nstates = cnfa->nstates;
 	d->ncolors = cnfa->ncolors;
@@ -248,14 +269,10 @@ struct colormap *cm;
 	d->cnfa = cnfa;
 	d->cm = cm;
 	d->lastpost = NULL;
+	d->lastnopr = NULL;
 	d->search = d->ssets;
 
-	for (ss = d->ssets, i = 0; i < d->nssets; ss++, i++) {
-		/* initialization of most fields is done as needed */
-		ss->states = &d->statesarea[i * d->wordsper];
-		ss->outs = &d->outsarea[i * d->ncolors];
-		ss->inchain = &d->incarea[i * d->ncolors];
-	}
+	/* initialization of sset fields is done as needed */
 
 	return d;
 }
@@ -268,15 +285,19 @@ static VOID
 freedfa(d)
 struct dfa *d;
 {
-	if (d->ssets != NULL)
-		FREE(d->ssets);
-	if (d->statesarea != NULL)
-		FREE(d->statesarea);
-	if (d->outsarea != NULL)
-		FREE(d->outsarea);
-	if (d->incarea != NULL)
-		FREE(d->incarea);
-	FREE(d);
+	if (d->cptsmalloced) {
+		if (d->ssets != NULL)
+			FREE(d->ssets);
+		if (d->statesarea != NULL)
+			FREE(d->statesarea);
+		if (d->outsarea != NULL)
+			FREE(d->outsarea);
+		if (d->incarea != NULL)
+			FREE(d->incarea);
+	}
+
+	if (d->mallocarea != NULL)
+		FREE(d->mallocarea);
 }
 
 /*
@@ -319,9 +340,9 @@ chr *start;
 		for (i = 0; i < d->wordsper; i++)
 			ss->states[i] = 0;
 		BSET(ss->states, d->cnfa->pre);
-		ss->hash = hash(ss->states, d->wordsper);
+		ss->hash = HASH(ss->states, d->wordsper);
 		assert(d->cnfa->pre != d->cnfa->post);
-		ss->flags = STARTER|LOCKED;
+		ss->flags = STARTER|LOCKED|NOPROGRESS;
 		/* lastseen dealt with below */
 	}
 
@@ -329,6 +350,7 @@ chr *start;
 		d->ssets[i].lastseen = NULL;
 	ss->lastseen = start;		/* maybe untrue, but harmless */
 	d->lastpost = NULL;
+	d->lastnopr = NULL;
 	return ss;
 }
 
@@ -352,6 +374,7 @@ chr *start;			/* where the attempt got started */
 	struct carc *ca;
 	struct sset *p;
 	int ispost;
+	int noprogress;
 	int gotstate;
 	int dolacons;
 	int didlacons;
@@ -367,15 +390,18 @@ chr *start;			/* where the attempt got started */
 	for (i = 0; i < d->wordsper; i++)
 		d->work[i] = 0;
 	ispost = 0;
+	noprogress = 1;
 	gotstate = 0;
 	for (i = 0; i < d->nstates; i++)
 		if (ISBSET(css->states, i))
-			for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
+			for (ca = cnfa->states[i]+1; ca->co != COLORLESS; ca++)
 				if (ca->co == co) {
 					BSET(d->work, ca->to);
 					gotstate = 1;
 					if (ca->to == cnfa->post)
 						ispost = 1;
+					if (!cnfa->states[ca->to]->co)
+						noprogress = 0;
 					FDEBUG(("%d -> %d\n", i, ca->to));
 				}
 	dolacons = (gotstate) ? (cnfa->flags&HASLACONS) : 0;
@@ -384,7 +410,7 @@ chr *start;			/* where the attempt got started */
 		dolacons = 0;
 		for (i = 0; i < d->nstates; i++)
 			if (ISBSET(d->work, i))
-				for (ca = cnfa->states[i]; ca->co != COLORLESS;
+				for (ca = cnfa->states[i]+1; ca->co != COLORLESS;
 									ca++)
 					if (ca->co > cnfa->ncolors &&
 						!ISBSET(d->work, ca->to) &&
@@ -395,17 +421,23 @@ chr *start;			/* where the attempt got started */
 						didlacons = 1;
 						if (ca->to == cnfa->post)
 							ispost = 1;
+						if (!cnfa->states[ca->to]->co)
+							noprogress = 0;
 						FDEBUG(("%d :> %d\n",i,ca->to));
 					}
 	}
 	if (!gotstate)
 		return NULL;
-	h = hash(d->work, d->wordsper);
+	h = HASH(d->work, d->wordsper);
 
 	/* next, is that in the cache? */
 	for (p = d->ssets, i = d->nssused; i > 0; p++, i--)
-		if (p->hash == h && memcmp(VS(d->work), VS(p->states),
+		if (HIT(h, d->work, p, d->wordsper)) {
+#ifndef xxx
+p->hash == h && 
+memcmp(VS(d->work), VS(p->states),
 					d->wordsper*sizeof(unsigned)) == 0) {
+#endif
 			FDEBUG(("cached c%d\n", p - d->ssets));
 			break;			/* NOTE BREAK OUT */
 		}
@@ -416,6 +448,8 @@ chr *start;			/* where the attempt got started */
 			p->states[i] = d->work[i];
 		p->hash = h;
 		p->flags = (ispost) ? POSTSTATE : 0;
+		if (noprogress)
+			p->flags |= NOPROGRESS;
 		/* lastseen to be dealt with by caller */
 	}
 
@@ -442,13 +476,14 @@ pcolor co;			/* "color" of the lookahead constraint */
 	int n;
 	struct subre *sub;
 	struct dfa *d;
+	struct smalldfa sd;
 	chr *end;
 
 	n = co - pcnfa->ncolors;
 	assert(n < v->g->nlacons && v->g->lacons != NULL);
 	FDEBUG(("=== testing lacon %d\n", n));
 	sub = &v->g->lacons[n];
-	d = newdfa(v, &sub->cnfa, &v->g->cmap);
+	d = newdfa(v, &sub->cnfa, &v->g->cmap, &sd);
 	if (d == NULL) {
 		ERR(REG_ESPACE);
 		return 0;
@@ -520,6 +555,11 @@ chr *start;
 			(d->lastpost == NULL || d->lastpost < ss->lastseen))
 		d->lastpost = ss->lastseen;
 
+	/* likewise for a no-progress state */
+	if ((ss->flags&NOPROGRESS) && ss->lastseen != d->lastnopr &&
+			(d->lastnopr == NULL || d->lastnopr < ss->lastseen))
+		d->lastnopr = ss->lastseen;
+
 	return ss;
 }
 
@@ -541,17 +581,21 @@ chr *start;
 
 	/* shortcut for cases where cache isn't full */
 	if (d->nssused < d->nssets) {
-		ss = &d->ssets[d->nssused];
+		i = d->nssused;
 		d->nssused++;
-		FDEBUG(("new c%d\n", ss - d->ssets));
-		/* must make innards consistent */
+		ss = &d->ssets[i];
+		FDEBUG(("new c%d\n", i));
+		/* set up innards */
+		ss->states = &d->statesarea[i * d->wordsper];
+		ss->flags = 0;
 		ss->ins.ss = NULL;
 		ss->ins.co = WHITE;		/* give it some value */
+		ss->outs = &d->outsarea[i * d->ncolors];
+		ss->inchain = &d->incarea[i * d->ncolors];
 		for (i = 0; i < d->ncolors; i++) {
 			ss->outs[i] = NULL;
 			ss->inchain[i].ss = NULL;
 		}
-		ss->flags = 0;
 		return ss;
 	}
 
diff --git a/generic/regerrs.h b/generic/regerrs.h
index af2805c..1b6552c 100644
--- a/generic/regerrs.h
+++ b/generic/regerrs.h
@@ -1,6 +1,6 @@
 { REG_OKAY,	"REG_OKAY",	"no errors detected" },
 { REG_NOMATCH,	"REG_NOMATCH",	"failed to match" },
-{ REG_BADPAT,	"REG_BADPAT",	"invalid regexp (reg version 0.1)" },
+{ REG_BADPAT,	"REG_BADPAT",	"invalid regexp (reg version 0.2)" },
 { REG_ECOLLATE,	"REG_ECOLLATE",	"invalid collating element" },
 { REG_ECTYPE,	"REG_ECTYPE",	"invalid character class" },
 { REG_EESCAPE,	"REG_EESCAPE",	"invalid escape \\ sequence" },
diff --git a/generic/regex.h b/generic/regex.h
index 7ca6ad0..2f3ebfa 100644
--- a/generic/regex.h
+++ b/generic/regex.h
@@ -79,16 +79,16 @@ extern "C" {
 #define	__REG_VOID_T	VOID
 #define	__REG_CONST	CONST
 /* names and declarations */
-#define	__REG_WIDE_COMPILE	re_ucomp
-#define	__REG_WIDE_EXEC		re_uexec
+#define	__REG_WIDE_COMPILE	TclReComp
+#define	__REG_WIDE_EXEC		TclReExec
 #ifndef __REG_NOFRONT
 #define	__REG_NOFRONT		/* don't want regcomp() and regexec() */
 #endif
 #ifndef __REG_NOCHAR
 #define	__REG_NOCHAR		/* or the char versions */
 #endif
-#define	regfree		re_ufree
-#define	regerror	re_uerror
+#define	regfree		TclReFree
+#define	regerror	TclReError
 /* --- end --- */
 
 
@@ -289,8 +289,8 @@ int regexec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, regmatch_t [], i
 #ifdef __REG_WIDE_T
 int __REG_WIDE_EXEC _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
 #endif
-re_void re_ufree _ANSI_ARGS_((regex_t *));
-extern size_t re_uerror _ANSI_ARGS_((int, __REG_CONST regex_t *, char *, size_t));
+re_void regfree _ANSI_ARGS_((regex_t *));
+extern size_t regerror _ANSI_ARGS_((int, __REG_CONST regex_t *, char *, size_t));
 /* automatically gathered by fwd; do not hand-edit */
 /* =====^!^===== end forwards =====^!^===== */
 
diff --git a/generic/regexec.c b/generic/regexec.c
index 1671866..50fcc70 100644
--- a/generic/regexec.c
+++ b/generic/regexec.c
@@ -24,6 +24,7 @@ struct vars {
 #define	ERR(e)	VERR(v, e)		/* record an error */
 #define	NOERR()	{if (ISERR()) return;}	/* if error seen, return */
 #define	OFF(p)	((p) - v->start)
+#define	LOFF(p)	((long)OFF(p))
 
 
 
@@ -35,11 +36,15 @@ struct arcp {			/* "pointer" to an outarc */
 
 struct sset {			/* state set */
 	unsigned *states;	/* pointer to bitvector */
-	unsigned hash;		/* xor of bitvector */
+	unsigned hash;		/* hash of bitvector */
+#		define	HASH(bv, nw)	(((nw) == 1) ? *(bv) : hash(bv, nw))
+#	define	HIT(h,bv,ss,nw)	((ss)->hash == (h) && ((nw) == 1 || \
+		memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
 	int flags;
 #		define	STARTER		01	/* the initial state set */
 #		define	POSTSTATE	02	/* includes the goal state */
 #		define	LOCKED		04	/* locked in cache */
+#		define	NOPROGRESS	010	/* zero-progress state set */
 	struct arcp ins;	/* chain of inarcs pointing here */
 	chr *lastseen;		/* last entered on arrival here */
 	struct sset **outs;	/* outarc vector indexed by color */
@@ -60,12 +65,26 @@ struct dfa {
 	struct cnfa *cnfa;
 	struct colormap *cm;
 	chr *lastpost;		/* location of last cache-flushed success */
+	chr *lastnopr;		/* location of last cache-flushed NOPROGRESS */
 	struct sset *search;	/* replacement-search-pointer memory */
+	int cptsmalloced;	/* were the areas individually malloced? */
+	char *mallocarea;	/* self, or master malloced area, or NULL */
 };
 
-#define	CACHE	200
 #define	WORK	1		/* number of work bitvectors needed */
 
+/* setup for non-malloc allocation for small cases */
+#define	FEWSTATES	20	/* must be less than UBITS */
+#define	FEWCOLORS	15
+struct smalldfa {
+	struct dfa dfa;
+	struct sset ssets[FEWSTATES*2];
+	unsigned statesarea[FEWSTATES*2 + WORK];
+	struct sset *outsarea[FEWSTATES*2 * FEWCOLORS];
+	struct arcp incarea[FEWSTATES*2 * FEWCOLORS];
+};
+
+
 
 
 /*
@@ -91,7 +110,7 @@ static int caltdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)
 /* === rege_dfa.c === */
 static chr *longest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));
 static chr *shortest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, chr *, chr **));
-static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));
+static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct smalldfa *));
 static VOID freedfa _ANSI_ARGS_((struct dfa *));
 static unsigned hash _ANSI_ARGS_((unsigned *, int));
 static struct sset *initialize _ANSI_ARGS_((struct vars *, struct dfa *, chr *));
@@ -124,6 +143,10 @@ int flags;
 	int st;
 	size_t n;
 	int complications;
+#	define	LOCALMAT	20
+	regmatch_t mat[LOCALMAT];
+#	define	LOCALMEM	40
+	regoff_t mem[LOCALMEM];
 
 	/* sanity checks */
 	if (re == NULL || string == NULL || re->re_magic != REMAGIC)
@@ -145,7 +168,10 @@ int flags;
 	v->nmatch = nmatch;
 	if (complications && v->nmatch < v->g->nsub + 1) {
 		/* need work area bigger than what user gave us */
-		v->pmatch = (regmatch_t *)MALLOC((v->g->nsub + 1) *
+		if (v->g->nsub + 1 <= LOCALMAT)
+			v->pmatch = mat;
+		else
+			v->pmatch = (regmatch_t *)MALLOC((v->g->nsub + 1) *
 							sizeof(regmatch_t));
 		if (v->pmatch == NULL)
 			return REG_ESPACE;
@@ -156,9 +182,14 @@ int flags;
 	v->stop = (chr *)string + len;
 	v->err = 0;
 	if (complications) {
-		v->mem = (regoff_t *)MALLOC(v->g->ntree*sizeof(regoff_t));
+		assert(v->g->ntree >= 0);
+		n = (size_t)v->g->ntree;
+		if (n <= LOCALMEM)
+			v->mem = mem;
+		else
+			v->mem = (regoff_t *)MALLOC(n*sizeof(regoff_t));
 		if (v->mem == NULL) {
-			if (v->pmatch != pmatch)
+			if (v->pmatch != pmatch && v->pmatch != mat)
 				FREE(v->pmatch);
 			return REG_ESPACE;
 		}
@@ -171,14 +202,18 @@ int flags;
 		st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
 	else
 		st = find(v, &v->g->tree->cnfa, &v->g->cmap);
+
+	/* copy (portion of) match vector over if necessary */
 	if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) {
 		zapsubs(pmatch, nmatch);
 		n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
 		memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t));
 	}
-	if (v->pmatch != pmatch)
+
+	/* clean up */
+	if (v->pmatch != pmatch && v->pmatch != mat)
 		FREE(v->pmatch);
-	if (v->mem != NULL)
+	if (v->mem != NULL && v->mem != mem)
 		FREE(v->mem);
 	return st;
 }
@@ -193,15 +228,14 @@ struct vars *v;
 struct cnfa *cnfa;
 struct colormap *cm;
 {
-	struct dfa *d = newdfa(v, cnfa, cm);
-	struct dfa *s = newdfa(v, &v->g->search, cm);
+	struct smalldfa da;
+	struct dfa *d = newdfa(v, cnfa, cm, &da);
+	struct smalldfa sa;
+	struct dfa *s = newdfa(v, &v->g->search, cm, &sa);
 	chr *begin;
 	chr *end;
-#ifdef notdef
 	chr *open;		/* open and close of range of possible starts */
 	chr *close;
-#endif
-	chr *stop = (cnfa->flags&LEFTANCH) ? v->start : v->stop;
 
 	if (d == NULL)
 		return v->err;
@@ -210,17 +244,15 @@ struct colormap *cm;
 		return v->err;
 	}
 
-#ifdef notdef
 	close = v->start;
-	while (close < stop) {
-		MDEBUG(("\nsearch at %ld\n", (long)OFF(close)));
+	do {
+		MDEBUG(("\nsearch at %ld\n", LOFF(close)));
 		close = shortest(v, s, close, close, v->stop, &open);
 		if (close == NULL)
 			break;				/* NOTE BREAK */
-		MDEBUG(("between %ld and %ld\n", (long)OFF(open),
-							(long)OFF(close)));
-#endif
-		for (begin = v->start; begin <= stop; begin++) {
+		MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
+		for (begin = open; begin <= close; begin++) {
+			MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
 			end = longest(v, d, begin, v->stop);
 			if (end != NULL) {
 				if (v->nmatch > 0) {
@@ -239,9 +271,7 @@ struct colormap *cm;
 				return REG_OKAY;
 			}
 		}
-#ifdef notdef
-	}
-#endif
+	} while (close < v->stop);
 
 	freedfa(d);
 	freedfa(s);
@@ -260,10 +290,14 @@ struct vars *v;
 struct cnfa *cnfa;
 struct colormap *cm;
 {
-	struct dfa *d = newdfa(v, cnfa, cm);
+	struct smalldfa da;
+	struct dfa *d = newdfa(v, cnfa, cm, &da);
+	struct smalldfa sa;
+	struct dfa *s = newdfa(v, &v->g->search, cm, &sa);
 	chr *begin;
 	chr *end;
-	chr *stop = (cnfa->flags&LEFTANCH) ? v->start : v->stop;
+	chr *open;		/* open and close of range of possible starts */
+	chr *close;
 	chr *estart;
 	chr *estop;
 	int er;
@@ -271,55 +305,72 @@ struct colormap *cm;
 
 	if (d == NULL)
 		return v->err;
+	if (s == NULL) {
+		freedfa(d);
+		return v->err;
+	}
 
-	for (begin = v->start; begin <= stop; begin++) {
-		MDEBUG(("\ncfind trying at %ld\n", (long)OFF(begin)));
-		estart = begin;
-		estop = v->stop;
-		for (;;) {
-			if (shorter)
-				end = shortest(v, d, begin, estart, estop,
-								(chr **)NULL);
-			else
-				end = longest(v, d, begin, estop);
-			if (end == NULL)
-				break;		/* NOTE BREAK OUT */
-			MDEBUG(("tentative end %ld\n", (long)OFF(end)));
-			zapsubs(v->pmatch, v->nmatch);
-			zapmem(v, v->g->tree);
-			er = cdissect(v, v->g->tree, begin, end);
-			switch (er) {
-			case REG_OKAY:
-				if (v->nmatch > 0) {
-					v->pmatch[0].rm_so = OFF(begin);
-					v->pmatch[0].rm_eo = OFF(end);
-				}
-				freedfa(d);
-				if (ISERR())
-					return v->err;
-				return REG_OKAY;
-				break;
-			case REG_NOMATCH:
-				/* go around and try again */
-				if ((shorter) ? end == estop : end == begin) {
-					/* no point in trying again */
-					freedfa(d);
-					return REG_NOMATCH;
-				}
+	close = v->start;
+	do {
+		MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
+		close = shortest(v, s, close, close, v->stop, &open);
+		if (close == NULL)
+			break;				/* NOTE BREAK */
+		MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
+		for (begin = open; begin <= close; begin++) {
+			MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
+			estart = begin;
+			estop = v->stop;
+			for (;;) {
 				if (shorter)
-					estart = end + 1;
+					end = shortest(v, d, begin, estart,
+							estop, (chr **)NULL);
 				else
-					estop = end - 1;
-				break;
-			default:
-				freedfa(d);
-				return er;
-				break;
+					end = longest(v, d, begin, estop);
+				if (end == NULL)
+					break;		/* NOTE BREAK OUT */
+				MDEBUG(("tentative end %ld\n", LOFF(end)));
+				zapsubs(v->pmatch, v->nmatch);
+				zapmem(v, v->g->tree);
+				er = cdissect(v, v->g->tree, begin, end);
+				switch (er) {
+				case REG_OKAY:
+					if (v->nmatch > 0) {
+						v->pmatch[0].rm_so = OFF(begin);
+						v->pmatch[0].rm_eo = OFF(end);
+					}
+					freedfa(d);
+					freedfa(s);
+					if (ISERR())
+						return v->err;
+					return REG_OKAY;
+					break;
+				case REG_NOMATCH:
+					/* go around and try again */
+					if ((shorter) ? end == estop :
+								end == begin) {
+						/* no point in trying again */
+						freedfa(s);
+						freedfa(d);
+						return REG_NOMATCH;
+					}
+					if (shorter)
+						estart = end + 1;
+					else
+						estop = end - 1;
+					break;
+				default:
+					freedfa(d);
+					freedfa(s);
+					return er;
+					break;
+				}
 			}
 		}
-	}
+	} while (close < v->stop);
 
 	freedfa(d);
+	freedfa(s);
 	if (ISERR())
 		return v->err;
 	return REG_NOMATCH;
@@ -402,7 +453,7 @@ chr *begin;			/* beginning of relevant substring */
 chr *end;			/* end of same */
 {
 	assert(t != NULL);
-	MDEBUG(("dissect %ld-%ld\n", (long)OFF(begin), (long)OFF(end)));
+	MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end)));
 
 	switch (t->op) {
 	case '=':		/* terminal node */
@@ -443,7 +494,9 @@ struct subre *t;
 chr *begin;			/* beginning of relevant substring */
 chr *end;			/* end of same */
 {
+	struct smalldfa da;
 	struct dfa *d;
+	struct smalldfa d2a;
 	struct dfa *d2;
 	chr *mid;
 	int i;
@@ -452,10 +505,10 @@ chr *end;			/* end of same */
 	assert(t->left != NULL && t->left->cnfa.nstates > 0);
 	assert(t->right != NULL && t->right->cnfa.nstates > 0);
 
-	d = newdfa(v, &t->left->cnfa, &v->g->cmap);
+	d = newdfa(v, &t->left->cnfa, &v->g->cmap, &da);
 	if (ISERR())
 		return v->err;
-	d2 = newdfa(v, &t->right->cnfa, &v->g->cmap);
+	d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &d2a);
 	if (ISERR()) {
 		freedfa(d);
 		return v->err;
@@ -468,7 +521,7 @@ chr *end;			/* end of same */
 		freedfa(d2);
 		return REG_ASSERT;
 	}
-	MDEBUG(("tentative midpoint %ld\n", (long)OFF(mid)));
+	MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
 
 	/* iterate until satisfaction or failure */
 	while (longest(v, d2, mid, end) != end) {
@@ -488,7 +541,7 @@ chr *end;			/* end of same */
 			freedfa(d2);
 			return REG_ASSERT;
 		}
-		MDEBUG(("new midpoint %ld\n", (long)OFF(mid)));
+		MDEBUG(("new midpoint %ld\n", LOFF(mid)));
 	}
 
 	/* satisfaction */
@@ -512,6 +565,7 @@ struct subre *t;
 chr *begin;			/* beginning of relevant substring */
 chr *end;			/* end of same */
 {
+	struct smalldfa da;
 	struct dfa *d;
 	int i;
 
@@ -521,7 +575,7 @@ chr *end;			/* end of same */
 	for (i = 0; t != NULL; t = t->right, i++) {
 		MDEBUG(("trying %dth\n", i));
 		assert(t->left != NULL && t->left->cnfa.nstates > 0);
-		d = newdfa(v, &t->left->cnfa, &v->g->cmap);
+		d = newdfa(v, &t->left->cnfa, &v->g->cmap, &da);
 		if (ISERR())
 			return v->err;
 		if (longest(v, d, begin, end) == end) {
@@ -550,7 +604,7 @@ chr *end;			/* end of same */
 	int er;
 
 	assert(t != NULL);
-	MDEBUG(("cdissect %ld-%ld\n", (long)OFF(begin), (long)OFF(end)));
+	MDEBUG(("cdissect %ld-%ld\n", LOFF(begin), LOFF(end)));
 
 	switch (t->op) {
 	case '=':		/* terminal node */
@@ -596,7 +650,9 @@ struct subre *t;
 chr *begin;			/* beginning of relevant substring */
 chr *end;			/* end of same */
 {
+	struct smalldfa da;
 	struct dfa *d;
+	struct smalldfa d2a;
 	struct dfa *d2;
 	chr *mid;
 	int er;
@@ -608,10 +664,10 @@ chr *end;			/* end of same */
 	if (t->left->flags&SHORTER)		/* reverse scan */
 		return crevdissect(v, t, begin, end);
 
-	d = newdfa(v, &t->left->cnfa, &v->g->cmap);
+	d = newdfa(v, &t->left->cnfa, &v->g->cmap, &da);
 	if (ISERR())
 		return v->err;
-	d2 = newdfa(v, &t->right->cnfa, &v->g->cmap);
+	d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &d2a);
 	if (ISERR()) {
 		freedfa(d);
 		return v->err;
@@ -626,11 +682,11 @@ chr *end;			/* end of same */
 			freedfa(d2);
 			return REG_NOMATCH;
 		}
-		MDEBUG(("tentative midpoint %ld\n", (long)OFF(mid)));
+		MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
 		v->mem[t->retry] = (mid - begin) + 1;
 	} else {
 		mid = begin + (v->mem[t->retry] - 1);
-		MDEBUG(("working midpoint %ld\n", (long)OFF(mid)));
+		MDEBUG(("working midpoint %ld\n", LOFF(mid)));
 	}
 
 	/* iterate until satisfaction or failure */
@@ -663,7 +719,7 @@ chr *end;			/* end of same */
 			freedfa(d2);
 			return REG_NOMATCH;
 		}
-		MDEBUG(("%d: new midpoint %ld\n", t->retry, (long)OFF(mid)));
+		MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
 		v->mem[t->retry] = (mid - begin) + 1;
 		zapmem(v, t->left);
 		zapmem(v, t->right);
@@ -689,7 +745,9 @@ struct subre *t;
 chr *begin;			/* beginning of relevant substring */
 chr *end;			/* end of same */
 {
+	struct smalldfa da;
 	struct dfa *d;
+	struct smalldfa d2a;
 	struct dfa *d2;
 	chr *mid;
 	int er;
@@ -700,10 +758,10 @@ chr *end;			/* end of same */
 	assert(t->left->flags&SHORTER);
 
 	/* concatenation -- need to split the substring between parts */
-	d = newdfa(v, &t->left->cnfa, &v->g->cmap);
+	d = newdfa(v, &t->left->cnfa, &v->g->cmap, &da);
 	if (ISERR())
 		return v->err;
-	d2 = newdfa(v, &t->right->cnfa, &v->g->cmap);
+	d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &d2a);
 	if (ISERR()) {
 		freedfa(d);
 		return v->err;
@@ -718,11 +776,11 @@ chr *end;			/* end of same */
 			freedfa(d2);
 			return REG_NOMATCH;
 		}
-		MDEBUG(("tentative midpoint %ld\n", (long)OFF(mid)));
+		MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
 		v->mem[t->retry] = (mid - begin) + 1;
 	} else {
 		mid = begin + (v->mem[t->retry] - 1);
-		MDEBUG(("working midpoint %ld\n", (long)OFF(mid)));
+		MDEBUG(("working midpoint %ld\n", LOFF(mid)));
 	}
 
 	/* iterate until satisfaction or failure */
@@ -755,7 +813,7 @@ chr *end;			/* end of same */
 			freedfa(d2);
 			return REG_NOMATCH;
 		}
-		MDEBUG(("%d: new midpoint %ld\n", t->retry, (long)OFF(mid)));
+		MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
 		v->mem[t->retry] = (mid - begin) + 1;
 		zapmem(v, t->left);
 		zapmem(v, t->right);
@@ -846,6 +904,7 @@ struct subre *t;
 chr *begin;			/* beginning of relevant substring */
 chr *end;			/* end of same */
 {
+	struct smalldfa da;
 	struct dfa *d;
 	int er;
 #	define	UNTRIED	0	/* not yet tried at all */
@@ -862,7 +921,7 @@ chr *end;			/* end of same */
 	assert(t->left != NULL);
 
 	if (v->mem[t->retry] == UNTRIED) {
-		d = newdfa(v, &t->left->cnfa, &v->g->cmap);
+		d = newdfa(v, &t->left->cnfa, &v->g->cmap, &da);
 		if (ISERR())
 			return v->err;
 		if (longest(v, d, begin, end) != end) {
diff --git a/generic/regguts.h b/generic/regguts.h
index 5f9a0ab..73aed2f 100644
--- a/generic/regguts.h
+++ b/generic/regguts.h
@@ -308,7 +308,6 @@ struct cnfa {
 	int ncolors;		/* number of colors */
 	int flags;
 #		define	HASLACONS	01	/* uses lookahead constraints */
-#		define	LEFTANCH	02	/* anchored on left */
 	int pre;		/* setup state number */
 	int post;		/* teardown state number */
 	color bos[2];		/* colors, if any, assigned to BOS and BOL */
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c
index a619d5b..c605591 100644
--- a/generic/tclRegexp.c
+++ b/generic/tclRegexp.c
@@ -10,7 +10,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclRegexp.c,v 1.1.2.5 1998/11/11 04:54:19 stanton Exp $
+ * RCS: @(#) $Id: tclRegexp.c,v 1.1.2.6 1998/11/17 21:38:39 stanton Exp $
  */
 
 #include "tclInt.h"
@@ -163,7 +163,7 @@ Tcl_RegExpCompile(interp, string)
 
     if (iPtr->patterns[NUM_REGEXPS-1] != NULL) {
 	ckfree(iPtr->patterns[NUM_REGEXPS-1]);
-	re_ufree(&(iPtr->regexps[NUM_REGEXPS-1]->re));
+	TclReFree(&(iPtr->regexps[NUM_REGEXPS-1]->re));
 	ckfree((char *) iPtr->regexps[NUM_REGEXPS-1]);
     }
     for (i = NUM_REGEXPS - 2; i >= 0; i--) {
@@ -326,7 +326,7 @@ TclRegExpExecUniChar(interp, re, wString, numChars, nmatches, flags)
     if (nmatches >= 0 && (size_t) nmatches < nm)
 	nm = (size_t) nmatches;
 
-    status = re_uexec(&regexpPtr->re, wString, (size_t) numChars,
+    status = TclReExec(&regexpPtr->re, wString, (size_t) numChars,
 	    (rm_detail_t *)NULL, nm, regexpPtr->matches, flags);
 
     /*
@@ -621,12 +621,12 @@ TclRegError(interp, msg, status)
     char *p;
 
     Tcl_ResetResult(interp);
-    n = re_uerror(status, (regex_t *)NULL, buf, sizeof(buf));
+    n = TclReError(status, (regex_t *)NULL, buf, sizeof(buf));
     p = (n > sizeof(buf)) ? "..." : "";
     Tcl_AppendResult(interp, msg, buf, p, NULL);
 
     sprintf(cbuf, "%d", status);
-    (VOID) re_uerror(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf));
+    (VOID) TclReError(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf));
     Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL);
 }
 
@@ -654,7 +654,7 @@ FreeRegexpInternalRep(objPtr)
 {
     TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
 
-    re_ufree(&regexpRepPtr->re);
+    TclReFree(&regexpRepPtr->re);
     if (regexpRepPtr->matches) {
 	ckfree((char *) regexpRepPtr->matches);
     }
@@ -765,7 +765,7 @@ CompileRegexp(interp, string, length, flags)
      */
 
     regexpPtr->flags = flags;
-    status = re_ucomp(&regexpPtr->re, uniString, (size_t) numChars, flags);
+    status = TclReComp(&regexpPtr->re, uniString, (size_t) numChars, flags);
     Tcl_DStringFree(&stringBuf);
 
     if (status != REG_OKAY) {
diff --git a/tests/reg.test b/tests/reg.test
new file mode 100644
index 0000000..26a981e
--- /dev/null
+++ b/tests/reg.test
@@ -0,0 +1,881 @@
+# regexp tests
+
+if {[string compare test [info procs test]] == 1} then {source defs}
+
+# This file uses some custom procedures, defined below, for regexp regression
+# testing.  The name of the procedure indicates the general nature of the
+# test:  e for compile error expected, f for match failure expected, m
+# for a successful match, and i for a successful match with -indices (used
+# in checking things like nonparticipating subexpressions).  There is also
+# a "doing" procedure which sets up title and major test number for each
+# block of tests, and an "xx" procedure which ignores its arguments and
+# arranges for the next invocation of "doing" to announce that some tests
+# were bypassed (which is better than just commenting them out).
+
+# The first 3 arguments are constant:  a minor number (which often gets
+# a letter or two suffixed to it internally), some flags, and the RE itself.
+# For e, the remaining argument is the name of the compile error expected,
+# less the leading "REG_".  For the rest, the next argument is the string
+# to try the match against.  Remaining arguments are the substring expected
+# to be matched, and any substrings expected to be matched by subexpressions.
+# (For f, these arguments are optional, and if present are ignored except
+# that they indicate how many subexpressions should be presents in the RE.)
+# It is an error for the number of subexpression arguments to be wrong.
+# Cases involving nonparticipating subexpressions, checking where empty
+# substrings are located, etc. should be done using i.
+
+# The flag characters are complex and a bit eclectic.  Generally speaking, 
+# lowercase letters are compile options, uppercase are expected re_info
+# bits, and nonalphabetics are match options, controls for how the test is 
+# run, or testing options.  The one small surprise is that AREs are the
+# default, and you must explicitly request lesser flavors of RE.  The flags
+# are as follows.  It is admitted that some are not very mnemonic.
+# There are some others which are purely debugging tools and are not
+# useful in this file.
+#
+#	-	no-op (placeholder)
+#	+	provide fake xy equivalence class
+#	%	force small state-set cache in matcher (to test cache replace)
+#	^	beginning of string is not beginning of line
+#	$	end of string is not end of line
+#
+#	&	test as both ARE and BRE
+#	b	BRE
+#	e	ERE
+#	a	turn advanced-features bit on (error unless ERE already)
+#	q	literal string, no metacharacters at all
+#
+#	i	case-independent matching
+#	o	("opaque") no subexpression capture
+#	p	newlines are half-magic, excluded from . and [^ only
+#	w	newlines are half-magic, significant to ^ and $ only
+#	n	newlines are fully magic, both effects
+#	x	expanded RE syntax
+#
+#	A	backslash-_a_lphanumeric seen
+#	B	ERE/ARE literal-_b_race heuristic used
+#	E	backslash (_e_scape) seen within []
+#	H	looka_h_ead constraint seen
+#	I	_i_mpossible to match
+#	L	_l_ocale-specific construct seen
+#	M	unportable (_m_achine-specific) construct seen
+#	N	RE can match empty (_n_ull) string
+#	P	non-_P_OSIX construct seen
+#	Q	{} _q_uantifier seen
+#	R	back _r_eference seen
+#	S	POSIX-un_s_pecified syntax seen
+#	U	saw original-POSIX botch:  unmatched right paren in ERE (_u_gh)
+
+# The one area we can't easily test is memory-allocation failures (which
+# are hard to provoke on command).  Embedded NULs also are not tested at
+# the moment, but this is a historical accident which should be fixed.
+
+
+
+# test procedures and related
+
+set ask "about"
+set xflags "xflags"
+set testbypassed 0
+
+# re_info abbreviation mapping table
+set infonames(A) "REG_UBSALNUM"
+set infonames(B) "REG_UBRACES"
+set infonames(E) "REG_UBBS"
+set infonames(H) "REG_ULOOKAHEAD"
+set infonames(I) "REG_UIMPOSSIBLE"
+set infonames(L) "REG_ULOCALE"
+set infonames(M) "REG_UUNPORT"
+set infonames(N) "REG_UEMPTYMATCH"
+set infonames(P) "REG_UNONPOSIX"
+set infonames(Q) "REG_UBOUNDS"
+set infonames(R) "REG_UBACKREF"
+set infonames(S) "REG_UUNSPEC"
+set infonames(U) "REG_UPBOTCH"
+set infonameorder "RHQBAUEPSMLNI"	;# must match bit order, lsb first
+
+# set major test number and description
+proc doing {major desc} {
+	global prefix description testbypassed
+
+	if {$testbypassed != 0} {
+		puts stdout "!!! bypassed $testbypassed tests in\
+					 reg-$major, `$description'"
+	}
+
+	set prefix reg-$major
+	set description "reg $desc"
+	set testbypassed 0
+}
+
+# build test number (internal)
+proc tno {testid} {
+	return [lindex $testid 0]
+}
+
+# build description, with possible modifiers (internal)
+proc desc {testid} {
+	global description
+
+	set d $description
+	if {[llength $testid] > 1} {
+		set d "([lreplace $testid 0 0]) $d"
+	}
+	return $d
+}
+
+# build trailing options and flags argument from a flags string (internal)
+proc flags {fl} {
+	global xflags
+
+	set args [list]
+	set flags ""
+	foreach f [split $fl ""] {
+		switch -exact -- $f {
+		"i" { lappend args "-nocase" }
+		"x" { lappend args "-expanded" }
+		"n" { lappend args "-line" }
+		"p" { lappend args "-linestop" }
+		"w" { lappend args "-lineanchor" }
+		"-" { }
+		default { append flags $f }
+		}
+	}
+	if {[string compare $flags ""] != 0} {
+		lappend args -$xflags $flags
+	}
+	return $args
+}
+
+# build info-flags list from a flags string (internal)
+proc infoflags {fl} {
+	global infonames infonameorder
+
+	set ret [list]
+	foreach f [split $infonameorder ""] {
+		if {[string first $f $fl] >= 0} {
+			lappend ret $infonames($f)
+		}
+	}
+	return $ret
+}
+
+# compilation error expected
+proc e {testid flags re err} {
+	global prefix ask errorCode
+
+	# if &, test as both ARE and BRE
+	set amp [string first "&" $flags]
+	if {$amp >= 0} {
+		set f [string range $flags 0 [expr $amp - 1]]
+		append f [string range $flags [expr $amp + 1] end]
+		e [linsert $testid end ARE] ${f} $re $err
+		e [linsert $testid end BRE] ${f}b $re $err
+		return
+	}
+
+	set cmd [concat [list testregexp -$ask] [flags $flags] [list $re]]
+	set run "list \[catch \{$cmd\}\] \[lindex \$errorCode 1\]"
+	test $prefix.[tno $testid] [desc $testid] $run [list 1 REG_$err]
+}
+
+# match failure expected
+proc f {testid flags re target args} {
+	global prefix description ask
+
+	# if &, test as both ARE and BRE
+	set amp [string first "&" $flags]
+	if {$amp >= 0} {
+		set f [string range $flags 0 [expr $amp - 1]]
+		append f [string range $flags [expr $amp + 1] end]
+		eval [linsert $args 0 f [linsert $testid end ARE] ${f} $re \
+								$target]
+		eval [linsert $args 0 f [linsert $testid end BRE] ${f}b $re \
+								$target]
+		return
+	}
+
+	set f [flags $flags]
+	set infoflags [infoflags $flags]
+	set ccmd [concat [list testregexp -$ask] $f [list $re]]
+	set nsub [expr [llength $args] - 1]
+	if {$nsub == -1} {
+		# didn't tell us number of subexps
+		set ccmd "lreplace \[$ccmd\] 0 0"
+		set info [list $infoflags]
+	} else {
+		set info [list $nsub $infoflags]
+	}
+	lappend testid "compile"
+	test $prefix.[tno $testid] [desc $testid] $ccmd $info
+
+	set testid [lreplace $testid end end "execute"]
+	set ecmd [concat [list testregexp] $f [list $re $target]]
+	test $prefix.[tno $testid] [desc $testid] $ecmd 0
+}
+
+# match expected, internal routine that does the work
+# parameters like the "real" routines except they don't have "opts",
+#  which is a possibly-empty list of switches for the regexp match attempt
+proc matchexpected {opts testid flags re target args} {
+	global prefix description ask
+
+	# if &, test as both BRE and ARE
+	set amp [string first "&" $flags]
+	if {$amp >= 0} {
+		set f [string range $flags 0 [expr $amp - 1]]
+		append f [string range $flags [expr $amp + 1] end]
+		eval [concat [list matchexpected $opts \
+			[linsert $testid end ARE] ${f} $re $target] $args]
+		eval [concat [list matchexpected $opts \
+			[linsert $testid end BRE] ${f}b $re $target] $args]
+		return
+	}
+
+	set f [flags $flags]
+	set infoflags [infoflags $flags]
+	set ccmd [concat [list testregexp -$ask] $f [list $re]]
+	set ecmd [concat [list testregexp] $opts $f [list $re $target]]
+
+	set nsub [expr [llength $args] - 1]
+	set names [list]
+	set refs ""
+	for {set i 0} {$i <= $nsub} {incr i} {
+		if {$i == 0} {
+			set name match
+		} else {
+			set name sub$i
+		}
+		lappend names $name
+		append refs " \$$name"
+		set $name ""
+	}
+	if {[string first "o" $flags] >= 0} {	;# REG_NOSUB
+		set nsub 0		;# unsigned value cannot be -1
+	}
+	set ecmd [concat $ecmd $names]
+	set erun "list \[$ecmd\] $refs"
+	set result [concat [list 1] $args]
+
+	set info [list $nsub $infoflags]
+	lappend testid "compile"
+	test $prefix.[tno $testid] [desc $testid] $ccmd $info
+	set testid [lreplace $testid end end "execute"]
+	test $prefix.[tno $testid] [desc $testid] $erun $result
+}
+
+# match expected (no missing, empty, or ambiguous submatches)
+# m testno flags re target mat submat ...
+proc m {args} {
+	eval matchexpected [linsert $args 0 [list]]
+}
+
+# match expected (full fanciness)
+# i testno flags re target mat submat ...
+proc i {args} {
+	eval matchexpected [linsert $args 0 [list "-indices"]]
+}
+
+# test temporarily unimplemented
+proc xx {args} {
+	global testbypassed
+
+	incr testbypassed
+}
+
+
+
+# the tests themselves
+
+
+
+# support functions and preliminary misc.
+# This is sensitive to changes in message wording, but we really have to
+# test the code->message expansion at least once.
+test regexp-0.1 "regexp error reporting" {
+	list [catch {regexp (*) ign} msg] $msg
+} {1 {couldn't compile regular expression pattern: quantifier operand invalid}}
+
+
+
+doing 1 "basic sanity checks"
+m  1	&	abc		abc	abc
+f  2	&	abc		def
+m  3	&	abc		xyabxabce	abc
+
+
+
+doing 2 "invalid option combinations"
+e  1	qe	a		INVARG
+e  2	qa	a		INVARG
+e  3	qx	a		INVARG
+e  4	qn	a		INVARG
+e  5	ba	a		INVARG
+
+
+
+doing 3 "basic syntax"
+i  1	&NS	""		a	{0 -1}
+m  2	NS	a|		a	a
+m  3	-	a|b		a	a
+m  4	-	a|b		b	b
+m  5	NS	a||b		b	b
+m  6	&	ab		ab	ab
+
+
+
+doing 4 "parentheses"
+m  1	-	(a)e		ae	ae	a
+m  2	o	(a)e		ae
+m  3	b	{\(a\)b}	ab	ab	a
+m  4	-	a((b)c)		abc	abc	bc	b
+m  5	-	a(b)(c)		abc	abc	b	c
+e  6	-	a(b		EPAREN
+e  7	b	{a\(b}		EPAREN
+# sigh, we blew it on the specs here... someday this will be fixed in POSIX,
+#  but meanwhile, it's fixed in AREs
+m  8	eU	a)b		a)b	a)b
+e  9	-	a)b		EPAREN
+e 10	b	{a\)b}		EPAREN
+m 11	P	a(?:b)c		abc	abc
+e 12	e	a(?:b)c		BADRPT
+i 13	S	a()b		ab	{0 1}	{1 0}
+m 14	SP	a(?:)b		ab	ab
+i 15	S	a(|b)c		ac	{0 1}	{1 0}
+m 16	S	a(b|)c		abc	abc	b
+
+
+
+doing 5 "simple one-char matching"
+# general case of brackets done later
+m  1	&	a.b		axb	axb
+f  2	&n	"a.b"		"a\nb"
+m  3	&	{a[bc]d}	abd	abd
+m  4	&	{a[bc]d}	acd	acd
+f  5	&	{a[bc]d}	aed
+f  6	&	{a[^bc]d}	abd
+m  7	&	{a[^bc]d}	aed	aed
+f  8	&p	"a\[^bc]d"	"a\nd"
+
+
+
+doing 6 "context-dependent syntax"
+# plus odds and ends
+e  1	-	*		BADRPT
+m  2	b	*		*	*
+m  3	b	{\(*\)}		*	*	*
+e  4	-	(*)		BADRPT
+m  5	b	^*		*	*
+e  6	-	^*		BADRPT
+f  7	&	^b		^b
+m  8	b	x^		x^	x^
+f  9	I	x^		x
+m 10	n	"\n^"		"x\nb"	"\n"
+f 11	bS	{\(^b\)}	^b
+m 12	-	(^b)		b	b	b
+m 13	&	{x$}		x	x
+m 14	bS	{\(x$\)}	x	x	x
+m 15	-	{(x$)}		x	x	x
+m 16	b	{x$y}		"x\$y"	"x\$y"
+f 17	I	{x$y}		xy
+m 18	n	"x\$\n"		"x\n"	"x\n"
+e 19	-	+		BADRPT
+e 20	-	?		BADRPT
+
+
+
+doing 7 "simple quantifiers"
+m  1	&N	a*		aa	aa
+i  2	&N	a*		b	{0 -1}
+m  3	-	a+		aa	aa
+m  4	-	a?b		ab	ab
+m  5	-	a?b		b	b
+e  6	-	**		BADRPT
+m  7	bN	**		***	***
+e  8	&	a**		BADRPT
+e  9	&	a**b		BADRPT
+e 10	&	***		BADRPT
+e 11	*	a++		BADRPT
+e 12	*	a?+		BADRPT
+e 13	*	a?*		BADRPT
+e 14	*	a+*		BADRPT
+e 15	*	a*+		BADRPT
+
+
+
+doing 8 "braces"
+m  1	NQ	"a{0,1}"	""	""
+m  2	NQ	"a{0,1}"	ac	a
+e  3	-	"a{1,0}"	BADBR
+e  4	-	"a{1,2,3}"	BADBR
+e  5	-	"a{257}"	BADBR
+e  6	-	"a{1000}"	BADBR
+e  7	-	"a{1"		EBRACE
+e  8	-	"a{1n}"		BADBR
+m  9	BS	"a{b"		"a\{b"	"a\{b"
+m 10	BS	"a{"		"a\{"	"a\{"
+m 11	bQ	{a\{0,1\}b}	cb	b
+e 12	b	{a\{0,1}	EBRACE
+e 13	-	"a{0,1\\"	BADBR
+m 14	Q	"a{0}b"		ab	b
+m 15	Q	"a{0,0}b"	ab	b
+m 16	Q	"a{0,1}b"	ab	ab
+m 17	Q	"a{0,2}b"	b	b
+m 18	Q	"a{0,2}b"	aab	aab
+m 19	Q	"a{0,}b"	aab	aab
+m 20	Q	"a{1,1}b"	aab	ab
+m 21	Q	"a{1,3}b"	aaaab	aaab
+f 22	Q	"a{1,3}b"	b
+m 23	Q	"a{1,}b"	aab	aab
+f 24	Q	"a{2,3}b"	ab
+m 25	Q	"a{2,3}b"	aaaab	aaab
+f 26	Q	"a{2,}b"	ab
+m 27	Q	"a{2,}b"	aaaab	aaaab
+
+
+
+doing 9 "brackets"
+m  1	&	{a[bc]}		ac	ac
+m  2	&	{a[-]}		a-	a-
+m  3	&	{a[[.-.]]}	a-	a-
+m  4	&L	{a[[.zero.]]}	a0	a0
+m  5	&LM	{a[[.zero.]-9]}	a2	a2
+m  6	&M	{a[0-[.9.]]}	a2	a2
+m  7	&+L	{a[[=x=]]}	ax	ax
+m  8	&+L	{a[[=x=]]}	ay	ay
+f  9	&+L	{a[[=x=]]}	az
+e 10	&	{a[0-[=x=]]}	ERANGE
+m 11	&L	{a[[:digit:]]}	a0	a0
+e 12	&	{a[[:woopsie:]]}	ECTYPE
+f 13	&L	{a[[:digit:]]}	ab
+e 14	&	{a[0-[:digit:]]}	ERANGE
+m 15	&LP	{[[:<:]]a}	a	a
+m 16	&LP	{a[[:>:]]}	a	a
+e 17	&	{a[[..]]b}	ECOLLATE
+e 18	&	{a[[==]]b}	ECOLLATE
+e 19	&	{a[[::]]b}	ECTYPE
+e 20	&	{a[[.a}		EBRACK
+e 21	&	{a[[=a}		EBRACK
+e 22	&	{a[[:a}		EBRACK
+e 23	&	{a[}		EBRACK
+e 24	&	{a[b}		EBRACK
+e 25	&	{a[b-}		EBRACK
+e 26	&	{a[b-c}		EBRACK
+m 27	&M	{a[b-c]}	ab	ab
+m 28	&	{a[b-b]}	ab	ab
+m 29	&M	{a[1-2]}	a2	a2
+e 30	&	{a[c-b]}	ERANGE
+e 31	&	{a[a-b-c]}	ERANGE
+m 32	&M	{a[--?]b}	a?b	a?b
+m 33	&	{a[---]b}	a-b	a-b
+m 34	&	{a[]b]c}	a]c	a]c
+m 35	EP	{a[\]]b}	a]b	a]b
+f 36	bE	{a[\]]b}	a]b
+m 37	bE	{a[\]]b}	"a\\]b"	"a\\]b"
+m 38	eE	{a[\]]b}	"a\\]b"	"a\\]b"
+m 39	EP	{a[\\]b}	"a\\b"	"a\\b"
+m 40	eE	{a[\\]b}	"a\\b"	"a\\b"
+m 41	bE	{a[\\]b}	"a\\b"	"a\\b"
+e 42	-	{a[\Z]b}	EESCAPE
+m 43	&	{a[[b]c}	"a\[c"	"a\[c"
+m 44	EMP	{a[\u00fe-\u0507][\u00ff-\u0300]b} \
+			"a\u0102\u02ffb"	"a\u0102\u02ffb"
+
+
+
+doing 10 "anchors and newlines"
+m  1	&	^a		a	a
+f  2	&^	^a		a
+i  3	&N	^		a	{0 -1}
+i  4	&	{a$}		aba	{2 2}
+f  5	{&$}	{a$}		a
+i  6	&N	{$}		ab	{2 1}
+m  7	&n	^a		a	a
+m  8	&n	"^a"		"b\na"	"a"
+i  9	&w	"^a"		"a\na"	{0 0}
+i 10	&n^	"^a"		"a\na"	{2 2}
+m 11	&n	{a$}		a	a
+m 12	&n	"a\$"		"a\nb"	"a"
+i 13	&n	"a\$"		"a\na"	{0 0}
+i 14	N	^^		a	{0 -1}
+m 15	b	^^		^	^
+i 16	N	{$$}		a	{1 0}
+m 17	b	{$$}		"\$"	"\$"
+m 18	&N	{^$}		""	""
+f 19	&N	{^$}		a
+i 20	&nN	"^\$"		"a\n\nb"	{2 1}
+m 21	N	{$^}		""	""
+m 22	b	{$^}		"\$^"	"\$^"
+m 23	P	{\Aa}		a	a
+m 24	^P	{\Aa}		a	a
+f 25	^nP	{\Aa}		"b\na"
+m 26	P	{a\Z}		a	a
+m 27	{$P}	{a\Z}		a	a
+f 28	{$nP}	{a\Z}		"a\nb"
+e 29	-	^*		BADRPT
+e 30	-	{$*}		BADRPT
+e 31	-	{\A*}		BADRPT
+e 32	-	{\Z*}		BADRPT
+
+
+
+doing 11 "boundary constraints"
+m  1	&LP	{[[:<:]]a}	a	a
+m  2	&LP	{[[:<:]]a}	-a	a
+f  3	&LP	{[[:<:]]a}	ba
+m  4	&LP	{a[[:>:]]}	a	a
+m  5	&LP	{a[[:>:]]}	a-	a
+f  6	&LP	{a[[:>:]]}	ab
+m  7	bLP	{\<a}		a	a
+f  8	bLP	{\<a}		ba
+m  9	bLP	{a\>}		a	a
+f 10	bLP	{a\>}		ab
+m 11	LP	{\ya}		a	a
+f 12	LP	{\ya}		ba
+m 13	LP	{a\y}		a	a
+f 14	LP	{a\y}		ab
+m 15	LP	{a\Y}		ab	a
+f 16	LP	{a\Y}		a-
+f 17	LP	{a\Y}		a
+f 18	LP	{-\Y}		-a
+m 19	LP	{-\Y}		-%	-
+f 20	LP	{\Y-}		a-
+e 21	-	{[[:<:]]*}	BADRPT
+e 22	-	{[[:>:]]*}	BADRPT
+e 23	b	{\<*}		BADRPT
+e 24	b	{\>*}		BADRPT
+e 25	-	{\y*}		BADRPT
+e 26	-	{\Y*}		BADRPT
+m 27	LP	{\ma}		a	a
+f 28	LP	{\ma}		ba
+m 29	LP	{a\M}		a	a
+f 30	LP	{a\M}		ab
+f 31	ILP	{\Ma}		a
+f 32	ILP	{a\m}		a
+
+
+
+doing 12 "character classes"
+m  1	LP	{a\db}		a0b	a0b
+f  2	LP	{a\db}		axb
+f  3	LP	{a\Db}		a0b
+m  4	LP	{a\Db}		axb	axb
+m  5	LP	"a\\sb"		"a b"	"a b"
+m  6	LP	"a\\sb"		"a\tb"	"a\tb"
+m  7	LP	"a\\sb"		"a\nb"	"a\nb"
+f  8	LP	{a\sb}		axb
+m  9	LP	{a\Sb}		axb	axb
+f 10	LP	"a\\Sb"		"a b"
+m 11	LP	{a\wb}		axb	axb
+f 12	LP	{a\wb}		a-b
+f 13	LP	{a\Wb}		axb
+m 14	LP	{a\Wb}		a-b	a-b
+m 15	LP	{\y\w+z\y}	adze-guz	guz
+m 16	LPE	{a[\d]b}	a1b	a1b
+m 17	LPE	"a\[\\s]b"	"a b"	"a b"
+m 18	LPE	{a[\w]b}	axb	axb
+
+
+
+doing 13 "escapes"
+e  1	&	"a\\"		EESCAPE
+m  2	-	{a\<b}		a<b	a<b
+m  3	e	{a\<b}		a<b	a<b
+m  4	bAS	{a\wb}		awb	awb
+m  5	eAS	{a\wb}		awb	awb
+m  6	PL	"a\\ab"		"a\007b"	"a\007b"
+m  7	P	"a\\bb"		"a\bb"	"a\bb"
+m  8	P	{a\Bb}		"a\\b"	"a\\b"
+m  9	MP	"a\\chb"	"a\bb"	"a\bb"
+m 10	MP	"a\\cHb"	"a\bb"	"a\bb"
+m 11	LMP	"a\\e"		"a\033"	"a\033"
+m 12	P	"a\\fb"		"a\fb"	"a\fb"
+m 13	P	"a\\nb"		"a\nb"	"a\nb"
+m 14	P	"a\\rb"		"a\rb"	"a\rb"
+m 15	P	"a\\tb"		"a\tb"	"a\tb"
+m 16	P	"a\\u0008x"	"a\bx"	"a\bx"
+e 17	-	{a\u008x}	EESCAPE
+m 18	P	"a\\u00088x"	"a\b8x"	"a\b8x"
+m 19	P	"a\\U00000008x"	"a\bx"	"a\bx"
+e 20	-	{a\U0000008x}	EESCAPE
+m 21	P	"a\\vb"		"a\vb"	"a\vb"
+m 22	MP	"a\\x08x"	"a\bx"	"a\bx"
+e 23	-	{a\xq}		EESCAPE
+m 24	MP	"a\\x0008x"	"a\bx"	"a\bx"
+e 25	-	{a\z}		EESCAPE
+m 26	MP	"a\\010b"	"a\bb"	"a\bb"
+
+
+
+doing 14 "back references"
+# ugh
+m  1	{R[1P}	{a(b*)c\1}	abbcbb	abbcbb	bb
+m  2	{R[1P}	{a(b*)c\1}	ac	ac	""
+f  3	{R[1P}	{a(b*)c\1}	abbcb
+m  4	{R[1P}	{a(b*)\1}	abbcbb	abb	b
+m  5	{R[1P}	{a(b|bb)\1}	abbcbb	abb	b
+m  6	{R[1P}	{a([bc])\1}	abb	abb	b
+f  7	{R[1P}	{a([bc])\1}	abc
+m  8	{R[1P}	{a([bc])\1}	abcabb	abb	b
+f  9	{R[1P}	{a([bc])*\1}	abc
+f 10	{R[1P}	{a([bc])\1}	abB
+m 11	{iR[1P}	{a([bc])\1}	abB	abB	b
+m 12	{R[1P}	{a([bc])\1+}	abbb	abbb	b
+m 13	{QR[1P}	"a(\[bc])\\1{3,4}"	abbbb	abbbb	b
+f 14	{QR[1P}	"a(\[bc])\\1{3,4}"	abbb
+m 15	{R[1P}	{a([bc])\1*}	abbb	abbb	b
+m 16	{R[1P}	{a([bc])\1*}	ab	ab	b
+m 17	{R[2P}	{a([bc])(\1*)}	ab	ab	b	""
+e 18	-	{a((b)\1)}	ESUBREG
+e 19	-	{a(b)c\2}	ESUBREG
+m 20	{bR[1}	{a\(b*\)c\1}	abbcbb	abbcbb	bb
+
+
+
+doing 15 "octal escapes vs back references"
+# initial zero is always octal
+m  1	MP	"a\\010b"	"a\bb"	"a\bb"
+m  2	MP	"a\\0070b"	"a\0070b"	"a\0070b"
+m  3	MP	"a\\07b"	"a\007b"	"a\007b"
+m  4	MP	"a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\\07c"	"abbbbbbbbbb\007c" \
+	"abbbbbbbbbb\007c"	"b"	"b"	"b"	"b"	"b"	"b" \
+	"b"	"b"	"b"	"b"
+# a single digit is always a backref
+e  5	-	{a\7b}		ESUBREG
+# otherwise it's a backref only if within range (barf!)
+m  6	MP	"a\\10b"	"a\bb"	"a\bb"
+m  7	MP	{a\101b}	aAb	aAb
+m  8	RP	{a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\10c}	abbbbbbbbbbbc \
+	abbbbbbbbbbbc	b	b	b	b	b	b	b \
+	b	b	b
+# but we're fussy about border cases -- guys who want octal should use the zero
+e  9	-	{a((((((((((b\10))))))))))c}	ESUBREG
+# BREs don't have octal, EREs don't have backrefs
+m 10	MP	"a\\12b"	"a\nb"	"a\nb"
+e 11	b	{a\12b}		ESUBREG
+m 12	eAS	{a\12b}		a12b	a12b
+
+
+
+doing 16 "expanded syntax"
+m  1	xP	"a b c"		"abc"	"abc"
+m  2	xP	"a b #oops\nc\td"	"abcd"	"abcd"
+m  3	x	"a\\ b\\\tc"	"a b\tc"	"a b\tc"
+m  4	xP	"a b\\#c"	"ab#c"	"ab#c"
+m  5	xP	"a b\[c d]e"	"ab e"	"ab e"
+m  6	xP	"a b\[c#d]e"	"ab#e"	"ab#e"
+m  7	xP	"a b\[c#d]e"	"abde"	"abde"
+m  8	xSPB	"ab{ d"		"ab\{d"	"ab\{d"
+m  9	xPQ	"ab{ 1 , 2 }c"	"abc"	"abc"
+
+
+
+doing 17 "misc syntax"
+m  1	P	a(?#comment)b	ab	ab
+
+
+
+doing 18 "unmatchable REs"
+f  1	I	a^b		ab
+
+
+
+doing 19 "case independence"
+m  1	&i	ab		Ab	Ab
+m  2	&i	{a[bc]}		aC	aC
+f  3	&i	{a[^bc]}	aB
+m  4	&iM	{a[b-d]}	aC	aC
+f  5	&iM	{a[^b-d]}	aC
+
+
+
+doing 20 "directors and embedded options"
+e  1	&	***?		BADPAT
+m  2	q	***?		***?	***?
+m  3	&P	***=a*b		a*b	a*b
+m  4	q	***=a*b		***=a*b	***=a*b
+m  5	bLP	{***:\w+}	ab	ab
+m  6	eLP	{***:\w+}	ab	ab
+e  7	&	***:***=a*b	BADRPT
+m  8	&P	***:(?b)a+b	a+b	a+b
+m  9	P	(?b)a+b		a+b	a+b
+e 10	e	{(?b)\w+}	BADRPT
+m 11	bAS	{(?b)\w+}	(?b)w+	(?b)w+
+m 12	iP	(?c)a		a	a
+f 13	iP	(?c)a		A
+m 14	APS	{(?e)\W+}	WW	WW
+m 15	P	(?i)a+		Aa	Aa
+f 16	P	"(?m)a.b"	"a\nb"
+m 17	P	"(?m)^b"	"a\nb"	"b"
+f 18	P	"(?n)a.b"	"a\nb"
+m 19	P	"(?n)^b"	"a\nb"	"b"
+f 20	P	"(?p)a.b"	"a\nb"
+f 21	P	"(?p)^b"	"a\nb"
+m 22	P	(?q)a+b		a+b	a+b
+m 23	nP	"(?s)a.b"	"a\nb"	"a\nb"
+m 24	xP	"(?t)a b"	"a b"	"a b"
+m 25	P	"(?w)a.b"	"a\nb"	"a\nb"
+m 26	P	"(?w)^b"	"a\nb"	"b"
+m 27	P	"(?x)a b"	"ab"	"ab"
+e 28	-	(?z)ab		BADOPT
+m 29	P	(?ici)a+	Aa	Aa
+e 30	P	(?i)(?q)a+	BADRPT
+m 31	P	(?q)(?i)a+	(?i)a+	(?i)a+
+m 32	P	(?qe)a+		a	a
+m 33	xP	"(?q)a b"	"a b"	"a b"
+m 34	P	"(?qx)a b"	"a b"	"a b"
+m 35	P	(?qi)ab		Ab	Ab
+
+
+
+doing 21 "capturing"
+m  1	-	a(b)c		abc	abc	b
+m  2	P	a(?:b)c		xabc	abc
+m  3	-	a((b))c		xabcy	abc	b	b
+m  4	P	a(?:(b))c	abcy	abc	b
+m  5	P	a((?:b))c	abc	abc	b
+m  6	P	a(?:(?:b))c	abc	abc
+i  7	Q	"a(b){0}c"	ac	{0 1}	{-1 -1}
+m  8	-	a(b)c(d)e	abcde	abcde	b	d
+m  9	-	(b)c(d)e	bcde	bcde	b	d
+m 10	-	a(b)(d)e	abde	abde	b	d
+m 11	-	a(b)c(d)	abcd	abcd	b	d
+m 12	-	(ab)(cd)	xabcdy	abcd	ab	cd
+m 13	-	a(b)?c		xabcy	abc	b
+i 14	-	a(b)?c		xacy	{1 2}	{-1 -1}
+m 15	-	a(b)?c(d)?e	xabcdey	abcde	b	d
+i 16	-	a(b)?c(d)?e	xacdey	{1 4}	{-1 -1}	{3 3}
+i 17	-	a(b)?c(d)?e	xabcey	{1 4}	{2 2}	{-1 -1}
+i 18	-	a(b)?c(d)?e	xacey	{1 3}	{-1 -1}	{-1 -1}
+m 19	-	a(b)*c		xabcy	abc	b
+i 20	-	a(b)*c		xabbbcy	{1 5}	{4 4}
+i 21	-	a(b)*c		xacy	{1 2}	{-1 -1}
+m 22	-	a(b*)c		xabbbcy	abbbc	bbb
+m 23	-	a(b*)c		xacy	ac	""
+f 24	-	a(b)+c		xacy
+m 25	-	a(b)+c		xabcy	abc	b
+i 26	-	a(b)+c		xabbbcy	{1 5}	{4 4}
+m 27	-	a(b+)c		xabbbcy	abbbc	bbb
+i 28	Q	"a(b){2,3}c"	xabbbcy	{1 5}	{4 4}
+i 29	Q	"a(b){2,3}c"	xabbcy	{1 4}	{3 3}
+f 30	Q	"a(b){2,3}c"	xabcy
+m 31	LP	"\\y(\\w+)\\y"	"-- abc-"	"abc"	"abc"
+m 32	-	a((b|c)d+)+	abacdbd	acdbd	bd	b
+m 33	N	(.*).*		abc	abc	abc
+m 34	N	(a*)*		bc	""	""
+
+
+
+doing 22 "multicharacter collating elements"
+# again ugh
+# currently disabled because the fake MCCE we use for testing is unavailable
+xx m  1	&+L	{a[c]e}		ace	ace
+xx f  2	&+I	{a[c]h}		ach
+xx m  3	&+L	{a[[.ch.]]}	ach	ach
+xx f  4	&+L	{a[[.ch.]]}	ace
+xx m  5	&+L	{a[c[.ch.]]}	ac	ac
+xx m  6	&+L	{a[c[.ch.]]}	ace	ac
+xx m  7	&+L	{a[c[.ch.]]}	ache	ach
+xx f  8	&+L	{a[^c]e}	ace
+xx m  9	&+L	{a[^c]e}	abe	abe
+xx m 10	&+L	{a[^c]e}	ache	ache
+xx f 11	&+L	{a[^[.ch.]]}	ach
+xx m 12	&+L	{a[^[.ch.]]}	ace	ac
+xx m 13	&+L	{a[^[.ch.]]}	ac	ac
+xx m 14	&+L	{a[^[.ch.]]}	abe	ab
+xx f 15	&+L	{a[^c[.ch.]]}	ach
+xx f 16	&+L	{a[^c[.ch.]]}	ace
+xx f 17	&+L	{a[^c[.ch.]]}	ac
+xx m 18	&+L	{a[^c[.ch.]]}	abe	ab
+xx m 19	&+L	{a[^b]}		ac	ac
+xx m 20	&+L	{a[^b]}		ace	ac
+xx m 21	&+L	{a[^b]}		ach	ach
+xx f 22	&+L	{a[^b]}		abe
+
+
+
+doing 23 "lookahead constraints"
+m  1	HP	a(?=b)b*	ab	ab
+f  2	HP	a(?=b)b*	a
+m  3	HP	a(?=b)b*(?=c)c*	abc	abc
+f  4	HP	a(?=b)b*(?=c)c*	ab
+f  5	HP	a(?!b)b*	ab
+m  6	HP	a(?!b)b*	a	a
+m  7	HP	(?=b)b		b	b
+f  8	HP	(?=b)b		a
+
+
+
+doing 24 "non-greedy quantifiers"
+m  1	P	ab+?		abb	ab
+m  2	P	ab+?c		abbc	abbc
+m  3	P	ab*?		abb	a
+m  4	P	ab*?c		abbc	abbc
+m  5	P	ab??		ab	a
+m  6	P	ab??c		abc	abc
+m  7	PQ	"ab{2,4}?"	abbbb	abb
+m  8	PQ	"ab{2,4}?c"	abbbbc	abbbbc
+
+
+
+doing 25 "mixed quantifiers"
+xx to be done, actually
+xx should include |
+
+
+
+doing 26 "tricky cases"
+# attempts to trick the matcher into accepting a short match
+m  1	-	(week|wee)(night|knights)	weeknights	weeknights \
+	wee	knights
+m  2	RP	{a(bc*).*\1}	abccbccb	abccbccb	b
+m  3	-	{a(b.[bc]*)+}	abcbd	abcbd	bd
+
+
+
+doing 27 "implementation misc."
+# duplicate arcs are suppressed
+m  1	P	a(?:b|b)c	abc	abc
+# make color/subcolor relationship go back and forth
+m  2	&	{[ab][ab][ab]}	aba	aba
+m  3	&	{[ab][ab][ab][ab][ab][ab][ab]}	abababa	abababa
+
+
+
+doing 28 "boundary busters etc."
+# color-descriptor allocation changes at 10
+m  1	&	abcdefghijkl	abcdefghijkl	abcdefghijkl
+# so does arc allocation
+m  2	P	a(?:b|c|d|e|f|g|h|i|j|k|l|m)n	agn	agn
+# subexpression tracking also at 10
+m  3	-	a(((((((((((((b)))))))))))))c	abc	abc	b	b	b	b	b	b	b	b	b	b	b	b	b
+# state-set handling changes slightly at unsigned size (might be 64...)
+# (also stresses arc allocation)
+m  4	Q	"ab{1,100}c"	abbc	abbc
+m  5	Q	"ab{1,100}c"	abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc \
+	abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc
+m  6	Q	"ab{1,100}c" \
+	abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc \
+	abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc
+# force small cache and bust it, several ways
+m  7	LP	{\w+abcdefgh}	xyzabcdefgh	xyzabcdefgh
+m  8	%LP	{\w+abcdefgh}	xyzabcdefgh	xyzabcdefgh
+m  9	%LP	{\w+abcdefghijklmnopqrst}	xyzabcdefghijklmnopqrst \
+	xyzabcdefghijklmnopqrst
+i 10	%LP	{\w+(abcdefgh)?}	xyz	{0 2}	{-1 -1}
+i 11	%LP	{\w+(abcdefgh)?}	xyzabcdefg	{0 9}	{-1 -1}
+i 12	%LP	{\w+(abcdefghijklmnopqrst)?}	xyzabcdefghijklmnopqrs \
+	{0 21}	{-1 -1}
+
+
+
+doing 29 "misc. oddities and old bugs"
+e  1	&	***		BADRPT
+m  2	N	a?b*		abb	abb
+m  3	N	a?b*		bb	bb
+m  4	&	a*b		aab	aab
+
+
+
+doing 0 "flush"			;# to flush any leftover complaints
+return
-- 
cgit v0.12