Integrated latest regexp changes from Henry Spencer.

Moved regexp-related declarations out of tclInt.h and into tclRegexp.h. Added "encoding" command.
author: stanton <stanton> 1998-10-21 20:39:57 (GMT)
committer: stanton <stanton> 1998-10-21 20:39:57 (GMT)
commit: 7e7056e21d0a0d9fa39bdfd742e82b101a6c4b7c (patch)
tree: 99e08a09e1567ade05e7bc7edac3758b3695d424 /generic
parent: 966ff877247e93fbe6e641cfa77df19d03cfe932 (diff)
download: tcl-7e7056e21d0a0d9fa39bdfd742e82b101a6c4b7c.zip
tcl-7e7056e21d0a0d9fa39bdfd742e82b101a6c4b7c.tar.gz
tcl-7e7056e21d0a0d9fa39bdfd742e82b101a6c4b7c.tar.bz2
26 files changed, 2972 insertions, 2388 deletions
diff --git a/generic/chr.h b/generic/chr.h
deleted file mode 100644
index 6a21159..0000000
--- a/generic/chr.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * chr.h --
- *
- * 	Regexp package file:  Unichar version of stuff related to the
- * 	nature of a character.
- *
- * Copyright (c) 1998 Henry Spencer.  All rights reserved.
- * 
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., and Sun Microsystems Inc., none of
- * whom are responsible for the results.  The author thanks all of them.
- * 
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications. 
- * 
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- * 
- * Copyright (c) 1998 by Sun Microsystems, Inc.
- *
- * See the file "license.terms" for information on usage and redistribution
- * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
- *
- * RCS: @(#) $Id: chr.h,v 1.1.2.2 1998/10/03 01:56:40 stanton Exp $
- */
-
-typedef Tcl_UniChar chr;	/* internal character type */
-typedef int pchr;		/* what it promotes to */
-typedef unsigned uchr;		/* unsigned type big enough to hold a chr */
-#define	CHRBITS	(sizeof(Tcl_UniChar) * CHAR_BIT)	/* bits in a chr */
-#define	CHR(c)	(UCHAR(c))		/* turn a char literal into a chr literal */
-#define	DIGITVAL(c)	((c)-'0')	/* turn a chr digit into its value */
-
-/*
- * char names for the externally-visible functions
- */
-#define	compile	re_ucomp
-#define	exec	re_uexec
diff --git a/generic/locale.c b/generic/locale.c
deleted file mode 100644
index ca56fc4..0000000
--- a/generic/locale.c
+++ /dev/null
@@ -1,675 +0,0 @@
-/*
- * locale.c --
- *
- *	Regexp package file:
- * 	collating-element handling and other locale-specific stuff
- *
- * Copyright (c) 1998 Henry Spencer.  All rights reserved.
- * 
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., and Sun Microsystems Inc., none of
- * whom are responsible for the results.  The author thanks all of them.
- * 
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications. 
- * 
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- * 
- * Copyright (c) 1998 by Sun Microsystems, Inc.
- *
- * See the file "license.terms" for information on usage and redistribution
- * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
- *
- * RCS: @(#) $Id: locale.c,v 1.1.2.2 1998/10/03 01:56:41 stanton Exp $
- */
-
-/*
- * This is largely dummy code, since it needs locale interfaces.  The
- * dummy code implements more or less the C locale.  Parts of the code
- * are marked "dummy" and "generic" in hopes of making the situation
- * clearer.
- *
- * As a hack for testing, if REG_FAKE is turned on, we add a single 
- * collating element ch between c and d, and a single equivalence class
- * containing x and y.
- *
- * The type "celt" is an entirely opaque non-array type -- it need not be 
- * an integer type, it could be (say) a pointer -- which has distinct values 
- * for all chrs and all collating elements.  The only things the outside
- * world does to celts are copying them around and comparing them for
- * equality; everything else is done in this file.  There need be no "null" 
- * value for celt.  The dummy code uses wint_t as celt, with WEOF as the
- * celt code for ch (ugh!).
- */
-
-/*
- * dummy:
- ^ #def	MAXCE	2	// longest CE code is prepared to handle
- ^ typedef wint_t celt;	// type holding distinct codes for all chrs, all CEs
- */
-
-/* dummy:  character-name table */
-static struct cname {
-	char *name;
-	char code;
-} cnames[] = {
-	{"NUL",	'\0'},
-	{"SOH",	'\001'},
-	{"STX",	'\002'},
-	{"ETX",	'\003'},
-	{"EOT",	'\004'},
-	{"ENQ",	'\005'},
-	{"ACK",	'\006'},
-	{"BEL",	'\007'},
-	{"alert",	'\007'},
-	{"BS",		'\010'},
-	{"backspace",	'\b'},
-	{"HT",		'\011'},
-	{"tab",		'\t'},
-	{"LF",		'\012'},
-	{"newline",	'\n'},
-	{"VT",		'\013'},
-	{"vertical-tab",	'\v'},
-	{"FF",		'\014'},
-	{"form-feed",	'\f'},
-	{"CR",		'\015'},
-	{"carriage-return",	'\r'},
-	{"SO",	'\016'},
-	{"SI",	'\017'},
-	{"DLE",	'\020'},
-	{"DC1",	'\021'},
-	{"DC2",	'\022'},
-	{"DC3",	'\023'},
-	{"DC4",	'\024'},
-	{"NAK",	'\025'},
-	{"SYN",	'\026'},
-	{"ETB",	'\027'},
-	{"CAN",	'\030'},
-	{"EM",	'\031'},
-	{"SUB",	'\032'},
-	{"ESC",	'\033'},
-	{"IS4",	'\034'},
-	{"FS",	'\034'},
-	{"IS3",	'\035'},
-	{"GS",	'\035'},
-	{"IS2",	'\036'},
-	{"RS",	'\036'},
-	{"IS1",	'\037'},
-	{"US",	'\037'},
-	{"space",		' '},
-	{"exclamation-mark",	'!'},
-	{"quotation-mark",	'"'},
-	{"number-sign",		'#'},
-	{"dollar-sign",		'$'},
-	{"percent-sign",		'%'},
-	{"ampersand",		'&'},
-	{"apostrophe",		'\''},
-	{"left-parenthesis",	'('},
-	{"right-parenthesis",	')'},
-	{"asterisk",	'*'},
-	{"plus-sign",	'+'},
-	{"comma",	','},
-	{"hyphen",	'-'},
-	{"hyphen-minus",	'-'},
-	{"period",	'.'},
-	{"full-stop",	'.'},
-	{"slash",	'/'},
-	{"solidus",	'/'},
-	{"zero",		'0'},
-	{"one",		'1'},
-	{"two",		'2'},
-	{"three",	'3'},
-	{"four",		'4'},
-	{"five",		'5'},
-	{"six",		'6'},
-	{"seven",	'7'},
-	{"eight",	'8'},
-	{"nine",		'9'},
-	{"colon",	':'},
-	{"semicolon",	';'},
-	{"less-than-sign",	'<'},
-	{"equals-sign",		'='},
-	{"greater-than-sign",	'>'},
-	{"question-mark",	'?'},
-	{"commercial-at",	'@'},
-	{"left-square-bracket",	'['},
-	{"backslash",		'\\'},
-	{"reverse-solidus",	'\\'},
-	{"right-square-bracket",	']'},
-	{"circumflex",		'^'},
-	{"circumflex-accent",	'^'},
-	{"underscore",		'_'},
-	{"low-line",		'_'},
-	{"grave-accent",		'`'},
-	{"left-brace",		'{'},
-	{"left-curly-bracket",	'{'},
-	{"vertical-line",	'|'},
-	{"right-brace",		'}'},
-	{"right-curly-bracket",	'}'},
-	{"tilde",		'~'},
-	{"DEL",	'\177'},
-	{NULL,	0}
-};
-
-/* dummy:  character-class table */
-static struct cclass {
-	char *name;
-	char *chars;
-	int hasch;
-} cclasses[] = {
-	{"alnum",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789",				1},
-	{"alpha",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
-					1},
-	{"blank",	" \t",		0},
-	{"cntrl",	"\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
-\25\26\27\30\31\32\33\34\35\36\37\177",	0},
-	{"digit",	"0123456789",	0},
-	{"graph",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
-					1},
-	{"lower",	"abcdefghijklmnopqrstuvwxyz",
-					1},
-	{"print",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
-					1},
-	{"punct",	"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
-					0},
-	{"space",	"\t\n\v\f\r ",	0},
-	{"upper",	"ABCDEFGHIJKLMNOPQRSTUVWXYZ",
-					0},
-	{"xdigit",	"0123456789ABCDEFabcdef",
-					0},
-	{NULL,		0,		0}
-};
-
-#define	CH	WEOF		/* dummy */
-
-/*
- - nces - how many distinct collating elements are there?
- * This is pure dummy code, although a straight "return 0" is definitely
- * what's wanted for all locales lucky enough not to have these stupid
- * things.  Case counterparts should be included.
- ^ static int nces(struct vars *);
- */
-static int
-nces(v)
-struct vars *v;
-{
-	return (v->cflags&REG_FAKE) ? 1 : 0;
-}
-
-/*
- - nleaders - how many chrs can be first chrs of collating elements?
- * This is pure dummy code, although a straight "return 0" is definitely
- * what's wanted for all locales lucky enough not to have these stupid
- * things.  Case counterparts should be included.
- ^ static int nleaders(struct vars *);
- */
-static int
-nleaders(v)
-struct vars *v;
-{
-	return (v->cflags&REG_FAKE) ? 1 : 0;
-}
-
-/*
- - allces - return a cvec with all the collating elements of the locale
- * This would be kind of costly if there were large numbers of them; with
- * any luck, that case does not occur in reality.  Note that case variants
- * should be included; "all" means *all*.
- * This is pure dummy code.
- ^ static struct cvec *allces(struct vars *, struct cvec *);
- */
-static struct cvec *
-allces(v, cv)
-struct vars *v;
-struct cvec *cv;		/* this is supposed to have enough room */
-{
-	assert(cv->cespace > 0);
-	(VOID) clearcvec(cv);
-	if (v->cflags&REG_FAKE)
-		addce(cv, ch());
-	return cv;
-}
-
-/*
- - element - map collating-element name to celt
- ^ static celt element(struct vars *, chr *, chr *);
- */
-static celt
-element(v, startp, endp)
-struct vars *v;
-chr *startp;			/* points to start of name */
-chr *endp;			/* points just past end of name */
-{
-	register struct cname *cn;
-	register size_t len;
-	Tcl_DString ds;
-	char *name;
-
-	/* generic:  one-chr names stand for themselves */
-	assert(startp < endp);
-	len = endp - startp;
-	if (len == 1)
-		return *startp;
-
-	NOTE(REG_ULOCALE);
-
-	/*
-	 * INTL: ISO only, search table
-	 */
-
-	Tcl_DStringInit(&ds);
-	name = TclUniCharToUtfDString(startp, (int) len, &ds);
-
-	for (cn = cnames; cn->name != NULL; cn++) {
-	    if (strlen(cn->name) == len && strncmp(cn->name, name, len) == 0) {
-		return UCHAR(cn->code);
-	    }
-	}
-	Tcl_DStringFree(&ds);
-
-	/*
-	 * Special case for testing.
-	 */
-
-	if ((v->cflags&REG_FAKE) && len == 2) {
-		if (*startp == 'c' && *(startp+1) == 'h')
-			return (celt) CH;
-	}
-
-	/* generic:  couldn't find it */
-	ERR(REG_ECOLLATE);
-	return 0;
-}
-
-/*
- - range - supply cvec for a range, including legality check
- * Must include case counterparts on request.
- ^ static struct cvec *range(struct vars *, celt, celt, int);
- */
-static struct cvec *
-range(v, a, b, cases)
-struct vars *v;
-celt a;
-celt b;				/* might equal a */
-int cases;			/* case-independent? */
-{
-	int nchrs;
-	int appendch;
-	struct cvec *cv;
-	celt c;
-
-	/* generic:  legality check */
-	if (a != b && !before(a, b)) {
-		ERR(REG_ERANGE);
-		return NULL;
-	}
-
-	/* mostly dummy:  compute vector length, note presence of ch */
-	appendch = 0;
-	if (a == (celt) CH) {
-		if (b == (celt) CH) {
-			a = 'c';
-			b = a - 1;	/* kludge to get no chrs */
-			appendch = 1;
-		} else {
-			a = 'd';
-			appendch = 1;
-		}
-	} else {
-		if (b == CH) {
-			appendch = 1;
-			b = 'c';
-		} else {
-			if ((v->cflags&REG_FAKE) && a <= 'c' && b >= 'd')
-				appendch = 1;
-		}
-	}
-	nchrs = b - a + 1;
-	if (cases)
-		nchrs *= 2;
-	cv = getcvec(v, nchrs, appendch);
-	NOERRN();
-
-	/* mostly dummy:  fill in vector */
-	for (c = a; c <= b; c++) {
-		addchr(cv, c);
-		if (cases) {
-			if (TclUniCharIsUpper((Tcl_UniChar)c))
-				addchr(cv, (chr)Tcl_UniCharToLower(
-				    (Tcl_UniChar)c));
-			else if (TclUniCharIsLower((Tcl_UniChar)c))
-				addchr(cv, (chr)Tcl_UniCharToUpper(
-				    (Tcl_UniChar)c));
-		}
-	}
-	if (appendch)
-		addce(cv, ch());
-
-	return cv;
-}
-
-/*
- - before - is celt x before celt y, for purposes of range legality?
- * This is all dummy code.
- ^ static int before(celt, celt);
- */
-static int			/* predicate */
-before(x, y)
-celt x;
-celt y;
-{
-	int isxch = (x == CH);
-	int isych = (y == CH);
-
-	if (!isxch && !isych && x < y)
-		return 1;
-	if (isxch && !isych && y >= 'd')
-		return 1;
-	if (!isxch && isych && x <= 'c')
-		return 1;
-	return 0;
-}
-
-/*
- - eclass - supply cvec for an equivalence class
- * Must include case counterparts on request.
- * This is all dummy code.
- ^ static struct cvec *eclass(struct vars *, celt, int);
- */
-static struct cvec *
-eclass(v, c, cases)
-struct vars *v;
-celt c;
-int cases;			/* all cases? */
-{
-	struct cvec *cv;
-
-	if (c == CH) {
-		cv = getcvec(v, 0, 1);
-		assert(cv != NULL);
-		addce(cv, ch());
-		return cv;
-	}
-
-	if ((v->cflags&REG_FAKE) && (c == 'x' || c == 'y')) {
-		cv = getcvec(v, 4, 0);
-		assert(cv != NULL);
-		addchr(cv, (chr)'x');
-		addchr(cv, (chr)'y');
-		if (cases) {
-			addchr(cv, (chr)'X');
-			addchr(cv, (chr)'Y');
-		}
-		return cv;
-	}
-
-	/* no equivalence class by that name */
-	if (cases)
-		return allcases(v, c);
-	cv = getcvec(v, 1, 0);
-	assert(cv != NULL);
-	addchr(cv, (chr)c);
-	return cv;
-}
-
-/*
- - cclass - supply cvec for a character class
- * Must include case counterparts on request.
- * This is all dummy code.
- ^ static struct cvec *cclass(struct vars *, chr *, chr *, int);
- */
-static struct cvec *
-cclass(v, startp, endp, cases)
-struct vars *v;
-chr *startp;			/* where the name starts */
-chr *endp;			/* just past the end of the name */
-int cases;			/* case-independent? */
-{
-	size_t len;
-	register char *p;
-	register struct cclass *cc;
-	int hasch;
-	struct cvec *cv;
-	Tcl_DString ds;
-	char *name;
-
-	/* check out the name */
-	len = endp - startp;
-
-	Tcl_DStringInit(&ds);
-	name = TclUniCharToUtfDString(startp, (int) len, &ds);
-
-	if (cases && len == 5 && (strncmp("lower", name, 5) == 0 ||
-				strncmp("upper", name, 5) == 0))
-		name = "alpha";
-	for (cc = cclasses; cc->name != NULL; cc++) {
-	    if (strlen(cc->name) == len && strncmp(cc->name, name, len) == 0) {
-		break;
-	    }
-	}
-	Tcl_DStringFree(&ds);
-
-	if (cc->name == NULL) {
-		ERR(REG_ECTYPE);
-		return NULL;
-	}
-
-	/* set up vector */
-	hasch = (v->cflags&REG_FAKE) ? cc->hasch : 0;
-	cv = getcvec(v, (int) strlen(cc->chars), hasch);
-	if (cv == NULL) {
-		ERR(REG_ESPACE);
-		return NULL;
-	}
-
-	/* fill it in */
-	for (p = cc->chars; *p != '\0'; p++)
-		addchr(cv, (chr)*p);
-	if (hasch)
-		addce(cv, ch());
-
-	return cv;
-}
-
-/*
- - allcases - supply cvec for all case counterparts of a chr (including itself)
- * This is a shortcut, preferably an efficient one, for simple characters;
- * messy cases are done via range().
- * This is all dummy code.
- ^ static struct cvec *allcases(struct vars *, pchr);
- */
-static struct cvec *
-allcases(v, c)
-struct vars *v;
-pchr c;
-{
-	struct cvec *cv = getcvec(v, 2, 0);
-
-	assert(cv != NULL);
-	addchr(cv, c);
-	if (TclUniCharIsUpper((Tcl_UniChar)c))
-		addchr(cv, (chr)Tcl_UniCharToLower((Tcl_UniChar)c));
-	else if (TclUniCharIsLower((Tcl_UniChar)c))
-		addchr(cv, (chr)Tcl_UniCharToUpper((Tcl_UniChar)c));
-
-	return cv;
-}
-
-/*
- - sncmp - case-independent chr-string compare
- * REG_ICASE backrefs need this.  It should preferably be efficient.
- * This is all dummy code.
- ^ static int sncmp(CONST chr *, CONST chr *, size_t);
- */
-static int			/* -1, 0, 1 for <, =, > */
-sncmp(x, y, len)
-CONST chr *x;
-CONST chr *y;
-size_t len;			/* maximum length of comparison */
-{
-    int diff;
-    size_t i;
-
-    for (i = 0; i < len; i++) {
-	diff = Tcl_UniCharToLower(x[i]) - Tcl_UniCharToLower(y[i]);
-	if (diff) {
-	    return diff;
-	}
-    }
-    return 0;
-}
-
-/*
- * Utility functions for handling cvecs
- */
-
-/*
- - newcvec - allocate a new cvec
- ^ static struct cvec *newcvec(int, int);
- */
-static struct cvec *
-newcvec(nchrs, nces)
-int nchrs;			/* to hold this many chrs... */
-int nces;			/* ... and this many CEs */
-{
-	size_t n;
-	size_t nc;
-	struct cvec *cv;
-
-	nc = (size_t)nchrs + (size_t)nces*(MAXCE+1);
-	n = sizeof(struct cvec) + (size_t)(nces-1)*sizeof(chr *) +
-								nc*sizeof(chr);
-	cv = (struct cvec *)ckalloc(n);
-	if (cv == NULL)
-		return NULL;
-	cv->chrspace = nc;
-	cv->chrs = (chr *)&cv->ces[nces];	/* chrs just after CE ptrs */
-	cv->cespace = nces;
-	return clearcvec(cv);
-}
-
-/*
- - clearcvec - clear a possibly-new cvec
- * Returns pointer as convenience.
- ^ static struct cvec *clearcvec(struct cvec *);
- */
-static struct cvec *
-clearcvec(cv)
-struct cvec *cv;
-{
-	int i;
-
-	assert(cv != NULL);
-	cv->nchrs = 0;
-	assert(cv->chrs == (chr *)&cv->ces[cv->cespace]);
-	cv->nces = 0;
-	cv->ncechrs = 0;
-	for (i = 0; i < cv->cespace; i++)
-		cv->ces[i] = NULL;
-
-	return cv;
-}
-
-/*
- - addchr - add a chr to a cvec
- ^ static VOID addchr(struct cvec *, pchr);
- */
-static VOID
-addchr(cv, c)
-struct cvec *cv;
-pchr c;
-{
-	assert(cv->nchrs < cv->chrspace - cv->ncechrs);
-	cv->chrs[cv->nchrs++] = (chr) c;
-}
-
-/*
- - addce - add a CE to a cvec
- ^ static VOID addce(struct cvec *, chr *);
- */
-static VOID
-addce(cv, startp)
-struct cvec *cv;
-chr *startp;			/* 0-terminated text */
-{
-	int n = wcslen(startp);
-	int i;
-	chr *s;
-	chr *d;
-
-	assert(n > 0);
-	assert(cv->nchrs + n < cv->chrspace - cv->ncechrs);
-	assert(cv->nces < cv->cespace);
-	d = &cv->chrs[cv->chrspace - cv->ncechrs - n - 1];
-	cv->ces[cv->nces++] = d;
-	for (s = startp, i = n; i > 0; s++, i--)
-		*d++ = *s;
-	*d = 0;		/* endmarker */
-	assert(d == &cv->chrs[cv->chrspace - cv->ncechrs]);
-	cv->ncechrs += n + 1;
-}
-
-/*
- - haschr - does a cvec contain this chr?
- ^ static int haschr(struct cvec *, pchr);
- */
-static int			/* predicate */
-haschr(cv, c)
-struct cvec *cv;
-pchr c;
-{
-	int i;
-	chr *p;
-
-	for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--)
-		if (*p == c)
-			return 1;
-	return 0;
-}
-
-/*
- - getcvec - get a cvec, remembering it as v->cv
- ^ static struct cvec *getcvec(struct vars *, int, int);
- */
-static struct cvec *
-getcvec(v, nchrs, nces)
-struct vars *v;
-int nchrs;			/* to hold this many chrs... */
-int nces;			/* ... and this many CEs */
-{
-	if (v->cv != NULL && nchrs <= v->cv->chrspace && nces <= v->cv->cespace)
-		return clearcvec(v->cv);
-
-	if (v->cv != NULL)
-		freecvec(v->cv);
-	v->cv = newcvec(nchrs, nces);
-	if (v->cv == NULL)
-		ERR(REG_ESPACE);
-
-	return v->cv;
-}
-
-/*
- - freecvec - free a cvec
- ^ static VOID freecvec(struct cvec *);
- */
-static VOID
-freecvec(cv)
-struct cvec *cv;
-{
-	ckfree((char *)cv);
-}
diff --git a/generic/color.c b/generic/regc_color.c
index fa640f9..4a8a87c 100644
--- a/generic/color.c
+++ b/generic/regc_color.c
@@ -1,85 +1,25 @@
 /*
- * color.c  --
+ * colorings of characters
+ * This file is #included by regcomp.c.
  *
- *	Regexp package file:  colorings of characters.
- *	Note that there are some incestuous relationships between this code and
- *	NFA arc maintenance, which perhaps ought to be cleaned up sometime.
- *
- * Copyright (c) 1998 Henry Spencer.  All rights reserved.
- * 
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., and Sun Microsystems Inc., none of
- * whom are responsible for the results.  The author thanks all of them.
- * 
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications. 
- * 
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- * 
- * Copyright (c) 1998 by Sun Microsystems, Inc.
- *
- * See the file "license.terms" for information on usage and redistribution
- * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
- *
- * RCS: @(#) $Id: color.c,v 1.1.2.2 1998/10/03 01:56:40 stanton Exp $
+ * Note that there are some incestuous relationships between this code and
+ * NFA arc maintenance, which perhaps ought to be cleaned up sometime.
  */
 
+
+
 /*
- * The innards.
- */
-struct colors {
-	color ccolor[BYTTAB];
-};
-struct ptrs {
-	union tree *pptr[BYTTAB];
-};
-union tree {
-	struct colors colors;
-	struct ptrs ptrs;
-};
-#define	tcolor	colors.ccolor
-#define	tptr	ptrs.pptr
-/*
- * Some of the function prototypes need this.
- ^ union tree;
+ * If this declaration draws a complaint about a negative array size,
+ * then CHRBITS is defined incorrectly for the chr type.
  */
+static char isCHRBITSright[NEGIFNOT(sizeof(chr)*CHAR_BIT == CHRBITS)];
+
+
+
+#define	CISERR()	VISERR(cm->v)
+#define	CERR(e)		VERR(cm->v, (e))
 
-struct colordesc {
-	uchr nchrs;		/* number of chars of this color */
-	color sub;		/* open subcolor of this one, or NOSUB */
-#		define	NOSUB	COLORLESS
-	struct arc *arcs;	/* color chain */
-#	define	UNUSEDCOLOR(cd)	((cd)->nchrs == 0 && (cd)->sub == NOSUB)
-	int flags;
-#		define	PSEUDO	1	/* pseudocolor, no real chars */
-};
-
-struct colormap {
-	int magic;
-#		define	CMMAGIC	0x876
-	struct vars *v;			/* for error reporting */
-	color rest;
-	int filled;			/* has it been filled? */
-	int ncds;			/* number of colordescs */
-	struct colordesc *cd;
-#	define	CDEND(cm)	(&(cm)->cd[(cm)->ncds])
-#		define	NINLINECDS	10
-	struct colordesc cds[NINLINECDS];
-	union tree tree[NBYTS];		/* tree top, plus fill blocks */
-};
 
-#ifdef COMPILE
 
 /*
  - newcm - get new colormap
@@ -96,7 +36,7 @@ struct vars *v;
 	union tree *nextt;
 	struct colordesc *cd;
 
-	cm = (struct colormap *)ckalloc(sizeof(struct colormap));
+	cm = (struct colormap *)MALLOC(sizeof(struct colormap));
 	if (cm == NULL) {
 		ERR(REG_ESPACE);
 		return NULL;
@@ -114,15 +54,13 @@ struct vars *v;
 		cd->arcs = NULL;
 		cd->flags = 0;
 	}
-	cm->cd[WHITE].nchrs = WCHAR_MAX - WCHAR_MIN;
+	cm->cd[WHITE].nchrs = CHR_MAX - CHR_MIN + 1;
 
 	/* treetop starts as NULLs if there are lower levels */
 	t = cm->tree;
-	if (NBYTS > 1) {
-	    for (i = BYTTAB-1; i >= 0; i--)
-		    t->tptr[i] = NULL;
-	}
-
+	if (NBYTS > 1)
+		for (i = BYTTAB-1; i >= 0; i--)
+			t->tptr[i] = NULL;
 	/* if no lower levels, treetop and last fill block are the same */
 
 	/* fill blocks point to next fill block... */
@@ -149,13 +87,11 @@ freecm(cm)
 struct colormap *cm;
 {
 	cm->magic = 0;
-	if (NBYTS > 1) {
-	    cmtreefree(cm, cm->tree, 0);
-	}
-	if (cm->cd != cm->cds) {
-		ckfree((char *)cm->cd);
-	}
-	ckfree((char *) cm);	/* mem leak (CCS). */
+	if (NBYTS > 1)
+		cmtreefree(cm, cm->tree, 0);
+	if (cm->cd != cm->cds)
+		FREE(cm->cd);
+	FREE(cm);
 }
 
 /*
@@ -176,10 +112,9 @@ int level;			/* level number (top == 0) of this block */
 	for (i = BYTTAB-1; i >= 0; i--) {
 		t = tree->tptr[i];
 		if (t != NULL && t != fillt) {
-			if ((int) level < (int) NBYTS-2) {	/* more pointer blocks below */
+			if (level < NBYTS-2)	/* more pointer blocks below */
 				cmtreefree(cm, t, level+1);
-			}
-			ckfree((char *) t);
+			FREE(t);
 		}
 	}
 }
@@ -221,17 +156,13 @@ int level;			/* level number (top == 0) of this block */
 		t = tree->tptr[i];
 		if (t == fillt)			/* oops */
 			{}
-		else if (t == NULL) {
+		else if (t == NULL)
 			tree->tptr[i] = fillt;
-		}
-		else if ((int) level < (int) NBYTS-2)	{/* more pointer blocks below */
+		else if (level < NBYTS-2)	/* more pointer blocks below */
 			cmtreefill(cm, t, level+1);
-		}
 	}
 }
 
-#endif				/* ifdef COMPILE */
-
 /*
  - getcolor - get the color of a character from a colormap
  ^ static color getcolor(struct colormap *, pchr);
@@ -261,8 +192,6 @@ pchr c;
 	return cm->rest;
 }
 
-#ifdef COMPILE
-
 /*
  - setcolor - set the color of a character in a colormap
  ^ static color setcolor(struct colormap *, pchr, pcolor);
@@ -283,7 +212,7 @@ pcolor co;
 	color prev;
 
 	assert(cm->magic == CMMAGIC);
-	if (VISERR(cm->v) || co == COLORLESS)
+	if (CISERR() || co == COLORLESS)
 		return COLORLESS;
 
 	t = cm->tree;
@@ -293,10 +222,10 @@ pcolor co;
 		t = t->tptr[b];
 		if (t == NULL) {	/* fell off an incomplete part */
 			bottom = (shift <= BYTBITS) ? 1 : 0;
-			t = (union tree *)ckalloc((bottom) ?
+			t = (union tree *)MALLOC((bottom) ?
 				sizeof(struct colors) : sizeof(struct ptrs));
 			if (t == NULL) {
-				VERR(cm->v, REG_ESPACE);
+				CERR(REG_ESPACE);
 				return COLORLESS;
 			}
 			if (bottom)
@@ -312,7 +241,7 @@ pcolor co;
 
 	b = uc & BYTMASK;
 	prev = t->tcolor[b];
-	t->tcolor[b] = (color) co;
+	t->tcolor[b] = (color)co;
 	return prev;
 }
 
@@ -328,7 +257,7 @@ struct colormap *cm;
 	struct colordesc *end;
 	struct colordesc *lastused;
 
-	if (VISERR(cm->v))
+	if (CISERR())
 		return COLORLESS;
 
 	lastused = NULL;
@@ -337,7 +266,7 @@ struct colormap *cm;
 		if (!UNUSEDCOLOR(cd))
 			lastused = cd;
 	assert(lastused != NULL);
-	return (color) (lastused - cm->cd);
+	return (color)(lastused - cm->cd);
 }
 
 /*
@@ -352,31 +281,31 @@ struct colormap *cm;
 	struct colordesc *cd;
 	struct colordesc *end;
 	struct colordesc *firstnew;
-	int n;
+	size_t n;
 
-	if (VISERR(cm->v))
+	if (CISERR())
 		return COLORLESS;
 
 	end = CDEND(cm);
 	for (cd = cm->cd; cd < end; cd++)
 		if (UNUSEDCOLOR(cd)) {
 			assert(cd->arcs == NULL);
-			return (color) (cd - cm->cd);
+			return (color)(cd - cm->cd);
 		}
 
 	/* oops, must allocate more */
 	n = cm->ncds * 2;
 	if (cm->cd == cm->cds) {
-		cd = (struct colordesc *)ckalloc(sizeof(struct colordesc) * n);
+		cd = (struct colordesc *)MALLOC(sizeof(struct colordesc) * n);
 		if (cd != NULL)
-			memcpy((VOID *)cd, (VOID *)cm->cds, cm->ncds *
+			memcpy(VS(cd), VS(cm->cds), cm->ncds *
 						sizeof(struct colordesc));
 	} else {
-		cd = (struct colordesc *)ckrealloc((VOID *)cm->cd,
-						sizeof(struct colordesc) * n);
+		cd = (struct colordesc *)REALLOC(cm->cd,
+						n * sizeof(struct colordesc));
 	}
 	if (cd == NULL) {
-		VERR(cm->v, REG_ESPACE);
+		CERR(REG_ESPACE);
 		return COLORLESS;
 	}
 	cm->cd = cd;
@@ -390,7 +319,7 @@ struct colormap *cm;
 		cd->flags = 0;
 	}
 	assert(firstnew < CDEND(cm) && UNUSEDCOLOR(firstnew));
-	return (color) (firstnew - cm->cd);
+	return (color)(firstnew - cm->cd);
 }
 
 /*
@@ -404,7 +333,7 @@ struct colormap *cm;
 	color co;
 
 	co = newcolor(cm);
-	if (VISERR(cm->v))
+	if (CISERR())
 		return COLORLESS;
 	cm->cd[co].nchrs = 1;
 	cm->cd[co].flags = PSEUDO;
@@ -459,22 +388,22 @@ struct colormap *cm;
 	color co;
 	color sco;
 
- 	for (cd = cm->cd, co = 0; cd < end; cd++, co++) {
-  		sco = cd->sub;
- 		if (sco == NOSUB) {
- 			/* has no subcolor, no further action */
- 		} else if (sco == co) {
- 			/* is subcolor, let parent deal with it */
-  		} else if (cd->nchrs == 0) {
-  			/* parent empty, its arcs change color to subcolor */
- 			cd->sub = NOSUB;
-  			scd = &cm->cd[sco];
-  			assert(scd->nchrs > 0);
-  			assert(scd->sub == sco);
- 			scd->sub = NOSUB;
-  			while ((a = cd->arcs) != NULL) {
-  				assert(a->co == co);
-  				/* uncolorchain(cm, a); */
+	for (cd = cm->cd, co = 0; cd < end; cd++, co++) {
+		sco = cd->sub;
+		if (sco == NOSUB) {
+			/* has no subcolor, no further action */
+		} else if (sco == co) {
+			/* is subcolor, let parent deal with it */
+		} else if (cd->nchrs == 0) {
+			/* parent empty, its arcs change color to subcolor */
+			cd->sub = NOSUB;
+			scd = &cm->cd[sco];
+			assert(scd->nchrs > 0);
+			assert(scd->sub == sco);
+			scd->sub = NOSUB;
+			while ((a = cd->arcs) != NULL) {
+				assert(a->co == co);
+				/* uncolorchain(cm, a); */
 				cd->arcs = a->colorchain;
 				a->co = sco;
 				/* colorchain(cm, a); */
@@ -483,11 +412,11 @@ struct colormap *cm;
 			}
 		} else {
 			/* parent's arcs must gain parallel subcolor arcs */
- 			cd->sub = NOSUB;
- 			scd = &cm->cd[sco];
- 			assert(scd->nchrs > 0);
- 			assert(scd->sub == sco);
- 			scd->sub = NOSUB;
+			cd->sub = NOSUB;
+			scd = &cm->cd[sco];
+			assert(scd->nchrs > 0);
+			assert(scd->sub == sco);
+			scd->sub = NOSUB;
 			for (a = cd->arcs; a != NULL; a = a->colorchain) {
 				assert(a->co == co);
 				newarc(nfa, a->type, sco, a->from, a->to);
@@ -558,11 +487,11 @@ pchr c;
  ^ 	struct state *, struct state *);
  */
 static VOID
-rainbow(nfa, cm, type, exc, from, to)
+rainbow(nfa, cm, type, but, from, to)
 struct nfa *nfa;
 struct colormap *cm;
 int type;
-pcolor exc;			/* COLORLESS if no exceptions */
+pcolor but;			/* COLORLESS if no exceptions */
 struct state *from;
 struct state *to;
 {
@@ -570,8 +499,8 @@ struct state *to;
 	struct colordesc *end = CDEND(cm);
 	color co;
 
-	for (cd = cm->cd, co = 0; cd < end && !VISERR(nfa->v); cd++, co++)
-		if (!UNUSEDCOLOR(cd) && cd->sub != co && co != exc &&
+	for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++)
+		if (!UNUSEDCOLOR(cd) && cd->sub != co && co != but &&
 							!(cd->flags&PSEUDO))
 			newarc(nfa, type, co, from, to);
 }
@@ -596,10 +525,95 @@ struct state *to;
 	color co;
 
 	assert(of != from);
-	for (cd = cm->cd, co = 0; cd < end && !VISERR(nfa->v); cd++, co++)
+	for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++)
 		if (!UNUSEDCOLOR(cd) && !(cd->flags&PSEUDO))
 			if (findarc(of, PLAIN, co) == NULL)
 				newarc(nfa, type, co, from, to);
 }
 
-#endif				/* ifdef COMPILE */
+
+
+#ifdef REG_DEBUG
+
+/*
+ - dumpcolors - debugging output
+ ^ static VOID dumpcolors(struct colormap *, FILE *);
+ */
+static VOID
+dumpcolors(cm, f)
+struct colormap *cm;
+FILE *f;
+{
+	struct colordesc *cd;
+	struct colordesc *end;
+	color co;
+	chr c;
+
+	if (cm->filled) {
+		fprintf(f, "filled\n");
+		if (NBYTS > 1)
+			fillcheck(cm, cm->tree, 0, f);
+	}
+	end = CDEND(cm);
+	for (cd = cm->cd + 1, co = 1; cd < end; cd++, co++)	/* skip 0 */
+		if (cd->nchrs > 0) {
+			if (cd->flags&PSEUDO)
+				fprintf(f, "#%2ld(ps): ", (long)co);
+			else
+				fprintf(f, "#%2ld(%2d): ", (long)co, cd->nchrs);
+			for (c = CHR_MIN; c < CHR_MAX; c++)
+				if (getcolor(cm, c) == co)
+					dumpchr(c, f);
+			assert(c == CHR_MAX);
+			if (getcolor(cm, c) == co)
+				dumpchr(c, f);
+			fprintf(f, "\n");
+		}
+}
+
+/*
+ - fillcheck - check proper filling of a tree
+ ^ static VOID fillcheck(struct colormap *, union tree *, int, FILE *);
+ */
+static VOID
+fillcheck(cm, tree, level, f)
+struct colormap *cm;
+union tree *tree;
+int level;			/* level number (top == 0) of this block */
+FILE *f;
+{
+	int i;
+	union tree *t;
+	union tree *fillt = &cm->tree[level+1];
+
+	assert(level < NBYTS-1);	/* this level has pointers */
+	for (i = BYTTAB-1; i >= 0; i--) {
+		t = tree->tptr[i];
+		if (t == NULL)
+			fprintf(f, "NULL found in filled tree!\n");
+		else if (t == fillt)
+			{}
+		else if (level < NBYTS-2)	/* more pointer blocks below */
+			fillcheck(cm, t, level+1, f);
+	}
+}
+
+/*
+ - dumpchr - print a chr
+ * Kind of char-centric but works well enough for debug use.
+ ^ static VOID dumpchr(pchr, FILE *);
+ */
+static VOID
+dumpchr(c, f)
+pchr c;
+FILE *f;
+{
+	if (c == '\\')
+		fprintf(f, "\\\\");
+	else if (c > ' ' && c <= '~')
+		putc((char)c, f);
+	else
+		fprintf(f, "\\0%lo", (long)c);
+}
+
+#endif				/* ifdef REG_DEBUG */
diff --git a/generic/regc_cvec.c b/generic/regc_cvec.c
new file mode 100644
index 0000000..0650883
--- /dev/null
+++ b/generic/regc_cvec.c
@@ -0,0 +1,143 @@
+/*
+ * Utility functions for handling cvecs
+ * This file is #included by regcomp.c.
+ */
+
+/*
+ - newcvec - allocate a new cvec
+ ^ static struct cvec *newcvec(int, int);
+ */
+static struct cvec *
+newcvec(nchrs, nmcces)
+int nchrs;			/* to hold this many chrs... */
+int nmcces;			/* ... and this many MCCEs */
+{
+	struct cvec *vec;
+	size_t slots;			/* chr slots, MCCE text and endmarkers included */
+	size_t bytes;			/* size of the one allocation */
+
+	slots = (size_t)nmcces*(MAXMCCE+1) + (size_t)nchrs;
+	bytes = sizeof(struct cvec) + slots*sizeof(chr);
+	bytes += (size_t)(nmcces-1)*sizeof(chr *);	/* -1: presumably one ptr is in the struct */
+	vec = (struct cvec *)MALLOC(bytes);
+	if (vec == NULL)
+		return NULL;
+	vec->mccespace = nmcces;
+	vec->chrspace = slots;
+	vec->chrs = (chr *)&vec->mcces[nmcces];	/* chrs just after MCCE ptrs */
+	return clearcvec(vec);
+}
+
+/*
+ - clearcvec - clear a possibly-new cvec
+ * Returns pointer as convenience.
+ ^ static struct cvec *clearcvec(struct cvec *);
+ */
+static struct cvec *
+clearcvec(cv)
+struct cvec *cv;		/* cvec to empty; capacity fields are untouched */
+{
+	int k;
+
+	assert(cv != NULL);
+	assert(cv->chrs == (chr *)&cv->mcces[cv->mccespace]);	/* layout intact */
+	cv->nchrs = 0;
+	cv->nmccechrs = 0;
+	cv->nmcces = 0;
+	for (k = cv->mccespace - 1; k >= 0; k--)
+		cv->mcces[k] = NULL;
+
+	return cv;
+}
+
+/*
+ - addchr - add a chr to a cvec
+ ^ static VOID addchr(struct cvec *, pchr);
+ */
+static VOID
+addchr(cv, c)
+struct cvec *cv;		/* cvec to append to */
+pchr c;				/* chr to append */
+{
+	assert(cv->nchrs < cv->chrspace - cv->nmccechrs);	/* room below MCCE text? */
+	cv->chrs[cv->nchrs++] = (chr)c;
+}
+
+/*
+ - addmcce - add an MCCE to a cvec
+ ^ static VOID addmcce(struct cvec *, chr *, chr *);
+ */
+static VOID
+addmcce(cv, startp, endp)
+struct cvec *cv;		/* cvec to receive the MCCE */
+chr *startp;			/* beginning of text */
+chr *endp;			/* just past end of text */
+{
+	int len = endp - startp;
+	chr *from;
+	chr *to;
+	chr *home;			/* MCCE texts pile up downward from the top of chrs */
+
+	assert(len > 0);
+	assert(cv->nchrs + len < cv->chrspace - cv->nmccechrs);
+	assert(cv->nmcces < cv->mccespace);
+	home = &cv->chrs[cv->chrspace - cv->nmccechrs - len - 1];
+	cv->mcces[cv->nmcces++] = home;
+	for (from = startp, to = home; from < endp; )
+		*to++ = *from++;
+	*to++ = 0;			/* endmarker */
+	assert(to == &cv->chrs[cv->chrspace - cv->nmccechrs]);
+	cv->nmccechrs += len + 1;	/* text plus its endmarker */
+}
+
+/*
+ - haschr - does a cvec contain this chr?
+ ^ static int haschr(struct cvec *, pchr);
+ */
+static int			/* predicate */
+haschr(cv, c)
+struct cvec *cv;		/* cvec whose plain chrs are searched */
+pchr c;				/* chr to look for */
+{
+	int found = 0;
+	int k;
+
+	for (k = 0; k < cv->nchrs && !found; k++)
+		if (cv->chrs[k] == c)
+			found = 1;
+	return found;
+}
+
+/*
+ - getcvec - get a cvec, remembering it as v->cv
+ ^ static struct cvec *getcvec(struct vars *, int, int);
+ */
+static struct cvec *
+getcvec(v, nchrs, nmcces)
+struct vars *v;
+int nchrs;			/* to hold this many chrs... */
+int nmcces;			/* ... and this many MCCEs */
+{
+	int need = nchrs + nmcces*(MAXMCCE+1);	/* chrspace holds MCCE text too */
+
+	if (v->cv != NULL && need <= v->cv->chrspace &&
+						nmcces <= v->cv->mccespace)
+		return clearcvec(v->cv);	/* old one is big enough:  reuse */
+	if (v->cv != NULL)
+		freecvec(v->cv);
+	v->cv = newcvec(nchrs, nmcces);
+	if (v->cv == NULL)
+		ERR(REG_ESPACE);	/* the NULL is still returned below */
+	return v->cv;
+}
+
+/*
+ - freecvec - free a cvec
+ ^ static VOID freecvec(struct cvec *);
+ */
+static VOID
+freecvec(cv)
+struct cvec *cv;		/* cvec to release */
+{
+	FREE(cv);		/* newcvec makes one allocation, so one FREE suffices */
+}
diff --git a/generic/lex.c b/generic/regc_lex.c
index 7ae3ccc..820b404 100644
--- a/generic/lex.c
+++ b/generic/regc_lex.c
@@ -1,36 +1,6 @@
 /*
- * lex --
- *
- *	Regexp package file:  lexical analyzer - #included in other source
- *
- * Copyright (c) 1998 Henry Spencer.  All rights reserved.
- * 
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., and Sun Microsystems Inc., none of
- * whom are responsible for the results.  The author thanks all of them.
- * 
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications. 
- * 
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- * 
- * Copyright (c) 1998 by Sun Microsystems, Inc.
- *
- * See the file "license.terms" for information on usage and redistribution
- * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
- *
- * RCS: @(#) $Id: lex.c,v 1.1.2.2 1998/10/03 01:56:40 stanton Exp $
+ * lexical analyzer
+ * This file is #included by regcomp.c.
  */
 
 /* scanning macros (know about v) */
@@ -58,8 +28,11 @@
 #define	L_CEL	7	/* collating element */
 #define	L_ECL	8	/* equivalence class */
 #define	L_CCL	9	/* character class */
-#define	INTO(c)		(v->lexcon = (c))
-#define	_IN(con)		(v->lexcon == (con))
+#define	INTOCON(c)	(v->lexcon = (c))
+#define	INCON(con)	(v->lexcon == (con))
+
+/* construct pointer past end of chr array */
+#define	ENDOF(array)	((array) + sizeof(array)/sizeof(chr))
 
 /*
  - lexstart - set up lexical stuff, scan leading options
@@ -67,19 +40,20 @@
  */
 static VOID
 lexstart(v)
-register struct vars *v;
+struct vars *v;
 {
 	prefixes(v);			/* may turn on new type bits etc. */
 	NOERR();
 
 	if (v->cflags&REG_QUOTE) {
-		v->cflags &= ~(REG_EXTENDED|REG_ADVF|REG_EXPANDED);
-		INTO(L_Q);
-	} else if (v->cflags&REG_EXTENDED)
-		INTO(L_ERE);
-	else {
-		v->cflags &= ~REG_ADVF;
-		INTO(L_BRE);
+		assert(!(v->cflags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE)));
+		INTOCON(L_Q);
+	} else if (v->cflags&REG_EXTENDED) {
+		assert(!(v->cflags&REG_QUOTE));
+		INTOCON(L_ERE);
+	} else {
+		assert(!(v->cflags&(REG_QUOTE|REG_ADVF)));
+		INTOCON(L_BRE);
 	}
 
 	v->nexttype = EMPTY;		/* remember we were at the start */
@@ -104,11 +78,14 @@ struct vars *v;
 		case CHR('?'):		/* "***?" error, msg shows version */
 			ERR(REG_BADPAT);
 			return;		/* proceed no further */
+			break;
 		case CHR('='):		/* "***=" shifts to literal string */
 			NOTE(REG_UNONPOSIX);
 			v->cflags |= REG_QUOTE;
+			v->cflags &= ~(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE);
 			v->now += 4;
 			return;		/* and there can be no more prefixes */
+			break;
 		case CHR(':'):		/* "***:" shifts to AREs */
 			NOTE(REG_UNONPOSIX);
 			v->cflags |= REG_ADVANCED;
@@ -117,26 +94,28 @@ struct vars *v;
 		default:		/* otherwise *** is just an error */
 			ERR(REG_BADRPT);
 			return;
+			break;
 		}
 
-	/* BREs and plain EREs don't get any other favors */
+	/* BREs and EREs don't get embedded options */
 	if ((v->cflags&REG_ADVANCED) != REG_ADVANCED)
 		return;
 
-	/* embedded options */
-	if (HAVE(3) && NEXT2('(', '?') && iswalpha(*(v->now + 2))) {
+	/* embedded options (AREs only) */
+	if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) {
 		NOTE(REG_UNONPOSIX);
 		v->now += 2;
-		for (; !ATEOS() && iswalpha(*v->now); v->now++)
+		for (; !ATEOS() && iscalpha(*v->now); v->now++)
 			switch (*v->now) {
 			case CHR('b'):		/* BREs (but why???) */
-				v->cflags &= ~REG_EXTENDED;
+				v->cflags &= ~(REG_ADVANCED|REG_QUOTE);
 				break;
 			case CHR('c'):		/* case sensitive */
 				v->cflags &= ~REG_ICASE;
 				break;
 			case CHR('e'):		/* plain EREs */
-				v->cflags &= ~REG_ADVF;
+				v->cflags |= REG_EXTENDED;
+				v->cflags &= ~(REG_ADVF|REG_QUOTE);
 				break;
 			case CHR('i'):		/* case insensitive */
 				v->cflags |= REG_ICASE;
@@ -151,6 +130,7 @@ struct vars *v;
 				break;
 			case CHR('q'):		/* literal string */
 				v->cflags |= REG_QUOTE;
+				v->cflags &= ~REG_ADVANCED;
 				break;
 			case CHR('s'):		/* single line, \n ordinary */
 				v->cflags &= ~REG_NEWLINE;
@@ -174,6 +154,8 @@ struct vars *v;
 			return;
 		}
 		v->now++;
+		if (v->cflags&REG_QUOTE)
+			v->cflags &= ~(REG_EXPANDED|REG_NEWLINE);
 	}
 }
 
@@ -181,67 +163,68 @@ struct vars *v;
  - lexnest - "call a subroutine", interpolating string at the lexical level
  * Note, this is not a very general facility.  There are a number of
  * implicit assumptions about what sorts of strings can be subroutines.
- ^ static VOID lexnest(struct vars *, chr *);
+ ^ static VOID lexnest(struct vars *, chr *, chr *);
  */
 static VOID
-lexnest(v, s)
+lexnest(v, beginp, endp)
 struct vars *v;
-chr *s;
+chr *beginp;				/* start of interpolation */
+chr *endp;				/* one past end of interpolation */
 {
 	assert(v->savenow == NULL);	/* only one level of nesting */
 	v->savenow = v->now;
 	v->savestop = v->stop;
-	v->now = s;
-	v->stop = s + wcslen(s);
+	v->now = beginp;
+	v->stop = endp;
 }
 
 /*
- * string CONSTants to interpolate as expansions of things like \d
+ * string constants to interpolate as expansions of things like \d
  */
 static chr backd[] = {		/* \d */
 	CHR('['), CHR('['), CHR(':'),
 	CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
-	CHR(':'), CHR(']'), CHR(']'), CHR('\0')
+	CHR(':'), CHR(']'), CHR(']')
 };
 static chr backD[] = {		/* \D */
 	CHR('['), CHR('^'), CHR('['), CHR(':'),
 	CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
-	CHR(':'), CHR(']'), CHR(']'), CHR('\0')
+	CHR(':'), CHR(']'), CHR(']')
 };
 static chr brbackd[] = {	/* \d within brackets */
 	CHR('['), CHR(':'),
 	CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
-	CHR(':'), CHR(']'), CHR('\0')
+	CHR(':'), CHR(']')
 };
 static chr backs[] = {		/* \s */
 	CHR('['), CHR('['), CHR(':'),
 	CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
-	CHR(':'), CHR(']'), CHR(']'), CHR('\0')
+	CHR(':'), CHR(']'), CHR(']')
 };
 static chr backS[] = {		/* \S */
 	CHR('['), CHR('^'), CHR('['), CHR(':'),
 	CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
-	CHR(':'), CHR(']'), CHR(']'), CHR('\0')
+	CHR(':'), CHR(']'), CHR(']')
 };
 static chr brbacks[] = {	/* \s within brackets */
 	CHR('['), CHR(':'),
 	CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
-	CHR(':'), CHR(']'), CHR('\0')
+	CHR(':'), CHR(']')
 };
 static chr backw[] = {		/* \w */
 	CHR('['), CHR('['), CHR(':'),
 	CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
-	CHR(':'), CHR(']'), CHR('_'), CHR(']'), CHR('\0')
+	CHR(':'), CHR(']'), CHR('_'), CHR(']')
 };
 static chr backW[] = {		/* \W */
 	CHR('['), CHR('^'), CHR('['), CHR(':'),
 	CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
-	CHR(':'), CHR(']'), CHR('_'), CHR(']'), CHR('\0')
+	CHR(':'), CHR(']'), CHR('_'), CHR(']')
 };
 static chr brbackw[] = {	/* \w within brackets */
 	CHR('['), CHR(':'),
 	CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
-	CHR(':'), CHR(']'), CHR('_'), CHR('\0')
+	CHR(':'), CHR(']'), CHR('_')
 };
 
 /*
@@ -253,7 +236,7 @@ static VOID
 lexword(v)
 struct vars *v;
 {
-	lexnest(v, backw);
+	lexnest(v, backw, ENDOF(backw));
 }
 
 /*
@@ -262,9 +245,9 @@ struct vars *v;
  */
 static int			/* 1 normal, 0 failure */
 next(v)
-register struct vars *v;
+struct vars *v;
 {
-	register chr c;
+	chr c;
 
 	/* errors yield an infinite sequence of failures */
 	if (ISERR())
@@ -298,14 +281,17 @@ register struct vars *v;
 		case L_BRE:
 		case L_Q:
 			RET(EOS);
+			break;
 		case L_EBND:
 		case L_BBND:
 			FAILW(REG_EBRACE);
+			break;
 		case L_BRACK:
 		case L_CEL:
 		case L_ECL:
 		case L_CCL:
 			FAILW(REG_EBRACK);
+			break;
 		}
 		assert(NOTREACHED);
 	}
@@ -317,22 +303,26 @@ register struct vars *v;
 	switch (v->lexcon) {
 	case L_BRE:			/* punt BREs to separate function */
 		return brenext(v, c);
+		break;
 	case L_ERE:			/* see below */
 		break;
 	case L_Q:			/* literal strings are easy */
 		RETV(PLAIN, c);
+		break;
 	case L_BBND:			/* bounds are fairly simple */
 	case L_EBND:
 		switch (c) {
 		case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
 		case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
 		case CHR('8'): case CHR('9'):
-			RETV(DIGIT, (chr) DIGITVAL(c));
+			RETV(DIGIT, (chr)DIGITVAL(c));
+			break;
 		case CHR(','):
 			RET(',');
+			break;
 		case CHR('}'):		/* ERE bound ends with } */
-			if (_IN(L_EBND)) {
-				INTO(L_ERE);
+			if (INCON(L_EBND)) {
+				INTOCON(L_ERE);
 				if ((v->cflags&REG_ADVF) && NEXT1('?')) {
 					v->now++;
 					NOTE(REG_UNONPOSIX);
@@ -341,25 +331,32 @@ register struct vars *v;
 				RETV('}', 1);
 			} else
 				FAILW(REG_BADBR);
+			break;
 		case CHR('\\'):		/* BRE bound ends with \} */
-			if (_IN(L_BBND) && NEXT1('}')) {
+			if (INCON(L_BBND) && NEXT1('}')) {
 				v->now++;
-				INTO(L_BRE);
+				INTOCON(L_BRE);
 				RET('}');
 			} else
 				FAILW(REG_BADBR);
+			break;
 		default:
 			FAILW(REG_BADBR);
+			break;
 		}
+		assert(NOTREACHED);
+		break;
 	case L_BRACK:			/* brackets are not too hard */
 		switch (c) {
 		case CHR(']'):
 			if (LASTTYPE('['))
 				RETV(PLAIN, c);
 			else {
-				INTO((v->cflags&REG_EXTENDED) ? L_ERE : L_BRE);
+				INTOCON((v->cflags&REG_EXTENDED) ?
+							L_ERE : L_BRE);
 				RET(']');
 			}
+			break;
 		case CHR('\\'):
 			NOTE(REG_UBBS);
 			if (!(v->cflags&REG_ADVF))
@@ -367,85 +364,109 @@ register struct vars *v;
 			NOTE(REG_UNONPOSIX);
 			if (ATEOS())
 				FAILW(REG_EESCAPE);
-			(VOID) lexescape(v);
+			(DISCARD) lexescape(v);
 			switch (v->nexttype) {	/* not all escapes okay here */
 			case PLAIN:
 				return 1;
+				break;
 			case CCLASS:
 				switch (v->nextvalue) {
-				case 'd':	lexnest(v, brbackd); break;
-				case 's':	lexnest(v, brbacks); break;
-				case 'w':	lexnest(v, brbackw); break;
+				case 'd':
+					lexnest(v, brbackd, ENDOF(brbackd));
+					break;
+				case 's':
+					lexnest(v, brbacks, ENDOF(brbacks));
+					break;
+				case 'w':
+					lexnest(v, brbackw, ENDOF(brbackw));
+					break;
 				default:
 					FAILW(REG_EESCAPE);
+					break;
 				}
 				/* lexnest done, back up and try again */
 				v->nexttype = v->lasttype;
 				return next(v);
+				break;
 			}
 			/* not one of the acceptable escapes */
 			FAILW(REG_EESCAPE);
+			break;
 		case CHR('-'):
 			if (LASTTYPE('[') || NEXT1(']'))
 				RETV(PLAIN, c);
 			else
 				RETV(RANGE, c);
+			break;
 		case CHR('['):
 			if (ATEOS())
 				FAILW(REG_EBRACK);
 			switch (*v->now++) {
 			case CHR('.'):
-				INTO(L_CEL);
+				INTOCON(L_CEL);
 				/* might or might not be locale-specific */
 				RET(COLLEL);
+				break;
 			case CHR('='):
-				INTO(L_ECL);
+				INTOCON(L_ECL);
 				NOTE(REG_ULOCALE);
 				RET(ECLASS);
+				break;
 			case CHR(':'):
-				INTO(L_CCL);
+				INTOCON(L_CCL);
 				NOTE(REG_ULOCALE);
 				RET(CCLASS);
+				break;
 			default:			/* oops */
 				v->now--;
 				RETV(PLAIN, c);
+				break;
 			}
+			assert(NOTREACHED);
+			break;
 		default:
 			RETV(PLAIN, c);
+			break;
 		}
+		assert(NOTREACHED);
+		break;
 	case L_CEL:			/* collating elements are easy */
 		if (c == CHR('.') && NEXT1(']')) {
 			v->now++;
-			INTO(L_BRACK);
+			INTOCON(L_BRACK);
 			RETV(END, '.');
 		} else
 			RETV(PLAIN, c);
+		break;
 	case L_ECL:			/* ditto equivalence classes */
 		if (c == CHR('=') && NEXT1(']')) {
 			v->now++;
-			INTO(L_BRACK);
+			INTOCON(L_BRACK);
 			RETV(END, '=');
 		} else
 			RETV(PLAIN, c);
+		break;
 	case L_CCL:			/* ditto character classes */
 		if (c == CHR(':') && NEXT1(']')) {
 			v->now++;
-			INTO(L_BRACK);
+			INTOCON(L_BRACK);
 			RETV(END, ':');
 		} else
 			RETV(PLAIN, c);
+		break;
 	default:
 		assert(NOTREACHED);
 		break;
 	}
 
 	/* that got rid of everything except EREs */
-	assert(_IN(L_ERE));
+	assert(INCON(L_ERE));
 
 	/* deal with EREs, except for backslashes */
 	switch (c) {
 	case CHR('|'):
 		RET('|');
+		break;
 	case CHR('*'):
 		if ((v->cflags&REG_ADVF) && NEXT1('?')) {
 			v->now++;
@@ -453,6 +474,7 @@ register struct vars *v;
 			RETV('*', 0);
 		}
 		RETV('*', 1);
+		break;
 	case CHR('+'):
 		if ((v->cflags&REG_ADVF) && NEXT1('?')) {
 			v->now++;
@@ -460,6 +482,7 @@ register struct vars *v;
 			RETV('+', 0);
 		}
 		RETV('+', 1);
+		break;
 	case CHR('?'):
 		if ((v->cflags&REG_ADVF) && NEXT1('?')) {
 			v->now++;
@@ -467,18 +490,21 @@ register struct vars *v;
 			RETV('?', 0);
 		}
 		RETV('?', 1);
+		break;
 	case CHR('{'):		/* bounds start or plain character */
 		if (v->cflags&REG_EXPANDED)
 			skip(v);
-		if (ATEOS() || !iswdigit(*v->now)) {
+		if (ATEOS() || !iscdigit(*v->now)) {
 			NOTE(REG_UBRACES);
 			NOTE(REG_UUNSPEC);
 			RETV(PLAIN, c);
 		} else {
 			NOTE(REG_UBOUNDS);
-			INTO(L_EBND);
+			INTOCON(L_EBND);
 			RET('{');
 		}
+		assert(NOTREACHED);
+		break;
 	case CHR('('):		/* parenthesis, or advanced extension */
 		if ((v->cflags&REG_ADVF) && NEXT1('?')) {
 			NOTE(REG_UNONPOSIX);
@@ -486,6 +512,7 @@ register struct vars *v;
 			switch (*v->now++) {
 			case CHR(':'):		/* non-capturing paren */
 				RETV('(', 0);
+				break;
 			case CHR('#'):		/* comment */
 				while (!ATEOS() && *v->now != CHR(')'))
 					v->now++;
@@ -493,28 +520,37 @@ register struct vars *v;
 					v->now++;
 				assert(v->nexttype == v->lasttype);
 				return next(v);
+				break;
 			case CHR('='):		/* positive lookahead */
 				NOTE(REG_ULOOKAHEAD);
 				RETV(LACON, 1);
+				break;
 			case CHR('!'):		/* negative lookahead */
 				NOTE(REG_ULOOKAHEAD);
 				RETV(LACON, 0);
+				break;
 			case CHR('<'):		/* prefer short */
 				RETV(PREFER, 0);
+				break;
 			case CHR('>'):		/* prefer long */
 				RETV(PREFER, 1);
+				break;
 			default:
 				FAILW(REG_BADRPT);
+				break;
 			}
+			assert(NOTREACHED);
 		}
-		if (v->cflags&REG_NOSUB) {
-		    RETV('(', 0);		/* all parens non-capturing */
-		}
-		RETV('(', 1);
+		if (v->cflags&REG_NOSUB)
+			RETV('(', 0);		/* all parens non-capturing */
+		else
+			RETV('(', 1);
+		break;
 	case CHR(')'):
 		if (LASTTYPE('('))
 			NOTE(REG_UUNSPEC);
 		RETV(')', c);
+		break;
 	case CHR('['):		/* easy except for [[:<:]] and [[:>:]] */
 		if (HAVE(6) &&	*(v->now+0) == CHR('[') &&
 				*(v->now+1) == CHR(':') &&
@@ -528,49 +564,55 @@ register struct vars *v;
 			NOTE(REG_UNONPOSIX);
 			RET((c == CHR('<')) ? '<' : '>');
 		}
-		INTO(L_BRACK);
+		INTOCON(L_BRACK);
 		if (NEXT1('^')) {
 			v->now++;
 			RETV('[', 0);
 		}
 		RETV('[', 1);
+		break;
 	case CHR('.'):
 		RET('.');
+		break;
 	case CHR('^'):
 		RET('^');
+		break;
 	case CHR('$'):
 		RET('$');
+		break;
 	case CHR('\\'):		/* mostly punt backslashes to code below */
 		if (ATEOS())
 			FAILW(REG_EESCAPE);
 		break;
 	default:		/* ordinary character */
 		RETV(PLAIN, c);
+		break;
 	}
 
 	/* ERE backslash handling; backslash already eaten */
 	assert(!ATEOS());
 	if (!(v->cflags&REG_ADVF)) {	/* only AREs have non-trivial escapes */
-		if (iswalnum(*v->now)) {
+		if (iscalnum(*v->now)) {
 			NOTE(REG_UBSALNUM);
 			NOTE(REG_UUNSPEC);
 		}
 		RETV(PLAIN, *v->now++);
 	}
-	(VOID) lexescape(v);
+	(DISCARD) lexescape(v);
 	if (ISERR())
 		FAILW(REG_EESCAPE);
 	if (v->nexttype == CCLASS) {	/* fudge at lexical level */
 		switch (v->nextvalue) {
-		case 'd':	lexnest(v, backd); break;
-		case 'D':	lexnest(v, backD); break;
-		case 's':	lexnest(v, backs); break;
-		case 'S':	lexnest(v, backS); break;
-		case 'w':	lexnest(v, backw); break;
-		case 'W':	lexnest(v, backW); break;
+		case 'd':	lexnest(v, backd, ENDOF(backd)); break;
+		case 'D':	lexnest(v, backD, ENDOF(backD)); break;
+		case 's':	lexnest(v, backs, ENDOF(backs)); break;
+		case 'S':	lexnest(v, backS, ENDOF(backS)); break;
+		case 'w':	lexnest(v, backw, ENDOF(backw)); break;
+		case 'W':	lexnest(v, backW, ENDOF(backW)); break;
 		default:
 			assert(NOTREACHED);
 			FAILW(REG_ASSERT);
+			break;
 		}
 		/* lexnest done, back up and try again */
 		v->nexttype = v->lasttype;
@@ -591,10 +633,10 @@ struct vars *v;
 {
 	chr c;
 	static chr alert[] = {
-		CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t'), CHR('\0')
+		CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t')
 	};
 	static chr esc[] = {
-		CHR('E'), CHR('S'), CHR('C'), CHR('\0')
+		CHR('E'), CHR('S'), CHR('C')
 	};
 	chr *save;
 
@@ -602,79 +644,102 @@ struct vars *v;
 
 	assert(!ATEOS());
 	c = *v->now++;
-	if (!iswalnum(c))
+	if (!iscalnum(c))
 		RETV(PLAIN, c);
 
 	NOTE(REG_UNONPOSIX);
 	switch (c) {
 	case CHR('a'):
-		RETV(PLAIN, chrnamed(v, alert, CHR('\007')));
+		RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007')));
+		break;
 	case CHR('A'):
 		RETV(SBEGIN, 0);
+		break;
 	case CHR('b'):
 		RETV(PLAIN, CHR('\b'));
+		break;
+	case CHR('B'):
+		RETV(PLAIN, CHR('\\'));
+		break;
 	case CHR('c'):
 		NOTE(REG_UUNPORT);
 		if (ATEOS())
 			FAILW(REG_EESCAPE);
-		RETV(PLAIN, (chr) (*v->now++ & 037));
+		RETV(PLAIN, (chr)(*v->now++ & 037));
+		break;
 	case CHR('d'):
 		NOTE(REG_ULOCALE);
 		RETV(CCLASS, 'd');
+		break;
 	case CHR('D'):
 		NOTE(REG_ULOCALE);
 		RETV(CCLASS, 'D');
+		break;
 	case CHR('e'):
 		NOTE(REG_UUNPORT);
-		RETV(PLAIN, chrnamed(v, esc, CHR('\033')));
-	case CHR('E'):
-		RETV(PLAIN, CHR('\\'));
+		RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033')));
+		break;
 	case CHR('f'):
 		RETV(PLAIN, CHR('\f'));
+		break;
 	case CHR('n'):
 		RETV(PLAIN, CHR('\n'));
+		break;
 	case CHR('r'):
 		RETV(PLAIN, CHR('\r'));
+		break;
 	case CHR('s'):
 		NOTE(REG_ULOCALE);
 		RETV(CCLASS, 's');
+		break;
 	case CHR('S'):
 		NOTE(REG_ULOCALE);
 		RETV(CCLASS, 'S');
+		break;
 	case CHR('t'):
 		RETV(PLAIN, CHR('\t'));
+		break;
 	case CHR('u'):
 		c = lexdigits(v, 16, 4, 4);
 		if (ISERR())
 			FAILW(REG_EESCAPE);
 		RETV(PLAIN, c);
+		break;
 	case CHR('U'):
 		c = lexdigits(v, 16, 8, 8);
 		if (ISERR())
 			FAILW(REG_EESCAPE);
 		RETV(PLAIN, c);
+		break;
 	case CHR('v'):
 		RETV(PLAIN, CHR('\v'));
+		break;
 	case CHR('w'):
 		NOTE(REG_ULOCALE);
 		RETV(CCLASS, 'w');
+		break;
 	case CHR('W'):
 		NOTE(REG_ULOCALE);
 		RETV(CCLASS, 'W');
+		break;
 	case CHR('x'):
 		NOTE(REG_UUNPORT);
 		c = lexdigits(v, 16, 1, 255);	/* REs >255 long outside spec */
 		if (ISERR())
 			FAILW(REG_EESCAPE);
 		RETV(PLAIN, c);
+		break;
 	case CHR('y'):
 		NOTE(REG_ULOCALE);
 		RETV(WBDRY, 0);
+		break;
 	case CHR('Y'):
 		NOTE(REG_ULOCALE);
 		RETV(NWBDRY, 0);
+		break;
 	case CHR('Z'):
 		RETV(SEND, 0);
+		break;
 	case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
 	case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
 	case CHR('9'):
@@ -686,7 +751,7 @@ struct vars *v;
 		/* ugly heuristic (first test is "exactly 1 digit?") */
 		if (v->now - save == 0 || (int)c <= v->nsubexp) {
 			NOTE(REG_UBACKREF);
-			RETV(BACKREF, (chr) c);
+			RETV(BACKREF, (chr)c);
 		}
 		/* oops, doesn't look like it's a backref after all... */
 		v->now = save;
@@ -698,10 +763,13 @@ struct vars *v;
 		if (ISERR())
 			FAILW(REG_EESCAPE);
 		RETV(PLAIN, c);
+		break;
 	default:
-		assert(iswalpha(c));
+		assert(iscalpha(c));
 		FAILW(REG_EESCAPE);	/* unknown alphabetic escape */
+		break;
 	}
+	assert(NOTREACHED);
 }
 
 /*
@@ -715,7 +783,7 @@ int base;
 int minlen;
 int maxlen;
 {
-	uchr n;			/* unsigned to aVOID overflow misbehavior */
+	uchr n;			/* unsigned to avoid overflow misbehavior */
 	int len;
 	chr c;
 	int d;
@@ -764,16 +832,17 @@ int maxlen;
  */
 static int			/* 1 normal, 0 failure */
 brenext(v, pc)
-register struct vars *v;
-register pchr pc;
+struct vars *v;
+pchr pc;
 {
-	register chr c = (chr) pc;
+	chr c = (chr)pc;
 
 	switch (c) {
 	case CHR('*'):
 		if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^'))
 			RETV(PLAIN, c);
 		RET('*');
+		break;
 	case CHR('['):
 		if (HAVE(6) &&	*(v->now+0) == CHR('[') &&
 				*(v->now+1) == CHR(':') &&
@@ -787,14 +856,16 @@ register pchr pc;
 			NOTE(REG_UNONPOSIX);
 			RET((c == CHR('<')) ? '<' : '>');
 		}
-		INTO(L_BRACK);
+		INTOCON(L_BRACK);
 		if (NEXT1('^')) {
 			v->now++;
 			RETV('[', 0);
 		}
 		RETV('[', 1);
+		break;
 	case CHR('.'):
 		RET('.');
+		break;
 	case CHR('^'):
 		if (LASTTYPE(EMPTY))
 			RET('^');
@@ -803,6 +874,7 @@ register pchr pc;
 			RET('^');
 		}
 		RETV(PLAIN, c);
+		break;
 	case CHR('$'):
 		if (v->cflags&REG_EXPANDED)
 			skip(v);
@@ -813,10 +885,12 @@ register pchr pc;
 			RET('$');
 		}
 		RETV(PLAIN, c);
+		break;
 	case CHR('\\'):
 		break;		/* see below */
 	default:
 		RETV(PLAIN, c);
+		break;
 	}
 
 	assert(c == CHR('\\'));
@@ -827,31 +901,40 @@ register pchr pc;
 	c = *v->now++;
 	switch (c) {
 	case CHR('{'):
-		INTO(L_BBND);
+		INTOCON(L_BBND);
 		NOTE(REG_UBOUNDS);
 		RET('{');
+		break;
 	case CHR('('):
 		RETV('(', 1);
+		break;
 	case CHR(')'):
 		RETV(')', c);
+		break;
 	case CHR('<'):
 		NOTE(REG_UNONPOSIX);
 		RET('<');
+		break;
 	case CHR('>'):
 		NOTE(REG_UNONPOSIX);
 		RET('>');
+		break;
 	case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
 	case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
 	case CHR('9'):
 		NOTE(REG_UBACKREF);
-		RETV(BACKREF, (chr) DIGITVAL(c));
+		RETV(BACKREF, (chr)DIGITVAL(c));
+		break;
 	default:
-		if (iswalnum(c)) {
+		if (iscalnum(c)) {
 			NOTE(REG_UBSALNUM);
 			NOTE(REG_UUNSPEC);
 		}
 		RETV(PLAIN, c);
+		break;
 	}
+
+	assert(NOTREACHED);
 }
 
 /*
@@ -867,14 +950,14 @@ struct vars *v;
 	assert(v->cflags&REG_EXPANDED);
 
 	for (;;) {
-		while (!ATEOS() && iswspace(*v->now))
+		while (!ATEOS() && iscspace(*v->now))
 			v->now++;
 		if (ATEOS() || *v->now != CHR('#'))
 			break;				/* NOTE BREAK OUT */
 		assert(NEXT1('#'));
 		while (!ATEOS() && *v->now != CHR('\n'))
 			v->now++;
-		/* leave the newline to be picked up by the iswspace loop */
+		/* leave the newline to be picked up by the iscspace loop */
 	}
 
 	if (v->now != start)
@@ -884,7 +967,7 @@ struct vars *v;
 /*
  - newline - return the chr for a newline
  * This helps confine use of CHR to this source file.
- ^ static chr newline(VOID);
+ ^ static chr newline(NOPARMS);
  */
 static chr
 newline()
@@ -895,7 +978,7 @@ newline()
 /*
  - ch - return the chr sequence for locale.c's fake collating element ch
  * This helps confine use of CHR to this source file.
- ^ static chr *ch(VOID);
+ ^ static chr *ch(NOPARMS);
  */
 static chr *
 ch()
@@ -909,12 +992,13 @@ ch()
  - chrnamed - return the chr known by a given (chr string) name
  * The code is a bit clumsy, but this routine gets only such specialized
  * use that it hardly matters.
- ^ static chr chrnamed(struct vars *, chr *, pchr);
+ ^ static chr chrnamed(struct vars *, chr *, chr *, pchr);
  */
 static chr
-chrnamed(v, name, lastresort)
+chrnamed(v, startp, endp, lastresort)
 struct vars *v;
-chr *name;
+chr *startp;			/* start of name */
+chr *endp;			/* just past end of name */
 pchr lastresort;		/* what to return if name lookup fails */
 {
 	celt c;
@@ -924,15 +1008,15 @@ pchr lastresort;		/* what to return if name lookup fails */
 
 	errsave = v->err;
 	v->err = 0;
-	c = element(v, name, name+wcslen(name));
+	c = element(v, startp, endp);
 	e = v->err;
 	v->err = errsave;
 
 	if (e != 0)
-		return (chr) lastresort;
+		return (chr)lastresort;
 
 	cv = range(v, c, c, 0);
 	if (cv->nchrs == 0)
-		return (chr) lastresort;
+		return (chr)lastresort;
 	return cv->chrs[0];
 }
diff --git a/generic/regc_locale.c b/generic/regc_locale.c
new file mode 100644
index 0000000..769241f
--- /dev/null
+++ b/generic/regc_locale.c
@@ -0,0 +1,426 @@
+/*
+ * locale-specific stuff, including MCCE handling
+ * This file is #included by regcomp.c.
+ *
+ * No MCCEs for Tcl.  The handling of character names and classes is
+ * still ASCII-centric, and needs to be extended to handle full Unicode.
+ */
+
+/* ASCII character-name table; element() searches it, a NULL name ends it */
+static struct cname {
+	char *name;		/* collating-element name */
+	char code;		/* the character that name stands for */
+} cnames[] = {
+	{"NUL",	'\0'},
+	{"SOH",	'\001'},
+	{"STX",	'\002'},
+	{"ETX",	'\003'},
+	{"EOT",	'\004'},
+	{"ENQ",	'\005'},
+	{"ACK",	'\006'},
+	{"BEL",	'\007'},
+	{"alert",	'\007'},
+	{"BS",		'\010'},
+	{"backspace",	'\b'},
+	{"HT",		'\011'},
+	{"tab",		'\t'},
+	{"LF",		'\012'},
+	{"newline",	'\n'},
+	{"VT",		'\013'},
+	{"vertical-tab",	'\v'},
+	{"FF",		'\014'},
+	{"form-feed",	'\f'},
+	{"CR",		'\015'},
+	{"carriage-return",	'\r'},
+	{"SO",	'\016'},
+	{"SI",	'\017'},
+	{"DLE",	'\020'},
+	{"DC1",	'\021'},
+	{"DC2",	'\022'},
+	{"DC3",	'\023'},
+	{"DC4",	'\024'},
+	{"NAK",	'\025'},
+	{"SYN",	'\026'},
+	{"ETB",	'\027'},
+	{"CAN",	'\030'},
+	{"EM",	'\031'},
+	{"SUB",	'\032'},
+	{"ESC",	'\033'},
+	{"IS4",	'\034'},
+	{"FS",	'\034'},
+	{"IS3",	'\035'},
+	{"GS",	'\035'},
+	{"IS2",	'\036'},
+	{"RS",	'\036'},
+	{"IS1",	'\037'},
+	{"US",	'\037'},
+	{"space",		' '},
+	{"exclamation-mark",	'!'},
+	{"quotation-mark",	'"'},
+	{"number-sign",		'#'},
+	{"dollar-sign",		'$'},
+	{"percent-sign",		'%'},
+	{"ampersand",		'&'},
+	{"apostrophe",		'\''},
+	{"left-parenthesis",	'('},
+	{"right-parenthesis",	')'},
+	{"asterisk",	'*'},
+	{"plus-sign",	'+'},
+	{"comma",	','},
+	{"hyphen",	'-'},
+	{"hyphen-minus",	'-'},
+	{"period",	'.'},
+	{"full-stop",	'.'},
+	{"slash",	'/'},
+	{"solidus",	'/'},
+	{"zero",		'0'},
+	{"one",		'1'},
+	{"two",		'2'},
+	{"three",	'3'},
+	{"four",		'4'},
+	{"five",		'5'},
+	{"six",		'6'},
+	{"seven",	'7'},
+	{"eight",	'8'},
+	{"nine",		'9'},
+	{"colon",	':'},
+	{"semicolon",	';'},
+	{"less-than-sign",	'<'},
+	{"equals-sign",		'='},
+	{"greater-than-sign",	'>'},
+	{"question-mark",	'?'},
+	{"commercial-at",	'@'},
+	{"left-square-bracket",	'['},
+	{"backslash",		'\\'},
+	{"reverse-solidus",	'\\'},
+	{"right-square-bracket",	']'},
+	{"circumflex",		'^'},
+	{"circumflex-accent",	'^'},
+	{"underscore",		'_'},
+	{"low-line",		'_'},
+	{"grave-accent",		'`'},
+	{"left-brace",		'{'},
+	{"left-curly-bracket",	'{'},
+	{"vertical-line",	'|'},
+	{"right-brace",		'}'},
+	{"right-curly-bracket",	'}'},
+	{"tilde",		'~'},
+	{"DEL",	'\177'},
+	{NULL,	0}
+};
+
+/* ASCII character-class table; the last entry has a NULL name */
+static struct cclass {
+	char *name;		/* class name */
+	char *chars;		/* the ASCII members of the class */
+	int hasch;		/* NOTE(review): presumably "includes fake ch" -- confirm */
+} cclasses[] = {
+	{"alnum",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+0123456789",				1},
+	{"alpha",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
+					1},
+	{"blank",	" \t",		0},
+	{"cntrl",	"\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
+\25\26\27\30\31\32\33\34\35\36\37\177",	0},
+	{"digit",	"0123456789",	0},
+	{"graph",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+					1},
+	{"lower",	"abcdefghijklmnopqrstuvwxyz",
+					1},
+	{"print",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
+					1},
+	{"punct",	"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+					0},
+	{"space",	"\t\n\v\f\r ",	0},
+	{"upper",	"ABCDEFGHIJKLMNOPQRSTUVWXYZ",
+					0},
+	{"xdigit",	"0123456789ABCDEFabcdef",
+					0},
+	{NULL,		0,		0}
+};
+
+#define	CH	NOCELT
+
+/*
+ - nmcces - how many distinct MCCEs are there?
+ ^ static int nmcces(struct vars *);
+ */
+static int
+nmcces(v)
+struct vars *v;			/* not consulted */
+{
+	return 0;		/* always 0:  no MCCEs for Tcl */
+}
+
+/*
+ - nleaders - how many chrs can be first chrs of MCCEs?
+ ^ static int nleaders(struct vars *);
+ */
+static int
+nleaders(v)
+struct vars *v;			/* not consulted */
+{
+	return 0;		/* always 0:  no MCCEs for Tcl, hence no leaders */
+}
+
+/*
+ - allmcces - return a cvec with all the MCCEs of the locale
+ ^ static struct cvec *allmcces(struct vars *, struct cvec *);
+ */
+static struct cvec *
+allmcces(v, cv)
+struct vars *v;			/* not consulted */
+struct cvec *cv;		/* this is supposed to have enough room */
+{
+	return clearcvec(cv);	/* no MCCEs for Tcl:  the emptied cvec is the answer */
+}
+
+/*
+ - element - map collating-element name to celt
+ ^ static celt element(struct vars *, chr *, chr *);
+ */
+static celt
+element(v, startp, endp)
+struct vars *v;
+chr *startp;			/* points to start of name */
+chr *endp;			/* points just past end of name */
+{
+	struct cname *hit;
+	size_t len;
+	Tcl_DString buf;
+	char *utf;			/* the name in UTF-8, for the comparisons */
+
+	/* generic:  one-chr names stand for themselves */
+	assert(startp < endp);
+	len = endp - startp;
+	if (len == 1)
+		return *startp;
+
+	NOTE(REG_ULOCALE);
+
+	/* walk the table until a name of the same length and text turns up */
+	Tcl_DStringInit(&buf);
+	utf = TclUniCharToUtfDString(startp, (int)len, &buf);
+	hit = cnames;
+	while (hit->name != NULL && !(strlen(hit->name) == len &&
+					strncmp(hit->name, utf, len) == 0))
+		hit++;
+	Tcl_DStringFree(&buf);
+	if (hit->name == NULL) {	/* ran off the end:  couldn't find it */
+		ERR(REG_ECOLLATE);
+		return 0;
+	}
+	return CHR(hit->code);
+}
+
+/*
+ - range - supply cvec for a range, including legality check
+ ^ static struct cvec *range(struct vars *, celt, celt, int);
+ */
+static struct cvec *		/* NULL after ERR(REG_ERANGE) if b is before a */
+range(v, a, b, cases)
+struct vars *v;
+celt a;				/* first celt of the range */
+celt b;				/* might equal a */
+int cases;			/* case-independent? */
+{
+	int nchrs;		/* vector slots to request */
+	struct cvec *cv;
+	celt c, lc, uc, tc;	/* current celt; its lower, upper, title forms */
+
+	if (a != b && !before(a, b)) {
+		ERR(REG_ERANGE);
+		return NULL;
+	}
+
+	nchrs = b - a + 1;
+	if (cases)
+		nchrs *= 4;	/* loop below can add c, lc, uc and tc per celt */
+	cv = getcvec(v, nchrs, 0);
+	NOERRN();
+
+	for (c = a; c <= b; c++) {
+		addchr(cv, c);
+		if (cases) {
+		    lc = Tcl_UniCharToLower((chr)c);
+		    uc = Tcl_UniCharToUpper((chr)c);
+		    tc = Tcl_UniCharToTitle((chr)c);
+		    if (c != lc) {
+			addchr(cv, lc);
+		    }
+		    if (c != uc) {
+			addchr(cv, uc);
+		    }
+		    if (c != tc && tc != uc) {	/* skip a title form equal to uc */
+			addchr(cv, tc);
+		    }
+		}
+	}
+
+	return cv;
+}
+
+/*
+ - before - is celt x before celt y, for purposes of range legality?
+ ^ static int before(celt, celt);
+ */
+static int			/* 1 if x is before y, else 0 */
+before(x, y)
+celt x;				/* candidate earlier celt */
+celt y;				/* candidate later celt */
+{
+	/* with no MCCEs, ordering is a plain numeric comparison */
+	int precedes = (x < y) ? 1 : 0;
+
+	return precedes;
+}
+
+/*
+ - eclass - supply cvec for an equivalence class
+ * Must include case counterparts on request.
+ ^ static struct cvec *eclass(struct vars *, celt, int);
+ */
+static struct cvec *		/* vector holding the members of c's class */
+eclass(v, c, cases)
+struct vars *v;
+celt c;				/* the celt whose class is wanted */
+int cases;			/* all cases? */
+{
+	struct cvec *cv;
+
+	/* the usual path: no equivalence classes, so c stands alone */
+	if (!(v->cflags&REG_FAKEEC) || c != 'x') {
+		if (cases)
+			return allcases(v, c);
+		cv = getcvec(v, 1, 0);
+		assert(cv != NULL);
+		addchr(cv, (chr)c);
+		return cv;
+	}
+
+	/* crude fake equivalence class for testing: x goes with y */
+	cv = getcvec(v, 4, 0);
+	addchr(cv, (chr)'x');
+	addchr(cv, (chr)'y');
+	if (cases) {
+		addchr(cv, (chr)'X');
+		addchr(cv, (chr)'Y');
+	}
+	return cv;
+}
+
+/*
+ - cclass - supply cvec for a character class
+ * Must include case counterparts on request.
+ ^ static struct cvec *cclass(struct vars *, chr *, chr *, int);
+ */
+static struct cvec *		/* NULL after ERR() on failure */
+cclass(v, startp, endp, cases)
+struct vars *v;
+chr *startp;			/* first chr of the class name */
+chr *endp;			/* one past the last chr of the name */
+int cases;			/* case-independent? */
+{
+	size_t namelen = endp - startp;
+	struct cclass *entry = cclasses;
+	struct cvec *cv;
+	Tcl_DString utf;
+	char *name;
+	char *member;
+
+	/* when ignoring case, "lower" and "upper" both become "alpha" */
+	Tcl_DStringInit(&utf);
+	name = TclUniCharToUtfDString(startp, (int)namelen, &utf);
+	if (cases && namelen == 5 && (strncmp("lower", name, 5) == 0 ||
+					strncmp("upper", name, 5) == 0))
+		name = "alpha";
+
+	/* scan cclasses, which ends at an entry with a NULL name */
+	while (entry->name != NULL && (strlen(entry->name) != namelen ||
+				strncmp(entry->name, name, namelen) != 0))
+		entry++;
+	Tcl_DStringFree(&utf);
+	if (entry->name == NULL) {
+		ERR(REG_ECTYPE);
+		return NULL;
+	}
+
+	/* one slot per chr listed for the class */
+	cv = getcvec(v, (int)strlen(entry->chars), 0);
+	if (cv == NULL) {
+		ERR(REG_ESPACE);
+		return NULL;
+	}
+
+	/* copy the listed chrs into the vector */
+	for (member = entry->chars; *member != '\0'; member++)
+		addchr(cv, (chr)*member);
+	return cv;
+}
+
+/*
+ - allcases - supply cvec for all case counterparts of a chr (including itself)
+ * Shortcut for simple characters (messy cases are done via range()): adds
+ * the chr plus its lower<->upper counterpart.  NOTE(review): no title case.
+ ^ static struct cvec *allcases(struct vars *, pchr);
+ */
+static struct cvec *
+allcases(v, pc)
+struct vars *v;
+pchr pc;
+{
+	chr self = (chr)pc;
+	struct cvec *pair;
+
+	pair = getcvec(v, 2, 0);	/* room for self and one counterpart */
+	assert(pair != NULL);
+	addchr(pair, self);
+	if (TclUniCharIsUpper(self))
+		addchr(pair, Tcl_UniCharToLower(self));
+	else if (TclUniCharIsLower(self))
+		addchr(pair, Tcl_UniCharToUpper(self));
+	return pair;
+}
+
+/*
+ - cmp - chr-substring compare
+ * Backrefs need this.  It should preferably be efficient.
+ * Note that it does not need to report anything except equal/unequal.
+ * Note also that the length is exact, and the comparison should not
+ * stop at embedded NULs!
+ ^ static int cmp(CONST chr *, CONST chr *, size_t);
+ */
+static int			/* 0 for equal, nonzero for unequal */
+cmp(x, y, len)
+CONST chr *x;			/* first substring */
+CONST chr *y;			/* second substring */
+size_t len;			/* exact length of comparison */
+{
+	return memcmp(VS(x), VS(y), len*sizeof(chr));	/* len is in chrs: scale to bytes */
+}
+
+/*
+ - casecmp - case-independent chr-substring compare
+ * REG_ICASE backrefs need this.  It should preferably be efficient.
+ * Note that it does not need to report anything except equal/unequal.
+ * Note also that the length is exact, and the comparison should not
+ * stop at embedded NULs!
+ ^ static int casecmp(CONST chr *, CONST chr *, size_t);
+ */
+static int			/* 0 for equal, nonzero for unequal */
+casecmp(x, y, len)
+CONST chr *x;
+CONST chr *y;
+size_t len;			/* exact length of comparison */
+{
+	size_t pos;
+
+	/* compare lowercased chrs at each position, stopping at the first
+	 * difference; an embedded NUL is compared like any other chr */
+	for (pos = 0; pos < len; pos++)
+		if (Tcl_UniCharToLower(x[pos]) != Tcl_UniCharToLower(y[pos]))
+			return 1;
+	return 0;
+}
diff --git a/generic/nfa.c b/generic/regc_nfa.c
index f6b8967..14ee077 100644
--- a/generic/nfa.c
+++ b/generic/regc_nfa.c
@@ -1,57 +1,29 @@
 /*
- * nfa.c --
+ * NFA utilities.
+ * This file is #included by regcomp.c.
  *
- *	Regexp package file:
- *	NFA utilities.  One or two things that technically ought to be 
- *	in here are actually in color.c, thanks to some incestuous 
- *	relationships in the color chains.
- *
- * Copyright (c) 1998 Henry Spencer.  All rights reserved.
- * 
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., and Sun Microsystems Inc., none of
- * whom are responsible for the results.  The author thanks all of them.
- * 
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications. 
- * 
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- * 
- * Copyright (c) 1998 by Sun Microsystems, Inc.
- *
- * See the file "license.terms" for information on usage and redistribution
- * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
- *
- * RCS: @(#) $Id: nfa.c,v 1.1.2.2 1998/10/03 01:56:41 stanton Exp $
+ * One or two things that technically ought to be in here
+ * are actually in color.c, thanks to some incestuous relationships in
+ * the color chains.
  */
 
 #define	NISERR()	VISERR(nfa->v)
+#define	NERR(e)		VERR(nfa->v, (e))
 
 
 /*
  - newnfa - set up an NFA
- * Caution:  colormap must be set up already.
- ^ static struct nfa *newnfa(struct vars *, struct nfa *);
+ ^ static struct nfa *newnfa(struct vars *, struct colormap *, struct nfa *);
  */
 static struct nfa *		/* the NFA, or NULL */
-newnfa(v, parent)
+newnfa(v, cm, parent)
 struct vars *v;
+struct colormap *cm;
 struct nfa *parent;		/* NULL if primary NFA */
 {
 	struct nfa *nfa;
 
-	nfa = (struct nfa *)ckalloc(sizeof(struct nfa));
+	nfa = (struct nfa *)MALLOC(sizeof(struct nfa));
 	if (nfa == NULL)
 		return NULL;
 
@@ -59,6 +31,7 @@ struct nfa *parent;		/* NULL if primary NFA */
 	nfa->slast = NULL;
 	nfa->free = NULL;
 	nfa->nstates = 0;
+	nfa->cm = cm;
 	nfa->v = v;
 	nfa->bos[0] = nfa->bos[1] = COLORLESS;
 	nfa->eos[0] = nfa->eos[1] = COLORLESS;
@@ -72,10 +45,10 @@ struct nfa *parent;		/* NULL if primary NFA */
 		freenfa(nfa);
 		return NULL;
 	}
-	rainbow(nfa, nfa->v->cm, PLAIN, COLORLESS, nfa->pre, nfa->init);
+	rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init);
 	newarc(nfa, '^', 1, nfa->pre, nfa->init);
 	newarc(nfa, '^', 0, nfa->pre, nfa->init);
-	rainbow(nfa, nfa->v->cm, PLAIN, COLORLESS, nfa->final, nfa->post);
+	rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post);
 	newarc(nfa, '$', 1, nfa->final, nfa->post);
 	newarc(nfa, '$', 0, nfa->final, nfa->post);
 
@@ -109,7 +82,7 @@ struct nfa *nfa;
 	nfa->nstates = -1;
 	nfa->pre = NULL;
 	nfa->post = NULL;
-	ckfree((char *)nfa);
+	FREE(nfa);
 }
 
 /*
@@ -128,14 +101,11 @@ int flag;
 		s = nfa->free;
 		nfa->free = s->next;
 	} else {
-		s = (struct state *)ckalloc(sizeof(struct state));
+		s = (struct state *)MALLOC(sizeof(struct state));
 		if (s == NULL) {
-			VERR(nfa->v, REG_ESPACE);
+			NERR(REG_ESPACE);
 			return NULL;
 		}
-
-		/* memleak (CCS). */
-		
 		s->oas.next = NULL;
 		s->free = &s->oas.a[0];
 		for (i = 0; i < ABSIZE; i++) {
@@ -240,12 +210,12 @@ struct state *s;
 	assert(s->no == FREESTATE);
 	for (ab = s->oas.next; ab != NULL; ab = abnext) {
 		abnext = ab->next;
-		ckfree((char *)ab);
+		FREE(ab);
 	}
 	s->ins = NULL;
 	s->outs = NULL;
 	s->next = NULL;
-	ckfree((char *)s);
+	FREE(s);
 }
 
 /*
@@ -276,7 +246,7 @@ struct state *to;
 	assert(a != NULL);
 
 	a->type = t;
-	a->co = (color) co;
+	a->co = (color)co;
 	a->to = to;
 	a->from = from;
 
@@ -295,7 +265,7 @@ struct state *to;
 	to->nins++;
 
 	if (COLORED(a) && nfa->parent == NULL)
-		colorchain(nfa->v->cm, a);
+		colorchain(nfa->cm, a);
 
 	return;
 }
@@ -315,9 +285,9 @@ struct state *s;
 
 	/* if none at hand, get more */
 	if (s->free == NULL) {
-		new = (struct arcbatch *)ckalloc(sizeof(struct arcbatch));
+		new = (struct arcbatch *)MALLOC(sizeof(struct arcbatch));
 		if (new == NULL) {
-			VERR(nfa->v, REG_ESPACE);
+			NERR(REG_ESPACE);
 			return NULL;
 		}
 		new->next = s->oas.next;
@@ -354,7 +324,7 @@ struct arc *victim;
 
 	/* take it off color chain if necessary */
 	if (COLORED(victim) && nfa->parent == NULL)
-		uncolorchain(nfa->v->cm, victim);
+		uncolorchain(nfa->cm, victim);
 
 	/* take it off source's out-chain */
 	assert(from != NULL);
@@ -680,10 +650,10 @@ struct nfa *nfa;
 {
 	/* false colors for BOS, BOL, EOS, EOL */
 	if (nfa->parent == NULL) {
-		nfa->bos[0] = pseudocolor(nfa->v->cm);
-		nfa->bos[1] = pseudocolor(nfa->v->cm);
-		nfa->eos[0] = pseudocolor(nfa->v->cm);
-		nfa->eos[1] = pseudocolor(nfa->v->cm);
+		nfa->bos[0] = pseudocolor(nfa->cm);
+		nfa->bos[1] = pseudocolor(nfa->cm);
+		nfa->eos[0] = pseudocolor(nfa->cm);
+		nfa->eos[1] = pseudocolor(nfa->cm);
 	} else {
 		assert(nfa->parent->bos[0] != COLORLESS);
 		nfa->bos[0] = nfa->parent->bos[0];
@@ -698,42 +668,41 @@ struct nfa *nfa;
 
 /*
  - optimize - optimize an NFA
- ^ static VOID optimize(struct nfa *);
+ ^ static int optimize(struct nfa *, FILE *);
  */
-static VOID
-optimize(nfa)
+static int			/* re_info bits */
+optimize(nfa, f)
 struct nfa *nfa;
+FILE *f;			/* for debug output; NULL none */
 {
-	int verbose = (nfa->v->cflags&REG_PROGRESS) ? 1 : 0;
-	int info;
+	int verbose = (f != NULL) ? 1 : 0;
 
 	if (verbose)
-		printf("\ninitial cleanup:\n");
+		fprintf(f, "\ninitial cleanup:\n");
 	cleanup(nfa);		/* may simplify situation */
-	if (nfa->v->cflags&REG_PROGRESS)
-		dumpnfa(nfa, stdout);
 	if (verbose)
-		printf("\nempties:\n");
-	fixempties(nfa);	/* get rid of EMPTY arcs */
+		dumpnfa(nfa, f);
+	if (verbose)
+		fprintf(f, "\nempties:\n");
+	fixempties(nfa, f);	/* get rid of EMPTY arcs */
 	if (verbose)
-		printf("\nconstraints:\n");
-	pullback(nfa);		/* pull back constraints backward */
-	pushfwd(nfa);		/* push fwd constraints forward */
+		fprintf(f, "\nconstraints:\n");
+	pullback(nfa, f);	/* pull back constraints backward */
+	pushfwd(nfa, f);	/* push fwd constraints forward */
 	if (verbose)
-		printf("\nfinal cleanup:\n");
+		fprintf(f, "\nfinal cleanup:\n");
 	cleanup(nfa);		/* final tidying */
-	info = analyze(nfa->v, nfa);	/* and analysis */
-	if (nfa->parent == NULL)
-		nfa->v->re->re_info |= info;
+	return analyze(nfa);	/* and analysis */
 }
 
 /*
  - pullback - pull back constraints backward to (with luck) eliminate them
- ^ static VOID pullback(struct nfa *);
+ ^ static VOID pullback(struct nfa *, FILE *);
  */
 static VOID
-pullback(nfa)
+pullback(nfa, f)
 struct nfa *nfa;
+FILE *f;			/* for debug output; NULL none */
 {
 	struct state *s;
 	struct state *nexts;
@@ -754,8 +723,8 @@ struct nfa *nfa;
 				assert(nexta == NULL || s->no != FREESTATE);
 			}
 		}
-		if (progress && (nfa->v->cflags&REG_PROGRESS))
-			dumpnfa(nfa, stdout);
+		if (progress && f != NULL)
+			dumpnfa(nfa, f);
 	} while (progress && !NISERR());
 	if (NISERR())
 		return;
@@ -799,7 +768,7 @@ struct arc *con;
 		return 1;
 	}
 
-	/* first, clone from state if necessary to aVOID other outarcs */
+	/* first, clone from state if necessary to avoid other outarcs */
 	if (from->nouts > 1) {
 		s = newstate(nfa);
 		if (NISERR())
@@ -846,11 +815,12 @@ struct arc *con;
 
 /*
  - pushfwd - push forward constraints forward to (with luck) eliminate them
- ^ static VOID pushfwd(struct nfa *);
+ ^ static VOID pushfwd(struct nfa *, FILE *);
  */
 static VOID
-pushfwd(nfa)
+pushfwd(nfa, f)
 struct nfa *nfa;
+FILE *f;			/* for debug output; NULL none */
 {
 	struct state *s;
 	struct state *nexts;
@@ -871,8 +841,8 @@ struct nfa *nfa;
 				assert(nexta == NULL || s->no != FREESTATE);
 			}
 		}
-		if (progress && (nfa->v->cflags&REG_PROGRESS))
-			dumpnfa(nfa, stdout);
+		if (progress && f != NULL)
+			dumpnfa(nfa, f);
 	} while (progress && !NISERR());
 	if (NISERR())
 		return;
@@ -916,7 +886,7 @@ struct arc *con;
 		return 1;
 	}
 
-	/* first, clone to state if necessary to aVOID other inarcs */
+	/* first, clone to state if necessary to avoid other inarcs */
 	if (to->nins > 1) {
 		s = newstate(nfa);
 		if (NISERR())
@@ -978,11 +948,13 @@ struct arc *a;
 	case CA('^', PLAIN):		/* newlines are handled separately */
 	case CA('$', PLAIN):
 		return INCOMPATIBLE;
+		break;
 	case CA(AHEAD, PLAIN):		/* color constraints meet colors */
 	case CA(BEHIND, PLAIN):
 		if (con->co == a->co)
 			return SATISFIED;
 		return INCOMPATIBLE;
+		break;
 	case CA('^', '^'):		/* collision, similar constraints */
 	case CA('$', '$'):
 	case CA(AHEAD, AHEAD):
@@ -990,11 +962,13 @@ struct arc *a;
 		if (con->co == a->co)		/* true duplication */
 			return SATISFIED;
 		return INCOMPATIBLE;
+		break;
 	case CA('^', BEHIND):		/* collision, dissimilar constraints */
 	case CA(BEHIND, '^'):
 	case CA('$', AHEAD):
 	case CA(AHEAD, '$'):
 		return INCOMPATIBLE;
+		break;
 	case CA('^', '$'):		/* constraints passing each other */
 	case CA('^', AHEAD):
 	case CA(BEHIND, '$'):
@@ -1008,18 +982,20 @@ struct arc *a;
 	case CA('$', LACON):
 	case CA(AHEAD, LACON):
 		return COMPATIBLE;
+		break;
 	}
 	assert(NOTREACHED);
-	return INCOMPATIBLE;		/* keep compiler from complaining */
+	return INCOMPATIBLE;		/* for benefit of blind compilers */
 }
 
 /*
  - fixempties - get rid of EMPTY arcs
- ^ static VOID fixempties(struct nfa *);
+ ^ static VOID fixempties(struct nfa *, FILE *);
  */
 static VOID
-fixempties(nfa)
+fixempties(nfa, f)
 struct nfa *nfa;
+FILE *f;			/* for debug output; NULL none */
 {
 	struct state *s;
 	struct state *nexts;
@@ -1039,8 +1015,8 @@ struct nfa *nfa;
 				assert(nexta == NULL || s->no != FREESTATE);
 			}
 		}
-		if (progress && (nfa->v->cflags&REG_PROGRESS))
-			dumpnfa(nfa, stdout);
+		if (progress && f != NULL)
+			dumpnfa(nfa, f);
 	} while (progress && !NISERR());
 }
 
@@ -1176,11 +1152,10 @@ struct state *mark;		/* the value to mark with */
 
 /*
  - analyze - ascertain potentially-useful facts about an optimized NFA
- ^ static int analyze(struct vars *, struct nfa *);
+ ^ static int analyze(struct nfa *);
  */
 static int			/* re_info bits to be ORed in */
-analyze(v, nfa)
-struct vars *v;
+analyze(nfa)
 struct nfa *nfa;
 {
 	struct arc *a;
@@ -1219,11 +1194,10 @@ struct state *end;
 
 /*
  - compact - compact an NFA
- ^ static VOID compact(struct vars *, struct nfa *, struct cnfa *);
+ ^ static VOID compact(struct nfa *, struct cnfa *);
  */
 static VOID
-compact(v, nfa, cnfa)
-struct vars *v;
+compact(nfa, cnfa)
 struct nfa *nfa;
 struct cnfa *cnfa;
 {
@@ -1234,7 +1208,7 @@ struct cnfa *cnfa;
 	struct carc *ca;
 	struct carc *first;
 
-	assert (!ISERR());
+	assert (!NISERR());
 
 	nstates = 0;
 	narcs = 0;
@@ -1243,14 +1217,14 @@ struct cnfa *cnfa;
 		narcs += s->nouts + 1;
 	}
 
-	cnfa->states = (struct carc **)ckalloc(nstates * sizeof(struct carc *));
-	cnfa->arcs = (struct carc *)ckalloc(narcs * sizeof(struct carc));
+	cnfa->states = (struct carc **)MALLOC(nstates * sizeof(struct carc *));
+	cnfa->arcs = (struct carc *)MALLOC(narcs * sizeof(struct carc));
 	if (cnfa->states == NULL || cnfa->arcs == NULL) {
 		if (cnfa->states != NULL)
-			ckfree((char *)cnfa->states);
+			FREE(cnfa->states);
 		if (cnfa->arcs != NULL)
-			ckfree((char *)cnfa->arcs);
-		ERR(REG_ESPACE);
+			FREE(cnfa->arcs);
+		NERR(REG_ESPACE);
 		return;
 	}
 	cnfa->nstates = nstates;
@@ -1260,13 +1234,12 @@ struct cnfa *cnfa;
 	cnfa->bos[1] = nfa->bos[1];
 	cnfa->eos[0] = nfa->eos[0];
 	cnfa->eos[1] = nfa->eos[1];
-	cnfa->ncolors = maxcolor(v->cm) + 1;
-	cnfa->haslacons = 0;
- 	cnfa->leftanch = 1;		/* tentatively */
+	cnfa->ncolors = maxcolor(nfa->cm) + 1;
+	cnfa->flags = LEFTANCH;		/* tentatively */
 
 	ca = cnfa->arcs;
 	for (s = nfa->states; s != NULL; s = s->next) {
-		assert((size_t) s->no < nstates);
+		assert((size_t)s->no < nstates);
 		cnfa->states[s->no] = ca;
 		first = ca;
 		for (a = s->outs; a != NULL; a = a->outchain)
@@ -1278,10 +1251,10 @@ struct cnfa *cnfa;
 				break;
 			case LACON:
 				assert(s->no != cnfa->pre);
-				ca->co = (color) (a->co + cnfa->ncolors);
+				ca->co = (color)(cnfa->ncolors + a->co);
 				ca->to = a->to->no;
 				ca++;
-				cnfa->haslacons = 1;
+				cnfa->flags |= HASLACONS;
 				break;
 			default:
 				assert(NOTREACHED);
@@ -1297,9 +1270,9 @@ struct cnfa *cnfa;
 
 	for (a = nfa->pre->outs; a != NULL; a = a->outchain)
 		if (a->type == PLAIN && a->co != nfa->bos[0] &&
-			a->co != nfa->bos[1])
-		    cnfa->leftanch = 0;
- }
+							a->co != nfa->bos[1])
+			cnfa->flags &= ~LEFTANCH;
+}
 
 /*
  - carcsort - sort compacted-NFA arcs by color
@@ -1341,11 +1314,12 @@ int dynalloc;			/* is the cnfa struct itself dynamic? */
 {
 	assert(cnfa->nstates != 0);	/* not empty already */
 	cnfa->nstates = 0;
-	ckfree((char *)cnfa->states);
-	ckfree((char *)cnfa->arcs);
+	FREE(cnfa->states);
+	FREE(cnfa->arcs);
 	if (dynalloc)
-		ckfree((char *)cnfa);
+		FREE(cnfa);
 }
+
 /*
  - dumpnfa - dump an NFA in human-readable form
  ^ static VOID dumpnfa(struct nfa *, FILE *);
@@ -1355,7 +1329,159 @@ dumpnfa(nfa, f)
 struct nfa *nfa;
 FILE *f;
 {
+#ifdef REG_DEBUG
+	struct state *s;
+
+	fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
+	if (nfa->bos[0] != COLORLESS)
+		fprintf(f, ", bos [%ld]", (long)nfa->bos[0]);
+	if (nfa->bos[1] != COLORLESS)
+		fprintf(f, ", bol [%ld]", (long)nfa->bos[1]);
+	if (nfa->eos[0] != COLORLESS)
+		fprintf(f, ", eos [%ld]", (long)nfa->eos[0]);
+	if (nfa->eos[1] != COLORLESS)
+		fprintf(f, ", eol [%ld]", (long)nfa->eos[1]);
+	fprintf(f, "\n");
+	for (s = nfa->states; s != NULL; s = s->next)
+		dumpstate(s, f);
+	if (nfa->parent == NULL)
+		dumpcolors(nfa->cm, f);
+	fflush(f);
+#endif
 }
+
+#ifdef REG_DEBUG		/* subordinates of dumpnfa */
+
+/*
+ - dumpstate - dump an NFA state in human-readable form
+ ^ static VOID dumpstate(struct state *, FILE *);
+ */
+static VOID
+dumpstate(s, f)
+struct state *s;		/* state to describe */
+FILE *f;			/* where the description is written */
+{
+	struct arc *a;
+
+	fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "",
+					(s->flag) ? s->flag : '.');	/* number, "T" if tmp is set, flag or '.' */
+	if (s->prev != NULL && s->prev->next != s)	/* back link disagrees with forward link */
+		fprintf(f, "\tstate chain bad\n");
+	if (s->nouts == 0)
+		fprintf(f, "\tno out arcs\n");
+	else
+		dumparcs(s, f);
+	fflush(f);
+	for (a = s->ins; a != NULL; a = a->inchain) {	/* check every arc on the in-chain */
+		if (a->to != s)		/* arc on this in-chain points at another state */
+			fprintf(f, "\tlink from %d to %d on %d's in-chain\n",
+					a->from->no, a->to->no, s->no);
+	}
+}
+
+/*
+ - dumparcs - dump out-arcs in human-readable form
+ ^ static VOID dumparcs(struct state *, FILE *);
+ */
+static VOID
+dumparcs(s, f)
+struct state *s;		/* state whose out-arcs are printed; needs nouts > 0 */
+FILE *f;			/* where the output goes */
+{
+	int pos;		/* print position returned by dumprarcs() */
+
+	assert(s->nouts > 0);
+	/* printing arcs in reverse order is usually clearer */
+	pos = dumprarcs(s->outs, s, f, 1);
+	if (pos != 1)		/* last row was left unfinished: end it */
+		fprintf(f, "\n");
+}
+
+/*
+ - dumprarcs - dump remaining outarcs, recursively, in reverse order
+ ^ static int dumprarcs(struct arc *, struct state *, FILE *, int);
+ */
+static int			/* resulting print position */
+dumprarcs(a, s, f, pos)
+struct arc *a;			/* arc to print, after the rest of its outchain */
+struct state *s;		/* passed through to dumparc() */
+FILE *f;			/* where the output goes */
+int pos;			/* initial print position */
+{
+	if (a->outchain != NULL)	/* recurse first, so later chain entries print earlier */
+		pos = dumprarcs(a->outchain, s, f, pos);
+	dumparc(a, s, f);
+	if (pos == 5) {		/* fifth arc on this row: break the line */
+		fprintf(f, "\n");
+		pos = 1;
+	} else
+		pos++;
+	return pos;
+}
+
+/*
+ - dumparc - dump one outarc in readable form, including prefixing tab
+ ^ static VOID dumparc(struct arc *, struct state *, FILE *);
+ */
+static VOID
+dumparc(a, s, f)
+struct arc *a;			/* arc to print */
+struct state *s;		/* state the arc is expected to leave from */
+FILE *f;			/* where the output goes */
+{
+	struct arc *aa;		/* scratch arc pointer for the two searches */
+	struct arcbatch *ab;	/* batch being searched for a */
+
+	fprintf(f, "\t");
+	switch (a->type) {	/* label: punctuation shows the type, number is a->co */
+	case PLAIN:
+		fprintf(f, "[%ld]", (long)a->co);
+		break;
+	case AHEAD:
+		fprintf(f, ">%ld>", (long)a->co);
+		break;
+	case BEHIND:
+		fprintf(f, "<%ld<", (long)a->co);
+		break;
+	case LACON:
+		fprintf(f, ":%ld:", (long)a->co);
+		break;
+	case '^':
+	case '$':
+		fprintf(f, "%c%d", a->type, (int)a->co);	/* the type is itself the printable chr */
+		break;
+	case EMPTY:		/* no label at all */
+		break;
+	default:		/* unrecognized type: hex type, octal co */
+		fprintf(f, "0x%x/0%lo", a->type, (long)a->co);
+		break;
+	}
+	if (a->from != s)	/* arc claims a different source state */
+		fprintf(f, "?%d?", a->from->no);
+	for (ab = &a->from->oas; ab != NULL; ab = ab->next) {	/* is a inside one of from's arc batches? */
+		for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++)
+			if (aa == a)
+				break;		/* NOTE BREAK OUT */
+		if (aa < &ab->a[ABSIZE])	/* propagate break */
+				break;		/* NOTE BREAK OUT */
+	}
+	if (ab == NULL)		/* searched every batch without finding a */
+		fprintf(f, "?!?");	/* not in allocated space */
+	fprintf(f, "->");
+	if (a->to == NULL) {	/* no destination: nothing further to check */
+		fprintf(f, "NULL");
+		return;
+	}
+	fprintf(f, "%d", a->to->no);
+	for (aa = a->to->ins; aa != NULL; aa = aa->inchain)	/* is a on the destination's in-chain? */
+		if (aa == a)
+			break;		/* NOTE BREAK OUT */
+	if (aa == NULL)
+		fprintf(f, "?!?");	/* missing from in-chain */
+}
+
+#endif				/* ifdef REG_DEBUG */
+
 /*
  - dumpcnfa - dump a compacted NFA in human-readable form
  ^ static VOID dumpcnfa(struct cnfa *, FILE *);
@@ -1365,4 +1491,62 @@ dumpcnfa(cnfa, f)
 struct cnfa *cnfa;
 FILE *f;
 {
+#ifdef REG_DEBUG
+	int st;
+
+	fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post);
+	if (cnfa->bos[0] != COLORLESS)
+		fprintf(f, ", bos [%ld]", (long)cnfa->bos[0]);
+	if (cnfa->bos[1] != COLORLESS)
+		fprintf(f, ", bol [%ld]", (long)cnfa->bos[1]);
+	if (cnfa->eos[0] != COLORLESS)
+		fprintf(f, ", eos [%ld]", (long)cnfa->eos[0]);
+	if (cnfa->eos[1] != COLORLESS)
+		fprintf(f, ", eol [%ld]", (long)cnfa->eos[1]);
+	if (cnfa->flags&HASLACONS)
+		fprintf(f, ", haslacons");
+	if (cnfa->flags&LEFTANCH)
+		fprintf(f, ", leftanch");
+	fprintf(f, "\n");
+	for (st = 0; st < cnfa->nstates; st++)
+		dumpcstate(st, cnfa->states[st], cnfa, f);
+	fflush(f);
+#endif
 }
+
+#ifdef REG_DEBUG		/* subordinates of dumpcnfa */
+
+/*
+ - dumpcstate - dump a compacted-NFA state in human-readable form
+ ^ static VOID dumpcstate(int, struct carc *, struct cnfa *, FILE *);
+ */
+static VOID
+dumpcstate(st, ca, cnfa, f)
+int st;				/* state number, printed as the row label */
+struct carc *ca;		/* the state's arcs, ended by a COLORLESS entry */
+struct cnfa *cnfa;		/* supplies ncolors */
+FILE *f;			/* where the output goes */
+{
+	int i;
+	int pos;		/* arcs printed on the current row, plus one */
+
+	fprintf(f, "%d.", st);
+	pos = 1;
+	for (i = 0; ca[i].co != COLORLESS; i++) {
+		if (ca[i].co < cnfa->ncolors)	/* below ncolors: shown as [co] */
+			fprintf(f, "\t[%ld]->%d", (long)ca[i].co, ca[i].to);
+		else				/* otherwise shown as :co-ncolors: */
+			fprintf(f, "\t:%ld:->%d", (long)ca[i].co-cnfa->ncolors,
+								ca[i].to);
+		if (pos == 5) {		/* five arcs per row */
+			fprintf(f, "\n");
+			pos = 1;
+		} else
+			pos++;
+	}
+	if (i == 0 || pos != 1)		/* no arcs at all, or an unfinished row */
+		fprintf(f, "\n");
+	fflush(f);
+}
+
+#endif				/* ifdef REG_DEBUG */
diff --git a/generic/compile.c b/generic/regcomp.c
index ee12d04..2a13172 100644
--- a/generic/compile.c
+++ b/generic/regcomp.c
@@ -1,51 +1,16 @@
 /*
- * compile.c --
- *
- *	Regexp package file:  re_*comp and friends - compile REs
- *
- * Copyright (c) 1998 Henry Spencer.  All rights reserved.
- * 
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., and Sun Microsystems Inc., none of
- * whom are responsible for the results.  The author thanks all of them.
- * 
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications. 
- * 
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- * 
- * Copyright (c) 1998 by Sun Microsystems, Inc.
- *
- * See the file "license.terms" for information on usage and redistribution
- * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
- *
- * RCS: @(#) $Id: compile.c,v 1.1.2.2 1998/10/03 01:56:40 stanton Exp $
+ * re_*comp and friends - compile REs
+ * This file #includes several others (see the bottom).
  */
 
-#include "tclInt.h"
-#include <assert.h>
-#include "tclPort.h"
-#include "tclRegexp.h"
-#include "chr.h"
-#include "guts.h"
+#include "regguts.h"
 
 /*
  * forward declarations, up here so forward datatypes etc. are defined early
  */
 /* =====^!^===== begin forwards =====^!^===== */
 /* automatically gathered by fwd; do not hand-edit */
-/* === compile.c === */
+/* === regcomp.c === */
 int compile _ANSI_ARGS_((regex_t *, CONST chr *, size_t, int));
 static VOID moresubs _ANSI_ARGS_((struct vars *, int));
 static int freev _ANSI_ARGS_((struct vars *, int));
@@ -63,53 +28,34 @@ static color nlcolor _ANSI_ARGS_((struct vars *));
 static VOID wordchrs _ANSI_ARGS_((struct vars *));
 static struct subre subre _ANSI_ARGS_((struct state *, struct state *, int, int, struct rtree *));
 static struct rtree *newrt _ANSI_ARGS_((struct vars *));
-static VOID freert _ANSI_ARGS_((struct rtree *));
-static VOID freertnode _ANSI_ARGS_((struct rtree *));
+static VOID freert _ANSI_ARGS_((struct vars *, struct rtree *));
+static VOID freertnode _ANSI_ARGS_((struct vars *, struct rtree *));
 static VOID optrt _ANSI_ARGS_((struct vars *, struct rtree *));
 static int numrt _ANSI_ARGS_((struct rtree *, int));
-static VOID nfatree _ANSI_ARGS_((struct vars *, struct rtree *));
-static VOID nfanode _ANSI_ARGS_((struct vars *, struct subre *));
+static VOID markrt _ANSI_ARGS_((struct rtree *));
+static VOID cleanrt _ANSI_ARGS_((struct vars *));
+static VOID nfatree _ANSI_ARGS_((struct vars *, struct rtree *, FILE *));
+static VOID nfanode _ANSI_ARGS_((struct vars *, struct subre *, FILE *));
 static int newlacon _ANSI_ARGS_((struct vars *, struct state *, struct state *, int));
 static VOID freelacons _ANSI_ARGS_((struct subre *, int));
 static VOID rfree _ANSI_ARGS_((regex_t *));
 static VOID dump _ANSI_ARGS_((regex_t *, FILE *));
 static VOID dumprt _ANSI_ARGS_((struct rtree *, FILE *, int));
 static VOID rtdump _ANSI_ARGS_((struct rtree *, FILE *, int, int));
-/* === lex.c === */
+/* === regc_lex.c === */
 static VOID lexstart _ANSI_ARGS_((struct vars *));
 static VOID prefixes _ANSI_ARGS_((struct vars *));
-static VOID lexnest _ANSI_ARGS_((struct vars *, chr *));
+static VOID lexnest _ANSI_ARGS_((struct vars *, chr *, chr *));
 static VOID lexword _ANSI_ARGS_((struct vars *));
 static int next _ANSI_ARGS_((struct vars *));
 static int lexescape _ANSI_ARGS_((struct vars *));
 static chr lexdigits _ANSI_ARGS_((struct vars *, int, int, int));
 static int brenext _ANSI_ARGS_((struct vars *, pchr));
 static VOID skip _ANSI_ARGS_((struct vars *));
-static chr newline _ANSI_ARGS_((VOID));
-static chr *ch _ANSI_ARGS_((VOID));
-static chr chrnamed _ANSI_ARGS_((struct vars *, chr *, pchr));
-/* === locale.c === */
-#define	MAXCE	2	/* longest CE code is prepared to handle */
-typedef wint_t celt;	/* type holding distinct codes for all chrs, all CEs */
-static int nces _ANSI_ARGS_((struct vars *));
-static int nleaders _ANSI_ARGS_((struct vars *));
-static struct cvec *allces _ANSI_ARGS_((struct vars *, struct cvec *));
-static celt element _ANSI_ARGS_((struct vars *, chr *, chr *));
-static struct cvec *range _ANSI_ARGS_((struct vars *, celt, celt, int));
-static int before _ANSI_ARGS_((celt, celt));
-static struct cvec *eclass _ANSI_ARGS_((struct vars *, celt, int));
-static struct cvec *cclass _ANSI_ARGS_((struct vars *, chr *, chr *, int));
-static struct cvec *allcases _ANSI_ARGS_((struct vars *, pchr));
-static int sncmp _ANSI_ARGS_((CONST chr *, CONST chr *, size_t));
-static struct cvec *newcvec _ANSI_ARGS_((int, int));
-static struct cvec *clearcvec _ANSI_ARGS_((struct cvec *));
-static VOID addchr _ANSI_ARGS_((struct cvec *, pchr));
-static VOID addce _ANSI_ARGS_((struct cvec *, chr *));
-static int haschr _ANSI_ARGS_((struct cvec *, pchr));
-static struct cvec *getcvec _ANSI_ARGS_((struct vars *, int, int));
-static VOID freecvec _ANSI_ARGS_((struct cvec *));
-/* === color.c === */
-union tree;
+static chr newline _ANSI_ARGS_((NOPARMS));
+static chr *ch _ANSI_ARGS_((NOPARMS));
+static chr chrnamed _ANSI_ARGS_((struct vars *, chr *, chr *, pchr));
+/* === regc_color.c === */
 static struct colormap *newcm _ANSI_ARGS_((struct vars *));
 static VOID freecm _ANSI_ARGS_((struct colormap *));
 static VOID cmtreefree _ANSI_ARGS_((struct colormap *, union tree *, int));
@@ -127,8 +73,11 @@ static VOID uncolorchain _ANSI_ARGS_((struct colormap *, struct arc *));
 static int singleton _ANSI_ARGS_((struct colormap *, pchr c));
 static VOID rainbow _ANSI_ARGS_((struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *));
 static VOID colorcomplement _ANSI_ARGS_((struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *));
-/* === nfa.c === */
-static struct nfa *newnfa _ANSI_ARGS_((struct vars *, struct nfa *));
+static VOID dumpcolors _ANSI_ARGS_((struct colormap *, FILE *));
+static VOID fillcheck _ANSI_ARGS_((struct colormap *, union tree *, int, FILE *));
+static VOID dumpchr _ANSI_ARGS_((pchr, FILE *));
+/* === regc_nfa.c === */
+static struct nfa *newnfa _ANSI_ARGS_((struct vars *, struct colormap *, struct nfa *));
 static VOID freenfa _ANSI_ARGS_((struct nfa *));
 static struct state *newfstate _ANSI_ARGS_((struct nfa *, int flag));
 static struct state *newstate _ANSI_ARGS_((struct nfa *));
@@ -151,27 +100,52 @@ static VOID dupnfa _ANSI_ARGS_((struct nfa *, struct state *, struct state *, st
 static VOID duptraverse _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
 static VOID cleartraverse _ANSI_ARGS_((struct nfa *, struct state *));
 static VOID specialcolors _ANSI_ARGS_((struct nfa *));
-static VOID optimize _ANSI_ARGS_((struct nfa *));
-static VOID pullback _ANSI_ARGS_((struct nfa *));
+static int optimize _ANSI_ARGS_((struct nfa *, FILE *));
+static VOID pullback _ANSI_ARGS_((struct nfa *, FILE *));
 static int pull _ANSI_ARGS_((struct nfa *, struct arc *));
-static VOID pushfwd _ANSI_ARGS_((struct nfa *));
+static VOID pushfwd _ANSI_ARGS_((struct nfa *, FILE *));
 static int push _ANSI_ARGS_((struct nfa *, struct arc *));
 #define	INCOMPATIBLE	1	/* destroys arc */
 #define	SATISFIED	2	/* constraint satisfied */
 #define	COMPATIBLE	3	/* compatible but not satisfied yet */
 static int combine _ANSI_ARGS_((struct arc *, struct arc *));
-static VOID fixempties _ANSI_ARGS_((struct nfa *));
+static VOID fixempties _ANSI_ARGS_((struct nfa *, FILE *));
 static int unempty _ANSI_ARGS_((struct nfa *, struct arc *));
 static VOID cleanup _ANSI_ARGS_((struct nfa *));
 static VOID markreachable _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *));
 static VOID markcanreach _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *));
-static int analyze _ANSI_ARGS_((struct vars *, struct nfa *));
+static int analyze _ANSI_ARGS_((struct nfa *));
 static int isempty _ANSI_ARGS_((struct state *, struct state *));
-static VOID compact _ANSI_ARGS_((struct vars *, struct nfa *, struct cnfa *));
+static VOID compact _ANSI_ARGS_((struct nfa *, struct cnfa *));
 static VOID carcsort _ANSI_ARGS_((struct carc *, struct carc *));
 static VOID freecnfa _ANSI_ARGS_((struct cnfa *, int));
 static VOID dumpnfa _ANSI_ARGS_((struct nfa *, FILE *));
+static VOID dumpstate _ANSI_ARGS_((struct state *, FILE *));
+static VOID dumparcs _ANSI_ARGS_((struct state *, FILE *));
+static int dumprarcs _ANSI_ARGS_((struct arc *, struct state *, FILE *, int));
+static VOID dumparc _ANSI_ARGS_((struct arc *, struct state *, FILE *));
 static VOID dumpcnfa _ANSI_ARGS_((struct cnfa *, FILE *));
+static VOID dumpcstate _ANSI_ARGS_((int, struct carc *, struct cnfa *, FILE *));
+/* === regc_cvec.c === */
+static struct cvec *newcvec _ANSI_ARGS_((int, int));
+static struct cvec *clearcvec _ANSI_ARGS_((struct cvec *));
+static VOID addchr _ANSI_ARGS_((struct cvec *, pchr));
+static VOID addmcce _ANSI_ARGS_((struct cvec *, chr *, chr *));
+static int haschr _ANSI_ARGS_((struct cvec *, pchr));
+static struct cvec *getcvec _ANSI_ARGS_((struct vars *, int, int));
+static VOID freecvec _ANSI_ARGS_((struct cvec *));
+/* === regc_locale.c === */
+static int nmcces _ANSI_ARGS_((struct vars *));
+static int nleaders _ANSI_ARGS_((struct vars *));
+static struct cvec *allmcces _ANSI_ARGS_((struct vars *, struct cvec *));
+static celt element _ANSI_ARGS_((struct vars *, chr *, chr *));
+static struct cvec *range _ANSI_ARGS_((struct vars *, celt, celt, int));
+static int before _ANSI_ARGS_((celt, celt));
+static struct cvec *eclass _ANSI_ARGS_((struct vars *, celt, int));
+static struct cvec *cclass _ANSI_ARGS_((struct vars *, chr *, chr *, int));
+static struct cvec *allcases _ANSI_ARGS_((struct vars *, pchr));
+static int cmp _ANSI_ARGS_((CONST chr *, CONST chr *, size_t));
+static int casecmp _ANSI_ARGS_((CONST chr *, CONST chr *, size_t));
 /* automatically gathered by fwd; do not hand-edit */
 /* =====^!^===== end forwards =====^!^===== */
 
@@ -199,12 +173,14 @@ struct vars {
 	color nlcolor;		/* color of newline */
 	struct state *wordchrs;	/* state in nfa holding word-char outarcs */
 	struct rtree *tree;	/* subexpression tree */
+	struct rtree *treechain;	/* all tree nodes allocated */
+	struct rtree *treefree;		/* any free tree nodes */
 	int ntree;		/* number of tree nodes */
 	struct cvec *cv;	/* utility cvec */
-	struct cvec *ces;	/* collating-element information */
-#		define	ISCELEADER(v,c)	(v->ces != NULL && haschr(v->ces, (c)))
-	struct state *cepbegin;	/* state in nfa, start of CE prototypes */
-	struct state *cepend;	/* state in nfa, end of CE prototypes */
+	struct cvec *mcces;	/* collating-element information */
+#		define	ISCELEADER(v,c)	(v->mcces != NULL && haschr(v->mcces, (c)))
+	struct state *mccepbegin;	/* in nfa, start of MCCE prototypes */
+	struct state *mccepend;	/* in nfa, end of MCCE prototypes */
 	struct subre *lacons;	/* lookahead-constraint vector */
 	int nlacons;		/* size of lacons */
 	int usedshorter;	/* used short-preferring quantifiers */
@@ -220,7 +196,7 @@ struct vars {
 							((vv)->err = (e)))
 #define	ERR(e)	VERR(v, e)		/* record an error */
 #define	NOERR()	{if (ISERR()) return;}	/* if error seen, return */
-#define	NOERRN()	{if (ISERR()) goto end;}	/* NOERR with retval */
+#define	NOERRN()	{if (ISERR()) return NULL;}	/* NOERR with retval */
 #define	INSIST(c, e)	((c) ? 0 : ERR(e))	/* if condition false, error */
 #define	NOTE(b)	(v->re->re_info |= (b))		/* note visible condition */
 #define	EMPTYARC(x, y)	newarc(v->nfa, EMPTY, 0, x, y)
@@ -259,22 +235,6 @@ static struct fns functions = {
 
 
 /*
- - regfree - free an RE (actually, just overall coordination)
- */
-VOID
-regfree(re)
-regex_t *re;
-{
-	if (re == NULL || re->re_magic != REMAGIC)
-		return;		/* no way we can report it, really */
-
-	/* free it, calling internal routine that knows details */
-	(*((struct fns *)re->re_fns)->free)(re);
-
-	re->re_magic = 0;
-}
-
-/*
  - compile - compile regular expression
  ^ int compile(regex_t *, CONST chr *, size_t, int);
  */
@@ -289,25 +249,20 @@ int flags;
 	struct vars *v = &var;
 	struct guts *g;
 	int i;
+	size_t j;
+	FILE *debug = (flags&REG_PROGRESS) ? stdout : (FILE *)NULL;
 #	define	CNOERR()	{ if (ISERR()) return freev(v, v->err); }
 
-	if (re == NULL) {
-	    return REG_INVARG;
-	}
-	
-	/*
-	 * Init re to known state, because we will try to free it if
-	 * compilation fails.
-	 */
-	 
-	re->re_magic = 0;
-
 	/* sanity checks */
-	if (string == NULL ||
-		((flags&REG_EXTENDED) && (flags&REG_QUOTE)) ||
-		(!(flags&REG_EXTENDED) && (flags&REG_ADVF))) {
-	    return REG_INVARG;
-	}
+
+	if (re == NULL || string == NULL)
+		return REG_INVARG;
+	assert(REG_ADVANCED == (REG_EXTENDED|REG_ADVF));
+	if ((flags&REG_QUOTE) &&
+			(flags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE)))
+		return REG_INVARG;
+	if (!(flags&REG_EXTENDED) && (flags&REG_ADVF))
+		return REG_INVARG;
 
 	/* initial setup (after which freev() is callable) */
 	v->re = re;
@@ -319,27 +274,31 @@ int flags;
 	v->nsubexp = 0;
 	v->subs = v->sub10;
 	v->nsubs = 10;
-	for (i = 0; (size_t) i < v->nsubs; i++)
-		v->subs[i] = NULL;
+	for (j = 0; j < v->nsubs; j++)
+		v->subs[j] = NULL;
 	v->nfa = NULL;
 	v->cm = NULL;
 	v->nlcolor = COLORLESS;
 	v->wordchrs = NULL;
 	v->tree = NULL;
+	v->treechain = NULL;
+	v->treefree = NULL;
 	v->cv = NULL;
-	v->ces = NULL;
+	v->mcces = NULL;
 	v->lacons = NULL;
 	v->nlacons = 0;
+	re->re_magic = REMAGIC;
 	re->re_info = 0;		/* bits get set during parse */
+	re->re_csize = sizeof(chr);
 	re->re_guts = NULL;
-	re->re_fns = NULL;
+	re->re_fns = VS(&functions);
 
 	/* more complex setup, malloced things */
-	v->cm = newcm(v);		/* colormap must precede nfa... */
+	v->cm = newcm(v);
 	CNOERR();
-	v->nfa = newnfa(v, (struct nfa *)NULL);	/* ...newnfa() uses it */
+	v->nfa = newnfa(v, v->cm, (struct nfa *)NULL);
 	CNOERR();
-	re->re_guts = ckalloc(sizeof(struct guts));
+	re->re_guts = VS(MALLOC(sizeof(struct guts)));
 	if (re->re_guts == NULL)
 		return freev(v, REG_ESPACE);
 	g = (struct guts *)re->re_guts;
@@ -351,19 +310,17 @@ int flags;
 	v->cv = newcvec(100, 10);
 	if (v->cv == NULL)
 		return freev(v, REG_ESPACE);
-	i = nces(v);
+	i = nmcces(v);
 	if (i > 0) {
-		v->ces = newcvec(nleaders(v), i);
+		v->mcces = newcvec(nleaders(v), i);
 		CNOERR();
-		v->ces = allces(v, v->ces);
-		leaders(v, v->ces);
+		v->mcces = allmcces(v, v->mcces);
+		leaders(v, v->mcces);
 	}
 	CNOERR();
 
 	/* parsing */
 	lexstart(v);			/* also handles prefixes */
-	if (SEE(EOS))			/* empty RE is illegal */
-		return freev(v, REG_EMPTY);
 	v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final, NONEYET);
 	assert(SEE(EOS));		/* even if error; ISERR() => SEE(EOS) */
 	CNOERR();
@@ -371,38 +328,40 @@ int flags;
 	/* finish setup of nfa and its subre tree */
 	specialcolors(v->nfa);
 	CNOERR();
-	if (flags&REG_PROGRESS) {
-		dumpnfa(v->nfa, stdout);
-		dumprt(v->tree, stdout, 1);
+	if (debug != NULL) {
+		dumpnfa(v->nfa, debug);
+		dumprt(v->tree, debug, 1);
 	}
 	v->usedshorter = 0;
 	optrt(v, v->tree);
-	if (v->tree != NULL)
+	if (v->tree != NULL) {
 		v->ntree = numrt(v->tree, 1);
-	else
+		markrt(v->tree);
+	} else
 		v->ntree = 0;
-	if (flags&REG_PROGRESS) {
-		printf("-->\n");
-		dumprt(v->tree, stdout, 1);
+	cleanrt(v);
+	if (debug != NULL) {
+		fprintf(debug, "-->\n");
+		dumprt(v->tree, debug, 1);
 	}
 
 	/* build compacted NFAs for tree, lacons, main nfa */
-	nfatree(v, v->tree);
-	if (flags&REG_PROGRESS) {
-		printf("---->\n");
-		dumprt(v->tree, stdout, 1);
+	nfatree(v, v->tree, debug);
+	if (debug != NULL) {
+		fprintf(debug, "---->\n");
+		dumprt(v->tree, debug, 1);
 	}
 	CNOERR();
 	assert(v->nlacons == 0 || v->lacons != NULL);
 	for (i = 1; i < v->nlacons; i++)
-		nfanode(v, &v->lacons[i]);
+		nfanode(v, &v->lacons[i], debug);
 	CNOERR();
-	optimize(v->nfa);		/* removes unreachable states */
+	re->re_info |= optimize(v->nfa, debug);
 	CNOERR();
 	if (v->nfa->post->nins <= 0)	
 		return freev(v, REG_IMPOSS);	/* end unreachable! */
 	assert(v->nfa->pre->nouts > 0);
-	compact(v, v->nfa, &g->cnfa);
+	compact(v->nfa, &g->cnfa);
 	CNOERR();
 	freenfa(v->nfa);
 	v->nfa = NULL;
@@ -412,13 +371,8 @@ int flags;
 	CNOERR();
 
 	/* looks okay, package it up */
-	re->re_magic = REMAGIC;
 	re->re_nsub = v->nsubexp;
-	/* re_info is already set */
-	re->re_csize = sizeof(chr);
-	re->re_guts = (VOID *)g;
-	re->re_fns = (VOID *)&functions;
-	v->re = NULL;
+	v->re = NULL;			/* freev no longer frees re */
 	g->magic = GUTSMAGIC;
 	g->cflags = v->cflags;
 	g->info = re->re_info;
@@ -428,7 +382,7 @@ int flags;
 	g->tree = v->tree;
 	v->tree = NULL;
 	g->ntree = v->ntree;
-	g->compare = (v->cflags&REG_ICASE) ? sncmp : wcsncmp;
+	g->compare = (v->cflags&REG_ICASE) ? casecmp : cmp;
 	g->lacons = v->lacons;
 	v->lacons = NULL;
 	g->nlacons = v->nlacons;
@@ -453,16 +407,15 @@ int wanted;			/* want enough room for this one */
 	struct subre **p;
 	size_t n;
 
-	assert((size_t)wanted >= v->nsubs);
+	assert(wanted > 0 && (size_t)wanted >= v->nsubs);
 	n = (size_t)wanted * 3 / 2 + 1;
 	if (v->subs == v->sub10) {
-		p = (struct subre **)ckalloc(n * sizeof(struct subre *));
+		p = (struct subre **)MALLOC(n * sizeof(struct subre *));
 		if (p != NULL)
-			memcpy((VOID *)p, (VOID *)v->subs,
+			memcpy(VS(p), VS(v->subs),
 					v->nsubs * sizeof(struct subre *));
 	} else
-		p = (struct subre **) ckrealloc((VOID *)v->subs,
-			n * sizeof(struct subre *));
+		p = REALLOC(v->subs, n * sizeof(struct subre *));
 	if (p == NULL) {
 		ERR(REG_ESPACE);
 		return;
@@ -476,8 +429,8 @@ int wanted;			/* want enough room for this one */
 
 /*
  - freev - free vars struct's substructures where necessary
- * Does optional error-number setting, and returns error code, to make
- * error code terser.
+ * Optionally does error-number setting, and always returns error code
+ * (if any), to make error-handling code terser.
  ^ static int freev(struct vars *, int);
  */
 static int
@@ -488,20 +441,22 @@ int err;
 	if (v->re != NULL)
 		rfree(v->re);
 	if (v->subs != v->sub10)
-		ckfree((char *)v->subs);
+		FREE(v->subs);
 	if (v->nfa != NULL)
 		freenfa(v->nfa);
 	if (v->cm != NULL)
 		freecm(v->cm);
 	if (v->tree != NULL)
-		freert(v->tree);
+		freert(v, v->tree);
+	if (v->treechain != NULL)
+		cleanrt(v);
 	if (v->cv != NULL)
 		freecvec(v->cv);
-	if (v->ces != NULL)
-		freecvec(v->ces);
+	if (v->mcces != NULL)
+		freecvec(v->mcces);
 	if (v->lacons != NULL)
 		freelacons(v->lacons, v->nlacons);
-	ERR(err);
+	ERR(err);			/* nop if err==0 */
 
 	return v->err;
 }
@@ -510,6 +465,9 @@ int err;
  - parse - parse an RE
  * Arguably this is too big and too complex and ought to be divided up.
  * However, the code is somewhat intertwined...
+ *
+ * Note that it is no longer necessary to be rigorous about freeing tree
+ * nodes on error exits, as the tree machinery keeps track of them.
  ^ static struct rtree *parse(struct vars *, int, int, struct state *,
  ^ 	struct state *, int);
  */
@@ -531,7 +489,6 @@ int pprefer;			/* parent's short/long preference */
 #	define	ARCV(t, val)	newarc(v->nfa, t, val, lp, rp)
 	int m, n;
 	int emptybranch;	/* is there anything in this branch yet? */
-	color co;
 	struct rtree *branches;	/* top level */
 	struct rtree *branch;	/* current branch */
 	struct subre *now;	/* current subtree's top */
@@ -545,11 +502,10 @@ int pprefer;			/* parent's short/long preference */
 
 	assert(stopper == ')' || stopper == EOS);
 
-        branch = NULL;		/* lint. */
-	rt1 = NULL;		/* lint. */
-	
 	capture = 0;
 	branches = newrt(v);
+	branch = branches;
+	rt1 = NULL;		/* shut up lint */
 	firstbranch = 1;
 	NOERRN();
 	do {
@@ -557,27 +513,17 @@ int pprefer;			/* parent's short/long preference */
 		emptybranch = 1;	/* tentatively */
 		left = newstate(v->nfa);
 		right = newstate(v->nfa);
-		if (!firstbranch)
+		NOERRN();
+		if (!firstbranch) {
 			rt1 = newrt(v);
-#if 1
-		if (ISERR()) {
-		    freert(rt1);
-		    freert(branches);	/* mem leak (CCS). */
-		    return NULL;
+			NOERRN();
+			branch->next = rt1;
+			branch = rt1;
 		}
-#else 
-		NOERRN();
-#endif
 		EMPTYARC(init, left);
 		EMPTYARC(right, final);
 		lp = left;
 		rp = right;
-		if (firstbranch)
-			branch = branches;
-		else {
-			branch->next = rt1;
-			branch = rt1;
-		}
 		branch->op = '|';
 		now = &branch->left;
 		*now = subre(left, right, NONEYET, 0, (struct rtree *)NULL);
@@ -609,7 +555,7 @@ int pprefer;			/* parent's short/long preference */
 					sub.subno = v->nsubexp;
 					if ((size_t)sub.subno >= v->nsubs)
 						moresubs(v, sub.subno);
-					assert((size_t) sub.subno < v->nsubs);
+					assert((size_t)sub.subno < v->nsubs);
 				} else
 					sub.subno = 0;
 				NEXT();
@@ -661,7 +607,7 @@ int pprefer;			/* parent's short/long preference */
 				assert(SEE(')') || ISERR());
 				NEXT();
 				m = newlacon(v, s, s2, m);
-				freert(rt1);
+				freert(v, rt1);
 				NOERRN();
 				ARCV(LACON, m);
 				constraint = 1;
@@ -696,10 +642,10 @@ int pprefer;			/* parent's short/long preference */
 				NEXT();
 				break;
 			case '.':
-				co = (color) ((v->cflags&REG_NLSTOP) 
-					? nlcolor(v) 
-					: COLORLESS);
-				rainbow(v->nfa, v->cm, PLAIN, co, lp, rp);
+				rainbow(v->nfa, v->cm, PLAIN,
+					(v->cflags&REG_NLSTOP) ?
+							nlcolor(v) : COLORLESS,
+					lp, rp);
 				NEXT();
 				break;
 			case '^':
@@ -804,13 +750,19 @@ int pprefer;			/* parent's short/long preference */
 				constraint = 1;
 				break;
 			case ')':		/* unbalanced paren */
+#ifdef POSIX_MISTAKE
 				if (!(v->cflags&REG_EXTENDED) ||
 							(v->cflags&REG_ADVF)) {
-				    ERR(REG_EPAREN);
-				    goto end;
+					ERR(REG_EPAREN);
+					return NULL;
 				}
 				NOTE(REG_UPBOTCH);
 				/* fallthrough into case PLAIN */
+#else
+				ERR(REG_EPAREN);
+				return NULL;
+				break;
+#endif
 			case PLAIN:
 				onechr(v, v->nextvalue, lp, rp);
 				okcolors(v->nfa, v->cm);
@@ -822,10 +774,12 @@ int pprefer;			/* parent's short/long preference */
 			case '?':
 			case '{':
 				ERR(REG_BADRPT);
-				goto end;
+				return NULL;
+				break;
 			default:
 				ERR(REG_ASSERT);
-				goto end;
+				return NULL;
+				break;
 			}
 
 			/* ...possibly followed by a quantifier */
@@ -858,13 +812,13 @@ int pprefer;			/* parent's short/long preference */
 						n = INFINITY;
 					if (m > n) {
 						ERR(REG_BADBR);
-						goto end;
+						return NULL;
 					}
 				} else
 					n = m;
 				if (!SEE('}')) {	/* gets errors too */
 					ERR(REG_BADBR);
-					goto end;
+					return NULL;
 				}
 				if (m != n)
 					sub.prefer = (v->nextvalue) ? LONGER :
@@ -880,19 +834,19 @@ int pprefer;			/* parent's short/long preference */
 			/* constraints may not be quantified */
 			if (constraint) {
 				ERR(REG_BADRPT);
-				goto end;
+				return NULL;
 			}
 
 			/* annoying special case:  {0,0} cancels everything */
 			if (m == 0 && n == 0 && sub.begin != NULL) {
-				freert(now->tree);
+				freert(v, now->tree);
 				now->tree = NULL;
 				sub.begin = NULL;	/* no substructure */
 				sub.prefer = NONEYET;
 				/* the repeat() below will do the rest */
 			}
 
-			/* if no substructure, aVOID hard part */
+			/* if no substructure, avoid hard part */
 			if (now->prefer == NONEYET)
 				now->prefer = sub.prefer;
 			if (sub.begin == NULL && (sub.prefer == NONEYET ||
@@ -983,8 +937,8 @@ int pprefer;			/* parent's short/long preference */
 				t->tree = rt1;
 				rt1->op = 'b';
 				rt1->left.subno = sub.subno;
-				rt1->left.min = (short) m;
-				rt1->left.max = (short) n;
+				rt1->left.min = (short)m;
+				rt1->left.max = (short)n;
 				rt1->left.prefer = sub.prefer;
 				continue;		/* NOTE CONTINUE */
 			}
@@ -1036,14 +990,13 @@ int pprefer;			/* parent's short/long preference */
 			branch->op = ',';
 		else {
 			branches = branch->left.tree;	/* might be NULL */
-			freertnode(branch);
+			freertnode(v, branch);
 		}
 	}
 
 	if (capture)			/* actually a catchall flag */
 		return branches;
-	end:				/* mem leak (CCS) */
-	freert(branches);
+	freert(v, branches);
 	return NULL;
 }
 
@@ -1197,7 +1150,7 @@ struct state *rp;
 	struct state *s;
 	struct arc *a;			/* arc from lp */
 	struct arc *ba;			/* arc from left, from bracket() */
-	struct arc *pa;			/* CE-prototype arc */
+	struct arc *pa;			/* MCCE-prototype arc */
 	color co;
 	chr *p;
 	int i;
@@ -1213,16 +1166,16 @@ struct state *rp;
 	/* easy part of complementing */
 	colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp);
 	NOERR();
-	if (v->ces == NULL) {		/* no CEs -- we're done */
+	if (v->mcces == NULL) {		/* no MCCEs -- we're done */
 		dropstate(v->nfa, left);
 		assert(right->nins == 0);
 		freestate(v->nfa, right);
 		return;
 	}
 
-	/* but complementing gets messy in the presence of CEs... */
+	/* but complementing gets messy in the presence of MCCEs... */
 	NOTE(REG_ULOCALE);
-	for (p = v->ces->chrs, i = v->ces->nchrs; i > 0; p++, i--) {
+	for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) {
 		co = getcolor(v->cm, *p);
 		a = findarc(lp, PLAIN, co);
 		ba = findarc(left, PLAIN, co);
@@ -1236,7 +1189,7 @@ struct state *rp;
 		NOERR();
 		newarc(v->nfa, PLAIN, co, lp, s);
 		NOERR();
-		pa = findarc(v->cepbegin, PLAIN, co);
+		pa = findarc(v->mccepbegin, PLAIN, co);
 		assert(pa != NULL);
 		if (ba == NULL) {	/* easy case, need all of them */
 			cloneouts(v->nfa, pa->to, s, rp, PLAIN);
@@ -1288,10 +1241,11 @@ struct state *rp;
 	case RANGE:			/* a-b-c or other botch */
 		ERR(REG_ERANGE);
 		return;
+		break;
 	case PLAIN:
 		c[0] = v->nextvalue;
 		NEXT();
-		/* shortcut for ordinary chr (not range, not CE leader) */
+		/* shortcut for ordinary chr (not range, not MCCE leader) */
 		if (!SEE(RANGE) && !ISCELEADER(v, c[0])) {
 			onechr(v, c[0], lp, rp);
 			return;
@@ -1318,6 +1272,7 @@ struct state *rp;
 		NOERR();
 		dovec(v, cv, lp, rp);
 		return;
+		break;
 	case CCLASS:
 		startp = v->now;
 		endp = scanplain(v);
@@ -1327,9 +1282,11 @@ struct state *rp;
 		NOERR();
 		dovec(v, cv, lp, rp);
 		return;
+		break;
 	default:
 		ERR(REG_ASSERT);
 		return;
+		break;
 	}
 
 	if (SEE(RANGE)) {
@@ -1353,6 +1310,7 @@ struct state *rp;
 		default:
 			ERR(REG_ERANGE);
 			return;
+			break;
 		}
 	} else
 		endc = startc;
@@ -1407,35 +1365,35 @@ leaders(v, cv)
 struct vars *v;
 struct cvec *cv;
 {
-	int ce;
+	int mcce;
 	chr *p;
 	chr leader;
 	struct state *s;
 	struct arc *a;
 
-	v->cepbegin = newstate(v->nfa);
-	v->cepend = newstate(v->nfa);
+	v->mccepbegin = newstate(v->nfa);
+	v->mccepend = newstate(v->nfa);
 	NOERR();
 
-	for (ce = 0; ce < cv->nces; ce++) {
-		p = cv->ces[ce];
+	for (mcce = 0; mcce < cv->nmcces; mcce++) {
+		p = cv->mcces[mcce];
 		leader = *p;
 		if (!haschr(cv, leader)) {
 			addchr(cv, leader);
 			s = newstate(v->nfa);
 			newarc(v->nfa, PLAIN, subcolor(v->cm, leader),
-								v->cepbegin, s);
+							v->mccepbegin, s);
 			okcolors(v->nfa, v->cm);
 		} else {
-			a = findarc(v->cepbegin, PLAIN,
+			a = findarc(v->mccepbegin, PLAIN,
 						getcolor(v->cm, leader));
 			assert(a != NULL);
 			s = a->to;
-			assert(s != v->cepend);
+			assert(s != v->mccepend);
 		}
 		p++;
-		assert(*p != 0 && *(p+1) == 0);	/* only 2-char CEs at present */
-		newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->cepend);
+		assert(*p != 0 && *(p+1) == 0);	/* only 2-char MCCEs for now */
+		newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend);
 		okcolors(v->nfa, v->cm);
 	}
 }
@@ -1463,7 +1421,7 @@ struct state *rp;
 
 /*
  - dovec - fill in arcs for each element of a cvec
- * This one has to handle the messy cases, like CEs and CE leaders.
+ * This one has to handle the messy cases, like MCCEs and MCCE leaders.
  ^ static VOID dovec(struct vars *, struct cvec *, struct state *,
  ^ 	struct state *);
  */
@@ -1493,11 +1451,11 @@ struct state *rp;
 			assert(singleton(v->cm, *p));
 			*np++ = *p;
 		}
-	cv->nchrs = np - cv->chrs;	/* only CE leaders remain */
-	if (cv->nchrs == 0 && cv->nces == 0)
+	cv->nchrs = np - cv->chrs;	/* only MCCE leaders remain */
+	if (cv->nchrs == 0 && cv->nmcces == 0)
 		return;
 
-	/* deal with the CE leaders */
+	/* deal with the MCCE leaders */
 	NOTE(REG_ULOCALE);
 	for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) {
 		co = getcolor(v->cm, *p);
@@ -1510,7 +1468,7 @@ struct state *rp;
 			newarc(v->nfa, PLAIN, co, lp, s);
 			NOERR();
 		}
-		pa = findarc(v->cepbegin, PLAIN, co);
+		pa = findarc(v->mccepbegin, PLAIN, co);
 		assert(pa != NULL);
 		ps = pa->to;
 		newarc(v->nfa, '$', 1, s, rp);
@@ -1519,9 +1477,9 @@ struct state *rp;
 		NOERR();
 	}
 
-	/* and the CEs */
-	for (i = 0; i < cv->nces; i++) {
-		p = cv->ces[i];
+	/* and the MCCEs */
+	for (i = 0; i < cv->nmcces; i++) {
+		p = cv->mcces[i];
 		assert(singleton(v->cm, *p));
 		co = getcolor(v->cm, *p++);
 		a = findarc(lp, PLAIN, co);
@@ -1587,7 +1545,7 @@ struct vars *v;
 	NEXT();
 	assert(v->savenow != NULL && SEE('['));
 	bracket(v, left, right);
-	assert(((v->savenow != NULL) && SEE(']')) || ISERR());
+	assert((v->savenow != NULL && SEE(']')) || ISERR());
 	NEXT();
 	NOERR();
 	v->wordchrs = left;
@@ -1626,14 +1584,23 @@ static struct rtree *
 newrt(v)
 struct vars *v;
 {
-	struct rtree *rt = (struct rtree *)ckalloc(sizeof(struct rtree));
-
-	if (rt == NULL) {
-		ERR(REG_ESPACE);
-		return NULL;
+	struct rtree *rt;
+
+	rt = v->treefree;
+	if (rt != NULL)
+		v->treefree = rt->next;
+	else {
+		rt = (struct rtree *)MALLOC(sizeof(struct rtree));
+		if (rt == NULL) {
+			ERR(REG_ESPACE);
+			return NULL;
+		}
+		rt->chain = v->treechain;
+		v->treechain = rt;
 	}
 
 	rt->op = '?';		/* invalid */
+	rt->flags = 0;
 	rt->no = 0;
 	rt->left.begin = NULL;
 	rt->left.end = NULL;
@@ -1650,36 +1617,39 @@ struct vars *v;
 	rt->right.tree = NULL;
 	ZAPCNFA(rt->right.cnfa);
 	rt->next = NULL;
+
 	return rt;
 }
 
 /*
  - freert - free a subRE subtree
- ^ static VOID freert(struct rtree *);
+ ^ static VOID freert(struct vars *, struct rtree *);
  */
 static VOID
-freert(rt)
+freert(v, rt)
+struct vars *v;			/* might be NULL */
 struct rtree *rt;
 {
 	if (rt == NULL)
 		return;
 
 	if (rt->left.tree != NULL)
-		freert(rt->left.tree);
+		freert(v, rt->left.tree);
 	if (rt->right.tree != NULL)
-		freert(rt->right.tree);
+		freert(v, rt->right.tree);
 	if (rt->next != NULL)
-		freert(rt->next);
+		freert(v, rt->next);
 
-	freertnode(rt);
+	freertnode(v, rt);
 }
 
 /*
  - freertnode - free one node in a subRE subtree
- ^ static VOID freertnode(struct rtree *);
+ ^ static VOID freertnode(struct vars *, struct rtree *);
  */
 static VOID
-freertnode(rt)
+freertnode(v, rt)
+struct vars *v;			/* might be NULL */
 struct rtree *rt;
 {
 	if (rt == NULL)
@@ -1689,8 +1659,13 @@ struct rtree *rt;
 		freecnfa(&rt->left.cnfa, 0);
 	if (!NULLCNFA(rt->right.cnfa))
 		freecnfa(&rt->right.cnfa, 0);
+	rt->flags = 0;
 
-	ckfree((char *)rt);
+	if (v != NULL) {
+		rt->next = v->treefree;
+		v->treefree = rt;
+	} else
+		FREE(rt);
 }
 
 /*
@@ -1721,7 +1696,7 @@ struct rtree *rt;
 			subno = rt->left.subno;
 			rt->left = t->left;
 			assert(NULLCNFA(t->left.cnfa));
-			freertnode(t);
+			freertnode(v, t);
 			if (subno != 0) {
 				assert(rt->left.subno == 0 && subno > 0);
 				rt->left.subno = subno;
@@ -1739,7 +1714,7 @@ struct rtree *rt;
 			subno = rt->right.subno;
 			rt->right = t->left;
 			assert(NULLCNFA(t->right.cnfa));
-			freertnode(t);
+			freertnode(v, t);
 			if (subno != 0) {
 				assert(rt->right.subno == 0 && subno > 0);
 				rt->right.subno = subno;
@@ -1800,7 +1775,7 @@ int start;			/* starting point for subtree numbers */
 	assert(rt != NULL);
 
 	i = start;
-	rt->no = (short) i++;
+	rt->no = (short)i++;
 	if (rt->left.tree != NULL)
 		i = numrt(rt->left.tree, i);
 	if (rt->right.tree != NULL)
@@ -1811,54 +1786,95 @@ int start;			/* starting point for subtree numbers */
 }
 
 /*
+ - markrt - mark a tree node, and every node reachable from it, as INUSE
+ ^ static VOID markrt(struct rtree *);
+ */
+static VOID
+markrt(rt)
+struct rtree *rt;		/* root of the (sub)tree to mark; must not be NULL */
+{
+	assert(rt != NULL);
+
+	rt->flags |= INUSE;	/* cleanrt() frees only nodes lacking this flag */
+	if (rt->left.tree != NULL)
+		markrt(rt->left.tree);
+	if (rt->right.tree != NULL)
+		markrt(rt->right.tree);
+	if (rt->next != NULL)		/* the next link is followed too */
+		markrt(rt->next);
+}
+
+/*
+ - cleanrt - free every allocated tree node that is not marked INUSE
+ ^ static VOID cleanrt(struct vars *);
+ */
+static VOID
+cleanrt(v)
+struct vars *v;			/* its treechain/treefree lists are emptied */
+{
+	struct rtree *rt;
+	struct rtree *next;		/* saved before rt may be freed */
+
+	for (rt = v->treechain; rt != NULL; rt = next) {
+		next = rt->chain;	/* allocation chain; ->next is tree/free link */
+		if (!(rt->flags&INUSE))
+			FREE(rt);
+	}
+	v->treechain = NULL;
+	v->treefree = NULL;		/* just on general principles */
+}
+
+/*
  - nfatree - turn a subRE subtree into a tree of compacted NFAs
- ^ static VOID nfatree(struct vars *, struct rtree *);
+ ^ static VOID nfatree(struct vars *, struct rtree *, FILE *);
  */
 static VOID
-nfatree(v, rt)
+nfatree(v, rt, f)
 struct vars *v;
 struct rtree *rt;
+FILE *f;			/* for debug output */
 {
 	if (rt == NULL)
 		return;
 
 	if (rt->left.begin != NULL)
-		nfanode(v, &rt->left);
+		nfanode(v, &rt->left, f);
 	if (rt->left.tree != NULL)
-		nfatree(v, rt->left.tree);
+		nfatree(v, rt->left.tree, f);
 
 	if (rt->right.begin != NULL)
-		nfanode(v, &rt->right);
+		nfanode(v, &rt->right, f);
 	if (rt->right.tree != NULL)
-		nfatree(v, rt->right.tree);
+		nfatree(v, rt->right.tree, f);
 
 	if (rt->next != NULL)
-		nfatree(v, rt->next);
+		nfatree(v, rt->next, f);
 }
 
 /*
  - nfanode - do one NFA for nfatree
- ^ static VOID nfanode(struct vars *, struct subre *);
+ ^ static VOID nfanode(struct vars *, struct subre *, FILE *);
  */
 static VOID
-nfanode(v, sub)
+nfanode(v, sub, f)
 struct vars *v;
 struct subre *sub;
+FILE *f;			/* for debug output */
 {
 	struct nfa *nfa;
 
 	if (sub->begin == NULL)
 		return;
 
-	nfa = newnfa(v, v->nfa);
+	nfa = newnfa(v, v->cm, v->nfa);
 	NOERR();
 	dupnfa(nfa, sub->begin, sub->end, nfa->init, nfa->final);
 	if (!ISERR()) {
 		specialcolors(nfa);
-		optimize(nfa);
+		(DISCARD) optimize(nfa, f);
 	}
 	if (!ISERR())
-		compact(v, nfa, &sub->cnfa);
+		compact(nfa, &sub->cnfa);
 	freenfa(nfa);
 }
 
@@ -1877,11 +1893,11 @@ int pos;
 	struct subre *sub;
 
 	if (v->nlacons == 0) {
-		v->lacons = (struct subre *)ckalloc(2 * sizeof(struct subre));
+		v->lacons = (struct subre *)MALLOC(2 * sizeof(struct subre));
 		n = 1;		/* skip 0th */
 		v->nlacons = 2;
 	} else {
-		v->lacons = (struct subre *)ckrealloc((VOID *) v->lacons,
+		v->lacons = (struct subre *)REALLOC(v->lacons,
 					(v->nlacons+1)*sizeof(struct subre));
 		n = v->nlacons++;
 	}
@@ -1909,10 +1925,11 @@ int n;
 	struct subre *sub;
 	int i;
 
+	assert(n > 0);
 	for (sub = subs + 1, i = n - 1; i > 0; sub++, i--)
 		if (!NULLCNFA(sub->cnfa))
 			freecnfa(&sub->cnfa, 0);
-	ckfree((char *)subs);
+	FREE(subs);
 }
 
 /*
@@ -1921,11 +1938,15 @@ int n;
  */
 static VOID
 rfree(re)
-regex_t *re;			/* regfree has validated it */
+regex_t *re;
 {
-	struct guts *g = (struct guts *)re->re_guts;
+	struct guts *g;
 
-	re->re_magic = 0;	/* invalidate it */
+	if (re == NULL || re->re_magic != REMAGIC)
+		return;
+
+	re->re_magic = 0;	/* invalidate RE */
+	g = (struct guts *)re->re_guts;
 	re->re_guts = NULL;
 	re->re_fns = NULL;
 	g->magic = 0;
@@ -1934,10 +1955,50 @@ regex_t *re;			/* regfree has validated it */
 	if (g->cm != NULL)
 		freecm(g->cm);
 	if (g->tree != NULL)
-		freert(g->tree);
+		freert((struct vars *)NULL, g->tree);
 	if (g->lacons != NULL)
 		freelacons(g->lacons, g->nlacons);
-	ckfree((char *)g);
+	FREE(g);
+}
+
+/*
+ - dump - dump an RE in human-readable form (empty unless REG_DEBUG is set)
+ ^ static VOID dump(regex_t *, FILE *);
+ */
+static VOID
+dump(re, f)
+regex_t *re;			/* the RE to describe */
+FILE *f;			/* stream that receives the dump */
+{
+#ifdef REG_DEBUG
+	struct guts *g;
+	int i;
+
+	if (re->re_magic != REMAGIC)	/* reported, but dumping continues */
+		fprintf(f, "bad magic number (0x%x not 0x%x)\n", re->re_magic,
+								REMAGIC);
+	if (re->re_guts == NULL) {	/* nothing further can be shown */
+		fprintf(f, "NULL guts!!!\n");
+		return;
+	}
+	g = (struct guts *)re->re_guts;
+	if (g->magic != GUTSMAGIC)	/* reported, but dumping continues */
+		fprintf(f, "bad guts magic number (0x%x not 0x%x)\n", g->magic,
+								GUTSMAGIC);
+
+	fprintf(f, "nsub %d, info 0%o, csize %d, ntree %d, usedshort %d\n", 
+		re->re_nsub, re->re_info, re->re_csize, g->ntree,
+		g->usedshorter);
+
+	dumpcolors(g->cm, f);
+	dumpcnfa(&g->cnfa, f);
+	for (i = 1; i < g->nlacons; i++) {	/* entry 0 is never dumped */
+		fprintf(f, "la%d (%s):\n", i,
+				(g->lacons[i].subno) ? "positive" : "negative");
+		dumpcnfa(&g->lacons[i].cnfa, f);
+	}
+	dumprt(g->tree, f, 0);
+#endif
 }
 
 /*
@@ -2068,22 +2129,9 @@ int level;
 	}
 }
 
-/*
- - dump - dump an RE in human-readable form
- ^ static VOID dump(regex_t *, FILE *);
- */
-static VOID
-dump(re, f)
-regex_t *re;
-FILE *f;
-{
-}
-
-#undef NOERRN
-#define	NOERRN()	{if (ISERR()) return NULL;}	/* NOERR with retval */
-
 #define	COMPILE	1
-#include "lex.c"
-#include "color.c"
-#include "locale.c"
-#include "nfa.c"
+#include "regc_lex.c"
+#include "regc_color.c"
+#include "regc_nfa.c"
+#include "regc_cvec.c"
+#include "regc_locale.c"
diff --git a/generic/regcustom.h b/generic/regcustom.h
new file mode 100644
index 0000000..0fda25f
--- /dev/null
+++ b/generic/regcustom.h
@@ -0,0 +1,90 @@
+/* headers (which also pick up the standard ones, or equivalents) */
+#include "tclInt.h"
+#include "tclPort.h"
+
+/* overrides for regguts.h definitions */
+/* function-pointer declarations, and allocation routed to the ck* calls */
+#define	FUNCPTR(name, args)	(*name) _ANSI_ARGS_(args)
+#define	MALLOC(n)		ckalloc(n)
+#define	FREE(p)			ckfree(VS(p))
+#define	REALLOC(p,n)		ckrealloc(VS(p),n)
+
+
+
+/*
+ * Do not insert extras between the "begin" and "end" lines -- this
+ * chunk is automatically extracted to be fitted into regex.h.
+ */
+/* --- begin --- */
+/* ensure certain things don't sneak in from system headers */
+#ifdef __REG_WIDE_T
+#undef __REG_WIDE_T
+#endif
+#ifdef __REG_WIDE_COMPILE
+#undef __REG_WIDE_COMPILE
+#endif
+#ifdef __REG_WIDE_EXEC
+#undef __REG_WIDE_EXEC
+#endif
+#ifdef __REG_REGOFF_T
+#undef __REG_REGOFF_T
+#endif
+#ifdef __REG_VOID_T
+#undef __REG_VOID_T
+#endif
+#ifdef __REG_CONST
+#undef __REG_CONST
+#endif
+/* interface types */
+#define	__REG_WIDE_T	Tcl_UniChar
+#define	__REG_WIDE_COMPILE	re_ucomp
+#define	__REG_WIDE_EXEC		re_uexec
+#define	__REG_REGOFF_T	long	/* not really right, but good enough... */
+#define	__REG_VOID_T	VOID
+#define	__REG_CONST	CONST
+#ifndef __REG_NOFRONT
+#define	__REG_NOFRONT		/* don't want regcomp() and regexec() */
+#endif
+#ifndef __REG_NOCHAR
+#define	__REG_NOCHAR		/* or the char versions */
+#endif
+/* --- end --- */
+
+
+
+/* internal character type and related */
+typedef Tcl_UniChar chr;	/* the type itself */
+typedef int pchr;		/* what it promotes to */
+typedef unsigned uchr;		/* unsigned type that will hold a chr */
+typedef int celt;		/* type to hold chr, MCCE number, or NOCELT */
+#define	NOCELT	(-1)		/* celt value which is not valid chr or MCCE */
+#define	CHR(c)	(UCHAR(c))	/* turn char literal into chr literal */
+#define	DIGITVAL(c)	((c)-'0')	/* turn chr digit into its value */
+#define	CHRBITS	16		/* bits in a chr; must not use sizeof */
+#define	CHR_MIN	0x0000		/* smallest and largest chr; the value */
+#define	CHR_MAX	0xffff		/*  CHR_MAX-CHR_MIN+1 should fit in uchr */
+
+/* functions operating on chr */
+#define	iscalnum(x)	TclUniCharIsAlnum(x)
+#define	iscalpha(x)	TclUniCharIsAlpha(x)
+#define	iscdigit(x)	TclUniCharIsDigit(x)
+#define	iscspace(x)	TclUniCharIsSpace(x)
+
+/* name the external functions (same names as __REG_WIDE_COMPILE/_EXEC) */
+#define	compile		re_ucomp
+#define	exec		re_uexec
+#ifdef notdef		/* these two renames apply only if notdef is defined */
+#define	regfree		re_ufree
+#define	regerror	re_uerror
+#endif
+
+/*
+ * Implement a mistake in the original POSIX.2:  in EREs, and only in EREs
+ * (AREs do not support this botch), an unbalanced right parenthesis is an
+ * ordinary character rather than an error.  This was unintentional, and
+ * will be fixed someday.
+ */
+#define	POSIX_MISTAKE	/* sigh */
+
+/* and pick up the standard header */
+#include "regex.h"
diff --git a/generic/regerror.c b/generic/regerror.c
new file mode 100644
index 0000000..5eb67a7
--- /dev/null
+++ b/generic/regerror.c
@@ -0,0 +1,82 @@
+/*
+ * regerror - error-code expansion
+ */
+
+#include "regguts.h"
+
+/* sprintf format for explaining an unrecognized code; %x gets the code */
+static char unk[] = "*** unknown regex error code 0x%x ***";
+
+/* table mapping each error code to its symbolic name and its explanation */
+static struct rerr {
+	int code;		/* REG_* value; a negative code ends the table */
+	char *name;		/* symbolic name, e.g. "REG_NOMATCH" */
+	char *explain;		/* human-readable explanation */
+} rerrs[] = {
+	/* the actual table is built from regex.h */
+#	include "regerrs.h"
+	-1,	"",	"oops",		/* end marker; explanation special-cased in code */
+};
+
+/*
+ - regerror - expand an error code into text, truncating to fit errbuf
+ */
+/* ARGSUSED */
+size_t				/* actual space needed (including NUL) */
+regerror(errcode, preg, errbuf, errbuf_size)
+int errcode;			/* error code, or REG_ATOI or REG_ITOA */
+__REG_CONST regex_t *preg;	/* associated regex_t (unused at present) */
+char *errbuf;			/* result buffer; also input for REG_ATOI/ITOA */
+size_t errbuf_size;		/* space in errbuf; 0 means write nothing */
+{
+	struct rerr *r;
+	char *msg;
+	char convbuf[sizeof(unk)+50];	/* 50 = plenty for int */
+	size_t len;
+	int icode;
+
+	switch (errcode) {
+	case REG_ATOI:		/* convert name (in errbuf) to number */
+		for (r = rerrs; r->code >= 0; r++)
+			if (strcmp(r->name, errbuf) == 0)
+				break;
+		sprintf(convbuf, "%d", r->code);	/* -1 for unknown */
+		msg = convbuf;
+		break;
+	case REG_ITOA:		/* convert number (in errbuf) to name */
+		icode = atoi(errbuf);	/* not our problem if this fails */
+		for (r = rerrs; r->code >= 0; r++)
+			if (r->code == icode)
+				break;
+		if (r->code >= 0)
+			msg = r->name;
+		else {			/* unknown; tell him the number */
+			sprintf(convbuf, "REG_%u", (unsigned)icode);
+			msg = convbuf;
+		}
+		break;
+	default:		/* a real, normal error code */
+		for (r = rerrs; r->code >= 0; r++)
+			if (r->code == errcode)
+				break;
+		if (r->code >= 0)
+			msg = r->explain;
+		else {			/* unknown; say so (%x wants unsigned) */
+			sprintf(convbuf, unk, (unsigned)errcode);
+			msg = convbuf;
+		}
+		break;
+	}
+
+	len = strlen(msg) + 1;		/* space needed, including NUL */
+	if (errbuf_size > 0) {
+		if (errbuf_size > len)
+			strcpy(errbuf, msg);
+		else {			/* truncate to fit */
+			strncpy(errbuf, msg, errbuf_size-1);
+			errbuf[errbuf_size-1] = '\0';
+		}
+	}
+
+	return len;		/* full length, even if output was truncated */
+}
diff --git a/generic/regerrs.h b/generic/regerrs.h
new file mode 100644
index 0000000..8298597
--- /dev/null
+++ b/generic/regerrs.h
@@ -0,0 +1,19 @@
+REG_OKAY,	"REG_OKAY",	"no errors detected",
+REG_NOMATCH,	"REG_NOMATCH",	"failed to match",
+REG_BADPAT,	"REG_BADPAT",	"invalid regexp (reg version 0.1)",
+REG_ECOLLATE,	"REG_ECOLLATE",	"invalid collating element",
+REG_ECTYPE,	"REG_ECTYPE",	"invalid character class",
+REG_EESCAPE,	"REG_EESCAPE",	"invalid escape \\ sequence",
+REG_ESUBREG,	"REG_ESUBREG",	"invalid backreference number",
+REG_EBRACK,	"REG_EBRACK",	"brackets [] not balanced",
+REG_EPAREN,	"REG_EPAREN",	"parentheses () not balanced",
+REG_EBRACE,	"REG_EBRACE",	"braces {} not balanced",
+REG_BADBR,	"REG_BADBR",	"invalid repetition count(s)",
+REG_ERANGE,	"REG_ERANGE",	"invalid character range",
+REG_ESPACE,	"REG_ESPACE",	"out of memory",
+REG_BADRPT,	"REG_BADRPT",	"quantifier operand invalid",
+REG_ASSERT,	"REG_ASSERT",	"\"can't happen\" -- you found a bug",
+REG_INVARG,	"REG_INVARG",	"invalid argument to regex function",
+REG_MIXED,	"REG_MIXED",	"character widths of regex and string differ",
+REG_BADOPT,	"REG_BADOPT",	"invalid embedded option",
+REG_IMPOSS,	"REG_IMPOSS",	"can never match",
diff --git a/generic/regex.h b/generic/regex.h
new file mode 100644
index 0000000..6f61dd3
--- /dev/null
+++ b/generic/regex.h
@@ -0,0 +1,299 @@
+#ifndef _REGEX_H_
+#define	_REGEX_H_	/* never again */
+/*
+ * regular expressions
+ *
+ * Prototypes etc. marked with "^" within comments get gathered up (and
+ * possibly edited) by the regfwd program and inserted near the bottom of
+ * this file.
+ *
+ * We offer the option of declaring one wide-character version of the
+ * RE functions as well as the char versions.  To do that, define
+ * __REG_WIDE_T to the type of wide characters (unfortunately, there
+ * is no consensus that wchar_t is suitable) and __REG_WIDE_COMPILE and
+ * __REG_WIDE_EXEC to the names to be used for the compile and execute
+ * functions (suggestion:  re_Xcomp and re_Xexec, where X is a letter
+ * suggestive of the wide type, e.g. re_ucomp and re_uexec for Unicode).
+ * For cranky old compilers, it may be necessary to do something like:
+ * #define	__REG_WIDE_COMPILE(a,b,c,d)	re_Xcomp(a,b,c,d)
+ * #define	__REG_WIDE_EXEC(a,b,c,d,e,f)	re_Xexec(a,b,c,d,e,f)
+ * rather than just #defining the names as parameterless macros.
+ *
+ * For some specialized purposes, it may be desirable to suppress the
+ * declarations of the "front end" functions, regcomp() and regexec(),
+ * or of the char versions of the compile and execute functions.  To
+ * suppress the front-end functions, define __REG_NOFRONT.  To suppress
+ * the char versions, define __REG_NOCHAR.
+ *
+ * The right place to do those defines (and some others you may want, see
+ * below) would be <sys/types.h>.  If you don't have control of that file,
+ * the right place to add your own defines to this file is marked below.
+ * This is normally done automatically, by the makefile and regmkhdr, based
+ * on the contents of regcustom.h.
+ */
+
+
+
+/*
+ * voodoo for C++
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+
+/*
+ * Add your own defines, if needed, here.  The --- stuff is for automatic
+ * generation of this file from regproto.h and regcustom.h.
+ */
+/* --- begin --- */
+/* ensure certain things don't sneak in from system headers */
+#ifdef __REG_WIDE_T
+#undef __REG_WIDE_T
+#endif
+#ifdef __REG_WIDE_COMPILE
+#undef __REG_WIDE_COMPILE
+#endif
+#ifdef __REG_WIDE_EXEC
+#undef __REG_WIDE_EXEC
+#endif
+#ifdef __REG_REGOFF_T
+#undef __REG_REGOFF_T
+#endif
+#ifdef __REG_VOID_T
+#undef __REG_VOID_T
+#endif
+#ifdef __REG_CONST
+#undef __REG_CONST
+#endif
+/* interface types */
+#define	__REG_WIDE_T	Tcl_UniChar
+#define	__REG_WIDE_COMPILE	re_ucomp
+#define	__REG_WIDE_EXEC		re_uexec
+#define	__REG_REGOFF_T	long	/* not really right, but good enough... */
+#define	__REG_VOID_T	VOID
+#define	__REG_CONST	CONST
+#ifndef __REG_NOFRONT
+#define	__REG_NOFRONT		/* don't want regcomp() and regexec() */
+#endif
+#ifndef __REG_NOCHAR
+#define	__REG_NOCHAR		/* or the char versions */
+#endif
+/* --- end --- */
+
+
+/*
+ * interface types etc.
+ */
+
+/*
+ * regoff_t has to be large enough to hold either off_t or ssize_t,
+ * and must be signed; it's only a guess that long is suitable, so we
+ * offer <sys/types.h> an override.
+ */
+#ifdef __REG_REGOFF_T
+typedef __REG_REGOFF_T regoff_t;
+#else
+typedef long regoff_t;
+#endif
+
+/*
+ * For benefit of old compilers, we offer <sys/types.h> the option of
+ * overriding the `void' type used to declare nonexistent return types.
+ */
+#ifdef __REG_VOID_T
+typedef __REG_VOID_T re_void;
+#else
+typedef void re_void;
+#endif
+
+/*
+ * Also for benefit of old compilers, <sys/types.h> can supply a macro
+ * which expands to a substitute for `const'.
+ */
+#ifndef __REG_CONST
+#define	__REG_CONST	const
+#endif
+
+
+
+/*
+ * other interface types
+ */
+
+/* the biggie, a compiled RE (or rather, a front end to same) */
+typedef struct {
+	int re_magic;		/* magic number */
+	size_t re_nsub;		/* number of subexpressions */
+	int re_info;		/* info about RE -- presumably REG_U* bits ORed; confirm */
+#		define	REG_UBACKREF		000001
+#		define	REG_ULOOKAHEAD		000002
+#		define	REG_UBOUNDS		000004
+#		define	REG_UBRACES		000010
+#		define	REG_UBSALNUM		000020
+#		define	REG_UPBOTCH		000040
+#		define	REG_UBBS		000100
+#		define	REG_UNONPOSIX		000200
+#		define	REG_UUNSPEC		000400
+#		define	REG_UUNPORT		001000
+#		define	REG_ULOCALE		002000
+#		define	REG_UEMPTYMATCH		004000
+	int re_csize;		/* sizeof(character) */
+	char *re_endp;		/* backward compatibility kludge (for REG_PEND? confirm) */
+	/* the rest is opaque pointers to hidden innards */
+	char *re_guts;		/* `char *' is more portable than `void *' */
+	char *re_fns;		/* opaque as well; not for client use */
+} regex_t;
+
+/* result reporting (may acquire more fields later) */
+typedef struct {
+	regoff_t rm_so;		/* start of substring; -1 while entry is unset */
+	regoff_t rm_eo;		/* end of substring; -1 while entry is unset */
+} regmatch_t;
+
+/* supplementary control and reporting (placeholder for later work) */
+typedef struct {
+	int rm_dummy;
+} rm_detail_t;
+
+
+
+/*
+ * compilation
+ ^ #ifndef __REG_NOCHAR
+ ^ int re_comp(regex_t *, __REG_CONST char *, size_t, int);
+ ^ #endif
+ ^ #ifndef __REG_NOFRONT
+ ^ int regcomp(regex_t *, __REG_CONST char *, int);
+ ^ #endif
+ ^ #ifdef __REG_WIDE_T
+ ^ int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int);
+ ^ #endif
+ */
+#define	REG_BASIC	000000	/* BREs (convenience) */
+#define	REG_EXTENDED	000001	/* EREs */
+#define	REG_ADVF	000002	/* advanced features in EREs */
+#define	REG_ADVANCED	000003	/* AREs (which are also EREs) */
+#define	REG_QUOTE	000004	/* no special characters, none */
+#define	REG_NOSPEC	REG_QUOTE	/* historical synonym */
+#define	REG_ICASE	000010	/* ignore case */
+#define	REG_NOSUB	000020	/* don't care about subexpressions */
+#define	REG_EXPANDED	000040	/* expanded format, white space & comments */
+#define	REG_NLSTOP	000100	/* \n doesn't match . or [^ ] */
+#define	REG_NLANCH	000200	/* ^ matches after \n, $ before */
+#define	REG_NEWLINE	000300	/* newlines are line terminators */
+#define	REG_PEND	000400	/* ugh -- backward-compatibility hack */
+#define	REG_DUMP	004000	/* none of your business :-) */
+#define	REG_FAKEEC	010000	/* none of your business :-) */
+#define	REG_PROGRESS	020000	/* none of your business :-) */
+
+
+
+/*
+ * execution
+ ^ #ifndef __REG_NOCHAR
+ ^ int re_exec(regex_t *, __REG_CONST char *, size_t,
+ ^				rm_detail_t *, size_t, regmatch_t [], int);
+ ^ #endif
+ ^ #ifndef __REG_NOFRONT
+ ^ int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int);
+ ^ #endif
+ ^ #ifdef __REG_WIDE_T
+ ^ int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t,
+ ^				rm_detail_t *, size_t, regmatch_t [], int);
+ ^ #endif
+ */
+#define	REG_NOTBOL	0001	/* BOS is not BOL */
+#define	REG_NOTEOL	0002	/* EOS is not EOL */
+#define	REG_STARTEND	0004	/* backward compatibility kludge */
+#define	REG_FTRACE	0010	/* none of your business */
+#define	REG_MTRACE	0020	/* none of your business */
+#define	REG_SMALL	0040	/* none of your business */
+
+
+
+/*
+ * misc generics (may be more functions here eventually)
+ ^ re_void regfree(regex_t *);
+ */
+
+
+
+/*
+ * error reporting
+ * Be careful if modifying the list of error codes -- the table used by
+ * regerror() is generated automatically from this file!
+ *
+ * Note that there is no wide-char variant of regerror at this time; what
+ * kind of character is used for error reports is independent of what kind
+ * is used in matching.
+ *
+ ^ extern size_t regerror(int, __REG_CONST regex_t *, char *, size_t);
+ */
+#define	REG_OKAY	 0	/* no errors detected */
+#define	REG_NOMATCH	 1	/* failed to match */
+#define	REG_BADPAT	 2	/* invalid regexp */
+#define	REG_ECOLLATE	 3	/* invalid collating element */
+#define	REG_ECTYPE	 4	/* invalid character class */
+#define	REG_EESCAPE	 5	/* invalid escape \ sequence */
+#define	REG_ESUBREG	 6	/* invalid backreference number */
+#define	REG_EBRACK	 7	/* brackets [] not balanced */
+#define	REG_EPAREN	 8	/* parentheses () not balanced */
+#define	REG_EBRACE	 9	/* braces {} not balanced */
+#define	REG_BADBR	10	/* invalid repetition count(s) */
+#define	REG_ERANGE	11	/* invalid character range */
+#define	REG_ESPACE	12	/* out of memory */
+#define	REG_BADRPT	13	/* quantifier operand invalid */
+#define	REG_ASSERT	15	/* "can't happen" -- you found a bug */
+#define	REG_INVARG	16	/* invalid argument to regex function */
+#define	REG_MIXED	17	/* character widths of regex and string differ */
+#define	REG_BADOPT	18	/* invalid embedded option */
+#define	REG_IMPOSS	19	/* can never match */
+/* two specials for debugging and testing */
+#define	REG_ATOI	101	/* convert error-code name to number */
+#define	REG_ITOA	102	/* convert error-code number to name */
+
+
+
+/*
+ * the prototypes, as possibly munched by regfwd
+ */
+/* =====^!^===== begin forwards =====^!^===== */
+/* automatically gathered by fwd; do not hand-edit */
+/* === regproto.h === */
+#ifndef __REG_NOCHAR
+int re_comp _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, int));
+#endif
+#ifndef __REG_NOFRONT
+int regcomp _ANSI_ARGS_((regex_t *, __REG_CONST char *, int));
+#endif
+#ifdef __REG_WIDE_T
+int __REG_WIDE_COMPILE _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int));
+#endif
+#ifndef __REG_NOCHAR
+int re_exec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
+#endif
+#ifndef __REG_NOFRONT
+int regexec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, regmatch_t [], int));
+#endif
+#ifdef __REG_WIDE_T
+int __REG_WIDE_EXEC _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
+#endif
+re_void regfree _ANSI_ARGS_((regex_t *));
+extern size_t regerror _ANSI_ARGS_((int, __REG_CONST regex_t *, char *, size_t));
+/* automatically gathered by fwd; do not hand-edit */
+/* =====^!^===== end forwards =====^!^===== */
+
+
+
+/*
+ * more C++ voodoo
+ */
+#ifdef __cplusplus
+}
+#endif
+
+
+
+#endif
diff --git a/generic/exec.c b/generic/regexec.c
index 92439aa..4220062 100644
--- a/generic/exec.c
+++ b/generic/regexec.c
@@ -1,43 +1,9 @@
 /*
- * exec.c --
- *
- *	Regexp package file:  re_*exec and friends - match REs
- *
- * Copyright (c) 1998 Henry Spencer.  All rights reserved.
- * 
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., and Sun Microsystems Inc., none of
- * whom are responsible for the results.  The author thanks all of them.
- * 
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications. 
- * 
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- * 
- * Copyright (c) 1998 by Sun Microsystems, Inc.
- *
- * See the file "license.terms" for information on usage and redistribution
- * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
- *
- * RCS: @(#) $Id: exec.c,v 1.1.2.2 1998/10/05 17:38:26 stanton Exp $
+ * re_*exec and friends - match REs
  */
 
-#include "tclInt.h"
-#include <assert.h>
-#include "tclRegexp.h"
-#include "chr.h"
-#include "guts.h"
+#include "regguts.h"
+
 
 
 /* internal variables, bundled for easy passing around */
@@ -75,6 +41,7 @@ struct sset {			/* state set */
 	int flags;
 #		define	STARTER		01	/* the initial state set */
 #		define	POSTSTATE	02	/* includes the goal state */
+#		define	LOCKED		04	/* locked in cache */
 	struct arcp ins;	/* chain of inarcs pointing here */
 	chr *lastseen;		/* last entered on arrival here */
 	struct sset **outs;	/* outarc vector indexed by color */
@@ -95,6 +62,7 @@ struct dfa {
 	struct cnfa *cnfa;
 	struct colormap *cm;
 	chr *lastpost;		/* location of last cache-flushed success */
+	struct sset *search;	/* replacement-search-pointer memory */
 };
 
 #define	CACHE	200
@@ -107,8 +75,8 @@ struct dfa {
  */
 /* =====^!^===== begin forwards =====^!^===== */
 /* automatically gathered by fwd; do not hand-edit */
-/* === exec.c === */
-int exec _ANSI_ARGS_((regex_t *, CONST chr *, size_t, size_t, regmatch_t [], int));
+/* === regexec.c === */
+int exec _ANSI_ARGS_((regex_t *, CONST chr *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
 static int find _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));
 static int cfind _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));
 static VOID zapmatches _ANSI_ARGS_((regmatch_t *, size_t));
@@ -130,13 +98,10 @@ static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colo
 static VOID freedfa _ANSI_ARGS_((struct dfa *));
 static unsigned hash _ANSI_ARGS_((unsigned *, int));
 static struct sset *initialize _ANSI_ARGS_((struct vars *, struct dfa *, chr *));
-static struct sset *miss _ANSI_ARGS_((struct vars *, struct dfa *, struct sset *, pcolor, chr *));
+static struct sset *miss _ANSI_ARGS_((struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *));
 static int lacon _ANSI_ARGS_((struct vars *, struct cnfa *, chr *, pcolor));
-static struct sset *getvacant _ANSI_ARGS_((struct vars *, struct dfa *));
-static struct sset *pickss _ANSI_ARGS_((struct vars *, struct dfa *));
-/* === color.c === */
-union tree;
-static color getcolor _ANSI_ARGS_((struct colormap *, pchr));
+static struct sset *getvacant _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));
+static struct sset *pickss _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));
 /* automatically gathered by fwd; do not hand-edit */
 /* =====^!^===== end forwards =====^!^===== */
 
@@ -144,13 +109,15 @@ static color getcolor _ANSI_ARGS_((struct colormap *, pchr));
 
 /*
  - exec - match regular expression
- ^ int exec(regex_t *, CONST chr *, size_t, size_t, regmatch_t [], int);
+ ^ int exec(regex_t *, CONST chr *, size_t, rm_detail_t *,
+ ^					size_t, regmatch_t [], int);
  */
 int
-exec(re, string, len, nmatch, pmatch, flags)
+exec(re, string, len, details, nmatch, pmatch, flags)
 regex_t *re;
 CONST chr *string;
 size_t len;
+rm_detail_t *details;		/* hook for future elaboration */
 size_t nmatch;
 regmatch_t pmatch[];
 int flags;
@@ -177,9 +144,9 @@ int flags;
 	if (v->g->cflags&REG_NOSUB)
 		nmatch = 0;		/* override client */
 	v->nmatch = nmatch;
-	if (complications && v->nmatch < (size_t)(v->g->nsub + 1)) {
+	if (complications && v->nmatch < v->g->nsub + 1) {
 		/* need work area bigger than what user gave us */
-		v->pmatch = (regmatch_t *)ckalloc((v->g->nsub + 1) *
+		v->pmatch = (regmatch_t *)MALLOC((v->g->nsub + 1) *
 							sizeof(regmatch_t));
 		if (v->pmatch == NULL)
 			return REG_ESPACE;
@@ -190,10 +157,10 @@ int flags;
 	v->stop = (chr *)string + len;
 	v->err = 0;
 	if (complications) {
-		v->mem1 = (regoff_t *)ckalloc(2*v->g->ntree*sizeof(regoff_t));
+		v->mem1 = (regoff_t *)MALLOC(2*v->g->ntree*sizeof(regoff_t));
 		if (v->mem1 == NULL) {
 			if (v->pmatch != pmatch)
-				ckfree((char *)v->pmatch);
+				FREE(v->pmatch);
 			return REG_ESPACE;
 		}
 		v->mem2 = v->mem1 + v->g->ntree;
@@ -208,12 +175,12 @@ int flags;
 	if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) {
 		zapmatches(pmatch, nmatch);
 		n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
-		memcpy((VOID *)pmatch, (VOID *)v->pmatch, n*sizeof(regmatch_t));
+		memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t));
 	}
 	if (v->pmatch != pmatch)
-		ckfree((char *)v->pmatch);
+		FREE(v->pmatch);
 	if (v->mem1 != NULL)
-		ckfree((char *)v->mem1);
+		FREE(v->mem1);
 	return st;
 }
 
@@ -230,15 +197,14 @@ struct colormap *cm;
 	struct dfa *d = newdfa(v, cnfa, cm);
 	chr *begin;
 	chr *end;
- 	chr *stop = (cnfa->leftanch) ? v->start : v->stop;
+	chr *stop = (cnfa->flags&LEFTANCH) ? v->start : v->stop;
 
 	if (d == NULL)
 		return v->err;
 
 	for (begin = v->start; begin <= stop; begin++) {
-		if (v->eflags&REG_MTRACE)
-			printf("\ntrying at %ld\n", (long)OFF(begin));
- 		end = longest(v, d, begin, v->stop);
+		MDEBUG(("\ntrying at %ld\n", (long)OFF(begin)));
+		end = longest(v, d, begin, v->stop);
 		if (end != NULL) {
 			if (v->nmatch > 0) {
 				v->pmatch[0].rm_so = OFF(begin);
@@ -249,11 +215,15 @@ struct colormap *cm;
 				zapmatches(v->pmatch, v->nmatch);
 				return dissect(v, v->g->tree, begin, end);
 			}
+			if (ISERR())
+				return v->err;
 			return REG_OKAY;
 		}
 	}
 
 	freedfa(d);
+	if (ISERR())
+		return v->err;
 	return REG_NOMATCH;
 }
 
@@ -270,7 +240,7 @@ struct colormap *cm;
 	struct dfa *d = newdfa(v, cnfa, cm);
 	chr *begin;
 	chr *end;
- 	chr *stop = (cnfa->leftanch) ? v->start : v->stop;
+	chr *stop = (cnfa->flags&LEFTANCH) ? v->start : v->stop;
 	chr *estop;
 	int er;
 	int usedis = (v->g->tree == NULL || v->g->tree->op == '|') ? 0 : 1;
@@ -281,12 +251,11 @@ struct colormap *cm;
 	if (!v->g->usedshorter)
 		usedis = 0;
 	for (begin = v->start; begin <= stop; begin++) {
-		if (v->eflags&REG_MTRACE)
-			printf("\ntrying at %ld\n", (long)OFF(begin));
+		MDEBUG(("\ntrying at %ld\n", (long)OFF(begin)));
 		if (usedis) {
 			v->mem = v->mem1;
 			zapmem(v, v->g->tree);
- 		}
+		}
 		estop = v->stop;
 		for (;;) {
 			if (usedis) {
@@ -296,8 +265,7 @@ struct colormap *cm;
 				end = longest(v, d, begin, estop);
 			if (end == NULL)
 				break;		/* NOTE BREAK OUT */
-			if (v->eflags&REG_MTRACE)
-				printf("tentative end %ld\n", (long)OFF(end));
+			MDEBUG(("tentative end %ld\n", (long)OFF(end)));
 			zapmatches(v->pmatch, v->nmatch);
 			v->mem = v->mem2;
 			zapmem(v, v->g->tree);
@@ -309,7 +277,10 @@ struct colormap *cm;
 					v->pmatch[0].rm_eo = OFF(end);
 				}
 				freedfa(d);
+				if (ISERR())
+					return v->err;
 				return REG_OKAY;
+				break;
 			case REG_NOMATCH:
 				/* go around and try again */
 				if (!usedis) {
@@ -324,11 +295,14 @@ struct colormap *cm;
 			default:
 				freedfa(d);
 				return er;
+				break;
 			}
 		}
 	}
 
 	freedfa(d);
+	if (ISERR())
+		return v->err;
 	return REG_NOMATCH;
 }
 
@@ -343,7 +317,7 @@ size_t n;
 {
 	size_t i;
 
-	for (i = 1; i < n; i++) {
+	for (i = (n > 0) ? n-1 : 0; i > 0; i--) {	/* n==0 must not wrap */
 		p[i].rm_so = -1;
 		p[i].rm_eo = -1;
 	}
@@ -399,8 +373,7 @@ chr *end;
 	if ((size_t)n >= v->nmatch)
 		return;
 
-	if (v->eflags&REG_MTRACE)
-		printf("setting %d\n", n);
+	MDEBUG(("setting %d\n", n));
 	v->pmatch[n].rm_so = OFF(begin);
 	v->pmatch[n].rm_eo = OFF(end);
 }
@@ -423,8 +396,7 @@ chr *end;			/* end of same */
 
 	if (rt == NULL)
 		return REG_OKAY;
-	if (v->eflags&REG_MTRACE)
-		printf("substring %ld-%ld\n", (long)OFF(begin), (long)OFF(end));
+	MDEBUG(("substring %ld-%ld\n", (long)OFF(begin), (long)OFF(end)));
 
 	/* alternatives -- punt to auxiliary */
 	if (rt->op == '|')
@@ -439,8 +411,7 @@ chr *end;			/* end of same */
 
 	/* in some cases, there may be no right side... */
 	if (rt->right.cnfa.nstates == 0) {
-		if (v->eflags&REG_MTRACE)
-			printf("singleton\n");
+		MDEBUG(("singleton\n"));
 		if (longest(v, d, begin, end) != end) {
 			freedfa(d);
 			return REG_ASSERT;
@@ -466,16 +437,14 @@ chr *end;			/* end of same */
 		freedfa(d2);
 		return REG_ASSERT;
 	}
-	if (v->eflags&REG_MTRACE)
-		printf("tentative midpoint %ld\n", (long)OFF(mid));
+	MDEBUG(("tentative midpoint %ld\n", (long)OFF(mid)));
 
 	/* iterate until satisfaction or failure */
 	while (longest(v, d2, mid, end) != end) {
 		/* that midpoint didn't work, find a new one */
 		if (mid == begin) {
 			/* all possibilities exhausted! */
-			if (v->eflags&REG_MTRACE)
-				printf("no midpoint!\n");
+			MDEBUG(("no midpoint!\n"));
 			freedfa(d);
 			freedfa(d2);
 			return REG_ASSERT;
@@ -483,19 +452,16 @@ chr *end;			/* end of same */
 		mid = longest(v, d, begin, mid-1);
 		if (mid == NULL) {
 			/* failed to find a new one! */
-			if (v->eflags&REG_MTRACE)
-				printf("failed midpoint!\n");
+			MDEBUG(("failed midpoint!\n"));
 			freedfa(d);
 			freedfa(d2);
 			return REG_ASSERT;
 		}
-		if (v->eflags&REG_MTRACE)
-			printf("new midpoint %ld\n", (long)OFF(mid));
+		MDEBUG(("new midpoint %ld\n", (long)OFF(mid)));
 	}
 
 	/* satisfaction */
-	if (v->eflags&REG_MTRACE)
-		printf("successful\n");
+	MDEBUG(("successful\n"));
 	freedfa(d);
 	freedfa(d2);
 	assert(rt->left.subno >= 0);
@@ -526,15 +492,13 @@ chr *end;			/* end of same */
 	assert(rt->op == '|');
 
 	for (i = 0; rt != NULL; rt = rt->next, i++) {
-		if (v->eflags&REG_MTRACE)
-			printf("trying %dth\n", i);
+		MDEBUG(("trying %dth\n", i));
 		assert(rt->left.begin != NULL);
 		d = newdfa(v, &rt->left.cnfa, v->g->cm);
 		if (ISERR())
 			return v->err;
 		if (longest(v, d, begin, end) == end) {
-			if (v->eflags&REG_MTRACE)
-				printf("success\n");
+			MDEBUG(("success\n"));
 			freedfa(d);
 			assert(rt->left.subno >= 0);
 			subset(v, &rt->left, begin, end);
@@ -565,8 +529,7 @@ chr *end;			/* end of same */
 
 	if (rt == NULL)
 		return REG_OKAY;
-	if (v->eflags&REG_MTRACE)
-		printf("csubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end));
+	MDEBUG(("csubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end)));
 
 	/* punt various cases to auxiliaries */
 	if (rt->op == '|')			/* alternatives */
@@ -590,8 +553,7 @@ chr *end;			/* end of same */
 		freedfa(d);
 		return v->err;
 	}
-	if (v->eflags&REG_MTRACE)
-		printf("cconcat %d\n", rt->no);
+	MDEBUG(("cconcat %d\n", rt->no));
 
 	/* pick a tentative midpoint */
 	if (v->mem[rt->no] == 0) {
@@ -601,14 +563,12 @@ chr *end;			/* end of same */
 			freedfa(d2);
 			return REG_NOMATCH;
 		}
-		if (v->eflags&REG_MTRACE)
-			printf("tentative midpoint %ld\n", (long)OFF(mid));
+		MDEBUG(("tentative midpoint %ld\n", (long)OFF(mid)));
 		subset(v, &rt->left, begin, mid);
 		v->mem[rt->no] = (mid - begin) + 1;
 	} else {
 		mid = begin + (v->mem[rt->no] - 1);
-		if (v->eflags&REG_MTRACE)
-			printf("working midpoint %ld\n", (long)OFF(mid));
+		MDEBUG(("working midpoint %ld\n", (long)OFF(mid)));
 	}
 
 	/* iterate until satisfaction or failure */
@@ -628,8 +588,7 @@ chr *end;			/* end of same */
 		/* that midpoint didn't work, find a new one */
 		if (mid == begin) {
 			/* all possibilities exhausted */
-			if (v->eflags&REG_MTRACE)
-				printf("%d no midpoint\n", rt->no);
+			MDEBUG(("%d no midpoint\n", rt->no));
 			freedfa(d);
 			freedfa(d2);
 			return REG_NOMATCH;
@@ -637,15 +596,12 @@ chr *end;			/* end of same */
 		mid = longest(v, d, begin, mid-1);
 		if (mid == NULL) {
 			/* failed to find a new one */
-			if (v->eflags&REG_MTRACE)
-				printf("%d failed midpoint\n", rt->no);
+			MDEBUG(("%d failed midpoint\n", rt->no));
 			freedfa(d);
 			freedfa(d2);
 			return REG_NOMATCH;
 		}
-		if (v->eflags&REG_MTRACE)
-			printf("%d: new midpoint %ld\n", rt->no,
-								(long)OFF(mid));
+		MDEBUG(("%d: new midpoint %ld\n", rt->no, (long)OFF(mid)));
 		subset(v, &rt->left, begin, mid);
 		v->mem[rt->no] = (mid - begin) + 1;
 		zapmem(v, rt->left.tree);
@@ -653,8 +609,7 @@ chr *end;			/* end of same */
 	}
 
 	/* satisfaction */
-	if (v->eflags&REG_MTRACE)
-		printf("successful\n");
+	MDEBUG(("successful\n"));
 	freedfa(d);
 	freedfa(d2);
 	subset(v, &rt->right, mid, end);
@@ -694,8 +649,7 @@ chr *end;			/* end of same */
 		freedfa(d);
 		return v->err;
 	}
-	if (v->eflags&REG_MTRACE)
-		printf("crev %d\n", rt->no);
+	MDEBUG(("crev %d\n", rt->no));
 
 	/* pick a tentative midpoint */
 	if (v->mem[rt->no] == 0) {
@@ -705,14 +659,12 @@ chr *end;			/* end of same */
 			freedfa(d2);
 			return REG_NOMATCH;
 		}
-		if (v->eflags&REG_MTRACE)
-			printf("tentative midpoint %ld\n", (long)OFF(mid));
+		MDEBUG(("tentative midpoint %ld\n", (long)OFF(mid)));
 		subset(v, &rt->left, begin, mid);
 		v->mem[rt->no] = (mid - begin) + 1;
 	} else {
 		mid = begin + (v->mem[rt->no] - 1);
-		if (v->eflags&REG_MTRACE)
-			printf("working midpoint %ld\n", (long)OFF(mid));
+		MDEBUG(("working midpoint %ld\n", (long)OFF(mid)));
 	}
 
 	/* iterate until satisfaction or failure */
@@ -732,8 +684,7 @@ chr *end;			/* end of same */
 		/* that midpoint didn't work, find a new one */
 		if (mid == end) {
 			/* all possibilities exhausted */
-			if (v->eflags&REG_MTRACE)
-				printf("%d no midpoint\n", rt->no);
+			MDEBUG(("%d no midpoint\n", rt->no));
 			freedfa(d);
 			freedfa(d2);
 			return REG_NOMATCH;
@@ -741,15 +692,12 @@ chr *end;			/* end of same */
 		mid = shortest(v, d, begin, mid+1, end);
 		if (mid == NULL) {
 			/* failed to find a new one */
-			if (v->eflags&REG_MTRACE)
-				printf("%d failed midpoint\n", rt->no);
+			MDEBUG(("%d failed midpoint\n", rt->no));
 			freedfa(d);
 			freedfa(d2);
 			return REG_NOMATCH;
 		}
-		if (v->eflags&REG_MTRACE)
-			printf("%d: new midpoint %ld\n", rt->no,
-								(long)OFF(mid));
+		MDEBUG(("%d: new midpoint %ld\n", rt->no, (long)OFF(mid)));
 		subset(v, &rt->left, begin, mid);
 		v->mem[rt->no] = (mid - begin) + 1;
 		zapmem(v, rt->left.tree);
@@ -757,8 +705,7 @@ chr *end;			/* end of same */
 	}
 
 	/* satisfaction */
-	if (v->eflags&REG_MTRACE)
-		printf("successful\n");
+	MDEBUG(("successful\n"));
 	freedfa(d);
 	freedfa(d2);
 	subset(v, &rt->right, mid, end);
@@ -782,8 +729,7 @@ chr *end;			/* end of same */
 	assert(rt != NULL);
 	assert(rt->op == ',');
 	assert(rt->right.cnfa.nstates == 0);
-	if (v->eflags&REG_MTRACE)
-		printf("csingleton %d\n", rt->no);
+	MDEBUG(("csingleton %d\n", rt->no));
 
 	assert(rt->left.cnfa.nstates > 0);
 
@@ -796,8 +742,7 @@ chr *end;			/* end of same */
 		}
 		freedfa(d);
 		v->mem[rt->no] = 1;
-		if (v->eflags&REG_MTRACE)
-			printf("csingleton matched\n");
+		MDEBUG(("csingleton matched\n"));
 	}
 
 	er = cdissect(v, rt->left.tree, begin, end);
@@ -830,10 +775,10 @@ chr *end;			/* end of same */
 	assert(rt != NULL);
 	assert(rt->op == 'b');
 	assert(rt->right.cnfa.nstates == 0);
+	assert(n >= 0);
 	assert((size_t)n < v->nmatch);
 
-	if (v->eflags&REG_MTRACE)
-		printf("cbackref n%d %d{%d-%d}\n", rt->no, n, min, max);
+	MDEBUG(("cbackref n%d %d{%d-%d}\n", rt->no, n, min, max));
 
 	if (v->pmatch[n].rm_so == -1)
 		return REG_NOMATCH;
@@ -853,6 +798,7 @@ chr *end;			/* end of same */
 	}
 
 	/* and too-short string */
+	assert(end >= begin);
 	if ((size_t)(end - begin) < len)
 		return REG_NOMATCH;
 	stop = end - len;
@@ -864,8 +810,7 @@ chr *end;			/* end of same */
 				break;
 		i++;
 	}
-	if (v->eflags&REG_MTRACE)
-		printf("cbackref found %d\n", i);
+	MDEBUG(("cbackref found %d\n", i));
 
 	/* and sort it out */
 	if (p != end)			/* didn't consume all of it */
@@ -898,8 +843,7 @@ chr *end;			/* end of same */
 	if (v->mem[rt->no] == TRIED)
 		return caltdissect(v, rt->next, begin, end);
 
-	if (v->eflags&REG_MTRACE)
-		printf("calt n%d\n", rt->no);
+	MDEBUG(("calt n%d\n", rt->no));
 	assert(rt->left.begin != NULL);
 
 	if (v->mem[rt->no] == UNTRIED) {
@@ -912,8 +856,7 @@ chr *end;			/* end of same */
 			return caltdissect(v, rt->next, begin, end);
 		}
 		freedfa(d);
-		if (v->eflags&REG_MTRACE)
-			printf("calt matched\n");
+		MDEBUG(("calt matched\n"));
 		v->mem[rt->no] = TRYING;
 	}
 
@@ -949,8 +892,7 @@ chr *end;			/* end of same */
 
 	if (rt == NULL)
 		return begin;
-	if (v->eflags&REG_MTRACE)
-		printf("dsubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end));
+	MDEBUG(("dsubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end)));
 
 	/* punt various cases to auxiliaries */
 	if (rt->right.cnfa.nstates == 0)	/* no RHS */
@@ -970,8 +912,7 @@ chr *end;			/* end of same */
 		freedfa(d);
 		return NULL;
 	}
-	if (v->eflags&REG_MTRACE)
-		printf("dconcat %d\n", rt->no);
+	MDEBUG(("dconcat %d\n", rt->no));
 
 	/* pick a tentative midpoint */
 	if (v->mem[rt->no] == 0) {
@@ -981,13 +922,11 @@ chr *end;			/* end of same */
 			freedfa(d2);
 			return NULL;
 		}
-		if (v->eflags&REG_MTRACE)
-			printf("tentative midpoint %ld\n", (long)OFF(mid));
+		MDEBUG(("tentative midpoint %ld\n", (long)OFF(mid)));
 		v->mem[rt->no] = (mid - begin) + 1;
 	} else {
 		mid = begin + (v->mem[rt->no] - 1);
-		if (v->eflags&REG_MTRACE)
-			printf("working midpoint %ld\n", (long)OFF(mid));
+		MDEBUG(("working midpoint %ld\n", (long)OFF(mid)));
 	}
 
 	/* iterate until satisfaction or failure */
@@ -1010,8 +949,7 @@ chr *end;			/* end of same */
 		/* that midpoint didn't work, find a new one */
 		if (mid == begin) {
 			/* all possibilities exhausted */
-			if (v->eflags&REG_MTRACE)
-				printf("%d no midpoint\n", rt->no);
+			MDEBUG(("%d no midpoint\n", rt->no));
 			freedfa(d);
 			freedfa(d2);
 			return NULL;
@@ -1019,22 +957,18 @@ chr *end;			/* end of same */
 		mid = longest(v, d, begin, mid-1);
 		if (mid == NULL) {
 			/* failed to find a new one */
-			if (v->eflags&REG_MTRACE)
-				printf("%d failed midpoint\n", rt->no);
+			MDEBUG(("%d failed midpoint\n", rt->no));
 			freedfa(d);
 			freedfa(d2);
 			return NULL;
 		}
-		if (v->eflags&REG_MTRACE)
-			printf("%d: new midpoint %ld\n", rt->no,
-								(long)OFF(mid));
+		MDEBUG(("%d: new midpoint %ld\n", rt->no, (long)OFF(mid)));
 		v->mem[rt->no] = (mid - begin) + 1;
 		zapmem(v, rt->right.tree);
 	}
 
 	/* satisfaction */
-	if (v->eflags&REG_MTRACE)
-		printf("successful\n");
+	MDEBUG(("successful\n"));
 	freedfa(d);
 	freedfa(d2);
 	return ret;
@@ -1060,8 +994,7 @@ chr *end;			/* end of same */
 
 	if (rt == NULL)
 		return begin;
-	if (v->eflags&REG_MTRACE)
-		printf("rsubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end));
+	MDEBUG(("rsubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end)));
 
 	/* concatenation -- need to split the substring between parts */
 	assert(rt->op == ',');
@@ -1075,8 +1008,7 @@ chr *end;			/* end of same */
 		freedfa(d);
 		return NULL;
 	}
-	if (v->eflags&REG_MTRACE)
-		printf("dconcat %d\n", rt->no);
+	MDEBUG(("dconcat %d\n", rt->no));
 
 	/* pick a tentative midpoint */
 	if (v->mem[rt->no] == 0) {
@@ -1086,13 +1018,11 @@ chr *end;			/* end of same */
 			freedfa(d2);
 			return NULL;
 		}
-		if (v->eflags&REG_MTRACE)
-			printf("tentative midpoint %ld\n", (long)OFF(mid));
+		MDEBUG(("tentative midpoint %ld\n", (long)OFF(mid)));
 		v->mem[rt->no] = (mid - begin) + 1;
 	} else {
 		mid = begin + (v->mem[rt->no] - 1);
-		if (v->eflags&REG_MTRACE)
-			printf("working midpoint %ld\n", (long)OFF(mid));
+		MDEBUG(("working midpoint %ld\n", (long)OFF(mid)));
 	}
 
 	/* iterate until satisfaction or failure */
@@ -1115,8 +1045,7 @@ chr *end;			/* end of same */
 		/* that midpoint didn't work, find a new one */
 		if (mid == end) {
 			/* all possibilities exhausted */
-			if (v->eflags&REG_MTRACE)
-				printf("%d no midpoint\n", rt->no);
+			MDEBUG(("%d no midpoint\n", rt->no));
 			freedfa(d);
 			freedfa(d2);
 			return NULL;
@@ -1124,22 +1053,18 @@ chr *end;			/* end of same */
 		mid = shortest(v, d, begin, mid+1, end);
 		if (mid == NULL) {
 			/* failed to find a new one */
-			if (v->eflags&REG_MTRACE)
-				printf("%d failed midpoint\n", rt->no);
+			MDEBUG(("%d failed midpoint\n", rt->no));
 			freedfa(d);
 			freedfa(d2);
 			return NULL;
 		}
-		if (v->eflags&REG_MTRACE)
-			printf("%d: new midpoint %ld\n", rt->no,
-								(long)OFF(mid));
+		MDEBUG(("%d: new midpoint %ld\n", rt->no, (long)OFF(mid)));
 		v->mem[rt->no] = (mid - begin) + 1;
 		zapmem(v, rt->right.tree);
 	}
 
 	/* satisfaction */
-	if (v->eflags&REG_MTRACE)
-		printf("successful\n");
+	MDEBUG(("successful\n"));
 	freedfa(d);
 	freedfa(d2);
 	return ret;
@@ -1162,8 +1087,7 @@ chr *end;			/* end of same */
 	assert(rt != NULL);
 	assert(rt->op == ',');
 	assert(rt->right.cnfa.nstates == 0);
-	if (v->eflags&REG_MTRACE)
-		printf("dsingleton %d\n", rt->no);
+	MDEBUG(("dsingleton %d\n", rt->no));
 
 	assert(rt->left.cnfa.nstates > 0);
 
@@ -1180,8 +1104,8 @@ chr *end;			/* end of same */
 	else
 		ret = shortest(v, d, begin, begin, end);
 	freedfa(d);
-	if (ret != NULL && (v->eflags&REG_MTRACE))
-		printf("dsingleton matched\n");
+	if (ret != NULL)
+		MDEBUG(("dsingleton matched\n"));
 	return ret;
 }
 
@@ -1210,18 +1134,15 @@ chr *stop;			/* match must end at or before here */
 	cp = start;
 
 	/* startup */
-	if (v->eflags&REG_FTRACE)
-		printf("+++ startup +++\n");
+	FDEBUG(("+++ startup +++\n"));
 	if (cp == v->start) {
 		co = d->cnfa->bos[(v->eflags&REG_NOTBOL) ? 0 : 1];
-		if (v->eflags&REG_FTRACE)
-			printf("color %ld\n", (long)co);
+		FDEBUG(("color %ld\n", (long)co));
 	} else {
-		co = getcolor(cm, *(cp - 1));
-		if (v->eflags&REG_FTRACE)
-			printf("char %c, color %ld\n", (char)*(cp-1), (long)co);
+		co = GETCOLOR(cm, *(cp - 1));
+		FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co));
 	}
-	css = miss(v, d, css, co, cp);
+	css = miss(v, d, css, co, cp, start);
 	if (css == NULL)
 		return NULL;
 	css->lastseen = cp;
@@ -1229,12 +1150,12 @@ chr *stop;			/* match must end at or before here */
 	/* main loop */
 	if (v->eflags&REG_FTRACE)
 		while (cp < realstop) {
-			printf("+++ at c%d +++\n", css - d->ssets);
-			co = getcolor(cm, *cp);
-			printf("char %c, color %ld\n", (char)*cp, (long)co);
+			FDEBUG(("+++ at c%d +++\n", css - d->ssets));
+			co = GETCOLOR(cm, *cp);
+			FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co));
 			ss = css->outs[co];
 			if (ss == NULL) {
-				ss = miss(v, d, css, co, cp);
+				ss = miss(v, d, css, co, cp+1, start);
 				if (ss == NULL)
 					break;	/* NOTE BREAK OUT */
 			}
@@ -1244,10 +1165,10 @@ chr *stop;			/* match must end at or before here */
 		}
 	else
 		while (cp < realstop) {
-			co = getcolor(cm, *cp);
+			co = GETCOLOR(cm, *cp);
 			ss = css->outs[co];
 			if (ss == NULL) {
-				ss = miss(v, d, css, co, cp+1);
+				ss = miss(v, d, css, co, cp+1, start);
 				if (ss == NULL)
 					break;	/* NOTE BREAK OUT */
 			}
@@ -1257,13 +1178,11 @@ chr *stop;			/* match must end at or before here */
 		}
 
 	/* shutdown */
-	if (v->eflags&REG_FTRACE)
-		printf("+++ shutdown at c%d +++\n", css - d->ssets);
+	FDEBUG(("+++ shutdown at c%d +++\n", css - d->ssets));
 	if (cp == v->stop && stop == v->stop) {
 		co = d->cnfa->eos[(v->eflags&REG_NOTEOL) ? 0 : 1];
-		if (v->eflags&REG_FTRACE)
-			printf("color %ld\n", (long)co);
-		ss = miss(v, d, css, co, cp);
+		FDEBUG(("color %ld\n", (long)co));
+		ss = miss(v, d, css, co, cp, start);
 		/* special case:  match ended at eol? */
 		if (ss != NULL && (ss->flags&POSTSTATE))
 			return cp;
@@ -1300,7 +1219,7 @@ chr *max;			/* match must end at or before here */
 	chr *realmax = (max == v->stop) ? max : max + 1;
 	color co;
 	struct sset *css;
-	struct sset *ss = NULL;
+	struct sset *ss;
 	struct colormap *cm = d->cm;
 
 	/* initialize */
@@ -1308,31 +1227,29 @@ chr *max;			/* match must end at or before here */
 	cp = start;
 
 	/* startup */
-	if (v->eflags&REG_FTRACE)
-		printf("--- startup ---\n");
+	FDEBUG(("--- startup ---\n"));
 	if (cp == v->start) {
 		co = d->cnfa->bos[(v->eflags&REG_NOTBOL) ? 0 : 1];
-		if (v->eflags&REG_FTRACE)
-			printf("color %ld\n", (long)co);
+		FDEBUG(("color %ld\n", (long)co));
 	} else {
-		co = getcolor(cm, *(cp - 1));
-		if (v->eflags&REG_FTRACE)
-			printf("char %c, color %ld\n", (char)*(cp-1), (long)co);
+		co = GETCOLOR(cm, *(cp - 1));
+		FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co));
 	}
-	css = miss(v, d, css, co, cp);
+	css = miss(v, d, css, co, cp, start);
 	if (css == NULL)
 		return NULL;
 	css->lastseen = cp;
+	ss = css;
 
 	/* main loop */
 	if (v->eflags&REG_FTRACE)
 		while (cp < realmax) {
-			printf("--- at c%d ---\n", css - d->ssets);
-			co = getcolor(cm, *cp);
-			printf("char %c, color %ld\n", (char)*cp, (long)co);
+			FDEBUG(("--- at c%d ---\n", css - d->ssets));
+			co = GETCOLOR(cm, *cp);
+			FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co));
 			ss = css->outs[co];
 			if (ss == NULL) {
-				ss = miss(v, d, css, co, cp);
+				ss = miss(v, d, css, co, cp+1, start);
 				if (ss == NULL)
 					break;	/* NOTE BREAK OUT */
 			}
@@ -1344,10 +1261,10 @@ chr *max;			/* match must end at or before here */
 		}
 	else
 		while (cp < realmax) {
-			co = getcolor(cm, *cp);
+			co = GETCOLOR(cm, *cp);
 			ss = css->outs[co];
 			if (ss == NULL) {
-				ss = miss(v, d, css, co, cp+1);
+				ss = miss(v, d, css, co, cp+1, start);
 				if (ss == NULL)
 					break;	/* NOTE BREAK OUT */
 			}
@@ -1366,13 +1283,11 @@ chr *max;			/* match must end at or before here */
 	}
 
 	/* shutdown */
-	if (v->eflags&REG_FTRACE)
-		printf("--- shutdown at c%d ---\n", css - d->ssets);
+	FDEBUG(("--- shutdown at c%d ---\n", css - d->ssets));
 	if (cp == v->stop && max == v->stop) {
 		co = d->cnfa->eos[(v->eflags&REG_NOTEOL) ? 0 : 1];
-		if (v->eflags&REG_FTRACE)
-			printf("color %ld\n", (long)co);
-		ss = miss(v, d, css, co, cp);
+		FDEBUG(("color %ld\n", (long)co));
+		ss = miss(v, d, css, co, cp, start);
 		/* special case:  match ended at eol? */
 		if (ss != NULL && (ss->flags&POSTSTATE))
 			return cp;
@@ -1392,7 +1307,7 @@ struct vars *v;
 struct cnfa *cnfa;
 struct colormap *cm;
 {
-	struct dfa *d = (struct dfa *)ckalloc(sizeof(struct dfa));
+	struct dfa *d = (struct dfa *)MALLOC(sizeof(struct dfa));
 	int wordsper = (cnfa->nstates + UBITS - 1) / UBITS;
 	struct sset *ss;
 	int i;
@@ -1403,13 +1318,13 @@ struct colormap *cm;
 		return NULL;
 	}
 
-	d->ssets = (struct sset *)ckalloc(CACHE * sizeof(struct sset));
-	d->statesarea = (unsigned *)ckalloc((CACHE+WORK) * wordsper *
+	d->ssets = (struct sset *)MALLOC(CACHE * sizeof(struct sset));
+	d->statesarea = (unsigned *)MALLOC((CACHE+WORK) * wordsper *
 							sizeof(unsigned));
 	d->work = &d->statesarea[CACHE * wordsper];
-	d->outsarea = (struct sset **)ckalloc(CACHE * cnfa->ncolors *
+	d->outsarea = (struct sset **)MALLOC(CACHE * cnfa->ncolors *
 							sizeof(struct sset *));
-	d->incarea = (struct arcp *)ckalloc(CACHE * cnfa->ncolors *
+	d->incarea = (struct arcp *)MALLOC(CACHE * cnfa->ncolors *
 							sizeof(struct arcp));
 	if (d->ssets == NULL || d->statesarea == NULL || d->outsarea == NULL ||
 							d->incarea == NULL) {
@@ -1426,6 +1341,7 @@ struct colormap *cm;
 	d->cnfa = cnfa;
 	d->cm = cm;
 	d->lastpost = NULL;
+	d->search = d->ssets;
 
 	for (ss = d->ssets, i = 0; i < d->nssets; ss++, i++) {
 		/* initialization of most fields is done as needed */
@@ -1446,14 +1362,14 @@ freedfa(d)
 struct dfa *d;
 {
 	if (d->ssets != NULL)
-		ckfree((char *)d->ssets);
+		FREE(d->ssets);
 	if (d->statesarea != NULL)
-		ckfree((char *)d->statesarea);
+		FREE(d->statesarea);
 	if (d->outsarea != NULL)
-		ckfree((char *)d->outsarea);
+		FREE(d->outsarea);
 	if (d->incarea != NULL)
-		ckfree((char *)d->incarea);
-	ckfree((char *)d);
+		FREE(d->incarea);
+	FREE(d);
 }
 
 /*
@@ -1492,7 +1408,7 @@ chr *start;
 	if (d->nssused > 0 && (d->ssets[0].flags&STARTER))
 		ss = &d->ssets[0];
 	else {				/* no, must (re)build it */
-		ss = getvacant(v, d);
+		ss = getvacant(v, d, start, start);
 		for (i = 0; i < d->wordsper; i++)
 			ss->states[i] = 0;
 		BSET(ss->states, d->cnfa->pre);
@@ -1512,15 +1428,16 @@ chr *start;
 /*
  - miss - handle a cache miss
  ^ static struct sset *miss(struct vars *, struct dfa *, struct sset *,
- ^ 	pcolor, chr *);
+ ^ 	pcolor, chr *, chr *);
  */
 static struct sset *		/* NULL if goes to empty set */
-miss(v, d, css, co, cp)
+miss(v, d, css, co, cp, start)
 struct vars *v;			/* used only for debug flags */
 struct dfa *d;
 struct sset *css;
 pcolor co;
 chr *cp;			/* next chr */
+chr *start;			/* where the attempt got started */
 {
 	struct cnfa *cnfa = d->cnfa;
 	int i;
@@ -1534,12 +1451,10 @@ chr *cp;			/* next chr */
 
 	/* for convenience, we can be called even if it might not be a miss */
 	if (css->outs[co] != NULL) {
-		if (v->eflags&REG_FTRACE)
-			printf("hit\n");
+		FDEBUG(("hit\n"));
 		return css->outs[co];
 	}
-	if (v->eflags&REG_FTRACE)
-		printf("miss\n");
+	FDEBUG(("miss\n"));
 
 	/* first, what set of states would we end up in? */
 	for (i = 0; i < d->wordsper; i++)
@@ -1554,10 +1469,9 @@ chr *cp;			/* next chr */
 					gotstate = 1;
 					if (ca->to == cnfa->post)
 						ispost = 1;
-					if (v->eflags&REG_FTRACE)
-						printf("%d -> %d\n", i, ca->to);
+					FDEBUG(("%d -> %d\n", i, ca->to));
 				}
-	dolacons = (gotstate) ? cnfa->haslacons : 0;
+	dolacons = (gotstate) ? (cnfa->flags&HASLACONS) : 0;
 	didlacons = 0;
 	while (dolacons) {		/* transitive closure */
 		dolacons = 0;
@@ -1574,9 +1488,7 @@ chr *cp;			/* next chr */
 						didlacons = 1;
 						if (ca->to == cnfa->post)
 							ispost = 1;
-						if (v->eflags&REG_FTRACE)
-							printf("%d :-> %d\n",
-								i, ca->to);
+						FDEBUG(("%d :> %d\n",i,ca->to));
 					}
 	}
 	if (!gotstate)
@@ -1585,14 +1497,13 @@ chr *cp;			/* next chr */
 
 	/* next, is that in the cache? */
 	for (p = d->ssets, i = d->nssused; i > 0; p++, i--)
-		if (p->hash == h && memcmp((VOID *)d->work, (VOID *)p->states,
+		if (p->hash == h && memcmp(VS(d->work), VS(p->states),
 					d->wordsper*sizeof(unsigned)) == 0) {
-			if (v->eflags&REG_FTRACE)
-				printf("cached c%d\n", p - d->ssets);
+			FDEBUG(("cached c%d\n", p - d->ssets));
 			break;			/* NOTE BREAK OUT */
 		}
 	if (i == 0) {		/* nope, need a new cache entry */
-		p = getvacant(v, d);
+		p = getvacant(v, d, cp, start);
 		assert(p != css);
 		for (i = 0; i < d->wordsper; i++)
 			p->states[i] = d->work[i];
@@ -1605,7 +1516,7 @@ chr *cp;			/* next chr */
 		css->outs[co] = p;
 		css->inchain[co] = p->ins;
 		p->ins.ss = css;
-		p->ins.co = (color) co;
+		p->ins.co = (color)co;
 	}
 	return p;
 }
@@ -1615,10 +1526,10 @@ chr *cp;			/* next chr */
  ^ static int lacon(struct vars *, struct cnfa *, chr *, pcolor);
  */
 static int			/* predicate:  constraint satisfied? */
-lacon(v, pcnfa, precp, co)
+lacon(v, pcnfa, cp, co)
 struct vars *v;
 struct cnfa *pcnfa;		/* parent cnfa */
-chr *precp;			/* points to previous chr */
+chr *cp;
 pcolor co;			/* "color" of the lookahead constraint */
 {
 	int n;
@@ -1628,18 +1539,16 @@ pcolor co;			/* "color" of the lookahead constraint */
 
 	n = co - pcnfa->ncolors;
 	assert(n < v->g->nlacons && v->g->lacons != NULL);
-	if (v->eflags&REG_FTRACE)
-		printf("=== testing lacon %d\n", n);
+	FDEBUG(("=== testing lacon %d\n", n));
 	sub = &v->g->lacons[n];
 	d = newdfa(v, &sub->cnfa, v->g->cm);
 	if (d == NULL) {
 		ERR(REG_ESPACE);
 		return 0;
 	}
-	end = longest(v, d, precp, v->stop);
+	end = longest(v, d, cp, v->stop);
 	freedfa(d);
-	if (v->eflags&REG_FTRACE)
-		printf("=== lacon %d match %d\n", n, (end != NULL));
+	FDEBUG(("=== lacon %d match %d\n", n, (end != NULL)));
 	return (sub->subno) ? (end != NULL) : (end == NULL);
 }
 
@@ -1647,12 +1556,14 @@ pcolor co;			/* "color" of the lookahead constraint */
  - getvacant - get a vacant state set
  * This routine clears out the inarcs and outarcs, but does not otherwise
  * clear the innards of the state set -- that's up to the caller.
- ^ static struct sset *getvacant(struct vars *, struct dfa *);
+ ^ static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *);
  */
 static struct sset *
-getvacant(v, d)
+getvacant(v, d, cp, start)
 struct vars *v;			/* used only for debug flags */
 struct dfa *d;
+chr *cp;
+chr *start;
 {
 	int i;
 	struct sset *ss;
@@ -1661,15 +1572,14 @@ struct dfa *d;
 	struct arcp lastap;
 	color co;
 
-	ss = pickss(v, d);
+	ss = pickss(v, d, cp, start);
+	assert(!(ss->flags&LOCKED));
 
 	/* clear out its inarcs, including self-referential ones */
 	ap = ss->ins;
 	while ((p = ap.ss) != NULL) {
 		co = ap.co;
-		if (v->eflags&REG_FTRACE)
-			printf("zapping c%d's %ld outarc\n", p - d->ssets,
-								(long)co);
+		FDEBUG(("zapping c%d's %ld outarc\n", p - d->ssets, (long)co));
 		p->outs[co] = NULL;
 		ap = p->inchain[co];
 		p->inchain[co].ss = NULL;	/* paranoia */
@@ -1682,9 +1592,7 @@ struct dfa *d;
 		assert(p != ss);		/* not self-referential */
 		if (p == NULL)
 			continue;		/* NOTE CONTINUE */
-		if (v->eflags&REG_FTRACE)
-			printf("deleting outarc %d from c%d's inarc chain\n",
-							i, p - d->ssets);
+		FDEBUG(("del outarc %d from c%d's in chn\n", i, p - d->ssets));
 		if (p->ins.ss == ss && p->ins.co == i)
 			p->ins = ss->inchain[i];
 		else {
@@ -1710,23 +1618,25 @@ struct dfa *d;
 
 /*
  - pickss - pick the next stateset to be used
- ^ static struct sset *pickss(struct vars *, struct dfa *);
+ ^ static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *);
  */
 static struct sset *
-pickss(v, d)
+pickss(v, d, cp, start)
 struct vars *v;			/* used only for debug flags */
 struct dfa *d;
+chr *cp;
+chr *start;
 {
 	int i;
 	struct sset *ss;
-	struct sset *oldest;
+	struct sset *end;
+	chr *ancient;
 
 	/* shortcut for cases where cache isn't full */
 	if (d->nssused < d->nssets) {
 		ss = &d->ssets[d->nssused];
 		d->nssused++;
-		if (v->eflags&REG_FTRACE)
-			printf("new c%d\n", ss - d->ssets);
+		FDEBUG(("new c%d\n", ss - d->ssets));
 		/* must make innards consistent */
 		ss->ins.ss = NULL;
 		for (i = 0; i < d->ncolors; i++) {
@@ -1734,21 +1644,32 @@ struct dfa *d;
 			ss->inchain[i].ss = NULL;
 		}
 		ss->flags = 0;
-		ss->ins.co = 0;
 		return ss;
 	}
 
-	/* look for oldest */
-	oldest = d->ssets;
-	for (ss = d->ssets, i = d->nssets; i > 0; ss++, i--) {
-		if (ss->lastseen != oldest->lastseen && (ss->lastseen == NULL ||
-					ss->lastseen < oldest->lastseen))
-			oldest = ss;
-	}
-	if (v->eflags&REG_FTRACE)
-		printf("replacing c%d\n", oldest - d->ssets);
-	return oldest;
-}
+	/* look for oldest, or old enough anyway */
+	if (cp - start > d->nssets*3/4)		/* oldest 25% are expendable */
+		ancient = cp - d->nssets*3/4;
+	else
+		ancient = start;
+	for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++)
+		if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
+							!(ss->flags&LOCKED)) {
+			d->search = ss + 1;
+			FDEBUG(("replacing c%d\n", ss - d->ssets));
+			return ss;
+		}
+	for (ss = d->ssets, end = d->search; ss < end; ss++)
+		if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
+							!(ss->flags&LOCKED)) {
+			d->search = ss + 1;
+			FDEBUG(("replacing c%d\n", ss - d->ssets));
+			return ss;
+		}
 
-#define	EXEC	1
-#include "color.c"
+	/* nobody's old enough?!? -- something's really wrong */
+	FDEBUG(("can't find victim to replace!\n"));
+	assert(NOTREACHED);
+	ERR(REG_ASSERT);
+	return d->ssets;
+}
diff --git a/generic/regfree.c b/generic/regfree.c
new file mode 100644
index 0000000..a5c3f0b
--- /dev/null
+++ b/generic/regfree.c
@@ -0,0 +1,25 @@
+/*
+ * regfree - free an RE
+ *
+ * You might think that this could be incorporated into regcomp.c, and
+ * that would be a reasonable idea... except that this is a generic
+ * function (with a generic name), applicable to all compiled REs
+ * regardless of the size of their characters, whereas the stuff in
+ * regcomp.c gets compiled once per character size.
+ */
+
+#include "regguts.h"
+
+/*
+ - regfree - free an RE (generic function, punts to RE-specific function)
+ * The RE-specific function is the free member of the struct fns that
+ * re->re_fns points to.  Ignoring invocation with NULL is a convenience.
+ */
+VOID
+regfree(re)
+regex_t *re;			/* compiled RE to release; may be NULL */
+{
+	if (re == NULL)
+		return;
+	(*((struct fns *)re->re_fns)->free)(re);	/* RE-specific free */
+}
diff --git a/generic/regfronts.c b/generic/regfronts.c
new file mode 100644
index 0000000..a9bd556
--- /dev/null
+++ b/generic/regfronts.c
@@ -0,0 +1,56 @@
+/*
+ * regcomp and regexec - front ends to re_ routines
+ *
+ * Mostly for implementation of backward-compatibility kludges.  Note
+ * that these routines exist ONLY in char versions.
+ */
+
+#include "regguts.h"
+
+/*
+ - regcomp - compile regular expression (front end to re_comp)
+ */
+int				/* whatever re_comp returns */
+regcomp(re, str, flags)
+regex_t *re;			/* re_endp is read when REG_PEND is set */
+CONST char *str;		/* the pattern */
+int flags;			/* compile flags; REG_PEND is consumed here */
+{
+	size_t len;		/* pattern length handed to re_comp */
+	int f = flags;		/* flags to pass down, minus REG_PEND */
+
+	if (f&REG_PEND) {	/* pattern ends at re->re_endp, not at a NUL */
+		len = re->re_endp - str;
+		f &= ~REG_PEND;
+	} else
+		len = strlen(str);
+
+	return re_comp(re, str, len, f);
+}
+
+/*
+ - regexec - execute regular expression (front end to re_exec)
+ */
+int				/* whatever re_exec returns */
+regexec(re, str, nmatch, pmatch, flags)
+regex_t *re;
+CONST char *str;
+size_t nmatch;
+regmatch_t pmatch[];		/* pmatch[0] is input too, under REG_STARTEND */
+int flags;			/* exec flags; REG_STARTEND is consumed here */
+{
+	CONST char *start;	/* first char handed to re_exec */
+	size_t len;		/* length handed to re_exec */
+	int f = flags;		/* flags to pass down, minus REG_STARTEND */
+
+	if (f&REG_STARTEND) {	/* pmatch[0] is read even if nmatch is 0 */
+		start = str + pmatch[0].rm_so;
+		len = pmatch[0].rm_eo - pmatch[0].rm_so; /* rm_eo<rm_so unchecked */
+		f &= ~REG_STARTEND;
+	} else {
+		start = str;
+		len = strlen(str);
+	}
+
+	return re_exec(re, start, len, nmatch, pmatch, f);
+}
diff --git a/generic/guts.h b/generic/regguts.h
index 7b847ac..1490d44 100644
--- a/generic/guts.h
+++ b/generic/regguts.h
@@ -1,54 +1,119 @@
 /*
- * guts.h --
- *
- * 	Regexp package file:  Misc. utilities.
- *
- * Copyright (c) 1998 Henry Spencer.  All rights reserved.
- * 
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., and Sun Microsystems Inc., none of
- * whom are responsible for the results.  The author thanks all of them.
- * 
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications. 
- * 
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- * 
- * Copyright (c) 1998 by Sun Microsystems, Inc.
- *
- * See the file "license.terms" for information on usage and redistribution
- * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
- *
- * RCS: @(#) $Id: guts.h,v 1.1.2.2 1998/10/03 01:56:40 stanton Exp $
+ * Internal interface definitions, etc., for the regex package
  */
 
-#include "tclInt.h"
 
-#define	NOTREACHED	0
-#define	xxx		1
 
+/*
+ * Environmental customization.  It should not (I hope) be necessary to
+ * alter the file you are now reading -- regcustom.h should handle it all,
+ * given care here and elsewhere.
+ */
+#include "regcustom.h"
+
+
+
+/*
+ * Things that regcustom.h might override.
+ */
+
+/* standard header files (NULL is a reasonable indicator for them) */
+#ifndef NULL
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <limits.h>
+#include <string.h>
+#endif
+
+/* assertions */
+#ifndef assert
+#include <assert.h>
+#endif
+
+/* voids */
+#ifndef VOID
+#define	VOID	void			/* for function return values */
+#endif
+#ifndef DISCARD
+#define	DISCARD	VOID			/* for throwing values away */
+#endif
+#ifndef PVOID
+#define	PVOID	VOID *			/* generic pointer */
+#endif
+#ifndef VS
+#define	VS(x)	((PVOID)(x))		/* cast something to generic ptr */
+#endif
+#ifndef NOPARMS
+#define	NOPARMS	VOID			/* for empty parm lists */
+#endif
+
+/* function-pointer declarator */
+#ifndef FUNCPTR
+#if __STDC__ >= 1
+#define	FUNCPTR(name, args)	(*name)args
+#else
+#define	FUNCPTR(name, args)	(*name)()
+#endif
+#endif
+
+/* memory allocation */
+#ifndef MALLOC
+#define	MALLOC(n)	malloc(n)
+#endif
+#ifndef REALLOC
+#define	REALLOC(p, n)	realloc(VS(p), n)
+#endif
+#ifndef FREE
+#define	FREE(p)		free(VS(p))
+#endif
+
+/* want size of a char in bits, and max value in bounded quantifiers */
+#ifndef CHAR_BIT
+#include <limits.h>
+#endif
 #ifndef _POSIX2_RE_DUP_MAX
-#define	_POSIX2_RE_DUP_MAX	255
+#define	_POSIX2_RE_DUP_MAX	255	/* normally from <limits.h> */
 #endif
+
+
+
+/*
+ * misc
+ */
+
+#define	NOTREACHED	0
+#define	xxx		1
+
 #define	DUPMAX	_POSIX2_RE_DUP_MAX
 #define	INFINITY	(DUPMAX+1)
 
-/* bitmap manipulation */
+#define	REMAGIC	0xfed7		/* magic number for main struct */
+
+
+
+/*
+ * debugging facilities
+ */
+#ifdef REG_DEBUG
+#define	FDEBUG(arglist)	{ if (v->eflags&REG_FTRACE) printf arglist; }
+#define	MDEBUG(arglist)	{ if (v->eflags&REG_MTRACE) printf arglist; }
+#else
+#define	FDEBUG(arglist)	{}
+#define	MDEBUG(arglist)	{}
+#endif
+
+
+
+/*
+ * bitmap manipulation
+ */
 #define	UBITS	(CHAR_BIT * sizeof(unsigned))
 #define	BSET(uv, sn)	((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS))
 #define	ISBSET(uv, sn)	((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS)))
 
+
+
 /*
  * Map a truth value into -1 for false, 1 for true.  This is so it is
  * possible to write compile-time assertions by declaring a dummy array
@@ -56,14 +121,13 @@
  */
 #define	NEGIFNOT(x)	(2*!!(x) - 1)		/* !! ensures 0 or 1 */
 
+
+
 /*
  * We dissect a chr into byts for colormap table indexing.  Here we define
  * a byt, which will be the same as a byte on most machines...  The exact
  * size of a byt is not critical, but about 8 bits is good, and extraction
  * of 8-bit chunks is sometimes especially fast.
- *
- * Changes in several places are needed to handle an increase in MAXBYTS.
- * Those places check whether MAXBYTS is larger than they expect.
  */
 #ifndef BYTBITS
 #define	BYTBITS	8		/* bits in a byt */
@@ -71,7 +135,9 @@
 #define	BYTTAB	(1<<BYTBITS)	/* size of table with one entry per byt value */
 #define	BYTMASK	(BYTTAB-1)	/* bit mask for byt */
 #define	NBYTS	((CHRBITS+BYTBITS-1)/BYTBITS)
-#define	MAXBYTS	8		/* maximum NBYTS the code can handle */
+/* GETCOLOR(), below, is defined only for NBYTS of 1, 2, or 4 */
+
+
 
 /*
  * As soon as possible, we map chrs into equivalence classes -- "colors" --
@@ -81,22 +147,96 @@ typedef short color;		/* colors of characters */
 typedef int pcolor;		/* what color promotes to */
 #define	COLORLESS	(-1)	/* impossible color */
 #define	WHITE		0	/* default color, parent of all others */
-struct colormap;		/* forward def for master type */
+
+
 
 /*
- * Interface definitions for locale-interface functions in locale.c
+ * A colormap is a tree -- more precisely, a DAG -- indexed at each level
+ * by a byt of the chr, to map the chr to a color efficiently.  Because
+ * lower sections of the tree can be shared, it can exploit the usual
+ * sparseness of such a mapping table.  The final tree is always NBYTS
+ * levels deep (at present it may be shallower during construction, but
+ * it is always "filled" to full depth at the end of that, using pointers
+ * to "fill blocks" which are entirely WHITE in color).
+ */
+
+/* the tree itself:  each block is indexed by one byt of the chr */
+struct colors {
+	color ccolor[BYTTAB];		/* bottom level:  the colors */
+};
+struct ptrs {
+	union tree *pptr[BYTTAB];	/* upper levels:  next block down */
+};
+union tree {
+	struct colors colors;
+	struct ptrs ptrs;
+};
+#define	tcolor	colors.ccolor		/* shorthand used by GETCOLOR() */
+#define	tptr	ptrs.pptr
+
+/* internal per-color structure for the color machinery */
+struct colordesc {
+	uchr nchrs;		/* number of chars of this color */
+	color sub;		/* open subcolor of this one, or NOSUB */
+#		define	NOSUB	COLORLESS
+	struct arc *arcs;	/* color chain */
+#	define	UNUSEDCOLOR(cd)	((cd)->nchrs == 0 && (cd)->sub == NOSUB)
+	int flags;
+#		define	PSEUDO	1	/* pseudocolor, no real chars */
+};
+
+/* the color map itself */
+struct colormap {
+	int magic;
+#		define	CMMAGIC	0x876
+	struct vars *v;			/* for compile error reporting */
+	color rest;
+	int filled;			/* has it been filled? */
+	size_t ncds;			/* number of colordescs */
+	struct colordesc *cd;
+#	define	CDEND(cm)	(&(cm)->cd[(cm)->ncds])
+#		define	NINLINECDS	((size_t)10)
+	struct colordesc cds[NINLINECDS];
+	union tree tree[NBYTS];		/* tree top, plus fill blocks */
+};
+
+/* optimization magic to do fast chr->color mapping */
+#define	B0(c)	((c) & BYTMASK)
+#define	B1(c)	(((c)>>BYTBITS) & BYTMASK)
+#define	B2(c)	(((c)>>(2*BYTBITS)) & BYTMASK)
+#define	B3(c)	(((c)>>(3*BYTBITS)) & BYTMASK)
+#if NBYTS == 1
+#define	GETCOLOR(cm, c)	((cm)->tree->tcolor[B0(c)])
+#endif
+#if NBYTS == 2
+#define	GETCOLOR(cm, c)	((cm)->tree->tptr[B1(c)]->tcolor[B0(c)])
+#endif
+#if NBYTS == 4
+#define	GETCOLOR(cm, c)	((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)])
+#endif
+
+
+
+/*
+ * Interface definitions for locale-interface functions in locale.c.
+ * Multi-character collating elements (MCCEs) cause most of the trouble.
  */
 struct cvec {
 	int nchrs;		/* number of chrs */
 	int chrspace;		/* number of chrs possible */
 	chr *chrs;		/* pointer to vector of chrs */
-	int nces;		/* number of multichr collating elements */
-	int cespace;		/* number of CEs possible */
-	int ncechrs;		/* number of chrs used for CEs */
-	chr *ces[1];		/* pointers to 0-terminated CEs */
+	int nmcces;		/* number of MCCEs */
+	int mccespace;		/* number of MCCEs possible */
+	int nmccechrs;		/* number of chrs used for MCCEs */
+	chr *mcces[1];		/* pointers to 0-terminated MCCEs */
 				/* and both batches of chrs are on the end */
 };
 
+/* caution:  this value cannot be changed easily */
+#define	MAXMCCE	2		/* length of longest MCCE */
+
+
+
 /*
  * definitions for NFA internal representation
  *
@@ -147,12 +287,15 @@ struct nfa {
 	struct state *states;	/* state-chain header */
 	struct state *slast;	/* tail of the chain */
 	struct state *free;	/* free list */
+	struct colormap *cm;	/* the color map */
 	color bos[2];		/* colors, if any, assigned to BOS and BOL */
 	color eos[2];		/* colors, if any, assigned to EOS and EOL */
 	struct vars *v;		/* simplifies compile error reporting */
 	struct nfa *parent;	/* parent NFA, if any */
 };
 
+
+
 /*
  * definitions for compacted NFA
  */
@@ -164,8 +307,9 @@ struct carc {
 struct cnfa {
 	int nstates;		/* number of states */
 	int ncolors;		/* number of colors */
-	int haslacons;		/* does it use lookahead constraints? */
- 	int leftanch;		/* is it anchored on the left? */
+	int flags;
+#		define	HASLACONS	01	/* uses lookahead constraints */
+#		define	LEFTANCH	02	/* anchored on left */
 	int pre;		/* setup state number */
 	int post;		/* teardown state number */
 	color bos[2];		/* colors, if any, assigned to BOS and BOL */
@@ -176,6 +320,8 @@ struct cnfa {
 #define	ZAPCNFA(cnfa)	((cnfa).nstates = 0)
 #define	NULLCNFA(cnfa)	((cnfa).nstates == 0)
 
+
+
 /*
  * definitions for subexpression tree
  * The intrepid code-reader is hereby warned that the subexpression tree
@@ -198,20 +344,27 @@ struct subre {
 
 struct rtree {
 	char op;		/* operator:  '|', ',' */
-	short no;		/* node numbering */
+	char flags;
+#		define	INUSE	01	/* in use in the tree */
+	short no;		/* index into retry memory */
 	struct subre left;
 	struct rtree *next;	/* for '|' */
 	struct subre right;	/* for ',' */
+	struct rtree *chain;	/* for bookkeeping and error cleanup */
 };
 
+
+
 /*
  * table of function pointers for generic manipulation functions
  * A regex_t's re_fns points to one of these.
  */
 struct fns {
-	VOID (*free) _ANSI_ARGS_((regex_t *));
+	VOID FUNCPTR(free, (regex_t *));
 };
 
+
+
 /*
  * the insides of a regex_t, hidden behind a void *
  */
@@ -220,13 +373,12 @@ struct guts {
 #		define	GUTSMAGIC	0xfed9
 	int cflags;		/* copy of compile flags */
 	int info;		/* copy of re_info */
-	int nsub;		/* copy of re_nsub */
+	size_t nsub;		/* copy of re_nsub */
 	struct cnfa cnfa;
 	struct rtree *tree;
 	int ntree;
 	struct colormap *cm;
-	int (*compare) _ANSI_ARGS_((CONST chr *, CONST chr *, size_t));
-				/* string-compare function */
+	int FUNCPTR(compare, (CONST chr *, CONST chr *, size_t));
 	struct subre *lacons;	/* lookahead-constraint vector */
 	int nlacons;		/* size of lacons */
 	int usedshorter;	/* used non-greedy quantifiers? */
diff --git a/generic/tclBasic.c b/generic/tclBasic.c
index e8fa7ad..fcc1f93 100644
--- a/generic/tclBasic.c
+++ b/generic/tclBasic.c
@@ -12,7 +12,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclBasic.c,v 1.1.2.3 1998/09/30 20:46:21 stanton Exp $
+ * RCS: @(#) $Id: tclBasic.c,v 1.1.2.4 1998/10/21 20:40:02 stanton Exp $
  */
 
 #include "tclInt.h"
@@ -77,6 +77,8 @@ static CmdInfo builtInCmds[] = {
         (CompileProc *) NULL,		1},
     {"continue",	(Tcl_CmdProc *) NULL,	Tcl_ContinueObjCmd,
         TclCompileContinueCmd,		1},
+    {"encoding",	(Tcl_CmdProc *) NULL,	Tcl_EncodingObjCmd,
+        (CompileProc *) NULL,		1},
     {"error",		(Tcl_CmdProc *) NULL,	Tcl_ErrorObjCmd,
         (CompileProc *) NULL,		1},
     {"eval",		(Tcl_CmdProc *) NULL,	Tcl_EvalObjCmd,
diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c
index f17b8fc..54a3046 100644
--- a/generic/tclCmdAH.c
+++ b/generic/tclCmdAH.c
@@ -11,7 +11,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclCmdAH.c,v 1.1.2.3 1998/09/28 20:24:18 stanton Exp $
+ * RCS: @(#) $Id: tclCmdAH.c,v 1.1.2.4 1998/10/21 20:40:03 stanton Exp $
  */
 
 #include "tclInt.h"
@@ -343,7 +343,7 @@ Tcl_CdObjCmd(dummy, interp, objc, objv)
  * Tcl_ConcatObjCmd --
  *
  *	This object-based procedure is invoked to process the "concat" Tcl
- *	command. See the user documentation for details on what it does/
+ *	command. See the user documentation for details on what it does.
  *
  * Results:
  *	A standard Tcl object result.
@@ -407,6 +407,123 @@ Tcl_ContinueObjCmd(dummy, interp, objc, objv)
 /*
  *----------------------------------------------------------------------
  *
+ * Tcl_EncodingObjCmd --
+ *
+ *	This procedure is invoked to process the "encoding" Tcl command.
+ *
+ * Results:
+ *	A standard Tcl result.
+ *
+ * Side effects:
+ *	May change the system encoding; see the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_EncodingObjCmd(dummy, interp, objc, objv)
+    ClientData dummy;		/* Not used. */
+    Tcl_Interp *interp;		/* Current interpreter. */
+    int objc;			/* Number of arguments. */
+    Tcl_Obj *CONST objv[];	/* Argument objects. */
+{
+    int index, length;
+    Tcl_Encoding encoding;
+    char *string;
+    Tcl_DString ds;
+    Tcl_Obj *resultPtr;
+
+    static char *optionStrings[] = {
+	"convertfrom", "convertto", "names", "system",
+	NULL
+    };
+    enum options {		/* Must stay in the same order as optionStrings. */
+	ENC_CONVERTFROM, ENC_CONVERTTO, ENC_NAMES, ENC_SYSTEM
+    };
+
+    if (objc < 2) {
+    	Tcl_WrongNumArgs(interp, 1, objv, "option ?arg ...?");
+        return TCL_ERROR;
+    }
+    if (Tcl_GetIndexFromObj(interp, objv[1], optionStrings, "option", 0,
+	    &index) != TCL_OK) {
+	return TCL_ERROR;
+    }
+
+    switch ((enum options) index) {
+	case ENC_CONVERTTO:
+	case ENC_CONVERTFROM: {
+	    char *name;
+	    Tcl_Obj *data;
+	    if (objc == 3) {
+		name = NULL;	/* No encoding given; NULL presumably selects the system encoding. */
+		data = objv[2];
+	    } else if (objc == 4) {
+		name = Tcl_GetString(objv[2]);
+		data = objv[3];
+	    } else {
+		Tcl_WrongNumArgs(interp, 2, objv, "?encoding? data");
+		return TCL_ERROR;
+	    }
+	    
+	    encoding = Tcl_GetEncoding(interp, name);
+	    if (!encoding) {
+		return TCL_ERROR;
+	    }
+
+	    if ((enum options) index == ENC_CONVERTFROM) {
+		/*
+		 * Treat the data as a byte array and convert it to a UTF string.
+		 */
+
+		string = (char *) Tcl_GetByteArrayFromObj(data, &length);
+		Tcl_ExternalToUtfDString(encoding, string, length, &ds);
+		Tcl_DStringResult(interp, &ds);
+	    } else {
+		/*
+		 * Convert the UTF string and store the result as a byte array.
+		 */
+
+		string = Tcl_GetStringFromObj(data, &length);
+		Tcl_UtfToExternalDString(encoding, string, length, &ds);
+		resultPtr = Tcl_GetObjResult(interp);
+		Tcl_SetByteArrayObj(resultPtr, 
+			(unsigned char *) Tcl_DStringValue(&ds),
+			Tcl_DStringLength(&ds));
+		Tcl_DStringFree(&ds);
+	    }
+
+	    Tcl_FreeEncoding(encoding);	/* Pairs with Tcl_GetEncoding above. */
+	    break;
+	}
+	case ENC_NAMES: {
+	    if (objc > 2) {		/* "names" takes no further arguments. */
+		Tcl_WrongNumArgs(interp, 2, objv, NULL);
+		return TCL_ERROR;
+	    }
+	    Tcl_GetEncodingNames(interp);
+	    break;
+	}
+	case ENC_SYSTEM: {
+	    if (objc > 3) {
+		Tcl_WrongNumArgs(interp, 2, objv, "?encoding?");
+		return TCL_ERROR;
+	    }
+	    if (objc == 2) {		/* Query: report the current name. */
+	        Tcl_SetResult(interp, Tcl_GetEncodingName(NULL), TCL_STATIC);
+	    } else {			/* Set: status comes from the setter. */
+	        return Tcl_SetSystemEncoding(interp,
+			Tcl_GetStringFromObj(objv[2], NULL));
+	    }
+	    break;
+	}
+    }
+    return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
  * Tcl_ErrorObjCmd --
  *
  *	This procedure is invoked to process the "error" Tcl command.
diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c
index f47fb1e..6b4cc39 100644
--- a/generic/tclCmdIL.c
+++ b/generic/tclCmdIL.c
@@ -13,12 +13,13 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclCmdIL.c,v 1.1.2.2 1998/09/24 23:58:42 stanton Exp $
+ * RCS: @(#) $Id: tclCmdIL.c,v 1.1.2.3 1998/10/21 20:40:04 stanton Exp $
  */
 
 #include "tclInt.h"
 #include "tclPort.h"
 #include "tclCompile.h"
+#include "tclRegexp.h"
 
 /*
  * During execution of the "lsort" command, structures of the following
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 8a3b6d5..9f46efc 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -12,12 +12,13 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.1.2.3 1998/10/16 01:16:57 stanton Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.1.2.4 1998/10/21 20:40:05 stanton Exp $
  */
 
 #include "tclInt.h"
 #include "tclPort.h"
 #include "tclCompile.h"
+#include "tclRegexp.h"
 
 /*
  * Structure used to hold information about variable traces:
@@ -108,20 +109,26 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
     int objc;				/* Number of arguments. */
     Tcl_Obj *CONST objv[];		/* Argument objects. */
 {
-    int i, result, indices, flags, stringLength, wLen, match;
+    int i, result, indices, stringLength, wLen, match, about;
+    int hasxflags, cflags, eflags;
     Tcl_RegExp regExpr;
     char *string;
     Tcl_DString stringBuffer, valueBuffer;
     Tcl_UniChar *wStart;
     static char *options[] = {
-	"-indices",	"-nocase",	"--",		(char *) NULL
+	"-indices",	"-nocase",	"-about",	"-expanded",
+	"-unsupported0",	"--",		(char *) NULL
     };
     enum options {
-	REGEXP_INDICES, REGEXP_NOCASE,	REGEXP_LAST
+	REGEXP_INDICES, REGEXP_NOCASE,	REGEXP_ABOUT,	REGEXP_EXPANDED,
+	REGEXP_XFLAGS,		REGEXP_LAST
     };
 
     indices = 0;
-    flags = 0;
+    about = 0;
+    cflags = REG_ADVANCED;
+    eflags = 0;
+    hasxflags = 0;
     
     for (i = 1; i < objc; i++) {
 	char *name;
@@ -141,7 +148,19 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
 		break;
 	    }
 	    case REGEXP_NOCASE: {
-		flags |= REG_ICASE;
+		cflags |= REG_ICASE;
+		break;
+	    }
+	    case REGEXP_ABOUT: {
+		about = 1;
+		break;
+	    }
+	    case REGEXP_EXPANDED: {
+		cflags |= REG_EXPANDED;
+		break;
+	    }
+	    case REGEXP_XFLAGS: {
+		hasxflags = 1;
 		break;
 	    }
 	    case REGEXP_LAST: {
@@ -152,7 +171,7 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
     }
 
     endOfForLoop:
-    if (objc - i < 2) {
+    if (objc - i < hasxflags + 2 - about) {
 	Tcl_WrongNumArgs(interp, 1, objv,
 		"?switches? exp string ?matchVar? ?subMatchVar subMatchVar ...?");
 	return TCL_ERROR;
@@ -160,11 +179,25 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
     objc -= i;
     objv += i;
 
-    regExpr = TclRegCompObj(interp, objv[0], flags | REG_ADVANCED);
+    if (hasxflags) {
+	string = Tcl_GetStringFromObj(objv[0], &stringLength);
+	TclRegXflags(string, stringLength, &cflags, &eflags);
+	objc--;
+	objv++;
+    }
+
+    regExpr = TclRegCompObj(interp, objv[0], cflags);
     if (regExpr == NULL) {
 	return TCL_ERROR;
     }
 
+    if (about) {
+	if (TclRegAbout(interp, regExpr) < 0) {
+	    return TCL_ERROR;
+	}
+	return TCL_OK;
+    }
+
     result = TCL_OK;
     string = Tcl_GetStringFromObj(objv[1], &stringLength);
 
@@ -174,7 +207,7 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
     wStart = TclUtfToUniCharDString(string, stringLength, &stringBuffer);
     wLen = Tcl_DStringLength(&stringBuffer) / sizeof(Tcl_UniChar);
 
-    match = TclRegExpExecUniChar(interp, regExpr, wStart, wLen, 0);
+    match = TclRegExpExecUniChar(interp, regExpr, wStart, wLen, eflags);
     if (match < 0) {
 	result = TCL_ERROR;
 	goto done;
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 9b3f18d..06da42e 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -8,7 +8,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclEncoding.c,v 1.1.2.2 1998/10/03 01:56:41 stanton Exp $
+ * RCS: @(#) $Id: tclEncoding.c,v 1.1.2.3 1998/10/21 20:40:05 stanton Exp $
  */
 
 #include "tclInt.h"
@@ -136,8 +136,8 @@ typedef struct EscapeEncodingData {
 #define ENCODING_ESCAPE		3
 
 /*
- * Hash table that keeps track of all loaded TextEncodings.  Keys are
- * the string names that represent the encoding, values are (TextEncoding *).
+ * Hash table that keeps track of all loaded Encodings.  Keys are
+ * the string names that represent the encoding, values are (Encoding *).
  */
  
 static Tcl_HashTable encodingTable;
@@ -277,6 +277,23 @@ TclInitEncodingSubsystem()
     Tcl_CreateEncoding(&type);
 }
 
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclFinalizeEncodingSubsystem --
+ *
+ *	Release the state associated with the encoding subsystem.
+ *
+ * Results:
+ *	None.
+ *
+ * Side effects:
+ *	Frees all of the encodings.
+ *
+ *----------------------------------------------------------------------
+ */
+
 void
 TclFinalizeEncodingSubsystem()
 {
@@ -515,6 +532,11 @@ Tcl_GetEncodingNames(interp)
 	Tcl_DStringFree(&pwdString);
     }
 
+    /*
+     * Clear any values placed in the result by globbing.
+     */
+
+    Tcl_ResetResult(interp);
     resultPtr = Tcl_GetObjResult(interp);
 
     hPtr = Tcl_FirstHashEntry(&table, &search);
@@ -573,9 +595,9 @@ Tcl_SetSystemEncoding(interp, name)
 	    return TCL_ERROR;
 	}
     }
-    Tcl_FreeEncoding(systemEncoding);
 
     Tcl_MutexLock(&encodingMutex);
+    Tcl_FreeEncoding(systemEncoding);
     systemEncoding = encoding;
     Tcl_MutexUnlock(&encodingMutex);
 
@@ -1009,7 +1031,7 @@ LoadEncodingFile(interp, name)
 
     pathPtr = TclGetLibraryPath();
     if (pathPtr == NULL) {
-	return NULL;
+	goto unknown;
     }
     objc = 0;
     Tcl_ListObjGetElements(NULL, pathPtr, &objc, &objv);
@@ -1023,10 +1045,7 @@ LoadEncodingFile(interp, name)
     }
 
     if (chan == NULL) {
-	if (interp != NULL) {
-	    Tcl_AppendResult(interp, "unknown encoding \"", name, "\"", NULL);
-	}
-	return NULL;
+	goto unknown;
     }
 
     Tcl_SetChannelOption(NULL, chan, "-encoding", "utf-8");
@@ -1070,7 +1089,30 @@ LoadEncodingFile(interp, name)
     }
     Tcl_Close(NULL, chan);
     return encoding;
+
+    unknown:
+    if (interp != NULL) {
+	Tcl_AppendResult(interp, "unknown encoding \"", name, "\"", NULL);
+    }
+    return NULL;
 }
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * OpenEncodingFile --
+ *
+ *	Look for the file encoding/<name>.enc in the specified
+ *	directory.
+ *
+ * Results:
+ *	Returns an open file channel if the file exists.
+ *
+ * Side effects:
+ *	None.
+ *
+ *----------------------------------------------------------------------
+ */
 
 static Tcl_Channel
 OpenEncodingFile(dir, name)
diff --git a/generic/tclFileName.c b/generic/tclFileName.c
index 01fefa7..55832ab 100644
--- a/generic/tclFileName.c
+++ b/generic/tclFileName.c
@@ -9,11 +9,12 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclFileName.c,v 1.1.2.3 1998/10/06 00:36:56 stanton Exp $
+ * RCS: @(#) $Id: tclFileName.c,v 1.1.2.4 1998/10/21 20:40:05 stanton Exp $
  */
 
 #include "tclInt.h"
 #include "tclPort.h"
+#include "tclRegexp.h"
 
 /*
  * This variable indicates whether the cleanup procedure has been
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 140a2eb..0babdfd 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -10,7 +10,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclInt.h,v 1.1.2.6 1998/10/16 01:16:57 stanton Exp $
+ * RCS: @(#) $Id: tclInt.h,v 1.1.2.7 1998/10/21 20:40:06 stanton Exp $
  */
 
 #ifndef _TCLINT
@@ -774,48 +774,6 @@ typedef struct MathFunc {
 } MathFunc;
 
 /*
- *---------------------------------------------------------------------------
- * Definitions of flags used in regexp compilation and execution that need
- * to be visible to the rest of the Tcl core.  Definitions that are
- * entirely private to the regexp package live in tclRegexp.h.
- *---------------------------------------------------------------------------
- */
-
-/*
- *Compilation flags.
- */
-
-#define	REG_BASIC	000000	/* BREs (convenience) */
-#define	REG_EXTENDED	000001	/* EREs */
-#define	REG_ADVF	000002	/* advanced features in EREs */
-#define	REG_ADVANCED	000003	/* AREs (which are also EREs) */
-#define	REG_QUOTE	000004	/* no special characters, none */
-#define	REG_NOSPEC	REG_QUOTE	/* historical synonym */
-#define	REG_ICASE	000010	/* ignore case */
-#define	REG_NOSUB	000020	/* don't care about subexpressions */
-#define	REG_EXPANDED	000040	/* expanded format, white space & comments */
-#define	REG_NLSTOP	000100	/* \n doesn't match . or [^ ] */
-#define	REG_NLANCH	000200	/* ^ matches after \n, $ before */
-#define	REG_NEWLINE	000300	/* newlines are line terminators */
-
-/*
- * Execution flags.
- */
-
-#define	REG_NOTBOL	0001	/* BOS is not BOL */
-#define	REG_NOTEOL	0002	/* EOS is not EOL */
-
-EXTERN Tcl_RegExp	TclRegCompObj _ANSI_ARGS_((Tcl_Interp *interp,
-			    Tcl_Obj *patObj, int flags));
-EXTERN int		TclRegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp,
-			    Tcl_RegExp re, CONST Tcl_UniChar *uniString,
-			    int numChars, int flags));
-EXTERN int		TclRegExpMatchObj _ANSI_ARGS_((Tcl_Interp *interp,
-			    char *string, Tcl_Obj *patObj));
-EXTERN void		TclRegExpRangeUniChar _ANSI_ARGS_((Tcl_RegExp re,
-			    int index, int *startPtr, int *endPtr));
-
-/*
  * Threads support.
  * These routines are used to implement Tcl_GetThreadData.
  */
@@ -2161,6 +2119,8 @@ EXTERN int	Tcl_ConcatObjCmd _ANSI_ARGS_((ClientData clientData,
 		    Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[]));
 EXTERN int	Tcl_ContinueObjCmd _ANSI_ARGS_((ClientData clientData,
 		    Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[]));
+EXTERN int	Tcl_EncodingObjCmd _ANSI_ARGS_((ClientData clientData,
+		    Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[]));
 EXTERN int	Tcl_EofObjCmd _ANSI_ARGS_((ClientData clientData,
 		    Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[]));
 EXTERN int	Tcl_ErrorObjCmd _ANSI_ARGS_((ClientData clientData,
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c
index 44b575c..d65b19a 100644
--- a/generic/tclRegexp.c
+++ b/generic/tclRegexp.c
@@ -4,12 +4,13 @@
  *	This file contains the public interfaces to the Tcl regular
  *	expression mechanism.
  *
+ * Copyright (c) 1998 by Scriptics Corporation.
  * Copyright (c) 1998 by Sun Microsystems, Inc.
  *
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclRegexp.c,v 1.1.2.2 1998/10/03 01:56:41 stanton Exp $
+ * RCS: @(#) $Id: tclRegexp.c,v 1.1.2.3 1998/10/21 20:40:06 stanton Exp $
  */
 
 #include "tclInt.h"
@@ -337,6 +338,7 @@ TclRegExpExecUniChar(interp, re, wString, numChars, flags)
     TclRegexp *regexpPtr = (TclRegexp *) re;
 
     status = re_uexec(&regexpPtr->re, wString, (size_t) numChars,
+	    (rm_detail_t *)NULL,
 	    regexpPtr->re.re_nsub + 1, regexpPtr->matches, flags);
 
     /*
@@ -528,6 +530,83 @@ TclRegCompObj(interp, objPtr, flags)
 /*
  *----------------------------------------------------------------------
  *
+ * TclRegAbout --
+ *
+ *	Return information about a compiled regular expression.
+ *
+ * Results:
+ *	The return value is -1 for failure, 0 for success, although at
+ *	the moment there's nothing that could fail.  On success, a list
+ *	is left in the interp's result:  first element is the subexpression
+ *	count, second is a list of re_info bit names.
+ *
+ * Side effects:
+ *	None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclRegAbout(interp, re)
+    Tcl_Interp *interp;		/* Interp in which to leave the result. */
+    Tcl_RegExp re;		/* The compiled regular expression. */
+{
+    TclRegexp *regexpPtr = (TclRegexp *)re;
+    char buf[TCL_INTEGER_SPACE];
+    static struct infoname {
+	int bit;		/* re_info flag bit */
+	char *text;		/* name reported when that bit is set */
+    } infonames[] = {
+	REG_UBACKREF,		"REG_UBACKREF",
+	REG_ULOOKAHEAD,		"REG_ULOOKAHEAD",
+	REG_UBOUNDS,		"REG_UBOUNDS",
+	REG_UBRACES,		"REG_UBRACES",
+	REG_UBSALNUM,		"REG_UBSALNUM",
+	REG_UPBOTCH,		"REG_UPBOTCH",
+	REG_UBBS,		"REG_UBBS",
+	REG_UNONPOSIX,		"REG_UNONPOSIX",
+	REG_UUNSPEC,		"REG_UUNSPEC",
+	REG_UUNPORT,		"REG_UUNPORT",
+	REG_ULOCALE,		"REG_ULOCALE",
+	REG_UEMPTYMATCH,	"REG_UEMPTYMATCH",
+	0,			"",	/* end marker: bit == 0 stops the loops below */
+    };
+    struct infoname *inf;
+    int n;
+
+    Tcl_ResetResult(interp);
+
+    sprintf(buf, "%u", (unsigned)(regexpPtr->re.re_nsub));
+    Tcl_AppendElement(interp, buf);	/* first element: subexpression count */
+
+    /*
+     * Must count bits before generating list, because we must know
+     * whether {} are needed before we start appending names.
+     */
+    n = 0;
+    for (inf = infonames; inf->bit != 0; inf++) {
+	if (regexpPtr->re.re_info&inf->bit) {
+	    n++;
+	}
+    }
+    if (n != 1) {		/* one name needs no braces; none or several do */
+	Tcl_AppendResult(interp, " {", NULL);
+    }
+    for (inf = infonames; inf->bit != 0; inf++) {
+	if (regexpPtr->re.re_info&inf->bit) {
+	    Tcl_AppendElement(interp, inf->text);
+	}
+    }
+    if (n != 1) {		/* close the sublist opened above */
+	Tcl_AppendResult(interp, "}", NULL);
+    }
+
+    return 0;			/* always succeeds at present */
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
  * TclRegError --
  *
  *	Generate an error message based on the regexp status code.
@@ -536,7 +615,7 @@ TclRegCompObj(interp, objPtr, flags)
  *	Places an error in the interpreter.
  *
  * Side effects:
- *	None.
+ *	Sets errorCode as well.
  *
  *----------------------------------------------------------------------
  */
@@ -547,66 +626,19 @@ TclRegError(interp, msg, status)
     char *msg;			/* Message to prepend to error. */
     int status;			/* Status code to report. */
 {
-    char *errMsg;
-
-    switch(status) {
-	case REG_BADPAT:
-	    errMsg = "invalid regular expression";
-	    break;
-	case REG_ECOLLATE:
-	    errMsg = "invalid collating element";
-	    break;
-	case REG_ECTYPE:
-	    errMsg = "invalid character class";
-	    break;
-	case REG_EESCAPE:
-	    errMsg = "invalid escape sequence";
-	    break;
-	case REG_ESUBREG:
-	    errMsg = "invalid backreference number";
-	    break;
-	case REG_EBRACK:
-	    errMsg = "unmatched []";
-	    break;
-	case REG_EPAREN:
-	    errMsg = "unmatched ()";
-	    break;
-	case REG_EBRACE:
-	    errMsg = "unmatched {}";
-	    break;
-	case REG_BADBR:
-	    errMsg = "invalid repetition count(s)";
-	    break;
-	case REG_ERANGE:
-	    errMsg = "invalid character range";
-	    break;
-	case REG_ESPACE:
-	    errMsg = "out of memory";
-	    break;
-	case REG_BADRPT:
-	    errMsg = "?+* follows nothing";
-	    break;
-	case REG_ASSERT:
-	    errMsg = "\"can't happen\" -- you found a bug";
-	    break;
-	case REG_INVARG:
-	    errMsg = "invalid argument to regex routine";
-	    break;
-	case REG_MIXED:
-	    errMsg = "char RE applied to wchar_t string (etc.)";
-	    break;
-	case REG_BADOPT:
-	    errMsg = "invalid embedded option";
-	    break;
-	case REG_IMPOSS:
-	    errMsg = "can never match";
-	    break;
-	default:
-	    errMsg = "\"can't happen\" -- you found an undefined error code";
-	    break;
-    }
+    char buf[100];		/* ample in practice */
+    char cbuf[100];		/* lots in practice */
+    size_t n;
+    char *p;
+
     Tcl_ResetResult(interp);
-    Tcl_AppendResult(interp, msg, errMsg, NULL);
+    n = regerror(status, (regex_t *)NULL, buf, sizeof(buf));
+    p = (n > sizeof(buf)) ? "..." : "";
+    Tcl_AppendResult(interp, msg, buf, p, NULL);
+
+    sprintf(cbuf, "%d", status);
+    (VOID) regerror(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf));
+    Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL);
 }
 
 
@@ -749,36 +781,15 @@ CompileRegexp(interp, string, length, flags)
 
     if (status != REG_OKAY) {
 	/*
-	 * Warning, the following is a hack to allow empty regexp.
-	 * The goal is to compile a non-empty regexp that will always
-	 * find one empty match.  If you use "(?:)" (an empty pair of
-	 * non-capturing parentheses) instead, that will avoid both the
-	 * overhead and the subexpression report.
-	 */
-	
-	if (status == REG_EMPTY) {
-	    static Tcl_UniChar uniEmpty[] = {'(', '?', ':', ')', '\0'};
-	    
-	    uniString = uniEmpty;
-	    numChars = 4;
-	    status = re_ucomp(&regexpPtr->re, uniString, (size_t) numChars,
-		    REG_ADVANCED);
-	}
-
-	/*
 	 * Clean up and report errors in the interpreter, if possible.
 	 */
-
-	if (status != REG_OKAY) {
-	    regfree(&regexpPtr->re);
-	    ckfree((char *)regexpPtr);
-	    if (interp) {
-		TclRegError(interp,
-			"couldn't compile regular expression pattern: ",
-			status);
-	    }
-	    return NULL;
+	ckfree((char *)regexpPtr);
+	if (interp) {
+	    TclRegError(interp,
+		    "couldn't compile regular expression pattern: ",
+		    status);
 	}
+	return NULL;
     }
 
     /*
@@ -791,4 +802,100 @@ CompileRegexp(interp, string, length, flags)
 
     return regexpPtr;
 }
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * TclRegXflags --
+ *
+ *	Parse a string of extended regexp flag letters, for testing.
+ *
+ * Results:
+ *	No return value (you're on your own for errors here).
+ *
+ * Side effects:
+ *	Modifies *cflagsPtr, a regcomp flags word, and *eflagsPtr, a
+ *	regexec flags word, as appropriate.
+ *
+ *----------------------------------------------------------------------
+ */
 
+VOID
+TclRegXflags(string, length, cflagsPtr, eflagsPtr)
+    char *string;		/* The string of flag letters. */
+    int length;			/* The length of the string in bytes. */
+    int *cflagsPtr;		/* In/out: compile (regcomp) flags word. */
+    int *eflagsPtr;		/* In/out: exec (regexec) flags word. */
+{
+    int i;
+    int cflags;
+    int eflags;
+
+    cflags = *cflagsPtr;
+    eflags = *eflagsPtr;
+    for (i = 0; i < length; i++) {
+	switch (string[i]) {		/* unrecognized letters are ignored */
+	    case 'a': {			/* add advanced features */
+		cflags |= REG_ADVF;
+		break;
+	    }
+	    case 'b': {			/* basic REs: clear both RE-flavor bits */
+		cflags &= ~REG_ADVANCED;
+		break;
+	    }
+	    case 'e': {			/* extended REs, no advanced features */
+		cflags &= ~REG_ADVANCED;
+		cflags |= REG_EXTENDED;
+		break;
+	    }
+	    case 'q': {			/* quoted: no special characters */
+		cflags &= ~REG_ADVANCED;
+		cflags |= REG_QUOTE;
+		break;
+	    }
+	    case 'i': {			/* ignore case */
+		cflags |= REG_ICASE;
+		break;
+	    }
+	    case 'o': {			/* o for opaque: don't care about subexpressions */
+		cflags |= REG_NOSUB;
+		break;
+	    }
+	    case 'x': {			/* expanded format */
+		cflags |= REG_EXPANDED;
+		break;
+	    }
+	    case 'p': {			/* \n doesn't match . or [^ ] */
+		cflags |= REG_NLSTOP;
+		break;
+	    }
+	    case 'w': {			/* ^ matches after \n, $ before */
+		cflags |= REG_NLANCH;
+		break;
+	    }
+	    case 'n': {			/* newlines are line terminators */
+		cflags |= REG_NEWLINE;
+		break;
+	    }
+	    case '+': {
+		cflags |= REG_FAKEEC;
+		break;
+	    }
+	    case '^': {			/* beginning of string is not beginning of line */
+		eflags |= REG_NOTBOL;
+		break;
+	    }
+	    case '$': {			/* end of string is not end of line */
+		eflags |= REG_NOTEOL;
+		break;
+	    }
+	    case '%': {
+		eflags |= REG_SMALL;
+		break;
+	    }
+	}
+    }
+
+    *cflagsPtr = cflags;
+    *eflagsPtr = eflags;
+}
diff --git a/generic/tclRegexp.h b/generic/tclRegexp.h
index be5cb77..9e56730 100644
--- a/generic/tclRegexp.h
+++ b/generic/tclRegexp.h
@@ -7,8 +7,9 @@
  * Copyright (c) 1998 Henry Spencer.  All rights reserved.
  * 
  * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., and Sun Microsystems Inc., none of
- * whom are responsible for the results.  The author thanks all of them.
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and
+ * Scriptics Corporation, none of whom are responsible for the results.
+ * The author thanks all of them.
  * 
  * Redistribution and use in source and binary forms -- with or without
  * modification -- are permitted for any purpose, provided that
@@ -26,20 +27,19 @@
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  * 
+ * Copyright (c) 1998 by Scriptics Corporation.
  * Copyright (c) 1998 by Sun Microsystems, Inc.
  *
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclRegexp.h,v 1.1.2.2 1998/09/24 23:59:02 stanton Exp $
+ * RCS: @(#) $Id: tclRegexp.h,v 1.1.2.3 1998/10/21 20:40:06 stanton Exp $
  */
 
 #ifndef _TCLREGEXP
 #define _TCLREGEXP
 
-#ifndef _TCLINT
-#include "tclInt.h"
-#endif
+#include "regcustom.h"
 
 #ifdef BUILD_tcl
 # undef TCL_STORAGE_CLASS
@@ -47,190 +47,6 @@
 #endif
 
 /*
- * The following definitions were culled from wctype.h and wchar.h.
- * Those two header files are now gone.  Eventually we should replace all
- * instances of, e.g., iswalnum() with TclUniCharIsAlnum() in the regexp
- * code.
- */
- 
-#undef wint_t
-#define wint_t int
-
-#undef WEOF
-#undef WCHAR_MIN
-#undef WCHAR_MAX
-
-#define WEOF		-1
-#define WCHAR_MIN	0x0000
-#define WCHAR_MAX	0xffff
-
-#undef iswalnum
-#undef iswalpha
-#undef iswdigit
-#undef iswspace
-
-#define	iswalnum(x)	TclUniCharIsAlnum(x)
-#define	iswalpha(x)	TclUniCharIsAlpha(x)
-#define	iswdigit(x)	TclUniCharIsDigit(x)
-#define	iswspace(x)	TclUniCharIsSpace(x)
-
-#undef wcslen
-#undef wcsncmp
-
-#define	wcslen		TclUniCharLen
-#define	wcsncmp		TclUniCharNcmp
-
-/*
- * The following definitions were added by JO to make Tcl compile
- * under SunOS, where off_t and wchar_t aren't defined; perhaps all of
- * the code below can be collapsed into a few simple definitions?
- */
-
-#ifndef __RE_REGOFF_T
-#   define __RE_REGOFF_T int
-#endif
-#ifndef __RE_WCHAR_T
-#   define __RE_WCHAR_T Tcl_UniChar
-#endif
-
-/*
- * regoff_t has to be large enough to hold either off_t or ssize_t,
- * and must be signed; it's only a guess that off_t is big enough, so we
- * offer an override.
- */
-#ifdef __RE_REGOFF_T
-typedef __RE_REGOFF_T regoff_t;		/* offset type for result reporting */
-#else
-typedef off_t regoff_t;
-#endif
-
-/*
- * We offer the option of using a non-wchar_t type in the w prototypes so
- * that <regex.h> can be included without first including (e.g.) <wchar.h>.
- * Note that __RE_WCHAR_T must in fact be the same type as wchar_t!
- */
-#ifdef __RE_WCHAR_T
-typedef __RE_WCHAR_T re_wchar;	/* internal name for the type */
-#else
-typedef wchar_t re_wchar;
-#endif
-
-#define	REMAGIC	0xfed7
-
-/*
- * other interface types
- */
-
-/* the biggie, a compiled RE (or rather, a front end to same) */
-typedef struct {
-	int re_magic;		/* magic number */
-	size_t re_nsub;		/* number of subexpressions */
-	int re_info;		/* information about RE */
-#		define	REG_UBACKREF		000001
-#		define	REG_ULOOKAHEAD		000002
-#		define	REG_UBOUNDS		000004
-#		define	REG_UBRACES		000010
-#		define	REG_UBSALNUM		000020
-#		define	REG_UPBOTCH		000040
-#		define	REG_UBBS		000100
-#		define	REG_UNONPOSIX		000200
-#		define	REG_UUNSPEC		000400
-#		define	REG_UUNPORT		001000
-#		define	REG_ULOCALE		002000
-#		define	REG_UEMPTYMATCH		004000
-	int re_csize;		/* sizeof(character) */
-	VOID *re_guts;		/* none of your business :-) */
-	VOID *re_fns;		/* none of your business :-) */
-} regex_t;
-
-/* result reporting (may acquire more fields later) */
-typedef struct {
-	regoff_t rm_so;		/* start of substring */
-	regoff_t rm_eo;		/* end of substring */
-} regmatch_t;
-
-
-
-/*
- * compilation
- ^ int regcomp(regex_t *, const char *, int);
- ^ int re_comp(regex_t *, const char *, size_t, int);
- ^ #ifndef __RE_NOWIDE
- ^ int re_wcomp(regex_t *, const re_wchar *, size_t, int);
- ^ #endif
- */
-
-#define	REG_DUMP	004000	/* none of your business :-) */
-#define	REG_FAKE	010000	/* none of your business :-) */
-#define	REG_PROGRESS	020000	/* none of your business :-) */
-
-
-
-/*
- * execution
- ^ int regexec(regex_t *, const char *, size_t, regmatch_t [], int);
- ^ int re_exec(regex_t *, const char *, size_t, size_t, regmatch_t [], int);
- ^ #ifndef __RE_NOWIDE
- ^ int re_wexec(regex_t *, const re_wchar *, size_t, size_t, regmatch_t [], int);
- ^ #endif
- */
-#define	REG_FTRACE	0010	/* none of your business */
-#define	REG_MTRACE	0020	/* none of your business */
-#define	REG_SMALL	0040	/* none of your business */
-
-/*
- * error reporting
- * Be careful if modifying the list of error codes -- the table used by
- * regerror() is generated automatically from this file!
- *
- * Note that there is no wchar_t variant of regerror at this time; what
- * kind of character is used for error reports is independent of what kind
- * is used in matching.
- *
- ^ extern size_t regerror(int, const regex_t *, char *, size_t);
- */
-#define	REG_OKAY	 0	/* no errors detected */
-#define	REG_NOMATCH	 1	/* regexec() failed to match */
-#define	REG_BADPAT	 2	/* invalid regular expression */
-#define	REG_ECOLLATE	 3	/* invalid collating element */
-#define	REG_ECTYPE	 4	/* invalid character class */
-#define	REG_EESCAPE	 5	/* invalid escape \ sequence */
-#define	REG_ESUBREG	 6	/* invalid backreference number */
-#define	REG_EBRACK	 7	/* brackets [] not balanced */
-#define	REG_EPAREN	 8	/* parentheses () not balanced */
-#define	REG_EBRACE	 9	/* braces {} not balanced */
-#define	REG_BADBR	10	/* invalid repetition count(s) */
-#define	REG_ERANGE	11	/* invalid character range */
-#define	REG_ESPACE	12	/* out of memory */
-#define	REG_BADRPT	13	/* quantifier operand invalid */
-#define	REG_EMPTY	14	/* empty regular expression */
-#define	REG_ASSERT	15	/* "can't happen" -- you found a bug */
-#define	REG_INVARG	16	/* invalid argument to regex routine */
-#define	REG_MIXED	17	/* char RE applied to wchar_t string (etc.) */
-#define	REG_BADOPT	18	/* invalid embedded option */
-#define	REG_IMPOSS	19	/* can never match */
-/* two specials for debugging and testing */
-#define	REG_ATOI	101	/* convert error-code name to number */
-#define	REG_ITOA	102	/* convert error-code number to name */
-
-
-
-/*
- * the prototypes, as possibly munched by fwd
- */
-/* =====^!^===== begin forwards =====^!^===== */
-/* automatically gathered by fwd; do not hand-edit */
-/* === regex.h === */
-EXTERN int	re_ucomp _ANSI_ARGS_((regex_t *, const Tcl_UniChar *,
-		    size_t, int));
-EXTERN int	re_uexec _ANSI_ARGS_((regex_t *, const Tcl_UniChar *,
-		    size_t, size_t, regmatch_t [], int));
-EXTERN VOID	regfree _ANSI_ARGS_((regex_t *));
-EXTERN size_t	regerror _ANSI_ARGS_((int, const regex_t *, char *, size_t));
-/* automatically gathered by fwd; do not hand-edit */
-/* =====^!^===== end forwards =====^!^===== */
-
-/*
  * The TclRegexp structure encapsulates a compiled regex_t,
  * the flags that were used to compile it, and an array of pointers
  * that are used to indicate subexpressions after a call to Tcl_RegExpExec.
@@ -251,6 +67,24 @@ typedef struct TclRegexp {
 } TclRegexp;
 
 /*
+ * Functions exported for use within the rest of Tcl.
+ */
+
+EXTERN Tcl_RegExp	TclRegCompObj _ANSI_ARGS_((Tcl_Interp *interp,
+			    Tcl_Obj *patObj, int flags));
+EXTERN int		TclRegAbout _ANSI_ARGS_((Tcl_Interp *interp,
+			    Tcl_RegExp re));
+EXTERN VOID		TclRegXflags _ANSI_ARGS_((char *string, int length,
+			    int *cflagsPtr, int *eflagsPtr));
+EXTERN int		TclRegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp,
+			    Tcl_RegExp re, CONST Tcl_UniChar *uniString,
+			    int numChars, int flags));
+EXTERN int		TclRegExpMatchObj _ANSI_ARGS_((Tcl_Interp *interp,
+			    char *string, Tcl_Obj *patObj));
+EXTERN void		TclRegExpRangeUniChar _ANSI_ARGS_((Tcl_RegExp re,
+			    int index, int *startPtr, int *endPtr));
+
+/*
  * Functions exported from the regexp package for the test package to use.
  */
 
@@ -258,8 +92,3 @@ EXTERN void		TclRegError _ANSI_ARGS_((Tcl_Interp *interp, char *msg,
 			    int status));
 
 #endif /* _TCLREGEXP */
-
-
-
-
-
diff --git a/generic/tclTest.c b/generic/tclTest.c
index 8da6785..2136b7c 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -12,14 +12,13 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclTest.c,v 1.1.2.2 1998/09/24 23:59:02 stanton Exp $
+ * RCS: @(#) $Id: tclTest.c,v 1.1.2.3 1998/10/21 20:40:07 stanton Exp $
  */
 
 #define TCL_TEST
 
 #include "tclInt.h"
 #include "tclPort.h"
-#include "tclRegexp.h"		/* To test internals of regexp package. */
 #include <locale.h>
 
 /*
@@ -245,9 +244,6 @@ static int		TestparsevarObjCmd _ANSI_ARGS_((ClientData dummy,
 static int		TestparsevarnameObjCmd _ANSI_ARGS_((ClientData dummy,
 			    Tcl_Interp *interp, int objc,
 			    Tcl_Obj *CONST objv[]));
-static int		TestregexpObjCmd _ANSI_ARGS_((ClientData dummy,
-			    Tcl_Interp *interp, int objc,
-			    Tcl_Obj *CONST objv[]));
 static int		TestsaveresultCmd _ANSI_ARGS_((ClientData dummy,
 			    Tcl_Interp *interp, int objc,
 			    Tcl_Obj *CONST objv[]));
@@ -398,8 +394,6 @@ Tcltest_Init(interp)
 	    (ClientData) 0, (Tcl_CmdDeleteProc *) NULL);
     Tcl_CreateObjCommand(interp, "testparsevarname", TestparsevarnameObjCmd,
 	    (ClientData) 0, (Tcl_CmdDeleteProc *) NULL);
-    Tcl_CreateObjCommand(interp, "testregexp", TestregexpObjCmd,
-	    (ClientData) 0, (Tcl_CmdDeleteProc *) NULL);
     Tcl_CreateObjCommand(interp, "testsaveresult", TestsaveresultCmd,
 	    (ClientData) 0, (Tcl_CmdDeleteProc *) NULL);
     Tcl_CreateCommand(interp, "testsetassocdata", TestsetassocdataCmd,
@@ -1317,19 +1311,15 @@ TestencodingObjCmd(dummy, interp, objc, objv)
     Tcl_Obj *CONST objv[];	/* Argument objects. */
 {
     Tcl_Encoding encoding;
-    Tcl_DString ds;
     int index, length;
     char *string;
-    Tcl_Obj *resultPtr;
     TclEncoding *encodingPtr;
     static char *optionStrings[] = {
-	"create",	"delete",	"toutf",	"fromutf",
-	"names",	"system",	"path",
+	"create",	"delete",	"path",
 	NULL
     };
     enum options {
-	ENC_CREATE,	ENC_DELETE,	ENC_TOUTF,	ENC_FROMUTF,
-	ENC_NAMES,	ENC_SYSTEM,	ENC_PATH
+	ENC_CREATE,	ENC_DELETE,	ENC_PATH
     };
     
     if (Tcl_GetIndexFromObj(interp, objv[1], optionStrings, "option", 0,
@@ -1376,79 +1366,6 @@ TestencodingObjCmd(dummy, interp, objc, objv)
 	    Tcl_FreeEncoding(encoding);
 	    break;
 	}
-	case ENC_TOUTF: {
-	    if (objc < 3) {
-		return TCL_ERROR;
-	    }
-	    if (objc == 3) {
-		string = "iso8859-1";
-	    } else {
-		string = Tcl_GetString(objv[3]);
-	    }
-	    encoding = Tcl_GetEncoding(NULL, string);
-
-	    string = (char *) Tcl_GetByteArrayFromObj(objv[2], &length);
-	    Tcl_ExternalToUtfDString(encoding, string, length, &ds);
-
-	    /*
-	     * If the encoding performs a Tcl_Eval() (which is the case for
-	     * encodings created by the "encoding create" command, the 
-	     * resultPtr from the interp will be invalidated and we need to 
-	     * get it again.
-	     */
-
-	    resultPtr = Tcl_GetObjResult(interp);
-	    Tcl_SetStringObj(resultPtr, Tcl_DStringValue(&ds),
-		    Tcl_DStringLength(&ds));
-	    Tcl_DStringFree(&ds);
-	    Tcl_FreeEncoding(encoding);
-	    break;
-	}
-	case ENC_FROMUTF: {
-	    if (objc < 3) {
-		return TCL_ERROR;
-	    }
-	    if (objc == 3) {
-		string = "iso8859-1";
-	    } else {
-		string = Tcl_GetString(objv[3]);
-	    }
-	    encoding = Tcl_GetEncoding(NULL, string);
-
-	    string = Tcl_GetStringFromObj(objv[2], &length);
-	    Tcl_UtfToExternalDString(encoding, string, length, &ds);
-
-	    /*
-	     * If the encoding performs a Tcl_Eval() (which is the case for
-	     * encodings created by the "encoding create" command, the 
-	     * resultPtr from the interp will be invalidated and we need to 
-	     * get it again.
-	     */
-
-	    resultPtr = Tcl_GetObjResult(interp);
-	    Tcl_SetByteArrayObj(resultPtr, 
-		    (unsigned char *) Tcl_DStringValue(&ds),
-		    Tcl_DStringLength(&ds));
-	    Tcl_DStringFree(&ds);
-	    Tcl_FreeEncoding(encoding);
-	    break;
-	}
-
-	case ENC_NAMES: {
-	    Tcl_GetEncodingNames(interp);
-	    break;
-	}
-	case ENC_SYSTEM: {
-	    if (objc == 2) {
-	        Tcl_SetResult(interp, Tcl_GetEncodingName(NULL), TCL_STATIC);
-	    } else {
-		char *str;
-		
-		str = Tcl_GetStringFromObj(objv[2], NULL);
-	        return Tcl_SetSystemEncoding(interp, str);
-	    }
-	    break;
-	}
 	case ENC_PATH: {
 	    if (objc == 2) {
 		Tcl_SetObjResult(interp, TclGetLibraryPath());
@@ -2584,251 +2501,6 @@ TestparsevarnameObjCmd(clientData, interp, objc, objv)
 /*
  *----------------------------------------------------------------------
  *
- * TestregexpObjCmd --
- *
- *	This procedure implements the "testregexp" command. It is
- *	used to give a direct interface for regexp flags.
- *
- * Results:
- *	A standard Tcl result.
- *
- * Side effects:
- *	None.
- *
- *----------------------------------------------------------------------
- */
-
-static int
-TestregexpObjCmd(dummy, interp, objc, objv)
-    ClientData dummy;			/* Not used. */
-    Tcl_Interp *interp;			/* Current interpreter. */
-    int objc;				/* Number of arguments. */
-    Tcl_Obj *CONST objv[];		/* Argument objects. */
-{
-    TclRegexp *regExpr;
-    char *string, *flagString, *start, *end;
-    int flags, match, i, j;
-    
-    if (objc < 4) {
-	Tcl_WrongNumArgs(interp, 1, objv,
-		"flags exp string ?subMatchVar subMatchVar ...?");
-        return TCL_ERROR;
-    }
-    flagString = Tcl_GetString(objv[1]);
-    string = Tcl_GetString(objv[3]);
-
-    flags = RegGetCompFlags(flagString);
-    regExpr = (TclRegexp *) TclRegCompObj(interp, objv[2], flags);
-    if (regExpr == NULL) {
-	return TCL_ERROR;
-    }
-
-    flags = RegGetExecFlags(flagString);
-    if (flags == -1) {
-	/*
-	 * Do not try to match the string.
-	 */
-	
-	match = 0;
-    } else {
-	Tcl_DString stringBuffer;
-	Tcl_UniChar *uniString;
-	int numChars;
-
-	/*
-	 * Remember the UTF-8 string so Tcl_RegExpRange() can convert the
-	 * matches from character to byte offsets.
-	 */
-
-	regExpr->string = string;
-
-	Tcl_DStringInit(&stringBuffer);
-	uniString = TclUtfToUniCharDString(string, -1, &stringBuffer);
-	numChars = Tcl_DStringLength(&stringBuffer) / sizeof(Tcl_UniChar);
-
-	match = TclRegExpExecUniChar(interp, (Tcl_RegExp) regExpr, uniString,
-		numChars, flags);
-	Tcl_DStringFree(&stringBuffer);
-
-	if (match < 0) {
-	    return TCL_ERROR;
-	}
-	if (flags & REG_NOSUB) {
-	    for (i = 0; i <= (int) regExpr->re.re_nsub; i++) {
-		regExpr->matches[i].rm_so = -1;
-		regExpr->matches[i].rm_eo = -1;
-	    } 
-	} 
-    }
-    if (!match) {
-	/*
-	 * Set the interpreter's object result to an integer object w/ value 0. 
-	 */
-	
-	Tcl_SetIntObj(Tcl_GetObjResult(interp), 0);
-	return TCL_OK;
-    }
-
-    /*
-     * If additional variable names have been specified, return
-     * index information in those variables.
-     */
-
-    for (i = 0, j = 4; j < objc; i++, j++) {
-	char *result;
-	char *currentString = Tcl_GetString(objv[j]);
-
-	Tcl_RegExpRange((Tcl_RegExp) regExpr, i, &start, &end);
-	if (start == NULL) {
-	    result = Tcl_SetVar(interp, currentString, "", 0);
-	} else {
-	    char savedChar, *first, *last;
-	    char *tempString = Tcl_GetString(objv[3]);
-	    first = tempString + (start - string);
-	    last = tempString + (end - string);
-	    if (first == last) { /* don't modify argument */
-		result = Tcl_SetVar(interp, currentString, "", 0);
-	    } else {
-		savedChar = *last;
-		*last = 0;
-		result = Tcl_SetVar(interp, currentString, first, 0);
-		*last = savedChar;
-	    }
-	}
-	if (result == NULL) {
-	    Tcl_AppendResult(interp, "couldn't set variable \"",
-		    currentString, "\"", (char *) NULL);
-	    return TCL_ERROR;
-	}
-    }
-
-    /*
-     * Set the interpreter's object result to an integer object w/ value 1. 
-     */
-	
-    Tcl_SetIntObj(Tcl_GetObjResult(interp), 1);
-    return TCL_OK;
-}
-
-/*
- *----------------------------------------------------------------------
- *
- * RegGetCompFlags --
- *
- *	Internal interface to regular expression compile flags.  
- *	Converts a string of chars to a single flag.
- *
- * Results:
- *	Returns a flags for regular expression compilation.
- *
- * Side effects:
- *	None.
- *
- *----------------------------------------------------------------------
- */
-static int
-RegGetCompFlags(s)
-    char *s;
-{
-    char c;
-    register char *p;
-    int result = REG_ADVANCED;
-
-    for (p = s; (c = *p) != '\0'; p++)
-	switch (c) {
-	    case 'a':
-		result |= REG_ADVF;
-		break;
-	    case 'b':
-		result &= ~REG_ADVANCED;
-		break;
-	    case 'e':
-		result &= ~REG_ADVF;
-		result |= REG_EXTENDED;
-		break;
-	    case 'i':
-		result |= REG_ICASE;
-		break;
-	    case 'm':
-	    case 'n':
-		result |= REG_NEWLINE;
-		break;
-	    case 'p':
-		result |= REG_NLSTOP;
-		break;
-	    case 'q':
-		result &= ~REG_ADVANCED;
-		result |= REG_QUOTE;
-		break;
-	    case 's':
-		result |= REG_NOSUB;
-		break;
-	    case 'w':
-		result |= REG_NLANCH;
-		break;
-	    case 'x':
-		result |= REG_EXPANDED;
-		break;
-	    case '+':
-		result |= REG_FAKE;
-		break;
-	    case ',':
-		result |= REG_PROGRESS;
-		break;
-	}
-    return result;
-}
-
-/*
- *----------------------------------------------------------------------
- *
- * RegGetExecFlags --
- *
- *	Internal interface to regular expression exec flags.  
- *	Converts a string of chars to a single flag.
- *
- * Results:
- *	Returns a flags for regular expression matching.
- *
- * Side effects:
- *	None.
- *
- *----------------------------------------------------------------------
- */
-static int
-RegGetExecFlags(s)
-    char *s;
-{
-    char c;
-    register char *p;
-    int result = 0;
-    
-    for (p = s; (c = *p) != '\0'; p++)
-	switch (c) {
-	    case '^':
-		result |= REG_NOTBOL;
-		break;
-	    case '$':
-		result |= REG_NOTEOL;
-		break;
-	    case ';':
-		result |= REG_FTRACE;
-		break;
-	    case ':':
-		result |= REG_MTRACE;
-		break;
-	    case '.':
-		result |= REG_SMALL;
-		break;
-	    case '/':
-		return -1;
-	}
-    return result;
-}
-
-/*
- *----------------------------------------------------------------------
- *
  * TestsetassocdataCmd --
  *
  *	This procedure implements the "testsetassocdata" command. It is used
author	stanton <stanton>	1998-10-21 20:39:57 (GMT)
committer	stanton <stanton>	1998-10-21 20:39:57 (GMT)
commit	7e7056e21d0a0d9fa39bdfd742e82b101a6c4b7c (patch)
tree	99e08a09e1567ade05e7bc7edac3758b3695d424 /generic
parent	966ff877247e93fbe6e641cfa77df19d03cfe932 (diff)
download	tcl-7e7056e21d0a0d9fa39bdfd742e82b101a6c4b7c.zip tcl-7e7056e21d0a0d9fa39bdfd742e82b101a6c4b7c.tar.gz tcl-7e7056e21d0a0d9fa39bdfd742e82b101a6c4b7c.tar.bz2