8 files changed, 133 insertions, 636 deletions
diff --git a/ChangeLog b/ChangeLog
index 5ca8133..f48ec8b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2007-11-14  Donal K. Fellows  <donal.k.fellows@man.ac.uk>
+
+	* generic/regc*.c: Eliminate multi-char collating element code
+	completely. Simplifies the code quite a bit. If people still want the
+	full code, it will remain on the 8.4 branch. [Bug 1831425]
+
 2007-11-13  Jeff Hobbs  <jeffh@ActiveState.com>
 
 	* generic/tclCompCmds.c (TclCompileRegexpCmd): clean up comments,
diff --git a/generic/regc_color.c b/generic/regc_color.c
index 02634d9..003f5fc 100644
--- a/generic/regc_color.c
+++ b/generic/regc_color.c
@@ -678,22 +678,6 @@ uncolorchain(
     a->colorchain = NULL;	/* paranoia */
 }
 
-#ifdef REGEXP_MCCE_ENABLED
-/*
- - singleton - is this character in its own color?
- ^ static int singleton(struct colormap *, pchr c);
- */
-static int			/* predicate */
-singleton(
-    struct colormap *cm,
-    pchr c)
-{
-    color co = GETCOLOR(cm, c);	/* color of c */
-
-    return (cm->cd[co].nchrs == 1) && (cm->cd[co].sub == NOSUB);
-}
-#endif
-
 /*
  - rainbow - add arcs of all full colors (but one) between specified states
  ^ static VOID rainbow(struct nfa *, struct colormap *, int, pcolor,
diff --git a/generic/regc_cvec.c b/generic/regc_cvec.c
index afb2f48..64f34cd 100644
--- a/generic/regc_cvec.c
+++ b/generic/regc_cvec.c
@@ -36,37 +36,17 @@
 
 /*
  - newcvec - allocate a new cvec
- ^ static struct cvec *newcvec(int, int, int);
+ ^ static struct cvec *newcvec(int, int);
  */
 static struct cvec *
 newcvec(
     int nchrs,			/* to hold this many chrs... */
     int nranges)		/* ... and this many ranges... */
-#ifdef REGEXP_MCCE_ENABLED
-    int nmcces)			/* ... and this many MCCEs */
-#endif
 {
-    size_t n, nc;
-    struct cvec *cv;
+    size_t nc = (size_t)nchrs + (size_t)nranges*2;
+    size_t n = sizeof(struct cvec) + nc*sizeof(chr);
+    struct cvec *cv = (struct cvec *) MALLOC(n);
 
-#ifdef REGEXP_MCCE_ENABLED
-    nc = (size_t)nchrs + (size_t)nmcces*(MAXMCCE+1) + (size_t)nranges*2;
-    n = sizeof(struct cvec) + (size_t)(nmcces-1)*sizeof(chr *)
-	    + nc*sizeof(chr);
-    cv = (struct cvec *) MALLOC(n);
-    if (cv == NULL) {
-	return NULL;
-    }
-    cv->chrspace = nchrs;
-    cv->chrs = (chr *)&cv->mcces[nmcces];	/* chrs just after MCCE ptrs */
-    cv->mccespace = nmcces;
-    cv->ranges = cv->chrs + nchrs + nmcces*(MAXMCCE+1);
-    cv->rangespace = nranges;
-    return clearcvec(cv);
-#else
-    nc = (size_t)nchrs + (size_t)nranges*2;
-    n = sizeof(struct cvec) + nc*sizeof(chr);
-    cv = (struct cvec *) MALLOC(n);
     if (cv == NULL) {
 	return NULL;
     }
@@ -75,7 +55,6 @@ newcvec(
     cv->ranges = cv->chrs + nchrs;
     cv->rangespace = nranges;
     return clearcvec(cv);
-#endif /*REGEXP_MCCE_ENABLED*/
 }
 
 /*
@@ -87,21 +66,9 @@ static struct cvec *
 clearcvec(
     struct cvec *cv)		/* character vector */
 {
-#ifdef REGEXP_MCCE_ENABLED
-    int i;
-#endif
-
     assert(cv != NULL);
     cv->nchrs = 0;
     cv->nranges = 0;
-#ifdef REGEXP_MCCE_ENABLED
-    assert(cv->chrs == (chr *)&cv->mcces[cv->mccespace]);
-    cv->nmcces = 0;
-    cv->nmccechrs = 0;
-    for (i = 0; i < cv->mccespace; i++) {
-	cv->mcces[i] = NULL;
-    }
-#endif
     return cv;
 }
 
@@ -114,7 +81,7 @@ addchr(
     struct cvec *cv,		/* character vector */
     pchr c)			/* character to add */
 {
-    assert(cv->nchrs < cv->chrspace - cv->nmccechrs);
+    assert(cv->nchrs < cv->chrspace);
     cv->chrs[cv->nchrs++] = (chr)c;
 }
 
@@ -134,89 +100,17 @@ addrange(
     cv->nranges++;
 }
 
-#ifdef REGEXP_MCCE_ENABLED
-/*
- * This static function is currently called from a single spot in regcomp.c,
- * with two NULL pointers; in that case it does nothing, so that we define out
- * both the call and the code.
- */
-
-/*
- - addmcce - add an MCCE to a cvec
- ^ static VOID addmcce(struct cvec *, const chr *, const chr *);
- */
-
-static void
-addmcce(
-    struct cvec *cv,		/* character vector */
-    const chr *startp,		/* beginning of text */
-    const chr *endp)		/* just past end of text */
-{
-    int len, i;
-    const chr *s, *d;
-
-    if (startp == NULL && endp == NULL) {
-	return;
-    }
-    len = endp - startp;
-    assert(len > 0);
-    assert(cv->nchrs + len < cv->chrspace - cv->nmccechrs);
-    assert(cv->nmcces < cv->mccespace);
-    d = &cv->chrs[cv->chrspace - cv->nmccechrs - len - 1];
-    cv->mcces[cv->nmcces++] = d;
-    for (s = startp, i = len; i > 0; s++, i--) {
-	*d++ = *s;
-    }
-    *d++ = 0;			/* endmarker */
-    assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]);
-    cv->nmccechrs += len + 1;
-}
-#endif
-
-/*
- - haschr - does a cvec contain this chr?
- ^ static int haschr(struct cvec *, pchr);
- */
-#ifdef REGEXP_MCCE_ENABLED
-static int			/* predicate */
-haschr(
-    struct cvec *cv,		/* character vector */
-    pchr c)			/* character to test for */
-{
-    int i;
-    const chr *p;
-
-    for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) {
-	if (*p == c) {
-	    return 1;
-	}
-    }
-    for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) {
-	if ((*p <= c) && (c <= *(p+1))) {
-	    return 1;
-	}
-    }
-    return 0;
-}
-#endif
-
 /*
  - getcvec - get a cvec, remembering it as v->cv
- ^ static struct cvec *getcvec(struct vars *, int, int, int);
+ ^ static struct cvec *getcvec(struct vars *, int, int);
  */
 static struct cvec *
 getcvec(
     struct vars *v,		/* context */
     int nchrs,			/* to hold this many chrs... */
     int nranges)		/* ... and this many ranges... */
-#ifdef REGEXP_MCCE_ENABLED
-    int nmcces)			/* ... and this many MCCEs */
-#endif
 {
     if ((v->cv != NULL) && (nchrs <= v->cv->chrspace) &&
-#ifdef REGEXP_MCCE_ENABLED
-	    (nmcces <= v->cv->mccespace) &&
-#endif
 	    (nranges <= v->cv->rangespace)) {
 	return clearcvec(v->cv);
     }
@@ -224,7 +118,7 @@ getcvec(
     if (v->cv != NULL) {
 	freecvec(v->cv);
     }
-    v->cv = newcvec(nchrs, nranges/*, nmcces*/);
+    v->cv = newcvec(nchrs, nranges);
     if (v->cv == NULL) {
 	ERR(REG_ESPACE);
     }
diff --git a/generic/regc_locale.c b/generic/regc_locale.c
index 438e821..ac310c9 100644
--- a/generic/regc_locale.c
+++ b/generic/regc_locale.c
@@ -9,7 +9,7 @@
  * See the file "license.terms" for information on usage and redistribution of
  * this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: regc_locale.c,v 1.18 2007/11/14 00:07:58 dkf Exp $
+ * RCS: @(#) $Id: regc_locale.c,v 1.19 2007/11/14 11:04:59 dkf Exp $
  */
 
 /* ASCII character-name table */
@@ -611,49 +611,6 @@ static const chr printCharTable[] = {
 #define	CH	NOCELT
 
 /*
- - nmcces - how many distinct MCCEs are there?
- ^ static int nmcces(struct vars *);
- */
-#ifdef REGEXP_MCCE_ENABLED
-static int
-nmcces(
-    struct vars *v)		/* context */
-{
-    /*
-     * No multi-character collating elements defined at the moment.
-     */
-    return 0;
-}
-#endif
-
-/*
- - nleaders - how many chrs can be first chrs of MCCEs?
- ^ static int nleaders(struct vars *);
- */
-#ifdef REGEXP_MCCE_ENABLED
-static int
-nleaders(
-    struct vars *v)		/* context */
-{
-    return 0;
-}
-#endif
-
-/*
- - allmcces - return a cvec with all the MCCEs of the locale
- ^ static struct cvec *allmcces(struct vars *, struct cvec *);
- */
-#ifdef REGEXP_MCCE_ENABLED
-static struct cvec *
-allmcces(
-    struct vars *v,		/* context */
-    struct cvec *cv)		/* this is supposed to have enough room */
-{
-    return clearcvec(cv);
-}
-#endif
-
-/*
  - element - map collating-element name to celt
  ^ static celt element(struct vars *, const chr *, const chr *);
  */
@@ -724,8 +681,8 @@ range(
 	return NULL;
     }
 
-    if (!cases) {			/* easy version */
-	cv = getcvec(v, 0, 1/*, 0*/);
+    if (!cases) {		/* easy version */
+	cv = getcvec(v, 0, 1);
 	NOERRN();
 	addrange(cv, a, b);
 	return cv;
@@ -739,7 +696,7 @@ range(
 
     nchrs = (b - a + 1)*2 + 4;
 
-    cv = getcvec(v, nchrs, 0/*, 0*/);
+    cv = getcvec(v, nchrs, 0);
     NOERRN();
 
     for (c=a; c<=b; c++) {
@@ -765,14 +722,10 @@ range(
  - before - is celt x before celt y, for purposes of range legality?
  ^ static int before(celt, celt);
  */
-static int				/* predicate */
+static int			/* predicate */
 before(
-    celt x, celt y)			/* collating elements */
+    celt x, celt y)		/* collating elements */
 {
-    /*
-     * trivial because no MCCEs.
-     */
-
     if (x < y) {
 	return 1;
     }
@@ -798,7 +751,7 @@ eclass(
      */
 
     if ((v->cflags&REG_FAKE) && c == 'x') {
-	cv = getcvec(v, 4, 0/*, 0*/);
+	cv = getcvec(v, 4, 0);
 	addchr(cv, (chr)'x');
 	addchr(cv, (chr)'y');
 	if (cases) {
@@ -815,7 +768,7 @@ eclass(
     if (cases) {
 	return allcases(v, c);
     }
-    cv = getcvec(v, 1, 0/*, 0*/);
+    cv = getcvec(v, 1, 0);
     assert(cv != NULL);
     addchr(cv, (chr)c);
     return cv;
@@ -895,7 +848,7 @@ cclass(
 
     switch((enum classes) index) {
     case CC_PRINT:
-	cv = getcvec(v, NUM_PRINT_CHAR, NUM_PRINT_RANGE/*, 0*/);
+	cv = getcvec(v, NUM_PRINT_CHAR, NUM_PRINT_RANGE);
 	if (cv) {
 	    for (i=0 ; (size_t)i<NUM_PRINT_CHAR ; i++) {
 		addchr(cv, printCharTable[i]);
@@ -907,7 +860,7 @@ cclass(
 	}
 	break;
     case CC_ALNUM:
-	cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE/*, 0*/);
+	cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE);
 	if (cv) {
 	    for (i=0 ; (size_t)i<NUM_ALPHA_CHAR ; i++) {
 		addchr(cv, alphaCharTable[i]);
@@ -923,7 +876,7 @@ cclass(
 	}
 	break;
     case CC_ALPHA:
-	cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE/*, 0*/);
+	cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE);
 	if (cv) {
 	    for (i=0 ; (size_t)i<NUM_ALPHA_RANGE ; i++) {
 		addrange(cv, alphaRangeTable[i].start,
@@ -935,23 +888,27 @@ cclass(
 	}
 	break;
     case CC_ASCII:
-	cv = getcvec(v, 0, 1/*, 0*/);
+	cv = getcvec(v, 0, 1);
 	if (cv) {
 	    addrange(cv, 0, 0x7f);
 	}
 	break;
     case CC_BLANK:
-	cv = getcvec(v, 2, 0/*, 0*/);
-	addchr(cv, '\t');
-	addchr(cv, ' ');
+	cv = getcvec(v, 2, 0);
+	if (cv) {
+	    addchr(cv, '\t');
+	    addchr(cv, ' ');
+	}
 	break;
     case CC_CNTRL:
-	cv = getcvec(v, 0, 2/*, 0*/);
-	addrange(cv, 0x0, 0x1f);
-	addrange(cv, 0x7f, 0x9f);
+	cv = getcvec(v, 0, 2);
+	if (cv) {
+	    addrange(cv, 0x0, 0x1f);
+	    addrange(cv, 0x7f, 0x9f);
+	}
 	break;
     case CC_DIGIT:
-	cv = getcvec(v, 0, NUM_DIGIT_RANGE/*, 0*/);
+	cv = getcvec(v, 0, NUM_DIGIT_RANGE);
 	if (cv) {
 	    for (i=0 ; (size_t)i<NUM_DIGIT_RANGE ; i++) {
 		addrange(cv, digitRangeTable[i].start,
@@ -960,7 +913,7 @@ cclass(
 	}
 	break;
     case CC_PUNCT:
-	cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE/*, 0*/);
+	cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE);
 	if (cv) {
 	    for (i=0 ; (size_t)i<NUM_PUNCT_RANGE ; i++) {
 		addrange(cv, punctRangeTable[i].start,
@@ -981,7 +934,7 @@ cclass(
 	 * someone comes up with a better arrangement!)
 	 */
 
-	cv = getcvec(v, 0, 3/*, 0*/);
+	cv = getcvec(v, 0, 3);
 	if (cv) {
 	    addrange(cv, '0', '9');
 	    addrange(cv, 'a', 'f');
@@ -989,7 +942,7 @@ cclass(
 	}
 	break;
     case CC_SPACE:
-	cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE/*, 0*/);
+	cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE);
 	if (cv) {
 	    for (i=0 ; (size_t)i<NUM_SPACE_RANGE ; i++) {
 		addrange(cv, spaceRangeTable[i].start,
@@ -1001,7 +954,7 @@ cclass(
 	}
 	break;
     case CC_LOWER:
-	cv  = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE/*, 0*/);
+	cv  = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE);
 	if (cv) {
 	    for (i=0 ; (size_t)i<NUM_LOWER_RANGE ; i++) {
 		addrange(cv, lowerRangeTable[i].start,
@@ -1013,7 +966,7 @@ cclass(
 	}
 	break;
     case CC_UPPER:
-	cv  = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE/*, 0*/);
+	cv  = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE);
 	if (cv) {
 	    for (i=0 ; (size_t)i<NUM_UPPER_RANGE ; i++) {
 		addrange(cv, upperRangeTable[i].start,
@@ -1025,7 +978,7 @@ cclass(
 	}
 	break;
     case CC_GRAPH:
-	cv  = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE/*, 0*/);
+	cv  = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE);
 	if (cv) {
 	    for (i=0 ; (size_t)i<NUM_GRAPH_RANGE ; i++) {
 		addrange(cv, graphRangeTable[i].start,
@@ -1063,10 +1016,10 @@ allcases(
     tc = Tcl_UniCharToTitle((chr)c);
 
     if (tc != uc) {
-	cv = getcvec(v, 3, 0/*, 0*/);
+	cv = getcvec(v, 3, 0);
 	addchr(cv, tc);
     } else {
-	cv = getcvec(v, 2, 0/*, 0*/);
+	cv = getcvec(v, 2, 0);
     }
     addchr(cv, lc);
     if (lc != uc) {
diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c
index 9f63f73..20e821f 100644
--- a/generic/regc_nfa.c
+++ b/generic/regc_nfa.c
@@ -88,7 +88,7 @@ newnfa(
  - freenfa - free an entire NFA
  ^ static VOID freenfa(struct nfa *);
  */
-static VOID
+static void
 freenfa(
     struct nfa *nfa)
 {
diff --git a/generic/regcomp.c b/generic/regcomp.c
index 8a43240..b397334 100644
--- a/generic/regcomp.c
+++ b/generic/regcomp.c
@@ -55,10 +55,6 @@ static void brackpart(struct vars *, struct state *, struct state *);
 static const chr *scanplain(struct vars *);
 static void onechr(struct vars *, pchr, struct state *, struct state *);
 static void dovec(struct vars *, struct cvec *, struct state *, struct state *);
-#ifdef REGEXP_MCCE_ENABLED
-static void leaders(struct vars *, struct cvec *);
-static celt nextleader(struct vars *, pchr, pchr);
-#endif
 static void wordchrs(struct vars *);
 static struct subre *subre(struct vars *, int, int, struct state *, struct state *);
 static void freesubre(struct vars *, struct subre *);
@@ -107,9 +103,6 @@ static void subblock(struct vars *, pchr, struct state *, struct state *);
 static void okcolors(struct nfa *, struct colormap *);
 static void colorchain(struct colormap *, struct arc *);
 static void uncolorchain(struct colormap *, struct arc *);
-#ifdef REGEXP_MCCE_ENABLED
-static int singleton(struct colormap *, pchr c);
-#endif
 static void rainbow(struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *);
 static void colorcomplement(struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *);
 #ifdef REG_DEBUG
@@ -174,22 +167,10 @@ static void dumpcstate(int, struct carc *, struct cnfa *, FILE *);
 static struct cvec *clearcvec(struct cvec *);
 static void addchr(struct cvec *, pchr);
 static void addrange(struct cvec *, pchr, pchr);
-#ifdef REGEXP_MCCE_ENABLED
-static struct cvec *newcvec(int, int, int);
-static void addmcce(struct cvec *, const chr *, const chr *);
-static struct cvec *getcvec(struct vars *, int, int, int);
-static int haschr(struct cvec *, pchr);
-#else
 static struct cvec *newcvec(int, int);
 static struct cvec *getcvec(struct vars *, int, int);
-#endif
 static void freecvec(struct cvec *);
 /* === regc_locale.c === */
-#ifdef REGEXP_MCCE_ENABLED
-static int nleaders(struct vars *);
-static int nmcces(struct vars *);
-static struct cvec *allmcces(struct vars *, struct cvec *);
-#endif
 static celt element(struct vars *, const chr *, const chr *);
 static struct cvec *range(struct vars *, celt, celt, int);
 static int before(celt, celt);
@@ -228,12 +209,6 @@ struct vars {
     int ntree;			/* number of tree nodes */
     struct cvec *cv;		/* interface cvec */
     struct cvec *cv2;		/* utility cvec */
-#ifdef REGEXP_MCCE_ENABLED
-    struct cvec *mcces;		/* collating-element information */
-#define	ISCELEADER(v,c)	(v->mcces != NULL && haschr(v->mcces, (c)))
-    struct state *mccepbegin;	/* in nfa, start of MCCE prototypes */
-    struct state *mccepend;	/* in nfa, end of MCCE prototypes */
-#endif
     struct subre *lacons;	/* lookahead-constraint vector */
     int nlacons;		/* size of lacons */
 };
@@ -343,9 +318,6 @@ compile(
     v->treefree = NULL;
     v->cv = NULL;
     v->cv2 = NULL;
-#ifdef REGEXP_MCCE_ENABLED
-    v->mcces = NULL;
-#endif
     v->lacons = NULL;
     v->nlacons = 0;
     re->re_magic = REMAGIC;
@@ -375,18 +347,6 @@ compile(
     if (v->cv == NULL) {
 	return freev(v, REG_ESPACE);
     }
-#ifdef REGEXP_MCCE_ENABLED
-    i = nmcces(v);
-    if (i > 0) {
-	v->mcces = newcvec(nleaders(v), 0);
-	CNOERR();
-	v->mcces = allmcces(v, v->mcces);
-	leaders(v, v->mcces);
-	/* Function does nothing with NULL pointers */
-	addmcce(v->mcces, NULL, NULL); /* dummy */
-    }
-    CNOERR();
-#endif
 
     /*
      * Parsing.
@@ -559,11 +519,6 @@ freev(
     if (v->cv2 != NULL) {
 	freecvec(v->cv2);
     }
-#ifdef REGEXP_MCCE_ENABLED
-    if (v->mcces != NULL) {
-	freecvec(v->mcces);
-    }
-#endif
     if (v->lacons != NULL) {
 	freelacons(v->lacons, v->nlacons);
     }
@@ -850,7 +805,6 @@ parseqatom(
 	}
 	NEXT();
 	return;
-	break;
     case '$':
 	ARCV('$', 1);
 	if (v->cflags&REG_NLANCH) {
@@ -858,19 +812,16 @@ parseqatom(
 	}
 	NEXT();
 	return;
-	break;
     case SBEGIN:
 	ARCV('^', 1);		/* BOL */
 	ARCV('^', 0);		/* or BOS */
 	NEXT();
 	return;
-	break;
     case SEND:
 	ARCV('$', 1);		/* EOL */
 	ARCV('$', 0);		/* or EOS */
 	NEXT();
 	return;
-	break;
     case '<':
 	wordchrs(v);		/* does NEXT() */
 	s = newstate(v->nfa);
@@ -878,7 +829,6 @@ parseqatom(
 	nonword(v, BEHIND, lp, s);
 	word(v, AHEAD, s, rp);
 	return;
-	break;
     case '>':
 	wordchrs(v);		/* does NEXT() */
 	s = newstate(v->nfa);
@@ -886,7 +836,6 @@ parseqatom(
 	word(v, BEHIND, lp, s);
 	nonword(v, AHEAD, s, rp);
 	return;
-	break;
     case WBDRY:
 	wordchrs(v);		/* does NEXT() */
 	s = newstate(v->nfa);
@@ -898,7 +847,6 @@ parseqatom(
 	word(v, BEHIND, lp, s);
 	nonword(v, AHEAD, s, rp);
 	return;
-	break;
     case NWBDRY:
 	wordchrs(v);		/* does NEXT() */
 	s = newstate(v->nfa);
@@ -910,7 +858,6 @@ parseqatom(
 	nonword(v, BEHIND, lp, s);
 	nonword(v, AHEAD, s, rp);
 	return;
-	break;
     case LACON:			/* lookahead constraint */
 	pos = v->nextvalue;
 	NEXT();
@@ -925,7 +872,6 @@ parseqatom(
 	NOERR();
 	ARCV(LACON, n);
 	return;
-	break;
 
 	/*
 	 * Then errors, to get them out of the way.
@@ -937,11 +883,9 @@ parseqatom(
     case '{':
 	ERR(REG_BADRPT);
 	return;
-	break;
     default:
 	ERR(REG_ASSERT);
 	return;
-	break;
 
 	/*
 	 * Then plain characters, and minor variants on that theme.
@@ -1478,15 +1422,6 @@ cbracket(
 {
     struct state *left = newstate(v->nfa);
     struct state *right = newstate(v->nfa);
-#ifdef REGEXP_MCCE_ENABLED
-    struct state *s;
-    struct arc *a;		/* arc from lp */
-    struct arc *ba;		/* arc from left, from bracket() */
-    struct arc *pa;		/* MCCE-prototype arc */
-    color co;
-    const chr *p;
-    int i;
-#endif
 
     NOERR();
     bracket(v, left, right);
@@ -1498,69 +1433,16 @@ cbracket(
     assert(lp->nouts == 0);	/* all outarcs will be ours */
 
     /*
-     * Easy part of complementing
+     * Easy part of complementing, and all there is to do since the MCCE code
+     * was removed.
      */
 
     colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp);
     NOERR();
-    if (1 /*v->mcces == NULL*/) {	/* no MCCEs -- we're done */
-	dropstate(v->nfa, left);
-	assert(right->nins == 0);
-	freestate(v->nfa, right);
-	return;
-    }
-
-#ifdef REGEXP_MCCE_ENABLED
-    /*
-     * But complementing gets messy in the presence of MCCEs...
-     */
-
-    NOTE(REG_ULOCALE);
-    for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) {
-	co = GETCOLOR(v->cm, *p);
-	a = findarc(lp, PLAIN, co);
-	ba = findarc(left, PLAIN, co);
-	if (ba == NULL) {
-	    assert(a != NULL);
-	    freearc(v->nfa, a);
-	} else {
-	    assert(a == NULL);
-	}
-	s = newstate(v->nfa);
-	NOERR();
-	newarc(v->nfa, PLAIN, co, lp, s);
-	NOERR();
-	pa = findarc(v->mccepbegin, PLAIN, co);
-	assert(pa != NULL);
-	if (ba == NULL) {	/* easy case, need all of them */
-	    cloneouts(v->nfa, pa->to, s, rp, PLAIN);
-	    newarc(v->nfa, '$', 1, s, rp);
-	    newarc(v->nfa, '$', 0, s, rp);
-	    colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp);
-	} else {		/* must be selective */
-	    if (findarc(ba->to, '$', 1) == NULL) {
-		newarc(v->nfa, '$', 1, s, rp);
-		newarc(v->nfa, '$', 0, s, rp);
-		colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp);
-	    }
-	    for (pa = pa->to->outs; pa != NULL; pa = pa->outchain) {
-		if (findarc(ba->to, PLAIN, pa->co) == NULL) {
-		    newarc(v->nfa, PLAIN, pa->co, s, rp);
-		}
-	    }
-	    if (s->nouts == 0) {	/* limit of selectivity: none */
-		dropstate(v->nfa, s);	/* frees arc too */
-	    }
-	}
-	NOERR();
-    }
-
-    delsub(v->nfa, left, right);
-    assert(left->nouts == 0);
-    freestate(v->nfa, left);
+    dropstate(v->nfa, left);
     assert(right->nins == 0);
     freestate(v->nfa, right);
-#endif
+    return;
 }
 
 /*
@@ -1592,10 +1474,10 @@ brackpart(
 	NEXT();
 
 	/*
-	 * Shortcut for ordinary chr (not range, not MCCE leader).
+	 * Shortcut for ordinary chr (not range).
 	 */
 
-	if (!SEE(RANGE) /*&& !ISCELEADER(v, c[0])*/) {
+	if (!SEE(RANGE)) {
 	    onechr(v, c[0], lp, rp);
 	    return;
 	}
@@ -1706,50 +1588,6 @@ scanplain(
 }
 
 /*
- - leaders - process a cvec of collating elements to also include leaders
- * Also gives all characters involved their own colors, which is almost
- * certainly necessary, and sets up little disconnected subNFA.
- ^ static void leaders(struct vars *, struct cvec *);
- */
-#ifdef REGEXP_MCCE_ENABLED
-static void
-leaders(
-    struct vars *v,
-    struct cvec *cv)
-{
-    int mcce;
-    const chr *p;
-    chr leader;
-    struct state *s;
-    struct arc *a;
-
-    v->mccepbegin = newstate(v->nfa);
-    v->mccepend = newstate(v->nfa);
-    NOERR();
-
-    for (mcce = 0; mcce < cv->nmcces; mcce++) {
-	p = cv->mcces[mcce];
-	leader = *p;
-	if (!haschr(cv, leader)) {
-	    addchr(cv, leader);
-	    s = newstate(v->nfa);
-	    newarc(v->nfa, PLAIN, subcolor(v->cm, leader), v->mccepbegin, s);
-	    okcolors(v->nfa, v->cm);
-	} else {
-	    a = findarc(v->mccepbegin, PLAIN, GETCOLOR(v->cm, leader));
-	    assert(a != NULL);
-	    s = a->to;
-	    assert(s != v->mccepend);
-	}
-	p++;
-	assert(*p != 0 && *(p+1) == 0);	/* only 2-char MCCEs for now */
-	newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend);
-	okcolors(v->nfa, v->cm);
-    }
-}
-#endif
-
-/*
  - onechr - fill in arcs for a plain character, and possible case complements
  * This is mostly a shortcut for efficient handling of the common case.
  ^ static void onechr(struct vars *, pchr, struct state *, struct state *);
@@ -1766,17 +1604,18 @@ onechr(
 	return;
     }
 
-    /* rats, need general case anyway... */
+    /*
+     * Rats, need general case anyway...
+     */
+
     dovec(v, allcases(v, c), lp, rp);
 }
 
 /*
  - dovec - fill in arcs for each element of a cvec
- * This one has to handle the messy cases, like MCCEs and MCCE leaders.
  ^ static void dovec(struct vars *, struct cvec *, struct state *,
  ^ 	struct state *);
  */
-#ifndef REGEXP_MCCE_ENABLED
 static void
 dovec(
     struct vars *v,
@@ -1802,184 +1641,6 @@ dovec(
     }
 
 }
-#else /* REGEXP_MCCE_ENABLED */
-static void
-dovec(
-    struct vars *v,
-    struct cvec *cv,
-    struct state *lp,
-    struct state *rp)
-{
-    chr ch, from, to;
-    celt ce;
-    const chr *p;
-    int i;
-    struct cvec *leads;
-    color co;
-    struct arc *a;
-    struct arc *pa;		/* arc in prototype */
-    struct state *s;
-    struct state *ps;		/* state in prototype */
-
-    /*
-     * Need a place to store leaders, if any.
-     */
-
-    if (nmcces(v) > 0) {
-	assert(v->mcces != NULL);
-	if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs) {
-	    if (v->cv2 != NULL) {
-		free(v->cv2);
-	    }
-	    v->cv2 = newcvec(v->mcces->nchrs, 0, v->mcces->nmcces);
-	    NOERR();
-	    leads = v->cv2;
-	} else {
-	    leads = clearcvec(v->cv2);
-	}
-    } else {
-	leads = NULL;
-    }
-
-    /*
-     * First, get the ordinary characters out of the way.
-     */
-
-    for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) {
-	ch = *p;
-	if (!ISCELEADER(v, ch)) {
-	    newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp);
-	} else {
-	    assert(singleton(v->cm, ch));
-	    assert(leads != NULL);
-	    if (!haschr(leads, ch)) {
-		addchr(leads, ch);
-	    }
-	}
-    }
-
-    /*
-     * And the ranges.
-     */
-
-    for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) {
-	from = *p;
-	to = *(p+1);
-	while (from <= to && (ce = nextleader(v, from, to)) != NOCELT) {
-	    if (from < ce) {
-		subrange(v, from, ce - 1, lp, rp);
-	    }
-	    assert(singleton(v->cm, ce));
-	    assert(leads != NULL);
-	    if (!haschr(leads, ce)) {
-		addchr(leads, ce);
-	    }
-	    from = ce + 1;
-	}
-	if (from <= to) {
-	    subrange(v, from, to, lp, rp);
-	}
-    }
-
-    /* *** WARNING ***
-     *
-     * This was buggy, check before enabling: the original version would cause
-     * a segfault at the loopinit below if (leads==NULL && cv->nmcces!=0)
-     * Possibly just a problem with parens? The original condition was
-     * ((leads == NULL || leads->nchrs == 0) && cv->nmcces == 0)
-     */
-
-    if (leads == NULL || (leads->nchrs == 0 && cv->nmcces == 0)) {
-	return;
-    }
-
-    /*
-     * Deal with the MCCE leaders.
-     */
-
-    NOTE(REG_ULOCALE);
-    for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--) {
-	co = GETCOLOR(v->cm, *p);
-	a = findarc(lp, PLAIN, co);
-	if (a != NULL) {
-	    s = a->to;
-	} else {
-	    s = newstate(v->nfa);
-	    NOERR();
-	    newarc(v->nfa, PLAIN, co, lp, s);
-	    NOERR();
-	}
-	pa = findarc(v->mccepbegin, PLAIN, co);
-	assert(pa != NULL);
-	ps = pa->to;
-	newarc(v->nfa, '$', 1, s, rp);
-	newarc(v->nfa, '$', 0, s, rp);
-	colorcomplement(v->nfa, v->cm, AHEAD, ps, s, rp);
-	NOERR();
-    }
-
-    /*
-     * And the MCCEs.
-     */
-
-    for (i = 0; i < cv->nmcces; i++) {
-	p = cv->mcces[i];
-	assert(singleton(v->cm, *p));
-	if (!singleton(v->cm, *p)) {
-	    ERR(REG_ASSERT);
-	    return;
-	}
-	ch = *p++;
-	co = GETCOLOR(v->cm, ch);
-	a = findarc(lp, PLAIN, co);
-	if (a != NULL) {
-	    s = a->to;
-	} else {
-	    s = newstate(v->nfa);
-	    NOERR();
-	    newarc(v->nfa, PLAIN, co, lp, s);
-	    NOERR();
-	}
-	assert(*p != 0);	/* at least two chars */
-	assert(singleton(v->cm, *p));
-	ch = *p++;
-	co = GETCOLOR(v->cm, ch);
-	assert(*p == 0);	/* and only two, for now */
-	newarc(v->nfa, PLAIN, co, s, rp);
-	NOERR();
-    }
-}
-
-/*
- - nextleader - find next MCCE leader within range
- ^ static celt nextleader(struct vars *, pchr, pchr);
- */
-static celt			/* NOCELT means none */
-nextleader(
-    struct vars *v,
-    pchr from,
-    pchr to)
-{
-    int i;
-    const chr *p;
-    chr ch;
-    celt it = NOCELT;
-
-    if (v->mcces == NULL) {
-	return it;
-    }
-
-    for (i = v->mcces->nchrs, p = v->mcces->chrs; i > 0; i--, p++) {
-	ch = *p;
-	if (from <= ch && ch <= to) {
-	    if (it == NOCELT || ch < it) {
-		it = ch;
-	    }
-	}
-    }
-    return it;
-}
-#endif
 
 /*
  - wordchrs - set up word-chr list for word-boundary stuff, if needed
@@ -2120,30 +1781,14 @@ optst(
     struct vars *v,
     struct subre *t)
 {
-#if 0
-    if (t == NULL) {
-	return;
-    }
-
     /*
-     * Recurse through children.
+     * DGP (2007-11-13): I assume it was the programmer's intent to eventually
+     * come back and add code to optimize subRE trees, but the routine coded
+     * just spends effort traversing the tree and doing nothing. We can do
+     * nothing with less effort.
      */
 
-    if (t->left != NULL) {
-	optst(v, t->left);
-    }
-    if (t->right != NULL) {
-	optst(v, t->right);
-    }
-#else
-    /*
-     * DGP (2007-11-13): I assume it was the programmer's intent to 
-     * eventually come back and add code above to optimize subRE trees,
-     * but the routine coded just spends effort traversing the tree and
-     * doing nothing.  We can do nothing with less effort.
-     */
     return;
-#endif
 }
 
 /*
diff --git a/generic/regcustom.h b/generic/regcustom.h
index 6b6b38c..ac33087 100644
--- a/generic/regcustom.h
+++ b/generic/regcustom.h
@@ -3,13 +3,13 @@
  *
  * Development of this software was funded, in part, by Cray Research Inc.,
  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
- * Corporation, none of whom are responsible for the results.  The author
+ * Corporation, none of whom are responsible for the results. The author
  * thanks all of them.
  *
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications.
+ * Redistribution and use in source and binary forms - with or without
+ * modification - are permitted for any purpose, provided that redistributions
+ * in source form retain this entire copyright notice and indicate the origin
+ * and nature of any modifications.
  *
  * I'd appreciate being given credit for this package in the documentation of
  * software which uses it, but that is not a requirement.
@@ -26,23 +26,28 @@
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-/* headers if any */
+/*
+ * Headers if any.
+ */
+
 #include "tclInt.h"
 
-/* overrides for regguts.h definitions, if any */
-#define	FUNCPTR(name, args)	(*name) _ANSI_ARGS_(args)
+/*
+ * Overrides for regguts.h definitions, if any.
+ */
+
+#define	FUNCPTR(name, args)	(*name)args
 #define	MALLOC(n)		ckalloc(n)
 #define	FREE(p)			ckfree(VS(p))
 #define	REALLOC(p,n)		ckrealloc(VS(p),n)
 
-
-
 /*
- * Do not insert extras between the "begin" and "end" lines -- this
- * chunk is automatically extracted to be fitted into regex.h.
+ * Do not insert extras between the "begin" and "end" lines - this chunk is
+ * automatically extracted to be fitted into regex.h.
  */
+
 /* --- begin --- */
-/* ensure certain things don't sneak in from system headers */
+/* Ensure certain things don't sneak in from system headers. */
 #ifdef __REG_WIDE_T
 #undef __REG_WIDE_T
 #endif
@@ -67,70 +72,90 @@
 #ifdef __REG_NOCHAR
 #undef __REG_NOCHAR
 #endif
-/* interface types */
+/* Interface types */
 #define	__REG_WIDE_T	Tcl_UniChar
-#define	__REG_REGOFF_T	long	/* not really right, but good enough... */
-#define	__REG_VOID_T	VOID
-#define	__REG_CONST	CONST
-/* names and declarations */
+#define	__REG_REGOFF_T	long	/* Not really right, but good enough... */
+#define	__REG_VOID_T	void
+#define	__REG_CONST	const
+/* Names and declarations */
 #define	__REG_WIDE_COMPILE	TclReComp
 #define	__REG_WIDE_EXEC		TclReExec
-#define	__REG_NOFRONT		/* don't want regcomp() and regexec() */
-#define	__REG_NOCHAR		/* or the char versions */
+#define	__REG_NOFRONT		/* Don't want regcomp() and regexec() */
+#define	__REG_NOCHAR		/* Or the char versions */
 #define	regfree		TclReFree
 #define	regerror	TclReError
 /* --- end --- */
 
+/*
+ * Internal character type and related.
+ */
 
-
-/* internal character type and related */
-typedef Tcl_UniChar chr;	/* the type itself */
-typedef int pchr;		/* what it promotes to */
-typedef unsigned uchr;		/* unsigned type that will hold a chr */
-typedef int celt;		/* type to hold chr, MCCE number, or NOCELT */
-#define	NOCELT	(-1)		/* celt value which is not valid chr or MCCE */
-#define	CHR(c)	(UCHAR(c))	/* turn char literal into chr literal */
-#define	DIGITVAL(c)	((c)-'0')	/* turn chr digit into its value */
+typedef Tcl_UniChar chr;	/* The type itself. */
+typedef int pchr;		/* What it promotes to. */
+typedef unsigned uchr;		/* Unsigned type that will hold a chr. */
+typedef int celt;		/* Type to hold chr, or NOCELT */
+#define	NOCELT (-1)		/* Celt value which is not valid chr */
+#define	CHR(c) (UCHAR(c))	/* Turn char literal into chr literal */
+#define	DIGITVAL(c) ((c)-'0')	/* Turn chr digit into its value */
 #if TCL_UTF_MAX > 3
-#define	CHRBITS	32		/* bits in a chr; must not use sizeof */
-#define	CHR_MIN	0x00000000	/* smallest and largest chr; the value */
-#define	CHR_MAX	0xffffffff	/*  CHR_MAX-CHR_MIN+1 should fit in uchr */
+#define	CHRBITS	32		/* Bits in a chr; must not use sizeof */
+#define	CHR_MIN	0x00000000	/* Smallest and largest chr; the value */
+#define	CHR_MAX	0xffffffff	/* CHR_MAX-CHR_MIN+1 should fit in uchr */
 #else
-#define	CHRBITS	16		/* bits in a chr; must not use sizeof */
-#define	CHR_MIN	0x0000		/* smallest and largest chr; the value */
-#define	CHR_MAX	0xffff		/*  CHR_MAX-CHR_MIN+1 should fit in uchr */
+#define	CHRBITS	16		/* Bits in a chr; must not use sizeof */
+#define	CHR_MIN	0x0000		/* Smallest and largest chr; the value */
+#define	CHR_MAX	0xffff		/* CHR_MAX-CHR_MIN+1 should fit in uchr */
 #endif
 
-/* functions operating on chr */
+/*
+ * Functions operating on chr.
+ */
+
 #define	iscalnum(x)	Tcl_UniCharIsAlnum(x)
 #define	iscalpha(x)	Tcl_UniCharIsAlpha(x)
 #define	iscdigit(x)	Tcl_UniCharIsDigit(x)
 #define	iscspace(x)	Tcl_UniCharIsSpace(x)
 
-/* name the external functions */
+/*
+ * Name the external functions.
+ */
+
 #define	compile		TclReComp
 #define	exec		TclReExec
 
-/* enable/disable debugging code (by whether REG_DEBUG is defined or not) */
-#if 0		/* no debug unless requested by makefile */
+/*
+ * Enable/disable debugging code (by whether REG_DEBUG is defined or not).
+ */
+
+#if 0				/* No debug unless requested by makefile. */
 #define	REG_DEBUG	/* */
 #endif
 
-/* method of allocating a local workspace */
+/*
+ * Method of allocating a local workspace. We use a thread-specific data
+ * space to store this because the regular expression engine is never
+ * reentered from the same thread; it doesn't make any callbacks.
+ */
+
 #if 1
 #define AllocVars(vPtr) \
     static Tcl_ThreadDataKey varsKey; \
     register struct vars *vPtr = (struct vars *) \
-	Tcl_GetThreadData(&varsKey, sizeof(struct vars))
+	    Tcl_GetThreadData(&varsKey, sizeof(struct vars))
 #else
-/* This strategy for allocating workspace is "more proper" in some sense, but
+/*
+ * This strategy for allocating workspace is "more proper" in some sense, but
  * quite a bit slower. Using TSD (as above) leads to code that is quite a bit
- * faster in practice. */
+ * faster in practice (measured!).
+ */
 #define AllocVars(vPtr) \
     register struct vars *vPtr = (struct vars *) MALLOC(sizeof(struct vars))
 #define FreeVars(vPtr) \
     FREE(vPtr)
 #endif
 
-/* and pick up the standard header */
+/*
+ * And pick up the standard header.
+ */
+
 #include "regex.h"
diff --git a/generic/regguts.h b/generic/regguts.h
index bc1d7a2..cbf6615 100644
--- a/generic/regguts.h
+++ b/generic/regguts.h
@@ -60,24 +60,24 @@
 
 /* voids */
 #ifndef VOID
-#define	VOID	void			/* for function return values */
+#define	VOID	void		/* for function return values */
 #endif
 #ifndef DISCARD
-#define	DISCARD	void			/* for throwing values away */
+#define	DISCARD	void		/* for throwing values away */
 #endif
 #ifndef PVOID
-#define	PVOID	void *			/* generic pointer */
+#define	PVOID	void *		/* generic pointer */
 #endif
 #ifndef VS
-#define	VS(x)	((void*)(x))		/* cast something to generic ptr */
+#define	VS(x)	((void*)(x))	/* cast something to generic ptr */
 #endif
 #ifndef NOPARMS
-#define	NOPARMS	void			/* for empty parm lists */
+#define	NOPARMS	void		/* for empty parm lists */
 #endif
 
 /* const */
 #ifndef CONST
-#define	CONST	const			/* for old compilers, might be empty */
+#define	CONST	const		/* for old compilers, might be empty */
 #endif
 
 /* function-pointer declarator */
@@ -105,7 +105,7 @@
 #include <limits.h>
 #endif
 #ifndef _POSIX2_RE_DUP_MAX
-#define	_POSIX2_RE_DUP_MAX	255	/* normally from <limits.h> */
+#define	_POSIX2_RE_DUP_MAX 255	/* normally from <limits.h> */
 #endif
 
 /*
@@ -189,7 +189,7 @@ union tree {
 #define	tcolor	colors.ccolor
 #define	tptr	ptrs.pptr
 
-/* internal per-color structure for the color machinery */
+/* Internal per-color descriptor structure for the color machinery */
 struct colordesc {
     uchr nchrs;			/* number of chars of this color */
     color sub;			/* open subcolor (if any); free chain ptr */
@@ -235,9 +235,9 @@ struct colormap {
 
 /*
  * Interface definitions for locale-interface functions in locale.c.
- * Multi-character collating elements (MCCEs) cause most of the trouble.
  */
 
+/* Representation of a set of characters. */
 struct cvec {
     int nchrs;			/* number of chrs */
     int chrspace;		/* number of chrs possible */
@@ -245,22 +245,11 @@ struct cvec {
     int nranges;		/* number of ranges (chr pairs) */
     int rangespace;		/* number of chrs possible */
     chr *ranges;		/* pointer to vector of chr pairs */
-#ifdef REGEXP_MCCE_ENABLED
-    int nmcces;			/* number of MCCEs */
-    int mccespace;		/* number of MCCEs possible */
-    int nmccechrs;		/* number of chrs used for MCCEs */
-    chr *mcces[1];		/* pointers to 0-terminated MCCEs */
-				/* and both batches of chrs are on the end */
-#endif
 };
 
-#ifdef REGEXP_MCCE_ENABLED
-/* caution:  this value cannot be changed easily */
-#define	MAXMCCE	2		/* length of longest MCCE */
-#endif
-
 /*
- * definitions for NFA internal representation
+ * definitions for non-deterministic finite automaton (NFA) internal
+ * representation
  *
  * Having a "from" pointer within each arc may seem redundant, but it saves a
  * lot of hassle.
@@ -288,7 +277,7 @@ struct arcbatch {		/* for bulk allocation of arcs */
 
 struct state {
     int no;
-#		define	FREESTATE	(-1)
+#define	FREESTATE	(-1)
     char flag;			/* marks special states */
     int nins;			/* number of inarcs */
     struct arc *ins;		/* chain of inarcs */
@@ -405,7 +394,8 @@ struct guts {
 };
 
 /*
- * Magic for allocating a variable workspace.
+ * Magic for allocating a variable workspace. This default version is
+ * stack-hungry.
  */
 
 #ifndef AllocVars