summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 6
-rw-r--r-- generic/regc_color.c 16
-rw-r--r-- generic/regc_cvec.c 118
-rw-r--r-- generic/regc_locale.c 93
-rw-r--r-- generic/regc_nfa.c 2
-rw-r--r-- generic/regcomp.c 383
-rw-r--r-- generic/regcustom.h 113
-rw-r--r-- generic/regguts.h 38
8 files changed, 133 insertions, 636 deletions
diff --git a/ChangeLog b/ChangeLog
index 5ca8133..f48ec8b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2007-11-14 Donal K. Fellows <donal.k.fellows@man.ac.uk>
+
+ * generic/regc*.c: Eliminate multi-char collating element code
+ completely. Simplifies the code quite a bit. If people still want the
+ full code, it will remain on the 8.4 branch. [Bug 1831425]
+
2007-11-13 Jeff Hobbs <jeffh@ActiveState.com>
* generic/tclCompCmds.c (TclCompileRegexpCmd): clean up comments,
diff --git a/generic/regc_color.c b/generic/regc_color.c
index 02634d9..003f5fc 100644
--- a/generic/regc_color.c
+++ b/generic/regc_color.c
@@ -678,22 +678,6 @@ uncolorchain(
a->colorchain = NULL; /* paranoia */
}
-#ifdef REGEXP_MCCE_ENABLED
-/*
- - singleton - is this character in its own color?
- ^ static int singleton(struct colormap *, pchr c);
- */
-static int /* predicate */
-singleton(
- struct colormap *cm,
- pchr c)
-{
- color co = GETCOLOR(cm, c); /* color of c */
-
- return (cm->cd[co].nchrs == 1) && (cm->cd[co].sub == NOSUB);
-}
-#endif
-
/*
- rainbow - add arcs of all full colors (but one) between specified states
^ static VOID rainbow(struct nfa *, struct colormap *, int, pcolor,
diff --git a/generic/regc_cvec.c b/generic/regc_cvec.c
index afb2f48..64f34cd 100644
--- a/generic/regc_cvec.c
+++ b/generic/regc_cvec.c
@@ -36,37 +36,17 @@
/*
- newcvec - allocate a new cvec
- ^ static struct cvec *newcvec(int, int, int);
+ ^ static struct cvec *newcvec(int, int);
*/
static struct cvec *
newcvec(
int nchrs, /* to hold this many chrs... */
int nranges) /* ... and this many ranges... */
-#ifdef REGEXP_MCCE_ENABLED
- int nmcces) /* ... and this many MCCEs */
-#endif
{
- size_t n, nc;
- struct cvec *cv;
+ size_t nc = (size_t)nchrs + (size_t)nranges*2;
+ size_t n = sizeof(struct cvec) + nc*sizeof(chr);
+ struct cvec *cv = (struct cvec *) MALLOC(n);
-#ifdef REGEXP_MCCE_ENABLED
- nc = (size_t)nchrs + (size_t)nmcces*(MAXMCCE+1) + (size_t)nranges*2;
- n = sizeof(struct cvec) + (size_t)(nmcces-1)*sizeof(chr *)
- + nc*sizeof(chr);
- cv = (struct cvec *) MALLOC(n);
- if (cv == NULL) {
- return NULL;
- }
- cv->chrspace = nchrs;
- cv->chrs = (chr *)&cv->mcces[nmcces]; /* chrs just after MCCE ptrs */
- cv->mccespace = nmcces;
- cv->ranges = cv->chrs + nchrs + nmcces*(MAXMCCE+1);
- cv->rangespace = nranges;
- return clearcvec(cv);
-#else
- nc = (size_t)nchrs + (size_t)nranges*2;
- n = sizeof(struct cvec) + nc*sizeof(chr);
- cv = (struct cvec *) MALLOC(n);
if (cv == NULL) {
return NULL;
}
@@ -75,7 +55,6 @@ newcvec(
cv->ranges = cv->chrs + nchrs;
cv->rangespace = nranges;
return clearcvec(cv);
-#endif /*REGEXP_MCCE_ENABLED*/
}
/*
@@ -87,21 +66,9 @@ static struct cvec *
clearcvec(
struct cvec *cv) /* character vector */
{
-#ifdef REGEXP_MCCE_ENABLED
- int i;
-#endif
-
assert(cv != NULL);
cv->nchrs = 0;
cv->nranges = 0;
-#ifdef REGEXP_MCCE_ENABLED
- assert(cv->chrs == (chr *)&cv->mcces[cv->mccespace]);
- cv->nmcces = 0;
- cv->nmccechrs = 0;
- for (i = 0; i < cv->mccespace; i++) {
- cv->mcces[i] = NULL;
- }
-#endif
return cv;
}
@@ -114,7 +81,6 @@ addchr(
struct cvec *cv, /* character vector */
pchr c) /* character to add */
{
- assert(cv->nchrs < cv->chrspace - cv->nmccechrs);
cv->chrs[cv->nchrs++] = (chr)c;
}
@@ -134,89 +100,17 @@ addrange(
cv->nranges++;
}
-#ifdef REGEXP_MCCE_ENABLED
-/*
- * This static function is currently called from a single spot in regcomp.c,
- * with two NULL pointers; in that case it does nothing, so that we define out
- * both the call and the code.
- */
-
-/*
- - addmcce - add an MCCE to a cvec
- ^ static VOID addmcce(struct cvec *, const chr *, const chr *);
- */
-
-static void
-addmcce(
- struct cvec *cv, /* character vector */
- const chr *startp, /* beginning of text */
- const chr *endp) /* just past end of text */
-{
- int len, i;
- const chr *s, *d;
-
- if (startp == NULL && endp == NULL) {
- return;
- }
- len = endp - startp;
- assert(len > 0);
- assert(cv->nchrs + len < cv->chrspace - cv->nmccechrs);
- assert(cv->nmcces < cv->mccespace);
- d = &cv->chrs[cv->chrspace - cv->nmccechrs - len - 1];
- cv->mcces[cv->nmcces++] = d;
- for (s = startp, i = len; i > 0; s++, i--) {
- *d++ = *s;
- }
- *d++ = 0; /* endmarker */
- assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]);
- cv->nmccechrs += len + 1;
-}
-#endif
-
-/*
- - haschr - does a cvec contain this chr?
- ^ static int haschr(struct cvec *, pchr);
- */
-#ifdef REGEXP_MCCE_ENABLED
-static int /* predicate */
-haschr(
- struct cvec *cv, /* character vector */
- pchr c) /* character to test for */
-{
- int i;
- const chr *p;
-
- for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) {
- if (*p == c) {
- return 1;
- }
- }
- for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) {
- if ((*p <= c) && (c <= *(p+1))) {
- return 1;
- }
- }
- return 0;
-}
-#endif
-
/*
- getcvec - get a cvec, remembering it as v->cv
- ^ static struct cvec *getcvec(struct vars *, int, int, int);
+ ^ static struct cvec *getcvec(struct vars *, int, int);
*/
static struct cvec *
getcvec(
struct vars *v, /* context */
int nchrs, /* to hold this many chrs... */
int nranges) /* ... and this many ranges... */
-#ifdef REGEXP_MCCE_ENABLED
- int nmcces) /* ... and this many MCCEs */
-#endif
{
if ((v->cv != NULL) && (nchrs <= v->cv->chrspace) &&
-#ifdef REGEXP_MCCE_ENABLED
- (nmcces <= v->cv->mccespace) &&
-#endif
(nranges <= v->cv->rangespace)) {
return clearcvec(v->cv);
}
@@ -224,7 +118,7 @@ getcvec(
if (v->cv != NULL) {
freecvec(v->cv);
}
- v->cv = newcvec(nchrs, nranges/*, nmcces*/);
+ v->cv = newcvec(nchrs, nranges);
if (v->cv == NULL) {
ERR(REG_ESPACE);
}
diff --git a/generic/regc_locale.c b/generic/regc_locale.c
index 438e821..ac310c9 100644
--- a/generic/regc_locale.c
+++ b/generic/regc_locale.c
@@ -9,7 +9,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: regc_locale.c,v 1.18 2007/11/14 00:07:58 dkf Exp $
+ * RCS: @(#) $Id: regc_locale.c,v 1.19 2007/11/14 11:04:59 dkf Exp $
*/
/* ASCII character-name table */
@@ -611,49 +611,6 @@ static const chr printCharTable[] = {
#define CH NOCELT
/*
- - nmcces - how many distinct MCCEs are there?
- ^ static int nmcces(struct vars *);
- */
-#ifdef REGEXP_MCCE_ENABLED
-static int
-nmcces(
- struct vars *v) /* context */
-{
- /*
- * No multi-character collating elements defined at the moment.
- */
- return 0;
-}
-#endif
-
-/*
- - nleaders - how many chrs can be first chrs of MCCEs?
- ^ static int nleaders(struct vars *);
- */
-#ifdef REGEXP_MCCE_ENABLED
-static int
-nleaders(
- struct vars *v) /* context */
-{
- return 0;
-}
-#endif
-
-/*
- - allmcces - return a cvec with all the MCCEs of the locale
- ^ static struct cvec *allmcces(struct vars *, struct cvec *);
- */
-#ifdef REGEXP_MCCE_ENABLED
-static struct cvec *
-allmcces(
- struct vars *v, /* context */
- struct cvec *cv) /* this is supposed to have enough room */
-{
- return clearcvec(cv);
-}
-#endif
-
-/*
- element - map collating-element name to celt
^ static celt element(struct vars *, const chr *, const chr *);
*/
@@ -724,8 +681,8 @@ range(
return NULL;
}
- if (!cases) { /* easy version */
- cv = getcvec(v, 0, 1/*, 0*/);
+ if (!cases) { /* easy version */
+ cv = getcvec(v, 0, 1);
NOERRN();
addrange(cv, a, b);
return cv;
@@ -739,7 +696,7 @@ range(
nchrs = (b - a + 1)*2 + 4;
- cv = getcvec(v, nchrs, 0/*, 0*/);
+ cv = getcvec(v, nchrs, 0);
NOERRN();
for (c=a; c<=b; c++) {
@@ -765,14 +722,10 @@ range(
- before - is celt x before celt y, for purposes of range legality?
^ static int before(celt, celt);
*/
-static int /* predicate */
+static int /* predicate */
before(
- celt x, celt y) /* collating elements */
+ celt x, celt y) /* collating elements */
{
- /*
- * trivial because no MCCEs.
- */
-
if (x < y) {
return 1;
}
@@ -798,7 +751,7 @@ eclass(
*/
if ((v->cflags&REG_FAKE) && c == 'x') {
- cv = getcvec(v, 4, 0/*, 0*/);
+ cv = getcvec(v, 4, 0);
addchr(cv, (chr)'x');
addchr(cv, (chr)'y');
if (cases) {
@@ -815,7 +768,7 @@ eclass(
if (cases) {
return allcases(v, c);
}
- cv = getcvec(v, 1, 0/*, 0*/);
+ cv = getcvec(v, 1, 0);
assert(cv != NULL);
addchr(cv, (chr)c);
return cv;
@@ -895,7 +848,7 @@ cclass(
switch((enum classes) index) {
case CC_PRINT:
- cv = getcvec(v, NUM_PRINT_CHAR, NUM_PRINT_RANGE/*, 0*/);
+ cv = getcvec(v, NUM_PRINT_CHAR, NUM_PRINT_RANGE);
if (cv) {
for (i=0 ; (size_t)i<NUM_PRINT_CHAR ; i++) {
addchr(cv, printCharTable[i]);
@@ -907,7 +860,7 @@ cclass(
}
break;
case CC_ALNUM:
- cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE/*, 0*/);
+ cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE);
if (cv) {
for (i=0 ; (size_t)i<NUM_ALPHA_CHAR ; i++) {
addchr(cv, alphaCharTable[i]);
@@ -923,7 +876,7 @@ cclass(
}
break;
case CC_ALPHA:
- cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE/*, 0*/);
+ cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE);
if (cv) {
for (i=0 ; (size_t)i<NUM_ALPHA_RANGE ; i++) {
addrange(cv, alphaRangeTable[i].start,
@@ -935,23 +888,23 @@ cclass(
}
break;
case CC_ASCII:
- cv = getcvec(v, 0, 1/*, 0*/);
+ cv = getcvec(v, 0, 1);
if (cv) {
addrange(cv, 0, 0x7f);
}
break;
case CC_BLANK:
- cv = getcvec(v, 2, 0/*, 0*/);
+ cv = getcvec(v, 2, 0);
addchr(cv, '\t');
addchr(cv, ' ');
break;
case CC_CNTRL:
- cv = getcvec(v, 0, 2/*, 0*/);
+ cv = getcvec(v, 0, 2);
addrange(cv, 0x0, 0x1f);
addrange(cv, 0x7f, 0x9f);
break;
case CC_DIGIT:
- cv = getcvec(v, 0, NUM_DIGIT_RANGE/*, 0*/);
+ cv = getcvec(v, 0, NUM_DIGIT_RANGE);
if (cv) {
for (i=0 ; (size_t)i<NUM_DIGIT_RANGE ; i++) {
addrange(cv, digitRangeTable[i].start,
@@ -960,7 +913,7 @@ cclass(
}
break;
case CC_PUNCT:
- cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE/*, 0*/);
+ cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE);
if (cv) {
for (i=0 ; (size_t)i<NUM_PUNCT_RANGE ; i++) {
addrange(cv, punctRangeTable[i].start,
@@ -981,7 +934,7 @@ cclass(
* someone comes up with a better arrangement!)
*/
- cv = getcvec(v, 0, 3/*, 0*/);
+ cv = getcvec(v, 0, 3);
if (cv) {
addrange(cv, '0', '9');
addrange(cv, 'a', 'f');
@@ -989,7 +942,7 @@ cclass(
}
break;
case CC_SPACE:
- cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE/*, 0*/);
+ cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE);
if (cv) {
for (i=0 ; (size_t)i<NUM_SPACE_RANGE ; i++) {
addrange(cv, spaceRangeTable[i].start,
@@ -1001,7 +954,7 @@ cclass(
}
break;
case CC_LOWER:
- cv = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE/*, 0*/);
+ cv = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE);
if (cv) {
for (i=0 ; (size_t)i<NUM_LOWER_RANGE ; i++) {
addrange(cv, lowerRangeTable[i].start,
@@ -1013,7 +966,7 @@ cclass(
}
break;
case CC_UPPER:
- cv = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE/*, 0*/);
+ cv = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE);
if (cv) {
for (i=0 ; (size_t)i<NUM_UPPER_RANGE ; i++) {
addrange(cv, upperRangeTable[i].start,
@@ -1025,7 +978,7 @@ cclass(
}
break;
case CC_GRAPH:
- cv = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE/*, 0*/);
+ cv = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE);
if (cv) {
for (i=0 ; (size_t)i<NUM_GRAPH_RANGE ; i++) {
addrange(cv, graphRangeTable[i].start,
@@ -1063,10 +1016,10 @@ allcases(
tc = Tcl_UniCharToTitle((chr)c);
if (tc != uc) {
- cv = getcvec(v, 3, 0/*, 0*/);
+ cv = getcvec(v, 3, 0);
addchr(cv, tc);
} else {
- cv = getcvec(v, 2, 0/*, 0*/);
+ cv = getcvec(v, 2, 0);
}
addchr(cv, lc);
if (lc != uc) {
diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c
index 9f63f73..20e821f 100644
--- a/generic/regc_nfa.c
+++ b/generic/regc_nfa.c
@@ -88,7 +88,7 @@ newnfa(
- freenfa - free an entire NFA
^ static VOID freenfa(struct nfa *);
*/
-static VOID
+static void
freenfa(
struct nfa *nfa)
{
diff --git a/generic/regcomp.c b/generic/regcomp.c
index 8a43240..b397334 100644
--- a/generic/regcomp.c
+++ b/generic/regcomp.c
@@ -55,10 +55,6 @@ static void brackpart(struct vars *, struct state *, struct state *);
static const chr *scanplain(struct vars *);
static void onechr(struct vars *, pchr, struct state *, struct state *);
static void dovec(struct vars *, struct cvec *, struct state *, struct state *);
-#ifdef REGEXP_MCCE_ENABLED
-static void leaders(struct vars *, struct cvec *);
-static celt nextleader(struct vars *, pchr, pchr);
-#endif
static void wordchrs(struct vars *);
static struct subre *subre(struct vars *, int, int, struct state *, struct state *);
static void freesubre(struct vars *, struct subre *);
@@ -107,9 +103,6 @@ static void subblock(struct vars *, pchr, struct state *, struct state *);
static void okcolors(struct nfa *, struct colormap *);
static void colorchain(struct colormap *, struct arc *);
static void uncolorchain(struct colormap *, struct arc *);
-#ifdef REGEXP_MCCE_ENABLED
-static int singleton(struct colormap *, pchr c);
-#endif
static void rainbow(struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *);
static void colorcomplement(struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *);
#ifdef REG_DEBUG
@@ -174,22 +167,10 @@ static void dumpcstate(int, struct carc *, struct cnfa *, FILE *);
static struct cvec *clearcvec(struct cvec *);
static void addchr(struct cvec *, pchr);
static void addrange(struct cvec *, pchr, pchr);
-#ifdef REGEXP_MCCE_ENABLED
-static struct cvec *newcvec(int, int, int);
-static void addmcce(struct cvec *, const chr *, const chr *);
-static struct cvec *getcvec(struct vars *, int, int, int);
-static int haschr(struct cvec *, pchr);
-#else
static struct cvec *newcvec(int, int);
static struct cvec *getcvec(struct vars *, int, int);
-#endif
static void freecvec(struct cvec *);
/* === regc_locale.c === */
-#ifdef REGEXP_MCCE_ENABLED
-static int nleaders(struct vars *);
-static int nmcces(struct vars *);
-static struct cvec *allmcces(struct vars *, struct cvec *);
-#endif
static celt element(struct vars *, const chr *, const chr *);
static struct cvec *range(struct vars *, celt, celt, int);
static int before(celt, celt);
@@ -228,12 +209,6 @@ struct vars {
int ntree; /* number of tree nodes */
struct cvec *cv; /* interface cvec */
struct cvec *cv2; /* utility cvec */
-#ifdef REGEXP_MCCE_ENABLED
- struct cvec *mcces; /* collating-element information */
-#define ISCELEADER(v,c) (v->mcces != NULL && haschr(v->mcces, (c)))
- struct state *mccepbegin; /* in nfa, start of MCCE prototypes */
- struct state *mccepend; /* in nfa, end of MCCE prototypes */
-#endif
struct subre *lacons; /* lookahead-constraint vector */
int nlacons; /* size of lacons */
};
@@ -343,9 +318,6 @@ compile(
v->treefree = NULL;
v->cv = NULL;
v->cv2 = NULL;
-#ifdef REGEXP_MCCE_ENABLED
- v->mcces = NULL;
-#endif
v->lacons = NULL;
v->nlacons = 0;
re->re_magic = REMAGIC;
@@ -375,18 +347,6 @@ compile(
if (v->cv == NULL) {
return freev(v, REG_ESPACE);
}
-#ifdef REGEXP_MCCE_ENABLED
- i = nmcces(v);
- if (i > 0) {
- v->mcces = newcvec(nleaders(v), 0);
- CNOERR();
- v->mcces = allmcces(v, v->mcces);
- leaders(v, v->mcces);
- /* Function does nothing with NULL pointers */
- addmcce(v->mcces, NULL, NULL); /* dummy */
- }
- CNOERR();
-#endif
/*
* Parsing.
@@ -559,11 +519,6 @@ freev(
if (v->cv2 != NULL) {
freecvec(v->cv2);
}
-#ifdef REGEXP_MCCE_ENABLED
- if (v->mcces != NULL) {
- freecvec(v->mcces);
- }
-#endif
if (v->lacons != NULL) {
freelacons(v->lacons, v->nlacons);
}
@@ -850,7 +805,6 @@ parseqatom(
}
NEXT();
return;
- break;
case '$':
ARCV('$', 1);
if (v->cflags&REG_NLANCH) {
@@ -858,19 +812,16 @@ parseqatom(
}
NEXT();
return;
- break;
case SBEGIN:
ARCV('^', 1); /* BOL */
ARCV('^', 0); /* or BOS */
NEXT();
return;
- break;
case SEND:
ARCV('$', 1); /* EOL */
ARCV('$', 0); /* or EOS */
NEXT();
return;
- break;
case '<':
wordchrs(v); /* does NEXT() */
s = newstate(v->nfa);
@@ -878,7 +829,6 @@ parseqatom(
nonword(v, BEHIND, lp, s);
word(v, AHEAD, s, rp);
return;
- break;
case '>':
wordchrs(v); /* does NEXT() */
s = newstate(v->nfa);
@@ -886,7 +836,6 @@ parseqatom(
word(v, BEHIND, lp, s);
nonword(v, AHEAD, s, rp);
return;
- break;
case WBDRY:
wordchrs(v); /* does NEXT() */
s = newstate(v->nfa);
@@ -898,7 +847,6 @@ parseqatom(
word(v, BEHIND, lp, s);
nonword(v, AHEAD, s, rp);
return;
- break;
case NWBDRY:
wordchrs(v); /* does NEXT() */
s = newstate(v->nfa);
@@ -910,7 +858,6 @@ parseqatom(
nonword(v, BEHIND, lp, s);
nonword(v, AHEAD, s, rp);
return;
- break;
case LACON: /* lookahead constraint */
pos = v->nextvalue;
NEXT();
@@ -925,7 +872,6 @@ parseqatom(
NOERR();
ARCV(LACON, n);
return;
- break;
/*
* Then errors, to get them out of the way.
@@ -937,11 +883,9 @@ parseqatom(
case '{':
ERR(REG_BADRPT);
return;
- break;
default:
ERR(REG_ASSERT);
return;
- break;
/*
* Then plain characters, and minor variants on that theme.
@@ -1478,15 +1422,6 @@ cbracket(
{
struct state *left = newstate(v->nfa);
struct state *right = newstate(v->nfa);
-#ifdef REGEXP_MCCE_ENABLED
- struct state *s;
- struct arc *a; /* arc from lp */
- struct arc *ba; /* arc from left, from bracket() */
- struct arc *pa; /* MCCE-prototype arc */
- color co;
- const chr *p;
- int i;
-#endif
NOERR();
bracket(v, left, right);
@@ -1498,69 +1433,16 @@ cbracket(
assert(lp->nouts == 0); /* all outarcs will be ours */
/*
- * Easy part of complementing
+ * Easy part of complementing, and all there is to do since the MCCE code
+ * was removed.
*/
colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp);
NOERR();
- if (1 /*v->mcces == NULL*/) { /* no MCCEs -- we're done */
- dropstate(v->nfa, left);
- assert(right->nins == 0);
- freestate(v->nfa, right);
- return;
- }
-
-#ifdef REGEXP_MCCE_ENABLED
- /*
- * But complementing gets messy in the presence of MCCEs...
- */
-
- NOTE(REG_ULOCALE);
- for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) {
- co = GETCOLOR(v->cm, *p);
- a = findarc(lp, PLAIN, co);
- ba = findarc(left, PLAIN, co);
- if (ba == NULL) {
- assert(a != NULL);
- freearc(v->nfa, a);
- } else {
- assert(a == NULL);
- }
- s = newstate(v->nfa);
- NOERR();
- newarc(v->nfa, PLAIN, co, lp, s);
- NOERR();
- pa = findarc(v->mccepbegin, PLAIN, co);
- assert(pa != NULL);
- if (ba == NULL) { /* easy case, need all of them */
- cloneouts(v->nfa, pa->to, s, rp, PLAIN);
- newarc(v->nfa, '$', 1, s, rp);
- newarc(v->nfa, '$', 0, s, rp);
- colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp);
- } else { /* must be selective */
- if (findarc(ba->to, '$', 1) == NULL) {
- newarc(v->nfa, '$', 1, s, rp);
- newarc(v->nfa, '$', 0, s, rp);
- colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp);
- }
- for (pa = pa->to->outs; pa != NULL; pa = pa->outchain) {
- if (findarc(ba->to, PLAIN, pa->co) == NULL) {
- newarc(v->nfa, PLAIN, pa->co, s, rp);
- }
- }
- if (s->nouts == 0) { /* limit of selectivity: none */
- dropstate(v->nfa, s); /* frees arc too */
- }
- }
- NOERR();
- }
-
- delsub(v->nfa, left, right);
- assert(left->nouts == 0);
- freestate(v->nfa, left);
+ dropstate(v->nfa, left);
assert(right->nins == 0);
freestate(v->nfa, right);
-#endif
+ return;
}
/*
@@ -1592,10 +1474,10 @@ brackpart(
NEXT();
/*
- * Shortcut for ordinary chr (not range, not MCCE leader).
+ * Shortcut for ordinary chr (not range).
*/
- if (!SEE(RANGE) /*&& !ISCELEADER(v, c[0])*/) {
+ if (!SEE(RANGE)) {
onechr(v, c[0], lp, rp);
return;
}
@@ -1706,50 +1588,6 @@ scanplain(
}
/*
- - leaders - process a cvec of collating elements to also include leaders
- * Also gives all characters involved their own colors, which is almost
- * certainly necessary, and sets up little disconnected subNFA.
- ^ static void leaders(struct vars *, struct cvec *);
- */
-#ifdef REGEXP_MCCE_ENABLED
-static void
-leaders(
- struct vars *v,
- struct cvec *cv)
-{
- int mcce;
- const chr *p;
- chr leader;
- struct state *s;
- struct arc *a;
-
- v->mccepbegin = newstate(v->nfa);
- v->mccepend = newstate(v->nfa);
- NOERR();
-
- for (mcce = 0; mcce < cv->nmcces; mcce++) {
- p = cv->mcces[mcce];
- leader = *p;
- if (!haschr(cv, leader)) {
- addchr(cv, leader);
- s = newstate(v->nfa);
- newarc(v->nfa, PLAIN, subcolor(v->cm, leader), v->mccepbegin, s);
- okcolors(v->nfa, v->cm);
- } else {
- a = findarc(v->mccepbegin, PLAIN, GETCOLOR(v->cm, leader));
- assert(a != NULL);
- s = a->to;
- assert(s != v->mccepend);
- }
- p++;
- assert(*p != 0 && *(p+1) == 0); /* only 2-char MCCEs for now */
- newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend);
- okcolors(v->nfa, v->cm);
- }
-}
-#endif
-
-/*
- onechr - fill in arcs for a plain character, and possible case complements
* This is mostly a shortcut for efficient handling of the common case.
^ static void onechr(struct vars *, pchr, struct state *, struct state *);
@@ -1766,17 +1604,18 @@ onechr(
return;
}
- /* rats, need general case anyway... */
+ /*
+ * Rats, need general case anyway...
+ */
+
dovec(v, allcases(v, c), lp, rp);
}
/*
- dovec - fill in arcs for each element of a cvec
- * This one has to handle the messy cases, like MCCEs and MCCE leaders.
^ static void dovec(struct vars *, struct cvec *, struct state *,
^ struct state *);
*/
-#ifndef REGEXP_MCCE_ENABLED
static void
dovec(
struct vars *v,
@@ -1802,184 +1641,6 @@ dovec(
}
}
-#else /* REGEXP_MCCE_ENABLED */
-static void
-dovec(
- struct vars *v,
- struct cvec *cv,
- struct state *lp,
- struct state *rp)
-{
- chr ch, from, to;
- celt ce;
- const chr *p;
- int i;
- struct cvec *leads;
- color co;
- struct arc *a;
- struct arc *pa; /* arc in prototype */
- struct state *s;
- struct state *ps; /* state in prototype */
-
- /*
- * Need a place to store leaders, if any.
- */
-
- if (nmcces(v) > 0) {
- assert(v->mcces != NULL);
- if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs) {
- if (v->cv2 != NULL) {
- free(v->cv2);
- }
- v->cv2 = newcvec(v->mcces->nchrs, 0, v->mcces->nmcces);
- NOERR();
- leads = v->cv2;
- } else {
- leads = clearcvec(v->cv2);
- }
- } else {
- leads = NULL;
- }
-
- /*
- * First, get the ordinary characters out of the way.
- */
-
- for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) {
- ch = *p;
- if (!ISCELEADER(v, ch)) {
- newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp);
- } else {
- assert(singleton(v->cm, ch));
- assert(leads != NULL);
- if (!haschr(leads, ch)) {
- addchr(leads, ch);
- }
- }
- }
-
- /*
- * And the ranges.
- */
-
- for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) {
- from = *p;
- to = *(p+1);
- while (from <= to && (ce = nextleader(v, from, to)) != NOCELT) {
- if (from < ce) {
- subrange(v, from, ce - 1, lp, rp);
- }
- assert(singleton(v->cm, ce));
- assert(leads != NULL);
- if (!haschr(leads, ce)) {
- addchr(leads, ce);
- }
- from = ce + 1;
- }
- if (from <= to) {
- subrange(v, from, to, lp, rp);
- }
- }
-
- /* *** WARNING ***
- *
- * This was buggy, check before enabling: the original version would cause
- * a segfault at the loopinit below if (leads==NULL && cv->nmcces!=0)
- * Possibly just a problem with parens? The original condition was
- * ((leads == NULL || leads->nchrs == 0) && cv->nmcces == 0)
- */
-
- if (leads == NULL || (leads->nchrs == 0 && cv->nmcces == 0)) {
- return;
- }
-
- /*
- * Deal with the MCCE leaders.
- */
-
- NOTE(REG_ULOCALE);
- for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--) {
- co = GETCOLOR(v->cm, *p);
- a = findarc(lp, PLAIN, co);
- if (a != NULL) {
- s = a->to;
- } else {
- s = newstate(v->nfa);
- NOERR();
- newarc(v->nfa, PLAIN, co, lp, s);
- NOERR();
- }
- pa = findarc(v->mccepbegin, PLAIN, co);
- assert(pa != NULL);
- ps = pa->to;
- newarc(v->nfa, '$', 1, s, rp);
- newarc(v->nfa, '$', 0, s, rp);
- colorcomplement(v->nfa, v->cm, AHEAD, ps, s, rp);
- NOERR();
- }
-
- /*
- * And the MCCEs.
- */
-
- for (i = 0; i < cv->nmcces; i++) {
- p = cv->mcces[i];
- assert(singleton(v->cm, *p));
- if (!singleton(v->cm, *p)) {
- ERR(REG_ASSERT);
- return;
- }
- ch = *p++;
- co = GETCOLOR(v->cm, ch);
- a = findarc(lp, PLAIN, co);
- if (a != NULL) {
- s = a->to;
- } else {
- s = newstate(v->nfa);
- NOERR();
- newarc(v->nfa, PLAIN, co, lp, s);
- NOERR();
- }
- assert(*p != 0); /* at least two chars */
- assert(singleton(v->cm, *p));
- ch = *p++;
- co = GETCOLOR(v->cm, ch);
- assert(*p == 0); /* and only two, for now */
- newarc(v->nfa, PLAIN, co, s, rp);
- NOERR();
- }
-}
-
-/*
- - nextleader - find next MCCE leader within range
- ^ static celt nextleader(struct vars *, pchr, pchr);
- */
-static celt /* NOCELT means none */
-nextleader(
- struct vars *v,
- pchr from,
- pchr to)
-{
- int i;
- const chr *p;
- chr ch;
- celt it = NOCELT;
-
- if (v->mcces == NULL) {
- return it;
- }
-
- for (i = v->mcces->nchrs, p = v->mcces->chrs; i > 0; i--, p++) {
- ch = *p;
- if (from <= ch && ch <= to) {
- if (it == NOCELT || ch < it) {
- it = ch;
- }
- }
- }
- return it;
-}
-#endif
/*
- wordchrs - set up word-chr list for word-boundary stuff, if needed
@@ -2120,30 +1781,14 @@ optst(
struct vars *v,
struct subre *t)
{
-#if 0
- if (t == NULL) {
- return;
- }
-
/*
- * Recurse through children.
+ * DGP (2007-11-13): I assume it was the programmer's intent to eventually
+ * come back and add code to optimize subRE trees, but the routine coded
+ * just spends effort traversing the tree and doing nothing. We can do
+ * nothing with less effort.
*/
- if (t->left != NULL) {
- optst(v, t->left);
- }
- if (t->right != NULL) {
- optst(v, t->right);
- }
-#else
- /*
- * DGP (2007-11-13): I assume it was the programmer's intent to
- * eventually come back and add code above to optimize subRE trees,
- * but the routine coded just spends effort traversing the tree and
- * doing nothing. We can do nothing with less effort.
- */
return;
-#endif
}
/*
diff --git a/generic/regcustom.h b/generic/regcustom.h
index 6b6b38c..ac33087 100644
--- a/generic/regcustom.h
+++ b/generic/regcustom.h
@@ -3,13 +3,13 @@
*
* Development of this software was funded, in part, by Cray Research Inc.,
* UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
- * Corporation, none of whom are responsible for the results. The author
+ * Corporation, none of whom are responsible for the results. The author
* thanks all of them.
*
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications.
+ * Redistribution and use in source and binary forms - with or without
+ * modification - are permitted for any purpose, provided that redistributions
+ * in source form retain this entire copyright notice and indicate the origin
+ * and nature of any modifications.
*
* I'd appreciate being given credit for this package in the documentation of
* software which uses it, but that is not a requirement.
@@ -26,23 +26,28 @@
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* headers if any */
+/*
+ * Headers if any.
+ */
+
#include "tclInt.h"
-/* overrides for regguts.h definitions, if any */
-#define FUNCPTR(name, args) (*name) _ANSI_ARGS_(args)
+/*
+ * Overrides for regguts.h definitions, if any.
+ */
+
+#define FUNCPTR(name, args) (*name)args
#define MALLOC(n) ckalloc(n)
#define FREE(p) ckfree(VS(p))
#define REALLOC(p,n) ckrealloc(VS(p),n)
-
-
/*
- * Do not insert extras between the "begin" and "end" lines -- this
- * chunk is automatically extracted to be fitted into regex.h.
+ * Do not insert extras between the "begin" and "end" lines - this chunk is
+ * automatically extracted to be fitted into regex.h.
*/
+
/* --- begin --- */
-/* ensure certain things don't sneak in from system headers */
+/* Ensure certain things don't sneak in from system headers. */
#ifdef __REG_WIDE_T
#undef __REG_WIDE_T
#endif
@@ -67,70 +72,90 @@
#ifdef __REG_NOCHAR
#undef __REG_NOCHAR
#endif
-/* interface types */
+/* Interface types */
#define __REG_WIDE_T Tcl_UniChar
-#define __REG_REGOFF_T long /* not really right, but good enough... */
-#define __REG_VOID_T VOID
-#define __REG_CONST CONST
-/* names and declarations */
+#define __REG_REGOFF_T long /* Not really right, but good enough... */
+#define __REG_VOID_T void
+#define __REG_CONST const
+/* Names and declarations */
#define __REG_WIDE_COMPILE TclReComp
#define __REG_WIDE_EXEC TclReExec
-#define __REG_NOFRONT /* don't want regcomp() and regexec() */
-#define __REG_NOCHAR /* or the char versions */
+#define __REG_NOFRONT /* Don't want regcomp() and regexec() */
+#define __REG_NOCHAR /* Or the char versions */
#define regfree TclReFree
#define regerror TclReError
/* --- end --- */
+/*
+ * Internal character type and related.
+ */
-
-/* internal character type and related */
-typedef Tcl_UniChar chr; /* the type itself */
-typedef int pchr; /* what it promotes to */
-typedef unsigned uchr; /* unsigned type that will hold a chr */
-typedef int celt; /* type to hold chr, MCCE number, or NOCELT */
-#define NOCELT (-1) /* celt value which is not valid chr or MCCE */
-#define CHR(c) (UCHAR(c)) /* turn char literal into chr literal */
-#define DIGITVAL(c) ((c)-'0') /* turn chr digit into its value */
+typedef Tcl_UniChar chr; /* The type itself. */
+typedef int pchr; /* What it promotes to. */
+typedef unsigned uchr; /* Unsigned type that will hold a chr. */
+typedef int celt; /* Type to hold chr, or NOCELT */
+#define NOCELT (-1) /* Celt value which is not valid chr */
+#define CHR(c) (UCHAR(c)) /* Turn char literal into chr literal */
+#define DIGITVAL(c) ((c)-'0') /* Turn chr digit into its value */
#if TCL_UTF_MAX > 3
-#define CHRBITS 32 /* bits in a chr; must not use sizeof */
-#define CHR_MIN 0x00000000 /* smallest and largest chr; the value */
-#define CHR_MAX 0xffffffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */
+#define CHRBITS 32 /* Bits in a chr; must not use sizeof */
+#define CHR_MIN 0x00000000 /* Smallest and largest chr; the value */
+#define CHR_MAX 0xffffffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */
#else
-#define CHRBITS 16 /* bits in a chr; must not use sizeof */
-#define CHR_MIN 0x0000 /* smallest and largest chr; the value */
-#define CHR_MAX 0xffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */
+#define CHRBITS 16 /* Bits in a chr; must not use sizeof */
+#define CHR_MIN 0x0000 /* Smallest and largest chr; the value */
+#define CHR_MAX 0xffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */
#endif
-/* functions operating on chr */
+/*
+ * Functions operating on chr.
+ */
+
#define iscalnum(x) Tcl_UniCharIsAlnum(x)
#define iscalpha(x) Tcl_UniCharIsAlpha(x)
#define iscdigit(x) Tcl_UniCharIsDigit(x)
#define iscspace(x) Tcl_UniCharIsSpace(x)
-/* name the external functions */
+/*
+ * Name the external functions.
+ */
+
#define compile TclReComp
#define exec TclReExec
-/* enable/disable debugging code (by whether REG_DEBUG is defined or not) */
-#if 0 /* no debug unless requested by makefile */
+/*
+ * Enable/disable debugging code (by whether REG_DEBUG is defined or not).
+ */
+
+#if 0 /* No debug unless requested by makefile. */
#define REG_DEBUG /* */
#endif
-/* method of allocating a local workspace */
+/*
+ * Method of allocating a local workspace. We use a thread-specific data
+ * space to store this because the regular expression engine is never
+ * reentered from the same thread; it doesn't make any callbacks.
+ */
+
#if 1
#define AllocVars(vPtr) \
static Tcl_ThreadDataKey varsKey; \
register struct vars *vPtr = (struct vars *) \
- Tcl_GetThreadData(&varsKey, sizeof(struct vars))
+ Tcl_GetThreadData(&varsKey, sizeof(struct vars))
#else
-/* This strategy for allocating workspace is "more proper" in some sense, but
+/*
+ * This strategy for allocating workspace is "more proper" in some sense, but
* quite a bit slower. Using TSD (as above) leads to code that is quite a bit
- * faster in practice. */
+ * faster in practice (measured!)
+ */
#define AllocVars(vPtr) \
register struct vars *vPtr = (struct vars *) MALLOC(sizeof(struct vars))
#define FreeVars(vPtr) \
FREE(vPtr)
#endif
-/* and pick up the standard header */
+/*
+ * And pick up the standard header.
+ */
+
#include "regex.h"
diff --git a/generic/regguts.h b/generic/regguts.h
index bc1d7a2..cbf6615 100644
--- a/generic/regguts.h
+++ b/generic/regguts.h
@@ -60,24 +60,24 @@
/* voids */
#ifndef VOID
-#define VOID void /* for function return values */
+#define VOID void /* for function return values */
#endif
#ifndef DISCARD
-#define DISCARD void /* for throwing values away */
+#define DISCARD void /* for throwing values away */
#endif
#ifndef PVOID
-#define PVOID void * /* generic pointer */
+#define PVOID void * /* generic pointer */
#endif
#ifndef VS
-#define VS(x) ((void*)(x)) /* cast something to generic ptr */
+#define VS(x) ((void*)(x)) /* cast something to generic ptr */
#endif
#ifndef NOPARMS
-#define NOPARMS void /* for empty parm lists */
+#define NOPARMS void /* for empty parm lists */
#endif
/* const */
#ifndef CONST
-#define CONST const /* for old compilers, might be empty */
+#define CONST const /* for old compilers, might be empty */
#endif
/* function-pointer declarator */
@@ -105,7 +105,7 @@
#include <limits.h>
#endif
#ifndef _POSIX2_RE_DUP_MAX
-#define _POSIX2_RE_DUP_MAX 255 /* normally from <limits.h> */
+#define _POSIX2_RE_DUP_MAX 255 /* normally from <limits.h> */
#endif
/*
@@ -189,7 +189,7 @@ union tree {
#define tcolor colors.ccolor
#define tptr ptrs.pptr
-/* internal per-color structure for the color machinery */
+/* Internal per-color descriptor structure for the color machinery */
struct colordesc {
uchr nchrs; /* number of chars of this color */
color sub; /* open subcolor (if any); free chain ptr */
@@ -235,9 +235,9 @@ struct colormap {
/*
* Interface definitions for locale-interface functions in locale.c.
- * Multi-character collating elements (MCCEs) cause most of the trouble.
*/
+/* Representation of a set of characters. */
struct cvec {
int nchrs; /* number of chrs */
int chrspace; /* number of chrs possible */
@@ -245,22 +245,11 @@ struct cvec {
int nranges; /* number of ranges (chr pairs) */
int rangespace; /* number of chrs possible */
chr *ranges; /* pointer to vector of chr pairs */
-#ifdef REGEXP_MCCE_ENABLED
- int nmcces; /* number of MCCEs */
- int mccespace; /* number of MCCEs possible */
- int nmccechrs; /* number of chrs used for MCCEs */
- chr *mcces[1]; /* pointers to 0-terminated MCCEs */
- /* and both batches of chrs are on the end */
-#endif
};
-#ifdef REGEXP_MCCE_ENABLED
-/* caution: this value cannot be changed easily */
-#define MAXMCCE 2 /* length of longest MCCE */
-#endif
-
/*
- * definitions for NFA internal representation
+ * definitions for non-deterministic finite automaton (NFA) internal
+ * representation
*
* Having a "from" pointer within each arc may seem redundant, but it saves a
* lot of hassle.
@@ -288,7 +277,7 @@ struct arcbatch { /* for bulk allocation of arcs */
struct state {
int no;
-# define FREESTATE (-1)
+#define FREESTATE (-1)
char flag; /* marks special states */
int nins; /* number of inarcs */
struct arc *ins; /* chain of inarcs */
@@ -405,7 +394,8 @@ struct guts {
};
/*
- * Magic for allocating a variable workspace.
+ * Magic for allocating a variable workspace. This default version is
+ * stack-hungry.
*/
#ifndef AllocVars