diff options
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | generic/regc_color.c | 16 | ||||
-rw-r--r-- | generic/regc_cvec.c | 118 | ||||
-rw-r--r-- | generic/regc_locale.c | 93 | ||||
-rw-r--r-- | generic/regc_nfa.c | 2 | ||||
-rw-r--r-- | generic/regcomp.c | 383 | ||||
-rw-r--r-- | generic/regcustom.h | 113 | ||||
-rw-r--r-- | generic/regguts.h | 38 |
8 files changed, 133 insertions, 636 deletions
@@ -1,3 +1,9 @@ +2007-11-14 Donal K. Fellows <donal.k.fellows@man.ac.uk> + + * generic/regc*.c: Eliminate multi-char collating element code + completely. Simplifies the code quite a bit. If people still want the + full code, it will remain on the 8.4 branch. [Bug 1831425] + 2007-11-13 Jeff Hobbs <jeffh@ActiveState.com> * generic/tclCompCmds.c (TclCompileRegexpCmd): clean up comments, diff --git a/generic/regc_color.c b/generic/regc_color.c index 02634d9..003f5fc 100644 --- a/generic/regc_color.c +++ b/generic/regc_color.c @@ -678,22 +678,6 @@ uncolorchain( a->colorchain = NULL; /* paranoia */ } -#ifdef REGEXP_MCCE_ENABLED -/* - - singleton - is this character in its own color? - ^ static int singleton(struct colormap *, pchr c); - */ -static int /* predicate */ -singleton( - struct colormap *cm, - pchr c) -{ - color co = GETCOLOR(cm, c); /* color of c */ - - return (cm->cd[co].nchrs == 1) && (cm->cd[co].sub == NOSUB); -} -#endif - /* - rainbow - add arcs of all full colors (but one) between specified states ^ static VOID rainbow(struct nfa *, struct colormap *, int, pcolor, diff --git a/generic/regc_cvec.c b/generic/regc_cvec.c index afb2f48..64f34cd 100644 --- a/generic/regc_cvec.c +++ b/generic/regc_cvec.c @@ -36,37 +36,17 @@ /* - newcvec - allocate a new cvec - ^ static struct cvec *newcvec(int, int, int); + ^ static struct cvec *newcvec(int, int); */ static struct cvec * newcvec( int nchrs, /* to hold this many chrs... */ int nranges) /* ... and this many ranges... */ -#ifdef REGEXP_MCCE_ENABLED - int nmcces) /* ... 
and this many MCCEs */ -#endif { - size_t n, nc; - struct cvec *cv; + size_t nc = (size_t)nchrs + (size_t)nranges*2; + size_t n = sizeof(struct cvec) + nc*sizeof(chr); + struct cvec *cv = (struct cvec *) MALLOC(n); -#ifdef REGEXP_MCCE_ENABLED - nc = (size_t)nchrs + (size_t)nmcces*(MAXMCCE+1) + (size_t)nranges*2; - n = sizeof(struct cvec) + (size_t)(nmcces-1)*sizeof(chr *) - + nc*sizeof(chr); - cv = (struct cvec *) MALLOC(n); - if (cv == NULL) { - return NULL; - } - cv->chrspace = nchrs; - cv->chrs = (chr *)&cv->mcces[nmcces]; /* chrs just after MCCE ptrs */ - cv->mccespace = nmcces; - cv->ranges = cv->chrs + nchrs + nmcces*(MAXMCCE+1); - cv->rangespace = nranges; - return clearcvec(cv); -#else - nc = (size_t)nchrs + (size_t)nranges*2; - n = sizeof(struct cvec) + nc*sizeof(chr); - cv = (struct cvec *) MALLOC(n); if (cv == NULL) { return NULL; } @@ -75,7 +55,6 @@ newcvec( cv->ranges = cv->chrs + nchrs; cv->rangespace = nranges; return clearcvec(cv); -#endif /*REGEXP_MCCE_ENABLED*/ } /* @@ -87,21 +66,9 @@ static struct cvec * clearcvec( struct cvec *cv) /* character vector */ { -#ifdef REGEXP_MCCE_ENABLED - int i; -#endif - assert(cv != NULL); cv->nchrs = 0; cv->nranges = 0; -#ifdef REGEXP_MCCE_ENABLED - assert(cv->chrs == (chr *)&cv->mcces[cv->mccespace]); - cv->nmcces = 0; - cv->nmccechrs = 0; - for (i = 0; i < cv->mccespace; i++) { - cv->mcces[i] = NULL; - } -#endif return cv; } @@ -114,7 +81,6 @@ addchr( struct cvec *cv, /* character vector */ pchr c) /* character to add */ { - assert(cv->nchrs < cv->chrspace - cv->nmccechrs); cv->chrs[cv->nchrs++] = (chr)c; } @@ -134,89 +100,17 @@ addrange( cv->nranges++; } -#ifdef REGEXP_MCCE_ENABLED -/* - * This static function is currently called from a single spot in regcomp.c, - * with two NULL pointers; in that case it does nothing, so that we define out - * both the call and the code. 
- */ - -/* - - addmcce - add an MCCE to a cvec - ^ static VOID addmcce(struct cvec *, const chr *, const chr *); - */ - -static void -addmcce( - struct cvec *cv, /* character vector */ - const chr *startp, /* beginning of text */ - const chr *endp) /* just past end of text */ -{ - int len, i; - const chr *s, *d; - - if (startp == NULL && endp == NULL) { - return; - } - len = endp - startp; - assert(len > 0); - assert(cv->nchrs + len < cv->chrspace - cv->nmccechrs); - assert(cv->nmcces < cv->mccespace); - d = &cv->chrs[cv->chrspace - cv->nmccechrs - len - 1]; - cv->mcces[cv->nmcces++] = d; - for (s = startp, i = len; i > 0; s++, i--) { - *d++ = *s; - } - *d++ = 0; /* endmarker */ - assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]); - cv->nmccechrs += len + 1; -} -#endif - -/* - - haschr - does a cvec contain this chr? - ^ static int haschr(struct cvec *, pchr); - */ -#ifdef REGEXP_MCCE_ENABLED -static int /* predicate */ -haschr( - struct cvec *cv, /* character vector */ - pchr c) /* character to test for */ -{ - int i; - const chr *p; - - for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) { - if (*p == c) { - return 1; - } - } - for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) { - if ((*p <= c) && (c <= *(p+1))) { - return 1; - } - } - return 0; -} -#endif - /* - getcvec - get a cvec, remembering it as v->cv - ^ static struct cvec *getcvec(struct vars *, int, int, int); + ^ static struct cvec *getcvec(struct vars *, int, int); */ static struct cvec * getcvec( struct vars *v, /* context */ int nchrs, /* to hold this many chrs... */ int nranges) /* ... and this many ranges... */ -#ifdef REGEXP_MCCE_ENABLED - int nmcces) /* ... 
and this many MCCEs */ -#endif { if ((v->cv != NULL) && (nchrs <= v->cv->chrspace) && -#ifdef REGEXP_MCCE_ENABLED - (nmcces <= v->cv->mccespace) && -#endif (nranges <= v->cv->rangespace)) { return clearcvec(v->cv); } @@ -224,7 +118,7 @@ getcvec( if (v->cv != NULL) { freecvec(v->cv); } - v->cv = newcvec(nchrs, nranges/*, nmcces*/); + v->cv = newcvec(nchrs, nranges); if (v->cv == NULL) { ERR(REG_ESPACE); } diff --git a/generic/regc_locale.c b/generic/regc_locale.c index 438e821..ac310c9 100644 --- a/generic/regc_locale.c +++ b/generic/regc_locale.c @@ -9,7 +9,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: regc_locale.c,v 1.18 2007/11/14 00:07:58 dkf Exp $ + * RCS: @(#) $Id: regc_locale.c,v 1.19 2007/11/14 11:04:59 dkf Exp $ */ /* ASCII character-name table */ @@ -611,49 +611,6 @@ static const chr printCharTable[] = { #define CH NOCELT /* - - nmcces - how many distinct MCCEs are there? - ^ static int nmcces(struct vars *); - */ -#ifdef REGEXP_MCCE_ENABLED -static int -nmcces( - struct vars *v) /* context */ -{ - /* - * No multi-character collating elements defined at the moment. - */ - return 0; -} -#endif - -/* - - nleaders - how many chrs can be first chrs of MCCEs? 
- ^ static int nleaders(struct vars *); - */ -#ifdef REGEXP_MCCE_ENABLED -static int -nleaders( - struct vars *v) /* context */ -{ - return 0; -} -#endif - -/* - - allmcces - return a cvec with all the MCCEs of the locale - ^ static struct cvec *allmcces(struct vars *, struct cvec *); - */ -#ifdef REGEXP_MCCE_ENABLED -static struct cvec * -allmcces( - struct vars *v, /* context */ - struct cvec *cv) /* this is supposed to have enough room */ -{ - return clearcvec(cv); -} -#endif - -/* - element - map collating-element name to celt ^ static celt element(struct vars *, const chr *, const chr *); */ @@ -724,8 +681,8 @@ range( return NULL; } - if (!cases) { /* easy version */ - cv = getcvec(v, 0, 1/*, 0*/); + if (!cases) { /* easy version */ + cv = getcvec(v, 0, 1); NOERRN(); addrange(cv, a, b); return cv; @@ -739,7 +696,7 @@ range( nchrs = (b - a + 1)*2 + 4; - cv = getcvec(v, nchrs, 0/*, 0*/); + cv = getcvec(v, nchrs, 0); NOERRN(); for (c=a; c<=b; c++) { @@ -765,14 +722,10 @@ range( - before - is celt x before celt y, for purposes of range legality? ^ static int before(celt, celt); */ -static int /* predicate */ +static int /* predicate */ before( - celt x, celt y) /* collating elements */ + celt x, celt y) /* collating elements */ { - /* - * trivial because no MCCEs. 
- */ - if (x < y) { return 1; } @@ -798,7 +751,7 @@ eclass( */ if ((v->cflags&REG_FAKE) && c == 'x') { - cv = getcvec(v, 4, 0/*, 0*/); + cv = getcvec(v, 4, 0); addchr(cv, (chr)'x'); addchr(cv, (chr)'y'); if (cases) { @@ -815,7 +768,7 @@ eclass( if (cases) { return allcases(v, c); } - cv = getcvec(v, 1, 0/*, 0*/); + cv = getcvec(v, 1, 0); assert(cv != NULL); addchr(cv, (chr)c); return cv; @@ -895,7 +848,7 @@ cclass( switch((enum classes) index) { case CC_PRINT: - cv = getcvec(v, NUM_PRINT_CHAR, NUM_PRINT_RANGE/*, 0*/); + cv = getcvec(v, NUM_PRINT_CHAR, NUM_PRINT_RANGE); if (cv) { for (i=0 ; (size_t)i<NUM_PRINT_CHAR ; i++) { addchr(cv, printCharTable[i]); @@ -907,7 +860,7 @@ cclass( } break; case CC_ALNUM: - cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE/*, 0*/); + cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE); if (cv) { for (i=0 ; (size_t)i<NUM_ALPHA_CHAR ; i++) { addchr(cv, alphaCharTable[i]); @@ -923,7 +876,7 @@ cclass( } break; case CC_ALPHA: - cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE/*, 0*/); + cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE); if (cv) { for (i=0 ; (size_t)i<NUM_ALPHA_RANGE ; i++) { addrange(cv, alphaRangeTable[i].start, @@ -935,23 +888,23 @@ cclass( } break; case CC_ASCII: - cv = getcvec(v, 0, 1/*, 0*/); + cv = getcvec(v, 0, 1); if (cv) { addrange(cv, 0, 0x7f); } break; case CC_BLANK: - cv = getcvec(v, 2, 0/*, 0*/); + cv = getcvec(v, 2, 0); addchr(cv, '\t'); addchr(cv, ' '); break; case CC_CNTRL: - cv = getcvec(v, 0, 2/*, 0*/); + cv = getcvec(v, 0, 2); addrange(cv, 0x0, 0x1f); addrange(cv, 0x7f, 0x9f); break; case CC_DIGIT: - cv = getcvec(v, 0, NUM_DIGIT_RANGE/*, 0*/); + cv = getcvec(v, 0, NUM_DIGIT_RANGE); if (cv) { for (i=0 ; (size_t)i<NUM_DIGIT_RANGE ; i++) { addrange(cv, digitRangeTable[i].start, @@ -960,7 +913,7 @@ cclass( } break; case CC_PUNCT: - cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE/*, 0*/); + cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE); if (cv) { for (i=0 ; 
(size_t)i<NUM_PUNCT_RANGE ; i++) { addrange(cv, punctRangeTable[i].start, @@ -981,7 +934,7 @@ cclass( * someone comes up with a better arrangement!) */ - cv = getcvec(v, 0, 3/*, 0*/); + cv = getcvec(v, 0, 3); if (cv) { addrange(cv, '0', '9'); addrange(cv, 'a', 'f'); @@ -989,7 +942,7 @@ cclass( } break; case CC_SPACE: - cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE/*, 0*/); + cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE); if (cv) { for (i=0 ; (size_t)i<NUM_SPACE_RANGE ; i++) { addrange(cv, spaceRangeTable[i].start, @@ -1001,7 +954,7 @@ cclass( } break; case CC_LOWER: - cv = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE/*, 0*/); + cv = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE); if (cv) { for (i=0 ; (size_t)i<NUM_LOWER_RANGE ; i++) { addrange(cv, lowerRangeTable[i].start, @@ -1013,7 +966,7 @@ cclass( } break; case CC_UPPER: - cv = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE/*, 0*/); + cv = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE); if (cv) { for (i=0 ; (size_t)i<NUM_UPPER_RANGE ; i++) { addrange(cv, upperRangeTable[i].start, @@ -1025,7 +978,7 @@ cclass( } break; case CC_GRAPH: - cv = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE/*, 0*/); + cv = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE); if (cv) { for (i=0 ; (size_t)i<NUM_GRAPH_RANGE ; i++) { addrange(cv, graphRangeTable[i].start, @@ -1063,10 +1016,10 @@ allcases( tc = Tcl_UniCharToTitle((chr)c); if (tc != uc) { - cv = getcvec(v, 3, 0/*, 0*/); + cv = getcvec(v, 3, 0); addchr(cv, tc); } else { - cv = getcvec(v, 2, 0/*, 0*/); + cv = getcvec(v, 2, 0); } addchr(cv, lc); if (lc != uc) { diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c index 9f63f73..20e821f 100644 --- a/generic/regc_nfa.c +++ b/generic/regc_nfa.c @@ -88,7 +88,7 @@ newnfa( - freenfa - free an entire NFA ^ static VOID freenfa(struct nfa *); */ -static VOID +static void freenfa( struct nfa *nfa) { diff --git a/generic/regcomp.c b/generic/regcomp.c index 8a43240..b397334 100644 --- a/generic/regcomp.c +++ b/generic/regcomp.c @@ -55,10 +55,6 @@ static 
void brackpart(struct vars *, struct state *, struct state *); static const chr *scanplain(struct vars *); static void onechr(struct vars *, pchr, struct state *, struct state *); static void dovec(struct vars *, struct cvec *, struct state *, struct state *); -#ifdef REGEXP_MCCE_ENABLED -static void leaders(struct vars *, struct cvec *); -static celt nextleader(struct vars *, pchr, pchr); -#endif static void wordchrs(struct vars *); static struct subre *subre(struct vars *, int, int, struct state *, struct state *); static void freesubre(struct vars *, struct subre *); @@ -107,9 +103,6 @@ static void subblock(struct vars *, pchr, struct state *, struct state *); static void okcolors(struct nfa *, struct colormap *); static void colorchain(struct colormap *, struct arc *); static void uncolorchain(struct colormap *, struct arc *); -#ifdef REGEXP_MCCE_ENABLED -static int singleton(struct colormap *, pchr c); -#endif static void rainbow(struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *); static void colorcomplement(struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *); #ifdef REG_DEBUG @@ -174,22 +167,10 @@ static void dumpcstate(int, struct carc *, struct cnfa *, FILE *); static struct cvec *clearcvec(struct cvec *); static void addchr(struct cvec *, pchr); static void addrange(struct cvec *, pchr, pchr); -#ifdef REGEXP_MCCE_ENABLED -static struct cvec *newcvec(int, int, int); -static void addmcce(struct cvec *, const chr *, const chr *); -static struct cvec *getcvec(struct vars *, int, int, int); -static int haschr(struct cvec *, pchr); -#else static struct cvec *newcvec(int, int); static struct cvec *getcvec(struct vars *, int, int); -#endif static void freecvec(struct cvec *); /* === regc_locale.c === */ -#ifdef REGEXP_MCCE_ENABLED -static int nleaders(struct vars *); -static int nmcces(struct vars *); -static struct cvec *allmcces(struct vars *, struct cvec *); -#endif static celt element(struct vars *, 
const chr *, const chr *); static struct cvec *range(struct vars *, celt, celt, int); static int before(celt, celt); @@ -228,12 +209,6 @@ struct vars { int ntree; /* number of tree nodes */ struct cvec *cv; /* interface cvec */ struct cvec *cv2; /* utility cvec */ -#ifdef REGEXP_MCCE_ENABLED - struct cvec *mcces; /* collating-element information */ -#define ISCELEADER(v,c) (v->mcces != NULL && haschr(v->mcces, (c))) - struct state *mccepbegin; /* in nfa, start of MCCE prototypes */ - struct state *mccepend; /* in nfa, end of MCCE prototypes */ -#endif struct subre *lacons; /* lookahead-constraint vector */ int nlacons; /* size of lacons */ }; @@ -343,9 +318,6 @@ compile( v->treefree = NULL; v->cv = NULL; v->cv2 = NULL; -#ifdef REGEXP_MCCE_ENABLED - v->mcces = NULL; -#endif v->lacons = NULL; v->nlacons = 0; re->re_magic = REMAGIC; @@ -375,18 +347,6 @@ compile( if (v->cv == NULL) { return freev(v, REG_ESPACE); } -#ifdef REGEXP_MCCE_ENABLED - i = nmcces(v); - if (i > 0) { - v->mcces = newcvec(nleaders(v), 0); - CNOERR(); - v->mcces = allmcces(v, v->mcces); - leaders(v, v->mcces); - /* Function does nothing with NULL pointers */ - addmcce(v->mcces, NULL, NULL); /* dummy */ - } - CNOERR(); -#endif /* * Parsing. 
@@ -559,11 +519,6 @@ freev( if (v->cv2 != NULL) { freecvec(v->cv2); } -#ifdef REGEXP_MCCE_ENABLED - if (v->mcces != NULL) { - freecvec(v->mcces); - } -#endif if (v->lacons != NULL) { freelacons(v->lacons, v->nlacons); } @@ -850,7 +805,6 @@ parseqatom( } NEXT(); return; - break; case '$': ARCV('$', 1); if (v->cflags&REG_NLANCH) { @@ -858,19 +812,16 @@ parseqatom( } NEXT(); return; - break; case SBEGIN: ARCV('^', 1); /* BOL */ ARCV('^', 0); /* or BOS */ NEXT(); return; - break; case SEND: ARCV('$', 1); /* EOL */ ARCV('$', 0); /* or EOS */ NEXT(); return; - break; case '<': wordchrs(v); /* does NEXT() */ s = newstate(v->nfa); @@ -878,7 +829,6 @@ parseqatom( nonword(v, BEHIND, lp, s); word(v, AHEAD, s, rp); return; - break; case '>': wordchrs(v); /* does NEXT() */ s = newstate(v->nfa); @@ -886,7 +836,6 @@ parseqatom( word(v, BEHIND, lp, s); nonword(v, AHEAD, s, rp); return; - break; case WBDRY: wordchrs(v); /* does NEXT() */ s = newstate(v->nfa); @@ -898,7 +847,6 @@ parseqatom( word(v, BEHIND, lp, s); nonword(v, AHEAD, s, rp); return; - break; case NWBDRY: wordchrs(v); /* does NEXT() */ s = newstate(v->nfa); @@ -910,7 +858,6 @@ parseqatom( nonword(v, BEHIND, lp, s); nonword(v, AHEAD, s, rp); return; - break; case LACON: /* lookahead constraint */ pos = v->nextvalue; NEXT(); @@ -925,7 +872,6 @@ parseqatom( NOERR(); ARCV(LACON, n); return; - break; /* * Then errors, to get them out of the way. @@ -937,11 +883,9 @@ parseqatom( case '{': ERR(REG_BADRPT); return; - break; default: ERR(REG_ASSERT); return; - break; /* * Then plain characters, and minor variants on that theme. 
@@ -1478,15 +1422,6 @@ cbracket( { struct state *left = newstate(v->nfa); struct state *right = newstate(v->nfa); -#ifdef REGEXP_MCCE_ENABLED - struct state *s; - struct arc *a; /* arc from lp */ - struct arc *ba; /* arc from left, from bracket() */ - struct arc *pa; /* MCCE-prototype arc */ - color co; - const chr *p; - int i; -#endif NOERR(); bracket(v, left, right); @@ -1498,69 +1433,16 @@ cbracket( assert(lp->nouts == 0); /* all outarcs will be ours */ /* - * Easy part of complementing + * Easy part of complementing, and all there is to do since the MCCE code + * was removed. */ colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp); NOERR(); - if (1 /*v->mcces == NULL*/) { /* no MCCEs -- we're done */ - dropstate(v->nfa, left); - assert(right->nins == 0); - freestate(v->nfa, right); - return; - } - -#ifdef REGEXP_MCCE_ENABLED - /* - * But complementing gets messy in the presence of MCCEs... - */ - - NOTE(REG_ULOCALE); - for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) { - co = GETCOLOR(v->cm, *p); - a = findarc(lp, PLAIN, co); - ba = findarc(left, PLAIN, co); - if (ba == NULL) { - assert(a != NULL); - freearc(v->nfa, a); - } else { - assert(a == NULL); - } - s = newstate(v->nfa); - NOERR(); - newarc(v->nfa, PLAIN, co, lp, s); - NOERR(); - pa = findarc(v->mccepbegin, PLAIN, co); - assert(pa != NULL); - if (ba == NULL) { /* easy case, need all of them */ - cloneouts(v->nfa, pa->to, s, rp, PLAIN); - newarc(v->nfa, '$', 1, s, rp); - newarc(v->nfa, '$', 0, s, rp); - colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp); - } else { /* must be selective */ - if (findarc(ba->to, '$', 1) == NULL) { - newarc(v->nfa, '$', 1, s, rp); - newarc(v->nfa, '$', 0, s, rp); - colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp); - } - for (pa = pa->to->outs; pa != NULL; pa = pa->outchain) { - if (findarc(ba->to, PLAIN, pa->co) == NULL) { - newarc(v->nfa, PLAIN, pa->co, s, rp); - } - } - if (s->nouts == 0) { /* limit of selectivity: none */ - dropstate(v->nfa, s); /* 
frees arc too */ - } - } - NOERR(); - } - - delsub(v->nfa, left, right); - assert(left->nouts == 0); - freestate(v->nfa, left); + dropstate(v->nfa, left); assert(right->nins == 0); freestate(v->nfa, right); -#endif + return; } /* @@ -1592,10 +1474,10 @@ brackpart( NEXT(); /* - * Shortcut for ordinary chr (not range, not MCCE leader). + * Shortcut for ordinary chr (not range). */ - if (!SEE(RANGE) /*&& !ISCELEADER(v, c[0])*/) { + if (!SEE(RANGE)) { onechr(v, c[0], lp, rp); return; } @@ -1706,50 +1588,6 @@ scanplain( } /* - - leaders - process a cvec of collating elements to also include leaders - * Also gives all characters involved their own colors, which is almost - * certainly necessary, and sets up little disconnected subNFA. - ^ static void leaders(struct vars *, struct cvec *); - */ -#ifdef REGEXP_MCCE_ENABLED -static void -leaders( - struct vars *v, - struct cvec *cv) -{ - int mcce; - const chr *p; - chr leader; - struct state *s; - struct arc *a; - - v->mccepbegin = newstate(v->nfa); - v->mccepend = newstate(v->nfa); - NOERR(); - - for (mcce = 0; mcce < cv->nmcces; mcce++) { - p = cv->mcces[mcce]; - leader = *p; - if (!haschr(cv, leader)) { - addchr(cv, leader); - s = newstate(v->nfa); - newarc(v->nfa, PLAIN, subcolor(v->cm, leader), v->mccepbegin, s); - okcolors(v->nfa, v->cm); - } else { - a = findarc(v->mccepbegin, PLAIN, GETCOLOR(v->cm, leader)); - assert(a != NULL); - s = a->to; - assert(s != v->mccepend); - } - p++; - assert(*p != 0 && *(p+1) == 0); /* only 2-char MCCEs for now */ - newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend); - okcolors(v->nfa, v->cm); - } -} -#endif - -/* - onechr - fill in arcs for a plain character, and possible case complements * This is mostly a shortcut for efficient handling of the common case. ^ static void onechr(struct vars *, pchr, struct state *, struct state *); @@ -1766,17 +1604,18 @@ onechr( return; } - /* rats, need general case anyway... */ + /* + * Rats, need general case anyway... 
+ */ + dovec(v, allcases(v, c), lp, rp); } /* - dovec - fill in arcs for each element of a cvec - * This one has to handle the messy cases, like MCCEs and MCCE leaders. ^ static void dovec(struct vars *, struct cvec *, struct state *, ^ struct state *); */ -#ifndef REGEXP_MCCE_ENABLED static void dovec( struct vars *v, @@ -1802,184 +1641,6 @@ dovec( } } -#else /* REGEXP_MCCE_ENABLED */ -static void -dovec( - struct vars *v, - struct cvec *cv, - struct state *lp, - struct state *rp) -{ - chr ch, from, to; - celt ce; - const chr *p; - int i; - struct cvec *leads; - color co; - struct arc *a; - struct arc *pa; /* arc in prototype */ - struct state *s; - struct state *ps; /* state in prototype */ - - /* - * Need a place to store leaders, if any. - */ - - if (nmcces(v) > 0) { - assert(v->mcces != NULL); - if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs) { - if (v->cv2 != NULL) { - free(v->cv2); - } - v->cv2 = newcvec(v->mcces->nchrs, 0, v->mcces->nmcces); - NOERR(); - leads = v->cv2; - } else { - leads = clearcvec(v->cv2); - } - } else { - leads = NULL; - } - - /* - * First, get the ordinary characters out of the way. - */ - - for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) { - ch = *p; - if (!ISCELEADER(v, ch)) { - newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp); - } else { - assert(singleton(v->cm, ch)); - assert(leads != NULL); - if (!haschr(leads, ch)) { - addchr(leads, ch); - } - } - } - - /* - * And the ranges. 
- */ - - for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) { - from = *p; - to = *(p+1); - while (from <= to && (ce = nextleader(v, from, to)) != NOCELT) { - if (from < ce) { - subrange(v, from, ce - 1, lp, rp); - } - assert(singleton(v->cm, ce)); - assert(leads != NULL); - if (!haschr(leads, ce)) { - addchr(leads, ce); - } - from = ce + 1; - } - if (from <= to) { - subrange(v, from, to, lp, rp); - } - } - - /* *** WARNING *** - * - * This was buggy, check before enabling: the original version would cause - * a segfault at the loopinit below if (leads==NULL && cv->nmcces!=0) - * Possibly just a problem with parens? The original condition was - * ((leads == NULL || leads->nchrs == 0) && cv->nmcces == 0) - */ - - if (leads == NULL || (leads->nchrs == 0 && cv->nmcces == 0)) { - return; - } - - /* - * Deal with the MCCE leaders. - */ - - NOTE(REG_ULOCALE); - for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--) { - co = GETCOLOR(v->cm, *p); - a = findarc(lp, PLAIN, co); - if (a != NULL) { - s = a->to; - } else { - s = newstate(v->nfa); - NOERR(); - newarc(v->nfa, PLAIN, co, lp, s); - NOERR(); - } - pa = findarc(v->mccepbegin, PLAIN, co); - assert(pa != NULL); - ps = pa->to; - newarc(v->nfa, '$', 1, s, rp); - newarc(v->nfa, '$', 0, s, rp); - colorcomplement(v->nfa, v->cm, AHEAD, ps, s, rp); - NOERR(); - } - - /* - * And the MCCEs. 
- */ - - for (i = 0; i < cv->nmcces; i++) { - p = cv->mcces[i]; - assert(singleton(v->cm, *p)); - if (!singleton(v->cm, *p)) { - ERR(REG_ASSERT); - return; - } - ch = *p++; - co = GETCOLOR(v->cm, ch); - a = findarc(lp, PLAIN, co); - if (a != NULL) { - s = a->to; - } else { - s = newstate(v->nfa); - NOERR(); - newarc(v->nfa, PLAIN, co, lp, s); - NOERR(); - } - assert(*p != 0); /* at least two chars */ - assert(singleton(v->cm, *p)); - ch = *p++; - co = GETCOLOR(v->cm, ch); - assert(*p == 0); /* and only two, for now */ - newarc(v->nfa, PLAIN, co, s, rp); - NOERR(); - } -} - -/* - - nextleader - find next MCCE leader within range - ^ static celt nextleader(struct vars *, pchr, pchr); - */ -static celt /* NOCELT means none */ -nextleader( - struct vars *v, - pchr from, - pchr to) -{ - int i; - const chr *p; - chr ch; - celt it = NOCELT; - - if (v->mcces == NULL) { - return it; - } - - for (i = v->mcces->nchrs, p = v->mcces->chrs; i > 0; i--, p++) { - ch = *p; - if (from <= ch && ch <= to) { - if (it == NOCELT || ch < it) { - it = ch; - } - } - } - return it; -} -#endif /* - wordchrs - set up word-chr list for word-boundary stuff, if needed @@ -2120,30 +1781,14 @@ optst( struct vars *v, struct subre *t) { -#if 0 - if (t == NULL) { - return; - } - /* - * Recurse through children. + * DGP (2007-11-13): I assume it was the programmer's intent to eventually + * come back and add code to optimize subRE trees, but the routine coded + * just spends effort traversing the tree and doing nothing. We can do + * nothing with less effort. */ - if (t->left != NULL) { - optst(v, t->left); - } - if (t->right != NULL) { - optst(v, t->right); - } -#else - /* - * DGP (2007-11-13): I assume it was the programmer's intent to - * eventually come back and add code above to optimize subRE trees, - * but the routine coded just spends effort traversing the tree and - * doing nothing. We can do nothing with less effort. 
- */ return; -#endif } /* diff --git a/generic/regcustom.h b/generic/regcustom.h index 6b6b38c..ac33087 100644 --- a/generic/regcustom.h +++ b/generic/regcustom.h @@ -3,13 +3,13 @@ * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author + * Corporation, none of whom are responsible for the results. The author * thanks all of them. * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. + * Redistribution and use in source and binary forms - with or without + * modification - are permitted for any purpose, provided that redistributions + * in source form retain this entire copyright notice and indicate the origin + * and nature of any modifications. * * I'd appreciate being given credit for this package in the documentation of * software which uses it, but that is not a requirement. @@ -26,23 +26,28 @@ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* headers if any */ +/* + * Headers if any. + */ + #include "tclInt.h" -/* overrides for regguts.h definitions, if any */ -#define FUNCPTR(name, args) (*name) _ANSI_ARGS_(args) +/* + * Overrides for regguts.h definitions, if any. + */ + +#define FUNCPTR(name, args) (*name)args #define MALLOC(n) ckalloc(n) #define FREE(p) ckfree(VS(p)) #define REALLOC(p,n) ckrealloc(VS(p),n) - - /* - * Do not insert extras between the "begin" and "end" lines -- this - * chunk is automatically extracted to be fitted into regex.h. + * Do not insert extras between the "begin" and "end" lines - this chunk is + * automatically extracted to be fitted into regex.h. 
*/ + /* --- begin --- */ -/* ensure certain things don't sneak in from system headers */ +/* Ensure certain things don't sneak in from system headers. */ #ifdef __REG_WIDE_T #undef __REG_WIDE_T #endif @@ -67,70 +72,90 @@ #ifdef __REG_NOCHAR #undef __REG_NOCHAR #endif -/* interface types */ +/* Interface types */ #define __REG_WIDE_T Tcl_UniChar -#define __REG_REGOFF_T long /* not really right, but good enough... */ -#define __REG_VOID_T VOID -#define __REG_CONST CONST -/* names and declarations */ +#define __REG_REGOFF_T long /* Not really right, but good enough... */ +#define __REG_VOID_T void +#define __REG_CONST const +/* Names and declarations */ #define __REG_WIDE_COMPILE TclReComp #define __REG_WIDE_EXEC TclReExec -#define __REG_NOFRONT /* don't want regcomp() and regexec() */ -#define __REG_NOCHAR /* or the char versions */ +#define __REG_NOFRONT /* Don't want regcomp() and regexec() */ +#define __REG_NOCHAR /* Or the char versions */ #define regfree TclReFree #define regerror TclReError /* --- end --- */ +/* + * Internal character type and related. + */ - -/* internal character type and related */ -typedef Tcl_UniChar chr; /* the type itself */ -typedef int pchr; /* what it promotes to */ -typedef unsigned uchr; /* unsigned type that will hold a chr */ -typedef int celt; /* type to hold chr, MCCE number, or NOCELT */ -#define NOCELT (-1) /* celt value which is not valid chr or MCCE */ -#define CHR(c) (UCHAR(c)) /* turn char literal into chr literal */ -#define DIGITVAL(c) ((c)-'0') /* turn chr digit into its value */ +typedef Tcl_UniChar chr; /* The type itself. */ +typedef int pchr; /* What it promotes to. */ +typedef unsigned uchr; /* Unsigned type that will hold a chr. 
*/ +typedef int celt; /* Type to hold chr, or NOCELT */ +#define NOCELT (-1) /* Celt value which is not valid chr */ +#define CHR(c) (UCHAR(c)) /* Turn char literal into chr literal */ +#define DIGITVAL(c) ((c)-'0') /* Turn chr digit into its value */ #if TCL_UTF_MAX > 3 -#define CHRBITS 32 /* bits in a chr; must not use sizeof */ -#define CHR_MIN 0x00000000 /* smallest and largest chr; the value */ -#define CHR_MAX 0xffffffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */ +#define CHRBITS 32 /* Bits in a chr; must not use sizeof */ +#define CHR_MIN 0x00000000 /* Smallest and largest chr; the value */ +#define CHR_MAX 0xffffffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */ #else -#define CHRBITS 16 /* bits in a chr; must not use sizeof */ -#define CHR_MIN 0x0000 /* smallest and largest chr; the value */ -#define CHR_MAX 0xffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */ +#define CHRBITS 16 /* Bits in a chr; must not use sizeof */ +#define CHR_MIN 0x0000 /* Smallest and largest chr; the value */ +#define CHR_MAX 0xffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */ #endif -/* functions operating on chr */ +/* + * Functions operating on chr. + */ + #define iscalnum(x) Tcl_UniCharIsAlnum(x) #define iscalpha(x) Tcl_UniCharIsAlpha(x) #define iscdigit(x) Tcl_UniCharIsDigit(x) #define iscspace(x) Tcl_UniCharIsSpace(x) -/* name the external functions */ +/* + * Name the external functions. + */ + #define compile TclReComp #define exec TclReExec -/* enable/disable debugging code (by whether REG_DEBUG is defined or not) */ -#if 0 /* no debug unless requested by makefile */ +/* +& Enable/disable debugging code (by whether REG_DEBUG is defined or not). +*/ + +#if 0 /* No debug unless requested by makefile. */ #define REG_DEBUG /* */ #endif -/* method of allocating a local workspace */ +/* + * Method of allocating a local workspace. 
We used a thread-specific data + * space to store this because the regular expression engine is never + * reentered from the same thread; it doesn't make any callbacks. + */ + #if 1 #define AllocVars(vPtr) \ static Tcl_ThreadDataKey varsKey; \ register struct vars *vPtr = (struct vars *) \ - Tcl_GetThreadData(&varsKey, sizeof(struct vars)) + Tcl_GetThreadData(&varsKey, sizeof(struct vars)) #else -/* This strategy for allocating workspace is "more proper" in some sense, but +/* + * This strategy for allocating workspace is "more proper" in some sense, but * quite a bit slower. Using TSD (as above) leads to code that is quite a bit - * faster in practice. */ + * faster in practice (measured!) + */ #define AllocVars(vPtr) \ register struct vars *vPtr = (struct vars *) MALLOC(sizeof(struct vars)) #define FreeVars(vPtr) \ FREE(vPtr) #endif -/* and pick up the standard header */ +/* + * And pick up the standard header. + */ + #include "regex.h" diff --git a/generic/regguts.h b/generic/regguts.h index bc1d7a2..cbf6615 100644 --- a/generic/regguts.h +++ b/generic/regguts.h @@ -60,24 +60,24 @@ /* voids */ #ifndef VOID -#define VOID void /* for function return values */ +#define VOID void /* for function return values */ #endif #ifndef DISCARD -#define DISCARD void /* for throwing values away */ +#define DISCARD void /* for throwing values away */ #endif #ifndef PVOID -#define PVOID void * /* generic pointer */ +#define PVOID void * /* generic pointer */ #endif #ifndef VS -#define VS(x) ((void*)(x)) /* cast something to generic ptr */ +#define VS(x) ((void*)(x)) /* cast something to generic ptr */ #endif #ifndef NOPARMS -#define NOPARMS void /* for empty parm lists */ +#define NOPARMS void /* for empty parm lists */ #endif /* const */ #ifndef CONST -#define CONST const /* for old compilers, might be empty */ +#define CONST const /* for old compilers, might be empty */ #endif /* function-pointer declarator */ @@ -105,7 +105,7 @@ #include <limits.h> #endif #ifndef 
_POSIX2_RE_DUP_MAX -#define _POSIX2_RE_DUP_MAX 255 /* normally from <limits.h> */ +#define _POSIX2_RE_DUP_MAX 255 /* normally from <limits.h> */ #endif /* @@ -189,7 +189,7 @@ union tree { #define tcolor colors.ccolor #define tptr ptrs.pptr -/* internal per-color structure for the color machinery */ +/* Internal per-color descriptor structure for the color machinery */ struct colordesc { uchr nchrs; /* number of chars of this color */ color sub; /* open subcolor (if any); free chain ptr */ @@ -235,9 +235,9 @@ struct colormap { /* * Interface definitions for locale-interface functions in locale.c. - * Multi-character collating elements (MCCEs) cause most of the trouble. */ +/* Representation of a set of characters. */ struct cvec { int nchrs; /* number of chrs */ int chrspace; /* number of chrs possible */ @@ -245,22 +245,11 @@ struct cvec { int nranges; /* number of ranges (chr pairs) */ int rangespace; /* number of chrs possible */ chr *ranges; /* pointer to vector of chr pairs */ -#ifdef REGEXP_MCCE_ENABLED - int nmcces; /* number of MCCEs */ - int mccespace; /* number of MCCEs possible */ - int nmccechrs; /* number of chrs used for MCCEs */ - chr *mcces[1]; /* pointers to 0-terminated MCCEs */ - /* and both batches of chrs are on the end */ -#endif }; -#ifdef REGEXP_MCCE_ENABLED -/* caution: this value cannot be changed easily */ -#define MAXMCCE 2 /* length of longest MCCE */ -#endif - /* - * definitions for NFA internal representation + * definitions for non-deterministic finite autmaton (NFA) internal + * representation * * Having a "from" pointer within each arc may seem redundant, but it saves a * lot of hassle. @@ -288,7 +277,7 @@ struct arcbatch { /* for bulk allocation of arcs */ struct state { int no; -# define FREESTATE (-1) +#define FREESTATE (-1) char flag; /* marks special states */ int nins; /* number of inarcs */ struct arc *ins; /* chain of inarcs */ @@ -405,7 +394,8 @@ struct guts { }; /* - * Magic for allocating a variable workspace. 
+ * Magic for allocating a variable workspace. This default version is + * stack-hungry. */ #ifndef AllocVars |