diff options
author | dkf <donal.k.fellows@manchester.ac.uk> | 2007-11-14 11:04:58 (GMT) |
---|---|---|
committer | dkf <donal.k.fellows@manchester.ac.uk> | 2007-11-14 11:04:58 (GMT) |
commit | 45b2a969f63ab7f6184c8acaec8bb3d525650090 (patch) | |
tree | 145f0b7237a0fa87bff299c233682cca06b5cdd5 /generic/regcomp.c | |
parent | 32e51a96573d4ab8a91f7e5484b7ed780b875ae7 (diff) | |
download | tcl-45b2a969f63ab7f6184c8acaec8bb3d525650090.zip tcl-45b2a969f63ab7f6184c8acaec8bb3d525650090.tar.gz tcl-45b2a969f63ab7f6184c8acaec8bb3d525650090.tar.bz2 |
Eliminate multi-char collating element code completely. Simplifies the code
quite a bit. If people still want the full code, it will remain on the 8.4
branch. [Bug 1831425]
Diffstat (limited to 'generic/regcomp.c')
-rw-r--r-- | generic/regcomp.c | 383 |
1 files changed, 14 insertions, 369 deletions
diff --git a/generic/regcomp.c b/generic/regcomp.c index 8a43240..b397334 100644 --- a/generic/regcomp.c +++ b/generic/regcomp.c @@ -55,10 +55,6 @@ static void brackpart(struct vars *, struct state *, struct state *); static const chr *scanplain(struct vars *); static void onechr(struct vars *, pchr, struct state *, struct state *); static void dovec(struct vars *, struct cvec *, struct state *, struct state *); -#ifdef REGEXP_MCCE_ENABLED -static void leaders(struct vars *, struct cvec *); -static celt nextleader(struct vars *, pchr, pchr); -#endif static void wordchrs(struct vars *); static struct subre *subre(struct vars *, int, int, struct state *, struct state *); static void freesubre(struct vars *, struct subre *); @@ -107,9 +103,6 @@ static void subblock(struct vars *, pchr, struct state *, struct state *); static void okcolors(struct nfa *, struct colormap *); static void colorchain(struct colormap *, struct arc *); static void uncolorchain(struct colormap *, struct arc *); -#ifdef REGEXP_MCCE_ENABLED -static int singleton(struct colormap *, pchr c); -#endif static void rainbow(struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *); static void colorcomplement(struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *); #ifdef REG_DEBUG @@ -174,22 +167,10 @@ static void dumpcstate(int, struct carc *, struct cnfa *, FILE *); static struct cvec *clearcvec(struct cvec *); static void addchr(struct cvec *, pchr); static void addrange(struct cvec *, pchr, pchr); -#ifdef REGEXP_MCCE_ENABLED -static struct cvec *newcvec(int, int, int); -static void addmcce(struct cvec *, const chr *, const chr *); -static struct cvec *getcvec(struct vars *, int, int, int); -static int haschr(struct cvec *, pchr); -#else static struct cvec *newcvec(int, int); static struct cvec *getcvec(struct vars *, int, int); -#endif static void freecvec(struct cvec *); /* === regc_locale.c === */ -#ifdef REGEXP_MCCE_ENABLED -static int nleaders(struct vars *); -static int nmcces(struct vars *); -static struct cvec *allmcces(struct vars *, struct cvec *); -#endif static celt element(struct vars *, const chr *, const chr *); static struct cvec *range(struct vars *, celt, celt, int); static int before(celt, celt); @@ -228,12 +209,6 @@ struct vars { int ntree; /* number of tree nodes */ struct cvec *cv; /* interface cvec */ struct cvec *cv2; /* utility cvec */ -#ifdef REGEXP_MCCE_ENABLED - struct cvec *mcces; /* collating-element information */ -#define ISCELEADER(v,c) (v->mcces != NULL && haschr(v->mcces, (c))) - struct state *mccepbegin; /* in nfa, start of MCCE prototypes */ - struct state *mccepend; /* in nfa, end of MCCE prototypes */ -#endif struct subre *lacons; /* lookahead-constraint vector */ int nlacons; /* size of lacons */ }; @@ -343,9 +318,6 @@ compile( v->treefree = NULL; v->cv = NULL; v->cv2 = NULL; -#ifdef REGEXP_MCCE_ENABLED - v->mcces = NULL; -#endif v->lacons = NULL; v->nlacons = 0; re->re_magic = REMAGIC; @@ -375,18 +347,6 @@ compile( if (v->cv == NULL) { return freev(v, REG_ESPACE); } -#ifdef REGEXP_MCCE_ENABLED - i = nmcces(v); - if (i > 0) { - v->mcces = newcvec(nleaders(v), 0); - CNOERR(); - v->mcces = allmcces(v, v->mcces); - leaders(v, v->mcces); - /* Function does nothing with NULL pointers */ - addmcce(v->mcces, NULL, NULL); /* dummy */ - } - CNOERR(); -#endif /* * Parsing. @@ -559,11 +519,6 @@ freev( if (v->cv2 != NULL) { freecvec(v->cv2); } -#ifdef REGEXP_MCCE_ENABLED - if (v->mcces != NULL) { - freecvec(v->mcces); - } -#endif if (v->lacons != NULL) { freelacons(v->lacons, v->nlacons); } @@ -850,7 +805,6 @@ parseqatom( } NEXT(); return; - break; case '$': ARCV('$', 1); if (v->cflags®_NLANCH) { @@ -858,19 +812,16 @@ parseqatom( } NEXT(); return; - break; case SBEGIN: ARCV('^', 1); /* BOL */ ARCV('^', 0); /* or BOS */ NEXT(); return; - break; case SEND: ARCV('$', 1); /* EOL */ ARCV('$', 0); /* or EOS */ NEXT(); return; - break; case '<': wordchrs(v); /* does NEXT() */ s = newstate(v->nfa); @@ -878,7 +829,6 @@ parseqatom( nonword(v, BEHIND, lp, s); word(v, AHEAD, s, rp); return; - break; case '>': wordchrs(v); /* does NEXT() */ s = newstate(v->nfa); @@ -886,7 +836,6 @@ parseqatom( word(v, BEHIND, lp, s); nonword(v, AHEAD, s, rp); return; - break; case WBDRY: wordchrs(v); /* does NEXT() */ s = newstate(v->nfa); @@ -898,7 +847,6 @@ parseqatom( word(v, BEHIND, lp, s); nonword(v, AHEAD, s, rp); return; - break; case NWBDRY: wordchrs(v); /* does NEXT() */ s = newstate(v->nfa); @@ -910,7 +858,6 @@ parseqatom( nonword(v, BEHIND, lp, s); nonword(v, AHEAD, s, rp); return; - break; case LACON: /* lookahead constraint */ pos = v->nextvalue; NEXT(); @@ -925,7 +872,6 @@ parseqatom( NOERR(); ARCV(LACON, n); return; - break; /* * Then errors, to get them out of the way. @@ -937,11 +883,9 @@ parseqatom( case '{': ERR(REG_BADRPT); return; - break; default: ERR(REG_ASSERT); return; - break; /* * Then plain characters, and minor variants on that theme. @@ -1478,15 +1422,6 @@ cbracket( { struct state *left = newstate(v->nfa); struct state *right = newstate(v->nfa); -#ifdef REGEXP_MCCE_ENABLED - struct state *s; - struct arc *a; /* arc from lp */ - struct arc *ba; /* arc from left, from bracket() */ - struct arc *pa; /* MCCE-prototype arc */ - color co; - const chr *p; - int i; -#endif NOERR(); bracket(v, left, right); @@ -1498,69 +1433,16 @@ cbracket( assert(lp->nouts == 0); /* all outarcs will be ours */ /* - * Easy part of complementing + * Easy part of complementing, and all there is to do since the MCCE code + * was removed. */ colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp); NOERR(); - if (1 /*v->mcces == NULL*/) { /* no MCCEs -- we're done */ - dropstate(v->nfa, left); - assert(right->nins == 0); - freestate(v->nfa, right); - return; - } - -#ifdef REGEXP_MCCE_ENABLED - /* - * But complementing gets messy in the presence of MCCEs... - */ - - NOTE(REG_ULOCALE); - for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) { - co = GETCOLOR(v->cm, *p); - a = findarc(lp, PLAIN, co); - ba = findarc(left, PLAIN, co); - if (ba == NULL) { - assert(a != NULL); - freearc(v->nfa, a); - } else { - assert(a == NULL); - } - s = newstate(v->nfa); - NOERR(); - newarc(v->nfa, PLAIN, co, lp, s); - NOERR(); - pa = findarc(v->mccepbegin, PLAIN, co); - assert(pa != NULL); - if (ba == NULL) { /* easy case, need all of them */ - cloneouts(v->nfa, pa->to, s, rp, PLAIN); - newarc(v->nfa, '$', 1, s, rp); - newarc(v->nfa, '$', 0, s, rp); - colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp); - } else { /* must be selective */ - if (findarc(ba->to, '$', 1) == NULL) { - newarc(v->nfa, '$', 1, s, rp); - newarc(v->nfa, '$', 0, s, rp); - colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp); - } - for (pa = pa->to->outs; pa != NULL; pa = pa->outchain) { - if (findarc(ba->to, PLAIN, pa->co) == NULL) { - newarc(v->nfa, PLAIN, pa->co, s, rp); - } - } - if (s->nouts == 0) { /* limit of selectivity: none */ - dropstate(v->nfa, s); /* frees arc too */ - } - } - NOERR(); - } - - delsub(v->nfa, left, right); - assert(left->nouts == 0); - freestate(v->nfa, left); + dropstate(v->nfa, left); assert(right->nins == 0); freestate(v->nfa, right); -#endif + return; } /* @@ -1592,10 +1474,10 @@ brackpart( NEXT(); /* - * Shortcut for ordinary chr (not range, not MCCE leader). + * Shortcut for ordinary chr (not range). */ - if (!SEE(RANGE) /*&& !ISCELEADER(v, c[0])*/) { + if (!SEE(RANGE)) { onechr(v, c[0], lp, rp); return; } @@ -1706,50 +1588,6 @@ scanplain( } /* - - leaders - process a cvec of collating elements to also include leaders - * Also gives all characters involved their own colors, which is almost - * certainly necessary, and sets up little disconnected subNFA. - ^ static void leaders(struct vars *, struct cvec *); - */ -#ifdef REGEXP_MCCE_ENABLED -static void -leaders( - struct vars *v, - struct cvec *cv) -{ - int mcce; - const chr *p; - chr leader; - struct state *s; - struct arc *a; - - v->mccepbegin = newstate(v->nfa); - v->mccepend = newstate(v->nfa); - NOERR(); - - for (mcce = 0; mcce < cv->nmcces; mcce++) { - p = cv->mcces[mcce]; - leader = *p; - if (!haschr(cv, leader)) { - addchr(cv, leader); - s = newstate(v->nfa); - newarc(v->nfa, PLAIN, subcolor(v->cm, leader), v->mccepbegin, s); - okcolors(v->nfa, v->cm); - } else { - a = findarc(v->mccepbegin, PLAIN, GETCOLOR(v->cm, leader)); - assert(a != NULL); - s = a->to; - assert(s != v->mccepend); - } - p++; - assert(*p != 0 && *(p+1) == 0); /* only 2-char MCCEs for now */ - newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend); - okcolors(v->nfa, v->cm); - } -} -#endif - -/* - onechr - fill in arcs for a plain character, and possible case complements * This is mostly a shortcut for efficient handling of the common case. ^ static void onechr(struct vars *, pchr, struct state *, struct state *); @@ -1766,17 +1604,18 @@ onechr( return; } - /* rats, need general case anyway... */ + /* + * Rats, need general case anyway... + */ + dovec(v, allcases(v, c), lp, rp); } /* - dovec - fill in arcs for each element of a cvec - * This one has to handle the messy cases, like MCCEs and MCCE leaders. ^ static void dovec(struct vars *, struct cvec *, struct state *, ^ struct state *); */ -#ifndef REGEXP_MCCE_ENABLED static void dovec( struct vars *v, @@ -1802,184 +1641,6 @@ dovec( } } -#else /* REGEXP_MCCE_ENABLED */ -static void -dovec( - struct vars *v, - struct cvec *cv, - struct state *lp, - struct state *rp) -{ - chr ch, from, to; - celt ce; - const chr *p; - int i; - struct cvec *leads; - color co; - struct arc *a; - struct arc *pa; /* arc in prototype */ - struct state *s; - struct state *ps; /* state in prototype */ - - /* - * Need a place to store leaders, if any. - */ - - if (nmcces(v) > 0) { - assert(v->mcces != NULL); - if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs) { - if (v->cv2 != NULL) { - free(v->cv2); - } - v->cv2 = newcvec(v->mcces->nchrs, 0, v->mcces->nmcces); - NOERR(); - leads = v->cv2; - } else { - leads = clearcvec(v->cv2); - } - } else { - leads = NULL; - } - - /* - * First, get the ordinary characters out of the way. - */ - - for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) { - ch = *p; - if (!ISCELEADER(v, ch)) { - newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp); - } else { - assert(singleton(v->cm, ch)); - assert(leads != NULL); - if (!haschr(leads, ch)) { - addchr(leads, ch); - } - } - } - - /* - * And the ranges. - */ - - for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) { - from = *p; - to = *(p+1); - while (from <= to && (ce = nextleader(v, from, to)) != NOCELT) { - if (from < ce) { - subrange(v, from, ce - 1, lp, rp); - } - assert(singleton(v->cm, ce)); - assert(leads != NULL); - if (!haschr(leads, ce)) { - addchr(leads, ce); - } - from = ce + 1; - } - if (from <= to) { - subrange(v, from, to, lp, rp); - } - } - - /* *** WARNING *** - * - * This was buggy, check before enabling: the original version would cause - * a segfault at the loopinit below if (leads==NULL && cv->nmcces!=0) - * Possibly just a problem with parens? The original condition was - * ((leads == NULL || leads->nchrs == 0) && cv->nmcces == 0) - */ - - if (leads == NULL || (leads->nchrs == 0 && cv->nmcces == 0)) { - return; - } - - /* - * Deal with the MCCE leaders. - */ - - NOTE(REG_ULOCALE); - for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--) { - co = GETCOLOR(v->cm, *p); - a = findarc(lp, PLAIN, co); - if (a != NULL) { - s = a->to; - } else { - s = newstate(v->nfa); - NOERR(); - newarc(v->nfa, PLAIN, co, lp, s); - NOERR(); - } - pa = findarc(v->mccepbegin, PLAIN, co); - assert(pa != NULL); - ps = pa->to; - newarc(v->nfa, '$', 1, s, rp); - newarc(v->nfa, '$', 0, s, rp); - colorcomplement(v->nfa, v->cm, AHEAD, ps, s, rp); - NOERR(); - } - - /* - * And the MCCEs. - */ - - for (i = 0; i < cv->nmcces; i++) { - p = cv->mcces[i]; - assert(singleton(v->cm, *p)); - if (!singleton(v->cm, *p)) { - ERR(REG_ASSERT); - return; - } - ch = *p++; - co = GETCOLOR(v->cm, ch); - a = findarc(lp, PLAIN, co); - if (a != NULL) { - s = a->to; - } else { - s = newstate(v->nfa); - NOERR(); - newarc(v->nfa, PLAIN, co, lp, s); - NOERR(); - } - assert(*p != 0); /* at least two chars */ - assert(singleton(v->cm, *p)); - ch = *p++; - co = GETCOLOR(v->cm, ch); - assert(*p == 0); /* and only two, for now */ - newarc(v->nfa, PLAIN, co, s, rp); - NOERR(); - } -} - -/* - - nextleader - find next MCCE leader within range - ^ static celt nextleader(struct vars *, pchr, pchr); - */ -static celt /* NOCELT means none */ -nextleader( - struct vars *v, - pchr from, - pchr to) -{ - int i; - const chr *p; - chr ch; - celt it = NOCELT; - - if (v->mcces == NULL) { - return it; - } - - for (i = v->mcces->nchrs, p = v->mcces->chrs; i > 0; i--, p++) { - ch = *p; - if (from <= ch && ch <= to) { - if (it == NOCELT || ch < it) { - it = ch; - } - } - } - return it; -} -#endif /* - wordchrs - set up word-chr list for word-boundary stuff, if needed @@ -2120,30 +1781,14 @@ optst( struct vars *v, struct subre *t) { -#if 0 - if (t == NULL) { - return; - } - /* - * Recurse through children. + * DGP (2007-11-13): I assume it was the programmer's intent to eventually + * come back and add code to optimize subRE trees, but the routine coded + * just spends effort traversing the tree and doing nothing. We can do + * nothing with less effort. */ - if (t->left != NULL) { - optst(v, t->left); - } - if (t->right != NULL) { - optst(v, t->right); - } -#else - /* - * DGP (2007-11-13): I assume it was the programmer's intent to - * eventually come back and add code above to optimize subRE trees, - * but the routine coded just spends effort traversing the tree and - * doing nothing. We can do nothing with less effort. - */ return; -#endif } /* |