summaryrefslogtreecommitdiffstats
path: root/generic/regcomp.c
diff options
context:
space:
mode:
authordkf <donal.k.fellows@manchester.ac.uk>2007-11-14 11:04:58 (GMT)
committerdkf <donal.k.fellows@manchester.ac.uk>2007-11-14 11:04:58 (GMT)
commit45b2a969f63ab7f6184c8acaec8bb3d525650090 (patch)
tree145f0b7237a0fa87bff299c233682cca06b5cdd5 /generic/regcomp.c
parent32e51a96573d4ab8a91f7e5484b7ed780b875ae7 (diff)
downloadtcl-45b2a969f63ab7f6184c8acaec8bb3d525650090.zip
tcl-45b2a969f63ab7f6184c8acaec8bb3d525650090.tar.gz
tcl-45b2a969f63ab7f6184c8acaec8bb3d525650090.tar.bz2
Eliminate multi-char collating element code completely. Simplifies the code
quite a bit. If people still want the full code, it will remain on the 8.4 branch. [Bug 1831425]
Diffstat (limited to 'generic/regcomp.c')
-rw-r--r--generic/regcomp.c383
1 files changed, 14 insertions, 369 deletions
diff --git a/generic/regcomp.c b/generic/regcomp.c
index 8a43240..b397334 100644
--- a/generic/regcomp.c
+++ b/generic/regcomp.c
@@ -55,10 +55,6 @@ static void brackpart(struct vars *, struct state *, struct state *);
static const chr *scanplain(struct vars *);
static void onechr(struct vars *, pchr, struct state *, struct state *);
static void dovec(struct vars *, struct cvec *, struct state *, struct state *);
-#ifdef REGEXP_MCCE_ENABLED
-static void leaders(struct vars *, struct cvec *);
-static celt nextleader(struct vars *, pchr, pchr);
-#endif
static void wordchrs(struct vars *);
static struct subre *subre(struct vars *, int, int, struct state *, struct state *);
static void freesubre(struct vars *, struct subre *);
@@ -107,9 +103,6 @@ static void subblock(struct vars *, pchr, struct state *, struct state *);
static void okcolors(struct nfa *, struct colormap *);
static void colorchain(struct colormap *, struct arc *);
static void uncolorchain(struct colormap *, struct arc *);
-#ifdef REGEXP_MCCE_ENABLED
-static int singleton(struct colormap *, pchr c);
-#endif
static void rainbow(struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *);
static void colorcomplement(struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *);
#ifdef REG_DEBUG
@@ -174,22 +167,10 @@ static void dumpcstate(int, struct carc *, struct cnfa *, FILE *);
static struct cvec *clearcvec(struct cvec *);
static void addchr(struct cvec *, pchr);
static void addrange(struct cvec *, pchr, pchr);
-#ifdef REGEXP_MCCE_ENABLED
-static struct cvec *newcvec(int, int, int);
-static void addmcce(struct cvec *, const chr *, const chr *);
-static struct cvec *getcvec(struct vars *, int, int, int);
-static int haschr(struct cvec *, pchr);
-#else
static struct cvec *newcvec(int, int);
static struct cvec *getcvec(struct vars *, int, int);
-#endif
static void freecvec(struct cvec *);
/* === regc_locale.c === */
-#ifdef REGEXP_MCCE_ENABLED
-static int nleaders(struct vars *);
-static int nmcces(struct vars *);
-static struct cvec *allmcces(struct vars *, struct cvec *);
-#endif
static celt element(struct vars *, const chr *, const chr *);
static struct cvec *range(struct vars *, celt, celt, int);
static int before(celt, celt);
@@ -228,12 +209,6 @@ struct vars {
int ntree; /* number of tree nodes */
struct cvec *cv; /* interface cvec */
struct cvec *cv2; /* utility cvec */
-#ifdef REGEXP_MCCE_ENABLED
- struct cvec *mcces; /* collating-element information */
-#define ISCELEADER(v,c) (v->mcces != NULL && haschr(v->mcces, (c)))
- struct state *mccepbegin; /* in nfa, start of MCCE prototypes */
- struct state *mccepend; /* in nfa, end of MCCE prototypes */
-#endif
struct subre *lacons; /* lookahead-constraint vector */
int nlacons; /* size of lacons */
};
@@ -343,9 +318,6 @@ compile(
v->treefree = NULL;
v->cv = NULL;
v->cv2 = NULL;
-#ifdef REGEXP_MCCE_ENABLED
- v->mcces = NULL;
-#endif
v->lacons = NULL;
v->nlacons = 0;
re->re_magic = REMAGIC;
@@ -375,18 +347,6 @@ compile(
if (v->cv == NULL) {
return freev(v, REG_ESPACE);
}
-#ifdef REGEXP_MCCE_ENABLED
- i = nmcces(v);
- if (i > 0) {
- v->mcces = newcvec(nleaders(v), 0);
- CNOERR();
- v->mcces = allmcces(v, v->mcces);
- leaders(v, v->mcces);
- /* Function does nothing with NULL pointers */
- addmcce(v->mcces, NULL, NULL); /* dummy */
- }
- CNOERR();
-#endif
/*
* Parsing.
@@ -559,11 +519,6 @@ freev(
if (v->cv2 != NULL) {
freecvec(v->cv2);
}
-#ifdef REGEXP_MCCE_ENABLED
- if (v->mcces != NULL) {
- freecvec(v->mcces);
- }
-#endif
if (v->lacons != NULL) {
freelacons(v->lacons, v->nlacons);
}
@@ -850,7 +805,6 @@ parseqatom(
}
NEXT();
return;
- break;
case '$':
ARCV('$', 1);
if (v->cflags&REG_NLANCH) {
@@ -858,19 +812,16 @@ parseqatom(
}
NEXT();
return;
- break;
case SBEGIN:
ARCV('^', 1); /* BOL */
ARCV('^', 0); /* or BOS */
NEXT();
return;
- break;
case SEND:
ARCV('$', 1); /* EOL */
ARCV('$', 0); /* or EOS */
NEXT();
return;
- break;
case '<':
wordchrs(v); /* does NEXT() */
s = newstate(v->nfa);
@@ -878,7 +829,6 @@ parseqatom(
nonword(v, BEHIND, lp, s);
word(v, AHEAD, s, rp);
return;
- break;
case '>':
wordchrs(v); /* does NEXT() */
s = newstate(v->nfa);
@@ -886,7 +836,6 @@ parseqatom(
word(v, BEHIND, lp, s);
nonword(v, AHEAD, s, rp);
return;
- break;
case WBDRY:
wordchrs(v); /* does NEXT() */
s = newstate(v->nfa);
@@ -898,7 +847,6 @@ parseqatom(
word(v, BEHIND, lp, s);
nonword(v, AHEAD, s, rp);
return;
- break;
case NWBDRY:
wordchrs(v); /* does NEXT() */
s = newstate(v->nfa);
@@ -910,7 +858,6 @@ parseqatom(
nonword(v, BEHIND, lp, s);
nonword(v, AHEAD, s, rp);
return;
- break;
case LACON: /* lookahead constraint */
pos = v->nextvalue;
NEXT();
@@ -925,7 +872,6 @@ parseqatom(
NOERR();
ARCV(LACON, n);
return;
- break;
/*
* Then errors, to get them out of the way.
@@ -937,11 +883,9 @@ parseqatom(
case '{':
ERR(REG_BADRPT);
return;
- break;
default:
ERR(REG_ASSERT);
return;
- break;
/*
* Then plain characters, and minor variants on that theme.
@@ -1478,15 +1422,6 @@ cbracket(
{
struct state *left = newstate(v->nfa);
struct state *right = newstate(v->nfa);
-#ifdef REGEXP_MCCE_ENABLED
- struct state *s;
- struct arc *a; /* arc from lp */
- struct arc *ba; /* arc from left, from bracket() */
- struct arc *pa; /* MCCE-prototype arc */
- color co;
- const chr *p;
- int i;
-#endif
NOERR();
bracket(v, left, right);
@@ -1498,69 +1433,16 @@ cbracket(
assert(lp->nouts == 0); /* all outarcs will be ours */
/*
- * Easy part of complementing
+ * Easy part of complementing, and all there is to do since the MCCE code
+ * was removed.
*/
colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp);
NOERR();
- if (1 /*v->mcces == NULL*/) { /* no MCCEs -- we're done */
- dropstate(v->nfa, left);
- assert(right->nins == 0);
- freestate(v->nfa, right);
- return;
- }
-
-#ifdef REGEXP_MCCE_ENABLED
- /*
- * But complementing gets messy in the presence of MCCEs...
- */
-
- NOTE(REG_ULOCALE);
- for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) {
- co = GETCOLOR(v->cm, *p);
- a = findarc(lp, PLAIN, co);
- ba = findarc(left, PLAIN, co);
- if (ba == NULL) {
- assert(a != NULL);
- freearc(v->nfa, a);
- } else {
- assert(a == NULL);
- }
- s = newstate(v->nfa);
- NOERR();
- newarc(v->nfa, PLAIN, co, lp, s);
- NOERR();
- pa = findarc(v->mccepbegin, PLAIN, co);
- assert(pa != NULL);
- if (ba == NULL) { /* easy case, need all of them */
- cloneouts(v->nfa, pa->to, s, rp, PLAIN);
- newarc(v->nfa, '$', 1, s, rp);
- newarc(v->nfa, '$', 0, s, rp);
- colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp);
- } else { /* must be selective */
- if (findarc(ba->to, '$', 1) == NULL) {
- newarc(v->nfa, '$', 1, s, rp);
- newarc(v->nfa, '$', 0, s, rp);
- colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp);
- }
- for (pa = pa->to->outs; pa != NULL; pa = pa->outchain) {
- if (findarc(ba->to, PLAIN, pa->co) == NULL) {
- newarc(v->nfa, PLAIN, pa->co, s, rp);
- }
- }
- if (s->nouts == 0) { /* limit of selectivity: none */
- dropstate(v->nfa, s); /* frees arc too */
- }
- }
- NOERR();
- }
-
- delsub(v->nfa, left, right);
- assert(left->nouts == 0);
- freestate(v->nfa, left);
+ dropstate(v->nfa, left);
assert(right->nins == 0);
freestate(v->nfa, right);
-#endif
+ return;
}
/*
@@ -1592,10 +1474,10 @@ brackpart(
NEXT();
/*
- * Shortcut for ordinary chr (not range, not MCCE leader).
+ * Shortcut for ordinary chr (not range).
*/
- if (!SEE(RANGE) /*&& !ISCELEADER(v, c[0])*/) {
+ if (!SEE(RANGE)) {
onechr(v, c[0], lp, rp);
return;
}
@@ -1706,50 +1588,6 @@ scanplain(
}
/*
- - leaders - process a cvec of collating elements to also include leaders
- * Also gives all characters involved their own colors, which is almost
- * certainly necessary, and sets up little disconnected subNFA.
- ^ static void leaders(struct vars *, struct cvec *);
- */
-#ifdef REGEXP_MCCE_ENABLED
-static void
-leaders(
- struct vars *v,
- struct cvec *cv)
-{
- int mcce;
- const chr *p;
- chr leader;
- struct state *s;
- struct arc *a;
-
- v->mccepbegin = newstate(v->nfa);
- v->mccepend = newstate(v->nfa);
- NOERR();
-
- for (mcce = 0; mcce < cv->nmcces; mcce++) {
- p = cv->mcces[mcce];
- leader = *p;
- if (!haschr(cv, leader)) {
- addchr(cv, leader);
- s = newstate(v->nfa);
- newarc(v->nfa, PLAIN, subcolor(v->cm, leader), v->mccepbegin, s);
- okcolors(v->nfa, v->cm);
- } else {
- a = findarc(v->mccepbegin, PLAIN, GETCOLOR(v->cm, leader));
- assert(a != NULL);
- s = a->to;
- assert(s != v->mccepend);
- }
- p++;
- assert(*p != 0 && *(p+1) == 0); /* only 2-char MCCEs for now */
- newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend);
- okcolors(v->nfa, v->cm);
- }
-}
-#endif
-
-/*
- onechr - fill in arcs for a plain character, and possible case complements
* This is mostly a shortcut for efficient handling of the common case.
^ static void onechr(struct vars *, pchr, struct state *, struct state *);
@@ -1766,17 +1604,18 @@ onechr(
return;
}
- /* rats, need general case anyway... */
+ /*
+ * Rats, need general case anyway...
+ */
+
dovec(v, allcases(v, c), lp, rp);
}
/*
- dovec - fill in arcs for each element of a cvec
- * This one has to handle the messy cases, like MCCEs and MCCE leaders.
^ static void dovec(struct vars *, struct cvec *, struct state *,
^ struct state *);
*/
-#ifndef REGEXP_MCCE_ENABLED
static void
dovec(
struct vars *v,
@@ -1802,184 +1641,6 @@ dovec(
}
}
-#else /* REGEXP_MCCE_ENABLED */
-static void
-dovec(
- struct vars *v,
- struct cvec *cv,
- struct state *lp,
- struct state *rp)
-{
- chr ch, from, to;
- celt ce;
- const chr *p;
- int i;
- struct cvec *leads;
- color co;
- struct arc *a;
- struct arc *pa; /* arc in prototype */
- struct state *s;
- struct state *ps; /* state in prototype */
-
- /*
- * Need a place to store leaders, if any.
- */
-
- if (nmcces(v) > 0) {
- assert(v->mcces != NULL);
- if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs) {
- if (v->cv2 != NULL) {
- free(v->cv2);
- }
- v->cv2 = newcvec(v->mcces->nchrs, 0, v->mcces->nmcces);
- NOERR();
- leads = v->cv2;
- } else {
- leads = clearcvec(v->cv2);
- }
- } else {
- leads = NULL;
- }
-
- /*
- * First, get the ordinary characters out of the way.
- */
-
- for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) {
- ch = *p;
- if (!ISCELEADER(v, ch)) {
- newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp);
- } else {
- assert(singleton(v->cm, ch));
- assert(leads != NULL);
- if (!haschr(leads, ch)) {
- addchr(leads, ch);
- }
- }
- }
-
- /*
- * And the ranges.
- */
-
- for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) {
- from = *p;
- to = *(p+1);
- while (from <= to && (ce = nextleader(v, from, to)) != NOCELT) {
- if (from < ce) {
- subrange(v, from, ce - 1, lp, rp);
- }
- assert(singleton(v->cm, ce));
- assert(leads != NULL);
- if (!haschr(leads, ce)) {
- addchr(leads, ce);
- }
- from = ce + 1;
- }
- if (from <= to) {
- subrange(v, from, to, lp, rp);
- }
- }
-
- /* *** WARNING ***
- *
- * This was buggy, check before enabling: the original version would cause
- * a segfault at the loopinit below if (leads==NULL && cv->nmcces!=0)
- * Possibly just a problem with parens? The original condition was
- * ((leads == NULL || leads->nchrs == 0) && cv->nmcces == 0)
- */
-
- if (leads == NULL || (leads->nchrs == 0 && cv->nmcces == 0)) {
- return;
- }
-
- /*
- * Deal with the MCCE leaders.
- */
-
- NOTE(REG_ULOCALE);
- for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--) {
- co = GETCOLOR(v->cm, *p);
- a = findarc(lp, PLAIN, co);
- if (a != NULL) {
- s = a->to;
- } else {
- s = newstate(v->nfa);
- NOERR();
- newarc(v->nfa, PLAIN, co, lp, s);
- NOERR();
- }
- pa = findarc(v->mccepbegin, PLAIN, co);
- assert(pa != NULL);
- ps = pa->to;
- newarc(v->nfa, '$', 1, s, rp);
- newarc(v->nfa, '$', 0, s, rp);
- colorcomplement(v->nfa, v->cm, AHEAD, ps, s, rp);
- NOERR();
- }
-
- /*
- * And the MCCEs.
- */
-
- for (i = 0; i < cv->nmcces; i++) {
- p = cv->mcces[i];
- assert(singleton(v->cm, *p));
- if (!singleton(v->cm, *p)) {
- ERR(REG_ASSERT);
- return;
- }
- ch = *p++;
- co = GETCOLOR(v->cm, ch);
- a = findarc(lp, PLAIN, co);
- if (a != NULL) {
- s = a->to;
- } else {
- s = newstate(v->nfa);
- NOERR();
- newarc(v->nfa, PLAIN, co, lp, s);
- NOERR();
- }
- assert(*p != 0); /* at least two chars */
- assert(singleton(v->cm, *p));
- ch = *p++;
- co = GETCOLOR(v->cm, ch);
- assert(*p == 0); /* and only two, for now */
- newarc(v->nfa, PLAIN, co, s, rp);
- NOERR();
- }
-}
-
-/*
- - nextleader - find next MCCE leader within range
- ^ static celt nextleader(struct vars *, pchr, pchr);
- */
-static celt /* NOCELT means none */
-nextleader(
- struct vars *v,
- pchr from,
- pchr to)
-{
- int i;
- const chr *p;
- chr ch;
- celt it = NOCELT;
-
- if (v->mcces == NULL) {
- return it;
- }
-
- for (i = v->mcces->nchrs, p = v->mcces->chrs; i > 0; i--, p++) {
- ch = *p;
- if (from <= ch && ch <= to) {
- if (it == NOCELT || ch < it) {
- it = ch;
- }
- }
- }
- return it;
-}
-#endif
/*
- wordchrs - set up word-chr list for word-boundary stuff, if needed
@@ -2120,30 +1781,14 @@ optst(
struct vars *v,
struct subre *t)
{
-#if 0
- if (t == NULL) {
- return;
- }
-
/*
- * Recurse through children.
+ * DGP (2007-11-13): I assume it was the programmer's intent to eventually
+ * come back and add code to optimize subRE trees, but the routine coded
+ * just spends effort traversing the tree and doing nothing. We can do
+ * nothing with less effort.
*/
- if (t->left != NULL) {
- optst(v, t->left);
- }
- if (t->right != NULL) {
- optst(v, t->right);
- }
-#else
- /*
- * DGP (2007-11-13): I assume it was the programmer's intent to
- * eventually come back and add code above to optimize subRE trees,
- * but the routine coded just spends effort traversing the tree and
- * doing nothing. We can do nothing with less effort.
- */
return;
-#endif
}
/*