summary refs log tree commit diff stats
diff options
context:
space:
mode:
author dkf <donal.k.fellows@manchester.ac.uk> 2007-11-14 00:07:57 (GMT)
committer dkf <donal.k.fellows@manchester.ac.uk> 2007-11-14 00:07:57 (GMT)
commit 07768704e04d2aafa94546335092ec3fd5f57f69 (patch)
tree bac87fc73ae9e69be7cd5b0909fc15f0191505f9
parent 4b4b968689d3029079c5298593d1cb210189c041 (diff)
download tcl-07768704e04d2aafa94546335092ec3fd5f57f69.zip
tcl-07768704e04d2aafa94546335092ec3fd5f57f69.tar.gz
tcl-07768704e04d2aafa94546335092ec3fd5f57f69.tar.bz2
Comment/#ifdef out the multi-char collating element support code.
We never used or supported it. [Bug 1831425]
-rw-r--r-- ChangeLog 6
-rw-r--r-- generic/regc_cvec.c 38
-rw-r--r-- generic/regc_locale.c 50
-rw-r--r-- generic/regcomp.c 39
-rw-r--r-- generic/regguts.h 4
5 files changed, 98 insertions, 39 deletions
diff --git a/ChangeLog b/ChangeLog
index 4baa164..f853fd4 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2007-11-14 Donal K. Fellows <dkf@users.sf.net>
+
+ * generic/regc*.c: #ifdef/comment out the code that deals with
+ multi-character collating elements, which have never been supported.
+ Cuts the memory consumption of the RE compiler. [Bug 1831425]
+
2007-11-13 Donal K. Fellows <dkf@users.sf.net>
* generic/tclCompCmds.c (TclCompileSwitchCmd, TclCompileRegexpCmd):
diff --git a/generic/regc_cvec.c b/generic/regc_cvec.c
index a0a14c2..afb2f48 100644
--- a/generic/regc_cvec.c
+++ b/generic/regc_cvec.c
@@ -41,12 +41,15 @@
static struct cvec *
newcvec(
int nchrs, /* to hold this many chrs... */
- int nranges, /* ... and this many ranges... */
+ int nranges) /* ... and this many ranges... */
+#ifdef REGEXP_MCCE_ENABLED
int nmcces) /* ... and this many MCCEs */
+#endif
{
size_t n, nc;
struct cvec *cv;
+#ifdef REGEXP_MCCE_ENABLED
nc = (size_t)nchrs + (size_t)nmcces*(MAXMCCE+1) + (size_t)nranges*2;
n = sizeof(struct cvec) + (size_t)(nmcces-1)*sizeof(chr *)
+ nc*sizeof(chr);
@@ -60,6 +63,19 @@ newcvec(
cv->ranges = cv->chrs + nchrs + nmcces*(MAXMCCE+1);
cv->rangespace = nranges;
return clearcvec(cv);
+#else
+ nc = (size_t)nchrs + (size_t)nranges*2;
+ n = sizeof(struct cvec) + nc*sizeof(chr);
+ cv = (struct cvec *) MALLOC(n);
+ if (cv == NULL) {
+ return NULL;
+ }
+ cv->chrspace = nchrs;
+ cv->chrs = (chr *)(((char *)cv)+sizeof(struct cvec));
+ cv->ranges = cv->chrs + nchrs;
+ cv->rangespace = nranges;
+ return clearcvec(cv);
+#endif /*REGEXP_MCCE_ENABLED*/
}
/*
@@ -71,18 +87,21 @@ static struct cvec *
clearcvec(
struct cvec *cv) /* character vector */
{
+#ifdef REGEXP_MCCE_ENABLED
int i;
+#endif
assert(cv != NULL);
cv->nchrs = 0;
+ cv->nranges = 0;
+#ifdef REGEXP_MCCE_ENABLED
assert(cv->chrs == (chr *)&cv->mcces[cv->mccespace]);
cv->nmcces = 0;
cv->nmccechrs = 0;
- cv->nranges = 0;
for (i = 0; i < cv->mccespace; i++) {
cv->mcces[i] = NULL;
}
-
+#endif
return cv;
}
@@ -158,6 +177,7 @@ addmcce(
- haschr - does a cvec contain this chr?
^ static int haschr(struct cvec *, pchr);
*/
+#ifdef REGEXP_MCCE_ENABLED
static int /* predicate */
haschr(
struct cvec *cv, /* character vector */
@@ -178,6 +198,7 @@ haschr(
}
return 0;
}
+#endif
/*
- getcvec - get a cvec, remembering it as v->cv
@@ -187,18 +208,23 @@ static struct cvec *
getcvec(
struct vars *v, /* context */
int nchrs, /* to hold this many chrs... */
- int nranges, /* ... and this many ranges... */
+ int nranges) /* ... and this many ranges... */
+#ifdef REGEXP_MCCE_ENABLED
int nmcces) /* ... and this many MCCEs */
+#endif
{
if ((v->cv != NULL) && (nchrs <= v->cv->chrspace) &&
- (nranges <= v->cv->rangespace) && (nmcces <= v->cv->mccespace)) {
+#ifdef REGEXP_MCCE_ENABLED
+ (nmcces <= v->cv->mccespace) &&
+#endif
+ (nranges <= v->cv->rangespace)) {
return clearcvec(v->cv);
}
if (v->cv != NULL) {
freecvec(v->cv);
}
- v->cv = newcvec(nchrs, nranges, nmcces);
+ v->cv = newcvec(nchrs, nranges/*, nmcces*/);
if (v->cv == NULL) {
ERR(REG_ESPACE);
}
diff --git a/generic/regc_locale.c b/generic/regc_locale.c
index b08c300..438e821 100644
--- a/generic/regc_locale.c
+++ b/generic/regc_locale.c
@@ -9,7 +9,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: regc_locale.c,v 1.17 2007/04/19 09:00:55 dkf Exp $
+ * RCS: @(#) $Id: regc_locale.c,v 1.18 2007/11/14 00:07:58 dkf Exp $
*/
/* ASCII character-name table */
@@ -614,6 +614,7 @@ static const chr printCharTable[] = {
- nmcces - how many distinct MCCEs are there?
^ static int nmcces(struct vars *);
*/
+#ifdef REGEXP_MCCE_ENABLED
static int
nmcces(
struct vars *v) /* context */
@@ -622,23 +623,27 @@ nmcces(
* No multi-character collating elements defined at the moment.
*/
return 0;
- }
-
+}
+#endif
+
/*
- nleaders - how many chrs can be first chrs of MCCEs?
^ static int nleaders(struct vars *);
*/
+#ifdef REGEXP_MCCE_ENABLED
static int
nleaders(
struct vars *v) /* context */
{
return 0;
}
+#endif
/*
- allmcces - return a cvec with all the MCCEs of the locale
^ static struct cvec *allmcces(struct vars *, struct cvec *);
*/
+#ifdef REGEXP_MCCE_ENABLED
static struct cvec *
allmcces(
struct vars *v, /* context */
@@ -646,6 +651,7 @@ allmcces(
{
return clearcvec(cv);
}
+#endif
/*
- element - map collating-element name to celt
@@ -719,7 +725,7 @@ range(
}
if (!cases) { /* easy version */
- cv = getcvec(v, 0, 1, 0);
+ cv = getcvec(v, 0, 1/*, 0*/);
NOERRN();
addrange(cv, a, b);
return cv;
@@ -733,7 +739,7 @@ range(
nchrs = (b - a + 1)*2 + 4;
- cv = getcvec(v, nchrs, 0, 0);
+ cv = getcvec(v, nchrs, 0/*, 0*/);
NOERRN();
for (c=a; c<=b; c++) {
@@ -792,7 +798,7 @@ eclass(
*/
if ((v->cflags&REG_FAKE) && c == 'x') {
- cv = getcvec(v, 4, 0, 0);
+ cv = getcvec(v, 4, 0/*, 0*/);
addchr(cv, (chr)'x');
addchr(cv, (chr)'y');
if (cases) {
@@ -809,7 +815,7 @@ eclass(
if (cases) {
return allcases(v, c);
}
- cv = getcvec(v, 1, 0, 0);
+ cv = getcvec(v, 1, 0/*, 0*/);
assert(cv != NULL);
addchr(cv, (chr)c);
return cv;
@@ -889,7 +895,7 @@ cclass(
switch((enum classes) index) {
case CC_PRINT:
- cv = getcvec(v, NUM_PRINT_CHAR, NUM_PRINT_RANGE, 0);
+ cv = getcvec(v, NUM_PRINT_CHAR, NUM_PRINT_RANGE/*, 0*/);
if (cv) {
for (i=0 ; (size_t)i<NUM_PRINT_CHAR ; i++) {
addchr(cv, printCharTable[i]);
@@ -901,7 +907,7 @@ cclass(
}
break;
case CC_ALNUM:
- cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE, 0);
+ cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE/*, 0*/);
if (cv) {
for (i=0 ; (size_t)i<NUM_ALPHA_CHAR ; i++) {
addchr(cv, alphaCharTable[i]);
@@ -917,7 +923,7 @@ cclass(
}
break;
case CC_ALPHA:
- cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE, 0);
+ cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE/*, 0*/);
if (cv) {
for (i=0 ; (size_t)i<NUM_ALPHA_RANGE ; i++) {
addrange(cv, alphaRangeTable[i].start,
@@ -929,23 +935,23 @@ cclass(
}
break;
case CC_ASCII:
- cv = getcvec(v, 0, 1, 0);
+ cv = getcvec(v, 0, 1/*, 0*/);
if (cv) {
addrange(cv, 0, 0x7f);
}
break;
case CC_BLANK:
- cv = getcvec(v, 2, 0, 0);
+ cv = getcvec(v, 2, 0/*, 0*/);
addchr(cv, '\t');
addchr(cv, ' ');
break;
case CC_CNTRL:
- cv = getcvec(v, 0, 2, 0);
+ cv = getcvec(v, 0, 2/*, 0*/);
addrange(cv, 0x0, 0x1f);
addrange(cv, 0x7f, 0x9f);
break;
case CC_DIGIT:
- cv = getcvec(v, 0, NUM_DIGIT_RANGE, 0);
+ cv = getcvec(v, 0, NUM_DIGIT_RANGE/*, 0*/);
if (cv) {
for (i=0 ; (size_t)i<NUM_DIGIT_RANGE ; i++) {
addrange(cv, digitRangeTable[i].start,
@@ -954,7 +960,7 @@ cclass(
}
break;
case CC_PUNCT:
- cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE, 0);
+ cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE/*, 0*/);
if (cv) {
for (i=0 ; (size_t)i<NUM_PUNCT_RANGE ; i++) {
addrange(cv, punctRangeTable[i].start,
@@ -975,7 +981,7 @@ cclass(
* someone comes up with a better arrangement!)
*/
- cv = getcvec(v, 0, 3, 0);
+ cv = getcvec(v, 0, 3/*, 0*/);
if (cv) {
addrange(cv, '0', '9');
addrange(cv, 'a', 'f');
@@ -983,7 +989,7 @@ cclass(
}
break;
case CC_SPACE:
- cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE, 0);
+ cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE/*, 0*/);
if (cv) {
for (i=0 ; (size_t)i<NUM_SPACE_RANGE ; i++) {
addrange(cv, spaceRangeTable[i].start,
@@ -995,7 +1001,7 @@ cclass(
}
break;
case CC_LOWER:
- cv = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE, 0);
+ cv = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE/*, 0*/);
if (cv) {
for (i=0 ; (size_t)i<NUM_LOWER_RANGE ; i++) {
addrange(cv, lowerRangeTable[i].start,
@@ -1007,7 +1013,7 @@ cclass(
}
break;
case CC_UPPER:
- cv = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE, 0);
+ cv = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE/*, 0*/);
if (cv) {
for (i=0 ; (size_t)i<NUM_UPPER_RANGE ; i++) {
addrange(cv, upperRangeTable[i].start,
@@ -1019,7 +1025,7 @@ cclass(
}
break;
case CC_GRAPH:
- cv = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE, 0);
+ cv = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE/*, 0*/);
if (cv) {
for (i=0 ; (size_t)i<NUM_GRAPH_RANGE ; i++) {
addrange(cv, graphRangeTable[i].start,
@@ -1057,10 +1063,10 @@ allcases(
tc = Tcl_UniCharToTitle((chr)c);
if (tc != uc) {
- cv = getcvec(v, 3, 0, 0);
+ cv = getcvec(v, 3, 0/*, 0*/);
addchr(cv, tc);
} else {
- cv = getcvec(v, 2, 0, 0);
+ cv = getcvec(v, 2, 0/*, 0*/);
}
addchr(cv, lc);
if (lc != uc) {
diff --git a/generic/regcomp.c b/generic/regcomp.c
index 7faf793..8a43240 100644
--- a/generic/regcomp.c
+++ b/generic/regcomp.c
@@ -53,10 +53,10 @@ static void bracket(struct vars *, struct state *, struct state *);
static void cbracket(struct vars *, struct state *, struct state *);
static void brackpart(struct vars *, struct state *, struct state *);
static const chr *scanplain(struct vars *);
-static void leaders(struct vars *, struct cvec *);
static void onechr(struct vars *, pchr, struct state *, struct state *);
static void dovec(struct vars *, struct cvec *, struct state *, struct state *);
#ifdef REGEXP_MCCE_ENABLED
+static void leaders(struct vars *, struct cvec *);
static celt nextleader(struct vars *, pchr, pchr);
#endif
static void wordchrs(struct vars *);
@@ -171,20 +171,25 @@ static void dumpcnfa(struct cnfa *, FILE *);
static void dumpcstate(int, struct carc *, struct cnfa *, FILE *);
#endif
/* === regc_cvec.c === */
-static struct cvec *newcvec(int, int, int);
static struct cvec *clearcvec(struct cvec *);
static void addchr(struct cvec *, pchr);
static void addrange(struct cvec *, pchr, pchr);
#ifdef REGEXP_MCCE_ENABLED
+static struct cvec *newcvec(int, int, int);
static void addmcce(struct cvec *, const chr *, const chr *);
-#endif
-static int haschr(struct cvec *, pchr);
static struct cvec *getcvec(struct vars *, int, int, int);
+static int haschr(struct cvec *, pchr);
+#else
+static struct cvec *newcvec(int, int);
+static struct cvec *getcvec(struct vars *, int, int);
+#endif
static void freecvec(struct cvec *);
/* === regc_locale.c === */
-static int nmcces(struct vars *);
+#ifdef REGEXP_MCCE_ENABLED
static int nleaders(struct vars *);
+static int nmcces(struct vars *);
static struct cvec *allmcces(struct vars *, struct cvec *);
+#endif
static celt element(struct vars *, const chr *, const chr *);
static struct cvec *range(struct vars *, celt, celt, int);
static int before(celt, celt);
@@ -223,10 +228,12 @@ struct vars {
int ntree; /* number of tree nodes */
struct cvec *cv; /* interface cvec */
struct cvec *cv2; /* utility cvec */
+#ifdef REGEXP_MCCE_ENABLED
struct cvec *mcces; /* collating-element information */
#define ISCELEADER(v,c) (v->mcces != NULL && haschr(v->mcces, (c)))
struct state *mccepbegin; /* in nfa, start of MCCE prototypes */
struct state *mccepend; /* in nfa, end of MCCE prototypes */
+#endif
struct subre *lacons; /* lookahead-constraint vector */
int nlacons; /* size of lacons */
};
@@ -336,7 +343,9 @@ compile(
v->treefree = NULL;
v->cv = NULL;
v->cv2 = NULL;
+#ifdef REGEXP_MCCE_ENABLED
v->mcces = NULL;
+#endif
v->lacons = NULL;
v->nlacons = 0;
re->re_magic = REMAGIC;
@@ -362,22 +371,22 @@ compile(
ZAPCNFA(g->search);
v->nfa = newnfa(v, v->cm, NULL);
CNOERR();
- v->cv = newcvec(100, 20, 10);
+ v->cv = newcvec(100, 20);
if (v->cv == NULL) {
return freev(v, REG_ESPACE);
}
+#ifdef REGEXP_MCCE_ENABLED
i = nmcces(v);
if (i > 0) {
- v->mcces = newcvec(nleaders(v), 0, i);
+ v->mcces = newcvec(nleaders(v), 0);
CNOERR();
v->mcces = allmcces(v, v->mcces);
leaders(v, v->mcces);
-#ifdef REGEXP_MCCE_ENABLED
/* Function does nothing with NULL pointers */
addmcce(v->mcces, NULL, NULL); /* dummy */
-#endif
}
CNOERR();
+#endif
/*
* Parsing.
@@ -550,9 +559,11 @@ freev(
if (v->cv2 != NULL) {
freecvec(v->cv2);
}
+#ifdef REGEXP_MCCE_ENABLED
if (v->mcces != NULL) {
freecvec(v->mcces);
}
+#endif
if (v->lacons != NULL) {
freelacons(v->lacons, v->nlacons);
}
@@ -1467,6 +1478,7 @@ cbracket(
{
struct state *left = newstate(v->nfa);
struct state *right = newstate(v->nfa);
+#ifdef REGEXP_MCCE_ENABLED
struct state *s;
struct arc *a; /* arc from lp */
struct arc *ba; /* arc from left, from bracket() */
@@ -1474,6 +1486,7 @@ cbracket(
color co;
const chr *p;
int i;
+#endif
NOERR();
bracket(v, left, right);
@@ -1490,13 +1503,14 @@ cbracket(
colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp);
NOERR();
- if (v->mcces == NULL) { /* no MCCEs -- we're done */
+ if (1 /*v->mcces == NULL*/) { /* no MCCEs -- we're done */
dropstate(v->nfa, left);
assert(right->nins == 0);
freestate(v->nfa, right);
return;
}
+#ifdef REGEXP_MCCE_ENABLED
/*
* But complementing gets messy in the presence of MCCEs...
*/
@@ -1546,6 +1560,7 @@ cbracket(
freestate(v->nfa, left);
assert(right->nins == 0);
freestate(v->nfa, right);
+#endif
}
/*
@@ -1580,7 +1595,7 @@ brackpart(
* Shortcut for ordinary chr (not range, not MCCE leader).
*/
- if (!SEE(RANGE) && !ISCELEADER(v, c[0])) {
+ if (!SEE(RANGE) /*&& !ISCELEADER(v, c[0])*/) {
onechr(v, c[0], lp, rp);
return;
}
@@ -1696,6 +1711,7 @@ scanplain(
* certainly necessary, and sets up little disconnected subNFA.
^ static void leaders(struct vars *, struct cvec *);
*/
+#ifdef REGEXP_MCCE_ENABLED
static void
leaders(
struct vars *v,
@@ -1731,6 +1747,7 @@ leaders(
okcolors(v->nfa, v->cm);
}
}
+#endif
/*
- onechr - fill in arcs for a plain character, and possible case complements
diff --git a/generic/regguts.h b/generic/regguts.h
index 991979e..bc1d7a2 100644
--- a/generic/regguts.h
+++ b/generic/regguts.h
@@ -245,15 +245,19 @@ struct cvec {
int nranges; /* number of ranges (chr pairs) */
int rangespace; /* number of chrs possible */
chr *ranges; /* pointer to vector of chr pairs */
+#ifdef REGEXP_MCCE_ENABLED
int nmcces; /* number of MCCEs */
int mccespace; /* number of MCCEs possible */
int nmccechrs; /* number of chrs used for MCCEs */
chr *mcces[1]; /* pointers to 0-terminated MCCEs */
/* and both batches of chrs are on the end */
+#endif
};
+#ifdef REGEXP_MCCE_ENABLED
/* caution: this value cannot be changed easily */
#define MAXMCCE 2 /* length of longest MCCE */
+#endif
/*
* definitions for NFA internal representation