diff options
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | generic/regc_cvec.c | 38 | ||||
-rw-r--r-- | generic/regc_locale.c | 50 | ||||
-rw-r--r-- | generic/regcomp.c | 39 | ||||
-rw-r--r-- | generic/regguts.h | 4 |
5 files changed, 98 insertions, 39 deletions
@@ -1,3 +1,9 @@ +2007-11-14 Donal K. Fellows <dkf@users.sf.net> + + * generic/regc*.c: #ifdef/comment out the code that deals with + multi-character collating elements, which have never been supported. + Cuts the memory consumption of the RE compiler. [Bug 1831425] + 2007-11-13 Donal K. Fellows <dkf@users.sf.net> * generic/tclCompCmds.c (TclCompileSwitchCmd, TclCompileRegexpCmd): diff --git a/generic/regc_cvec.c b/generic/regc_cvec.c index a0a14c2..afb2f48 100644 --- a/generic/regc_cvec.c +++ b/generic/regc_cvec.c @@ -41,12 +41,15 @@ static struct cvec * newcvec( int nchrs, /* to hold this many chrs... */ - int nranges, /* ... and this many ranges... */ + int nranges) /* ... and this many ranges... */ +#ifdef REGEXP_MCCE_ENABLED int nmcces) /* ... and this many MCCEs */ +#endif { size_t n, nc; struct cvec *cv; +#ifdef REGEXP_MCCE_ENABLED nc = (size_t)nchrs + (size_t)nmcces*(MAXMCCE+1) + (size_t)nranges*2; n = sizeof(struct cvec) + (size_t)(nmcces-1)*sizeof(chr *) + nc*sizeof(chr); @@ -60,6 +63,19 @@ newcvec( cv->ranges = cv->chrs + nchrs + nmcces*(MAXMCCE+1); cv->rangespace = nranges; return clearcvec(cv); +#else + nc = (size_t)nchrs + (size_t)nranges*2; + n = sizeof(struct cvec) + nc*sizeof(chr); + cv = (struct cvec *) MALLOC(n); + if (cv == NULL) { + return NULL; + } + cv->chrspace = nchrs; + cv->chrs = (chr *)(((char *)cv)+sizeof(struct cvec)); + cv->ranges = cv->chrs + nchrs; + cv->rangespace = nranges; + return clearcvec(cv); +#endif /*REGEXP_MCCE_ENABLED*/ } /* @@ -71,18 +87,21 @@ static struct cvec * clearcvec( struct cvec *cv) /* character vector */ { +#ifdef REGEXP_MCCE_ENABLED int i; +#endif assert(cv != NULL); cv->nchrs = 0; + cv->nranges = 0; +#ifdef REGEXP_MCCE_ENABLED assert(cv->chrs == (chr *)&cv->mcces[cv->mccespace]); cv->nmcces = 0; cv->nmccechrs = 0; - cv->nranges = 0; for (i = 0; i < cv->mccespace; i++) { cv->mcces[i] = NULL; } - +#endif return cv; } @@ -158,6 +177,7 @@ addmcce( - haschr - does a cvec contain this chr? 
^ static int haschr(struct cvec *, pchr); */ +#ifdef REGEXP_MCCE_ENABLED static int /* predicate */ haschr( struct cvec *cv, /* character vector */ @@ -178,6 +198,7 @@ haschr( } return 0; } +#endif /* - getcvec - get a cvec, remembering it as v->cv @@ -187,18 +208,23 @@ static struct cvec * getcvec( struct vars *v, /* context */ int nchrs, /* to hold this many chrs... */ - int nranges, /* ... and this many ranges... */ + int nranges) /* ... and this many ranges... */ +#ifdef REGEXP_MCCE_ENABLED int nmcces) /* ... and this many MCCEs */ +#endif { if ((v->cv != NULL) && (nchrs <= v->cv->chrspace) && - (nranges <= v->cv->rangespace) && (nmcces <= v->cv->mccespace)) { +#ifdef REGEXP_MCCE_ENABLED + (nmcces <= v->cv->mccespace) && +#endif + (nranges <= v->cv->rangespace)) { return clearcvec(v->cv); } if (v->cv != NULL) { freecvec(v->cv); } - v->cv = newcvec(nchrs, nranges, nmcces); + v->cv = newcvec(nchrs, nranges/*, nmcces*/); if (v->cv == NULL) { ERR(REG_ESPACE); } diff --git a/generic/regc_locale.c b/generic/regc_locale.c index b08c300..438e821 100644 --- a/generic/regc_locale.c +++ b/generic/regc_locale.c @@ -9,7 +9,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: regc_locale.c,v 1.17 2007/04/19 09:00:55 dkf Exp $ + * RCS: @(#) $Id: regc_locale.c,v 1.18 2007/11/14 00:07:58 dkf Exp $ */ /* ASCII character-name table */ @@ -614,6 +614,7 @@ static const chr printCharTable[] = { - nmcces - how many distinct MCCEs are there? ^ static int nmcces(struct vars *); */ +#ifdef REGEXP_MCCE_ENABLED static int nmcces( struct vars *v) /* context */ @@ -622,23 +623,27 @@ nmcces( * No multi-character collating elements defined at the moment. */ return 0; -} - +} +#endif + /* - nleaders - how many chrs can be first chrs of MCCEs? 
^ static int nleaders(struct vars *); */ +#ifdef REGEXP_MCCE_ENABLED static int nleaders( struct vars *v) /* context */ { return 0; } +#endif /* - allmcces - return a cvec with all the MCCEs of the locale ^ static struct cvec *allmcces(struct vars *, struct cvec *); */ +#ifdef REGEXP_MCCE_ENABLED static struct cvec * allmcces( struct vars *v, /* context */ @@ -646,6 +651,7 @@ allmcces( { return clearcvec(cv); } +#endif /* - element - map collating-element name to celt @@ -719,7 +725,7 @@ range( } if (!cases) { /* easy version */ - cv = getcvec(v, 0, 1, 0); + cv = getcvec(v, 0, 1/*, 0*/); NOERRN(); addrange(cv, a, b); return cv; @@ -733,7 +739,7 @@ range( nchrs = (b - a + 1)*2 + 4; - cv = getcvec(v, nchrs, 0, 0); + cv = getcvec(v, nchrs, 0/*, 0*/); NOERRN(); for (c=a; c<=b; c++) { @@ -792,7 +798,7 @@ eclass( */ if ((v->cflags&REG_FAKE) && c == 'x') { - cv = getcvec(v, 4, 0, 0); + cv = getcvec(v, 4, 0/*, 0*/); addchr(cv, (chr)'x'); addchr(cv, (chr)'y'); if (cases) { @@ -809,7 +815,7 @@ eclass( if (cases) { return allcases(v, c); } - cv = getcvec(v, 1, 0, 0); + cv = getcvec(v, 1, 0/*, 0*/); assert(cv != NULL); addchr(cv, (chr)c); return cv; @@ -889,7 +895,7 @@ cclass( switch((enum classes) index) { case CC_PRINT: - cv = getcvec(v, NUM_PRINT_CHAR, NUM_PRINT_RANGE, 0); + cv = getcvec(v, NUM_PRINT_CHAR, NUM_PRINT_RANGE/*, 0*/); if (cv) { for (i=0 ; (size_t)i<NUM_PRINT_CHAR ; i++) { addchr(cv, printCharTable[i]); @@ -901,7 +907,7 @@ cclass( } break; case CC_ALNUM: - cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE, 0); + cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE/*, 0*/); if (cv) { for (i=0 ; (size_t)i<NUM_ALPHA_CHAR ; i++) { addchr(cv, alphaCharTable[i]); @@ -917,7 +923,7 @@ cclass( } break; case CC_ALPHA: - cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE, 0); + cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE/*, 0*/); if (cv) { for (i=0 ; (size_t)i<NUM_ALPHA_RANGE ; i++) { addrange(cv, alphaRangeTable[i].start, @@ -929,23 +935,23 @@
cclass( } break; case CC_ASCII: - cv = getcvec(v, 0, 1, 0); + cv = getcvec(v, 0, 1/*, 0*/); if (cv) { addrange(cv, 0, 0x7f); } break; case CC_BLANK: - cv = getcvec(v, 2, 0, 0); + cv = getcvec(v, 2, 0/*, 0*/); addchr(cv, '\t'); addchr(cv, ' '); break; case CC_CNTRL: - cv = getcvec(v, 0, 2, 0); + cv = getcvec(v, 0, 2/*, 0*/); addrange(cv, 0x0, 0x1f); addrange(cv, 0x7f, 0x9f); break; case CC_DIGIT: - cv = getcvec(v, 0, NUM_DIGIT_RANGE, 0); + cv = getcvec(v, 0, NUM_DIGIT_RANGE/*, 0*/); if (cv) { for (i=0 ; (size_t)i<NUM_DIGIT_RANGE ; i++) { addrange(cv, digitRangeTable[i].start, @@ -954,7 +960,7 @@ cclass( } break; case CC_PUNCT: - cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE, 0); + cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE/*, 0*/); if (cv) { for (i=0 ; (size_t)i<NUM_PUNCT_RANGE ; i++) { addrange(cv, punctRangeTable[i].start, @@ -975,7 +981,7 @@ cclass( * someone comes up with a better arrangement!) */ - cv = getcvec(v, 0, 3, 0); + cv = getcvec(v, 0, 3/*, 0*/); if (cv) { addrange(cv, '0', '9'); addrange(cv, 'a', 'f'); @@ -983,7 +989,7 @@ cclass( } break; case CC_SPACE: - cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE, 0); + cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE/*, 0*/); if (cv) { for (i=0 ; (size_t)i<NUM_SPACE_RANGE ; i++) { addrange(cv, spaceRangeTable[i].start, @@ -995,7 +1001,7 @@ cclass( } break; case CC_LOWER: - cv = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE, 0); + cv = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE/*, 0*/); if (cv) { for (i=0 ; (size_t)i<NUM_LOWER_RANGE ; i++) { addrange(cv, lowerRangeTable[i].start, @@ -1007,7 +1013,7 @@ cclass( } break; case CC_UPPER: - cv = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE, 0); + cv = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE/*, 0*/); if (cv) { for (i=0 ; (size_t)i<NUM_UPPER_RANGE ; i++) { addrange(cv, upperRangeTable[i].start, @@ -1019,7 +1025,7 @@ cclass( } break; case CC_GRAPH: - cv = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE, 0); + cv = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE/*, 0*/); if 
(cv) { for (i=0 ; (size_t)i<NUM_GRAPH_RANGE ; i++) { addrange(cv, graphRangeTable[i].start, @@ -1057,10 +1063,10 @@ allcases( tc = Tcl_UniCharToTitle((chr)c); if (tc != uc) { - cv = getcvec(v, 3, 0, 0); + cv = getcvec(v, 3, 0/*, 0*/); addchr(cv, tc); } else { - cv = getcvec(v, 2, 0, 0); + cv = getcvec(v, 2, 0/*, 0*/); } addchr(cv, lc); if (lc != uc) { diff --git a/generic/regcomp.c b/generic/regcomp.c index 7faf793..8a43240 100644 --- a/generic/regcomp.c +++ b/generic/regcomp.c @@ -53,10 +53,10 @@ static void bracket(struct vars *, struct state *, struct state *); static void cbracket(struct vars *, struct state *, struct state *); static void brackpart(struct vars *, struct state *, struct state *); static const chr *scanplain(struct vars *); -static void leaders(struct vars *, struct cvec *); static void onechr(struct vars *, pchr, struct state *, struct state *); static void dovec(struct vars *, struct cvec *, struct state *, struct state *); #ifdef REGEXP_MCCE_ENABLED +static void leaders(struct vars *, struct cvec *); static celt nextleader(struct vars *, pchr, pchr); #endif static void wordchrs(struct vars *); @@ -171,20 +171,25 @@ static void dumpcnfa(struct cnfa *, FILE *); static void dumpcstate(int, struct carc *, struct cnfa *, FILE *); #endif /* === regc_cvec.c === */ -static struct cvec *newcvec(int, int, int); static struct cvec *clearcvec(struct cvec *); static void addchr(struct cvec *, pchr); static void addrange(struct cvec *, pchr, pchr); #ifdef REGEXP_MCCE_ENABLED +static struct cvec *newcvec(int, int, int); static void addmcce(struct cvec *, const chr *, const chr *); -#endif -static int haschr(struct cvec *, pchr); static struct cvec *getcvec(struct vars *, int, int, int); +static int haschr(struct cvec *, pchr); +#else +static struct cvec *newcvec(int, int); +static struct cvec *getcvec(struct vars *, int, int); +#endif static void freecvec(struct cvec *); /* === regc_locale.c === */ -static int nmcces(struct vars *); +#ifdef 
REGEXP_MCCE_ENABLED static int nleaders(struct vars *); +static int nmcces(struct vars *); static struct cvec *allmcces(struct vars *, struct cvec *); +#endif static celt element(struct vars *, const chr *, const chr *); static struct cvec *range(struct vars *, celt, celt, int); static int before(celt, celt); @@ -223,10 +228,12 @@ struct vars { int ntree; /* number of tree nodes */ struct cvec *cv; /* interface cvec */ struct cvec *cv2; /* utility cvec */ +#ifdef REGEXP_MCCE_ENABLED struct cvec *mcces; /* collating-element information */ #define ISCELEADER(v,c) (v->mcces != NULL && haschr(v->mcces, (c))) struct state *mccepbegin; /* in nfa, start of MCCE prototypes */ struct state *mccepend; /* in nfa, end of MCCE prototypes */ +#endif struct subre *lacons; /* lookahead-constraint vector */ int nlacons; /* size of lacons */ }; @@ -336,7 +343,9 @@ compile( v->treefree = NULL; v->cv = NULL; v->cv2 = NULL; +#ifdef REGEXP_MCCE_ENABLED v->mcces = NULL; +#endif v->lacons = NULL; v->nlacons = 0; re->re_magic = REMAGIC; @@ -362,22 +371,22 @@ compile( ZAPCNFA(g->search); v->nfa = newnfa(v, v->cm, NULL); CNOERR(); - v->cv = newcvec(100, 20, 10); + v->cv = newcvec(100, 20); if (v->cv == NULL) { return freev(v, REG_ESPACE); } +#ifdef REGEXP_MCCE_ENABLED i = nmcces(v); if (i > 0) { - v->mcces = newcvec(nleaders(v), 0, i); + v->mcces = newcvec(nleaders(v), 0); CNOERR(); v->mcces = allmcces(v, v->mcces); leaders(v, v->mcces); -#ifdef REGEXP_MCCE_ENABLED /* Function does nothing with NULL pointers */ addmcce(v->mcces, NULL, NULL); /* dummy */ -#endif } CNOERR(); +#endif /* * Parsing. 
@@ -550,9 +559,11 @@ freev( if (v->cv2 != NULL) { freecvec(v->cv2); } +#ifdef REGEXP_MCCE_ENABLED if (v->mcces != NULL) { freecvec(v->mcces); } +#endif if (v->lacons != NULL) { freelacons(v->lacons, v->nlacons); } @@ -1467,6 +1478,7 @@ cbracket( { struct state *left = newstate(v->nfa); struct state *right = newstate(v->nfa); +#ifdef REGEXP_MCCE_ENABLED struct state *s; struct arc *a; /* arc from lp */ struct arc *ba; /* arc from left, from bracket() */ @@ -1474,6 +1486,7 @@ cbracket( color co; const chr *p; int i; +#endif NOERR(); bracket(v, left, right); @@ -1490,13 +1503,14 @@ cbracket( colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp); NOERR(); - if (v->mcces == NULL) { /* no MCCEs -- we're done */ + if (1 /*v->mcces == NULL*/) { /* no MCCEs -- we're done */ dropstate(v->nfa, left); assert(right->nins == 0); freestate(v->nfa, right); return; } +#ifdef REGEXP_MCCE_ENABLED /* * But complementing gets messy in the presence of MCCEs... */ @@ -1546,6 +1560,7 @@ cbracket( freestate(v->nfa, left); assert(right->nins == 0); freestate(v->nfa, right); +#endif } /* @@ -1580,7 +1595,7 @@ brackpart( * Shortcut for ordinary chr (not range, not MCCE leader). */ - if (!SEE(RANGE) && !ISCELEADER(v, c[0])) { + if (!SEE(RANGE) /*&& !ISCELEADER(v, c[0])*/) { onechr(v, c[0], lp, rp); return; } @@ -1696,6 +1711,7 @@ scanplain( * certainly necessary, and sets up little disconnected subNFA. 
^ static void leaders(struct vars *, struct cvec *); */ +#ifdef REGEXP_MCCE_ENABLED static void leaders( struct vars *v, @@ -1731,6 +1747,7 @@ leaders( okcolors(v->nfa, v->cm); } } +#endif /* - onechr - fill in arcs for a plain character, and possible case complements diff --git a/generic/regguts.h b/generic/regguts.h index 991979e..bc1d7a2 100644 --- a/generic/regguts.h +++ b/generic/regguts.h @@ -245,15 +245,19 @@ struct cvec { int nranges; /* number of ranges (chr pairs) */ int rangespace; /* number of chrs possible */ chr *ranges; /* pointer to vector of chr pairs */ +#ifdef REGEXP_MCCE_ENABLED int nmcces; /* number of MCCEs */ int mccespace; /* number of MCCEs possible */ int nmccechrs; /* number of chrs used for MCCEs */ chr *mcces[1]; /* pointers to 0-terminated MCCEs */ /* and both batches of chrs are on the end */ +#endif }; +#ifdef REGEXP_MCCE_ENABLED /* caution: this value cannot be changed easily */ #define MAXMCCE 2 /* length of longest MCCE */ +#endif /* * definitions for NFA internal representation |