diff options
Diffstat (limited to 'generic/regexec.c')
| -rw-r--r-- | generic/regexec.c | 129 |
1 files changed, 56 insertions, 73 deletions
diff --git a/generic/regexec.c b/generic/regexec.c index 7ef048e..a7053c3 100644 --- a/generic/regexec.c +++ b/generic/regexec.c @@ -1,7 +1,7 @@ /* * re_*exec and friends - match REs * - * Copyright © 1998, 1999 Henry Spencer. All rights reserved. + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics @@ -44,7 +44,7 @@ struct sset { /* state set */ unsigned hash; /* hash of bitvector */ #define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw)) #define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \ - memcmp((void*)(bv), (void*)((ss)->states), (nw)*sizeof(unsigned)) == 0)) + memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0)) int flags; #define STARTER 01 /* the initial state set */ #define POSTSTATE 02 /* includes the goal state */ @@ -73,7 +73,7 @@ struct dfa { chr *lastnopr; /* location of last cache-flushed NOPROGRESS */ struct sset *search; /* replacement-search-pointer memory */ int cptsmalloced; /* were the areas individually malloced? */ - char *mallocarea; /* self, or malloced area, or NULL */ + char *mallocarea; /* self, or master malloced area, or NULL */ }; #define WORK 1 /* number of work bitvectors needed */ @@ -91,6 +91,7 @@ struct smalldfa { struct sset *outsarea[FEWSTATES*2 * FEWCOLORS]; struct arcp incarea[FEWSTATES*2 * FEWCOLORS]; }; +#define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */ /* * Internal variables, bundled for easy passing around. @@ -116,7 +117,7 @@ struct vars { #define ERR(e) VERR(v, e) /* record an error */ #define NOERR() {if (ISERR()) return v->err;} /* if error seen, return it */ #define OFF(p) ((p) - v->start) -#define LOFF(p) ((size_t)OFF(p)) +#define LOFF(p) ((long)OFF(p)) /* * forward declarations @@ -128,7 +129,7 @@ int exec(regex_t *, const chr *, size_t, rm_detail_t *, size_t, regmatch_t [], i static struct dfa *getsubdfa(struct vars *, struct subre *); static int simpleFind(struct vars *const, struct cnfa *const, struct colormap *const); static int complicatedFind(struct vars *const, struct cnfa *const, struct colormap *const); -static int complicatedFindLoop(struct vars *const, struct dfa *const, struct dfa *const, chr **const); +static int complicatedFindLoop(struct vars *const, struct cnfa *const, struct colormap *const, struct dfa *const, struct dfa *const, chr **const); static void zapallsubs(regmatch_t *const, const size_t); static void zaptreesubs(struct vars *const, struct subre *const); static void subset(struct vars *const, struct subre *const, chr *const, chr *const); @@ -145,7 +146,7 @@ static chr *shortest(struct vars *const, struct dfa *const, chr *const, chr *con static chr *lastCold(struct vars *const, struct dfa *const); static struct dfa *newDFA(struct vars *const, struct cnfa *const, struct colormap *const, struct smalldfa *); static void freeDFA(struct dfa *const); -static unsigned hash(unsigned *const, int); +static unsigned hash(unsigned *const, const int); static struct sset *initialize(struct vars *const, struct dfa *const, chr *const); static struct sset *miss(struct vars *const, struct dfa *const, struct sset *const, const pcolor, chr *const, chr *const); static int checkLAConstraint(struct vars *const, struct cnfa *const, chr *const, const pcolor); @@ -162,7 +163,7 @@ static struct sset *pickNextSS(struct vars *const, struct dfa *const, chr *const int exec( regex_t *re, - const chr *string, + CONST chr *string, size_t len, rm_detail_t *details, size_t nmatch, @@ -171,8 +172,8 @@ exec( { AllocVars(v); int st, backref; - int n; - int i; + size_t n; + size_t i; #define LOCALMAT 20 regmatch_t mat[LOCALMAT]; #define LOCALDFAS 40 @@ -235,16 +236,14 @@ exec( v->stop = (chr *)string + len; v->err = 0; assert(v->g->ntree >= 0); - n = v->g->ntree; - if (n <= LOCALDFAS) { + n = (size_t) v->g->ntree; + if (n <= LOCALDFAS) v->subdfas = subdfas; - } else { + else v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *)); - } if (v->subdfas == NULL) { - if (v->pmatch != pmatch && v->pmatch != mat) { + if (v->pmatch != pmatch && v->pmatch != mat) FREE(v->pmatch); - } FreeVars(v); return REG_ESPACE; } @@ -269,7 +268,7 @@ exec( if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) { zapallsubs(pmatch, nmatch); n = (nmatch < v->nmatch) ? nmatch : v->nmatch; - memcpy((void*)(pmatch), (void*)(v->pmatch), n*sizeof(regmatch_t)); + memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t)); } /* @@ -279,15 +278,13 @@ exec( if (v->pmatch != pmatch && v->pmatch != mat) { FREE(v->pmatch); } - n = v->g->ntree; + n = (size_t) v->g->ntree; for (i = 0; i < n; i++) { - if (v->subdfas[i] != NULL) { + if (v->subdfas[i] != NULL) freeDFA(v->subdfas[i]); - } } - if (v->subdfas != subdfas) { + if (v->subdfas != subdfas) FREE(v->subdfas); - } FreeVars(v); return st; } @@ -302,10 +299,9 @@ getsubdfa(struct vars * v, struct subre * t) { if (v->subdfas[t->id] == NULL) { - v->subdfas[t->id] = newDFA(v, &t->cnfa, &v->g->cmap, NULL); - if (ISERR()) { + v->subdfas[t->id] = newDFA(v, &t->cnfa, &v->g->cmap, DOMALLOC); + if (ISERR()) return NULL; - } } return v->subdfas[t->id]; } @@ -335,7 +331,7 @@ simpleFind( s = newDFA(v, &v->g->search, cm, &v->dfa1); assert(!(ISERR() && s != NULL)); NOERR(); - MDEBUG(("\nsearch at %" TCL_Z_MODIFIER "u\n", LOFF(v->start))); + MDEBUG(("\nsearch at %ld\n", LOFF(v->start))); cold = NULL; close = shortest(v, s, v->start, v->start, v->stop, &cold, NULL); freeDFA(s); @@ -363,12 +359,12 @@ simpleFind( assert(cold != NULL); open = cold; cold = NULL; - MDEBUG(("between %" TCL_Z_MODIFIER "u and %" TCL_Z_MODIFIER "u\n", LOFF(open), LOFF(close))); + MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close))); d = newDFA(v, cnfa, cm, &v->dfa1); assert(!(ISERR() && d != NULL)); NOERR(); for (begin = open; begin <= close; begin++) { - MDEBUG(("\nfind trying at %" TCL_Z_MODIFIER "u\n", LOFF(begin))); + MDEBUG(("\nfind trying at %ld\n", LOFF(begin))); if (shorter) { end = shortest(v, d, begin, begin, v->stop, NULL, &hitend); } else { @@ -438,7 +434,7 @@ complicatedFind( return v->err; } - ret = complicatedFindLoop(v, d, s, &cold); + ret = complicatedFindLoop(v, cnfa, cm, d, s, &cold); freeDFA(d); freeDFA(s); @@ -457,12 +453,14 @@ complicatedFind( /* - complicatedFindLoop - the heart of complicatedFind - ^ static int complicatedFindLoop(struct vars *, + ^ static int complicatedFindLoop(struct vars *, struct cnfa *, struct colormap *, ^ struct dfa *, struct dfa *, chr **); */ static int complicatedFindLoop( struct vars *const v, + struct cnfa *const cnfa, + struct colormap *const cm, struct dfa *const d, struct dfa *const s, chr **const coldp) /* where to put coldstart pointer */ @@ -479,7 +477,7 @@ complicatedFindLoop( cold = NULL; close = v->start; do { - MDEBUG(("\ncsearch at %" TCL_Z_MODIFIER "u\n", LOFF(close))); + MDEBUG(("\ncsearch at %ld\n", LOFF(close))); close = shortest(v, s, close, close, v->stop, &cold, NULL); if (close == NULL) { break; /* NOTE BREAK */ @@ -487,9 +485,9 @@ complicatedFindLoop( assert(cold != NULL); open = cold; cold = NULL; - MDEBUG(("cbetween %" TCL_Z_MODIFIER "u and %" TCL_Z_MODIFIER "u\n", LOFF(open), LOFF(close))); + MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close))); for (begin = open; begin <= close; begin++) { - MDEBUG(("\ncomplicatedFind trying at %" TCL_Z_MODIFIER "u\n", LOFF(begin))); + MDEBUG(("\ncomplicatedFind trying at %ld\n", LOFF(begin))); estart = begin; estop = v->stop; for (;;) { @@ -505,7 +503,7 @@ complicatedFindLoop( break; /* NOTE BREAK OUT */ } - MDEBUG(("tentative end %" TCL_Z_MODIFIER "u\n", LOFF(end))); + MDEBUG(("tentative end %ld\n", LOFF(end))); zapallsubs(v->pmatch, v->nmatch); er = cdissect(v, v->g->tree, begin, end); if (er == REG_OKAY) { @@ -632,7 +630,7 @@ cdissect( int er; assert(t != NULL); - MDEBUG(("cdissect %" TCL_Z_MODIFIER "u-%" TCL_Z_MODIFIER "u %c\n", LOFF(begin), LOFF(end), t->op)); + MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op)); switch (t->op) { case '=': /* terminal node */ @@ -645,11 +643,10 @@ cdissect( break; case '.': /* concatenation */ assert(t->left != NULL && t->right != NULL); - if (t->left->flags & SHORTER) {/* reverse scan */ + if (t->left->flags & SHORTER) /* reverse scan */ er = crevcondissect(v, t, begin, end); - } else { + else er = ccondissect(v, t, begin, end); - } break; case '|': /* alternation */ assert(t->left != NULL); @@ -657,11 +654,10 @@ cdissect( break; case '*': /* iteration */ assert(t->left != NULL); - if (t->left->flags & SHORTER) {/* reverse scan */ + if (t->left->flags & SHORTER) /* reverse scan */ er = creviterdissect(v, t, begin, end); - } else { + else er = citerdissect(v, t, begin, end); - } break; case '(': /* capturing */ assert(t->left != NULL && t->right == NULL); @@ -719,7 +715,7 @@ ccondissect( if (mid == NULL) { return REG_NOMATCH; } - MDEBUG(("tentative midpoint %" TCL_Z_MODIFIER "u\n", LOFF(mid))); + MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); /* * Iterate until satisfaction or failure. @@ -770,7 +766,7 @@ ccondissect( MDEBUG(("%d failed midpoint\n", t->id)); return REG_NOMATCH; } - MDEBUG(("%d: new midpoint %" TCL_Z_MODIFIER "u\n", t->id, LOFF(mid))); + MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid))); zaptreesubs(v, t->left); zaptreesubs(v, t->right); } @@ -810,7 +806,7 @@ crevcondissect( if (mid == NULL) { return REG_NOMATCH; } - MDEBUG(("tentative midpoint %" TCL_Z_MODIFIER "u\n", LOFF(mid))); + MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); /* * Iterate until satisfaction or failure. @@ -861,7 +857,7 @@ crevcondissect( MDEBUG(("%d failed midpoint\n", t->id)); return REG_NOMATCH; } - MDEBUG(("%d: new midpoint %" TCL_Z_MODIFIER "u\n", t->id, LOFF(mid))); + MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid))); zaptreesubs(v, t->left); zaptreesubs(v, t->right); } @@ -893,7 +889,7 @@ cbrdissect( MDEBUG(("cbackref n%d %d{%d-%d}\n", t->id, n, min, max)); /* get the backreferenced string */ - if (v->pmatch[n].rm_so == TCL_INDEX_NONE) { + if (v->pmatch[n].rm_so == -1) { return REG_NOMATCH; } brstring = v->start + v->pmatch[n].rm_so; @@ -927,20 +923,17 @@ cbrdissect( assert(end > begin); tlen = end - begin; - if (tlen % brlen != 0) { + if (tlen % brlen != 0) return REG_NOMATCH; - } numreps = tlen / brlen; - if (numreps < (size_t)min || (numreps > (size_t)max && max != DUPINF)) { + if (numreps < (size_t)min || (numreps > (size_t)max && max != DUPINF)) return REG_NOMATCH; - } /* okay, compare the actual string contents */ p = begin; while (numreps-- > 0) { - if ((*v->g->compare) (brstring, p, brlen) != 0) { + if ((*v->g->compare) (brstring, p, brlen) != 0) return REG_NOMATCH; - } p += brlen; } @@ -1017,9 +1010,8 @@ citerdissect(struct vars * v, */ min_matches = t->min; if (min_matches <= 0) { - if (begin == end) { + if (begin == end) return REG_OKAY; - } min_matches = 1; } @@ -1033,9 +1025,8 @@ citerdissect(struct vars * v, * sub-match endpoints in endpts[1..max_matches]. */ max_matches = end - begin; - if (max_matches > (size_t)t->max && t->max != DUPINF) { + if (max_matches > (size_t)t->max && t->max != DUPINF) max_matches = t->max; - } if (max_matches < (size_t)min_matches) max_matches = min_matches; endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *)); @@ -1074,13 +1065,12 @@ citerdissect(struct vars * v, k--; goto backtrack; } - MDEBUG(("%d: working endpoint %d: %" TCL_Z_MODIFIER "u\n", + MDEBUG(("%d: working endpoint %d: %ld\n", t->id, k, LOFF(endpts[k]))); /* k'th sub-match can no longer be considered verified */ - if (nverified >= k) { + if (nverified >= k) nverified = k - 1; - } if (endpts[k] != end) { /* haven't reached end yet, try another iteration if allowed */ @@ -1106,9 +1096,8 @@ citerdissect(struct vars * v, * number of matches, start the slow part: recurse to verify each * sub-match. We always have k <= max_matches, needn't check that. */ - if (k < min_matches) { + if (k < min_matches) goto backtrack; - } MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k)); @@ -1119,9 +1108,8 @@ citerdissect(struct vars * v, nverified = i; continue; } - if (er == REG_NOMATCH) { + if (er == REG_NOMATCH) break; - } /* oops, something failed */ FREE(endpts); return er; @@ -1195,9 +1183,8 @@ creviterdissect(struct vars * v, */ min_matches = t->min; if (min_matches <= 0) { - if (begin == end) { + if (begin == end) return REG_OKAY; - } min_matches = 1; } @@ -1251,9 +1238,8 @@ creviterdissect(struct vars * v, limit++; /* if this is the last allowed sub-match, it must reach to the end */ - if ((size_t)k >= max_matches) { + if ((size_t)k >= max_matches) limit = end; - } /* try to find an endpoint for the k'th sub-match */ endpts[k] = shortest(v, d, endpts[k - 1], limit, end, @@ -1263,13 +1249,12 @@ creviterdissect(struct vars * v, k--; goto backtrack; } - MDEBUG(("%d: working endpoint %d: %" TCL_Z_MODIFIER "u\n", + MDEBUG(("%d: working endpoint %d: %ld\n", t->id, k, LOFF(endpts[k]))); /* k'th sub-match can no longer be considered verified */ - if (nverified >= k) { + if (nverified >= k) nverified = k - 1; - } if (endpts[k] != end) { /* haven't reached end yet, try another iteration if allowed */ @@ -1290,9 +1275,8 @@ creviterdissect(struct vars * v, * number of matches, start the slow part: recurse to verify each * sub-match. We always have k <= max_matches, needn't check that. */ - if (k < min_matches) { + if (k < min_matches) goto backtrack; - } MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k)); @@ -1303,9 +1287,8 @@ creviterdissect(struct vars * v, nverified = i; continue; } - if (er == REG_NOMATCH) { + if (er == REG_NOMATCH) break; - } /* oops, something failed */ FREE(endpts); return er; |
