summaryrefslogtreecommitdiffstats
path: root/generic/regexec.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/regexec.c')
-rw-r--r--generic/regexec.c129
1 files changed, 56 insertions, 73 deletions
diff --git a/generic/regexec.c b/generic/regexec.c
index 7ef048e..a7053c3 100644
--- a/generic/regexec.c
+++ b/generic/regexec.c
@@ -1,7 +1,7 @@
/*
* re_*exec and friends - match REs
*
- * Copyright © 1998, 1999 Henry Spencer. All rights reserved.
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
*
* Development of this software was funded, in part, by Cray Research Inc.,
* UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
@@ -44,7 +44,7 @@ struct sset { /* state set */
unsigned hash; /* hash of bitvector */
#define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw))
#define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
- memcmp((void*)(bv), (void*)((ss)->states), (nw)*sizeof(unsigned)) == 0))
+ memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
int flags;
#define STARTER 01 /* the initial state set */
#define POSTSTATE 02 /* includes the goal state */
@@ -73,7 +73,7 @@ struct dfa {
chr *lastnopr; /* location of last cache-flushed NOPROGRESS */
struct sset *search; /* replacement-search-pointer memory */
int cptsmalloced; /* were the areas individually malloced? */
- char *mallocarea; /* self, or malloced area, or NULL */
+ char *mallocarea; /* self, or master malloced area, or NULL */
};
#define WORK 1 /* number of work bitvectors needed */
@@ -91,6 +91,7 @@ struct smalldfa {
struct sset *outsarea[FEWSTATES*2 * FEWCOLORS];
struct arcp incarea[FEWSTATES*2 * FEWCOLORS];
};
+#define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */
/*
* Internal variables, bundled for easy passing around.
@@ -116,7 +117,7 @@ struct vars {
#define ERR(e) VERR(v, e) /* record an error */
#define NOERR() {if (ISERR()) return v->err;} /* if error seen, return it */
#define OFF(p) ((p) - v->start)
-#define LOFF(p) ((size_t)OFF(p))
+#define LOFF(p) ((long)OFF(p))
/*
* forward declarations
@@ -128,7 +129,7 @@ int exec(regex_t *, const chr *, size_t, rm_detail_t *, size_t, regmatch_t [], i
static struct dfa *getsubdfa(struct vars *, struct subre *);
static int simpleFind(struct vars *const, struct cnfa *const, struct colormap *const);
static int complicatedFind(struct vars *const, struct cnfa *const, struct colormap *const);
-static int complicatedFindLoop(struct vars *const, struct dfa *const, struct dfa *const, chr **const);
+static int complicatedFindLoop(struct vars *const, struct cnfa *const, struct colormap *const, struct dfa *const, struct dfa *const, chr **const);
static void zapallsubs(regmatch_t *const, const size_t);
static void zaptreesubs(struct vars *const, struct subre *const);
static void subset(struct vars *const, struct subre *const, chr *const, chr *const);
@@ -145,7 +146,7 @@ static chr *shortest(struct vars *const, struct dfa *const, chr *const, chr *con
static chr *lastCold(struct vars *const, struct dfa *const);
static struct dfa *newDFA(struct vars *const, struct cnfa *const, struct colormap *const, struct smalldfa *);
static void freeDFA(struct dfa *const);
-static unsigned hash(unsigned *const, int);
+static unsigned hash(unsigned *const, const int);
static struct sset *initialize(struct vars *const, struct dfa *const, chr *const);
static struct sset *miss(struct vars *const, struct dfa *const, struct sset *const, const pcolor, chr *const, chr *const);
static int checkLAConstraint(struct vars *const, struct cnfa *const, chr *const, const pcolor);
@@ -162,7 +163,7 @@ static struct sset *pickNextSS(struct vars *const, struct dfa *const, chr *const
int
exec(
regex_t *re,
- const chr *string,
+ CONST chr *string,
size_t len,
rm_detail_t *details,
size_t nmatch,
@@ -171,8 +172,8 @@ exec(
{
AllocVars(v);
int st, backref;
- int n;
- int i;
+ size_t n;
+ size_t i;
#define LOCALMAT 20
regmatch_t mat[LOCALMAT];
#define LOCALDFAS 40
@@ -235,16 +236,14 @@ exec(
v->stop = (chr *)string + len;
v->err = 0;
assert(v->g->ntree >= 0);
- n = v->g->ntree;
- if (n <= LOCALDFAS) {
+ n = (size_t) v->g->ntree;
+ if (n <= LOCALDFAS)
v->subdfas = subdfas;
- } else {
+ else
v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
- }
if (v->subdfas == NULL) {
- if (v->pmatch != pmatch && v->pmatch != mat) {
+ if (v->pmatch != pmatch && v->pmatch != mat)
FREE(v->pmatch);
- }
FreeVars(v);
return REG_ESPACE;
}
@@ -269,7 +268,7 @@ exec(
if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) {
zapallsubs(pmatch, nmatch);
n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
- memcpy((void*)(pmatch), (void*)(v->pmatch), n*sizeof(regmatch_t));
+ memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t));
}
/*
@@ -279,15 +278,13 @@ exec(
if (v->pmatch != pmatch && v->pmatch != mat) {
FREE(v->pmatch);
}
- n = v->g->ntree;
+ n = (size_t) v->g->ntree;
for (i = 0; i < n; i++) {
- if (v->subdfas[i] != NULL) {
+ if (v->subdfas[i] != NULL)
freeDFA(v->subdfas[i]);
- }
}
- if (v->subdfas != subdfas) {
+ if (v->subdfas != subdfas)
FREE(v->subdfas);
- }
FreeVars(v);
return st;
}
@@ -302,10 +299,9 @@ getsubdfa(struct vars * v,
struct subre * t)
{
if (v->subdfas[t->id] == NULL) {
- v->subdfas[t->id] = newDFA(v, &t->cnfa, &v->g->cmap, NULL);
- if (ISERR()) {
+ v->subdfas[t->id] = newDFA(v, &t->cnfa, &v->g->cmap, DOMALLOC);
+ if (ISERR())
return NULL;
- }
}
return v->subdfas[t->id];
}
@@ -335,7 +331,7 @@ simpleFind(
s = newDFA(v, &v->g->search, cm, &v->dfa1);
assert(!(ISERR() && s != NULL));
NOERR();
- MDEBUG(("\nsearch at %" TCL_Z_MODIFIER "u\n", LOFF(v->start)));
+ MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
cold = NULL;
close = shortest(v, s, v->start, v->start, v->stop, &cold, NULL);
freeDFA(s);
@@ -363,12 +359,12 @@ simpleFind(
assert(cold != NULL);
open = cold;
cold = NULL;
- MDEBUG(("between %" TCL_Z_MODIFIER "u and %" TCL_Z_MODIFIER "u\n", LOFF(open), LOFF(close)));
+ MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
d = newDFA(v, cnfa, cm, &v->dfa1);
assert(!(ISERR() && d != NULL));
NOERR();
for (begin = open; begin <= close; begin++) {
- MDEBUG(("\nfind trying at %" TCL_Z_MODIFIER "u\n", LOFF(begin)));
+ MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
if (shorter) {
end = shortest(v, d, begin, begin, v->stop, NULL, &hitend);
} else {
@@ -438,7 +434,7 @@ complicatedFind(
return v->err;
}
- ret = complicatedFindLoop(v, d, s, &cold);
+ ret = complicatedFindLoop(v, cnfa, cm, d, s, &cold);
freeDFA(d);
freeDFA(s);
@@ -457,12 +453,14 @@ complicatedFind(
/*
- complicatedFindLoop - the heart of complicatedFind
- ^ static int complicatedFindLoop(struct vars *,
+ ^ static int complicatedFindLoop(struct vars *, struct cnfa *, struct colormap *,
^ struct dfa *, struct dfa *, chr **);
*/
static int
complicatedFindLoop(
struct vars *const v,
+ struct cnfa *const cnfa,
+ struct colormap *const cm,
struct dfa *const d,
struct dfa *const s,
chr **const coldp) /* where to put coldstart pointer */
@@ -479,7 +477,7 @@ complicatedFindLoop(
cold = NULL;
close = v->start;
do {
- MDEBUG(("\ncsearch at %" TCL_Z_MODIFIER "u\n", LOFF(close)));
+ MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
close = shortest(v, s, close, close, v->stop, &cold, NULL);
if (close == NULL) {
break; /* NOTE BREAK */
@@ -487,9 +485,9 @@ complicatedFindLoop(
assert(cold != NULL);
open = cold;
cold = NULL;
- MDEBUG(("cbetween %" TCL_Z_MODIFIER "u and %" TCL_Z_MODIFIER "u\n", LOFF(open), LOFF(close)));
+ MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
for (begin = open; begin <= close; begin++) {
- MDEBUG(("\ncomplicatedFind trying at %" TCL_Z_MODIFIER "u\n", LOFF(begin)));
+ MDEBUG(("\ncomplicatedFind trying at %ld\n", LOFF(begin)));
estart = begin;
estop = v->stop;
for (;;) {
@@ -505,7 +503,7 @@ complicatedFindLoop(
break; /* NOTE BREAK OUT */
}
- MDEBUG(("tentative end %" TCL_Z_MODIFIER "u\n", LOFF(end)));
+ MDEBUG(("tentative end %ld\n", LOFF(end)));
zapallsubs(v->pmatch, v->nmatch);
er = cdissect(v, v->g->tree, begin, end);
if (er == REG_OKAY) {
@@ -632,7 +630,7 @@ cdissect(
int er;
assert(t != NULL);
- MDEBUG(("cdissect %" TCL_Z_MODIFIER "u-%" TCL_Z_MODIFIER "u %c\n", LOFF(begin), LOFF(end), t->op));
+ MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));
switch (t->op) {
case '=': /* terminal node */
@@ -645,11 +643,10 @@ cdissect(
break;
case '.': /* concatenation */
assert(t->left != NULL && t->right != NULL);
- if (t->left->flags & SHORTER) {/* reverse scan */
+ if (t->left->flags & SHORTER) /* reverse scan */
er = crevcondissect(v, t, begin, end);
- } else {
+ else
er = ccondissect(v, t, begin, end);
- }
break;
case '|': /* alternation */
assert(t->left != NULL);
@@ -657,11 +654,10 @@ cdissect(
break;
case '*': /* iteration */
assert(t->left != NULL);
- if (t->left->flags & SHORTER) {/* reverse scan */
+ if (t->left->flags & SHORTER) /* reverse scan */
er = creviterdissect(v, t, begin, end);
- } else {
+ else
er = citerdissect(v, t, begin, end);
- }
break;
case '(': /* capturing */
assert(t->left != NULL && t->right == NULL);
@@ -719,7 +715,7 @@ ccondissect(
if (mid == NULL) {
return REG_NOMATCH;
}
- MDEBUG(("tentative midpoint %" TCL_Z_MODIFIER "u\n", LOFF(mid)));
+ MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
/*
* Iterate until satisfaction or failure.
@@ -770,7 +766,7 @@ ccondissect(
MDEBUG(("%d failed midpoint\n", t->id));
return REG_NOMATCH;
}
- MDEBUG(("%d: new midpoint %" TCL_Z_MODIFIER "u\n", t->id, LOFF(mid)));
+ MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
zaptreesubs(v, t->left);
zaptreesubs(v, t->right);
}
@@ -810,7 +806,7 @@ crevcondissect(
if (mid == NULL) {
return REG_NOMATCH;
}
- MDEBUG(("tentative midpoint %" TCL_Z_MODIFIER "u\n", LOFF(mid)));
+ MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
/*
* Iterate until satisfaction or failure.
@@ -861,7 +857,7 @@ crevcondissect(
MDEBUG(("%d failed midpoint\n", t->id));
return REG_NOMATCH;
}
- MDEBUG(("%d: new midpoint %" TCL_Z_MODIFIER "u\n", t->id, LOFF(mid)));
+ MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
zaptreesubs(v, t->left);
zaptreesubs(v, t->right);
}
@@ -893,7 +889,7 @@ cbrdissect(
MDEBUG(("cbackref n%d %d{%d-%d}\n", t->id, n, min, max));
/* get the backreferenced string */
- if (v->pmatch[n].rm_so == TCL_INDEX_NONE) {
+ if (v->pmatch[n].rm_so == -1) {
return REG_NOMATCH;
}
brstring = v->start + v->pmatch[n].rm_so;
@@ -927,20 +923,17 @@ cbrdissect(
assert(end > begin);
tlen = end - begin;
- if (tlen % brlen != 0) {
+ if (tlen % brlen != 0)
return REG_NOMATCH;
- }
numreps = tlen / brlen;
- if (numreps < (size_t)min || (numreps > (size_t)max && max != DUPINF)) {
+ if (numreps < (size_t)min || (numreps > (size_t)max && max != DUPINF))
return REG_NOMATCH;
- }
/* okay, compare the actual string contents */
p = begin;
while (numreps-- > 0) {
- if ((*v->g->compare) (brstring, p, brlen) != 0) {
+ if ((*v->g->compare) (brstring, p, brlen) != 0)
return REG_NOMATCH;
- }
p += brlen;
}
@@ -1017,9 +1010,8 @@ citerdissect(struct vars * v,
*/
min_matches = t->min;
if (min_matches <= 0) {
- if (begin == end) {
+ if (begin == end)
return REG_OKAY;
- }
min_matches = 1;
}
@@ -1033,9 +1025,8 @@ citerdissect(struct vars * v,
* sub-match endpoints in endpts[1..max_matches].
*/
max_matches = end - begin;
- if (max_matches > (size_t)t->max && t->max != DUPINF) {
+ if (max_matches > (size_t)t->max && t->max != DUPINF)
max_matches = t->max;
- }
if (max_matches < (size_t)min_matches)
max_matches = min_matches;
endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
@@ -1074,13 +1065,12 @@ citerdissect(struct vars * v,
k--;
goto backtrack;
}
- MDEBUG(("%d: working endpoint %d: %" TCL_Z_MODIFIER "u\n",
+ MDEBUG(("%d: working endpoint %d: %ld\n",
t->id, k, LOFF(endpts[k])));
/* k'th sub-match can no longer be considered verified */
- if (nverified >= k) {
+ if (nverified >= k)
nverified = k - 1;
- }
if (endpts[k] != end) {
/* haven't reached end yet, try another iteration if allowed */
@@ -1106,9 +1096,8 @@ citerdissect(struct vars * v,
* number of matches, start the slow part: recurse to verify each
* sub-match. We always have k <= max_matches, needn't check that.
*/
- if (k < min_matches) {
+ if (k < min_matches)
goto backtrack;
- }
MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
@@ -1119,9 +1108,8 @@ citerdissect(struct vars * v,
nverified = i;
continue;
}
- if (er == REG_NOMATCH) {
+ if (er == REG_NOMATCH)
break;
- }
/* oops, something failed */
FREE(endpts);
return er;
@@ -1195,9 +1183,8 @@ creviterdissect(struct vars * v,
*/
min_matches = t->min;
if (min_matches <= 0) {
- if (begin == end) {
+ if (begin == end)
return REG_OKAY;
- }
min_matches = 1;
}
@@ -1251,9 +1238,8 @@ creviterdissect(struct vars * v,
limit++;
/* if this is the last allowed sub-match, it must reach to the end */
- if ((size_t)k >= max_matches) {
+ if ((size_t)k >= max_matches)
limit = end;
- }
/* try to find an endpoint for the k'th sub-match */
endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
@@ -1263,13 +1249,12 @@ creviterdissect(struct vars * v,
k--;
goto backtrack;
}
- MDEBUG(("%d: working endpoint %d: %" TCL_Z_MODIFIER "u\n",
+ MDEBUG(("%d: working endpoint %d: %ld\n",
t->id, k, LOFF(endpts[k])));
/* k'th sub-match can no longer be considered verified */
- if (nverified >= k) {
+ if (nverified >= k)
nverified = k - 1;
- }
if (endpts[k] != end) {
/* haven't reached end yet, try another iteration if allowed */
@@ -1290,9 +1275,8 @@ creviterdissect(struct vars * v,
* number of matches, start the slow part: recurse to verify each
* sub-match. We always have k <= max_matches, needn't check that.
*/
- if (k < min_matches) {
+ if (k < min_matches)
goto backtrack;
- }
MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
@@ -1303,9 +1287,8 @@ creviterdissect(struct vars * v,
nverified = i;
continue;
}
- if (er == REG_NOMATCH) {
+ if (er == REG_NOMATCH)
break;
- }
/* oops, something failed */
FREE(endpts);
return er;