summaryrefslogtreecommitdiffstats
path: root/generic
diff options
context:
space:
mode:
Diffstat (limited to 'generic')
-rw-r--r--generic/regc_nfa.c21
-rw-r--r--generic/regcomp.c80
-rw-r--r--generic/regcustom.h13
-rw-r--r--generic/rege_dfa.c180
-rw-r--r--generic/regerrs.h2
-rw-r--r--generic/regex.h12
-rw-r--r--generic/regexec.c229
-rw-r--r--generic/regguts.h1
-rw-r--r--generic/tclRegexp.c14
9 files changed, 362 insertions, 190 deletions
diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c
index 8143181..80e9ac7 100644
--- a/generic/regc_nfa.c
+++ b/generic/regc_nfa.c
@@ -1192,7 +1192,8 @@ struct cnfa *cnfa;
narcs = 0;
for (s = nfa->states; s != NULL; s = s->next) {
nstates++;
- narcs += s->nouts + 1;
+ narcs += 1 + s->nouts + 1;
+ /* 1 as a fake for flags, nouts for arcs, 1 as endmarker */
}
cnfa->states = (struct carc **)MALLOC(nstates * sizeof(struct carc *));
@@ -1213,12 +1214,14 @@ struct cnfa *cnfa;
cnfa->eos[0] = nfa->eos[0];
cnfa->eos[1] = nfa->eos[1];
cnfa->ncolors = maxcolor(nfa->cm) + 1;
- cnfa->flags = LEFTANCH; /* tentatively */
+ cnfa->flags = 0;
ca = cnfa->arcs;
for (s = nfa->states; s != NULL; s = s->next) {
assert((size_t)s->no < nstates);
cnfa->states[s->no] = ca;
+ ca->co = 0; /* clear and skip flags "arc" */
+ ca++;
first = ca;
for (a = s->outs; a != NULL; a = a->outchain)
switch (a->type) {
@@ -1246,10 +1249,10 @@ struct cnfa *cnfa;
assert(ca == &cnfa->arcs[narcs]);
assert(cnfa->nstates != 0);
+ /* mark no-progress states */
for (a = nfa->pre->outs; a != NULL; a = a->outchain)
- if (a->type == PLAIN && a->co != nfa->bos[0] &&
- a->co != nfa->bos[1])
- cnfa->flags &= ~LEFTANCH;
+ cnfa->states[a->to->no]->co = 1;
+ cnfa->states[nfa->pre->no]->co = 1;
}
/*
@@ -1480,8 +1483,6 @@ FILE *f;
fprintf(f, ", eol [%ld]", (long)cnfa->eos[1]);
if (cnfa->flags&HASLACONS)
fprintf(f, ", haslacons");
- if (cnfa->flags&LEFTANCH)
- fprintf(f, ", leftanch");
fprintf(f, "\n");
for (st = 0; st < cnfa->nstates; st++)
dumpcstate(st, cnfa->states[st], cnfa, f);
@@ -1505,9 +1506,9 @@ FILE *f;
int i;
int pos;
- fprintf(f, "%d.", st);
+ fprintf(f, "%d%s", st, (ca[0].co) ? ":" : ".");
pos = 1;
- for (i = 0; ca[i].co != COLORLESS; i++) {
+ for (i = 1; ca[i].co != COLORLESS; i++) {
if (ca[i].co < cnfa->ncolors)
fprintf(f, "\t[%ld]->%d", (long)ca[i].co, ca[i].to);
else
@@ -1519,7 +1520,7 @@ FILE *f;
} else
pos++;
}
- if (i == 0 || pos != 1)
+ if (i == 1 || pos != 1)
fprintf(f, "\n");
fflush(f);
}
diff --git a/generic/regcomp.c b/generic/regcomp.c
index 620dc1c..7a4ae34 100644
--- a/generic/regcomp.c
+++ b/generic/regcomp.c
@@ -14,6 +14,7 @@
int compile _ANSI_ARGS_((regex_t *, CONST chr *, size_t, int));
static VOID moresubs _ANSI_ARGS_((struct vars *, int));
static int freev _ANSI_ARGS_((struct vars *, int));
+static VOID makescan _ANSI_ARGS_((struct vars *, struct nfa *));
static struct subre *parse _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *));
static struct subre *parsebranch _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *, int));
static VOID parseqatom _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *, struct subre *));
@@ -369,13 +370,10 @@ int flags;
for (i = 1; i < v->nlacons; i++)
nfanode(v, &v->lacons[i], debug);
CNOERR();
- rainbow(v->nfa, v->cm, PLAIN, COLORLESS, v->nfa->pre, v->nfa->pre);
- newarc(v->nfa, PLAIN, v->nfa->bos[0], v->nfa->pre, v->nfa->pre);
- newarc(v->nfa, PLAIN, v->nfa->bos[1], v->nfa->pre, v->nfa->pre);
- newarc(v->nfa, PLAIN, v->nfa->eos[0], v->nfa->pre, v->nfa->pre);
- newarc(v->nfa, PLAIN, v->nfa->eos[1], v->nfa->pre, v->nfa->pre);
(DISCARD)optimize(v->nfa, debug);
CNOERR();
+ makescan(v, v->nfa);
+ CNOERR();
compact(v->nfa, &g->search);
CNOERR();
@@ -470,6 +468,78 @@ int err;
}
/*
+ - makescan - turn an NFA into a fast-scan NFA (implicit prepend of .*?)
+ * NFA must have been optimize()d already.
+ ^ static VOID makescan(struct vars *, struct nfa *);
+ */
+static VOID
+makescan(v, nfa)
+struct vars *v;
+struct nfa *nfa;
+{
+ struct arc *a;
+ struct arc *b;
+ struct state *pre = nfa->pre;
+ struct state *s;
+ struct state *s2;
+ struct state *slist;
+
+ /* no changes are needed if it's anchored */
+ for (a = pre->outs; a != NULL; a = a->outchain) {
+ assert(a->type == PLAIN);
+ if (a->co != nfa->bos[0] && a->co != nfa->bos[1])
+ break;
+ }
+ if (a == NULL)
+ return;
+
+ /* add implicit .* in front */
+ rainbow(nfa, v->cm, PLAIN, COLORLESS, pre, pre);
+
+ /* and ^* and \Z* too -- not always necessary, but harmless */
+ newarc(nfa, PLAIN, nfa->bos[0], pre, pre);
+ newarc(nfa, PLAIN, nfa->bos[1], pre, pre);
+
+ /*
+ * Now here's the subtle part. Because many REs have no lookback
+ * constraints, often knowing when you were in the pre state tells
+ * you little; it's the next state(s) that are informative. But
+ * some of them may have other inarcs, i.e. it may be possible to
+ * make actual progress and then return to one of them. We must
+ * de-optimize such cases, splitting each such state into progress
+ * and no-progress states.
+ */
+
+ /* first, make a list of the states */
+ slist = NULL;
+ for (a = pre->outs; a != NULL; a = a->outchain) {
+ s = a->to;
+ for (b = s->ins; b != NULL; b = b->inchain)
+ if (b->from != pre)
+ break;
+ if (b != NULL) { /* must be split */
+ s->tmp = slist;
+ slist = s;
+ }
+ }
+
+ /* do the splits */
+ for (s = slist; s != NULL; s = s2) {
+ s2 = newstate(nfa);
+ copyouts(nfa, s, s2);
+ for (a = s->ins; a != NULL; a = b) {
+ b = a->inchain;
+ if (a->from != pre) {
+ cparc(nfa, a, a->from, s2);
+ freearc(nfa, a);
+ }
+ }
+ s2 = s->tmp;
+ s->tmp = NULL; /* clean up while we're at it */
+ }
+}
+
+/*
- parse - parse an RE
* This is actually just the top level, which parses a bunch of branches
* tied together with '|'. They appear in the tree as the left children
diff --git a/generic/regcustom.h b/generic/regcustom.h
index eeaafa0..9ba4a2a 100644
--- a/generic/regcustom.h
+++ b/generic/regcustom.h
@@ -1,6 +1,5 @@
/* headers (which also pick up the standard ones, or equivalents) */
#include "tclInt.h"
-#include "tclPort.h"
/* overrides for regguts.h definitions */
/* function-pointer declarations */
@@ -41,16 +40,16 @@
#define __REG_VOID_T VOID
#define __REG_CONST CONST
/* names and declarations */
-#define __REG_WIDE_COMPILE re_ucomp
-#define __REG_WIDE_EXEC re_uexec
+#define __REG_WIDE_COMPILE TclReComp
+#define __REG_WIDE_EXEC TclReExec
#ifndef __REG_NOFRONT
#define __REG_NOFRONT /* don't want regcomp() and regexec() */
#endif
#ifndef __REG_NOCHAR
#define __REG_NOCHAR /* or the char versions */
#endif
-#define regfree re_ufree
-#define regerror re_uerror
+#define regfree TclReFree
+#define regerror TclReError
/* --- end --- */
@@ -74,8 +73,8 @@ typedef int celt; /* type to hold chr, MCCE number, or NOCELT */
#define iscspace(x) TclUniCharIsSpace(x)
/* name the external functions */
-#define compile re_ucomp
-#define exec re_uexec
+#define compile TclReComp
+#define exec TclReExec
/* enable/disable debugging code (by whether REG_DEBUG is defined or not) */
#ifdef notdef
diff --git a/generic/rege_dfa.c b/generic/rege_dfa.c
index 6e4b941..eb31ffc6 100644
--- a/generic/rege_dfa.c
+++ b/generic/rege_dfa.c
@@ -115,13 +115,13 @@ chr **coldp; /* store coldstart pointer here, if nonNULL */
chr *realmax = (max == v->stop) ? max : max + 1;
color co;
struct sset *css;
- struct sset *firstss;
struct sset *ss;
struct colormap *cm = d->cm;
+ chr *nopr;
+ int i;
/* initialize */
css = initialize(v, d, start);
- firstss = css;
cp = start;
/* startup */
@@ -175,72 +175,93 @@ chr **coldp; /* store coldstart pointer here, if nonNULL */
if (ss == NULL)
return NULL;
- if (ss->flags&POSTSTATE) {
- assert(firstss->flags&STARTER);
- assert(firstss->lastseen != NULL);
- if (coldp != NULL)
- *coldp = firstss->lastseen;
+ else if (ss->flags&POSTSTATE) {
assert(cp >= realmin);
- return cp - 1;
- }
-
- /* shutdown */
- FDEBUG(("--- shutdown at c%d ---\n", css - d->ssets));
- if (cp == v->stop && max == v->stop) {
+ cp--;
+ } else if (cp == v->stop && max == v->stop) {
co = d->cnfa->eos[(v->eflags&REG_NOTEOL) ? 0 : 1];
FDEBUG(("color %ld\n", (long)co));
ss = miss(v, d, css, co, cp, start);
- /* special case: match ended at eol? */
- if (ss != NULL && (ss->flags&POSTSTATE)) {
- assert(firstss->flags&STARTER);
- assert(firstss->lastseen != NULL);
- if (coldp != NULL)
- *coldp = firstss->lastseen;
- return cp;
- }
+ /* match might have ended at eol */
}
- return NULL;
+ if (ss == NULL || !(ss->flags&POSTSTATE))
+ return NULL;
+
+ /* find last no-progress state set, if any */
+ nopr = d->lastnopr;
+ for (ss = d->ssets, i = 0; i < d->nssused; ss++, i++)
+ if ((ss->flags&NOPROGRESS) && nopr != ss->lastseen &&
+ (nopr == NULL || nopr < ss->lastseen))
+ nopr = ss->lastseen;
+ assert(nopr != NULL);
+ if (coldp != NULL)
+ *coldp = (nopr == v->start) ? nopr : nopr-1;
+ return cp;
}
/*
- newdfa - set up a fresh DFA
^ static struct dfa *newdfa(struct vars *, struct cnfa *,
- ^ struct colormap *);
+ ^ struct colormap *, struct smalldfa *);
*/
static struct dfa *
-newdfa(v, cnfa, cm)
+newdfa(v, cnfa, cm, small)
struct vars *v;
struct cnfa *cnfa;
struct colormap *cm;
+struct smalldfa *small; /* preallocated space, may be NULL */
{
- struct dfa *d = (struct dfa *)MALLOC(sizeof(struct dfa));
+ struct dfa *d;
+ size_t nss = cnfa->nstates * 2;
int wordsper = (cnfa->nstates + UBITS - 1) / UBITS;
- struct sset *ss;
- int i;
+ struct smalldfa *smallwas = small;
assert(cnfa != NULL && cnfa->nstates != 0);
- if (d == NULL) {
- ERR(REG_ESPACE);
- return NULL;
- }
- d->ssets = (struct sset *)MALLOC(CACHE * sizeof(struct sset));
- d->statesarea = (unsigned *)MALLOC((CACHE+WORK) * wordsper *
+ if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS) {
+ assert(wordsper == 1);
+ if (small == NULL) {
+ small = (struct smalldfa *)MALLOC(
+ sizeof(struct smalldfa));
+ if (small == NULL) {
+ ERR(REG_ESPACE);
+ return NULL;
+ }
+ }
+ d = &small->dfa;
+ d->ssets = small->ssets;
+ d->statesarea = small->statesarea;
+ d->work = &d->statesarea[nss];
+ d->outsarea = small->outsarea;
+ d->incarea = small->incarea;
+ d->cptsmalloced = 0;
+ d->mallocarea = (smallwas == NULL) ? (char *)small : NULL;
+ } else {
+ d = (struct dfa *)MALLOC(sizeof(struct dfa));
+ if (d == NULL) {
+ ERR(REG_ESPACE);
+ return NULL;
+ }
+ d->ssets = (struct sset *)MALLOC(nss * sizeof(struct sset));
+ d->statesarea = (unsigned *)MALLOC((nss+WORK) * wordsper *
sizeof(unsigned));
- d->work = &d->statesarea[CACHE * wordsper];
- d->outsarea = (struct sset **)MALLOC(CACHE * cnfa->ncolors *
+ d->work = &d->statesarea[nss * wordsper];
+ d->outsarea = (struct sset **)MALLOC(nss * cnfa->ncolors *
sizeof(struct sset *));
- d->incarea = (struct arcp *)MALLOC(CACHE * cnfa->ncolors *
+ d->incarea = (struct arcp *)MALLOC(nss * cnfa->ncolors *
sizeof(struct arcp));
- if (d->ssets == NULL || d->statesarea == NULL || d->outsarea == NULL ||
- d->incarea == NULL) {
- freedfa(d);
- ERR(REG_ESPACE);
- return NULL;
+ d->cptsmalloced = 1;
+ d->mallocarea = (char *)d;
+ if (d->ssets == NULL || d->statesarea == NULL ||
+ d->outsarea == NULL || d->incarea == NULL) {
+ freedfa(d);
+ ERR(REG_ESPACE);
+ return NULL;
+ }
}
- d->nssets = (v->eflags&REG_SMALL) ? 7 : CACHE;
+ d->nssets = (v->eflags&REG_SMALL) ? 7 : nss;
d->nssused = 0;
d->nstates = cnfa->nstates;
d->ncolors = cnfa->ncolors;
@@ -248,14 +269,10 @@ struct colormap *cm;
d->cnfa = cnfa;
d->cm = cm;
d->lastpost = NULL;
+ d->lastnopr = NULL;
d->search = d->ssets;
- for (ss = d->ssets, i = 0; i < d->nssets; ss++, i++) {
- /* initialization of most fields is done as needed */
- ss->states = &d->statesarea[i * d->wordsper];
- ss->outs = &d->outsarea[i * d->ncolors];
- ss->inchain = &d->incarea[i * d->ncolors];
- }
+ /* initialization of sset fields is done as needed */
return d;
}
@@ -268,15 +285,19 @@ static VOID
freedfa(d)
struct dfa *d;
{
- if (d->ssets != NULL)
- FREE(d->ssets);
- if (d->statesarea != NULL)
- FREE(d->statesarea);
- if (d->outsarea != NULL)
- FREE(d->outsarea);
- if (d->incarea != NULL)
- FREE(d->incarea);
- FREE(d);
+ if (d->cptsmalloced) {
+ if (d->ssets != NULL)
+ FREE(d->ssets);
+ if (d->statesarea != NULL)
+ FREE(d->statesarea);
+ if (d->outsarea != NULL)
+ FREE(d->outsarea);
+ if (d->incarea != NULL)
+ FREE(d->incarea);
+ }
+
+ if (d->mallocarea != NULL)
+ FREE(d->mallocarea);
}
/*
@@ -319,9 +340,9 @@ chr *start;
for (i = 0; i < d->wordsper; i++)
ss->states[i] = 0;
BSET(ss->states, d->cnfa->pre);
- ss->hash = hash(ss->states, d->wordsper);
+ ss->hash = HASH(ss->states, d->wordsper);
assert(d->cnfa->pre != d->cnfa->post);
- ss->flags = STARTER|LOCKED;
+ ss->flags = STARTER|LOCKED|NOPROGRESS;
/* lastseen dealt with below */
}
@@ -329,6 +350,7 @@ chr *start;
d->ssets[i].lastseen = NULL;
ss->lastseen = start; /* maybe untrue, but harmless */
d->lastpost = NULL;
+ d->lastnopr = NULL;
return ss;
}
@@ -352,6 +374,7 @@ chr *start; /* where the attempt got started */
struct carc *ca;
struct sset *p;
int ispost;
+ int noprogress;
int gotstate;
int dolacons;
int didlacons;
@@ -367,15 +390,18 @@ chr *start; /* where the attempt got started */
for (i = 0; i < d->wordsper; i++)
d->work[i] = 0;
ispost = 0;
+ noprogress = 1;
gotstate = 0;
for (i = 0; i < d->nstates; i++)
if (ISBSET(css->states, i))
- for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
+ for (ca = cnfa->states[i]+1; ca->co != COLORLESS; ca++)
if (ca->co == co) {
BSET(d->work, ca->to);
gotstate = 1;
if (ca->to == cnfa->post)
ispost = 1;
+ if (!cnfa->states[ca->to]->co)
+ noprogress = 0;
FDEBUG(("%d -> %d\n", i, ca->to));
}
dolacons = (gotstate) ? (cnfa->flags&HASLACONS) : 0;
@@ -384,7 +410,7 @@ chr *start; /* where the attempt got started */
dolacons = 0;
for (i = 0; i < d->nstates; i++)
if (ISBSET(d->work, i))
- for (ca = cnfa->states[i]; ca->co != COLORLESS;
+ for (ca = cnfa->states[i]+1; ca->co != COLORLESS;
ca++)
if (ca->co > cnfa->ncolors &&
!ISBSET(d->work, ca->to) &&
@@ -395,17 +421,23 @@ chr *start; /* where the attempt got started */
didlacons = 1;
if (ca->to == cnfa->post)
ispost = 1;
+ if (!cnfa->states[ca->to]->co)
+ noprogress = 0;
FDEBUG(("%d :> %d\n",i,ca->to));
}
}
if (!gotstate)
return NULL;
- h = hash(d->work, d->wordsper);
+ h = HASH(d->work, d->wordsper);
/* next, is that in the cache? */
for (p = d->ssets, i = d->nssused; i > 0; p++, i--)
- if (p->hash == h && memcmp(VS(d->work), VS(p->states),
+ if (HIT(h, d->work, p, d->wordsper)) {
+#ifndef xxx
+p->hash == h &&
+memcmp(VS(d->work), VS(p->states),
d->wordsper*sizeof(unsigned)) == 0) {
+#endif
FDEBUG(("cached c%d\n", p - d->ssets));
break; /* NOTE BREAK OUT */
}
@@ -416,6 +448,8 @@ chr *start; /* where the attempt got started */
p->states[i] = d->work[i];
p->hash = h;
p->flags = (ispost) ? POSTSTATE : 0;
+ if (noprogress)
+ p->flags |= NOPROGRESS;
/* lastseen to be dealt with by caller */
}
@@ -442,13 +476,14 @@ pcolor co; /* "color" of the lookahead constraint */
int n;
struct subre *sub;
struct dfa *d;
+ struct smalldfa sd;
chr *end;
n = co - pcnfa->ncolors;
assert(n < v->g->nlacons && v->g->lacons != NULL);
FDEBUG(("=== testing lacon %d\n", n));
sub = &v->g->lacons[n];
- d = newdfa(v, &sub->cnfa, &v->g->cmap);
+ d = newdfa(v, &sub->cnfa, &v->g->cmap, &sd);
if (d == NULL) {
ERR(REG_ESPACE);
return 0;
@@ -520,6 +555,11 @@ chr *start;
(d->lastpost == NULL || d->lastpost < ss->lastseen))
d->lastpost = ss->lastseen;
+ /* likewise for a no-progress state */
+ if ((ss->flags&NOPROGRESS) && ss->lastseen != d->lastnopr &&
+ (d->lastnopr == NULL || d->lastnopr < ss->lastseen))
+ d->lastnopr = ss->lastseen;
+
return ss;
}
@@ -541,17 +581,21 @@ chr *start;
/* shortcut for cases where cache isn't full */
if (d->nssused < d->nssets) {
- ss = &d->ssets[d->nssused];
+ i = d->nssused;
d->nssused++;
- FDEBUG(("new c%d\n", ss - d->ssets));
- /* must make innards consistent */
+ ss = &d->ssets[i];
+ FDEBUG(("new c%d\n", i));
+ /* set up innards */
+ ss->states = &d->statesarea[i * d->wordsper];
+ ss->flags = 0;
ss->ins.ss = NULL;
ss->ins.co = WHITE; /* give it some value */
+ ss->outs = &d->outsarea[i * d->ncolors];
+ ss->inchain = &d->incarea[i * d->ncolors];
for (i = 0; i < d->ncolors; i++) {
ss->outs[i] = NULL;
ss->inchain[i].ss = NULL;
}
- ss->flags = 0;
return ss;
}
diff --git a/generic/regerrs.h b/generic/regerrs.h
index af2805c..1b6552c 100644
--- a/generic/regerrs.h
+++ b/generic/regerrs.h
@@ -1,6 +1,6 @@
{ REG_OKAY, "REG_OKAY", "no errors detected" },
{ REG_NOMATCH, "REG_NOMATCH", "failed to match" },
-{ REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.1)" },
+{ REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.2)" },
{ REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" },
{ REG_ECTYPE, "REG_ECTYPE", "invalid character class" },
{ REG_EESCAPE, "REG_EESCAPE", "invalid escape \\ sequence" },
diff --git a/generic/regex.h b/generic/regex.h
index 7ca6ad0..2f3ebfa 100644
--- a/generic/regex.h
+++ b/generic/regex.h
@@ -79,16 +79,16 @@ extern "C" {
#define __REG_VOID_T VOID
#define __REG_CONST CONST
/* names and declarations */
-#define __REG_WIDE_COMPILE re_ucomp
-#define __REG_WIDE_EXEC re_uexec
+#define __REG_WIDE_COMPILE TclReComp
+#define __REG_WIDE_EXEC TclReExec
#ifndef __REG_NOFRONT
#define __REG_NOFRONT /* don't want regcomp() and regexec() */
#endif
#ifndef __REG_NOCHAR
#define __REG_NOCHAR /* or the char versions */
#endif
-#define regfree re_ufree
-#define regerror re_uerror
+#define regfree TclReFree
+#define regerror TclReError
/* --- end --- */
@@ -289,8 +289,8 @@ int regexec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, regmatch_t [], i
#ifdef __REG_WIDE_T
int __REG_WIDE_EXEC _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
#endif
-re_void re_ufree _ANSI_ARGS_((regex_t *));
-extern size_t re_uerror _ANSI_ARGS_((int, __REG_CONST regex_t *, char *, size_t));
+re_void regfree _ANSI_ARGS_((regex_t *));
+extern size_t regerror _ANSI_ARGS_((int, __REG_CONST regex_t *, char *, size_t));
/* automatically gathered by fwd; do not hand-edit */
/* =====^!^===== end forwards =====^!^===== */
diff --git a/generic/regexec.c b/generic/regexec.c
index 1671866..50fcc70 100644
--- a/generic/regexec.c
+++ b/generic/regexec.c
@@ -24,6 +24,7 @@ struct vars {
#define ERR(e) VERR(v, e) /* record an error */
#define NOERR() {if (ISERR()) return;} /* if error seen, return */
#define OFF(p) ((p) - v->start)
+#define LOFF(p) ((long)OFF(p))
@@ -35,11 +36,15 @@ struct arcp { /* "pointer" to an outarc */
struct sset { /* state set */
unsigned *states; /* pointer to bitvector */
- unsigned hash; /* xor of bitvector */
+ unsigned hash; /* hash of bitvector */
+# define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw))
+# define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
+ memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
int flags;
# define STARTER 01 /* the initial state set */
# define POSTSTATE 02 /* includes the goal state */
# define LOCKED 04 /* locked in cache */
+# define NOPROGRESS 010 /* zero-progress state set */
struct arcp ins; /* chain of inarcs pointing here */
chr *lastseen; /* last entered on arrival here */
struct sset **outs; /* outarc vector indexed by color */
@@ -60,12 +65,26 @@ struct dfa {
struct cnfa *cnfa;
struct colormap *cm;
chr *lastpost; /* location of last cache-flushed success */
+ chr *lastnopr; /* location of last cache-flushed NOPROGRESS */
struct sset *search; /* replacement-search-pointer memory */
+ int cptsmalloced; /* were the areas individually malloced? */
+ char *mallocarea; /* self, or master malloced area, or NULL */
};
-#define CACHE 200
#define WORK 1 /* number of work bitvectors needed */
+/* setup for non-malloc allocation for small cases */
+#define FEWSTATES 20 /* must be less than UBITS */
+#define FEWCOLORS 15
+struct smalldfa {
+ struct dfa dfa;
+ struct sset ssets[FEWSTATES*2];
+ unsigned statesarea[FEWSTATES*2 + WORK];
+ struct sset *outsarea[FEWSTATES*2 * FEWCOLORS];
+ struct arcp incarea[FEWSTATES*2 * FEWCOLORS];
+};
+
+
/*
@@ -91,7 +110,7 @@ static int caltdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)
/* === rege_dfa.c === */
static chr *longest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));
static chr *shortest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, chr *, chr **));
-static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));
+static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct smalldfa *));
static VOID freedfa _ANSI_ARGS_((struct dfa *));
static unsigned hash _ANSI_ARGS_((unsigned *, int));
static struct sset *initialize _ANSI_ARGS_((struct vars *, struct dfa *, chr *));
@@ -124,6 +143,10 @@ int flags;
int st;
size_t n;
int complications;
+# define LOCALMAT 20
+ regmatch_t mat[LOCALMAT];
+# define LOCALMEM 40
+ regoff_t mem[LOCALMEM];
/* sanity checks */
if (re == NULL || string == NULL || re->re_magic != REMAGIC)
@@ -145,7 +168,10 @@ int flags;
v->nmatch = nmatch;
if (complications && v->nmatch < v->g->nsub + 1) {
/* need work area bigger than what user gave us */
- v->pmatch = (regmatch_t *)MALLOC((v->g->nsub + 1) *
+ if (v->g->nsub + 1 <= LOCALMAT)
+ v->pmatch = mat;
+ else
+ v->pmatch = (regmatch_t *)MALLOC((v->g->nsub + 1) *
sizeof(regmatch_t));
if (v->pmatch == NULL)
return REG_ESPACE;
@@ -156,9 +182,14 @@ int flags;
v->stop = (chr *)string + len;
v->err = 0;
if (complications) {
- v->mem = (regoff_t *)MALLOC(v->g->ntree*sizeof(regoff_t));
+ assert(v->g->ntree >= 0);
+ n = (size_t)v->g->ntree;
+ if (n <= LOCALMEM)
+ v->mem = mem;
+ else
+ v->mem = (regoff_t *)MALLOC(n*sizeof(regoff_t));
if (v->mem == NULL) {
- if (v->pmatch != pmatch)
+ if (v->pmatch != pmatch && v->pmatch != mat)
FREE(v->pmatch);
return REG_ESPACE;
}
@@ -171,14 +202,18 @@ int flags;
st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
else
st = find(v, &v->g->tree->cnfa, &v->g->cmap);
+
+ /* copy (portion of) match vector over if necessary */
if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) {
zapsubs(pmatch, nmatch);
n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t));
}
- if (v->pmatch != pmatch)
+
+ /* clean up */
+ if (v->pmatch != pmatch && v->pmatch != mat)
FREE(v->pmatch);
- if (v->mem != NULL)
+ if (v->mem != NULL && v->mem != mem)
FREE(v->mem);
return st;
}
@@ -193,15 +228,14 @@ struct vars *v;
struct cnfa *cnfa;
struct colormap *cm;
{
- struct dfa *d = newdfa(v, cnfa, cm);
- struct dfa *s = newdfa(v, &v->g->search, cm);
+ struct smalldfa da;
+ struct dfa *d = newdfa(v, cnfa, cm, &da);
+ struct smalldfa sa;
+ struct dfa *s = newdfa(v, &v->g->search, cm, &sa);
chr *begin;
chr *end;
-#ifdef notdef
chr *open; /* open and close of range of possible starts */
chr *close;
-#endif
- chr *stop = (cnfa->flags&LEFTANCH) ? v->start : v->stop;
if (d == NULL)
return v->err;
@@ -210,17 +244,15 @@ struct colormap *cm;
return v->err;
}
-#ifdef notdef
close = v->start;
- while (close < stop) {
- MDEBUG(("\nsearch at %ld\n", (long)OFF(close)));
+ do {
+ MDEBUG(("\nsearch at %ld\n", LOFF(close)));
close = shortest(v, s, close, close, v->stop, &open);
if (close == NULL)
break; /* NOTE BREAK */
- MDEBUG(("between %ld and %ld\n", (long)OFF(open),
- (long)OFF(close)));
-#endif
- for (begin = v->start; begin <= stop; begin++) {
+ MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
+ for (begin = open; begin <= close; begin++) {
+ MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
end = longest(v, d, begin, v->stop);
if (end != NULL) {
if (v->nmatch > 0) {
@@ -239,9 +271,7 @@ struct colormap *cm;
return REG_OKAY;
}
}
-#ifdef notdef
- }
-#endif
+ } while (close < v->stop);
freedfa(d);
freedfa(s);
@@ -260,10 +290,14 @@ struct vars *v;
struct cnfa *cnfa;
struct colormap *cm;
{
- struct dfa *d = newdfa(v, cnfa, cm);
+ struct smalldfa da;
+ struct dfa *d = newdfa(v, cnfa, cm, &da);
+ struct smalldfa sa;
+ struct dfa *s = newdfa(v, &v->g->search, cm, &sa);
chr *begin;
chr *end;
- chr *stop = (cnfa->flags&LEFTANCH) ? v->start : v->stop;
+ chr *open; /* open and close of range of possible starts */
+ chr *close;
chr *estart;
chr *estop;
int er;
@@ -271,55 +305,72 @@ struct colormap *cm;
if (d == NULL)
return v->err;
+ if (s == NULL) {
+ freedfa(d);
+ return v->err;
+ }
- for (begin = v->start; begin <= stop; begin++) {
- MDEBUG(("\ncfind trying at %ld\n", (long)OFF(begin)));
- estart = begin;
- estop = v->stop;
- for (;;) {
- if (shorter)
- end = shortest(v, d, begin, estart, estop,
- (chr **)NULL);
- else
- end = longest(v, d, begin, estop);
- if (end == NULL)
- break; /* NOTE BREAK OUT */
- MDEBUG(("tentative end %ld\n", (long)OFF(end)));
- zapsubs(v->pmatch, v->nmatch);
- zapmem(v, v->g->tree);
- er = cdissect(v, v->g->tree, begin, end);
- switch (er) {
- case REG_OKAY:
- if (v->nmatch > 0) {
- v->pmatch[0].rm_so = OFF(begin);
- v->pmatch[0].rm_eo = OFF(end);
- }
- freedfa(d);
- if (ISERR())
- return v->err;
- return REG_OKAY;
- break;
- case REG_NOMATCH:
- /* go around and try again */
- if ((shorter) ? end == estop : end == begin) {
- /* no point in trying again */
- freedfa(d);
- return REG_NOMATCH;
- }
+ close = v->start;
+ do {
+ MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
+ close = shortest(v, s, close, close, v->stop, &open);
+ if (close == NULL)
+ break; /* NOTE BREAK */
+ MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
+ for (begin = open; begin <= close; begin++) {
+ MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
+ estart = begin;
+ estop = v->stop;
+ for (;;) {
if (shorter)
- estart = end + 1;
+ end = shortest(v, d, begin, estart,
+ estop, (chr **)NULL);
else
- estop = end - 1;
- break;
- default:
- freedfa(d);
- return er;
- break;
+ end = longest(v, d, begin, estop);
+ if (end == NULL)
+ break; /* NOTE BREAK OUT */
+ MDEBUG(("tentative end %ld\n", LOFF(end)));
+ zapsubs(v->pmatch, v->nmatch);
+ zapmem(v, v->g->tree);
+ er = cdissect(v, v->g->tree, begin, end);
+ switch (er) {
+ case REG_OKAY:
+ if (v->nmatch > 0) {
+ v->pmatch[0].rm_so = OFF(begin);
+ v->pmatch[0].rm_eo = OFF(end);
+ }
+ freedfa(d);
+ freedfa(s);
+ if (ISERR())
+ return v->err;
+ return REG_OKAY;
+ break;
+ case REG_NOMATCH:
+ /* go around and try again */
+ if ((shorter) ? end == estop :
+ end == begin) {
+ /* no point in trying again */
+ freedfa(s);
+ freedfa(d);
+ return REG_NOMATCH;
+ }
+ if (shorter)
+ estart = end + 1;
+ else
+ estop = end - 1;
+ break;
+ default:
+ freedfa(d);
+ freedfa(s);
+ return er;
+ break;
+ }
}
}
- }
+ } while (close < v->stop);
freedfa(d);
+ freedfa(s);
if (ISERR())
return v->err;
return REG_NOMATCH;
@@ -402,7 +453,7 @@ chr *begin; /* beginning of relevant substring */
chr *end; /* end of same */
{
assert(t != NULL);
- MDEBUG(("dissect %ld-%ld\n", (long)OFF(begin), (long)OFF(end)));
+ MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end)));
switch (t->op) {
case '=': /* terminal node */
@@ -443,7 +494,9 @@ struct subre *t;
chr *begin; /* beginning of relevant substring */
chr *end; /* end of same */
{
+ struct smalldfa da;
struct dfa *d;
+ struct smalldfa d2a;
struct dfa *d2;
chr *mid;
int i;
@@ -452,10 +505,10 @@ chr *end; /* end of same */
assert(t->left != NULL && t->left->cnfa.nstates > 0);
assert(t->right != NULL && t->right->cnfa.nstates > 0);
- d = newdfa(v, &t->left->cnfa, &v->g->cmap);
+ d = newdfa(v, &t->left->cnfa, &v->g->cmap, &da);
if (ISERR())
return v->err;
- d2 = newdfa(v, &t->right->cnfa, &v->g->cmap);
+ d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &d2a);
if (ISERR()) {
freedfa(d);
return v->err;
@@ -468,7 +521,7 @@ chr *end; /* end of same */
freedfa(d2);
return REG_ASSERT;
}
- MDEBUG(("tentative midpoint %ld\n", (long)OFF(mid)));
+ MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
/* iterate until satisfaction or failure */
while (longest(v, d2, mid, end) != end) {
@@ -488,7 +541,7 @@ chr *end; /* end of same */
freedfa(d2);
return REG_ASSERT;
}
- MDEBUG(("new midpoint %ld\n", (long)OFF(mid)));
+ MDEBUG(("new midpoint %ld\n", LOFF(mid)));
}
/* satisfaction */
@@ -512,6 +565,7 @@ struct subre *t;
chr *begin; /* beginning of relevant substring */
chr *end; /* end of same */
{
+ struct smalldfa da;
struct dfa *d;
int i;
@@ -521,7 +575,7 @@ chr *end; /* end of same */
for (i = 0; t != NULL; t = t->right, i++) {
MDEBUG(("trying %dth\n", i));
assert(t->left != NULL && t->left->cnfa.nstates > 0);
- d = newdfa(v, &t->left->cnfa, &v->g->cmap);
+ d = newdfa(v, &t->left->cnfa, &v->g->cmap, &da);
if (ISERR())
return v->err;
if (longest(v, d, begin, end) == end) {
@@ -550,7 +604,7 @@ chr *end; /* end of same */
int er;
assert(t != NULL);
- MDEBUG(("cdissect %ld-%ld\n", (long)OFF(begin), (long)OFF(end)));
+ MDEBUG(("cdissect %ld-%ld\n", LOFF(begin), LOFF(end)));
switch (t->op) {
case '=': /* terminal node */
@@ -596,7 +650,9 @@ struct subre *t;
chr *begin; /* beginning of relevant substring */
chr *end; /* end of same */
{
+ struct smalldfa da;
struct dfa *d;
+ struct smalldfa d2a;
struct dfa *d2;
chr *mid;
int er;
@@ -608,10 +664,10 @@ chr *end; /* end of same */
if (t->left->flags&SHORTER) /* reverse scan */
return crevdissect(v, t, begin, end);
- d = newdfa(v, &t->left->cnfa, &v->g->cmap);
+ d = newdfa(v, &t->left->cnfa, &v->g->cmap, &da);
if (ISERR())
return v->err;
- d2 = newdfa(v, &t->right->cnfa, &v->g->cmap);
+ d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &d2a);
if (ISERR()) {
freedfa(d);
return v->err;
@@ -626,11 +682,11 @@ chr *end; /* end of same */
freedfa(d2);
return REG_NOMATCH;
}
- MDEBUG(("tentative midpoint %ld\n", (long)OFF(mid)));
+ MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
v->mem[t->retry] = (mid - begin) + 1;
} else {
mid = begin + (v->mem[t->retry] - 1);
- MDEBUG(("working midpoint %ld\n", (long)OFF(mid)));
+ MDEBUG(("working midpoint %ld\n", LOFF(mid)));
}
/* iterate until satisfaction or failure */
@@ -663,7 +719,7 @@ chr *end; /* end of same */
freedfa(d2);
return REG_NOMATCH;
}
- MDEBUG(("%d: new midpoint %ld\n", t->retry, (long)OFF(mid)));
+ MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
v->mem[t->retry] = (mid - begin) + 1;
zapmem(v, t->left);
zapmem(v, t->right);
@@ -689,7 +745,9 @@ struct subre *t;
chr *begin; /* beginning of relevant substring */
chr *end; /* end of same */
{
+ struct smalldfa da;
struct dfa *d;
+ struct smalldfa d2a;
struct dfa *d2;
chr *mid;
int er;
@@ -700,10 +758,10 @@ chr *end; /* end of same */
assert(t->left->flags&SHORTER);
/* concatenation -- need to split the substring between parts */
- d = newdfa(v, &t->left->cnfa, &v->g->cmap);
+ d = newdfa(v, &t->left->cnfa, &v->g->cmap, &da);
if (ISERR())
return v->err;
- d2 = newdfa(v, &t->right->cnfa, &v->g->cmap);
+ d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &d2a);
if (ISERR()) {
freedfa(d);
return v->err;
@@ -718,11 +776,11 @@ chr *end; /* end of same */
freedfa(d2);
return REG_NOMATCH;
}
- MDEBUG(("tentative midpoint %ld\n", (long)OFF(mid)));
+ MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
v->mem[t->retry] = (mid - begin) + 1;
} else {
mid = begin + (v->mem[t->retry] - 1);
- MDEBUG(("working midpoint %ld\n", (long)OFF(mid)));
+ MDEBUG(("working midpoint %ld\n", LOFF(mid)));
}
/* iterate until satisfaction or failure */
@@ -755,7 +813,7 @@ chr *end; /* end of same */
freedfa(d2);
return REG_NOMATCH;
}
- MDEBUG(("%d: new midpoint %ld\n", t->retry, (long)OFF(mid)));
+ MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
v->mem[t->retry] = (mid - begin) + 1;
zapmem(v, t->left);
zapmem(v, t->right);
@@ -846,6 +904,7 @@ struct subre *t;
chr *begin; /* beginning of relevant substring */
chr *end; /* end of same */
{
+ struct smalldfa da;
struct dfa *d;
int er;
# define UNTRIED 0 /* not yet tried at all */
@@ -862,7 +921,7 @@ chr *end; /* end of same */
assert(t->left != NULL);
if (v->mem[t->retry] == UNTRIED) {
- d = newdfa(v, &t->left->cnfa, &v->g->cmap);
+ d = newdfa(v, &t->left->cnfa, &v->g->cmap, &da);
if (ISERR())
return v->err;
if (longest(v, d, begin, end) != end) {
diff --git a/generic/regguts.h b/generic/regguts.h
index 5f9a0ab..73aed2f 100644
--- a/generic/regguts.h
+++ b/generic/regguts.h
@@ -308,7 +308,6 @@ struct cnfa {
int ncolors; /* number of colors */
int flags;
# define HASLACONS 01 /* uses lookahead constraints */
-# define LEFTANCH 02 /* anchored on left */
int pre; /* setup state number */
int post; /* teardown state number */
color bos[2]; /* colors, if any, assigned to BOS and BOL */
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c
index a619d5b..c605591 100644
--- a/generic/tclRegexp.c
+++ b/generic/tclRegexp.c
@@ -10,7 +10,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclRegexp.c,v 1.1.2.5 1998/11/11 04:54:19 stanton Exp $
+ * RCS: @(#) $Id: tclRegexp.c,v 1.1.2.6 1998/11/17 21:38:39 stanton Exp $
*/
#include "tclInt.h"
@@ -163,7 +163,7 @@ Tcl_RegExpCompile(interp, string)
if (iPtr->patterns[NUM_REGEXPS-1] != NULL) {
ckfree(iPtr->patterns[NUM_REGEXPS-1]);
- re_ufree(&(iPtr->regexps[NUM_REGEXPS-1]->re));
+ TclReFree(&(iPtr->regexps[NUM_REGEXPS-1]->re));
ckfree((char *) iPtr->regexps[NUM_REGEXPS-1]);
}
for (i = NUM_REGEXPS - 2; i >= 0; i--) {
@@ -326,7 +326,7 @@ TclRegExpExecUniChar(interp, re, wString, numChars, nmatches, flags)
if (nmatches >= 0 && (size_t) nmatches < nm)
nm = (size_t) nmatches;
- status = re_uexec(&regexpPtr->re, wString, (size_t) numChars,
+ status = TclReExec(&regexpPtr->re, wString, (size_t) numChars,
(rm_detail_t *)NULL, nm, regexpPtr->matches, flags);
/*
@@ -621,12 +621,12 @@ TclRegError(interp, msg, status)
char *p;
Tcl_ResetResult(interp);
- n = re_uerror(status, (regex_t *)NULL, buf, sizeof(buf));
+ n = TclReError(status, (regex_t *)NULL, buf, sizeof(buf));
p = (n > sizeof(buf)) ? "..." : "";
Tcl_AppendResult(interp, msg, buf, p, NULL);
sprintf(cbuf, "%d", status);
- (VOID) re_uerror(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf));
+ (VOID) TclReError(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf));
Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL);
}
@@ -654,7 +654,7 @@ FreeRegexpInternalRep(objPtr)
{
TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
- re_ufree(&regexpRepPtr->re);
+ TclReFree(&regexpRepPtr->re);
if (regexpRepPtr->matches) {
ckfree((char *) regexpRepPtr->matches);
}
@@ -765,7 +765,7 @@ CompileRegexp(interp, string, length, flags)
*/
regexpPtr->flags = flags;
- status = re_ucomp(&regexpPtr->re, uniString, (size_t) numChars, flags);
+ status = TclReComp(&regexpPtr->re, uniString, (size_t) numChars, flags);
Tcl_DStringFree(&stringBuf);
if (status != REG_OKAY) {