summaryrefslogtreecommitdiffstats
path: root/generic
diff options
context:
space:
mode:
authordkf <donal.k.fellows@manchester.ac.uk>2007-12-18 11:23:11 (GMT)
committerdkf <donal.k.fellows@manchester.ac.uk>2007-12-18 11:23:11 (GMT)
commitfdc0ebcebdabf8d44d0fb97275444ef980028f66 (patch)
tree136091002ef147caa5d9a01bf9cc8ea9be25baf5 /generic
parenta5dd9045b97811ec0ab14334094c710878cd6083 (diff)
downloadtcl-fdc0ebcebdabf8d44d0fb97275444ef980028f66.zip
tcl-fdc0ebcebdabf8d44d0fb97275444ef980028f66.tar.gz
tcl-fdc0ebcebdabf8d44d0fb97275444ef980028f66.tar.bz2
Fixes for problems created when processing regular expressions that
generate very large automata. An enormous number of thanks to Will Drewry <wad@google.com>, Tavis Ormandy <taviso@google.com>, and Tom Lane <tgl@sss.pgh.pa.us> from the Postgresql crowd for their help in tracking these problems down. [Bug 1810264]
Diffstat (limited to 'generic')
-rw-r--r--generic/regc_color.c27
-rw-r--r--generic/regc_nfa.c64
-rw-r--r--generic/regerrs.h1
-rw-r--r--generic/regex.h1
-rw-r--r--generic/regguts.h8
5 files changed, 87 insertions, 14 deletions
diff --git a/generic/regc_color.c b/generic/regc_color.c
index 5aed21c..f6716be 100644
--- a/generic/regc_color.c
+++ b/generic/regc_color.c
@@ -552,12 +552,9 @@ struct colormap *cm;
scd->sub = NOSUB;
while ((a = cd->arcs) != NULL) {
assert(a->co == co);
- /* uncolorchain(cm, a); */
- cd->arcs = a->colorchain;
+ uncolorchain(cm, a);
a->co = sco;
- /* colorchain(cm, a); */
- a->colorchain = scd->arcs;
- scd->arcs = a;
+ colorchain(cm, a);
}
freecolor(cm, co);
} else {
@@ -586,7 +583,10 @@ struct arc *a;
{
struct colordesc *cd = &cm->cd[a->co];
+ if (cd->arcs)
+ cd->arcs->colorchain_rev = a;
a->colorchain = cd->arcs;
+ a->colorchain_rev = NULL;
cd->arcs = a;
}
@@ -600,18 +600,19 @@ struct colormap *cm;
struct arc *a;
{
struct colordesc *cd = &cm->cd[a->co];
- struct arc *aa;
+ struct arc *aa = a->colorchain_rev;
- aa = cd->arcs;
- if (aa == a) /* easy case */
+ if (aa == NULL) {
+ assert(cd->arcs == a);
cd->arcs = a->colorchain;
- else {
- for (; aa != NULL && aa->colorchain != a; aa = aa->colorchain)
- continue;
- assert(aa != NULL);
+ } else {
+ assert(aa->colorchain == a);
aa->colorchain = a->colorchain;
}
- a->colorchain = NULL; /* paranoia */
+ if (a->colorchain)
+ a->colorchain->colorchain_rev = aa;
+ a->colorchain = NULL; /* paranoia */
+ a->colorchain_rev = NULL;
}
/*
diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c
index dd26e64..48f56a9 100644
--- a/generic/regc_nfa.c
+++ b/generic/regc_nfa.c
@@ -61,11 +61,12 @@ struct nfa *parent; /* NULL if primary NFA */
nfa->nstates = 0;
nfa->cm = cm;
nfa->v = v;
+ nfa->size = 0;
nfa->bos[0] = nfa->bos[1] = COLORLESS;
nfa->eos[0] = nfa->eos[1] = COLORLESS;
+ nfa->parent = parent;
nfa->post = newfstate(nfa, '@'); /* number 0 */
nfa->pre = newfstate(nfa, '>'); /* number 1 */
- nfa->parent = parent;
nfa->init = newstate(nfa); /* may become invalid later */
nfa->final = newstate(nfa);
@@ -88,6 +89,57 @@ struct nfa *parent; /* NULL if primary NFA */
}
/*
+ - too_many_states - checks if the max states exceeds the compile-time value
+ ^ static int too_many_states(struct nfa *);
+ */
+static int
+too_many_states(nfa)
+struct nfa *nfa;
+{
+ struct nfa *parent = nfa->parent;
+ size_t sz = nfa->size;
+ while (parent != NULL) {
+ sz = parent->size;
+ parent = parent->parent;
+ }
+ if (sz > REG_MAX_STATES)
+ return 1;
+ return 0;
+}
+
+/*
+ - increment_size - increases the tracked size of the NFA and its parents.
+ ^ static void increment_size(struct nfa *);
+ */
+static void
+increment_size(nfa)
+struct nfa *nfa;
+{
+ struct nfa *parent = nfa->parent;
+ nfa->size++;
+ while (parent != NULL) {
+ parent->size++;
+ parent = parent->parent;
+ }
+}
+
+/*
+ - decrement_size - increases the tracked size of the NFA and its parents.
+ ^ static void decrement_size(struct nfa *);
+ */
+static void
+decrement_size(nfa)
+struct nfa *nfa;
+{
+ struct nfa *parent = nfa->parent;
+ nfa->size--;
+ while (parent != NULL) {
+ parent->size--;
+ parent = parent->parent;
+ }
+}
+
+/*
- freenfa - free an entire NFA
^ static VOID freenfa(struct nfa *);
*/
@@ -123,6 +175,11 @@ struct nfa *nfa;
{
struct state *s;
+ if (too_many_states(nfa)) {
+ /* XXX: add specific error for this */
+ NERR(REG_ETOOBIG);
+ return NULL;
+ }
if (nfa->free != NULL) {
s = nfa->free;
nfa->free = s->next;
@@ -154,6 +211,8 @@ struct nfa *nfa;
}
s->prev = nfa->slast;
nfa->slast = s;
+ /* Track the current size and the parent size */
+ increment_size(nfa);
return s;
}
@@ -221,6 +280,7 @@ struct state *s;
s->prev = NULL;
s->next = nfa->free; /* don't delete it, put it on the free list */
nfa->free = s;
+ decrement_size(nfa);
}
/*
@@ -651,6 +711,8 @@ struct state *stmp; /* s's duplicate, or NULL */
for (a = s->outs; a != NULL && !NISERR(); a = a->outchain) {
duptraverse(nfa, a->to, (struct state *)NULL);
+ if (NISERR())
+ break;
assert(a->to->tmp != NULL);
cparc(nfa, a, s->tmp, a->to->tmp);
}
diff --git a/generic/regerrs.h b/generic/regerrs.h
index a3d98b6..259c0cb 100644
--- a/generic/regerrs.h
+++ b/generic/regerrs.h
@@ -16,3 +16,4 @@
{ REG_INVARG, "REG_INVARG", "invalid argument to regex function" },
{ REG_MIXED, "REG_MIXED", "character widths of regex and string differ" },
{ REG_BADOPT, "REG_BADOPT", "invalid embedded option" },
+{ REG_ETOOBIG, "REG_ETOOBIG", "nfa has too many states" },
diff --git a/generic/regex.h b/generic/regex.h
index 8289a50..a35925a 100644
--- a/generic/regex.h
+++ b/generic/regex.h
@@ -292,6 +292,7 @@ typedef struct {
#define REG_INVARG 16 /* invalid argument to regex function */
#define REG_MIXED 17 /* character widths of regex and string differ */
#define REG_BADOPT 18 /* invalid embedded option */
+#define REG_ETOOBIG 19 /* nfa has too many states */
/* two specials for debugging and testing */
#define REG_ATOI 101 /* convert error-code name to number */
#define REG_ITOA 102 /* convert error-code number to name */
diff --git a/generic/regguts.h b/generic/regguts.h
index b6152f4..c77a8fc 100644
--- a/generic/regguts.h
+++ b/generic/regguts.h
@@ -290,6 +290,7 @@ struct arc {
# define freechain outchain
struct arc *inchain; /* *to's ins chain */
struct arc *colorchain; /* color's arc chain */
+ struct arc *colorchain_rev; /* back-link in color's arc chain */
};
struct arcbatch { /* for bulk allocation of arcs */
@@ -326,6 +327,8 @@ struct nfa {
struct colormap *cm; /* the color map */
color bos[2]; /* colors, if any, assigned to BOS and BOL */
color eos[2]; /* colors, if any, assigned to EOS and EOL */
+ size_t size; /* current NFA size; differs from nstates as
+ * it will be incremented by its children */
struct vars *v; /* simplifies compile error reporting */
struct nfa *parent; /* parent NFA, if any */
};
@@ -356,6 +359,11 @@ struct cnfa {
#define NULLCNFA(cnfa) ((cnfa).nstates == 0)
+/* Used to limit the maximum NFA size */
+#ifndef REG_MAX_STATES
+#define REG_MAX_STATES 100000
+#endif
+
/*
* subexpression tree