summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordkf <donal.k.fellows@manchester.ac.uk>2007-12-18 11:23:11 (GMT)
committerdkf <donal.k.fellows@manchester.ac.uk>2007-12-18 11:23:11 (GMT)
commitfdc0ebcebdabf8d44d0fb97275444ef980028f66 (patch)
tree136091002ef147caa5d9a01bf9cc8ea9be25baf5
parenta5dd9045b97811ec0ab14334094c710878cd6083 (diff)
downloadtcl-fdc0ebcebdabf8d44d0fb97275444ef980028f66.zip
tcl-fdc0ebcebdabf8d44d0fb97275444ef980028f66.tar.gz
tcl-fdc0ebcebdabf8d44d0fb97275444ef980028f66.tar.bz2
Fixes for problems created when processing regular expressions that
generate very large automata. An enormous number of thanks to Will Drewry <wad@google.com>, Tavis Ormandy <taviso@google.com>, and Tom Lane <tgl@sss.pgh.pa.us> from the Postgresql crowd for their help in tracking these problems down. [Bug 1810264]
-rw-r--r--ChangeLog9
-rw-r--r--generic/regc_color.c27
-rw-r--r--generic/regc_nfa.c64
-rw-r--r--generic/regerrs.h1
-rw-r--r--generic/regex.h1
-rw-r--r--generic/regguts.h8
-rw-r--r--tests/reg.test15
7 files changed, 110 insertions, 15 deletions
diff --git a/ChangeLog b/ChangeLog
index e21c61e..c7dfe53 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2007-12-18 Donal K. Fellows <donal.k.fellows@manchester.ac.uk>
+
+ * generic/regguts.h, generic/regc_color.c, generic/regc_nfa.c:
+ Fixes for problems created when processing regular expressions that
+ generate very large automata. An enormous number of thanks to Will
+ Drewry <wad@google.com>, Tavis Ormandy <taviso@google.com>, and Tom
+ Lane <tgl@sss.pgh.pa.us> from the Postgresql crowd for their help in
+ tracking these problems down. [Bug 1810264]
+
2007-12-14 Jeff Hobbs <jeffh@ActiveState.com>
* win/README: updated notes
diff --git a/generic/regc_color.c b/generic/regc_color.c
index 5aed21c..f6716be 100644
--- a/generic/regc_color.c
+++ b/generic/regc_color.c
@@ -552,12 +552,9 @@ struct colormap *cm;
scd->sub = NOSUB;
while ((a = cd->arcs) != NULL) {
assert(a->co == co);
- /* uncolorchain(cm, a); */
- cd->arcs = a->colorchain;
+ uncolorchain(cm, a);
a->co = sco;
- /* colorchain(cm, a); */
- a->colorchain = scd->arcs;
- scd->arcs = a;
+ colorchain(cm, a);
}
freecolor(cm, co);
} else {
@@ -586,7 +583,10 @@ struct arc *a;
{
struct colordesc *cd = &cm->cd[a->co];
+ if (cd->arcs)
+ cd->arcs->colorchain_rev = a;
a->colorchain = cd->arcs;
+ a->colorchain_rev = NULL;
cd->arcs = a;
}
@@ -600,18 +600,19 @@ struct colormap *cm;
struct arc *a;
{
struct colordesc *cd = &cm->cd[a->co];
- struct arc *aa;
+ struct arc *aa = a->colorchain_rev;
- aa = cd->arcs;
- if (aa == a) /* easy case */
+ if (aa == NULL) {
+ assert(cd->arcs == a);
cd->arcs = a->colorchain;
- else {
- for (; aa != NULL && aa->colorchain != a; aa = aa->colorchain)
- continue;
- assert(aa != NULL);
+ } else {
+ assert(aa->colorchain == a);
aa->colorchain = a->colorchain;
}
- a->colorchain = NULL; /* paranoia */
+ if (a->colorchain)
+ a->colorchain->colorchain_rev = aa;
+ a->colorchain = NULL; /* paranoia */
+ a->colorchain_rev = NULL;
}
/*
diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c
index dd26e64..48f56a9 100644
--- a/generic/regc_nfa.c
+++ b/generic/regc_nfa.c
@@ -61,11 +61,12 @@ struct nfa *parent; /* NULL if primary NFA */
nfa->nstates = 0;
nfa->cm = cm;
nfa->v = v;
+ nfa->size = 0;
nfa->bos[0] = nfa->bos[1] = COLORLESS;
nfa->eos[0] = nfa->eos[1] = COLORLESS;
+ nfa->parent = parent;
nfa->post = newfstate(nfa, '@'); /* number 0 */
nfa->pre = newfstate(nfa, '>'); /* number 1 */
- nfa->parent = parent;
nfa->init = newstate(nfa); /* may become invalid later */
nfa->final = newstate(nfa);
@@ -88,6 +89,57 @@ struct nfa *parent; /* NULL if primary NFA */
}
/*
+ - too_many_states - checks if the max states exceeds the compile-time value
+ ^ static int too_many_states(struct nfa *);
+ */
+static int
+too_many_states(nfa)
+struct nfa *nfa;
+{
+ struct nfa *parent = nfa->parent;
+ size_t sz = nfa->size;
+ while (parent != NULL) {
+ sz = parent->size;
+ parent = parent->parent;
+ }
+ if (sz > REG_MAX_STATES)
+ return 1;
+ return 0;
+}
+
+/*
+ - increment_size - increases the tracked size of the NFA and its parents.
+ ^ static void increment_size(struct nfa *);
+ */
+static void
+increment_size(nfa)
+struct nfa *nfa;
+{
+ struct nfa *parent = nfa->parent;
+ nfa->size++;
+ while (parent != NULL) {
+ parent->size++;
+ parent = parent->parent;
+ }
+}
+
+/*
+ - decrement_size - increases the tracked size of the NFA and its parents.
+ ^ static void decrement_size(struct nfa *);
+ */
+static void
+decrement_size(nfa)
+struct nfa *nfa;
+{
+ struct nfa *parent = nfa->parent;
+ nfa->size--;
+ while (parent != NULL) {
+ parent->size--;
+ parent = parent->parent;
+ }
+}
+
+/*
- freenfa - free an entire NFA
^ static VOID freenfa(struct nfa *);
*/
@@ -123,6 +175,11 @@ struct nfa *nfa;
{
struct state *s;
+ if (too_many_states(nfa)) {
+ /* XXX: add specific error for this */
+ NERR(REG_ETOOBIG);
+ return NULL;
+ }
if (nfa->free != NULL) {
s = nfa->free;
nfa->free = s->next;
@@ -154,6 +211,8 @@ struct nfa *nfa;
}
s->prev = nfa->slast;
nfa->slast = s;
+ /* Track the current size and the parent size */
+ increment_size(nfa);
return s;
}
@@ -221,6 +280,7 @@ struct state *s;
s->prev = NULL;
s->next = nfa->free; /* don't delete it, put it on the free list */
nfa->free = s;
+ decrement_size(nfa);
}
/*
@@ -651,6 +711,8 @@ struct state *stmp; /* s's duplicate, or NULL */
for (a = s->outs; a != NULL && !NISERR(); a = a->outchain) {
duptraverse(nfa, a->to, (struct state *)NULL);
+ if (NISERR())
+ break;
assert(a->to->tmp != NULL);
cparc(nfa, a, s->tmp, a->to->tmp);
}
diff --git a/generic/regerrs.h b/generic/regerrs.h
index a3d98b6..259c0cb 100644
--- a/generic/regerrs.h
+++ b/generic/regerrs.h
@@ -16,3 +16,4 @@
{ REG_INVARG, "REG_INVARG", "invalid argument to regex function" },
{ REG_MIXED, "REG_MIXED", "character widths of regex and string differ" },
{ REG_BADOPT, "REG_BADOPT", "invalid embedded option" },
+{ REG_ETOOBIG, "REG_ETOOBIG", "nfa has too many states" },
diff --git a/generic/regex.h b/generic/regex.h
index 8289a50..a35925a 100644
--- a/generic/regex.h
+++ b/generic/regex.h
@@ -292,6 +292,7 @@ typedef struct {
#define REG_INVARG 16 /* invalid argument to regex function */
#define REG_MIXED 17 /* character widths of regex and string differ */
#define REG_BADOPT 18 /* invalid embedded option */
+#define REG_ETOOBIG 19 /* nfa has too many states */
/* two specials for debugging and testing */
#define REG_ATOI 101 /* convert error-code name to number */
#define REG_ITOA 102 /* convert error-code number to name */
diff --git a/generic/regguts.h b/generic/regguts.h
index b6152f4..c77a8fc 100644
--- a/generic/regguts.h
+++ b/generic/regguts.h
@@ -290,6 +290,7 @@ struct arc {
# define freechain outchain
struct arc *inchain; /* *to's ins chain */
struct arc *colorchain; /* color's arc chain */
+ struct arc *colorchain_rev; /* back-link in color's arc chain */
};
struct arcbatch { /* for bulk allocation of arcs */
@@ -326,6 +327,8 @@ struct nfa {
struct colormap *cm; /* the color map */
color bos[2]; /* colors, if any, assigned to BOS and BOL */
color eos[2]; /* colors, if any, assigned to EOS and EOL */
+ size_t size; /* current NFA size; differs from nstates as
+ * it will be incremented by its children */
struct vars *v; /* simplifies compile error reporting */
struct nfa *parent; /* parent NFA, if any */
};
@@ -356,6 +359,11 @@ struct cnfa {
#define NULLCNFA(cnfa) ((cnfa).nstates == 0)
+/* Used to limit the maximum NFA size */
+#ifndef REG_MAX_STATES
+#define REG_MAX_STATES 100000
+#endif
+
/*
* subexpression tree
diff --git a/tests/reg.test b/tests/reg.test
index 8994fdc..2abe7b6 100644
--- a/tests/reg.test
+++ b/tests/reg.test
@@ -9,7 +9,7 @@
#
# Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
#
-# RCS: @(#) $Id: reg.test,v 1.16.2.3 2004/11/27 05:44:13 dgp Exp $
+# RCS: @(#) $Id: reg.test,v 1.16.2.4 2007/12/18 11:23:16 dkf Exp $
if {[lsearch [namespace children] ::tcltest] == -1} {
package require tcltest 2
@@ -1130,6 +1130,19 @@ test reg-33.11 {Bug 840258} {
"TQ\r\n.?<5000267>Test already stopped\r\n" {} tmp
} 1
+test reg-33.12 {Bug 1810264 - bad read} {
+ regexp {\3161573148} {\3161573148}
+} 0
+test reg-33.13 {Bug 1810264 - infinite loop} {
+ regexp {($|^)*} {x}
+} 1
+test reg-33.14 {Bug 1810264 - super-expensive expression} {
+ set start [clock seconds]
+ regexp {(x{200}){200}$y} {x}
+ set time [expr {[clock seconds] - $start}]
+ expr {$time < 5 ? "ok" : "Complex RE took $time seconds - bad!"}
+} ok
+
# cleanup
::tcltest::cleanupTests
return