summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordkf <donal.k.fellows@manchester.ac.uk>2007-12-18 10:53:14 (GMT)
committerdkf <donal.k.fellows@manchester.ac.uk>2007-12-18 10:53:14 (GMT)
commitd8deec65a9511d476698293a485ee7c721a003c7 (patch)
tree73535333530cb8bb79ed8d41856c01d0f6dc4fd0
parent296e1c32e596a9e2c73528c0aa67e7e76bb03778 (diff)
downloadtcl-d8deec65a9511d476698293a485ee7c721a003c7.zip
tcl-d8deec65a9511d476698293a485ee7c721a003c7.tar.gz
tcl-d8deec65a9511d476698293a485ee7c721a003c7.tar.bz2
Fixes for problems created when processing regular expressions that
generate very large automata. An enormous number of thanks to Will Drewry <wad@google.com>, Tavis Ormandy <taviso@google.com>, and Tom Lane <tgl@sss.pgh.pa.us> from the Postgresql crowd for their help in tracking these problems down. [Bug 1810264]
-rw-r--r--ChangeLog9
-rw-r--r--generic/regc_color.c27
-rw-r--r--generic/regc_nfa.c75
-rw-r--r--generic/regerrs.h1
-rw-r--r--generic/regex.h1
-rw-r--r--generic/regguts.h12
-rw-r--r--tests/reg.test11
7 files changed, 120 insertions, 16 deletions
diff --git a/ChangeLog b/ChangeLog
index 322a551..9aa147d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2007-12-18 Donal K. Fellows <donal.k.fellows@manchester.ac.uk>
+
+ * generic/regguts.h, generic/regc_color.c, generic/regc_nfa.c:
+ Fixes for problems created when processing regular expressions that
+ generate very large automata. An enormous number of thanks to Will
+ Drewry <wad@google.com>, Tavis Ormandy <taviso@google.com>, and Tom
+ Lane <tgl@sss.pgh.pa.us> from the Postgresql crowd for their help in
+ tracking these problems down. [Bug 1810264]
+
2007-12-17 Don Porter <dgp@users.sourceforge.net>
*** 8.5.0 TAGGED FOR RELEASE ***
diff --git a/generic/regc_color.c b/generic/regc_color.c
index 003f5fc..ba1f668 100644
--- a/generic/regc_color.c
+++ b/generic/regc_color.c
@@ -611,12 +611,9 @@ okcolors(
scd->sub = NOSUB;
while ((a = cd->arcs) != NULL) {
assert(a->co == co);
- /* uncolorchain(cm, a); */
- cd->arcs = a->colorchain;
+ uncolorchain(cm, a);
a->co = sco;
- /* colorchain(cm, a); */
- a->colorchain = scd->arcs;
- scd->arcs = a;
+ colorchain(cm, a);
}
freecolor(cm, co);
} else {
@@ -648,7 +645,11 @@ colorchain(
{
struct colordesc *cd = &cm->cd[a->co];
+ if (cd->arcs != NULL) {
+ cd->arcs->colorchainRev = a;
+ }
a->colorchain = cd->arcs;
+ a->colorchainRev = NULL;
cd->arcs = a;
}
@@ -662,20 +663,20 @@ uncolorchain(
struct arc *a)
{
struct colordesc *cd = &cm->cd[a->co];
- struct arc *aa;
+ struct arc *aa = a->colorchainRev;
- aa = cd->arcs;
- if (aa == a) { /* easy case */
+ if (aa == NULL) {
+ assert(cd->arcs == a);
cd->arcs = a->colorchain;
} else {
- assert(aa != NULL);
- for (; aa->colorchain!=a ; aa=aa->colorchain) {
- assert(aa->colorchain != NULL);
- continue;
- }
+ assert(aa->colorchain == a);
aa->colorchain = a->colorchain;
}
+ if (a->colorchain != NULL) {
+ a->colorchain->colorchainRev = aa;
+ }
a->colorchain = NULL; /* paranoia */
+ a->colorchainRev = NULL;
}
/*
diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c
index 741887f..19dbe63 100644
--- a/generic/regc_nfa.c
+++ b/generic/regc_nfa.c
@@ -58,13 +58,14 @@ newnfa(
nfa->nstates = 0;
nfa->cm = cm;
nfa->v = v;
+ nfa->size = 0;
nfa->bos[0] = nfa->bos[1] = COLORLESS;
nfa->eos[0] = nfa->eos[1] = COLORLESS;
+ nfa->parent = parent; /* Precedes newfstate so parent is valid. */
nfa->post = newfstate(nfa, '@'); /* number 0 */
nfa->pre = newfstate(nfa, '>'); /* number 1 */
- nfa->parent = parent;
- nfa->init = newstate(nfa); /* may become invalid later */
+ nfa->init = newstate(nfa); /* May become invalid later. */
nfa->final = newstate(nfa);
if (ISERR()) {
freenfa(nfa);
@@ -85,6 +86,61 @@ newnfa(
}
/*
+ - TooManyStates - checks if the max states exceeds the compile-time value
+ ^ static int TooManyStates(struct nfa *);
+ */
+static int
+TooManyStates(
+ struct nfa *nfa)
+{
+ struct nfa *parent = nfa->parent;
+ size_t sz = nfa->size;
+
+ while (parent != NULL) {
+ sz = parent->size;
+ parent = parent->parent;
+ }
+ if (sz > REG_MAX_STATES) {
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ - IncrementSize - increases the tracked size of the NFA and its parents.
+ ^ static void IncrementSize(struct nfa *);
+ */
+static void
+IncrementSize(
+ struct nfa *nfa)
+{
+ struct nfa *parent = nfa->parent;
+
+ nfa->size++;
+ while (parent != NULL) {
+ parent->size++;
+ parent = parent->parent;
+ }
+}
+
+/*
+ - DecrementSize - increases the tracked size of the NFA and its parents.
+ ^ static void DecrementSize(struct nfa *);
+ */
+static void
+DecrementSize(
+ struct nfa *nfa)
+{
+ struct nfa *parent = nfa->parent;
+
+ nfa->size--;
+ while (parent != NULL) {
+ parent->size--;
+ parent = parent->parent;
+ }
+}
+
+/*
- freenfa - free an entire NFA
^ static VOID freenfa(struct nfa *);
*/
@@ -120,6 +176,11 @@ newstate(
{
struct state *s;
+ if (TooManyStates(nfa)) {
+ /* XXX: add specific error for this */
+ NERR(REG_ETOOBIG);
+ return NULL;
+ }
if (nfa->free != NULL) {
s = nfa->free;
nfa->free = s->next;
@@ -152,6 +213,12 @@ newstate(
}
s->prev = nfa->slast;
nfa->slast = s;
+
+ /*
+ * Track the current size and the parent size.
+ */
+
+ IncrementSize(nfa);
return s;
}
@@ -222,6 +289,7 @@ freestate(
s->prev = NULL;
s->next = nfa->free; /* don't delete it, put it on the free list */
nfa->free = s;
+ DecrementSize(nfa);
}
/*
@@ -688,6 +756,9 @@ duptraverse(
for (a=s->outs ; a!=NULL && !NISERR() ; a=a->outchain) {
duptraverse(nfa, a->to, NULL);
+ if (NISERR()) {
+ break;
+ }
assert(a->to->tmp != NULL);
cparc(nfa, a, s->tmp, a->to->tmp);
}
diff --git a/generic/regerrs.h b/generic/regerrs.h
index a3d98b6..259c0cb 100644
--- a/generic/regerrs.h
+++ b/generic/regerrs.h
@@ -16,3 +16,4 @@
{ REG_INVARG, "REG_INVARG", "invalid argument to regex function" },
{ REG_MIXED, "REG_MIXED", "character widths of regex and string differ" },
{ REG_BADOPT, "REG_BADOPT", "invalid embedded option" },
+{ REG_ETOOBIG, "REG_ETOOBIG", "nfa has too many states" },
diff --git a/generic/regex.h b/generic/regex.h
index b8498ab..fa86092 100644
--- a/generic/regex.h
+++ b/generic/regex.h
@@ -277,6 +277,7 @@ typedef struct {
#define REG_INVARG 16 /* invalid argument to regex function */
#define REG_MIXED 17 /* character widths of regex and string differ */
#define REG_BADOPT 18 /* invalid embedded option */
+#define REG_ETOOBIG 19 /* nfa has too many states */
/* two specials for debugging and testing */
#define REG_ATOI 101 /* convert error-code name to number */
#define REG_ITOA 102 /* convert error-code number to name */
diff --git a/generic/regguts.h b/generic/regguts.h
index cbf6615..67e3d03 100644
--- a/generic/regguts.h
+++ b/generic/regguts.h
@@ -267,6 +267,7 @@ struct arc {
#define freechain outchain
struct arc *inchain; /* *to's ins chain */
struct arc *colorchain; /* color's arc chain */
+ struct arc *colorchainRev; /* back-link in color's arc chain */
};
struct arcbatch { /* for bulk allocation of arcs */
@@ -303,6 +304,9 @@ struct nfa {
struct colormap *cm; /* the color map */
color bos[2]; /* colors, if any, assigned to BOS and BOL */
color eos[2]; /* colors, if any, assigned to EOS and EOL */
+ size_t size; /* Current NFA size; differs from nstates as
+ * it also counts the number of states created
+ * by children of this state. */
struct vars *v; /* simplifies compile error reporting */
struct nfa *parent; /* parent NFA, if any */
};
@@ -332,6 +336,14 @@ struct cnfa {
#define NULLCNFA(cnfa) ((cnfa).nstates == 0)
/*
+ * Used to limit the maximum NFA size to something sane. [Bug 1810264]
+ */
+
+#ifndef REG_MAX_STATES
+# define REG_MAX_STATES 100000
+#endif
+
+/*
* subexpression tree
*/
diff --git a/tests/reg.test b/tests/reg.test
index 40b8766..63d4d3d 100644
--- a/tests/reg.test
+++ b/tests/reg.test
@@ -9,7 +9,7 @@
#
# Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
#
-# RCS: @(#) $Id: reg.test,v 1.23 2006/11/03 00:34:53 hobbs Exp $
+# RCS: @(#) $Id: reg.test,v 1.24 2007/12/18 10:53:16 dkf Exp $
if {[lsearch [namespace children] ::tcltest] == -1} {
package require tcltest 2
@@ -1057,6 +1057,15 @@ test reg-33.11 {Bug 840258} {
regsub {(^|[\n\r]+)\.*\?<.*?(\n|\r)+} \
"TQ\r\n.?<5000267>Test already stopped\r\n" {} tmp
} 1
+test reg-33.12 {Bug 1810264 - bad read} {
+ regexp {\3161573148} {\3161573148}
+} 0
+test reg-33.13 {Bug 1810264 - infinite loop} {
+ regexp {($|^)*} {x}
+} 1
+test reg-33.14 {Bug 1810264 - super-expensive expression} {
+ regexp {(x{200}){200}$y} {x}
+} 0
# cleanup
::tcltest::cleanupTests