diff options
author | dkf <donal.k.fellows@manchester.ac.uk> | 2007-12-18 10:53:14 (GMT) |
---|---|---|
committer | dkf <donal.k.fellows@manchester.ac.uk> | 2007-12-18 10:53:14 (GMT) |
commit | d8deec65a9511d476698293a485ee7c721a003c7 (patch) | |
tree | 73535333530cb8bb79ed8d41856c01d0f6dc4fd0 | |
parent | 296e1c32e596a9e2c73528c0aa67e7e76bb03778 (diff) | |
download | tcl-d8deec65a9511d476698293a485ee7c721a003c7.zip tcl-d8deec65a9511d476698293a485ee7c721a003c7.tar.gz tcl-d8deec65a9511d476698293a485ee7c721a003c7.tar.bz2 |
Fixes for problems created when processing regular expressions that
generate very large automata. An enormous number of thanks to Will
Drewry <wad@google.com>, Tavis Ormandy <taviso@google.com>, and Tom
Lane <tgl@sss.pgh.pa.us> from the Postgresql crowd for their help in
tracking these problems down. [Bug 1810264]
-rw-r--r-- | ChangeLog | 9 | ||||
-rw-r--r-- | generic/regc_color.c | 27 | ||||
-rw-r--r-- | generic/regc_nfa.c | 75 | ||||
-rw-r--r-- | generic/regerrs.h | 1 | ||||
-rw-r--r-- | generic/regex.h | 1 | ||||
-rw-r--r-- | generic/regguts.h | 12 | ||||
-rw-r--r-- | tests/reg.test | 11 |
7 files changed, 120 insertions, 16 deletions
@@ -1,3 +1,12 @@ +2007-12-18 Donal K. Fellows <donal.k.fellows@manchester.ac.uk> + + * generic/regguts.h, generic/regc_color.c, generic/regc_nfa.c: + Fixes for problems created when processing regular expressions that + generate very large automata. An enormous number of thanks to Will + Drewry <wad@google.com>, Tavis Ormandy <taviso@google.com>, and Tom + Lane <tgl@sss.pgh.pa.us> from the Postgresql crowd for their help in + tracking these problems down. [Bug 1810264] + 2007-12-17 Don Porter <dgp@users.sourceforge.net> *** 8.5.0 TAGGED FOR RELEASE *** diff --git a/generic/regc_color.c b/generic/regc_color.c index 003f5fc..ba1f668 100644 --- a/generic/regc_color.c +++ b/generic/regc_color.c @@ -611,12 +611,9 @@ okcolors( scd->sub = NOSUB; while ((a = cd->arcs) != NULL) { assert(a->co == co); - /* uncolorchain(cm, a); */ - cd->arcs = a->colorchain; + uncolorchain(cm, a); a->co = sco; - /* colorchain(cm, a); */ - a->colorchain = scd->arcs; - scd->arcs = a; + colorchain(cm, a); } freecolor(cm, co); } else { @@ -648,7 +645,11 @@ colorchain( { struct colordesc *cd = &cm->cd[a->co]; + if (cd->arcs != NULL) { + cd->arcs->colorchainRev = a; + } a->colorchain = cd->arcs; + a->colorchainRev = NULL; cd->arcs = a; } @@ -662,20 +663,20 @@ uncolorchain( struct arc *a) { struct colordesc *cd = &cm->cd[a->co]; - struct arc *aa; + struct arc *aa = a->colorchainRev; - aa = cd->arcs; - if (aa == a) { /* easy case */ + if (aa == NULL) { + assert(cd->arcs == a); cd->arcs = a->colorchain; } else { - assert(aa != NULL); - for (; aa->colorchain!=a ; aa=aa->colorchain) { - assert(aa->colorchain != NULL); - continue; - } + assert(aa->colorchain == a); aa->colorchain = a->colorchain; } + if (a->colorchain != NULL) { + a->colorchain->colorchainRev = aa; + } a->colorchain = NULL; /* paranoia */ + a->colorchainRev = NULL; } /* diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c index 741887f..19dbe63 100644 --- a/generic/regc_nfa.c +++ b/generic/regc_nfa.c @@ -58,13 +58,14 @@ newnfa( nfa->nstates = 0; nfa->cm = cm; nfa->v = v; + nfa->size = 0; nfa->bos[0] = nfa->bos[1] = COLORLESS; nfa->eos[0] = nfa->eos[1] = COLORLESS; + nfa->parent = parent; /* Precedes newfstate so parent is valid. */ nfa->post = newfstate(nfa, '@'); /* number 0 */ nfa->pre = newfstate(nfa, '>'); /* number 1 */ - nfa->parent = parent; - nfa->init = newstate(nfa); /* may become invalid later */ + nfa->init = newstate(nfa); /* May become invalid later. */ nfa->final = newstate(nfa); if (ISERR()) { freenfa(nfa); @@ -85,6 +86,61 @@ newnfa( } /* + - TooManyStates - checks if the max states exceeds the compile-time value + ^ static int TooManyStates(struct nfa *); + */ +static int +TooManyStates( + struct nfa *nfa) +{ + struct nfa *parent = nfa->parent; + size_t sz = nfa->size; + + while (parent != NULL) { + sz = parent->size; + parent = parent->parent; + } + if (sz > REG_MAX_STATES) { + return 1; + } + return 0; +} + +/* + - IncrementSize - increases the tracked size of the NFA and its parents. + ^ static void IncrementSize(struct nfa *); + */ +static void +IncrementSize( + struct nfa *nfa) +{ + struct nfa *parent = nfa->parent; + + nfa->size++; + while (parent != NULL) { + parent->size++; + parent = parent->parent; + } +} + +/* + - DecrementSize - increases the tracked size of the NFA and its parents. + ^ static void DecrementSize(struct nfa *); + */ +static void +DecrementSize( + struct nfa *nfa) +{ + struct nfa *parent = nfa->parent; + + nfa->size--; + while (parent != NULL) { + parent->size--; + parent = parent->parent; + } +} + +/* - freenfa - free an entire NFA ^ static VOID freenfa(struct nfa *); */ @@ -120,6 +176,11 @@ newstate( { struct state *s; + if (TooManyStates(nfa)) { + /* XXX: add specific error for this */ + NERR(REG_ETOOBIG); + return NULL; + } if (nfa->free != NULL) { s = nfa->free; nfa->free = s->next; @@ -152,6 +213,12 @@ newstate( } s->prev = nfa->slast; nfa->slast = s; + + /* + * Track the current size and the parent size. + */ + + IncrementSize(nfa); return s; } @@ -222,6 +289,7 @@ freestate( s->prev = NULL; s->next = nfa->free; /* don't delete it, put it on the free list */ nfa->free = s; + DecrementSize(nfa); } /* @@ -688,6 +756,9 @@ duptraverse( for (a=s->outs ; a!=NULL && !NISERR() ; a=a->outchain) { duptraverse(nfa, a->to, NULL); + if (NISERR()) { + break; + } assert(a->to->tmp != NULL); cparc(nfa, a, s->tmp, a->to->tmp); } diff --git a/generic/regerrs.h b/generic/regerrs.h index a3d98b6..259c0cb 100644 --- a/generic/regerrs.h +++ b/generic/regerrs.h @@ -16,3 +16,4 @@ { REG_INVARG, "REG_INVARG", "invalid argument to regex function" }, { REG_MIXED, "REG_MIXED", "character widths of regex and string differ" }, { REG_BADOPT, "REG_BADOPT", "invalid embedded option" }, +{ REG_ETOOBIG, "REG_ETOOBIG", "nfa has too many states" }, diff --git a/generic/regex.h b/generic/regex.h index b8498ab..fa86092 100644 --- a/generic/regex.h +++ b/generic/regex.h @@ -277,6 +277,7 @@ typedef struct { #define REG_INVARG 16 /* invalid argument to regex function */ #define REG_MIXED 17 /* character widths of regex and string differ */ #define REG_BADOPT 18 /* invalid embedded option */ +#define REG_ETOOBIG 19 /* nfa has too many states */ /* two specials for debugging and testing */ #define REG_ATOI 101 /* convert error-code name to number */ #define REG_ITOA 102 /* convert error-code number to name */ diff --git a/generic/regguts.h b/generic/regguts.h index cbf6615..67e3d03 100644 --- a/generic/regguts.h +++ b/generic/regguts.h @@ -267,6 +267,7 @@ struct arc { #define freechain outchain struct arc *inchain; /* *to's ins chain */ struct arc *colorchain; /* color's arc chain */ + struct arc *colorchainRev; /* back-link in color's arc chain */ }; struct arcbatch { /* for bulk allocation of arcs */ @@ -303,6 +304,9 @@ struct nfa { struct colormap *cm; /* the color map */ color bos[2]; /* colors, if any, assigned to BOS and BOL */ color eos[2]; /* colors, if any, assigned to EOS and EOL */ + size_t size; /* Current NFA size; differs from nstates as + * it also counts the number of states created + * by children of this state. */ struct vars *v; /* simplifies compile error reporting */ struct nfa *parent; /* parent NFA, if any */ }; @@ -332,6 +336,14 @@ struct cnfa { #define NULLCNFA(cnfa) ((cnfa).nstates == 0) /* + * Used to limit the maximum NFA size to something sane. [Bug 1810264] + */ + +#ifndef REG_MAX_STATES +# define REG_MAX_STATES 100000 +#endif + +/* * subexpression tree */ diff --git a/tests/reg.test b/tests/reg.test index 40b8766..63d4d3d 100644 --- a/tests/reg.test +++ b/tests/reg.test @@ -9,7 +9,7 @@ # # Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. # -# RCS: @(#) $Id: reg.test,v 1.23 2006/11/03 00:34:53 hobbs Exp $ +# RCS: @(#) $Id: reg.test,v 1.24 2007/12/18 10:53:16 dkf Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest 2 @@ -1057,6 +1057,15 @@ test reg-33.11 {Bug 840258} { regsub {(^|[\n\r]+)\.*\?<.*?(\n|\r)+} \ "TQ\r\n.?<5000267>Test already stopped\r\n" {} tmp } 1 +test reg-33.12 {Bug 1810264 - bad read} { + regexp {\3161573148} {\3161573148} +} 0 +test reg-33.13 {Bug 1810264 - infinite loop} { + regexp {($|^)*} {x} +} 1 +test reg-33.14 {Bug 1810264 - super-expensive expression} { + regexp {(x{200}){200}$y} {x} +} 0 # cleanup ::tcltest::cleanupTests |