diff options
author | dkf <donal.k.fellows@manchester.ac.uk> | 2007-12-18 11:23:11 (GMT) |
---|---|---|
committer | dkf <donal.k.fellows@manchester.ac.uk> | 2007-12-18 11:23:11 (GMT) |
commit | fdc0ebcebdabf8d44d0fb97275444ef980028f66 (patch) | |
tree | 136091002ef147caa5d9a01bf9cc8ea9be25baf5 /generic | |
parent | a5dd9045b97811ec0ab14334094c710878cd6083 (diff) | |
download | tcl-fdc0ebcebdabf8d44d0fb97275444ef980028f66.zip tcl-fdc0ebcebdabf8d44d0fb97275444ef980028f66.tar.gz tcl-fdc0ebcebdabf8d44d0fb97275444ef980028f66.tar.bz2 |
Fixes for problems created when processing regular expressions that
generate very large automata. An enormous number of thanks to Will
Drewry <wad@google.com>, Tavis Ormandy <taviso@google.com>, and Tom
Lane <tgl@sss.pgh.pa.us> from the Postgresql crowd for their help in
tracking these problems down. [Bug 1810264]
Diffstat (limited to 'generic')
-rw-r--r-- | generic/regc_color.c | 27 | ||||
-rw-r--r-- | generic/regc_nfa.c | 64 | ||||
-rw-r--r-- | generic/regerrs.h | 1 | ||||
-rw-r--r-- | generic/regex.h | 1 | ||||
-rw-r--r-- | generic/regguts.h | 8 |
5 files changed, 87 insertions, 14 deletions
diff --git a/generic/regc_color.c b/generic/regc_color.c index 5aed21c..f6716be 100644 --- a/generic/regc_color.c +++ b/generic/regc_color.c @@ -552,12 +552,9 @@ struct colormap *cm; scd->sub = NOSUB; while ((a = cd->arcs) != NULL) { assert(a->co == co); - /* uncolorchain(cm, a); */ - cd->arcs = a->colorchain; + uncolorchain(cm, a); a->co = sco; - /* colorchain(cm, a); */ - a->colorchain = scd->arcs; - scd->arcs = a; + colorchain(cm, a); } freecolor(cm, co); } else { @@ -586,7 +583,10 @@ struct arc *a; { struct colordesc *cd = &cm->cd[a->co]; + if (cd->arcs) + cd->arcs->colorchain_rev = a; a->colorchain = cd->arcs; + a->colorchain_rev = NULL; cd->arcs = a; } @@ -600,18 +600,19 @@ struct colormap *cm; struct arc *a; { struct colordesc *cd = &cm->cd[a->co]; - struct arc *aa; + struct arc *aa = a->colorchain_rev; - aa = cd->arcs; - if (aa == a) /* easy case */ + if (aa == NULL) { + assert(cd->arcs == a); cd->arcs = a->colorchain; - else { - for (; aa != NULL && aa->colorchain != a; aa = aa->colorchain) - continue; - assert(aa != NULL); + } else { + assert(aa->colorchain == a); aa->colorchain = a->colorchain; } - a->colorchain = NULL; /* paranoia */ + if (a->colorchain) + a->colorchain->colorchain_rev = aa; + a->colorchain = NULL; /* paranoia */ + a->colorchain_rev = NULL; } /* diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c index dd26e64..48f56a9 100644 --- a/generic/regc_nfa.c +++ b/generic/regc_nfa.c @@ -61,11 +61,12 @@ struct nfa *parent; /* NULL if primary NFA */ nfa->nstates = 0; nfa->cm = cm; nfa->v = v; + nfa->size = 0; nfa->bos[0] = nfa->bos[1] = COLORLESS; nfa->eos[0] = nfa->eos[1] = COLORLESS; + nfa->parent = parent; nfa->post = newfstate(nfa, '@'); /* number 0 */ nfa->pre = newfstate(nfa, '>'); /* number 1 */ - nfa->parent = parent; nfa->init = newstate(nfa); /* may become invalid later */ nfa->final = newstate(nfa); @@ -88,6 +89,57 @@ struct nfa *parent; /* NULL if primary NFA */ } /* + - too_many_states - checks if the max states exceeds the compile-time value + ^ static int too_many_states(struct nfa *); + */ +static int +too_many_states(nfa) +struct nfa *nfa; +{ + struct nfa *parent = nfa->parent; + size_t sz = nfa->size; + while (parent != NULL) { + sz = parent->size; + parent = parent->parent; + } + if (sz > REG_MAX_STATES) + return 1; + return 0; +} + +/* + - increment_size - increases the tracked size of the NFA and its parents. + ^ static void increment_size(struct nfa *); + */ +static void +increment_size(nfa) +struct nfa *nfa; +{ + struct nfa *parent = nfa->parent; + nfa->size++; + while (parent != NULL) { + parent->size++; + parent = parent->parent; + } +} + +/* + - decrement_size - increases the tracked size of the NFA and its parents. + ^ static void decrement_size(struct nfa *); + */ +static void +decrement_size(nfa) +struct nfa *nfa; +{ + struct nfa *parent = nfa->parent; + nfa->size--; + while (parent != NULL) { + parent->size--; + parent = parent->parent; + } +} + +/* - freenfa - free an entire NFA ^ static VOID freenfa(struct nfa *); */ @@ -123,6 +175,11 @@ struct nfa *nfa; { struct state *s; + if (too_many_states(nfa)) { + /* XXX: add specific error for this */ + NERR(REG_ETOOBIG); + return NULL; + } if (nfa->free != NULL) { s = nfa->free; nfa->free = s->next; @@ -154,6 +211,8 @@ struct nfa *nfa; } s->prev = nfa->slast; nfa->slast = s; + /* Track the current size and the parent size */ + increment_size(nfa); return s; } @@ -221,6 +280,7 @@ struct state *s; s->prev = NULL; s->next = nfa->free; /* don't delete it, put it on the free list */ nfa->free = s; + decrement_size(nfa); } /* @@ -651,6 +711,8 @@ struct state *stmp; /* s's duplicate, or NULL */ for (a = s->outs; a != NULL && !NISERR(); a = a->outchain) { duptraverse(nfa, a->to, (struct state *)NULL); + if (NISERR()) + break; assert(a->to->tmp != NULL); cparc(nfa, a, s->tmp, a->to->tmp); } diff --git a/generic/regerrs.h b/generic/regerrs.h index a3d98b6..259c0cb 100644 --- a/generic/regerrs.h +++ b/generic/regerrs.h @@ -16,3 +16,4 @@ { REG_INVARG, "REG_INVARG", "invalid argument to regex function" }, { REG_MIXED, "REG_MIXED", "character widths of regex and string differ" }, { REG_BADOPT, "REG_BADOPT", "invalid embedded option" }, +{ REG_ETOOBIG, "REG_ETOOBIG", "nfa has too many states" }, diff --git a/generic/regex.h b/generic/regex.h index 8289a50..a35925a 100644 --- a/generic/regex.h +++ b/generic/regex.h @@ -292,6 +292,7 @@ typedef struct { #define REG_INVARG 16 /* invalid argument to regex function */ #define REG_MIXED 17 /* character widths of regex and string differ */ #define REG_BADOPT 18 /* invalid embedded option */ +#define REG_ETOOBIG 19 /* nfa has too many states */ /* two specials for debugging and testing */ #define REG_ATOI 101 /* convert error-code name to number */ #define REG_ITOA 102 /* convert error-code number to name */ diff --git a/generic/regguts.h b/generic/regguts.h index b6152f4..c77a8fc 100644 --- a/generic/regguts.h +++ b/generic/regguts.h @@ -290,6 +290,7 @@ struct arc { # define freechain outchain struct arc *inchain; /* *to's ins chain */ struct arc *colorchain; /* color's arc chain */ + struct arc *colorchain_rev; /* back-link in color's arc chain */ }; struct arcbatch { /* for bulk allocation of arcs */ @@ -326,6 +327,8 @@ struct nfa { struct colormap *cm; /* the color map */ color bos[2]; /* colors, if any, assigned to BOS and BOL */ color eos[2]; /* colors, if any, assigned to EOS and EOL */ + size_t size; /* current NFA size; differs from nstates as + * it will be incremented by its children */ struct vars *v; /* simplifies compile error reporting */ struct nfa *parent; /* parent NFA, if any */ }; @@ -356,6 +359,11 @@ struct cnfa { #define NULLCNFA(cnfa) ((cnfa).nstates == 0) +/* Used to limit the maximum NFA size */ +#ifndef REG_MAX_STATES +#define REG_MAX_STATES 100000 +#endif + /* * subexpression tree |