From 75ed02d88822dad90ad256551f2df875aa884975 Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 20 Oct 2015 19:53:03 +0000 Subject: Adaptation of re-memaccounting.patch from Tom Lane @postgres. --- generic/regc_nfa.c | 84 +++++++++--------------------------------------------- generic/regcomp.c | 2 ++ generic/regerrs.h | 2 +- generic/regex.h | 2 +- generic/regguts.h | 16 ++++++----- tests/regexp.test | 2 +- 6 files changed, 28 insertions(+), 80 deletions(-) diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c index bbc4dd6..5568bff 100644 --- a/generic/regc_nfa.c +++ b/generic/regc_nfa.c @@ -62,7 +62,6 @@ newnfa( nfa->nstates = 0; nfa->cm = cm; nfa->v = v; - nfa->size = 0; nfa->bos[0] = nfa->bos[1] = COLORLESS; nfa->eos[0] = nfa->eos[1] = COLORLESS; nfa->parent = parent; /* Precedes newfstate so parent is valid. */ @@ -90,61 +89,6 @@ newnfa( } /* - - TooManyStates - checks if the max states exceeds the compile-time value - ^ static int TooManyStates(struct nfa *); - */ -static int -TooManyStates( - struct nfa *nfa) -{ - struct nfa *parent = nfa->parent; - size_t sz = nfa->size; - - while (parent != NULL) { - sz = parent->size; - parent = parent->parent; - } - if (sz > REG_MAX_STATES) { - return 1; - } - return 0; -} - -/* - - IncrementSize - increases the tracked size of the NFA and its parents. - ^ static void IncrementSize(struct nfa *); - */ -static void -IncrementSize( - struct nfa *nfa) -{ - struct nfa *parent = nfa->parent; - - nfa->size++; - while (parent != NULL) { - parent->size++; - parent = parent->parent; - } -} - -/* - - DecrementSize - increases the tracked size of the NFA and its parents. - ^ static void DecrementSize(struct nfa *); - */ -static void -DecrementSize( - struct nfa *nfa) -{ - struct nfa *parent = nfa->parent; - - nfa->size--; - while (parent != NULL) { - parent->size--; - parent = parent->parent; - } -} - -/* - freenfa - free an entire NFA ^ static VOID freenfa(struct nfa *); */ @@ -180,20 +124,20 @@ newstate( { struct state *s; - if (TooManyStates(nfa)) { - /* XXX: add specific error for this */ - NERR(REG_ETOOBIG); - return NULL; - } if (nfa->free != NULL) { s = nfa->free; nfa->free = s->next; } else { + if (nfa->v->spaceused >= REG_MAX_COMPILE_SPACE) { + NERR(REG_ETOOBIG); + return NULL; + } s = (struct state *) MALLOC(sizeof(struct state)); if (s == NULL) { NERR(REG_ESPACE); return NULL; } + nfa->v->spaceused += sizeof(struct state); s->oas.next = NULL; s->free = NULL; s->noas = 0; @@ -217,12 +161,6 @@ newstate( } s->prev = nfa->slast; nfa->slast = s; - - /* - * Track the current size and the parent size. - */ - - IncrementSize(nfa); return s; } @@ -293,7 +231,6 @@ freestate( s->prev = NULL; s->next = nfa->free; /* don't delete it, put it on the free list */ nfa->free = s; - DecrementSize(nfa); } /* @@ -312,11 +249,13 @@ destroystate( for (ab=s->oas.next ; ab!=NULL ; ab=abnext) { abnext = ab->next; FREE(ab); + nfa->v->spaceused -= sizeof(struct arcbatch); } s->ins = NULL; s->outs = NULL; s->next = NULL; FREE(s); + nfa->v->spaceused -= sizeof(struct state); } /* @@ -439,14 +378,19 @@ allocarc( */ if (s->free == NULL) { - struct arcbatch *newAb = (struct arcbatch *) - MALLOC(sizeof(struct arcbatch)); + struct arcbatch *newAb; int i; + if (nfa->v->spaceused >= REG_MAX_COMPILE_SPACE) { + NERR(REG_ETOOBIG); + return NULL; + } + newAb = (struct arcbatch *) MALLOC(sizeof(struct arcbatch)); if (newAb == NULL) { NERR(REG_ESPACE); return NULL; } + nfa->v->spaceused += sizeof(struct arcbatch); newAb->next = s->oas.next; s->oas.next = newAb; diff --git a/generic/regcomp.c b/generic/regcomp.c index 6b40100..3c91962 100644 --- a/generic/regcomp.c +++ b/generic/regcomp.c @@ -228,6 +228,7 @@ struct vars { struct cvec *cv2; /* utility cvec */ struct subre *lacons; /* lookahead-constraint vector */ int nlacons; /* size of lacons */ + size_t spaceused; /* approx. space used for compilation */ }; /* parsing macros; most know that `v' is the struct vars pointer */ @@ -337,6 +338,7 @@ compile( v->cv2 = NULL; v->lacons = NULL; v->nlacons = 0; + v->spaceused = 0; re->re_magic = REMAGIC; re->re_info = 0; /* bits get set during parse */ re->re_csize = sizeof(chr); diff --git a/generic/regerrs.h b/generic/regerrs.h index 72548ff..ee203d5 100644 --- a/generic/regerrs.h +++ b/generic/regerrs.h @@ -16,5 +16,5 @@ { REG_INVARG, "REG_INVARG", "invalid argument to regex function" }, { REG_MIXED, "REG_MIXED", "character widths of regex and string differ" }, { REG_BADOPT, "REG_BADOPT", "invalid embedded option" }, -{ REG_ETOOBIG, "REG_ETOOBIG", "nfa has too many states" }, +{ REG_ETOOBIG, "REG_ETOOBIG", "regular expression is too complex" }, { REG_ECOLORS, "REG_ECOLORS", "too many colors" }, diff --git a/generic/regex.h b/generic/regex.h index b5dce50..b5b11bd 100644 --- a/generic/regex.h +++ b/generic/regex.h @@ -277,7 +277,7 @@ typedef struct { #define REG_INVARG 16 /* invalid argument to regex function */ #define REG_MIXED 17 /* character widths of regex and string differ */ #define REG_BADOPT 18 /* invalid embedded option */ -#define REG_ETOOBIG 19 /* nfa has too many states */ +#define REG_ETOOBIG 19 /* regular expression is too complex */ #define REG_ECOLORS 20 /* too many colors */ /* two specials for debugging and testing */ #define REG_ATOI 101 /* convert error-code name to number */ diff --git a/generic/regguts.h b/generic/regguts.h index 7ed8ec1..72bdcb3 100644 --- a/generic/regguts.h +++ b/generic/regguts.h @@ -307,9 +307,6 @@ struct nfa { struct colormap *cm; /* the color map */ color bos[2]; /* colors, if any, assigned to BOS and BOL */ color eos[2]; /* colors, if any, assigned to EOS and EOL */ - size_t size; /* Current NFA size; differs from nstates as - * it also counts the number of states created - * by children of this state. */ struct vars *v; /* simplifies compile error reporting */ struct nfa *parent; /* parent NFA, if any */ }; @@ -339,11 +336,16 @@ struct cnfa { #define NULLCNFA(cnfa) ((cnfa).nstates == 0) /* - * Used to limit the maximum NFA size to something sane. [Bug 1810264] + * This symbol limits the transient heap space used by the regex compiler, + * and thereby also the maximum complexity of NFAs that we'll deal with. + * Currently we only count NFA states and arcs against this; the other + * transient data is generally not large enough to notice compared to those. + * Note that we do not charge anything for the final output data structures + * (the compacted NFA and the colormap). */ - -#ifndef REG_MAX_STATES -# define REG_MAX_STATES 100000 +#ifndef REG_MAX_COMPILE_SPACE +#define REG_MAX_COMPILE_SPACE \ + (100000 * sizeof(struct state) + 100000 * sizeof(struct arcbatch)) #endif /* diff --git a/tests/regexp.test b/tests/regexp.test index 362f425..7878d41 100644 --- a/tests/regexp.test +++ b/tests/regexp.test @@ -716,7 +716,7 @@ test regexp-22.4 {Bug 3606139} -setup { [a 668]([a 55])[a 668]([a 55])[a 668]([a 55])[a 511]] {}] a } -cleanup { rename a {} -} -returnCodes 1 -result {couldn't compile regular expression pattern: nfa has too many states} +} -returnCodes 1 -match glob -result {couldn't compile regular expression pattern: *} test regexp-22.5 {Bug 3610026} -setup { set e {} set cp 99 -- cgit v0.12