summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author dgp <dgp@users.sourceforge.net> 2015-10-20 19:53:03 (GMT)
committer dgp <dgp@users.sourceforge.net> 2015-10-20 19:53:03 (GMT)
commit 75ed02d88822dad90ad256551f2df875aa884975 (patch)
tree ab8cd7941695f1a4b7ec8f58c5e5defc75cbcc84
parent 5ec3b6491d71b85b55f8927e78f033e75c92170f (diff)
download tcl-75ed02d88822dad90ad256551f2df875aa884975.zip
tcl-75ed02d88822dad90ad256551f2df875aa884975.tar.gz
tcl-75ed02d88822dad90ad256551f2df875aa884975.tar.bz2
Adaptation of re-memaccounting.patch from Tom Lane @postgres.
-rw-r--r-- generic/regc_nfa.c 84
-rw-r--r-- generic/regcomp.c 2
-rw-r--r-- generic/regerrs.h 2
-rw-r--r-- generic/regex.h 2
-rw-r--r-- generic/regguts.h 16
-rw-r--r-- tests/regexp.test 2
6 files changed, 28 insertions, 80 deletions
diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c
index bbc4dd6..5568bff 100644
--- a/generic/regc_nfa.c
+++ b/generic/regc_nfa.c
@@ -62,7 +62,6 @@ newnfa(
nfa->nstates = 0;
nfa->cm = cm;
nfa->v = v;
- nfa->size = 0;
nfa->bos[0] = nfa->bos[1] = COLORLESS;
nfa->eos[0] = nfa->eos[1] = COLORLESS;
nfa->parent = parent; /* Precedes newfstate so parent is valid. */
@@ -90,61 +89,6 @@ newnfa(
}
/*
- - TooManyStates - checks if the max states exceeds the compile-time value
- ^ static int TooManyStates(struct nfa *);
- */
-static int
-TooManyStates(
- struct nfa *nfa)
-{
- struct nfa *parent = nfa->parent;
- size_t sz = nfa->size;
-
- while (parent != NULL) {
- sz = parent->size;
- parent = parent->parent;
- }
- if (sz > REG_MAX_STATES) {
- return 1;
- }
- return 0;
-}
-
-/*
- - IncrementSize - increases the tracked size of the NFA and its parents.
- ^ static void IncrementSize(struct nfa *);
- */
-static void
-IncrementSize(
- struct nfa *nfa)
-{
- struct nfa *parent = nfa->parent;
-
- nfa->size++;
- while (parent != NULL) {
- parent->size++;
- parent = parent->parent;
- }
-}
-
-/*
- - DecrementSize - increases the tracked size of the NFA and its parents.
- ^ static void DecrementSize(struct nfa *);
- */
-static void
-DecrementSize(
- struct nfa *nfa)
-{
- struct nfa *parent = nfa->parent;
-
- nfa->size--;
- while (parent != NULL) {
- parent->size--;
- parent = parent->parent;
- }
-}
-
-/*
- freenfa - free an entire NFA
^ static VOID freenfa(struct nfa *);
*/
@@ -180,20 +124,20 @@ newstate(
{
struct state *s;
- if (TooManyStates(nfa)) {
- /* XXX: add specific error for this */
- NERR(REG_ETOOBIG);
- return NULL;
- }
if (nfa->free != NULL) {
s = nfa->free;
nfa->free = s->next;
} else {
+ if (nfa->v->spaceused >= REG_MAX_COMPILE_SPACE) {
+ NERR(REG_ETOOBIG);
+ return NULL;
+ }
s = (struct state *) MALLOC(sizeof(struct state));
if (s == NULL) {
NERR(REG_ESPACE);
return NULL;
}
+ nfa->v->spaceused += sizeof(struct state);
s->oas.next = NULL;
s->free = NULL;
s->noas = 0;
@@ -217,12 +161,6 @@ newstate(
}
s->prev = nfa->slast;
nfa->slast = s;
-
- /*
- * Track the current size and the parent size.
- */
-
- IncrementSize(nfa);
return s;
}
@@ -293,7 +231,6 @@ freestate(
s->prev = NULL;
s->next = nfa->free; /* don't delete it, put it on the free list */
nfa->free = s;
- DecrementSize(nfa);
}
/*
@@ -312,11 +249,13 @@ destroystate(
for (ab=s->oas.next ; ab!=NULL ; ab=abnext) {
abnext = ab->next;
FREE(ab);
+ nfa->v->spaceused -= sizeof(struct arcbatch);
}
s->ins = NULL;
s->outs = NULL;
s->next = NULL;
FREE(s);
+ nfa->v->spaceused -= sizeof(struct state);
}
/*
@@ -439,14 +378,19 @@ allocarc(
*/
if (s->free == NULL) {
- struct arcbatch *newAb = (struct arcbatch *)
- MALLOC(sizeof(struct arcbatch));
+ struct arcbatch *newAb;
int i;
+ if (nfa->v->spaceused >= REG_MAX_COMPILE_SPACE) {
+ NERR(REG_ETOOBIG);
+ return NULL;
+ }
+ newAb = (struct arcbatch *) MALLOC(sizeof(struct arcbatch));
if (newAb == NULL) {
NERR(REG_ESPACE);
return NULL;
}
+ nfa->v->spaceused += sizeof(struct arcbatch);
newAb->next = s->oas.next;
s->oas.next = newAb;
diff --git a/generic/regcomp.c b/generic/regcomp.c
index 6b40100..3c91962 100644
--- a/generic/regcomp.c
+++ b/generic/regcomp.c
@@ -228,6 +228,7 @@ struct vars {
struct cvec *cv2; /* utility cvec */
struct subre *lacons; /* lookahead-constraint vector */
int nlacons; /* size of lacons */
+ size_t spaceused; /* approx. space used for compilation */
};
/* parsing macros; most know that `v' is the struct vars pointer */
@@ -337,6 +338,7 @@ compile(
v->cv2 = NULL;
v->lacons = NULL;
v->nlacons = 0;
+ v->spaceused = 0;
re->re_magic = REMAGIC;
re->re_info = 0; /* bits get set during parse */
re->re_csize = sizeof(chr);
diff --git a/generic/regerrs.h b/generic/regerrs.h
index 72548ff..ee203d5 100644
--- a/generic/regerrs.h
+++ b/generic/regerrs.h
@@ -16,5 +16,5 @@
{ REG_INVARG, "REG_INVARG", "invalid argument to regex function" },
{ REG_MIXED, "REG_MIXED", "character widths of regex and string differ" },
{ REG_BADOPT, "REG_BADOPT", "invalid embedded option" },
-{ REG_ETOOBIG, "REG_ETOOBIG", "nfa has too many states" },
+{ REG_ETOOBIG, "REG_ETOOBIG", "regular expression is too complex" },
{ REG_ECOLORS, "REG_ECOLORS", "too many colors" },
diff --git a/generic/regex.h b/generic/regex.h
index b5dce50..b5b11bd 100644
--- a/generic/regex.h
+++ b/generic/regex.h
@@ -277,7 +277,7 @@ typedef struct {
#define REG_INVARG 16 /* invalid argument to regex function */
#define REG_MIXED 17 /* character widths of regex and string differ */
#define REG_BADOPT 18 /* invalid embedded option */
-#define REG_ETOOBIG 19 /* nfa has too many states */
+#define REG_ETOOBIG 19 /* regular expression is too complex */
#define REG_ECOLORS 20 /* too many colors */
/* two specials for debugging and testing */
#define REG_ATOI 101 /* convert error-code name to number */
diff --git a/generic/regguts.h b/generic/regguts.h
index 7ed8ec1..72bdcb3 100644
--- a/generic/regguts.h
+++ b/generic/regguts.h
@@ -307,9 +307,6 @@ struct nfa {
struct colormap *cm; /* the color map */
color bos[2]; /* colors, if any, assigned to BOS and BOL */
color eos[2]; /* colors, if any, assigned to EOS and EOL */
- size_t size; /* Current NFA size; differs from nstates as
- * it also counts the number of states created
- * by children of this state. */
struct vars *v; /* simplifies compile error reporting */
struct nfa *parent; /* parent NFA, if any */
};
@@ -339,11 +336,16 @@ struct cnfa {
#define NULLCNFA(cnfa) ((cnfa).nstates == 0)
/*
- * Used to limit the maximum NFA size to something sane. [Bug 1810264]
+ * This symbol limits the transient heap space used by the regex compiler,
+ * and thereby also the maximum complexity of NFAs that we'll deal with.
+ * Currently we only count NFA states and arcs against this; the other
+ * transient data is generally not large enough to notice compared to those.
+ * Note that we do not charge anything for the final output data structures
+ * (the compacted NFA and the colormap).
*/
-
-#ifndef REG_MAX_STATES
-# define REG_MAX_STATES 100000
+#ifndef REG_MAX_COMPILE_SPACE
+#define REG_MAX_COMPILE_SPACE \
+ (100000 * sizeof(struct state) + 100000 * sizeof(struct arcbatch))
#endif
/*
diff --git a/tests/regexp.test b/tests/regexp.test
index 362f425..7878d41 100644
--- a/tests/regexp.test
+++ b/tests/regexp.test
@@ -716,7 +716,7 @@ test regexp-22.4 {Bug 3606139} -setup {
[a 668]([a 55])[a 668]([a 55])[a 668]([a 55])[a 511]] {}] a
} -cleanup {
rename a {}
-} -returnCodes 1 -result {couldn't compile regular expression pattern: nfa has too many states}
+} -returnCodes 1 -match glob -result {couldn't compile regular expression pattern: *}
test regexp-22.5 {Bug 3610026} -setup {
set e {}
set cp 99