From cdb09f0ded89acc8b7a6b269f50fc8cd15da9739 Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 15 Nov 2007 22:01:03 +0000 Subject: * generic/regc_nfa.c: Fixed infinite loop in the regexp compiler * generic/regcomp.c: [Bug 1810038]. Corrected looping logic in * tests/regexp.test: fixempties() to avoid wasting time walking a list of dead states [Bug 1832612]. Convert optst() from expensive no-op to a cheap no-op. Improve newline usage in debug output. --- ChangeLog | 8 ++++++++ generic/regc_nfa.c | 46 +++++++++++++++++++++++++++++++++++++++++++++- generic/regcomp.c | 18 +++++++++--------- tests/regexp.test | 8 +++++++- 4 files changed, 69 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index 354f9f5..fbe8165 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2007-11-15 Don Porter + + * generic/regc_nfa.c: Fixed infinite loop in the regexp compiler + * generic/regcomp.c: [Bug 1810038]. Corrected looping logic in + * tests/regexp.test: fixempties() to avoid wasting time walking a + list of dead states [Bug 1832612]. Convert optst() from expensive + no-op to a cheap no-op. Improve newline usage in debug output. + 2007-11-13 Donal K. Fellows * unix/tclUnixCompat.c (TclpGetHostByName): The six-argument form of diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c index 9881cd4..dd26e64 100644 --- a/generic/regc_nfa.c +++ b/generic/regc_nfa.c @@ -803,6 +803,26 @@ struct arc *con; return 1; } + /* + * DGP 2007-11-15: Cloning a state with a circular constraint on its + * list of outs can lead to trouble [Bug 1810038], so get rid of them + * first. + */ + + for (a = from->outs; a != NULL; a = nexta) { + nexta = a->outchain; + switch (a->type) { + case '^': + case '$': + case BEHIND: + case AHEAD: + if (from == a->to) { + freearc(nfa, a); + } + break; + } + } + /* first, clone from state if necessary to avoid other outarcs */ if (from->nouts > 1) { s = newstate(nfa); @@ -921,6 +941,29 @@ struct arc *con; return 1; } + /* + * DGP 2007-11-15: Here we duplicate the same protections as appear + * in pull() above to avoid troubles with cloning a state with a + * circular constraint on its list of ins. It is not clear whether + * this is necessary, or is protecting against a "can't happen". + * Any test case that actually leads to a freearc() call here would + * be a welcome addition to the test suite. + */ + + for (a = to->ins; a != NULL; a = nexta) { + nexta = a->inchain; + switch (a->type) { + case '^': + case '$': + case BEHIND: + case AHEAD: + if (a->from == to) { + freearc(nfa, a); + } + break; + } + } + /* first, clone to state if necessary to avoid other inarcs */ if (to->nins > 1) { s = newstate(nfa); @@ -1041,7 +1084,8 @@ FILE *f; /* for debug output; NULL none */ /* find and eliminate empties until there are no more */ do { progress = 0; - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { + for (s = nfa->states; s != NULL && !NISERR() + && s->no != FREESTATE; s = nexts) { nexts = s->next; for (a = s->outs; a != NULL && !NISERR(); a = nexta) { nexta = a->outchain; diff --git a/generic/regcomp.c b/generic/regcomp.c index 29be00f..31b8a82 100644 --- a/generic/regcomp.c +++ b/generic/regcomp.c @@ -1837,14 +1837,14 @@ optst(v, t) struct vars *v; struct subre *t; { - if (t == NULL) - return; - - /* recurse through children */ - if (t->left != NULL) - optst(v, t->left); - if (t->right != NULL) - optst(v, t->right); + /* + * DGP (2007-11-13): I assume it was the programmer's intent to eventually + * come back and add code to optimize subRE trees, but the routine coded + * just spent effort traversing the tree and doing nothing. We can do + * nothing with less effort. + */ + + return; } /* @@ -2144,8 +2144,8 @@ int nfapresent; /* is the original NFA still around? */ if (!NULLCNFA(t->cnfa)) { fprintf(f, "\n"); dumpcnfa(&t->cnfa, f); - fprintf(f, "\n"); } + fprintf(f, "\n"); if (t->left != NULL) stdump(t->left, f, nfapresent); if (t->right != NULL) diff --git a/tests/regexp.test b/tests/regexp.test index 1403e9d..c4c1c80 100644 --- a/tests/regexp.test +++ b/tests/regexp.test @@ -11,7 +11,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: regexp.test,v 1.22.2.3 2003/10/14 18:22:10 vincentdarley Exp $ +# RCS: @(#) $Id: regexp.test,v 1.22.2.4 2007/11/15 22:01:10 dgp Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest 2 @@ -628,6 +628,12 @@ test regexp-21.13 {multiple matches handle newlines} { regexp -all -inline -indices -line -- ^ "a\nb\nc" } {{0 -1} {2 1} {4 3}} + +test regexp-22.1 {Bug 1810038} { + regexp ($|^X)* {} +} 1 + + # cleanup ::tcltest::cleanupTests return -- cgit v0.12