diff options
author | dgp <dgp@users.sourceforge.net> | 2007-11-15 22:01:03 (GMT) |
---|---|---|
committer | dgp <dgp@users.sourceforge.net> | 2007-11-15 22:01:03 (GMT) |
commit | cdb09f0ded89acc8b7a6b269f50fc8cd15da9739 (patch) | |
tree | d9b1b39a8a0517ecc9bbd810273e0cb13e7ca980 | |
parent | a4ad9e9083662f62f8f3df8f9901a114be5cbf24 (diff) | |
download | tcl-cdb09f0ded89acc8b7a6b269f50fc8cd15da9739.zip tcl-cdb09f0ded89acc8b7a6b269f50fc8cd15da9739.tar.gz tcl-cdb09f0ded89acc8b7a6b269f50fc8cd15da9739.tar.bz2 |
* generic/regc_nfa.c: Fixed infinite loop in the regexp compiler
* generic/regcomp.c: [Bug 1810038]. Corrected looping logic in
* tests/regexp.test: fixempties() to avoid wasting time walking a
list of dead states [Bug 1832612]. Convert optst() from expensive
no-op to a cheap no-op. Improve newline usage in debug output.
-rw-r--r-- | ChangeLog | 8 | ||||
-rw-r--r-- | generic/regc_nfa.c | 46 | ||||
-rw-r--r-- | generic/regcomp.c | 18 | ||||
-rw-r--r-- | tests/regexp.test | 8 |
4 files changed, 69 insertions, 11 deletions
@@ -1,3 +1,11 @@ +2007-11-15 Don Porter <dgp@users.sourceforge.net> + + * generic/regc_nfa.c: Fixed infinite loop in the regexp compiler + * generic/regcomp.c: [Bug 1810038]. Corrected looping logic in + * tests/regexp.test: fixempties() to avoid wasting time walking a + list of dead states [Bug 1832612]. Convert optst() from expensive + no-op to a cheap no-op. Improve newline usage in debug output. + 2007-11-13 Donal K. Fellows <donal.k.fellows@man.ac.uk> * unix/tclUnixCompat.c (TclpGetHostByName): The six-argument form of diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c index 9881cd4..dd26e64 100644 --- a/generic/regc_nfa.c +++ b/generic/regc_nfa.c @@ -803,6 +803,26 @@ struct arc *con; return 1; } + /* + * DGP 2007-11-15: Cloning a state with a circular constraint on its + * list of outs can lead to trouble [Bug 1810038], so get rid of them + * first. + */ + + for (a = from->outs; a != NULL; a = nexta) { + nexta = a->outchain; + switch (a->type) { + case '^': + case '$': + case BEHIND: + case AHEAD: + if (from == a->to) { + freearc(nfa, a); + } + break; + } + } + /* first, clone from state if necessary to avoid other outarcs */ if (from->nouts > 1) { s = newstate(nfa); @@ -921,6 +941,29 @@ struct arc *con; return 1; } + /* + * DGP 2007-11-15: Here we duplicate the same protections as appear + * in pull() above to avoid troubles with cloning a state with a + * circular constraint on its list of ins. It is not clear whether + * this is necessary, or is protecting against a "can't happen". + * Any test case that actually leads to a freearc() call here would + * be a welcome addition to the test suite. + */ + + for (a = to->ins; a != NULL; a = nexta) { + nexta = a->inchain; + switch (a->type) { + case '^': + case '$': + case BEHIND: + case AHEAD: + if (a->from == to) { + freearc(nfa, a); + } + break; + } + } + /* first, clone to state if necessary to avoid other inarcs */ if (to->nins > 1) { s = newstate(nfa); @@ -1041,7 +1084,8 @@ FILE *f; /* for debug output; NULL none */ /* find and eliminate empties until there are no more */ do { progress = 0; - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { + for (s = nfa->states; s != NULL && !NISERR() + && s->no != FREESTATE; s = nexts) { nexts = s->next; for (a = s->outs; a != NULL && !NISERR(); a = nexta) { nexta = a->outchain; diff --git a/generic/regcomp.c b/generic/regcomp.c index 29be00f..31b8a82 100644 --- a/generic/regcomp.c +++ b/generic/regcomp.c @@ -1837,14 +1837,14 @@ optst(v, t) struct vars *v; struct subre *t; { - if (t == NULL) - return; - - /* recurse through children */ - if (t->left != NULL) - optst(v, t->left); - if (t->right != NULL) - optst(v, t->right); + /* + * DGP (2007-11-13): I assume it was the programmer's intent to eventually + * come back and add code to optimize subRE trees, but the routine coded + * just spent effort traversing the tree and doing nothing. We can do + * nothing with less effort. + */ + + return; } /* @@ -2144,8 +2144,8 @@ int nfapresent; /* is the original NFA still around? */ if (!NULLCNFA(t->cnfa)) { fprintf(f, "\n"); dumpcnfa(&t->cnfa, f); - fprintf(f, "\n"); } + fprintf(f, "\n"); if (t->left != NULL) stdump(t->left, f, nfapresent); if (t->right != NULL) diff --git a/tests/regexp.test b/tests/regexp.test index 1403e9d..c4c1c80 100644 --- a/tests/regexp.test +++ b/tests/regexp.test @@ -11,7 +11,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: regexp.test,v 1.22.2.3 2003/10/14 18:22:10 vincentdarley Exp $ +# RCS: @(#) $Id: regexp.test,v 1.22.2.4 2007/11/15 22:01:10 dgp Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest 2 @@ -628,6 +628,12 @@ test regexp-21.13 {multiple matches handle newlines} { regexp -all -inline -indices -line -- ^ "a\nb\nc" } {{0 -1} {2 1} {4 3}} + +test regexp-22.1 {Bug 1810038} { + regexp ($|^X)* {} +} 1 + + # cleanup ::tcltest::cleanupTests return |