summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog8
-rw-r--r--generic/regc_nfa.c46
-rw-r--r--generic/regcomp.c18
-rw-r--r--tests/regexp.test8
4 files changed, 69 insertions, 11 deletions
diff --git a/ChangeLog b/ChangeLog
index 354f9f5..fbe8165 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2007-11-15 Don Porter <dgp@users.sourceforge.net>
+
+ * generic/regc_nfa.c: Fixed infinite loop in the regexp compiler
+ * generic/regcomp.c: [Bug 1810038]. Corrected looping logic in
+ * tests/regexp.test: fixempties() to avoid wasting time walking a
+ list of dead states [Bug 1832612]. Convert optst() from expensive
+ no-op to a cheap no-op. Improve newline usage in debug output.
+
2007-11-13 Donal K. Fellows <donal.k.fellows@man.ac.uk>
* unix/tclUnixCompat.c (TclpGetHostByName): The six-argument form of
diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c
index 9881cd4..dd26e64 100644
--- a/generic/regc_nfa.c
+++ b/generic/regc_nfa.c
@@ -803,6 +803,26 @@ struct arc *con;
return 1;
}
+ /*
+ * DGP 2007-11-15: Cloning a state with a circular constraint on its
+ * list of outs can lead to trouble [Bug 1810038], so get rid of them
+ * first.
+ */
+
+ for (a = from->outs; a != NULL; a = nexta) {
+ nexta = a->outchain;
+ switch (a->type) {
+ case '^':
+ case '$':
+ case BEHIND:
+ case AHEAD:
+ if (from == a->to) {
+ freearc(nfa, a);
+ }
+ break;
+ }
+ }
+
/* first, clone from state if necessary to avoid other outarcs */
if (from->nouts > 1) {
s = newstate(nfa);
@@ -921,6 +941,29 @@ struct arc *con;
return 1;
}
+ /*
+ * DGP 2007-11-15: Here we duplicate the same protections as appear
+ * in pull() above to avoid troubles with cloning a state with a
+ * circular constraint on its list of ins. It is not clear whether
+ * this is necessary, or is protecting against a "can't happen".
+ * Any test case that actually leads to a freearc() call here would
+ * be a welcome addition to the test suite.
+ */
+
+ for (a = to->ins; a != NULL; a = nexta) {
+ nexta = a->inchain;
+ switch (a->type) {
+ case '^':
+ case '$':
+ case BEHIND:
+ case AHEAD:
+ if (a->from == to) {
+ freearc(nfa, a);
+ }
+ break;
+ }
+ }
+
/* first, clone to state if necessary to avoid other inarcs */
if (to->nins > 1) {
s = newstate(nfa);
@@ -1041,7 +1084,8 @@ FILE *f; /* for debug output; NULL none */
/* find and eliminate empties until there are no more */
do {
progress = 0;
- for (s = nfa->states; s != NULL && !NISERR(); s = nexts) {
+ for (s = nfa->states; s != NULL && !NISERR()
+ && s->no != FREESTATE; s = nexts) {
nexts = s->next;
for (a = s->outs; a != NULL && !NISERR(); a = nexta) {
nexta = a->outchain;
diff --git a/generic/regcomp.c b/generic/regcomp.c
index 29be00f..31b8a82 100644
--- a/generic/regcomp.c
+++ b/generic/regcomp.c
@@ -1837,14 +1837,14 @@ optst(v, t)
struct vars *v;
struct subre *t;
{
- if (t == NULL)
- return;
-
- /* recurse through children */
- if (t->left != NULL)
- optst(v, t->left);
- if (t->right != NULL)
- optst(v, t->right);
+ /*
+ * DGP (2007-11-13): I assume it was the programmer's intent to eventually
+ * come back and add code to optimize subRE trees, but the routine coded
+ * just spent effort traversing the tree and doing nothing. We can do
+ * nothing with less effort.
+ */
+
+ return;
}
/*
@@ -2144,8 +2144,8 @@ int nfapresent; /* is the original NFA still around? */
if (!NULLCNFA(t->cnfa)) {
fprintf(f, "\n");
dumpcnfa(&t->cnfa, f);
- fprintf(f, "\n");
}
+ fprintf(f, "\n");
if (t->left != NULL)
stdump(t->left, f, nfapresent);
if (t->right != NULL)
diff --git a/tests/regexp.test b/tests/regexp.test
index 1403e9d..c4c1c80 100644
--- a/tests/regexp.test
+++ b/tests/regexp.test
@@ -11,7 +11,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: regexp.test,v 1.22.2.3 2003/10/14 18:22:10 vincentdarley Exp $
+# RCS: @(#) $Id: regexp.test,v 1.22.2.4 2007/11/15 22:01:10 dgp Exp $
if {[lsearch [namespace children] ::tcltest] == -1} {
package require tcltest 2
@@ -628,6 +628,12 @@ test regexp-21.13 {multiple matches handle newlines} {
regexp -all -inline -indices -line -- ^ "a\nb\nc"
} {{0 -1} {2 1} {4 3}}
+
+test regexp-22.1 {Bug 1810038} {
+ regexp ($|^X)* {}
+} 1
+
+
# cleanup
::tcltest::cleanupTests
return