summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorvincentdarley <vincentdarley>2003-06-17 20:42:36 (GMT)
committervincentdarley <vincentdarley>2003-06-17 20:42:36 (GMT)
commita267f6ee293c2fba8b771503ae41210a47bf3232 (patch)
tree2eff1f6d9708a1ba542c66565ef1b010c91e79bb
parenteb3d0976d6642da1d58097dd0c63336427f68e3d (diff)
downloadtcl-a267f6ee293c2fba8b771503ae41210a47bf3232.zip
tcl-a267f6ee293c2fba8b771503ae41210a47bf3232.tar.gz
tcl-a267f6ee293c2fba8b771503ae41210a47bf3232.tar.bz2
backport of regsub empty string fixes
-rw-r--r--ChangeLog6
-rw-r--r--generic/tclCmdMZ.c22
-rw-r--r--tests/regexp.test42
3 files changed, 63 insertions, 7 deletions
diff --git a/ChangeLog b/ChangeLog
index ad86d24..e858742 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2003-06-17 Vince Darley <vincentdarley@users.sourceforge.net>
+
+ * generic/tclCmdMZ.c:
+ * tests/regexp.test: fixing of bugs related to regexp and regsub
+ matching of empty strings. Addition of a number of new tests.
+
2003-06-10 Miguel Sofer <msofer@users.sf.net>
* generic/tclBasic.c:
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 5a1751e..2086335 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -14,7 +14,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.82.2.4 2003/05/10 23:55:08 hobbs Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.82.2.5 2003/06/17 20:42:37 vincentdarley Exp $
*/
#include "tclInt.h"
@@ -368,7 +368,10 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
while (1) {
match = Tcl_RegExpExecObj(interp, regExpr, objPtr,
- offset /* offset */, numMatchesSaved, eflags);
+ offset /* offset */, numMatchesSaved, eflags
+ | ((offset > 0 &&
+ (Tcl_GetUniChar(objPtr,offset-1) != (Tcl_UniChar)'\n'))
+ ? TCL_REG_NOTBOL : 0));
if (match < 0) {
return TCL_ERROR;
@@ -719,11 +722,14 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
* The following loop is to handle multiple matches within the
* same source string; each iteration handles one match and its
* corresponding substitution. If "-all" hasn't been specified
- * then the loop body only gets executed once.
+ * then the loop body only gets executed once. We must use
+ * 'offset <= wlen' in particular for the case where the regexp
+ * pattern can match the empty string - this is useful when
+ * doing, say, 'regsub -- ^ $str ...' when $str might be empty.
*/
numMatches = 0;
- for ( ; offset < wlen; ) {
+ for ( ; offset <= wlen; ) {
/*
* The flags argument is set if string is part of a larger string,
@@ -731,7 +737,9 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
*/
match = Tcl_RegExpExecObj(interp, regExpr, objPtr, offset,
- 10 /* matches */, ((offset > 0) ? TCL_REG_NOTBOL : 0));
+ 10 /* matches */, ((offset > 0 &&
+ (Tcl_GetUniChar(objPtr,offset-1) != (Tcl_UniChar)'\n'))
+ ? TCL_REG_NOTBOL : 0));
if (match < 0) {
result = TCL_ERROR;
@@ -819,7 +827,9 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
* in order to prevent infinite loops.
*/
- Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1);
+ if (offset < wlen) {
+ Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1);
+ }
offset++;
} else {
offset += end;
diff --git a/tests/regexp.test b/tests/regexp.test
index 2bb017e..af27771 100644
--- a/tests/regexp.test
+++ b/tests/regexp.test
@@ -11,7 +11,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: regexp.test,v 1.22 2002/07/10 11:56:44 dgp Exp $
+# RCS: @(#) $Id: regexp.test,v 1.22.2.1 2003/06/17 20:42:37 vincentdarley Exp $
if {[lsearch [namespace children] ::tcltest] == -1} {
package require tcltest
@@ -572,6 +572,46 @@ test regexp-20.2 {regsub shared object shimmering with -about} {
eval regexp -about abc
} {0 {}}
+test regexp-21.1 {regsub works with empty string} {
+ regsub -- ^ {} foo
+} {foo}
+
+test regexp-21.2 {regsub works with empty string} {
+ regsub -- \$ {} foo
+} {foo}
+
+test regexp-21.3 {regsub works with empty string offset} {
+ regsub -start 0 -- ^ {} foo
+} {foo}
+
+test regexp-21.4 {regsub works with empty string offset} {
+ regsub -start 0 -- \$ {} foo
+} {foo}
+
+test regexp-21.5 {regsub works with empty string offset} {
+ regsub -start 3 -- \$ {123} foo
+} {123foo}
+
+test regexp-21.6 {regexp works with empty string} {
+ regexp -- ^ {}
+} {1}
+
+test regexp-21.7 {regexp works with empty string} {
+ regexp -start 0 -- ^ {}
+} {1}
+
+test regexp-21.8 {regexp works with empty string offset} {
+ regexp -start 3 -- ^ {123}
+} {0}
+
+test regexp-21.9 {regexp works with empty string offset} {
+ regexp -start 3 -- \$ {123}
+} {1}
+
+test regexp-21.10 {multiple matches handle newlines} {
+ regsub -all -lineanchor -- {^#[^\n]*\n} "#one\n#two\n#three\n" foo\n
+} "foo\nfoo\nfoo\n"
+
# cleanup
::tcltest::cleanupTests
return