diff options
author | vincentdarley <vincentdarley> | 2003-06-17 20:42:36 (GMT) |
---|---|---|
committer | vincentdarley <vincentdarley> | 2003-06-17 20:42:36 (GMT) |
commit | a267f6ee293c2fba8b771503ae41210a47bf3232 (patch) | |
tree | 2eff1f6d9708a1ba542c66565ef1b010c91e79bb | |
parent | eb3d0976d6642da1d58097dd0c63336427f68e3d (diff) | |
download | tcl-a267f6ee293c2fba8b771503ae41210a47bf3232.zip tcl-a267f6ee293c2fba8b771503ae41210a47bf3232.tar.gz tcl-a267f6ee293c2fba8b771503ae41210a47bf3232.tar.bz2 |
backport of regsub empty string fixes
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | generic/tclCmdMZ.c | 22 | ||||
-rw-r--r-- | tests/regexp.test | 42 |
3 files changed, 63 insertions, 7 deletions
@@ -1,3 +1,9 @@ +2003-06-17 Vince Darley <vincentdarley@users.sourceforge.net> + + * generic/tclCmdMZ.c: + * tests/regexp.test: fixing of bugs related to regexp and regsub + matching of empty strings. Addition of a number of new tests. + 2003-06-10 Miguel Sofer <msofer@users.sf.net> * generic/tclBasic.c: diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 5a1751e..2086335 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -14,7 +14,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdMZ.c,v 1.82.2.4 2003/05/10 23:55:08 hobbs Exp $ + * RCS: @(#) $Id: tclCmdMZ.c,v 1.82.2.5 2003/06/17 20:42:37 vincentdarley Exp $ */ #include "tclInt.h" @@ -368,7 +368,10 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv) while (1) { match = Tcl_RegExpExecObj(interp, regExpr, objPtr, - offset /* offset */, numMatchesSaved, eflags); + offset /* offset */, numMatchesSaved, eflags + | ((offset > 0 && + (Tcl_GetUniChar(objPtr,offset-1) != (Tcl_UniChar)'\n')) + ? TCL_REG_NOTBOL : 0)); if (match < 0) { return TCL_ERROR; @@ -719,11 +722,14 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) * The following loop is to handle multiple matches within the * same source string; each iteration handles one match and its * corresponding substitution. If "-all" hasn't been specified - * then the loop body only gets executed once. + * then the loop body only gets executed once. We must use + * 'offset <= wlen' in particular for the case where the regexp + * pattern can match the empty string - this is useful when + * doing, say, 'regsub -- ^ $str ...' when $str might be empty. */ numMatches = 0; - for ( ; offset < wlen; ) { + for ( ; offset <= wlen; ) { /* * The flags argument is set if string is part of a larger string, @@ -731,7 +737,9 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) */ match = Tcl_RegExpExecObj(interp, regExpr, objPtr, offset, - 10 /* matches */, ((offset > 0) ? TCL_REG_NOTBOL : 0)); + 10 /* matches */, ((offset > 0 && + (Tcl_GetUniChar(objPtr,offset-1) != (Tcl_UniChar)'\n')) + ? TCL_REG_NOTBOL : 0)); if (match < 0) { result = TCL_ERROR; @@ -819,7 +827,9 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) * in order to prevent infinite loops. */ - Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1); + if (offset < wlen) { + Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1); + } offset++; } else { offset += end; diff --git a/tests/regexp.test b/tests/regexp.test index 2bb017e..af27771 100644 --- a/tests/regexp.test +++ b/tests/regexp.test @@ -11,7 +11,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: regexp.test,v 1.22 2002/07/10 11:56:44 dgp Exp $ +# RCS: @(#) $Id: regexp.test,v 1.22.2.1 2003/06/17 20:42:37 vincentdarley Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest @@ -572,6 +572,46 @@ test regexp-20.2 {regsub shared object shimmering with -about} { eval regexp -about abc } {0 {}} +test regexp-21.1 {regsub works with empty string} { + regsub -- ^ {} foo +} {foo} + +test regexp-21.2 {regsub works with empty string} { + regsub -- \$ {} foo +} {foo} + +test regexp-21.3 {regsub works with empty string offset} { + regsub -start 0 -- ^ {} foo +} {foo} + +test regexp-21.4 {regsub works with empty string offset} { + regsub -start 0 -- \$ {} foo +} {foo} + +test regexp-21.5 {regsub works with empty string offset} { + regsub -start 3 -- \$ {123} foo +} {123foo} + +test regexp-21.6 {regexp works with empty string} { + regexp -- ^ {} +} {1} + +test regexp-21.7 {regexp works with empty string} { + regexp -start 0 -- ^ {} +} {1} + +test regexp-21.8 {regexp works with empty string offset} { + regexp -start 3 -- ^ {123} +} {0} + +test regexp-21.9 {regexp works with empty string offset} { + regexp -start 3 -- \$ {123} +} {1} + +test regexp-21.10 {multiple matches handle newlines} { + regsub -all -lineanchor -- {^#[^\n]*\n} "#one\n#two\n#three\n" foo\n +} "foo\nfoo\nfoo\n" + # cleanup ::tcltest::cleanupTests return |