diff options
-rw-r--r-- | ChangeLog | 9
-rw-r--r-- | generic/tclCmdMZ.c | 22
-rw-r--r-- | tests/string.test | 11
3 files changed, 34 insertions, 8 deletions
@@ -1,3 +1,12 @@ +2008-09-29 Donal K. Fellows <dkf@users.sf.net> + + TIP #318 IMPLEMENTATION + + * generic/tclCmdMZ.c (StringTrimCmd,StringTrimLCmd,StringTrimRCmd): + Update the default set of trimmed characters to include some from + the larger UNICODE space. Factor out the default trim set into a + macro so that it is easier to keep them in synch. + 2008-09-28 Donal K. Fellows <dkf@users.sf.net> TIP #314 IMPLEMENTATION diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 227e7b8..dfeb59c 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -15,7 +15,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdMZ.c,v 1.169 2008/07/31 20:01:40 msofer Exp $ + * RCS: @(#) $Id: tclCmdMZ.c,v 1.170 2008/09/29 08:20:34 dkf Exp $ */ #include "tclInt.h" @@ -23,6 +23,14 @@ static int UniCharIsAscii(int character); +/* + * Default set of characters to trim in [string trim] and friends. This is a + * UTF-8 literal string containing space, tab, newline, carriage return, + * ethiopic wordspace (U+1361), ogham space mark (U+1680), and ideographic + * space (U+3000). 
[TIP #318] + */ + +#define DEFAULT_TRIM_SET " \t\n\r\xe1\x8d\xa1\xe1\x9a\x80\xe3\x80\x80" /* *---------------------------------------------------------------------- @@ -3069,8 +3077,8 @@ StringTrimCmd( if (objc == 3) { string2 = TclGetStringFromObj(objv[2], &length2); } else if (objc == 2) { - string2 = " \t\n\r"; - length2 = strlen(string2); + string2 = DEFAULT_TRIM_SET; + length2 = strlen(DEFAULT_TRIM_SET); } else { Tcl_WrongNumArgs(interp, 1, objv, "string ?chars?"); return TCL_ERROR; @@ -3165,8 +3173,8 @@ StringTrimLCmd( if (objc == 3) { string2 = TclGetStringFromObj(objv[2], &length2); } else if (objc == 2) { - string2 = " \t\n\r"; - length2 = strlen(string2); + string2 = DEFAULT_TRIM_SET; + length2 = strlen(DEFAULT_TRIM_SET); } else { Tcl_WrongNumArgs(interp, 1, objv, "string ?chars?"); return TCL_ERROR; @@ -3237,8 +3245,8 @@ StringTrimRCmd( if (objc == 3) { string2 = TclGetStringFromObj(objv[2], &length2); } else if (objc == 2) { - string2 = " \t\n\r"; - length2 = strlen(string2); + string2 = DEFAULT_TRIM_SET; + length2 = strlen(DEFAULT_TRIM_SET); } else { Tcl_WrongNumArgs(interp, 1, objv, "string ?chars?"); return TCL_ERROR; diff --git a/tests/string.test b/tests/string.test index 64ec56f..c2ddfc8 100644 --- a/tests/string.test +++ b/tests/string.test @@ -12,7 +12,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. 
# -# RCS: @(#) $Id: string.test,v 1.73 2008/07/19 22:50:38 nijtmans Exp $ +# RCS: @(#) $Id: string.test,v 1.74 2008/09/29 08:20:38 dkf Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest @@ -1407,6 +1407,9 @@ test string-18.10 {string trim} { test string-18.11 {string trim, unicode} { string trim "\xe7\xe8 AB\xe7C \xe8\xe7" \xe7\xe8 } " AB\xe7C " +test string-18.12 {string trim, unicode default} { + string trim ABC\u1361\u1680\u3000 +} ABC test string-19.1 {string trimleft} { list [catch {string trimleft} msg] $msg @@ -1414,6 +1417,9 @@ test string-19.1 {string trimleft} { test string-19.2 {string trimleft} { string trimleft " XYZ " } {XYZ } +test string-19.3 {string trimleft, unicode default} { + string trimleft \u1361\u1680\u3000ABC +} ABC test string-20.1 {string trimright errors} { list [catch {string trimright} msg] $msg @@ -1430,6 +1436,9 @@ test string-20.4 {string trimright} { test string-20.5 {string trimright} { string trimright "" } {} +test string-20.6 {string trimright, unicode default} { + string trimright ABC\u1361\u1680\u3000 +} ABC test string-21.1 {string wordend} { list [catch {string wordend a} msg] $msg |