summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 9
-rw-r--r-- generic/tclCmdMZ.c 22
-rw-r--r-- tests/string.test 11
3 files changed, 34 insertions, 8 deletions
diff --git a/ChangeLog b/ChangeLog
index f6b346e..e3c731c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2008-09-29 Donal K. Fellows <dkf@users.sf.net>
+
+ TIP #318 IMPLEMENTATION
+
+ * generic/tclCmdMZ.c (StringTrimCmd,StringTrimLCmd,StringTrimRCmd):
+ Update the default set of trimmed characters to include some from
+ the larger UNICODE space. Factor out the default trim set into a
+ macro so that it is easier to keep them in synch.
+
2008-09-28 Donal K. Fellows <dkf@users.sf.net>
TIP #314 IMPLEMENTATION
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 227e7b8..dfeb59c 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -15,7 +15,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.169 2008/07/31 20:01:40 msofer Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.170 2008/09/29 08:20:34 dkf Exp $
*/
#include "tclInt.h"
@@ -23,6 +23,14 @@
static int UniCharIsAscii(int character);
+/*
+ * Default set of characters to trim in [string trim] and friends. This is a
+ * UTF-8 literal string containing space, tab, newline, carriage return,
+ * ethiopic wordspace (U+1361), ogham space mark (U+1680), and ideographic
+ * space (U+3000). [TIP #318]
+ */
+
+#define DEFAULT_TRIM_SET " \t\n\r\xe1\x8d\xa1\xe1\x9a\x80\xe3\x80\x80"
/*
*----------------------------------------------------------------------
@@ -3069,8 +3077,8 @@ StringTrimCmd(
if (objc == 3) {
string2 = TclGetStringFromObj(objv[2], &length2);
} else if (objc == 2) {
- string2 = " \t\n\r";
- length2 = strlen(string2);
+ string2 = DEFAULT_TRIM_SET;
+ length2 = strlen(DEFAULT_TRIM_SET);
} else {
Tcl_WrongNumArgs(interp, 1, objv, "string ?chars?");
return TCL_ERROR;
@@ -3165,8 +3173,8 @@ StringTrimLCmd(
if (objc == 3) {
string2 = TclGetStringFromObj(objv[2], &length2);
} else if (objc == 2) {
- string2 = " \t\n\r";
- length2 = strlen(string2);
+ string2 = DEFAULT_TRIM_SET;
+ length2 = strlen(DEFAULT_TRIM_SET);
} else {
Tcl_WrongNumArgs(interp, 1, objv, "string ?chars?");
return TCL_ERROR;
@@ -3237,8 +3245,8 @@ StringTrimRCmd(
if (objc == 3) {
string2 = TclGetStringFromObj(objv[2], &length2);
} else if (objc == 2) {
- string2 = " \t\n\r";
- length2 = strlen(string2);
+ string2 = DEFAULT_TRIM_SET;
+ length2 = strlen(DEFAULT_TRIM_SET);
} else {
Tcl_WrongNumArgs(interp, 1, objv, "string ?chars?");
return TCL_ERROR;
diff --git a/tests/string.test b/tests/string.test
index 64ec56f..c2ddfc8 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -12,7 +12,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: string.test,v 1.73 2008/07/19 22:50:38 nijtmans Exp $
+# RCS: @(#) $Id: string.test,v 1.74 2008/09/29 08:20:38 dkf Exp $
if {[lsearch [namespace children] ::tcltest] == -1} {
package require tcltest
@@ -1407,6 +1407,9 @@ test string-18.10 {string trim} {
test string-18.11 {string trim, unicode} {
string trim "\xe7\xe8 AB\xe7C \xe8\xe7" \xe7\xe8
} " AB\xe7C "
+test string-18.12 {string trim, unicode default} {
+ string trim ABC\u1361\u1680\u3000
+} ABC
test string-19.1 {string trimleft} {
list [catch {string trimleft} msg] $msg
@@ -1414,6 +1417,9 @@ test string-19.1 {string trimleft} {
test string-19.2 {string trimleft} {
string trimleft " XYZ "
} {XYZ }
+test string-19.3 {string trimleft, unicode default} {
+ string trimleft \u1361\u1680\u3000ABC
+} ABC
test string-20.1 {string trimright errors} {
list [catch {string trimright} msg] $msg
@@ -1430,6 +1436,9 @@ test string-20.4 {string trimright} {
test string-20.5 {string trimright} {
string trimright ""
} {}
+test string-20.6 {string trimright, unicode default} {
+ string trimright ABC\u1361\u1680\u3000
+} ABC
test string-21.1 {string wordend} {
list [catch {string wordend a} msg] $msg