diff options
-rw-r--r-- generic/tclCmdMZ.c | 29
-rw-r--r-- tests/split.test   |  7
-rw-r--r-- tests/string.test  | 12
3 files changed, 29 insertions, 19 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 8f5ad7a..bc7874a 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -1175,7 +1175,7 @@ Tcl_SplitObjCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - Tcl_UniChar ch = 0; + int ch = 0; int len; const char *splitChars; const char *stringPtr; @@ -1218,10 +1218,8 @@ Tcl_SplitObjCmd( Tcl_InitHashTable(&charReuseTable, TCL_ONE_WORD_KEYS); for ( ; stringPtr < end; stringPtr += len) { - int ucs4; - - len = TclUtfToUCS4(stringPtr, &ucs4); - hPtr = Tcl_CreateHashEntry(&charReuseTable, INT2PTR(ucs4), &isNew); + len = TclUtfToUCS4(stringPtr, &ch); + hPtr = Tcl_CreateHashEntry(&charReuseTable, INT2PTR(ch), &isNew); if (isNew) { TclNewStringObj(objPtr, stringPtr, len); @@ -1256,7 +1254,7 @@ Tcl_SplitObjCmd( } else { const char *element, *p, *splitEnd; size_t splitLen; - Tcl_UniChar splitChar = 0; + int splitChar; /* * Normal case: split on any of a given set of characters. Discard @@ -1266,9 +1264,9 @@ Tcl_SplitObjCmd( splitEnd = splitChars + splitCharLen; for (element = stringPtr; stringPtr < end; stringPtr += len) { - len = TclUtfToUniChar(stringPtr, &ch); + len = TclUtfToUCS4(stringPtr, &ch); for (p = splitChars; p < splitEnd; p += splitLen) { - splitLen = TclUtfToUniChar(p, &splitChar); + splitLen = TclUtfToUCS4(p, &splitChar); if (ch == splitChar) { TclNewStringObj(objPtr, element, stringPtr - element); Tcl_ListObjAppendElement(NULL, listPtr, objPtr); @@ -2487,9 +2485,7 @@ StringRevCmd( * StringStartCmd -- * * This procedure is invoked to process the "string wordstart" Tcl - * command. See the user documentation for details on what it does. Note - * that this command only functions correctly on properly formed Tcl UTF - * strings. + * command. See the user documentation for details on what it does. * * Results: * A standard Tcl result. @@ -2507,7 +2503,7 @@ StringStartCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. 
*/ { - Tcl_UniChar ch = 0; + int ch; const char *p, *string; size_t numChars, length, cur, index; @@ -2529,7 +2525,7 @@ StringStartCmd( if (index + 1 > 1) { p = Tcl_UtfAtIndex(string, index); for (cur = index; cur != TCL_INDEX_NONE; cur--) { - TclUtfToUniChar(p, &ch); + TclUtfToUCS4(p, &ch); if (!Tcl_UniCharIsWordChar(ch)) { break; } @@ -2549,8 +2545,7 @@ StringStartCmd( * StringEndCmd -- * * This procedure is invoked to process the "string wordend" Tcl command. - * See the user documentation for details on what it does. Note that this - * command only functions correctly on properly formed Tcl UTF strings. + * See the user documentation for details on what it does. * * Results: * A standard Tcl result. @@ -2568,7 +2563,7 @@ StringEndCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - Tcl_UniChar ch = 0; + int ch; const char *p, *end, *string; size_t length, numChars, cur, index; @@ -2590,7 +2585,7 @@ StringEndCmd( p = Tcl_UtfAtIndex(string, index); end = string+length; for (cur = index; p < end; cur++) { - p += TclUtfToUniChar(p, &ch); + p += TclUtfToUCS4(p, &ch); if (!Tcl_UniCharIsWordChar(ch)) { break; } diff --git a/tests/split.test b/tests/split.test index 2d180e0..d00c452 100644 --- a/tests/split.test +++ b/tests/split.test @@ -71,8 +71,11 @@ test split-1.14 {basic split commands} { split ",12,,,34,56," {,} } {{} 12 {} {} 34 56 {}} test split-1.15 {basic split commands} -body { - split "a\U01f4a9b" {} -} -result "a \U01f4a9 b" + split "a\U1F4A9b" {} +} -result "a \U1F4A9 b" +test split-1.16 {basic split commands} -body { + split "a\U1F4A9b" \U1F4A9 +} -result "a b" test split-2.1 {split errors} { list [catch split msg] $msg $errorCode diff --git a/tests/string.test b/tests/string.test index 3e8e6ae..b8f01a5 100644 --- a/tests/string.test +++ b/tests/string.test @@ -1856,6 +1856,12 @@ test string-21.13.$noComp {string wordend, unicode} -body { test string-21.14.$noComp {string wordend, unicode} -body { run {string 
wordend "\uC700\uC700 abc" 8} } -result 6 +test string-21.15.$noComp {string wordend, unicode} -body { + run {string wordend "\U1D7CA\U1D7CA abc" 0} +} -result 2 +test string-21.16.$noComp {string wordend, unicode} -constraints fullutf -body { + run {string wordend "\U1D7CA\U1D7CA abc" 10} +} -result 6 test string-22.1.$noComp {string wordstart} -body { list [catch {run {string word a}} msg] $msg @@ -1896,6 +1902,12 @@ test string-22.12.$noComp {string wordstart, unicode} -body { test string-22.13.$noComp {string wordstart, unicode} -body { run {string wordstart "\uC700\uC700 abc" 8} } -result 3 +test string-22.14.$noComp {string wordstart, unicode} -body { + run {string wordstart "\U1D7CA\U1D7CA abc" 0} +} -result 0 +test string-22.15.$noComp {string wordstart, unicode} -constraints fullutf -body { + run {string wordstart "\U1D7CA\U1D7CA abc" 10} +} -result 3 test string-23.0.$noComp {string is boolean, Bug 1187123} testindexobj { set x 5 |