summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- generic/tclCmdMZ.c 29
-rw-r--r-- tests/split.test 7
-rw-r--r-- tests/string.test 12
3 files changed, 29 insertions, 19 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 8f5ad7a..bc7874a 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -1175,7 +1175,7 @@ Tcl_SplitObjCmd(
int objc, /* Number of arguments. */
Tcl_Obj *const objv[]) /* Argument objects. */
{
- Tcl_UniChar ch = 0;
+ int ch = 0;
int len;
const char *splitChars;
const char *stringPtr;
@@ -1218,10 +1218,8 @@ Tcl_SplitObjCmd(
Tcl_InitHashTable(&charReuseTable, TCL_ONE_WORD_KEYS);
for ( ; stringPtr < end; stringPtr += len) {
- int ucs4;
-
- len = TclUtfToUCS4(stringPtr, &ucs4);
- hPtr = Tcl_CreateHashEntry(&charReuseTable, INT2PTR(ucs4), &isNew);
+ len = TclUtfToUCS4(stringPtr, &ch);
+ hPtr = Tcl_CreateHashEntry(&charReuseTable, INT2PTR(ch), &isNew);
if (isNew) {
TclNewStringObj(objPtr, stringPtr, len);
@@ -1256,7 +1254,7 @@ Tcl_SplitObjCmd(
} else {
const char *element, *p, *splitEnd;
size_t splitLen;
- Tcl_UniChar splitChar = 0;
+ int splitChar;
/*
* Normal case: split on any of a given set of characters. Discard
@@ -1266,9 +1264,9 @@ Tcl_SplitObjCmd(
splitEnd = splitChars + splitCharLen;
for (element = stringPtr; stringPtr < end; stringPtr += len) {
- len = TclUtfToUniChar(stringPtr, &ch);
+ len = TclUtfToUCS4(stringPtr, &ch);
for (p = splitChars; p < splitEnd; p += splitLen) {
- splitLen = TclUtfToUniChar(p, &splitChar);
+ splitLen = TclUtfToUCS4(p, &splitChar);
if (ch == splitChar) {
TclNewStringObj(objPtr, element, stringPtr - element);
Tcl_ListObjAppendElement(NULL, listPtr, objPtr);
@@ -2487,9 +2485,7 @@ StringRevCmd(
* StringStartCmd --
*
* This procedure is invoked to process the "string wordstart" Tcl
- * command. See the user documentation for details on what it does. Note
- * that this command only functions correctly on properly formed Tcl UTF
- * strings.
+ * command. See the user documentation for details on what it does.
*
* Results:
* A standard Tcl result.
@@ -2507,7 +2503,7 @@ StringStartCmd(
int objc, /* Number of arguments. */
Tcl_Obj *const objv[]) /* Argument objects. */
{
- Tcl_UniChar ch = 0;
+ int ch;
const char *p, *string;
size_t numChars, length, cur, index;
@@ -2529,7 +2525,7 @@ StringStartCmd(
if (index + 1 > 1) {
p = Tcl_UtfAtIndex(string, index);
for (cur = index; cur != TCL_INDEX_NONE; cur--) {
- TclUtfToUniChar(p, &ch);
+ TclUtfToUCS4(p, &ch);
if (!Tcl_UniCharIsWordChar(ch)) {
break;
}
@@ -2549,8 +2545,7 @@ StringStartCmd(
* StringEndCmd --
*
* This procedure is invoked to process the "string wordend" Tcl command.
- * See the user documentation for details on what it does. Note that this
- * command only functions correctly on properly formed Tcl UTF strings.
+ * See the user documentation for details on what it does.
*
* Results:
* A standard Tcl result.
@@ -2568,7 +2563,7 @@ StringEndCmd(
int objc, /* Number of arguments. */
Tcl_Obj *const objv[]) /* Argument objects. */
{
- Tcl_UniChar ch = 0;
+ int ch;
const char *p, *end, *string;
size_t length, numChars, cur, index;
@@ -2590,7 +2585,7 @@ StringEndCmd(
p = Tcl_UtfAtIndex(string, index);
end = string+length;
for (cur = index; p < end; cur++) {
- p += TclUtfToUniChar(p, &ch);
+ p += TclUtfToUCS4(p, &ch);
if (!Tcl_UniCharIsWordChar(ch)) {
break;
}
diff --git a/tests/split.test b/tests/split.test
index 2d180e0..d00c452 100644
--- a/tests/split.test
+++ b/tests/split.test
@@ -71,8 +71,11 @@ test split-1.14 {basic split commands} {
split ",12,,,34,56," {,}
} {{} 12 {} {} 34 56 {}}
test split-1.15 {basic split commands} -body {
- split "a\U01f4a9b" {}
-} -result "a \U01f4a9 b"
+ split "a\U1F4A9b" {}
+} -result "a \U1F4A9 b"
+test split-1.16 {basic split commands} -body {
+ split "a\U1F4A9b" \U1F4A9
+} -result "a b"
test split-2.1 {split errors} {
list [catch split msg] $msg $errorCode
diff --git a/tests/string.test b/tests/string.test
index 3e8e6ae..b8f01a5 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -1856,6 +1856,12 @@ test string-21.13.$noComp {string wordend, unicode} -body {
test string-21.14.$noComp {string wordend, unicode} -body {
run {string wordend "\uC700\uC700 abc" 8}
} -result 6
+test string-21.15.$noComp {string wordend, unicode} -body {
+ run {string wordend "\U1D7CA\U1D7CA abc" 0}
+} -result 2
+test string-21.16.$noComp {string wordend, unicode} -constraints fullutf -body {
+ run {string wordend "\U1D7CA\U1D7CA abc" 10}
+} -result 6
test string-22.1.$noComp {string wordstart} -body {
list [catch {run {string word a}} msg] $msg
@@ -1896,6 +1902,12 @@ test string-22.12.$noComp {string wordstart, unicode} -body {
test string-22.13.$noComp {string wordstart, unicode} -body {
run {string wordstart "\uC700\uC700 abc" 8}
} -result 3
+test string-22.14.$noComp {string wordstart, unicode} -body {
+ run {string wordstart "\U1D7CA\U1D7CA abc" 0}
+} -result 0
+test string-22.15.$noComp {string wordstart, unicode} -constraints fullutf -body {
+ run {string wordstart "\U1D7CA\U1D7CA abc" 10}
+} -result 3
test string-23.0.$noComp {string is boolean, Bug 1187123} testindexobj {
set x 5