3 files changed, 29 insertions, 19 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 8f5ad7a..bc7874a 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -1175,7 +1175,7 @@ Tcl_SplitObjCmd(
     int objc,			/* Number of arguments. */
     Tcl_Obj *const objv[])	/* Argument objects. */
 {
-    Tcl_UniChar ch = 0;
+    int ch = 0;
     int len;
     const char *splitChars;
     const char *stringPtr;
@@ -1218,10 +1218,8 @@ Tcl_SplitObjCmd(
 	Tcl_InitHashTable(&charReuseTable, TCL_ONE_WORD_KEYS);
 
 	for ( ; stringPtr < end; stringPtr += len) {
-	    int ucs4;
-
-	    len = TclUtfToUCS4(stringPtr, &ucs4);
-	    hPtr = Tcl_CreateHashEntry(&charReuseTable, INT2PTR(ucs4), &isNew);
+	    len = TclUtfToUCS4(stringPtr, &ch);
+	    hPtr = Tcl_CreateHashEntry(&charReuseTable, INT2PTR(ch), &isNew);
 	    if (isNew) {
 		TclNewStringObj(objPtr, stringPtr, len);
 
@@ -1256,7 +1254,7 @@ Tcl_SplitObjCmd(
     } else {
 	const char *element, *p, *splitEnd;
 	size_t splitLen;
-	Tcl_UniChar splitChar = 0;
+	int splitChar;
 
 	/*
 	 * Normal case: split on any of a given set of characters. Discard
@@ -1266,9 +1264,9 @@ Tcl_SplitObjCmd(
 	splitEnd = splitChars + splitCharLen;
 
 	for (element = stringPtr; stringPtr < end; stringPtr += len) {
-	    len = TclUtfToUniChar(stringPtr, &ch);
+	    len = TclUtfToUCS4(stringPtr, &ch);
 	    for (p = splitChars; p < splitEnd; p += splitLen) {
-		splitLen = TclUtfToUniChar(p, &splitChar);
+		splitLen = TclUtfToUCS4(p, &splitChar);
 		if (ch == splitChar) {
 		    TclNewStringObj(objPtr, element, stringPtr - element);
 		    Tcl_ListObjAppendElement(NULL, listPtr, objPtr);
@@ -2487,9 +2485,7 @@ StringRevCmd(
  * StringStartCmd --
  *
  *	This procedure is invoked to process the "string wordstart" Tcl
- *	command. See the user documentation for details on what it does. Note
- *	that this command only functions correctly on properly formed Tcl UTF
- *	strings.
+ *	command. See the user documentation for details on what it does.
  *
  * Results:
  *	A standard Tcl result.
@@ -2507,7 +2503,7 @@ StringStartCmd(
     int objc,			/* Number of arguments. */
     Tcl_Obj *const objv[])	/* Argument objects. */
 {
-    Tcl_UniChar ch = 0;
+    int ch;
     const char *p, *string;
     size_t numChars, length, cur, index;
 
@@ -2529,7 +2525,7 @@ StringStartCmd(
     if (index + 1 > 1) {
 	p = Tcl_UtfAtIndex(string, index);
 	for (cur = index; cur != TCL_INDEX_NONE; cur--) {
-	    TclUtfToUniChar(p, &ch);
+	    TclUtfToUCS4(p, &ch);
 	    if (!Tcl_UniCharIsWordChar(ch)) {
 		break;
 	    }
@@ -2549,8 +2545,7 @@ StringStartCmd(
  * StringEndCmd --
  *
  *	This procedure is invoked to process the "string wordend" Tcl command.
- *	See the user documentation for details on what it does. Note that this
- *	command only functions correctly on properly formed Tcl UTF strings.
+ *	See the user documentation for details on what it does.
  *
  * Results:
  *	A standard Tcl result.
@@ -2568,7 +2563,7 @@ StringEndCmd(
     int objc,			/* Number of arguments. */
     Tcl_Obj *const objv[])	/* Argument objects. */
 {
-    Tcl_UniChar ch = 0;
+    int ch;
     const char *p, *end, *string;
     size_t length, numChars, cur, index;
 
@@ -2590,7 +2585,7 @@ StringEndCmd(
 	p = Tcl_UtfAtIndex(string, index);
 	end = string+length;
 	for (cur = index; p < end; cur++) {
-	    p += TclUtfToUniChar(p, &ch);
+	    p += TclUtfToUCS4(p, &ch);
 	    if (!Tcl_UniCharIsWordChar(ch)) {
 		break;
 	    }
diff --git a/tests/split.test b/tests/split.test
index 2d180e0..d00c452 100644
--- a/tests/split.test
+++ b/tests/split.test
@@ -71,8 +71,11 @@ test split-1.14 {basic split commands} {
     split ",12,,,34,56," {,}
 } {{} 12 {} {} 34 56 {}}
 test split-1.15 {basic split commands} -body {
-    split "a\U01f4a9b" {}
-} -result "a \U01f4a9 b"
+    split "a\U1F4A9b" {}
+} -result "a \U1F4A9 b"
+test split-1.16 {basic split commands} -body {
+    split "a\U1F4A9b" \U1F4A9
+} -result "a b"
 
 test split-2.1 {split errors} {
     list [catch split msg] $msg $errorCode
diff --git a/tests/string.test b/tests/string.test
index 3e8e6ae..b8f01a5 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -1856,6 +1856,12 @@ test string-21.13.$noComp {string wordend, unicode} -body {
 test string-21.14.$noComp {string wordend, unicode} -body {
     run {string wordend "\uC700\uC700 abc" 8}
 } -result 6
+test string-21.15.$noComp {string wordend, unicode} -body {
+    run {string wordend "\U1D7CA\U1D7CA abc" 0}
+} -result 2
+test string-21.16.$noComp {string wordend, unicode} -constraints fullutf -body {
+    run {string wordend "\U1D7CA\U1D7CA abc" 10}
+} -result 6
 
 test string-22.1.$noComp {string wordstart} -body {
     list [catch {run {string word a}} msg] $msg
@@ -1896,6 +1902,12 @@ test string-22.12.$noComp {string wordstart, unicode} -body {
 test string-22.13.$noComp {string wordstart, unicode} -body {
     run {string wordstart "\uC700\uC700 abc" 8}
 } -result 3
+test string-22.14.$noComp {string wordstart, unicode} -body {
+    run {string wordstart "\U1D7CA\U1D7CA abc" 0}
+} -result 0
+test string-22.15.$noComp {string wordstart, unicode} -constraints fullutf -body {
+    run {string wordstart "\U1D7CA\U1D7CA abc" 10}
+} -result 3
 
 test string-23.0.$noComp {string is boolean, Bug 1187123} testindexobj {
     set x 5