8 files changed, 91 insertions, 49 deletions
diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c
index 47076ec..b41d312 100644
--- a/generic/tclCmdIL.c
+++ b/generic/tclCmdIL.c
@@ -2945,7 +2945,7 @@ Tcl_LsearchObjCmd(
     double patDouble, objDouble;
     SortInfo sortInfo;
     Tcl_Obj *patObj, **listv, *listPtr, *startPtr, *itemPtr;
-    SortStrCmpFn_t strCmpFn = strcmp;
+    SortStrCmpFn_t strCmpFn = TclUtfCmp;
     Tcl_RegExp regexp = NULL;
     static const char *const options[] = {
 	"-all",	    "-ascii",   "-bisect", "-decreasing", "-dictionary",
@@ -4263,7 +4263,7 @@ SortCompare(
     int order = 0;
 
     if (infoPtr->sortMode == SORTMODE_ASCII) {
-	order = strcmp(elemPtr1->collationKey.strValuePtr,
+	order = TclUtfCmp(elemPtr1->collationKey.strValuePtr,
 		elemPtr2->collationKey.strValuePtr);
     } else if (infoPtr->sortMode == SORTMODE_ASCII_NC) {
 	order = TclUtfCasecmp(elemPtr1->collationKey.strValuePtr,
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index ad1dd5f..a206cc5 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -3547,7 +3547,7 @@ TclNRSwitchObjCmd(
 	OPT_LAST
     };
     typedef int (*strCmpFn_t)(const char *, const char *);
-    strCmpFn_t strCmpFn = strcmp;
+    strCmpFn_t strCmpFn = TclUtfCmp;
 
     mode = OPT_EXACT;
     foundmode = 0;
diff --git a/generic/tclEnsemble.c b/generic/tclEnsemble.c
index c5ccd22..a981851 100644
--- a/generic/tclEnsemble.c
+++ b/generic/tclEnsemble.c
@@ -732,7 +732,6 @@ Tcl_CreateEnsemble(
     Tcl_Namespace *namespacePtr,
     int flags)
 {
-    Tcl_Obj *nameObj = NULL;
     Namespace *nsPtr = (Namespace *)namespacePtr, *foundNsPtr, *altNsPtr,
     	*actualNsPtr;
     const char * simpleName;
@@ -741,11 +740,8 @@ Tcl_CreateEnsemble(
 	nsPtr = (Namespace *) TclGetCurrentNamespace(interp);
     }
 
-    TclGetNamespaceForQualName(interp, name, nsPtr, 0,
+    TclGetNamespaceForQualName(interp, name, nsPtr, TCL_CREATE_NS_IF_UNKNOWN,
     	&foundNsPtr, &altNsPtr, &actualNsPtr, &simpleName);
-    if (nameObj != NULL) {
-    	TclDecrRefCount(nameObj);
-    }
     return TclCreateEnsembleInNs(interp, simpleName,
 	(Tcl_Namespace *) foundNsPtr, (Tcl_Namespace *) nsPtr, flags);
 }
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 7d5071f..44b181e 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3172,6 +3172,7 @@ MODULE_SCOPE int	TclTrimLeft(const char *bytes, int numBytes,
 			    const char *trim, int numTrim);
 MODULE_SCOPE int	TclTrimRight(const char *bytes, int numBytes,
 			    const char *trim, int numTrim);
+MODULE_SCOPE int	TclUtfCmp(const char *cs, const char *ct);
 MODULE_SCOPE int	TclUtfCasecmp(const char *cs, const char *ct);
 MODULE_SCOPE int	TclUtfCount(int ch);
 MODULE_SCOPE Tcl_Obj *	TclpNativeToNormalized(ClientData clientData);
@@ -4399,7 +4400,7 @@ MODULE_SCOPE void	TclDbInitNewObj(Tcl_Obj *objPtr, const char *file,
  */
 
 #define TclUtfToUniChar(str, chPtr) \
-	((((unsigned char) *(str)) < 0xC0) ?		\
+	((((unsigned char) *(str)) < 0x80) ?		\
 	    ((*(chPtr) = (unsigned char) *(str)), 1)	\
 	    : Tcl_UtfToUniChar(str, chPtr))
 
diff --git a/generic/tclScan.c b/generic/tclScan.c
index 7f71262..e0798df 100644
--- a/generic/tclScan.c
+++ b/generic/tclScan.c
@@ -889,7 +889,7 @@ Tcl_ScanObjCmd(
 	    i = (int)sch;
 #if TCL_UTF_MAX == 4
 	    if (!offset) {
-		offset = Tcl_UtfToUniChar(string, &sch);
+		offset = TclUtfToUniChar(string, &sch);
 		i = (((i<<10) & 0x0FFC00) + 0x10000) + (sch & 0x3FF);
 	    }
 #endif
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 5ffdd27..d7d896b 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -1843,20 +1843,20 @@ Tcl_AppendFormatToObj(
 	} else if (ch == 'I') {
 	    if ((format[1] == '6') && (format[2] == '4')) {
 		format += (step + 2);
-		step = Tcl_UtfToUniChar(format, &ch);
+		step = TclUtfToUniChar(format, &ch);
 #ifndef TCL_WIDE_INT_IS_LONG
 		useWide = 1;
 #endif
 	    } else if ((format[1] == '3') && (format[2] == '2')) {
 		format += (step + 2);
-		step = Tcl_UtfToUniChar(format, &ch);
+		step = TclUtfToUniChar(format, &ch);
 	    } else {
 		format += step;
-		step = Tcl_UtfToUniChar(format, &ch);
+		step = TclUtfToUniChar(format, &ch);
 	    }
 	} else if ((ch == 't') || (ch == 'z')) {
 	    format += step;
-	    step = Tcl_UtfToUniChar(format, &ch);
+	    step = TclUtfToUniChar(format, &ch);
 #ifndef TCL_WIDE_INT_IS_LONG
 	    if (sizeof(size_t) > sizeof(int)) {
 		useWide = 1;
@@ -1864,7 +1864,7 @@ Tcl_AppendFormatToObj(
 #endif
 	} else if ((ch == 'q') ||(ch == 'j')) {
 	    format += step;
-	    step = Tcl_UtfToUniChar(format, &ch);
+	    step = TclUtfToUniChar(format, &ch);
 #ifndef TCL_WIDE_INT_IS_LONG
 	    useWide = 1;
 #endif
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index a72394d..43636b4 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -720,8 +720,7 @@ Tcl_UniCharAtIndex(
 {
     Tcl_UniChar ch = 0;
 
-    while (index >= 0) {
-	index--;
+    while (index-- >= 0) {
 	src += TclUtfToUniChar(src, &ch);
     }
     return ch;
@@ -751,8 +750,7 @@ Tcl_UtfAtIndex(
 {
     Tcl_UniChar ch = 0;
 
-    while (index > 0) {
-	index--;
+    while (index-- > 0) {
 	src += TclUtfToUniChar(src, &ch);
     }
     return src;
@@ -1066,16 +1064,17 @@ Tcl_UtfNcmp(
 
 	cs += TclUtfToUniChar(cs, &ch1);
 	ct += TclUtfToUniChar(ct, &ch2);
+	if (ch1 != ch2) {
 #if TCL_UTF_MAX == 4
-    /* map high surrogate characters to values > 0xffff */
-    if ((ch1 & 0xFC00) == 0xD800) {
-	ch1 += 0x4000;
-    }
-    if ((ch2 & 0xFC00) == 0xD800) {
-	ch2 += 0x4000;
-    }
+	    /* Surrogates always report higher than non-surrogates */
+	    if (((ch1 & 0xFC00) == 0xD800)) {
+	    if ((ch2 & 0xFC00) != 0xD800) {
+		return ch1;
+	    }
+	    } else if ((ch2 & 0xFC00) == 0xD800) {
+		return -ch2;
+	    }
 #endif
-	if (ch1 != ch2) {
 	    return (ch1 - ch2);
 	}
     }
@@ -1116,16 +1115,17 @@ Tcl_UtfNcasecmp(
 	 */
 	cs += TclUtfToUniChar(cs, &ch1);
 	ct += TclUtfToUniChar(ct, &ch2);
+	if (ch1 != ch2) {
 #if TCL_UTF_MAX == 4
-    /* map high surrogate characters to values > 0xffff */
-    if ((ch1 & 0xFC00) == 0xD800) {
-	ch1 += 0x4000;
-    }
-    if ((ch2 & 0xFC00) == 0xD800) {
-	ch2 += 0x4000;
-    }
+	    /* Surrogates always report higher than non-surrogates */
+	    if (((ch1 & 0xFC00) == 0xD800)) {
+	    if ((ch2 & 0xFC00) != 0xD800) {
+		return ch1;
+	    }
+	    } else if ((ch2 & 0xFC00) == 0xD800) {
+		return -ch2;
+	    }
 #endif
-	if (ch1 != ch2) {
 	    ch1 = Tcl_UniCharToLower(ch1);
 	    ch2 = Tcl_UniCharToLower(ch2);
 	    if (ch1 != ch2) {
@@ -1135,6 +1135,52 @@ Tcl_UtfNcasecmp(
     }
     return 0;
 }
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclUtfCmp --
+ *
+ *	Compare UTF chars of string cs to string ct case sensitively.
+ *	Replacement for strcmp in Tcl core, in places where UTF-8 should
+ *	be handled.
+ *
+ * Results:
+ *	Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
+ *
+ * Side effects:
+ *	None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclUtfCmp(
+    const char *cs,		/* UTF string to compare to ct. */
+    const char *ct)		/* UTF string cs is compared to. */
+{
+    Tcl_UniChar ch1 = 0, ch2 = 0;
+
+    while (*cs && *ct) {
+	cs += TclUtfToUniChar(cs, &ch1);
+	ct += TclUtfToUniChar(ct, &ch2);
+	if (ch1 != ch2) {
+#if TCL_UTF_MAX == 4
+	    /* A high surrogate (0xD800-0xDBFF) sorts above everything else */
+	    if (((ch1 & 0xFC00) == 0xD800)) {
+	    if ((ch2 & 0xFC00) != 0xD800) {
+		return ch1;
+	    }
+	    } else if ((ch2 & 0xFC00) == 0xD800) {
+		return -ch2;
+	    }
+#endif
+	    return ch1 - ch2;
+	}
+    }
+    return UCHAR(*cs) - UCHAR(*ct);	/* cs or ct (or both) is at its NUL */
+}
+
 
 /*
  *----------------------------------------------------------------------
@@ -1164,16 +1210,17 @@ TclUtfCasecmp(
     while (*cs && *ct) {
 	cs += TclUtfToUniChar(cs, &ch1);
 	ct += TclUtfToUniChar(ct, &ch2);
+	if (ch1 != ch2) {
 #if TCL_UTF_MAX == 4
-    /* map high surrogate characters to values > 0xffff */
-    if ((ch1 & 0xFC00) == 0xD800) {
-	ch1 += 0x4000;
-    }
-    if ((ch2 & 0xFC00) == 0xD800) {
-	ch2 += 0x4000;
-    }
+	    /* Surrogates always report higher than non-surrogates */
+	    if (((ch1 & 0xFC00) == 0xD800)) {
+	    if ((ch2 & 0xFC00) != 0xD800) {
+		return ch1;
+	    }
+	    } else if ((ch2 & 0xFC00) == 0xD800) {
+		return -ch2;
+	    }
 #endif
-	if (ch1 != ch2) {
 	    ch1 = Tcl_UniCharToLower(ch1);
 	    ch2 = Tcl_UniCharToLower(ch2);
 	    if (ch1 != ch2) {
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 8ab68d9..bbf9466 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -1665,6 +1665,7 @@ TclTrimRight(
 {
     const char *p = bytes + numBytes;
     int pInc;
+    Tcl_UniChar ch1 = 0, ch2 = 0;
 
     if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
 	Tcl_Panic("TclTrimRight works only on null-terminated strings");
@@ -1683,7 +1684,6 @@ TclTrimRight(
      */
 
     do {
-	Tcl_UniChar ch1;
 	const char *q = trim;
 	int bytesLeft = numTrim;
 
@@ -1695,7 +1695,6 @@ TclTrimRight(
 	 */
 
 	do {
-	    Tcl_UniChar ch2;
 	    int qInc = TclUtfToUniChar(q, &ch2);
 
 	    if (ch1 == ch2) {
@@ -1745,6 +1744,7 @@ TclTrimLeft(
     int numTrim)		/* ...and its length in bytes */
 {
     const char *p = bytes;
+	Tcl_UniChar ch1 = 0, ch2 = 0;
 
     if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
 	Tcl_Panic("TclTrimLeft works only on null-terminated strings");
@@ -1763,7 +1763,6 @@ TclTrimLeft(
      */
 
     do {
-	Tcl_UniChar ch1;
 	int pInc = TclUtfToUniChar(p, &ch1);
 	const char *q = trim;
 	int bytesLeft = numTrim;
@@ -1773,7 +1772,6 @@ TclTrimLeft(
 	 */
 
 	do {
-	    Tcl_UniChar ch2;
 	    int qInc = TclUtfToUniChar(q, &ch2);
 
 	    if (ch1 == ch2) {
@@ -2107,7 +2105,7 @@ Tcl_StringCaseMatch(
 {
     int p, charLen;
     const char *pstart = pattern;
-    Tcl_UniChar ch1, ch2;
+    Tcl_UniChar ch1 = 0, ch2 = 0;
 
     while (1) {
 	p = *pattern;
@@ -2217,7 +2215,7 @@ Tcl_StringCaseMatch(
 	 */
 
 	if (p == '[') {
-	    Tcl_UniChar startChar, endChar;
+	    Tcl_UniChar startChar = 0, endChar = 0;
 
 	    pattern++;
 	    if (UCHAR(*str) < 0x80) {