[3613609]: Replace strcasecmp() with UTF-8-aware version.

author: dkf <donal.k.fellows@manchester.ac.uk> 2013-05-22 13:07:33 (GMT)
committer: dkf <donal.k.fellows@manchester.ac.uk> 2013-05-22 13:07:33 (GMT)
commit: 0e1a083787efebde534b0323ba4cca840630fe71 (patch)
tree: 21c32e456ae1ff62b2dbb1b4861118c3330a60c3 /generic
parent: 6ede2e9a9c11a27ad6be06a3eceda2f98be8f8d5 (diff)
parent: a515427b5b0f2be828866a2073b4720681e10e0d (diff)
download: tcl-0e1a083787efebde534b0323ba4cca840630fe71.zip
tcl-0e1a083787efebde534b0323ba4cca840630fe71.tar.gz
tcl-0e1a083787efebde534b0323ba4cca840630fe71.tar.bz2
4 files changed, 47 insertions, 6 deletions
diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c
index c70ba23..ac317e0 100644
--- a/generic/tclCmdIL.c
+++ b/generic/tclCmdIL.c
@@ -3005,7 +3005,7 @@ Tcl_LsearchObjCmd(
 	    dataType = INTEGER;
 	    break;
 	case LSEARCH_NOCASE:		/* -nocase */
-	    strCmpFn = strcasecmp;
+	    strCmpFn = TclUtfCasecmp;
 	    noCase = 1;
 	    break;
 	case LSEARCH_NOT:		/* -not */
@@ -3400,7 +3400,7 @@ Tcl_LsearchObjCmd(
 			 */
 
 			if (noCase) {
-			    match = (strcasecmp(bytes, patternBytes) == 0);
+			    match = (TclUtfCasecmp(bytes, patternBytes) == 0);
 			} else {
 			    match = (memcmp(bytes, patternBytes,
 				    (size_t) length) == 0);
@@ -3991,7 +3991,7 @@ Tcl_LsortObjCmd(
 		goto done1;
 	    }
 	    elementArray[i].collationKey.intValue = a;
-	} else if (sortInfo.sortMode == SORTMODE_REAL) {
+	} else if (sortMode == SORTMODE_REAL) {
 	    double a;
 
 	    if (Tcl_GetDoubleFromObj(sortInfo.interp, indexPtr,
@@ -4088,7 +4088,7 @@ Tcl_LsortObjCmd(
     TclStackFree(interp, elementArray);
 
   done:
-    if (sortInfo.sortMode == SORTMODE_COMMAND) {
+    if (sortMode == SORTMODE_COMMAND) {
 	TclDecrRefCount(sortInfo.compareCmdPtr);
 	TclDecrRefCount(listObj);
 	sortInfo.compareCmdPtr = NULL;
@@ -4233,7 +4233,7 @@ SortCompare(
 	order = strcmp(elemPtr1->collationKey.strValuePtr,
 		elemPtr2->collationKey.strValuePtr);
     } else if (infoPtr->sortMode == SORTMODE_ASCII_NC) {
-	order = strcasecmp(elemPtr1->collationKey.strValuePtr,
+	order = TclUtfCasecmp(elemPtr1->collationKey.strValuePtr,
 		elemPtr2->collationKey.strValuePtr);
     } else if (infoPtr->sortMode == SORTMODE_DICTIONARY) {
 	order = DictionaryCompare(elemPtr1->collationKey.strValuePtr,
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index fc4624b..5087fbb 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -3527,7 +3527,7 @@ TclNRSwitchObjCmd(
 	    i++;
 	    goto finishedOptions;
 	case OPT_NOCASE:
-	    strCmpFn = strcasecmp;
+	    strCmpFn = TclUtfCasecmp;
 	    noCase = 1;
 	    break;
 
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 9e1ba09..e60b627 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3094,6 +3094,7 @@ MODULE_SCOPE int	TclTrimLeft(const char *bytes, int numBytes,
 			    const char *trim, int numTrim);
 MODULE_SCOPE int	TclTrimRight(const char *bytes, int numBytes,
 			    const char *trim, int numTrim);
+MODULE_SCOPE int	TclUtfCasecmp(const char *cs, const char *ct);
 MODULE_SCOPE Tcl_Obj *	TclpNativeToNormalized(ClientData clientData);
 MODULE_SCOPE Tcl_Obj *	TclpFilesystemPathType(Tcl_Obj *pathPtr);
 MODULE_SCOPE int	TclpDlopen(Tcl_Interp *interp, Tcl_Obj *pathPtr,
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 18a82f7..4ad6f01 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -1106,6 +1106,46 @@ Tcl_UtfNcasecmp(
 /*
  *----------------------------------------------------------------------
  *
+ * TclUtfCasecmp --
+ *
+ *	Compare UTF chars of string cs to string ct case insensitively.
+ *	Replacement for strcasecmp in Tcl core, in places where UTF-8 should
+ *	be handled.
+ *
+ * Results:
+ *	Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
+ *
+ * Side effects:
+ *	None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclUtfCasecmp(
+    const char *cs,		/* UTF string to compare to ct. */
+    const char *ct)		/* UTF string cs is compared to. */
+{
+    while (*cs && *ct) {		/* Stop at the first NUL in either string. */
+	Tcl_UniChar ch1, ch2;
+
+	cs += TclUtfToUniChar(cs, &ch1);	/* Fill ch1; step by returned count. */
+	ct += TclUtfToUniChar(ct, &ch2);	/* Fill ch2; step by returned count. */
+	if (ch1 != ch2) {		/* Only fold case when the raw values differ. */
+	    ch1 = Tcl_UniCharToLower(ch1);
+	    ch2 = Tcl_UniCharToLower(ch2);
+	    if (ch1 != ch2) {
+		return ch1 - ch2;	/* Sign gives the order of the folded chars. */
+	    }
+	}
+    }
+    return UCHAR(*cs) - UCHAR(*ct);	/* At least one side is at its NUL here. */
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
  * Tcl_UniCharToUpper --
  *
  *	Compute the uppercase equivalent of the given Unicode character.
author	dkf <donal.k.fellows@manchester.ac.uk>	2013-05-22 13:07:33 (GMT)
committer	dkf <donal.k.fellows@manchester.ac.uk>	2013-05-22 13:07:33 (GMT)
commit	0e1a083787efebde534b0323ba4cca840630fe71 (patch)
tree	21c32e456ae1ff62b2dbb1b4861118c3330a60c3 /generic
parent	6ede2e9a9c11a27ad6be06a3eceda2f98be8f8d5 (diff)
parent	a515427b5b0f2be828866a2073b4720681e10e0d (diff)
download	tcl-0e1a083787efebde534b0323ba4cca840630fe71.zip tcl-0e1a083787efebde534b0323ba4cca840630fe71.tar.gz tcl-0e1a083787efebde534b0323ba4cca840630fe71.tar.bz2