summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordkf <donal.k.fellows@manchester.ac.uk>2013-05-22 13:07:33 (GMT)
committerdkf <donal.k.fellows@manchester.ac.uk>2013-05-22 13:07:33 (GMT)
commit0e1a083787efebde534b0323ba4cca840630fe71 (patch)
tree21c32e456ae1ff62b2dbb1b4861118c3330a60c3
parent6ede2e9a9c11a27ad6be06a3eceda2f98be8f8d5 (diff)
parenta515427b5b0f2be828866a2073b4720681e10e0d (diff)
downloadtcl-0e1a083787efebde534b0323ba4cca840630fe71.zip
tcl-0e1a083787efebde534b0323ba4cca840630fe71.tar.gz
tcl-0e1a083787efebde534b0323ba4cca840630fe71.tar.bz2
[3613609]: Replace strcasecmp() with UTF-8-aware version.
-rw-r--r--ChangeLog6
-rw-r--r--generic/tclCmdIL.c10
-rw-r--r--generic/tclCmdMZ.c2
-rw-r--r--generic/tclInt.h1
-rw-r--r--generic/tclUtf.c40
-rw-r--r--tests/cmdIL.test9
6 files changed, 62 insertions, 6 deletions
diff --git a/ChangeLog b/ChangeLog
index 553abbb..34fd231 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,11 @@
2013-05-22 Donal K. Fellows <dkf@users.sf.net>
+ * generic/tclUtf.c (TclUtfCasecmp): [Bug 3613609]: Replace problematic
+ uses of strcasecmp with a proper UTF-8-aware version. Affects both
+ [lsearch -nocase] and [lsort -nocase].
+
+2013-05-22 Donal K. Fellows <dkf@users.sf.net>
+
* doc/file.n: [Bug 3613671]: Added note to portability section on the
fact that [file owned] does not produce useful results on Windows.
diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c
index c70ba23..ac317e0 100644
--- a/generic/tclCmdIL.c
+++ b/generic/tclCmdIL.c
@@ -3005,7 +3005,7 @@ Tcl_LsearchObjCmd(
dataType = INTEGER;
break;
case LSEARCH_NOCASE: /* -nocase */
- strCmpFn = strcasecmp;
+ strCmpFn = TclUtfCasecmp;
noCase = 1;
break;
case LSEARCH_NOT: /* -not */
@@ -3400,7 +3400,7 @@ Tcl_LsearchObjCmd(
*/
if (noCase) {
- match = (strcasecmp(bytes, patternBytes) == 0);
+ match = (TclUtfCasecmp(bytes, patternBytes) == 0);
} else {
match = (memcmp(bytes, patternBytes,
(size_t) length) == 0);
@@ -3991,7 +3991,7 @@ Tcl_LsortObjCmd(
goto done1;
}
elementArray[i].collationKey.intValue = a;
- } else if (sortInfo.sortMode == SORTMODE_REAL) {
+ } else if (sortMode == SORTMODE_REAL) {
double a;
if (Tcl_GetDoubleFromObj(sortInfo.interp, indexPtr,
@@ -4088,7 +4088,7 @@ Tcl_LsortObjCmd(
TclStackFree(interp, elementArray);
done:
- if (sortInfo.sortMode == SORTMODE_COMMAND) {
+ if (sortMode == SORTMODE_COMMAND) {
TclDecrRefCount(sortInfo.compareCmdPtr);
TclDecrRefCount(listObj);
sortInfo.compareCmdPtr = NULL;
@@ -4233,7 +4233,7 @@ SortCompare(
order = strcmp(elemPtr1->collationKey.strValuePtr,
elemPtr2->collationKey.strValuePtr);
} else if (infoPtr->sortMode == SORTMODE_ASCII_NC) {
- order = strcasecmp(elemPtr1->collationKey.strValuePtr,
+ order = TclUtfCasecmp(elemPtr1->collationKey.strValuePtr,
elemPtr2->collationKey.strValuePtr);
} else if (infoPtr->sortMode == SORTMODE_DICTIONARY) {
order = DictionaryCompare(elemPtr1->collationKey.strValuePtr,
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index fc4624b..5087fbb 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -3527,7 +3527,7 @@ TclNRSwitchObjCmd(
i++;
goto finishedOptions;
case OPT_NOCASE:
- strCmpFn = strcasecmp;
+ strCmpFn = TclUtfCasecmp;
noCase = 1;
break;
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 9e1ba09..e60b627 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3094,6 +3094,7 @@ MODULE_SCOPE int TclTrimLeft(const char *bytes, int numBytes,
const char *trim, int numTrim);
MODULE_SCOPE int TclTrimRight(const char *bytes, int numBytes,
const char *trim, int numTrim);
+MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct);
MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(ClientData clientData);
MODULE_SCOPE Tcl_Obj * TclpFilesystemPathType(Tcl_Obj *pathPtr);
MODULE_SCOPE int TclpDlopen(Tcl_Interp *interp, Tcl_Obj *pathPtr,
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 18a82f7..4ad6f01 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -1106,6 +1106,46 @@ Tcl_UtfNcasecmp(
/*
*----------------------------------------------------------------------
*
+ * Tcl_UtfNcasecmp --
+ *
+ * Compare UTF chars of string cs to string ct case insensitively.
+ * Replacement for strcasecmp in Tcl core, in places where UTF-8 should
+ * be handled.
+ *
+ * Results:
+ * Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclUtfCasecmp(
+ const char *cs, /* UTF string to compare to ct. */
+ const char *ct) /* UTF string cs is compared to. */
+{
+ while (*cs && *ct) {
+ Tcl_UniChar ch1, ch2;
+
+ cs += TclUtfToUniChar(cs, &ch1);
+ ct += TclUtfToUniChar(ct, &ch2);
+ if (ch1 != ch2) {
+ ch1 = Tcl_UniCharToLower(ch1);
+ ch2 = Tcl_UniCharToLower(ch2);
+ if (ch1 != ch2) {
+ return ch1 - ch2;
+ }
+ }
+ }
+ return UCHAR(*cs) - UCHAR(*ct);
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_UniCharToUpper --
*
* Compute the uppercase equivalent of the given Unicode character.
diff --git a/tests/cmdIL.test b/tests/cmdIL.test
index 721773f..23a5f96 100644
--- a/tests/cmdIL.test
+++ b/tests/cmdIL.test
@@ -417,6 +417,15 @@ test cmdIL-4.34 {SortCompare procedure, -ascii option with -nocase option} {
test cmdIL-4.35 {SortCompare procedure, -ascii option with -nocase option} {
lsort -ascii -nocase {d E c B a D35 d300 100 20}
} {100 20 a B c d d300 D35 E}
+test cmdIL-4.36 {SortCompare procedure, UTF-8 with -nocase option} {
+ scan [lsort -ascii -nocase [list \u101 \u100]] %c%c%c
+} {257 32 256}
+test cmdIL-4.37 {SortCompare procedure, UTF-8 with -nocase option} {
+ scan [lsort -ascii -nocase [list a\u0000a a]] %c%c%c%c%c
+} {97 32 97 0 97}
+test cmdIL-4.38 {SortCompare procedure, UTF-8 with -nocase option} {
+ scan [lsort -ascii -nocase [list a a\u0000a]] %c%c%c%c%c
+} {97 32 97 0 97}
test cmdIL-5.1 {lsort with list style index} {
lsort -ascii -decreasing -index {0 1} {