summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordkf <donal.k.fellows@manchester.ac.uk>2013-05-22 12:59:45 (GMT)
committerdkf <donal.k.fellows@manchester.ac.uk>2013-05-22 12:59:45 (GMT)
commita515427b5b0f2be828866a2073b4720681e10e0d (patch)
tree218f3ee1b36b96a9c5be01173b1c55abc142432e
parentbf486a2007f9d3d45855d43e623ad85d29bcd0b7 (diff)
parent75b8011dbad373e664a676ed8e3bcfec70313838 (diff)
downloadtcl-a515427b5b0f2be828866a2073b4720681e10e0d.zip
tcl-a515427b5b0f2be828866a2073b4720681e10e0d.tar.gz
tcl-a515427b5b0f2be828866a2073b4720681e10e0d.tar.bz2
[3613609]: Replace strcasecmp() with UTF-8-aware version.
-rw-r--r--ChangeLog6
-rw-r--r--generic/tclCmdIL.c10
-rw-r--r--generic/tclCmdMZ.c2
-rw-r--r--generic/tclInt.h1
-rw-r--r--generic/tclUtf.c40
-rw-r--r--tests/cmdIL.test9
6 files changed, 62 insertions, 6 deletions
diff --git a/ChangeLog b/ChangeLog
index 3f61f95..bbccf19 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2013-05-22 Donal K. Fellows <dkf@users.sf.net>
+
+ * generic/tclUtf.c (TclUtfCasecmp): [Bug 3613609]: Replace problematic
+ uses of strcasecmp with a proper UTF-8-aware version. Affects both
+ [lsearch -nocase] and [lsort -nocase].
+
2013-05-19 Jan Nijtmans <nijtmans@users.sf.net>
* unix/tcl.m4: Fix for FreeBSD, and remove support for older
diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c
index 152e61d..98ec8b4 100644
--- a/generic/tclCmdIL.c
+++ b/generic/tclCmdIL.c
@@ -2807,7 +2807,7 @@ Tcl_LsearchObjCmd(
dataType = INTEGER;
break;
case LSEARCH_NOCASE: /* -nocase */
- strCmpFn = strcasecmp;
+ strCmpFn = TclUtfCasecmp;
noCase = 1;
break;
case LSEARCH_NOT: /* -not */
@@ -3209,7 +3209,7 @@ Tcl_LsearchObjCmd(
*/
if (noCase) {
- match = (strcasecmp(bytes, patternBytes) == 0);
+ match = (TclUtfCasecmp(bytes, patternBytes) == 0);
} else {
match = (memcmp(bytes, patternBytes,
(size_t) length) == 0);
@@ -3712,7 +3712,7 @@ Tcl_LsortObjCmd(
goto done1;
}
elementArray[i].index.intValue = a;
- } else if (sortInfo.sortMode == SORTMODE_REAL) {
+ } else if (sortMode == SORTMODE_REAL) {
double a;
if (Tcl_GetDoubleFromObj(sortInfo.interp, indexPtr, &a) != TCL_OK) {
sortInfo.resultCode = TCL_ERROR;
@@ -3790,7 +3790,7 @@ Tcl_LsortObjCmd(
ckfree((char *)elementArray);
done:
- if (sortInfo.sortMode == SORTMODE_COMMAND) {
+ if (sortMode == SORTMODE_COMMAND) {
TclDecrRefCount(sortInfo.compareCmdPtr);
TclDecrRefCount(listObj);
sortInfo.compareCmdPtr = NULL;
@@ -3932,7 +3932,7 @@ SortCompare(
order = strcmp(elemPtr1->index.strValuePtr,
elemPtr2->index.strValuePtr);
} else if (infoPtr->sortMode == SORTMODE_ASCII_NC) {
- order = strcasecmp(elemPtr1->index.strValuePtr,
+ order = TclUtfCasecmp(elemPtr1->index.strValuePtr,
elemPtr2->index.strValuePtr);
} else if (infoPtr->sortMode == SORTMODE_DICTIONARY) {
order = DictionaryCompare(elemPtr1->index.strValuePtr,
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 0ad77aa..6fd468c 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -3436,7 +3436,7 @@ Tcl_SwitchObjCmd(
i++;
goto finishedOptions;
case OPT_NOCASE:
- strCmpFn = strcasecmp;
+ strCmpFn = TclUtfCasecmp;
noCase = 1;
break;
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 92251fe..dc28b97 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -2762,6 +2762,7 @@ MODULE_SCOPE Tcl_WideInt TclpGetWideClicks(void);
MODULE_SCOPE double TclpWideClicksToNanoseconds(Tcl_WideInt clicks);
#endif
MODULE_SCOPE Tcl_Obj * TclDisassembleByteCodeObj(Tcl_Obj *objPtr);
+MODULE_SCOPE int TclUtfCasecmp(CONST char *cs, CONST char *ct);
/*
*----------------------------------------------------------------
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 83900e9..f3d1758 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -1105,6 +1105,46 @@ Tcl_UtfNcasecmp(
/*
*----------------------------------------------------------------------
*
+ * Tcl_UtfNcasecmp --
+ *
+ * Compare UTF chars of string cs to string ct case insensitively.
+ * Replacement for strcasecmp in Tcl core, in places where UTF-8 should
+ * be handled.
+ *
+ * Results:
+ * Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclUtfCasecmp(
+ CONST char *cs, /* UTF string to compare to ct. */
+ CONST char *ct) /* UTF string cs is compared to. */
+{
+ while (*cs && *ct) {
+ Tcl_UniChar ch1, ch2;
+
+ cs += TclUtfToUniChar(cs, &ch1);
+ ct += TclUtfToUniChar(ct, &ch2);
+ if (ch1 != ch2) {
+ ch1 = Tcl_UniCharToLower(ch1);
+ ch2 = Tcl_UniCharToLower(ch2);
+ if (ch1 != ch2) {
+ return ch1 - ch2;
+ }
+ }
+ }
+ return UCHAR(*cs) - UCHAR(*ct);
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_UniCharToUpper --
*
* Compute the uppercase equivalent of the given Unicode character.
diff --git a/tests/cmdIL.test b/tests/cmdIL.test
index b387e71..6fab269 100644
--- a/tests/cmdIL.test
+++ b/tests/cmdIL.test
@@ -394,6 +394,15 @@ test cmdIL-4.34 {SortCompare procedure, -ascii option with -nocase option} {
test cmdIL-4.35 {SortCompare procedure, -ascii option with -nocase option} {
lsort -ascii -nocase {d E c B a D35 d300 100 20}
} {100 20 a B c d d300 D35 E}
+test cmdIL-4.36 {SortCompare procedure, UTF-8 with -nocase option} {
+ scan [lsort -ascii -nocase [list \u101 \u100]] %c%c%c
+} {257 32 256}
+test cmdIL-4.37 {SortCompare procedure, UTF-8 with -nocase option} {
+ scan [lsort -ascii -nocase [list a\u0000a a]] %c%c%c%c%c
+} {97 32 97 0 97}
+test cmdIL-4.38 {SortCompare procedure, UTF-8 with -nocase option} {
+ scan [lsort -ascii -nocase [list a a\u0000a]] %c%c%c%c%c
+} {97 32 97 0 97}
test cmdIL-5.1 {lsort with list style index} {
lsort -ascii -decreasing -index {0 1} {