summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2013-05-21 09:27:15 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2013-05-21 09:27:15 (GMT)
commitf193acf08ce4f3fe6db1cb79ab3589d037e5853c (patch)
treeafc89006f3988a24590c5ae650312ddb2bbf5231
parentbf486a2007f9d3d45855d43e623ad85d29bcd0b7 (diff)
downloadtcl-f193acf08ce4f3fe6db1cb79ab3589d037e5853c.zip
tcl-f193acf08ce4f3fe6db1cb79ab3589d037e5853c.tar.gz
tcl-f193acf08ce4f3fe6db1cb79ab3589d037e5853c.tar.bz2
Proposed solution for 3613609: lsort -nocase does not sort non-ASCII correctly
-rw-r--r--generic/tclCmdIL.c10
-rw-r--r--generic/tclCmdMZ.c2
-rw-r--r--generic/tclInt.h1
-rw-r--r--generic/tclUtf.c27
-rw-r--r--tests/cmdIL.test3
5 files changed, 37 insertions, 6 deletions
diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c
index 152e61d..98ec8b4 100644
--- a/generic/tclCmdIL.c
+++ b/generic/tclCmdIL.c
@@ -2807,7 +2807,7 @@ Tcl_LsearchObjCmd(
dataType = INTEGER;
break;
case LSEARCH_NOCASE: /* -nocase */
- strCmpFn = strcasecmp;
+ strCmpFn = TclUtfCasecmp;
noCase = 1;
break;
case LSEARCH_NOT: /* -not */
@@ -3209,7 +3209,7 @@ Tcl_LsearchObjCmd(
*/
if (noCase) {
- match = (strcasecmp(bytes, patternBytes) == 0);
+ match = (TclUtfCasecmp(bytes, patternBytes) == 0);
} else {
match = (memcmp(bytes, patternBytes,
(size_t) length) == 0);
@@ -3712,7 +3712,7 @@ Tcl_LsortObjCmd(
goto done1;
}
elementArray[i].index.intValue = a;
- } else if (sortInfo.sortMode == SORTMODE_REAL) {
+ } else if (sortMode == SORTMODE_REAL) {
double a;
if (Tcl_GetDoubleFromObj(sortInfo.interp, indexPtr, &a) != TCL_OK) {
sortInfo.resultCode = TCL_ERROR;
@@ -3790,7 +3790,7 @@ Tcl_LsortObjCmd(
ckfree((char *)elementArray);
done:
- if (sortInfo.sortMode == SORTMODE_COMMAND) {
+ if (sortMode == SORTMODE_COMMAND) {
TclDecrRefCount(sortInfo.compareCmdPtr);
TclDecrRefCount(listObj);
sortInfo.compareCmdPtr = NULL;
@@ -3932,7 +3932,7 @@ SortCompare(
order = strcmp(elemPtr1->index.strValuePtr,
elemPtr2->index.strValuePtr);
} else if (infoPtr->sortMode == SORTMODE_ASCII_NC) {
- order = strcasecmp(elemPtr1->index.strValuePtr,
+ order = TclUtfCasecmp(elemPtr1->index.strValuePtr,
elemPtr2->index.strValuePtr);
} else if (infoPtr->sortMode == SORTMODE_DICTIONARY) {
order = DictionaryCompare(elemPtr1->index.strValuePtr,
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 0ad77aa..6fd468c 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -3436,7 +3436,7 @@ Tcl_SwitchObjCmd(
i++;
goto finishedOptions;
case OPT_NOCASE:
- strCmpFn = strcasecmp;
+ strCmpFn = TclUtfCasecmp;
noCase = 1;
break;
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 92251fe..dc28b97 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -2762,6 +2762,7 @@ MODULE_SCOPE Tcl_WideInt TclpGetWideClicks(void);
MODULE_SCOPE double TclpWideClicksToNanoseconds(Tcl_WideInt clicks);
#endif
MODULE_SCOPE Tcl_Obj * TclDisassembleByteCodeObj(Tcl_Obj *objPtr);
+MODULE_SCOPE int TclUtfCasecmp(CONST char *cs, CONST char *ct);
/*
*----------------------------------------------------------------
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 83900e9..9dacb53 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -1101,6 +1101,33 @@ Tcl_UtfNcasecmp(
}
return 0;
}
+
+
+/* Replacement for strcasecmp in Tcl core, in places where UTF-8 should be handled. */
+int
+TclUtfCasecmp(
+ CONST char *cs, /* UTF string to compare to ct. */
+ CONST char *ct) /* UTF string cs is compared to. */
+{
+ Tcl_UniChar ch1, ch2;
+ char c;
+
+ do {
+
+ /* If c == '\0', loop should end. */
+ c = *cs;
+
+ cs += TclUtfToUniChar(cs, &ch1);
+ ct += TclUtfToUniChar(ct, &ch2);
+ if (ch1 != ch2) {
+ ch1 = Tcl_UniCharToLower(ch1);
+ ch2 = Tcl_UniCharToLower(ch2);
+ if (ch1 != ch2) break;
+ }
+ } while (c);
+ return (ch1 - ch2);
+}
+
/*
*----------------------------------------------------------------------
diff --git a/tests/cmdIL.test b/tests/cmdIL.test
index b387e71..c9a10b6 100644
--- a/tests/cmdIL.test
+++ b/tests/cmdIL.test
@@ -394,6 +394,9 @@ test cmdIL-4.34 {SortCompare procedure, -ascii option with -nocase option} {
test cmdIL-4.35 {SortCompare procedure, -ascii option with -nocase option} {
lsort -ascii -nocase {d E c B a D35 d300 100 20}
} {100 20 a B c d d300 D35 E}
+test cmdIL-4.36 {SortCompare procedure, UTF-8 with -nocase option} {
+ lsort -ascii -nocase [list \u101 \u100]
+} [list \u101 \u100]
test cmdIL-5.1 {lsort with list style index} {
lsort -ascii -decreasing -index {0 1} {