summary refs log tree commit diff stats
path: root/generic
diff options
context:
space:
mode:
Diffstat (limited to 'generic')
-rw-r--r-- generic/tclCmdIL.c | 4
-rw-r--r-- generic/tclCmdMZ.c | 2
-rw-r--r-- generic/tclEnsemble.c | 6
-rw-r--r-- generic/tclInt.h | 3
-rw-r--r-- generic/tclScan.c | 2
-rw-r--r-- generic/tclStringObj.c | 10
-rw-r--r-- generic/tclUtf.c | 103
-rw-r--r-- generic/tclUtil.c | 10
8 files changed, 91 insertions, 49 deletions
diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c
index 47076ec..b41d312 100644
--- a/generic/tclCmdIL.c
+++ b/generic/tclCmdIL.c
@@ -2945,7 +2945,7 @@ Tcl_LsearchObjCmd(
double patDouble, objDouble;
SortInfo sortInfo;
Tcl_Obj *patObj, **listv, *listPtr, *startPtr, *itemPtr;
- SortStrCmpFn_t strCmpFn = strcmp;
+ SortStrCmpFn_t strCmpFn = TclUtfCmp;
Tcl_RegExp regexp = NULL;
static const char *const options[] = {
"-all", "-ascii", "-bisect", "-decreasing", "-dictionary",
@@ -4263,7 +4263,7 @@ SortCompare(
int order = 0;
if (infoPtr->sortMode == SORTMODE_ASCII) {
- order = strcmp(elemPtr1->collationKey.strValuePtr,
+ order = TclUtfCmp(elemPtr1->collationKey.strValuePtr,
elemPtr2->collationKey.strValuePtr);
} else if (infoPtr->sortMode == SORTMODE_ASCII_NC) {
order = TclUtfCasecmp(elemPtr1->collationKey.strValuePtr,
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index ad1dd5f..a206cc5 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -3547,7 +3547,7 @@ TclNRSwitchObjCmd(
OPT_LAST
};
typedef int (*strCmpFn_t)(const char *, const char *);
- strCmpFn_t strCmpFn = strcmp;
+ strCmpFn_t strCmpFn = TclUtfCmp;
mode = OPT_EXACT;
foundmode = 0;
diff --git a/generic/tclEnsemble.c b/generic/tclEnsemble.c
index c5ccd22..a981851 100644
--- a/generic/tclEnsemble.c
+++ b/generic/tclEnsemble.c
@@ -732,7 +732,6 @@ Tcl_CreateEnsemble(
Tcl_Namespace *namespacePtr,
int flags)
{
- Tcl_Obj *nameObj = NULL;
Namespace *nsPtr = (Namespace *)namespacePtr, *foundNsPtr, *altNsPtr,
*actualNsPtr;
const char * simpleName;
@@ -741,11 +740,8 @@ Tcl_CreateEnsemble(
nsPtr = (Namespace *) TclGetCurrentNamespace(interp);
}
- TclGetNamespaceForQualName(interp, name, nsPtr, 0,
+ TclGetNamespaceForQualName(interp, name, nsPtr, TCL_CREATE_NS_IF_UNKNOWN,
&foundNsPtr, &altNsPtr, &actualNsPtr, &simpleName);
- if (nameObj != NULL) {
- TclDecrRefCount(nameObj);
- }
return TclCreateEnsembleInNs(interp, simpleName,
(Tcl_Namespace *) foundNsPtr, (Tcl_Namespace *) nsPtr, flags);
}
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 7d5071f..44b181e 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3172,6 +3172,7 @@ MODULE_SCOPE int TclTrimLeft(const char *bytes, int numBytes,
const char *trim, int numTrim);
MODULE_SCOPE int TclTrimRight(const char *bytes, int numBytes,
const char *trim, int numTrim);
+MODULE_SCOPE int TclUtfCmp(const char *cs, const char *ct);
MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct);
MODULE_SCOPE int TclUtfCount(int ch);
MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(ClientData clientData);
@@ -4399,7 +4400,7 @@ MODULE_SCOPE void TclDbInitNewObj(Tcl_Obj *objPtr, const char *file,
*/
#define TclUtfToUniChar(str, chPtr) \
- ((((unsigned char) *(str)) < 0xC0) ? \
+ ((((unsigned char) *(str)) < 0x80) ? \
((*(chPtr) = (unsigned char) *(str)), 1) \
: Tcl_UtfToUniChar(str, chPtr))
diff --git a/generic/tclScan.c b/generic/tclScan.c
index 7f71262..e0798df 100644
--- a/generic/tclScan.c
+++ b/generic/tclScan.c
@@ -889,7 +889,7 @@ Tcl_ScanObjCmd(
i = (int)sch;
#if TCL_UTF_MAX == 4
if (!offset) {
- offset = Tcl_UtfToUniChar(string, &sch);
+ offset = TclUtfToUniChar(string, &sch);
i = (((i<<10) & 0x0FFC00) + 0x10000) + (sch & 0x3FF);
}
#endif
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 5ffdd27..d7d896b 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -1843,20 +1843,20 @@ Tcl_AppendFormatToObj(
} else if (ch == 'I') {
if ((format[1] == '6') && (format[2] == '4')) {
format += (step + 2);
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
#ifndef TCL_WIDE_INT_IS_LONG
useWide = 1;
#endif
} else if ((format[1] == '3') && (format[2] == '2')) {
format += (step + 2);
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
} else {
format += step;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
}
} else if ((ch == 't') || (ch == 'z')) {
format += step;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
#ifndef TCL_WIDE_INT_IS_LONG
if (sizeof(size_t) > sizeof(int)) {
useWide = 1;
@@ -1864,7 +1864,7 @@ Tcl_AppendFormatToObj(
#endif
} else if ((ch == 'q') ||(ch == 'j')) {
format += step;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
#ifndef TCL_WIDE_INT_IS_LONG
useWide = 1;
#endif
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index a72394d..43636b4 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -720,8 +720,7 @@ Tcl_UniCharAtIndex(
{
Tcl_UniChar ch = 0;
- while (index >= 0) {
- index--;
+ while (index-- >= 0) {
src += TclUtfToUniChar(src, &ch);
}
return ch;
@@ -751,8 +750,7 @@ Tcl_UtfAtIndex(
{
Tcl_UniChar ch = 0;
- while (index > 0) {
- index--;
+ while (index-- > 0) {
src += TclUtfToUniChar(src, &ch);
}
return src;
@@ -1066,16 +1064,17 @@ Tcl_UtfNcmp(
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
+ if (ch1 != ch2) {
#if TCL_UTF_MAX == 4
- /* map high surrogate characters to values > 0xffff */
- if ((ch1 & 0xFC00) == 0xD800) {
- ch1 += 0x4000;
- }
- if ((ch2 & 0xFC00) == 0xD800) {
- ch2 += 0x4000;
- }
+ /* Surrogates always report higher than non-surrogates */
+ if (((ch1 & 0xFC00) == 0xD800)) {
+ if ((ch2 & 0xFC00) != 0xD800) {
+ return ch1;
+ }
+ } else if ((ch2 & 0xFC00) == 0xD800) {
+ return -ch2;
+ }
#endif
- if (ch1 != ch2) {
return (ch1 - ch2);
}
}
@@ -1116,16 +1115,17 @@ Tcl_UtfNcasecmp(
*/
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
+ if (ch1 != ch2) {
#if TCL_UTF_MAX == 4
- /* map high surrogate characters to values > 0xffff */
- if ((ch1 & 0xFC00) == 0xD800) {
- ch1 += 0x4000;
- }
- if ((ch2 & 0xFC00) == 0xD800) {
- ch2 += 0x4000;
- }
+ /* Surrogates always report higher than non-surrogates */
+ if (((ch1 & 0xFC00) == 0xD800)) {
+ if ((ch2 & 0xFC00) != 0xD800) {
+ return ch1;
+ }
+ } else if ((ch2 & 0xFC00) == 0xD800) {
+ return -ch2;
+ }
#endif
- if (ch1 != ch2) {
ch1 = Tcl_UniCharToLower(ch1);
ch2 = Tcl_UniCharToLower(ch2);
if (ch1 != ch2) {
@@ -1135,6 +1135,52 @@ Tcl_UtfNcasecmp(
}
return 0;
}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclUtfCmp --
+ *
+ * Compare UTF chars of string cs to string ct case sensitively.
+ * Replacement for strcmp in Tcl core, in places where UTF-8 should
+ * be handled.
+ *
+ * Results:
+ * Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclUtfCmp(
+ const char *cs, /* UTF string to compare to ct. */
+ const char *ct) /* UTF string cs is compared to. */
+{
+ Tcl_UniChar ch1 = 0, ch2 = 0;
+
+ while (*cs && *ct) {
+ cs += TclUtfToUniChar(cs, &ch1);
+ ct += TclUtfToUniChar(ct, &ch2);
+ if (ch1 != ch2) {
+#if TCL_UTF_MAX == 4
+ /* Surrogates always report higher than non-surrogates */
+ if (((ch1 & 0xFC00) == 0xD800)) {
+ if ((ch2 & 0xFC00) != 0xD800) {
+ return ch1;
+ }
+ } else if ((ch2 & 0xFC00) == 0xD800) {
+ return -ch2;
+ }
+#endif
+ return ch1 - ch2;
+ }
+ }
+ return UCHAR(*cs) - UCHAR(*ct);
+}
+
/*
*----------------------------------------------------------------------
@@ -1164,16 +1210,17 @@ TclUtfCasecmp(
while (*cs && *ct) {
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
+ if (ch1 != ch2) {
#if TCL_UTF_MAX == 4
- /* map high surrogate characters to values > 0xffff */
- if ((ch1 & 0xFC00) == 0xD800) {
- ch1 += 0x4000;
- }
- if ((ch2 & 0xFC00) == 0xD800) {
- ch2 += 0x4000;
- }
+ /* Surrogates always report higher than non-surrogates */
+ if (((ch1 & 0xFC00) == 0xD800)) {
+ if ((ch2 & 0xFC00) != 0xD800) {
+ return ch1;
+ }
+ } else if ((ch2 & 0xFC00) == 0xD800) {
+ return -ch2;
+ }
#endif
- if (ch1 != ch2) {
ch1 = Tcl_UniCharToLower(ch1);
ch2 = Tcl_UniCharToLower(ch2);
if (ch1 != ch2) {
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 8ab68d9..bbf9466 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -1665,6 +1665,7 @@ TclTrimRight(
{
const char *p = bytes + numBytes;
int pInc;
+ Tcl_UniChar ch1 = 0, ch2 = 0;
if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
Tcl_Panic("TclTrimRight works only on null-terminated strings");
@@ -1683,7 +1684,6 @@ TclTrimRight(
*/
do {
- Tcl_UniChar ch1;
const char *q = trim;
int bytesLeft = numTrim;
@@ -1695,7 +1695,6 @@ TclTrimRight(
*/
do {
- Tcl_UniChar ch2;
int qInc = TclUtfToUniChar(q, &ch2);
if (ch1 == ch2) {
@@ -1745,6 +1744,7 @@ TclTrimLeft(
int numTrim) /* ...and its length in bytes */
{
const char *p = bytes;
+ Tcl_UniChar ch1 = 0, ch2 = 0;
if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
Tcl_Panic("TclTrimLeft works only on null-terminated strings");
@@ -1763,7 +1763,6 @@ TclTrimLeft(
*/
do {
- Tcl_UniChar ch1;
int pInc = TclUtfToUniChar(p, &ch1);
const char *q = trim;
int bytesLeft = numTrim;
@@ -1773,7 +1772,6 @@ TclTrimLeft(
*/
do {
- Tcl_UniChar ch2;
int qInc = TclUtfToUniChar(q, &ch2);
if (ch1 == ch2) {
@@ -2107,7 +2105,7 @@ Tcl_StringCaseMatch(
{
int p, charLen;
const char *pstart = pattern;
- Tcl_UniChar ch1, ch2;
+ Tcl_UniChar ch1 = 0, ch2 = 0;
while (1) {
p = *pattern;
@@ -2217,7 +2215,7 @@ Tcl_StringCaseMatch(
*/
if (p == '[') {
- Tcl_UniChar startChar, endChar;
+ Tcl_UniChar startChar = 0, endChar = 0;
pattern++;
if (UCHAR(*str) < 0x80) {