diff options
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tclCmdIL.c | 4 | ||||
-rw-r--r-- | generic/tclCmdMZ.c | 2 | ||||
-rw-r--r-- | generic/tclEnsemble.c | 6 | ||||
-rw-r--r-- | generic/tclInt.h | 3 | ||||
-rw-r--r-- | generic/tclScan.c | 2 | ||||
-rw-r--r-- | generic/tclStringObj.c | 10 | ||||
-rw-r--r-- | generic/tclUtf.c | 103 | ||||
-rw-r--r-- | generic/tclUtil.c | 10 |
8 files changed, 91 insertions, 49 deletions
diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c index 47076ec..b41d312 100644 --- a/generic/tclCmdIL.c +++ b/generic/tclCmdIL.c @@ -2945,7 +2945,7 @@ Tcl_LsearchObjCmd( double patDouble, objDouble; SortInfo sortInfo; Tcl_Obj *patObj, **listv, *listPtr, *startPtr, *itemPtr; - SortStrCmpFn_t strCmpFn = strcmp; + SortStrCmpFn_t strCmpFn = TclUtfCmp; Tcl_RegExp regexp = NULL; static const char *const options[] = { "-all", "-ascii", "-bisect", "-decreasing", "-dictionary", @@ -4263,7 +4263,7 @@ SortCompare( int order = 0; if (infoPtr->sortMode == SORTMODE_ASCII) { - order = strcmp(elemPtr1->collationKey.strValuePtr, + order = TclUtfCmp(elemPtr1->collationKey.strValuePtr, elemPtr2->collationKey.strValuePtr); } else if (infoPtr->sortMode == SORTMODE_ASCII_NC) { order = TclUtfCasecmp(elemPtr1->collationKey.strValuePtr, diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index ad1dd5f..a206cc5 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -3547,7 +3547,7 @@ TclNRSwitchObjCmd( OPT_LAST }; typedef int (*strCmpFn_t)(const char *, const char *); - strCmpFn_t strCmpFn = strcmp; + strCmpFn_t strCmpFn = TclUtfCmp; mode = OPT_EXACT; foundmode = 0; diff --git a/generic/tclEnsemble.c b/generic/tclEnsemble.c index c5ccd22..a981851 100644 --- a/generic/tclEnsemble.c +++ b/generic/tclEnsemble.c @@ -732,7 +732,6 @@ Tcl_CreateEnsemble( Tcl_Namespace *namespacePtr, int flags) { - Tcl_Obj *nameObj = NULL; Namespace *nsPtr = (Namespace *)namespacePtr, *foundNsPtr, *altNsPtr, *actualNsPtr; const char * simpleName; @@ -741,11 +740,8 @@ Tcl_CreateEnsemble( nsPtr = (Namespace *) TclGetCurrentNamespace(interp); } - TclGetNamespaceForQualName(interp, name, nsPtr, 0, + TclGetNamespaceForQualName(interp, name, nsPtr, TCL_CREATE_NS_IF_UNKNOWN, &foundNsPtr, &altNsPtr, &actualNsPtr, &simpleName); - if (nameObj != NULL) { - TclDecrRefCount(nameObj); - } return TclCreateEnsembleInNs(interp, simpleName, (Tcl_Namespace *) foundNsPtr, (Tcl_Namespace *) nsPtr, flags); } diff --git a/generic/tclInt.h b/generic/tclInt.h index 7d5071f..44b181e 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3172,6 +3172,7 @@ MODULE_SCOPE int TclTrimLeft(const char *bytes, int numBytes, const char *trim, int numTrim); MODULE_SCOPE int TclTrimRight(const char *bytes, int numBytes, const char *trim, int numTrim); +MODULE_SCOPE int TclUtfCmp(const char *cs, const char *ct); MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct); MODULE_SCOPE int TclUtfCount(int ch); MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(ClientData clientData); @@ -4399,7 +4400,7 @@ MODULE_SCOPE void TclDbInitNewObj(Tcl_Obj *objPtr, const char *file, */ #define TclUtfToUniChar(str, chPtr) \ - ((((unsigned char) *(str)) < 0xC0) ? \ + ((((unsigned char) *(str)) < 0x80) ? \ ((*(chPtr) = (unsigned char) *(str)), 1) \ : Tcl_UtfToUniChar(str, chPtr)) diff --git a/generic/tclScan.c b/generic/tclScan.c index 7f71262..e0798df 100644 --- a/generic/tclScan.c +++ b/generic/tclScan.c @@ -889,7 +889,7 @@ Tcl_ScanObjCmd( i = (int)sch; #if TCL_UTF_MAX == 4 if (!offset) { - offset = Tcl_UtfToUniChar(string, &sch); + offset = TclUtfToUniChar(string, &sch); i = (((i<<10) & 0x0FFC00) + 0x10000) + (sch & 0x3FF); } #endif diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 5ffdd27..d7d896b 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -1843,20 +1843,20 @@ Tcl_AppendFormatToObj( } else if (ch == 'I') { if ((format[1] == '6') && (format[2] == '4')) { format += (step + 2); - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); #ifndef TCL_WIDE_INT_IS_LONG useWide = 1; #endif } else if ((format[1] == '3') && (format[2] == '2')) { format += (step + 2); - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); } else { format += step; - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); } } else if ((ch == 't') || (ch == 'z')) { format += step; - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); #ifndef TCL_WIDE_INT_IS_LONG if (sizeof(size_t) > sizeof(int)) { useWide = 1; @@ -1864,7 +1864,7 @@ Tcl_AppendFormatToObj( #endif } else if ((ch == 'q') ||(ch == 'j')) { format += step; - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); #ifndef TCL_WIDE_INT_IS_LONG useWide = 1; #endif diff --git a/generic/tclUtf.c b/generic/tclUtf.c index a72394d..43636b4 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -720,8 +720,7 @@ Tcl_UniCharAtIndex( { Tcl_UniChar ch = 0; - while (index >= 0) { - index--; + while (index-- >= 0) { src += TclUtfToUniChar(src, &ch); } return ch; @@ -751,8 +750,7 @@ Tcl_UtfAtIndex( { Tcl_UniChar ch = 0; - while (index > 0) { - index--; + while (index-- > 0) { src += TclUtfToUniChar(src, &ch); } return src; @@ -1066,16 +1064,17 @@ Tcl_UtfNcmp( cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); + if (ch1 != ch2) { #if TCL_UTF_MAX == 4 - /* map high surrogate characters to values > 0xffff */ - if ((ch1 & 0xFC00) == 0xD800) { - ch1 += 0x4000; - } - if ((ch2 & 0xFC00) == 0xD800) { - ch2 += 0x4000; - } + /* Surrogates always report higher than non-surrogates */ + if (((ch1 & 0xFC00) == 0xD800)) { + if ((ch2 & 0xFC00) != 0xD800) { + return ch1; + } + } else if ((ch2 & 0xFC00) == 0xD800) { + return -ch2; + } #endif - if (ch1 != ch2) { return (ch1 - ch2); } } @@ -1116,16 +1115,17 @@ Tcl_UtfNcasecmp( */ cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); + if (ch1 != ch2) { #if TCL_UTF_MAX == 4 - /* map high surrogate characters to values > 0xffff */ - if ((ch1 & 0xFC00) == 0xD800) { - ch1 += 0x4000; - } - if ((ch2 & 0xFC00) == 0xD800) { - ch2 += 0x4000; - } + /* Surrogates always report higher than non-surrogates */ + if (((ch1 & 0xFC00) == 0xD800)) { + if ((ch2 & 0xFC00) != 0xD800) { + return ch1; + } + } else if ((ch2 & 0xFC00) == 0xD800) { + return -ch2; + } #endif - if (ch1 != ch2) { ch1 = Tcl_UniCharToLower(ch1); ch2 = Tcl_UniCharToLower(ch2); if (ch1 != ch2) { @@ -1135,6 +1135,52 @@ Tcl_UtfNcasecmp( } return 0; } + +/* + *---------------------------------------------------------------------- + * + * Tcl_UtfCmp -- + * + * Compare UTF chars of string cs to string ct case sensitively. + * Replacement for strcmp in Tcl core, in places where UTF-8 should + * be handled. + * + * Results: + * Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +TclUtfCmp( + const char *cs, /* UTF string to compare to ct. */ + const char *ct) /* UTF string cs is compared to. */ +{ + Tcl_UniChar ch1 = 0, ch2 = 0; + + while (*cs && *ct) { + cs += TclUtfToUniChar(cs, &ch1); + ct += TclUtfToUniChar(ct, &ch2); + if (ch1 != ch2) { +#if TCL_UTF_MAX == 4 + /* Surrogates always report higher than non-surrogates */ + if (((ch1 & 0xFC00) == 0xD800)) { + if ((ch2 & 0xFC00) != 0xD800) { + return ch1; + } + } else if ((ch2 & 0xFC00) == 0xD800) { + return -ch2; + } +#endif + return ch1 - ch2; + } + } + return UCHAR(*cs) - UCHAR(*ct); +} + /* *---------------------------------------------------------------------- @@ -1164,16 +1210,17 @@ TclUtfCasecmp( while (*cs && *ct) { cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); + if (ch1 != ch2) { #if TCL_UTF_MAX == 4 - /* map high surrogate characters to values > 0xffff */ - if ((ch1 & 0xFC00) == 0xD800) { - ch1 += 0x4000; - } - if ((ch2 & 0xFC00) == 0xD800) { - ch2 += 0x4000; - } + /* Surrogates always report higher than non-surrogates */ + if (((ch1 & 0xFC00) == 0xD800)) { + if ((ch2 & 0xFC00) != 0xD800) { + return ch1; + } + } else if ((ch2 & 0xFC00) == 0xD800) { + return -ch2; + } #endif - if (ch1 != ch2) { ch1 = Tcl_UniCharToLower(ch1); ch2 = Tcl_UniCharToLower(ch2); if (ch1 != ch2) { diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 8ab68d9..bbf9466 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -1665,6 +1665,7 @@ TclTrimRight( { const char *p = bytes + numBytes; int pInc; + Tcl_UniChar ch1 = 0, ch2 = 0; if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) { Tcl_Panic("TclTrimRight works only on null-terminated strings"); @@ -1683,7 +1684,6 @@ TclTrimRight( */ do { - Tcl_UniChar ch1; const char *q = trim; int bytesLeft = numTrim; @@ -1695,7 +1695,6 @@ TclTrimRight( */ do { - Tcl_UniChar ch2; int qInc = TclUtfToUniChar(q, &ch2); if (ch1 == ch2) { @@ -1745,6 +1744,7 @@ TclTrimLeft( int numTrim) /* ...and its length in bytes */ { const char *p = bytes; + Tcl_UniChar ch1 = 0, ch2 = 0; if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) { Tcl_Panic("TclTrimLeft works only on null-terminated strings"); @@ -1763,7 +1763,6 @@ TclTrimLeft( */ do { - Tcl_UniChar ch1; int pInc = TclUtfToUniChar(p, &ch1); const char *q = trim; int bytesLeft = numTrim; @@ -1773,7 +1772,6 @@ TclTrimLeft( */ do { - Tcl_UniChar ch2; int qInc = TclUtfToUniChar(q, &ch2); if (ch1 == ch2) { @@ -2107,7 +2105,7 @@ Tcl_StringCaseMatch( { int p, charLen; const char *pstart = pattern; - Tcl_UniChar ch1, ch2; + Tcl_UniChar ch1 = 0, ch2 = 0; while (1) { p = *pattern; @@ -2217,7 +2215,7 @@ Tcl_StringCaseMatch( */ if (p == '[') { - Tcl_UniChar startChar, endChar; + Tcl_UniChar startChar = 0, endChar = 0; pattern++; if (UCHAR(*str) < 0x80) { |