summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: jan.nijtmans <nijtmans@users.sourceforge.net> 2020-05-05 16:00:06 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2020-05-05 16:00:06 (GMT)
commit: 96f7e93f0b671b1d35f78a5e3058f9a83e2caedc (patch)
tree: 51e0d5f17d6e5e6fa4a20021f2937740c9735a10
parent: c6d9c4cee08c88cac3ba885b637147fe8808c62a (diff)
download: tcl-96f7e93f0b671b1d35f78a5e3058f9a83e2caedc.zip
download: tcl-96f7e93f0b671b1d35f78a5e3058f9a83e2caedc.tar.gz
download: tcl-96f7e93f0b671b1d35f78a5e3058f9a83e2caedc.tar.bz2
Use TclUtfToUCS4() in more places, so that the whole Unicode range is handled better in TCL_UTF_MAX>3 builds.
-rw-r--r-- generic/tclCmdIL.c 10
-rw-r--r-- generic/tclInt.h 1
-rw-r--r-- generic/tclUtf.c 17
-rw-r--r-- generic/tclUtil.c 44
4 files changed, 36 insertions, 36 deletions
diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c
index 7e685bd..3ec1c09 100644
--- a/generic/tclCmdIL.c
+++ b/generic/tclCmdIL.c
@@ -4370,7 +4370,7 @@ static int
DictionaryCompare(
const char *left, const char *right) /* The strings to compare. */
{
- Tcl_UniChar uniLeft = 0, uniRight = 0, uniLeftLower, uniRightLower;
+ int uniLeft = 0, uniRight = 0, uniLeftLower, uniRightLower;
int diff, zeros;
int secondaryDiff = 0;
@@ -4439,8 +4439,8 @@ DictionaryCompare(
*/
if ((*left != '\0') && (*right != '\0')) {
- left += TclUtfToUniChar(left, &uniLeft);
- right += TclUtfToUniChar(right, &uniRight);
+ left += TclUtfToUCS4(left, &uniLeft);
+ right += TclUtfToUCS4(right, &uniRight);
/*
* Convert both chars to lower for the comparison, because
@@ -4449,8 +4449,8 @@ DictionaryCompare(
* other interesting punctuations occur).
*/
- uniLeftLower = Tcl_UniCharToLower(uniLeft);
- uniRightLower = Tcl_UniCharToLower(uniRight);
+ uniLeftLower = TclUCS4ToLower(uniLeft);
+ uniRightLower = TclUCS4ToLower(uniRight);
} else {
diff = UCHAR(*left) - UCHAR(*right);
break;
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 8983659..7fc06c8 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3185,6 +3185,7 @@ MODULE_SCOPE int TclTrimRight(const char *bytes, int numBytes,
MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct);
MODULE_SCOPE int TclUtfToUCS4(const char *, int *);
MODULE_SCOPE int TclUCS4ToUtf(int, char *);
+MODULE_SCOPE int TclUCS4ToLower(int ch);
#if TCL_UTF_MAX == 4
MODULE_SCOPE int TclGetUCS4(Tcl_Obj *, int);
#else
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 5e9b7a1..9792071 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -88,7 +88,6 @@ static const unsigned char totalBytes[256] = {
static int UtfCount(int ch);
static int Invalid(unsigned char *src);
static int UCS4ToUpper(int ch);
-static int UCS4ToLower(int ch);
static int UCS4ToTitle(int ch);
/*
@@ -1078,7 +1077,7 @@ Tcl_UtfToLower(
src = dst = str;
while (*src) {
len = TclUtfToUCS4(src, &ch);
- lowChar = UCS4ToLower(ch);
+ lowChar = TclUCS4ToLower(ch);
/*
* To keep badly formed Utf strings from getting inflated by the
@@ -1149,7 +1148,7 @@ Tcl_UtfToTitle(
lowChar = ch;
/* Special exception for Georgian Asomtavruli chars, no titlecase. */
if ((unsigned)(lowChar - 0x1C90) >= 0x30) {
- lowChar = UCS4ToLower(lowChar);
+ lowChar = TclUCS4ToLower(lowChar);
}
if (len < UtfCount(lowChar) || ((lowChar & ~0x7FF) == 0xD800)) {
@@ -1254,11 +1253,11 @@ Tcl_UtfNcmp(
if (ch1 != ch2) {
#if TCL_UTF_MAX == 4
/* Surrogates always report higher than non-surrogates */
- if (((ch1 & 0xFC00) == 0xD800)) {
- if ((ch2 & 0xFC00) != 0xD800) {
+ if (((ch1 & ~0x3FF) == 0xD800)) {
+ if ((ch2 & ~0x3FF) != 0xD800) {
return ch1;
}
- } else if ((ch2 & 0xFC00) == 0xD800) {
+ } else if ((ch2 & ~0x3FF) == 0xD800) {
return -ch2;
}
#endif
@@ -1427,8 +1426,8 @@ Tcl_UniCharToUpper(
*----------------------------------------------------------------------
*/
-static int
-UCS4ToLower(
+int
+TclUCS4ToLower(
int ch) /* Unicode character to convert. */
{
if (!UNICODE_OUT_OF_RANGE(ch)) {
@@ -1447,7 +1446,7 @@ Tcl_UniChar
Tcl_UniCharToLower(
int ch) /* Unicode character to convert. */
{
- return (Tcl_UniChar) UCS4ToLower(ch);
+ return (Tcl_UniChar) TclUCS4ToLower(ch);
}
/*
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 13b0d55..b4a07bb 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -2155,7 +2155,7 @@ Tcl_StringCaseMatch(
int nocase) /* 0 for case sensitive, 1 for insensitive */
{
int p, charLen;
- Tcl_UniChar ch1 = 0, ch2 = 0;
+ int ch1 = 0, ch2 = 0;
while (1) {
p = *pattern;
@@ -2196,12 +2196,12 @@ Tcl_StringCaseMatch(
*/
if (UCHAR(*pattern) < 0x80) {
- ch2 = (Tcl_UniChar)
+ ch2 = (int)
(nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern));
} else {
- Tcl_UtfToUniChar(pattern, &ch2);
+ TclUtfToUCS4(pattern, &ch2);
if (nocase) {
- ch2 = Tcl_UniCharToLower(ch2);
+ ch2 = TclUCS4ToLower(ch2);
}
}
@@ -2215,8 +2215,8 @@ Tcl_StringCaseMatch(
if ((p != '[') && (p != '?') && (p != '\\')) {
if (nocase) {
while (*str) {
- charLen = TclUtfToUniChar(str, &ch1);
- if (ch2==ch1 || ch2==Tcl_UniCharToLower(ch1)) {
+ charLen = TclUtfToUCS4(str, &ch1);
+ if (ch2==ch1 || ch2==TclUCS4ToLower(ch1)) {
break;
}
str += charLen;
@@ -2229,7 +2229,7 @@ Tcl_StringCaseMatch(
*/
while (*str) {
- charLen = TclUtfToUniChar(str, &ch1);
+ charLen = TclUtfToUCS4(str, &ch1);
if (ch2 == ch1) {
break;
}
@@ -2243,7 +2243,7 @@ Tcl_StringCaseMatch(
if (*str == '\0') {
return 0;
}
- str += TclUtfToUniChar(str, &ch1);
+ str += TclUtfToUCS4(str, &ch1);
}
}
@@ -2254,7 +2254,7 @@ Tcl_StringCaseMatch(
if (p == '?') {
pattern++;
- str += TclUtfToUniChar(str, &ch1);
+ str += TclUtfToUCS4(str, &ch1);
continue;
}
@@ -2265,17 +2265,17 @@ Tcl_StringCaseMatch(
*/
if (p == '[') {
- Tcl_UniChar startChar = 0, endChar = 0;
+ int startChar = 0, endChar = 0;
pattern++;
if (UCHAR(*str) < 0x80) {
- ch1 = (Tcl_UniChar)
+ ch1 = (int)
(nocase ? tolower(UCHAR(*str)) : UCHAR(*str));
str++;
} else {
- str += Tcl_UtfToUniChar(str, &ch1);
+ str += TclUtfToUCS4(str, &ch1);
if (nocase) {
- ch1 = Tcl_UniCharToLower(ch1);
+ ch1 = TclUCS4ToLower(ch1);
}
}
while (1) {
@@ -2283,13 +2283,13 @@ Tcl_StringCaseMatch(
return 0;
}
if (UCHAR(*pattern) < 0x80) {
- startChar = (Tcl_UniChar) (nocase
+ startChar = (int) (nocase
? tolower(UCHAR(*pattern)) : UCHAR(*pattern));
pattern++;
} else {
- pattern += Tcl_UtfToUniChar(pattern, &startChar);
+ pattern += TclUtfToUCS4(pattern, &startChar);
if (nocase) {
- startChar = Tcl_UniCharToLower(startChar);
+ startChar = TclUCS4ToLower(startChar);
}
}
if (*pattern == '-') {
@@ -2298,13 +2298,13 @@ Tcl_StringCaseMatch(
return 0;
}
if (UCHAR(*pattern) < 0x80) {
- endChar = (Tcl_UniChar) (nocase
+ endChar = (int) (nocase
? tolower(UCHAR(*pattern)) : UCHAR(*pattern));
pattern++;
} else {
- pattern += Tcl_UtfToUniChar(pattern, &endChar);
+ pattern += TclUtfToUCS4(pattern, &endChar);
if (nocase) {
- endChar = Tcl_UniCharToLower(endChar);
+ endChar = TclUCS4ToLower(endChar);
}
}
if (((startChar <= ch1) && (ch1 <= endChar))
@@ -2350,10 +2350,10 @@ Tcl_StringCaseMatch(
* each string match.
*/
- str += TclUtfToUniChar(str, &ch1);
- pattern += TclUtfToUniChar(pattern, &ch2);
+ str += TclUtfToUCS4(str, &ch1);
+ pattern += TclUtfToUCS4(pattern, &ch2);
if (nocase) {
- if (Tcl_UniCharToLower(ch1) != Tcl_UniCharToLower(ch2)) {
+ if (TclUCS4ToLower(ch1) != TclUCS4ToLower(ch2)) {
return 0;
}
} else if (ch1 != ch2) {