summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author jan.nijtmans <nijtmans@users.sourceforge.net> 2017-06-08 11:48:13 (GMT)
committer jan.nijtmans <nijtmans@users.sourceforge.net> 2017-06-08 11:48:13 (GMT)
commit e3c58bc54a39c2911fb59460045b16c4e61c491c (patch)
tree 241320f6e42e21aa0ab94b1c29fb08ef5e4799fb
parent 68be5b2b62dfcf1b9b7e348a71c4d88e08f19ef9 (diff)
download tcl-e3c58bc54a39c2911fb59460045b16c4e61c491c.zip
tcl-e3c58bc54a39c2911fb59460045b16c4e61c491c.tar.gz
tcl-e3c58bc54a39c2911fb59460045b16c4e61c491c.tar.bz2
tclUtil.c: Use TclUtfToUniChar() instead of handling ASCII characters separately: This macro already does that.
Add new test-case for Tcl_NumUtfChars(), for a knownBug still to be fixed.
-rw-r--r-- generic/tclTest.c 2
-rw-r--r-- generic/tclUtil.c 47
-rw-r--r-- tests/utf.test 11
3 files changed, 20 insertions, 40 deletions
diff --git a/generic/tclTest.c b/generic/tclTest.c
index f2dbfc9..e8539e8 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -6672,7 +6672,7 @@ TestNumUtfCharsCmd(
int len = -1;
if (objc > 2) {
- (void) Tcl_GetStringFromObj(objv[1], &len);
+ (void) Tcl_GetIntFromObj(interp, objv[2], &len);
}
len = Tcl_NumUtfChars(Tcl_GetString(objv[1]), len);
Tcl_SetObjResult(interp, Tcl_NewIntObj(len));
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 553593c..3fdf54b 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -2162,14 +2162,9 @@ Tcl_StringCaseMatch(
* This is a special case optimization for single-byte utf.
*/
- if (UCHAR(*pattern) < 0x80) {
- ch2 = (Tcl_UniChar)
- (nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern));
- } else {
- Tcl_UtfToUniChar(pattern, &ch2);
- if (nocase) {
- ch2 = Tcl_UniCharToLower(ch2);
- }
+ TclUtfToUniChar(pattern, &ch2);
+ if (nocase) {
+ ch2 = Tcl_UniCharToLower(ch2);
}
while (1) {
@@ -2235,44 +2230,26 @@ Tcl_StringCaseMatch(
Tcl_UniChar startChar, endChar;
pattern++;
- if (UCHAR(*str) < 0x80) {
- ch1 = (Tcl_UniChar)
- (nocase ? tolower(UCHAR(*str)) : UCHAR(*str));
- str++;
- } else {
- str += Tcl_UtfToUniChar(str, &ch1);
- if (nocase) {
- ch1 = Tcl_UniCharToLower(ch1);
- }
+ str += TclUtfToUniChar(str, &ch1);
+ if (nocase) {
+ ch1 = Tcl_UniCharToLower(ch1);
}
while (1) {
if ((*pattern == ']') || (*pattern == '\0')) {
return 0;
}
- if (UCHAR(*pattern) < 0x80) {
- startChar = (Tcl_UniChar) (nocase
- ? tolower(UCHAR(*pattern)) : UCHAR(*pattern));
- pattern++;
- } else {
- pattern += Tcl_UtfToUniChar(pattern, &startChar);
- if (nocase) {
- startChar = Tcl_UniCharToLower(startChar);
- }
+ pattern += TclUtfToUniChar(pattern, &startChar);
+ if (nocase) {
+ startChar = Tcl_UniCharToLower(startChar);
}
if (*pattern == '-') {
pattern++;
if (*pattern == '\0') {
return 0;
}
- if (UCHAR(*pattern) < 0x80) {
- endChar = (Tcl_UniChar) (nocase
- ? tolower(UCHAR(*pattern)) : UCHAR(*pattern));
- pattern++;
- } else {
- pattern += Tcl_UtfToUniChar(pattern, &endChar);
- if (nocase) {
- endChar = Tcl_UniCharToLower(endChar);
- }
+ pattern += TclUtfToUniChar(pattern, &endChar);
+ if (nocase) {
+ endChar = Tcl_UniCharToLower(endChar);
}
if (((startChar <= ch1) && (ch1 <= endChar))
|| ((endChar <= ch1) && (ch1 <= startChar))) {
diff --git a/tests/utf.test b/tests/utf.test
index 28981d6..f677438 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -99,17 +99,20 @@ test utf-4.4 {Tcl_NumUtfChars: #u0000} {testnumutfchars testbytestring} {
testnumutfchars [testbytestring "\xC0\x80"]
} {1}
test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars {
- testnumutfchars "" 1
+ testnumutfchars "" 0
} {0}
test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {testnumutfchars testbytestring} {
- testnumutfchars [testbytestring "\xC2\xA2"] 1
+ testnumutfchars [testbytestring "\xC2\xA2"] 2
} {1}
test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {testnumutfchars testbytestring} {
- testnumutfchars [testbytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] 1
+ testnumutfchars [testbytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] 10
} {7}
test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {testnumutfchars testbytestring} {
- testnumutfchars [testbytestring "\xC0\x80"] 1
+ testnumutfchars [testbytestring "\xC0\x80"] 2
} {1}
+test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} {knownBug testnumutfchars testbytestring} {
+ testnumutfchars [testbytestring "\xE2\x82\xAC"] 2
+} {2}
test utf-5.1 {Tcl_UtfFindFirsts} {
} {}