From e3c58bc54a39c2911fb59460045b16c4e61c491c Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 8 Jun 2017 11:48:13 +0000 Subject: tclUtil.c: Use TclUtfToUniChar() instead of handling ASCII characters separately: This macro already does that. Add new test-case for Tcl_NumUtfChars(), for a knownBug still to be fixed. --- generic/tclTest.c | 2 +- generic/tclUtil.c | 47 ++++++++++++----------------------------------- tests/utf.test | 11 +++++++---- 3 files changed, 20 insertions(+), 40 deletions(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index f2dbfc9..e8539e8 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -6672,7 +6672,7 @@ TestNumUtfCharsCmd( int len = -1; if (objc > 2) { - (void) Tcl_GetStringFromObj(objv[1], &len); + (void) Tcl_GetIntFromObj(interp, objv[2], &len); } len = Tcl_NumUtfChars(Tcl_GetString(objv[1]), len); Tcl_SetObjResult(interp, Tcl_NewIntObj(len)); diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 553593c..3fdf54b 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -2162,14 +2162,9 @@ Tcl_StringCaseMatch( * This is a special case optimization for single-byte utf. */ - if (UCHAR(*pattern) < 0x80) { - ch2 = (Tcl_UniChar) - (nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern)); - } else { - Tcl_UtfToUniChar(pattern, &ch2); - if (nocase) { - ch2 = Tcl_UniCharToLower(ch2); - } + TclUtfToUniChar(pattern, &ch2); + if (nocase) { + ch2 = Tcl_UniCharToLower(ch2); } while (1) { @@ -2235,44 +2230,26 @@ Tcl_StringCaseMatch( Tcl_UniChar startChar, endChar; pattern++; - if (UCHAR(*str) < 0x80) { - ch1 = (Tcl_UniChar) - (nocase ? tolower(UCHAR(*str)) : UCHAR(*str)); - str++; - } else { - str += Tcl_UtfToUniChar(str, &ch1); - if (nocase) { - ch1 = Tcl_UniCharToLower(ch1); - } + str += TclUtfToUniChar(str, &ch1); + if (nocase) { + ch1 = Tcl_UniCharToLower(ch1); } while (1) { if ((*pattern == ']') || (*pattern == '\0')) { return 0; } - if (UCHAR(*pattern) < 0x80) { - startChar = (Tcl_UniChar) (nocase - ? 
tolower(UCHAR(*pattern)) : UCHAR(*pattern)); - pattern++; - } else { - pattern += Tcl_UtfToUniChar(pattern, &startChar); - if (nocase) { - startChar = Tcl_UniCharToLower(startChar); - } + pattern += TclUtfToUniChar(pattern, &startChar); + if (nocase) { + startChar = Tcl_UniCharToLower(startChar); } if (*pattern == '-') { pattern++; if (*pattern == '\0') { return 0; } - if (UCHAR(*pattern) < 0x80) { - endChar = (Tcl_UniChar) (nocase - ? tolower(UCHAR(*pattern)) : UCHAR(*pattern)); - pattern++; - } else { - pattern += Tcl_UtfToUniChar(pattern, &endChar); - if (nocase) { - endChar = Tcl_UniCharToLower(endChar); - } + pattern += TclUtfToUniChar(pattern, &endChar); + if (nocase) { + endChar = Tcl_UniCharToLower(endChar); } if (((startChar <= ch1) && (ch1 <= endChar)) || ((endChar <= ch1) && (ch1 <= startChar))) { diff --git a/tests/utf.test b/tests/utf.test index 28981d6..f677438 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -99,17 +99,20 @@ test utf-4.4 {Tcl_NumUtfChars: #u0000} {testnumutfchars testbytestring} { testnumutfchars [testbytestring "\xC0\x80"] } {1} test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars { - testnumutfchars "" 1 + testnumutfchars "" 0 } {0} test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "\xC2\xA2"] 1 + testnumutfchars [testbytestring "\xC2\xA2"] 2 } {1} test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] 1 + testnumutfchars [testbytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] 10 } {7} test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "\xC0\x80"] 1 + testnumutfchars [testbytestring "\xC0\x80"] 2 } {1} +test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} {knownBug testnumutfchars testbytestring} { + testnumutfchars [testbytestring "\xE2\x82\xAC"] 2 +} {2} test utf-5.1 
{Tcl_UtfFindFirsts} { } {} -- cgit v0.12