From 5500074eff2f8cd05b593f4dcac0aecd23e00b94 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 7 May 2018 07:40:58 +0000 Subject: Remove some tip389 restrictions in test-cases, which are no longer necessary. Eliminate gcc compiler warnings when compiling with -DTCL_UTF_MAX=6 Other code clean-up and comment improvements. No change in functionality. --- generic/tcl.h | 6 +++--- generic/tclCmdMZ.c | 14 +++++++------- generic/tclUtf.c | 4 ++-- tests/utf.test | 18 ++++++++---------- 4 files changed, 20 insertions(+), 22 deletions(-) diff --git a/generic/tcl.h b/generic/tcl.h index c3e0f9d..6dd0ea0 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2146,9 +2146,9 @@ typedef struct Tcl_EncodingType { /* * The maximum number of bytes that are necessary to represent a single - * Unicode character in UTF-8. The valid values should be 3, 4 or 6 - * (or perhaps 1 if we want to support a non-unicode enabled core). If 3 or - * 4, then Tcl_UniChar must be 2-bytes in size (UCS-2) (the default). If 6, + * Unicode character in UTF-8. The valid values are 4 and 6 + * (or perhaps 1 if we want to support a non-unicode enabled core). If 4, + * then Tcl_UniChar must be 2-bytes in size (UCS-2) (the default). If 6, * then Tcl_UniChar must be 4-bytes in size (UCS-4). At this time UCS-2 mode * is the default and recommended mode. UCS-4 is experimental and not * recommended. It works for the core, but most extensions expect UCS-2. diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 6aaf1de..7c979bb 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -597,9 +597,9 @@ Tcl_RegsubObjCmd( * slightly modified version of the one pair STR_MAP code. */ - int slen, nocase; + int slen, nocase, wsrclc; int (*strCmpFn)(const Tcl_UniChar*,const Tcl_UniChar*,unsigned long); - Tcl_UniChar *p, wsrclc; + Tcl_UniChar *p; numMatches = 0; nocase = (cflags & TCL_REG_NOCASE); @@ -2001,8 +2001,8 @@ StringMapCmd( * larger strings. */ - int mapLen; - Tcl_UniChar *mapString, u2lc; + int mapLen, u2lc; + Tcl_UniChar *mapString; ustring2 = Tcl_GetUnicodeFromObj(mapElemv[0], &length2); p = ustring1; @@ -2033,8 +2033,8 @@ StringMapCmd( } } } else { - Tcl_UniChar **mapStrings, *u2lc = NULL; - int *mapLens; + Tcl_UniChar **mapStrings; + int *mapLens, *u2lc = NULL; /* * Precompute pointers to the unicode string and length. This saves us @@ -2046,7 +2046,7 @@ StringMapCmd( mapStrings = TclStackAlloc(interp, mapElemc*2*sizeof(Tcl_UniChar *)); mapLens = TclStackAlloc(interp, mapElemc * 2 * sizeof(int)); if (nocase) { - u2lc = TclStackAlloc(interp, mapElemc * sizeof(Tcl_UniChar)); + u2lc = TclStackAlloc(interp, mapElemc * sizeof(int)); } for (index = 0; index < mapElemc; index++) { mapStrings[index] = Tcl_GetUnicodeFromObj(mapElemv[index], diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 1d73a7a..693e210 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -1034,8 +1034,8 @@ Tcl_UtfToTitle( lowChar = (((lowChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; } #endif - /* Special exception for Gregorian characters, which don't have titlecase */ - if ((lowChar < 0x1C90) || (lowChar >= 0x1CC0)) { + /* Special exception for Georgian Asomtavruli chars, no titlecase. */ + if ((unsigned)(lowChar - 0x1C90) >= 0x30) { lowChar = Tcl_UniCharToLower(lowChar); } diff --git a/tests/utf.test b/tests/utf.test index 39818cc..9dd8017 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -41,7 +41,7 @@ test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} testbytestring { test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring { expr {[format %c -1] eq [testbytestring "\xef\xbf\xbd"]} } 1 -test utf-1.7 {Tcl_UniCharToUtf: 4 byte sequences} -constraints {tip389 testbytestring} -body { +test utf-1.7 {Tcl_UniCharToUtf: 4 byte sequences} -constraints testbytestring -body { expr {"\U014e4e" eq [testbytestring "\xf0\x94\xb9\x8e"]} } -result 1 @@ -228,15 +228,13 @@ bsCheck \U4e21 20001 bsCheck \U004e21 20001 bsCheck \U00004e21 20001 bsCheck \U0000004e21 78 -if {[testConstraint tip389]} { - bsCheck \U00110000 69632 - bsCheck \U01100000 69632 - bsCheck \U11000000 69632 - bsCheck \U0010FFFF 1114111 - bsCheck \U010FFFF0 1114111 - bsCheck \U10FFFF00 1114111 - bsCheck \UFFFFFFFF 1048575 -} +bsCheck \U00110000 69632 +bsCheck \U01100000 69632 +bsCheck \U11000000 69632 +bsCheck \U0010FFFF 1114111 +bsCheck \U010FFFF0 1114111 +bsCheck \U10FFFF00 1114111 +bsCheck \UFFFFFFFF 1048575 test utf-11.1 {Tcl_UtfToUpper} { string toupper {} -- cgit v0.12