From e71aa6771eb99006cbe78b6dd13a0e1b1828e64f Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 28 Apr 2020 20:02:44 +0000 Subject: Bring forward test-cases utf-6.94 and utf-6.95 from Tcl 8.6 (higher ones have no value IMHO, but those 2 have!) --- tests/utf.test | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/utf.test b/tests/utf.test index 12841fc..a910c36 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -496,6 +496,12 @@ test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testu test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { testutfnext -bytestring \x80\x80\x80 } 3 +test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { + testutfnext -bytestring \xA0\xA0\xA0\xA0 +} 3 +test utf-6.95 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { + testutfnext -bytestring \x80\x80\x80\x80 +} 3 test utf-7.1 {Tcl_UtfPrev} testutfprev { testutfprev {} -- cgit v0.12 From f57801a3b05da369c26d5f21a5907d066b5f4f63 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 28 Apr 2020 20:39:35 +0000 Subject: Extend "testutfnext" test-command such that it can detect when it reads more bytes than the end of the string. If you want to null-terminate string explitely, use \x00 --- generic/tclTest.c | 15 ++++++++++++--- tests/utf.test | 14 +++++++------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index 42eff25..2178647 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -6841,13 +6841,13 @@ TestUtfNextCmd( } } - if (numBytes > (int)sizeof(buffer)-2) { - Tcl_AppendResult(interp, "\"testutfnext\" can only handle 30 bytes", NULL); + if (numBytes > (int)sizeof(buffer)-3) { + Tcl_AppendResult(interp, "\"testutfnext\" can only handle 29 bytes", NULL); return TCL_ERROR; } memcpy(buffer + 1, bytes, numBytes); - buffer[0] = buffer[numBytes + 1] = '\x00'; + buffer[0] = buffer[numBytes + 1] = buffer[numBytes + 2] = '\x00'; first = result = TclUtfNext(buffer + 1); while ((buffer[0] = *p++) != '\0') { @@ -6858,6 +6858,15 @@ TestUtfNextCmd( return TCL_ERROR; } } + p = tobetested; + while ((buffer[numBytes + 1] = *p++) != '\0') { + /* Run Tcl_UtfNext with many more possible bytes at src[end], all should give the same result */ + result = TclUtfNext(buffer + 1); + if (first != result) { + Tcl_AppendResult(interp, "Tcl_UtfNext is not supposed to read src[end]", NULL); + return TCL_ERROR; + } + } Tcl_SetObjResult(interp, Tcl_NewIntObj(first - buffer - 1)); diff --git a/tests/utf.test b/tests/utf.test index a910c36..b316466 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -215,13 +215,13 @@ test utf-6.8 {Tcl_UtfNext} testutfnext { testutfnext -bytestring A\xF8 } 1 test utf-6.9 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0 + testutfnext -bytestring \xA0\x00 } 1 test utf-6.10 {Tcl_UtfNext} testutfnext { testutfnext -bytestring \xA0G } 1 test utf-6.11 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0\xA0 + testutfnext -bytestring \xA0\xA0\x00 } 2 test utf-6.12 {Tcl_UtfNext} testutfnext { testutfnext -bytestring \xA0\xD0 @@ -236,7 +236,7 @@ test utf-6.15 {Tcl_UtfNext} testutfnext { testutfnext -bytestring \xA0\xF8 } 1 test utf-6.16 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0 + testutfnext -bytestring \xD0\x00 } 1 test utf-6.17 {Tcl_UtfNext} testutfnext { testutfnext -bytestring \xD0G @@ -263,7 +263,7 @@ test utf-6.24 {Tcl_UtfNext} testutfnext { testutfnext -bytestring \xE8G } 1 test utf-6.25 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0 + testutfnext -bytestring \xE8\xA0\x00 } 1 test utf-6.26 {Tcl_UtfNext} testutfnext { testutfnext -bytestring \xE8\xD0 @@ -359,7 +359,7 @@ test utf-6.56 {Tcl_UtfNext} testutfnext { testutfnext -bytestring \xF2\xA0G } 1 test utf-6.57 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xA0 + testutfnext -bytestring \xF2\xA0\xA0\x00 } 1 test utf-6.58 {Tcl_UtfNext} testutfnext { testutfnext -bytestring \xF2\xA0\xD0 @@ -476,10 +476,10 @@ test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext ucs4} { testutfnext -bytestring \xF0\x90\x80\x80 } 4 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} testutfnext { - testutfnext -bytestring \xA0\xA0 + testutfnext -bytestring \xA0\xA0\x00 } 2 test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} testutfnext { - testutfnext -bytestring \x80\x80 + testutfnext -bytestring \x80\x80\x00 } 2 test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext tip389} { testutfnext -bytestring \xF4\x8F\xBF\xBF -- cgit v0.12 From 45b096ac88e551fb40f84b02f14bd6bb2cdfdcdb Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 29 Apr 2020 11:10:44 +0000 Subject: Better structurize the UTF-8 (mainly around Tcl_UtfNext()/Tcl_UtfPrev(), but not only those) test-cases: Selector "ucs2" is meant for Tcl 8.5: No knowledge at all about 4-byte sequences. Selector "ucs4" is meant for Tcl 8.5 or 8.7 with TCL_UTF_MAX=4 or Tcl 8.6 with TCL_UTF_MAX=6 Selector "tip389" is meant for Tcl 8.6 with TCL_UTF_MAX=4 and Tcl 8.7 with TCL_UTF_MAX=3 (of course, this is too simple: there will be testcases needing more than those 3 --- generic/tclTest.c | 29 +- tests/utf.test | 896 +++++++++++++++++++++++++++--------------------------- 2 files changed, 454 insertions(+), 471 deletions(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index 2178647..4c13850 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -6825,21 +6825,14 @@ TestUtfNextCmd( char *bytes; const char *result, *first; char buffer[32]; - static const char tobetested[] = "\xFF\xFE\xF4\xF2\xF0\xEF\xE8\xE3\xE2\xE1\xE0\xC2\xC1\xC0\x82"; + static const char tobetested[] = "A\xA0\xC0\xC1\xC2\xD0\xE0\xE8\xF2\xF7\xF8\xFE\xFF"; const char *p = tobetested; - if (objc != 3 || strcmp(Tcl_GetString(objv[1]), "-bytestring")) { - if (objc != 2) { - Tcl_WrongNumArgs(interp, 1, objv, "?-bytestring? bytes"); - return TCL_ERROR; - } - bytes = Tcl_GetStringFromObj(objv[1], &numBytes); - } else { - bytes = (char *) TclGetBytesFromObj(interp, objv[2], &numBytes); - if (bytes == NULL) { - return TCL_ERROR; - } + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, "?-bytestring? bytes"); + return TCL_ERROR; } + bytes = Tcl_GetStringFromObj(objv[1], &numBytes); if (numBytes > (int)sizeof(buffer)-3) { Tcl_AppendResult(interp, "\"testutfnext\" can only handle 29 bytes", NULL); @@ -6888,17 +6881,13 @@ TestUtfPrevCmd( int numBytes, offset; char *bytes; const char *result; - Tcl_Obj *copy; if (objc < 2 || objc > 3) { Tcl_WrongNumArgs(interp, 1, objv, "bytes ?offset?"); return TCL_ERROR; } - bytes = (char *) TclGetBytesFromObj(interp, objv[1], &numBytes); - if (bytes == NULL) { - return TCL_ERROR; - } + bytes = Tcl_GetStringFromObj(objv[1], &numBytes); if (objc == 3) { if (TCL_OK != Tcl_GetIntForIndex(interp, objv[2], numBytes, &offset)) { @@ -6913,14 +6902,8 @@ TestUtfPrevCmd( } else { offset = numBytes; } - copy = Tcl_DuplicateObj(objv[1]); - bytes = (char *) Tcl_SetByteArrayLength(copy, numBytes+1); - bytes[numBytes] = '\0'; - result = TclUtfPrev(bytes + offset, bytes); Tcl_SetObjResult(interp, Tcl_NewIntObj(result - bytes)); - - Tcl_DecrRefCount(copy); return TCL_OK; } diff --git a/tests/utf.test b/tests/utf.test index feb3f56..811002d 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -42,96 +42,96 @@ testConstraint tip413 [eq {} [string trim \x00]] catch {unset x} test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} testbytestring { - expr {"\x01" eq [testbytestring "\x01"]} + expr {"\x01" eq [testbytestring \x01]} } 1 test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} testbytestring { - expr {"\x00" eq [testbytestring "\xC0\x80"]} + expr {"\x00" eq [testbytestring \xC0\x80]} } 1 test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} testbytestring { - expr {"\xE0" eq [testbytestring "\xC3\xA0"]} + expr {"\xE0" eq [testbytestring \xC3\xA0]} } 1 test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} testbytestring { - expr {"\u4E4E" eq [testbytestring "\xE4\xB9\x8E"]} + expr {"\u4E4E" eq [testbytestring \xE4\xB9\x8E]} } 1 test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} testbytestring { - expr {[format %c 0x110000] eq [testbytestring "\xEF\xBF\xBD"]} + expr {[format %c 0x110000] eq [testbytestring \xEF\xBF\xBD]} } 1 test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring { - expr {[format %c -1] eq [testbytestring "\xEF\xBF\xBD"]} + expr {[format %c -1] eq [testbytestring \xEF\xBF\xBD]} } 1 test utf-1.7.0 {Tcl_UniCharToUtf: 4 byte sequences} {fullutf Uesc testbytestring} { - expr {"\U014E4E" eq [testbytestring "\xF0\x94\xB9\x8E"]} + expr {"\U014E4E" eq [testbytestring \xF0\x94\xB9\x8E]} } 1 test utf-1.7.1 {Tcl_UniCharToUtf: 4 byte sequences} {ucs2 Uesc testbytestring} { - expr {"\U014E4E" eq [testbytestring "\xF0\x94\xB9\x8E"]} + expr {"\U014E4E" eq [testbytestring \xF0\x94\xB9\x8E]} } 0 test utf-1.8 {Tcl_UniCharToUtf: 3 byte sequence, high surrogate} testbytestring { - expr {"\uD842" eq [testbytestring "\xED\xA1\x82"]} + expr {"\uD842" eq [testbytestring \xED\xA1\x82]} } 1 test utf-1.9 {Tcl_UniCharToUtf: 3 byte sequence, low surrogate} testbytestring { - expr {"\uDC42" eq [testbytestring "\xED\xB1\x82"]} + expr {"\uDC42" eq [testbytestring \xED\xB1\x82]} } 1 test utf-1.10 {Tcl_UniCharToUtf: 3 byte sequence, high surrogate} testbytestring { - expr {[format %c 0xD842] eq [testbytestring "\xED\xA1\x82"]} + expr {[format %c 0xD842] eq [testbytestring \xED\xA1\x82]} } 1 test utf-1.11 {Tcl_UniCharToUtf: 3 byte sequence, low surrogate} testbytestring { - expr {[format %c 0xDC42] eq [testbytestring "\xED\xB1\x82"]} + expr {[format %c 0xDC42] eq [testbytestring \xED\xB1\x82]} } 1 test utf-1.12 {Tcl_UniCharToUtf: 4 byte sequence, high/low surrogate} {pairsTo4bytes testbytestring} { - expr {"\uD842\uDC42" eq [testbytestring "\xF0\xA0\xA1\x82"]} + expr {"\uD842\uDC42" eq [testbytestring \xF0\xA0\xA1\x82]} } 1 test utf-1.13 {Tcl_UniCharToUtf: Invalid surrogate} {Uesc testbytestring} { - expr {"\UD842" eq [testbytestring "\xEF\xBF\xBD"]} + expr {"\UD842" eq [testbytestring \xEF\xBF\xBD]} } 1 test utf-2.1 {Tcl_UtfToUniChar: low ascii} { string length "abc" } 3 test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} testbytestring { - string length [testbytestring "\x82\x83\x84"] + string length [testbytestring \x82\x83\x84] } 3 test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} testbytestring { - string length [testbytestring "\xC2"] + string length [testbytestring \xC2] } 1 test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} testbytestring { - string length [testbytestring "\xC2\xA2"] + string length [testbytestring \xC2\xA2] } 1 test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} testbytestring { - string length [testbytestring "\xE2"] + string length [testbytestring \xE2] } 1 test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} testbytestring { - string length [testbytestring "\xE2\xA2"] + string length [testbytestring \xE2\xA2] } 2 test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring { - string length [testbytestring "\xE4\xB9\x8E"] + string length [testbytestring \xE4\xB9\x8E] } 1 test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs2} { - string length [testbytestring "\xF0\x90\x80\x80"] + string length [testbytestring \xF0\x90\x80\x80] } 4 test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs4} { - string length [testbytestring "\xF0\x90\x80\x80"] + string length [testbytestring \xF0\x90\x80\x80] } 1 test utf-2.8.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring tip389} { - string length [testbytestring "\xF0\x90\x80\x80"] + string length [testbytestring \xF0\x90\x80\x80] } 2 test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs2} { - string length [testbytestring "\xF4\x8F\xBF\xBF"] + string length [testbytestring \xF4\x8F\xBF\xBF] } 4 -test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs4} { - string length [testbytestring "\xF4\x8F\xBF\xBF"] +test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} ucs4 { + string length \U10FFFF } 1 -test utf-2.9.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring tip389} { - string length [testbytestring "\xF4\x8F\xBF\xBF"] +test utf-2.9.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} !ucs4 { + string length \uDBFF\uDFFF } 2 test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring { - string length [testbytestring "\xF0\x8F\xBF\xBF"] + string length [testbytestring \xF0\x8F\xBF\xBF] } 4 test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} testbytestring { # Would decode to U+110000 but that is outside the Unicode range. - string length [testbytestring "\xF4\x90\x80\x80"] + string length [testbytestring \xF4\x90\x80\x80] } 4 test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestring { - string length [testbytestring "\xF8\xA2\xA2\xA2\xA2"] + string length [testbytestring \xF8\xA2\xA2\xA2\xA2] } 5 test utf-3.1 {Tcl_UtfCharComplete} { @@ -141,32 +141,32 @@ test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars { testnumutfchars "" } 0 test utf-4.2 {Tcl_NumUtfChars: length 1} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "\xC2\xA2"] + testnumutfchars [testbytestring \xC2\xA2] } 1 test utf-4.3 {Tcl_NumUtfChars: long string} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E"] + testnumutfchars [testbytestring abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E] } 7 test utf-4.4 {Tcl_NumUtfChars: #u0000} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "\xC0\x80"] + testnumutfchars [testbytestring \xC0\x80] } 1 test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars { testnumutfchars "" 0 } 0 test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "\xC2\xA2"] end + testnumutfchars [testbytestring \xC2\xA2] end } 1 test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E"] end + testnumutfchars [testbytestring abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E] end } 7 test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "\xC0\x80"] end + testnumutfchars [testbytestring \xC0\x80] end } 1 # Bug [2738427]: Tcl_NumUtfChars(...) no overflow check test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "\xE2\x82\xAC"] end-1 + testnumutfchars [testbytestring \xE2\x82\xAC] end-1 } 2 test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "\x00"] end+1 + testnumutfchars [testbytestring \x00] end+1 } 2 test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end-1 @@ -182,325 +182,325 @@ test utf-4.12.2 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars test } 2 test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} { - testfindfirst [testbytestring "abcbc"] 98 + testfindfirst [testbytestring abcbc] 98 } bcbc test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} { - testfindlast [testbytestring "abcbc"] 98 + testfindlast [testbytestring abcbc] 98 } bc -test utf-6.1 {Tcl_UtfNext} testutfnext { +test utf-6.1 {Tcl_UtfNext} {testutfnext testbytestring} { # This takes the pointer one past the terminating NUL. # This is really an invalid call. - testutfnext -bytestring {} + testutfnext [testbytestring {}] } 1 test utf-6.2 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring A + testutfnext A } 1 test utf-6.3 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring AA + testutfnext AA } 1 -test utf-6.4 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring A\xA0 +test utf-6.4 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext A[testbytestring \xA0] } 1 -test utf-6.5 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring A\xD0 +test utf-6.5 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext A[testbytestring \xD0] } 1 -test utf-6.6 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring A\xE8 +test utf-6.6 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext A[testbytestring \xE8] } 1 -test utf-6.7 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring A\xF2 +test utf-6.7 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext A[testbytestring \xF2] } 1 -test utf-6.8 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring A\xF8 +test utf-6.8 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext A[testbytestring \xF8] } 1 -test utf-6.9 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0\x00 +test utf-6.9 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xA0\x00] } 1 -test utf-6.10 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0G +test utf-6.10 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xA0]G } 1 -test utf-6.11 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0\xA0\x00 +test utf-6.11 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xA0\xA0\x00] } 2 -test utf-6.12 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0\xD0 +test utf-6.12 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xA0\xD0] } 1 -test utf-6.13 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0\xE8 +test utf-6.13 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xA0\xE8] } 1 -test utf-6.14 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0\xF2 +test utf-6.14 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xA0\xF2] } 1 -test utf-6.15 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0\xF8 +test utf-6.15 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xA0\xF8] } 1 -test utf-6.16 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\x00 +test utf-6.16 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0\x00] } 1 -test utf-6.17 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0G +test utf-6.17 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0]G } 1 -test utf-6.18 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0 +test utf-6.18 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0\xA0] } 2 -test utf-6.19 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xD0 +test utf-6.19 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0\xD0] } 1 -test utf-6.20 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xE8 +test utf-6.20 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0\xE8] } 1 -test utf-6.21 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xF2 +test utf-6.21 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0\xF2] } 1 -test utf-6.22 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xF8 +test utf-6.22 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0\xF8] } 1 -test utf-6.23 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8 +test utf-6.23 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xE8] } 1 -test utf-6.24 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8G +test utf-6.24 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xE8]G } 1 -test utf-6.25 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\x00 +test utf-6.25 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xE8\xA0\x00] } 1 -test utf-6.26 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xD0 +test utf-6.26 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xE8\xD0] } 1 -test utf-6.27 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xE8 +test utf-6.27 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xE8\xE8] } 1 -test utf-6.28 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xF2 +test utf-6.28 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xE8\xF2] } 1 -test utf-6.29 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xF8 +test utf-6.29 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xE8\xF8] } 1 -test utf-6.30 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2 +test utf-6.30 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2] } 1 -test utf-6.31 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2G +test utf-6.31 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2]G } 1 -test utf-6.32 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0 +test utf-6.32 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0] } 1 -test utf-6.33 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xD0 +test utf-6.33 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xD0] } 1 -test utf-6.34 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xE8 +test utf-6.34 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xE8] } 1 -test utf-6.35 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xF2 +test utf-6.35 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xF2] } 1 -test utf-6.36 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xF8 +test utf-6.36 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xF8] } 1 -test utf-6.37 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8 +test utf-6.37 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF8] } 1 -test utf-6.38 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8G +test utf-6.38 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF8]G } 1 -test utf-6.39 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8\xA0 +test utf-6.39 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF8\xA0] } 1 -test utf-6.40 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8\xD0 +test utf-6.40 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF8\xD0] } 1 -test utf-6.41 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8\xE8 +test utf-6.41 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF8\xE8] } 1 -test utf-6.42 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8\xF2 +test utf-6.42 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF8\xF2] } 1 -test utf-6.43 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8\xF8 +test utf-6.43 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF8\xF8] } 1 -test utf-6.44 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0G +test utf-6.44 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0\xA0]G } 2 -test utf-6.45 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0\xA0 +test utf-6.45 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0\xA0\xA0] } 2 -test utf-6.46 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0\xD0 +test utf-6.46 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0\xA0\xD0] } 2 -test utf-6.47 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0\xE8 +test utf-6.47 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0\xA0\xE8] } 2 -test utf-6.48 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0\xF2 +test utf-6.48 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0\xA0\xF2] } 2 -test utf-6.49 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0\xF8 +test utf-6.49 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0\xA0\xF8] } 2 -test utf-6.50 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0G +test utf-6.50 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xE8\xA0]G } 1 test utf-6.51 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0 + testutfnext \u8820 } 3 -test utf-6.52 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xD0 +test utf-6.52 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xE8\xA0\xD0] } 1 -test utf-6.53 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xE8 +test utf-6.53 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xE8\xA0\xE8] } 1 -test utf-6.54 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xF2 +test utf-6.54 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xE8\xA0\xF2] } 1 -test utf-6.55 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xF8 +test utf-6.55 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xE8\xA0\xF8] } 1 -test utf-6.56 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0G +test utf-6.56 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0]G } 1 -test utf-6.57 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xA0\x00 +test utf-6.57 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0\xA0\x00] } 1 -test utf-6.58 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xD0 +test utf-6.58 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0\xD0] } 1 -test utf-6.59 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xE8 +test utf-6.59 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0\xE8] } 1 -test utf-6.60 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xF2 +test utf-6.60 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0\xF2] } 1 -test utf-6.61 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xF8 +test utf-6.61 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0\xF8] } 1 test utf-6.62 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0G + testutfnext \u8820G } 3 -test utf-6.63 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0\xA0 +test utf-6.63 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext \u8820[testbytestring \xA0] } 3 -test utf-6.64 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0\xD0 +test utf-6.64 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext \u8820[testbytestring \xD0] } 3 -test utf-6.65 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0\xE8 +test utf-6.65 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext \u8820[testbytestring \xE8] } 3 -test utf-6.66 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0\xF2 +test utf-6.66 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext \u8820[testbytestring \xF2] } 3 -test utf-6.67 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0\xF8 +test utf-6.67 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext \u8820[testbytestring \xF8] } 3 -test utf-6.68 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xA0G +test utf-6.68 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0\xA0]G } 1 test utf-6.69.0 {Tcl_UtfNext} {testutfnext tip389} { - testutfnext -bytestring \xF2\xA0\xA0\xA0 + testutfnext [testbytestring \xF2\xA0\xA0\xA0] } 4 test utf-6.69.1 {Tcl_UtfNext} {testutfnext ucs4} { - testutfnext -bytestring \xF2\xA0\xA0\xA0 + testutfnext [testbytestring \xF2\xA0\xA0\xA0] } 4 -test utf-6.70 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xA0\xD0 +test utf-6.70 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0\xA0\xD0] } 1 -test utf-6.71 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xA0\xE8 +test utf-6.71 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0\xA0\xE8] } 1 -test utf-6.72 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xA0\xF2 +test utf-6.72 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0\xA0\xF2] } 1 -test utf-6.73 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xA0\xF8 +test utf-6.73 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0\xA0\xF8] } 1 test utf-6.74.0 {Tcl_UtfNext} {testutfnext tip389} { - testutfnext -bytestring \xF2\xA0\xA0\xA0G + testutfnext [testbytestring \xF2\xA0\xA0\xA0]G } 4 test utf-6.74.1 {Tcl_UtfNext} {testutfnext ucs4} { - testutfnext -bytestring \xF2\xA0\xA0\xA0G + testutfnext [testbytestring \xF2\xA0\xA0\xA0]G } 4 test utf-6.75.0 {Tcl_UtfNext} {testutfnext tip389} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xA0 + testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] } 4 test utf-6.75.1 {Tcl_UtfNext} {testutfnext ucs4} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xA0 + testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] } 4 test utf-6.76.0 {Tcl_UtfNext} {testutfnext tip389} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xD0 + testutfnext [testbytestring \xF2\xA0\xA0\xA0\xD0] } 4 test utf-6.76.1 {Tcl_UtfNext} {testutfnext ucs4} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xD0 + testutfnext [testbytestring \xF2\xA0\xA0\xA0\xD0] } 4 test utf-6.77.0 {Tcl_UtfNext} {testutfnext tip389} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xE8 + testutfnext [testbytestring \xF2\xA0\xA0\xA0\xE8] } 4 test utf-6.77.1 {Tcl_UtfNext} {testutfnext ucs4} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xE8 + testutfnext [testbytestring \xF2\xA0\xA0\xA0\xE8] } 4 test utf-6.78.0 {Tcl_UtfNext} {testutfnext tip389} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xF2 + testutfnext [testbytestring \xF2\xA0\xA0\xA0\xF2] } 4 test utf-6.78.1 {Tcl_UtfNext} {testutfnext ucs4} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xF2 + testutfnext [testbytestring \xF2\xA0\xA0\xA0\xF2] } 4 test utf-6.79.0 {Tcl_UtfNext} {testutfnext tip389} { - testutfnext -bytestring \xF2\xA0\xA0\xA0G\xF8 + testutfnext [testbytestring \xF2\xA0\xA0\xA0G\xF8] } 4 test utf-6.79.1 {Tcl_UtfNext} {testutfnext ucs4} { - testutfnext -bytestring \xF2\xA0\xA0\xA0G\xF8 + testutfnext [testbytestring \xF2\xA0\xA0\xA0G\xF8] } 4 -test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xC0\x80 +test utf-6.80 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} { + testutfnext [testbytestring \xC0\x80] } 2 -test utf-6.81 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xC0\x81 +test utf-6.81 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} { + testutfnext [testbytestring \xC0\x81] } 1 -test utf-6.82 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xC1\x80 +test utf-6.82 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} { + testutfnext [testbytestring \xC1\x80] } 1 -test utf-6.83 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xC2\x80 +test utf-6.83 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} { + testutfnext [testbytestring \xC2\x80] } 2 -test utf-6.84 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xE0\x80\x80 +test utf-6.84 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} { + testutfnext [testbytestring \xE0\x80\x80] } 1 -test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xE0\xA0\x80 +test utf-6.85 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} { + testutfnext [testbytestring \xE0\xA0\x80] } 3 -test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xF0\x80\x80\x80 +test utf-6.86 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} { + testutfnext [testbytestring \xF0\x80\x80\x80] } 1 test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext tip389} { - testutfnext -bytestring \xF0\x90\x80\x80 + testutfnext [testbytestring \xF0\x90\x80\x80] } 4 test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext ucs4} { - testutfnext -bytestring \xF0\x90\x80\x80 + testutfnext [testbytestring \xF0\x90\x80\x80] } 4 -test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} testutfnext { - testutfnext -bytestring \xA0\xA0\x00 +test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring} { + testutfnext [testbytestring \xA0\xA0\x00] } 2 -test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} testutfnext { - testutfnext -bytestring \x80\x80\x00 +test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring} { + testutfnext [testbytestring \x80\x80\x00] } 2 test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext tip389} { - testutfnext -bytestring \xF4\x8F\xBF\xBF + testutfnext \uDBFF\uDFFF } 4 test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs4} { - testutfnext -bytestring \xF4\x8F\xBF\xBF + testutfnext \U10FFFF } 4 -test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext { - testutfnext -bytestring \xF4\x90\x80\x80 +test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring} { + testutfnext [testbytestring \xF4\x90\x80\x80] } 1 -test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { - testutfnext -bytestring \xA0\xA0\xA0 +test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring} { + testutfnext [testbytestring \xA0\xA0\xA0] } 3 -test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { - testutfnext -bytestring \x80\x80\x80 +test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring} { + testutfnext [testbytestring \x80\x80\x80] } 3 -test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { - testutfnext -bytestring \xA0\xA0\xA0\xA0 +test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring} { + testutfnext [testbytestring \xA0\xA0\xA0\xA0] } 3 -test utf-6.95 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { - testutfnext -bytestring \x80\x80\x80\x80 +test utf-6.95 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring} { + testutfnext [testbytestring \x80\x80\x80\x80] } 3 test utf-7.1 {Tcl_UtfPrev} testutfprev { @@ -512,335 +512,335 @@ test utf-7.2 {Tcl_UtfPrev} testutfprev { test utf-7.3 {Tcl_UtfPrev} testutfprev { testutfprev AA } 1 -test utf-7.4 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF8 +test utf-7.4 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF8] } 1 -test utf-7.4.1 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF8\xA0\xA0\xA0 2 +test utf-7.4.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF8\xA0\xA0\xA0] 2 } 1 -test utf-7.4.2 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF8\xF8\xA0\xA0 2 +test utf-7.4.2 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF8\xF8\xA0\xA0] 2 } 1 -test utf-7.5 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF2 +test utf-7.5 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF2] } 1 -test utf-7.5.1 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF2\xA0\xA0\xA0 2 +test utf-7.5.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 2 } 1 -test utf-7.5.2 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF2\xF8\xA0\xA0 2 +test utf-7.5.2 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF2\xF8\xA0\xA0] 2 } 1 -test utf-7.6 {Tcl_UtfPrev} testutfprev { - testutfprev A\xE8 +test utf-7.6 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE8] } 1 -test utf-7.6.1 {Tcl_UtfPrev} testutfprev { - testutfprev A\xE8\xA0\xA0\xA0 2 +test utf-7.6.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A\u8820[testbytestring \xA0] 2 } 1 -test utf-7.6.2 {Tcl_UtfPrev} testutfprev { - testutfprev A\xE8\xF8\xA0\xA0 2 +test utf-7.6.2 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE8\xF8\xA0\xA0] 2 } 1 -test utf-7.7 {Tcl_UtfPrev} testutfprev { - testutfprev A\xD0 +test utf-7.7 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xD0] } 1 -test utf-7.7.1 {Tcl_UtfPrev} testutfprev { - testutfprev A\xD0\xA0\xA0\xA0 2 +test utf-7.7.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xD0\xA0\xA0\xA0] 2 } 1 -test utf-7.7.2 {Tcl_UtfPrev} testutfprev { - testutfprev A\xD0\xF8\xA0\xA0 2 +test utf-7.7.2 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xD0\xF8\xA0\xA0] 2 } 1 -test utf-7.8 {Tcl_UtfPrev} testutfprev { - testutfprev A\xA0 +test utf-7.8 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xA0] } 1 -test utf-7.8.1 {Tcl_UtfPrev} testutfprev { - testutfprev A\xA0\xA0\xA0\xA0 2 +test utf-7.8.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xA0\xA0\xA0\xA0] 2 } 1 -test utf-7.8.2 {Tcl_UtfPrev} testutfprev { - testutfprev A\xA0\xF8\xA0\xA0 2 +test utf-7.8.2 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xA0\xF8\xA0\xA0] 2 } 1 -test utf-7.9 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF8\xA0 +test utf-7.9 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF8\xA0] } 2 -test utf-7.9.1 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF8\xA0\xA0\xA0 3 +test utf-7.9.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF8\xA0\xA0\xA0] 3 } 2 -test utf-7.9.2 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF8\xA0\xF8\xA0 3 +test utf-7.9.2 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF8\xA0\xF8\xA0] 3 } 2 -test utf-7.10.0 {Tcl_UtfPrev} {testutfprev tip389} { - testutfprev A\xF2\xA0 +test utf-7.10.0 {Tcl_UtfPrev} {testutfprev testbytestring tip389} { + testutfprev A[testbytestring \xF2\xA0] } 1 -test utf-7.10.1 {Tcl_UtfPrev} {testutfprev ucs4} { - testutfprev A\xF2\xA0 +test utf-7.10.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { + testutfprev A[testbytestring \xF2\xA0] } 1 -test utf-7.10.2 {Tcl_UtfPrev} {testutfprev tip389} { - testutfprev A\xF2\xA0\xA0\xA0 3 +test utf-7.10.2 {Tcl_UtfPrev} {testutfprev testbytestring tip389} { + testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 3 } 1 -test utf-7.10.3 {Tcl_UtfPrev} {testutfprev ucs4} { - testutfprev A\xF2\xA0\xA0\xA0 3 +test utf-7.10.3 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { + testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 3 } 1 -test utf-7.10.4 {Tcl_UtfPrev} {testutfprev tip389} { - testutfprev A\xF2\xA0\xF8\xA0 3 +test utf-7.10.4 {Tcl_UtfPrev} {testutfprev testbytestring tip389} { + testutfprev A[testbytestring \xF2\xA0\xF8\xA0] 3 } 1 -test utf-7.10.5 {Tcl_UtfPrev} {testutfprev ucs4} { - testutfprev A\xF2\xA0\xF8\xA0 3 +test utf-7.10.5 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { + testutfprev A[testbytestring \xF2\xA0\xF8\xA0] 3 } 1 -test utf-7.11 {Tcl_UtfPrev} testutfprev { - testutfprev A\xE8\xA0 +test utf-7.11 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE8\xA0] } 1 -test utf-7.11.1 {Tcl_UtfPrev} testutfprev { - testutfprev A\xE8\xA0\xA0\xA0 3 +test utf-7.11.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A\u8820[testbytestring \xA0] 3 } 1 -test utf-7.11.2 {Tcl_UtfPrev} testutfprev { - testutfprev A\xE8\xA0\xF8\xA0 3 +test utf-7.11.2 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE8\xA0\xF8\xA0] 3 } 1 -test utf-7.11.3 {Tcl_UtfPrev} testutfprev { - testutfprev A\xE8\xA0\xF8 3 +test utf-7.11.3 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE8\xA0\xF8] 3 } 1 -test utf-7.12 {Tcl_UtfPrev} testutfprev { - testutfprev A\xD0\xA0 +test utf-7.12 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xD0\xA0] } 1 -test utf-7.12.1 {Tcl_UtfPrev} testutfprev { - testutfprev A\xD0\xA0\xA0\xA0 3 +test utf-7.12.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xD0\xA0\xA0\xA0] 3 } 1 -test utf-7.12.2 {Tcl_UtfPrev} testutfprev { - testutfprev A\xD0\xA0\xF8\xA0 3 +test utf-7.12.2 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xD0\xA0\xF8\xA0] 3 } 1 -test utf-7.13 {Tcl_UtfPrev} testutfprev { - testutfprev A\xA0\xA0 +test utf-7.13 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xA0\xA0] } 2 -test utf-7.13.1 {Tcl_UtfPrev} testutfprev { - testutfprev A\xA0\xA0\xA0\xA0 3 +test utf-7.13.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xA0\xA0\xA0\xA0] 3 } 2 -test utf-7.13.2 {Tcl_UtfPrev} testutfprev { - testutfprev A\xA0\xA0\xF8\xA0 3 +test utf-7.13.2 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xA0\xA0\xF8\xA0] 3 } 2 -test utf-7.14 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF8\xA0\xA0 +test utf-7.14 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF8\xA0\xA0] } 3 -test utf-7.14.1 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF8\xA0\xA0\xA0 4 +test utf-7.14.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF8\xA0\xA0\xA0] 4 } 3 -test utf-7.14.2 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF8\xA0\xA0\xF8 4 +test utf-7.14.2 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF8\xA0\xA0\xF8] 4 } 3 -test utf-7.15.0 {Tcl_UtfPrev} {testutfprev tip389} { - testutfprev A\xF2\xA0\xA0 +test utf-7.15.0 {Tcl_UtfPrev} {testutfprev testbytestring tip389} { + testutfprev A[testbytestring \xF2\xA0\xA0] } 1 -test utf-7.15.1 {Tcl_UtfPrev} {testutfprev ucs4} { - testutfprev A\xF2\xA0\xA0 +test utf-7.15.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { + testutfprev A[testbytestring \xF2\xA0\xA0] } 1 -test utf-7.15.2 {Tcl_UtfPrev} {testutfprev tip389} { - testutfprev A\xF2\xA0\xA0\xA0 4 +test utf-7.15.2 {Tcl_UtfPrev} {testutfprev testbytestring tip389} { + testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 4 } 1 -test utf-7.15.3 {Tcl_UtfPrev} {testutfprev ucs4} { - testutfprev A\xF2\xA0\xA0\xA0 4 +test utf-7.15.3 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { + testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 4 } 1 -test utf-7.15.4 {Tcl_UtfPrev} {testutfprev tip389} { - testutfprev A\xF2\xA0\xA0\xF8 4 +test utf-7.15.4 {Tcl_UtfPrev} {testutfprev testbytestring tip389} { + testutfprev A[testbytestring \xF2\xA0\xA0\xF8] 4 } 1 -test utf-7.15.5 {Tcl_UtfPrev} {testutfprev ucs4} { - testutfprev A\xF2\xA0\xA0\xF8 4 +test utf-7.15.5 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { + testutfprev A[testbytestring \xF2\xA0\xA0\xF8] 4 } 1 test utf-7.16 {Tcl_UtfPrev} testutfprev { - testutfprev A\xE8\xA0\xA0 + testutfprev A\u8820 } 1 -test utf-7.16.1 {Tcl_UtfPrev} testutfprev { - testutfprev A\xE8\xA0\xA0\xA0 4 +test utf-7.16.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A\u8820[testbytestring \xA0] 4 } 1 -test utf-7.16.2 {Tcl_UtfPrev} testutfprev { - testutfprev A\xE8\xA0\xA0\xF8 4 +test utf-7.16.2 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A\u8820[testbytestring \xF8] 4 } 1 -test utf-7.17 {Tcl_UtfPrev} testutfprev { - testutfprev A\xD0\xA0\xA0 +test utf-7.17 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xD0\xA0\xA0] } 3 -test utf-7.17.1 {Tcl_UtfPrev} testutfprev { - testutfprev A\xD0\xA0\xA0\xA0 4 +test utf-7.17.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xD0\xA0\xA0\xA0] 4 } 3 -test utf-7.17.2 {Tcl_UtfPrev} testutfprev { - testutfprev A\xD0\xA0\xA0\xF8 4 +test utf-7.17.2 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xD0\xA0\xA0\xF8] 4 } 3 -test utf-7.18.0 {Tcl_UtfPrev} {testutfprev tip389} { - testutfprev A\xA0\xA0\xA0 +test utf-7.18.0 {Tcl_UtfPrev} {testutfprev testbytestring tip389} { + testutfprev A[testbytestring \xA0\xA0\xA0] } 3 -test utf-7.18.1 {Tcl_UtfPrev} {testutfprev ucs4} { - testutfprev A\xA0\xA0\xA0 +test utf-7.18.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { + testutfprev A[testbytestring \xA0\xA0\xA0] } 3 -test utf-7.18.2 {Tcl_UtfPrev} {testutfprev tip389} { - testutfprev A\xA0\xA0\xA0\xA0 4 +test utf-7.18.2 {Tcl_UtfPrev} {testutfprev testbytestring tip389} { + testutfprev A[testbytestring \xA0\xA0\xA0\xA0] 4 } 3 -test utf-7.18.3 {Tcl_UtfPrev} {testutfprev ucs4} { - testutfprev A\xA0\xA0\xA0\xA0 4 +test utf-7.18.3 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { + testutfprev A[testbytestring \xA0\xA0\xA0\xA0] 4 } 3 -test utf-7.18.4 {Tcl_UtfPrev} {testutfprev tip389} { - testutfprev A\xA0\xA0\xA0\xF8 4 +test utf-7.18.4 {Tcl_UtfPrev} {testutfprev testbytestring tip389} { + testutfprev A[testbytestring \xA0\xA0\xA0\xF8] 4 } 3 -test utf-7.18.5 {Tcl_UtfPrev} {testutfprev ucs4} { - testutfprev A\xA0\xA0\xA0\xF8 4 +test utf-7.18.5 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { + testutfprev A[testbytestring \xA0\xA0\xA0\xF8] 4 } 3 -test utf-7.19.0 {Tcl_UtfPrev} {testutfprev tip389} { - testutfprev A\xF8\xA0\xA0\xA0 +test utf-7.19.0 {Tcl_UtfPrev} {testutfprev testbytestring tip389} { + testutfprev A[testbytestring \xF8\xA0\xA0\xA0] } 4 -test utf-7.19.1 {Tcl_UtfPrev} {testutfprev ucs4} { - testutfprev A\xF8\xA0\xA0\xA0 +test utf-7.19.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { + testutfprev A[testbytestring \xF8\xA0\xA0\xA0] } 4 -test utf-7.20.0 {Tcl_UtfPrev} {testutfprev tip389} { - testutfprev A\xF2\xA0\xA0\xA0 +test utf-7.20.0 {Tcl_UtfPrev} {testutfprev testbytestring tip389} { + testutfprev A[testbytestring \xF2\xA0\xA0\xA0] } 1 -test utf-7.20.1 {Tcl_UtfPrev} {testutfprev ucs4} { - testutfprev A\xF2\xA0\xA0\xA0 +test utf-7.20.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { + testutfprev A[testbytestring \xF2\xA0\xA0\xA0] } 1 -test utf-7.21.0 {Tcl_UtfPrev} {testutfprev tip389} { - testutfprev A\xE8\xA0\xA0\xA0 +test utf-7.21.0 {Tcl_UtfPrev} {testutfprev testbytestring tip389} { + testutfprev A\u8820[testbytestring \xA0] } 4 -test utf-7.21.1 {Tcl_UtfPrev} {testutfprev ucs4} { - testutfprev A\xE8\xA0\xA0\xA0 +test utf-7.21.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { + testutfprev A\u8820[testbytestring \xA0] } 4 -test utf-7.22.0 {Tcl_UtfPrev} {testutfprev tip389} { - testutfprev A\xD0\xA0\xA0\xA0 +test utf-7.22.0 {Tcl_UtfPrev} {testutfprev testbytestring tip389} { + testutfprev A[testbytestring \xD0\xA0\xA0\xA0] } 4 -test utf-7.22.1 {Tcl_UtfPrev} {testutfprev ucs4} { - testutfprev A\xD0\xA0\xA0\xA0 +test utf-7.22.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { + testutfprev A[testbytestring \xD0\xA0\xA0\xA0] } 4 -test utf-7.23.0 {Tcl_UtfPrev} {testutfprev tip389} { - testutfprev A\xA0\xA0\xA0\xA0 +test utf-7.23.0 {Tcl_UtfPrev} {testutfprev testbytestring tip389} { + testutfprev A[testbytestring \xA0\xA0\xA0\xA0] } 4 -test utf-7.23.1 {Tcl_UtfPrev} {testutfprev ucs4} { - testutfprev A\xA0\xA0\xA0\xA0 +test utf-7.23.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { + testutfprev A[testbytestring \xA0\xA0\xA0\xA0] } 4 -test utf-7.24 {Tcl_UtfPrev -- overlong sequence} testutfprev { - testutfprev A\xC0\x81 +test utf-7.24 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xC0\x81] } 2 -test utf-7.25 {Tcl_UtfPrev -- overlong sequence} testutfprev { - testutfprev A\xC0\x81 2 +test utf-7.25 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xC0\x81] 2 } 1 -test utf-7.26 {Tcl_UtfPrev -- overlong sequence} testutfprev { - testutfprev A\xE0\x80\x80 +test utf-7.26 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE0\x80\x80] } 3 -test utf-7.27 {Tcl_UtfPrev -- overlong sequence} testutfprev { - testutfprev A\xE0\x80 +test utf-7.27 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE0\x80] } 2 -test utf-7.27.1 {Tcl_UtfPrev -- overlong sequence} testutfprev { - testutfprev A\xE0\x80\x80 3 +test utf-7.27.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE0\x80\x80] 3 } 2 -test utf-7.28 {Tcl_UtfPrev -- overlong sequence} testutfprev { - testutfprev A\xE0 +test utf-7.28 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE0] } 1 -test utf-7.28.1 {Tcl_UtfPrev -- overlong sequence} testutfprev { - testutfprev A\xE0\x80\x80 2 +test utf-7.28.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE0\x80\x80] 2 } 1 -test utf-7.29.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev tip389} { - testutfprev A\xF0\x80\x80\x80 +test utf-7.29.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring tip389} { + testutfprev A[testbytestring \xF0\x80\x80\x80] } 4 -test utf-7.29.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs4} { - testutfprev A\xF0\x80\x80\x80 +test utf-7.29.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs4} { + testutfprev A[testbytestring \xF0\x80\x80\x80] } 4 -test utf-7.30 {Tcl_UtfPrev -- overlong sequence} testutfprev { - testutfprev A\xF0\x80\x80\x80 4 +test utf-7.30 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF0\x80\x80\x80] 4 } 3 -test utf-7.31 {Tcl_UtfPrev -- overlong sequence} testutfprev { - testutfprev A\xF0\x80\x80\x80 3 +test utf-7.31 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF0\x80\x80\x80] 3 } 2 -test utf-7.32 {Tcl_UtfPrev -- overlong sequence} testutfprev { - testutfprev A\xF0\x80\x80\x80 2 +test utf-7.32 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF0\x80\x80\x80] 2 } 1 -test utf-7.33 {Tcl_UtfPrev -- overlong sequence} testutfprev { - testutfprev A\xC0\x80 +test utf-7.33 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xC0\x80] } 1 -test utf-7.34 {Tcl_UtfPrev -- overlong sequence} testutfprev { - testutfprev A\xC1\x80 +test utf-7.34 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xC1\x80] } 2 -test utf-7.35 {Tcl_UtfPrev -- overlong sequence} testutfprev { - testutfprev A\xC2\x80 +test utf-7.35 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xC2\x80] } 1 -test utf-7.36 {Tcl_UtfPrev -- overlong sequence} testutfprev { - testutfprev A\xE0\xA0\x80 +test utf-7.36 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE0\xA0\x80] } 1 -test utf-7.37 {Tcl_UtfPrev -- overlong sequence} testutfprev { - testutfprev A\xE0\xA0\x80 3 +test utf-7.37 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE0\xA0\x80] 3 } 1 -test utf-7.38 {Tcl_UtfPrev -- overlong sequence} testutfprev { - testutfprev A\xE0\xA0\x80 2 +test utf-7.38 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE0\xA0\x80] 2 } 1 -test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev tip389} { - testutfprev A\xF0\x90\x80\x80 +test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring tip389} { + testutfprev A[testbytestring \xF0\x90\x80\x80] } 1 -test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs4} { - testutfprev A\xF0\x90\x80\x80 +test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs4} { + testutfprev A[testbytestring \xF0\x90\x80\x80] } 1 -test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev tip389} { - testutfprev A\xF0\x90\x80\x80 4 +test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring tip389} { + testutfprev A[testbytestring \xF0\x90\x80\x80] 4 } 1 -test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs4} { - testutfprev A\xF0\x90\x80\x80 4 +test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs4} { + testutfprev A[testbytestring \xF0\x90\x80\x80] 4 } 1 -test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev tip389} { - testutfprev A\xF0\x90\x80\x80 3 +test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring tip389} { + testutfprev A[testbytestring \xF0\x90\x80\x80] 3 } 1 -test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs4} { - testutfprev A\xF0\x90\x80\x80 3 +test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs4} { + testutfprev A[testbytestring \xF0\x90\x80\x80] 3 } 1 -test utf-7.42 {Tcl_UtfPrev -- overlong sequence} testutfprev { - testutfprev A\xF0\x90\x80\x80 2 +test utf-7.42 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF0\x90\x80\x80] 2 } 1 -test utf-7.43 {Tcl_UtfPrev -- no lead byte at start} testutfprev { - testutfprev \xA0 +test utf-7.43 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring} { + testutfprev [testbytestring \xA0] } 0 -test utf-7.44 {Tcl_UtfPrev -- no lead byte at start} testutfprev { - testutfprev \xA0\xA0 +test utf-7.44 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring} { + testutfprev [testbytestring \xA0\xA0] } 1 -test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} testutfprev { - testutfprev \xA0\xA0\xA0 +test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring} { + testutfprev [testbytestring \xA0\xA0\xA0] } 2 -test utf-7.46.0 {Tcl_UtfPrev -- no lead byte at start} {testutfprev tip389} { - testutfprev \xA0\xA0\xA0\xA0 +test utf-7.46.0 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring tip389} { + testutfprev [testbytestring \xA0\xA0\xA0\xA0] } 3 -test utf-7.46.1 {Tcl_UtfPrev -- no lead byte at start} {testutfprev ucs4} { - testutfprev \xA0\xA0\xA0\xA0 +test utf-7.46.1 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring ucs4} { + testutfprev [testbytestring \xA0\xA0\xA0\xA0] } 3 -test utf-7.47 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} testutfprev { - testutfprev \xE8\xA0 +test utf-7.47 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev testbytestring} { + testutfprev [testbytestring \xE8\xA0] } 0 -test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} testutfprev { - testutfprev \xE8\xA0\xA0 2 +test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev testbytestring} { + testutfprev \u8820 2 } 0 -test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} testutfprev { - testutfprev \xE8\xA0\x00 2 +test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev testbytestring} { + testutfprev [testbytestring \xE8\xA0\x00] 2 } 0 -test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev tip389} { - testutfprev A\xF4\x8F\xBF\xBF +test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring tip389} { + testutfprev A\uDBFF\uDFFF } 1 -test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs4} { - testutfprev A\xF4\x8F\xBF\xBF +test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs4} { + testutfprev A\U10FFFF } 1 -test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev tip389} { - testutfprev A\xF4\x8F\xBF\xBF 4 +test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring tip389} { + testutfprev A\uDBFF\uDFFF 4 } 1 -test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs4} { - testutfprev A\xF4\x8F\xBF\xBF 4 +test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs4} { + testutfprev A\U10FFFF 4 } 1 -test utf-7.48.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev tip389} { - testutfprev A\xF4\x8F\xBF\xBF 3 +test utf-7.48.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring tip389} { + testutfprev A\uDBFF\uDFFF 3 } 1 -test utf-7.48.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs4} { - testutfprev A\xF4\x8F\xBF\xBF 3 +test utf-7.48.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs4} { + testutfprev A\U10FFFF 3 } 1 -test utf-7.48.6 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { - testutfprev A\xF4\x8F\xBF\xBF 2 +test utf-7.48.6 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { + testutfprev A\U10FFFF 2 } 1 -test utf-7.49.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev tip389} { - testutfprev A\xF4\x90\x80\x80 +test utf-7.49.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring tip389} { + testutfprev A[testbytestring \xF4\x90\x80\x80] } 4 -test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs4} { - testutfprev A\xF4\x90\x80\x80 +test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs4} { + testutfprev A[testbytestring \xF4\x90\x80\x80] } 4 -test utf-7.49.2 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { - testutfprev A\xF4\x90\x80\x80 4 +test utf-7.49.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF4\x90\x80\x80] 4 } 3 -test utf-7.49.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { - testutfprev A\xF4\x90\x80\x80 3 +test utf-7.49.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF4\x90\x80\x80] 3 } 2 -test utf-7.49.4 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { - testutfprev A\xF4\x90\x80\x80 2 +test utf-7.49.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF4\x90\x80\x80] 2 } 1 test utf-8.1 {Tcl_UniCharAtIndex: index = 0} { @@ -988,10 +988,10 @@ test utf-10.1 {Tcl_UtfBackslash: dst == NULL} { } { } test utf-10.2 {Tcl_UtfBackslash: \u subst} testbytestring { - expr {"\uA2" eq [testbytestring "\xC2\xA2"]} + expr {"\uA2" eq [testbytestring \xC2\xA2]} } 1 test utf-10.3 {Tcl_UtfBackslash: longer \u subst} testbytestring { - expr {"\u4E21" eq [testbytestring "\xE4\xB8\xA1"]} + expr {"\u4E21" eq [testbytestring \xE4\xB8\xA1]} } 1 test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} testbytestring { expr {"\u4E2k" eq "[testbytestring \xD3\xA2]k"} -- cgit v0.12 From e75fc19d85ecc92ab5b33dcc23d2a50a57c0d7b1 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 29 Apr 2020 19:00:10 +0000 Subject: Add UNICODE_OUT_OF_RANGE() calls to UCS4ToTitle() and friends. Backported from 8.7. This fixes [69634d51fb]: handling out of range UCS4 values (at least, it's fixed in 8.6 now the same way as it's fixed in 8.7). --- generic/tclUtf.c | 48 +++++++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 0e9561d..3746586 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -427,7 +427,7 @@ Tcl_UtfToUniChar( * represents itself. */ } - else if (byte < 0xF8) { + else if (byte < 0xF5) { if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) { /* * Four-byte-character lead byte followed by three trail bytes. @@ -1389,12 +1389,14 @@ static int UCS4ToUpper( int ch) /* Unicode character to convert. */ { - int info = GetUniCharInfo(ch); + if (!UNICODE_OUT_OF_RANGE(ch)) { + int info = GetUniCharInfo(ch); - if (GetCaseType(info) & 0x04) { - ch -= GetDelta(info); + if (GetCaseType(info) & 0x04) { + ch -= GetDelta(info); + } } - return ch; + return ch & 0x1FFFFF; } Tcl_UniChar @@ -1424,13 +1426,15 @@ static int UCS4ToLower( int ch) /* Unicode character to convert. */ { - int info = GetUniCharInfo(ch); - int mode = GetCaseType(info); + if (!UNICODE_OUT_OF_RANGE(ch)) { + int info = GetUniCharInfo(ch); + int mode = GetCaseType(info); - if ((mode & 0x02) && (mode != 0x7)) { - ch += GetDelta(info); + if ((mode & 0x02) && (mode != 0x7)) { + ch += GetDelta(info); + } } - return ch; + return ch & 0x1FFFFF; } Tcl_UniChar @@ -1460,21 +1464,23 @@ static int UCS4ToTitle( int ch) /* Unicode character to convert. */ { - int info = GetUniCharInfo(ch); - int mode = GetCaseType(info); + if (!UNICODE_OUT_OF_RANGE(ch)) { + int info = GetUniCharInfo(ch); + int mode = GetCaseType(info); - if (mode & 0x1) { - /* - * Subtract or add one depending on the original case. - */ + if (mode & 0x1) { + /* + * Subtract or add one depending on the original case. + */ - if (mode != 0x7) { - ch += ((mode & 0x4) ? -1 : 1); + if (mode != 0x7) { + ch += ((mode & 0x4) ? -1 : 1); + } + } else if (mode == 0x4) { + ch -= GetDelta(info); } - } else if (mode == 0x4) { - ch -= GetDelta(info); } - return ch; + return ch & 0x1FFFFF; } Tcl_UniChar -- cgit v0.12 From b75fb42503acd37c3418a65e3ef5269571349ccc Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 30 Apr 2020 12:09:41 +0000 Subject: Partial fix for [ed29806ba]: Tcl_UtfToUniChar reads more than TCL_UTF_MAX bytes. --- generic/tclUtf.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 3746586..02952cb 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -428,25 +428,27 @@ Tcl_UtfToUniChar( */ } else if (byte < 0xF5) { - if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) { + if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80)) { /* - * Four-byte-character lead byte followed by three trail bytes. + * Four-byte-character lead byte followed by at least two trail bytes. + * (validity of 3th trail byte will be tested later) */ #if TCL_UTF_MAX <= 4 Tcl_UniChar high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2) | ((src[2] & 0x3F) >> 4)) - 0x40; - if (high >= 0x400) { - /* out of range, < 0x10000 or > 0x10FFFF */ - } else { + if ((high < 0x400) && ((src[3] & 0xC0) == 0x80)) { /* produce high surrogate, advance source pointer */ *chPtr = 0xD800 + high; return 1; } + /* out of range, < 0x10000 or > 0x10FFFF or invalid 3th byte */ #else - *chPtr = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12) - | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)); - if ((unsigned)(*chPtr - 0x10000) <= 0xFFFFF) { - return 4; + if ((src[3] & 0xC0) == 0x80) { + *chPtr = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12) + | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)); + if ((unsigned)(*chPtr - 0x10000) <= 0xFFFFF) { + return 4; + } } #endif } -- cgit v0.12 From 4c7e61bdbca84ca47a3400996676243ae26237f6 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 30 Apr 2020 12:51:38 +0000 Subject: Let's not get out the src[3] check yet. --- generic/tclUtf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 802610c..e1cde17 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -601,7 +601,7 @@ Tcl_UtfToChar16( */ Tcl_UniChar high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2) | ((src[2] & 0x3F) >> 4)) - 0x40; - if ((high < 0x400) /*&& ((src[3] & 0xC0) == 0x80)*/) { + if ((high < 0x400) && ((src[3] & 0xC0) == 0x80)) { /* produce high surrogate, advance source pointer */ *chPtr = 0xD800 + high; return 1; -- cgit v0.12 From 9a8559d4cb683fe7f03d28704ec74cbc08835fb2 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 30 Apr 2020 14:54:59 +0000 Subject: First, prove that bug [ed29806baf] is present in 8.7 too. Let's see what test-cases fail when we no longer check the validity of the 3th trail byte. --- generic/tclUtf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index e1cde17..693651d 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -597,16 +597,16 @@ Tcl_UtfToChar16( if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80)) { /* * Four-byte-character lead byte followed by at least two trail bytes. - * (validity of 3th trail byte will be tested later) + * We don't test the validity of 3th trail byte, see [ed29806ba] */ Tcl_UniChar high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2) | ((src[2] & 0x3F) >> 4)) - 0x40; - if ((high < 0x400) && ((src[3] & 0xC0) == 0x80)) { + if (high < 0x400) { /* produce high surrogate, advance source pointer */ *chPtr = 0xD800 + high; return 1; } - /* out of range, < 0x10000 or > 0x10FFFF or invalid 3th byte */ + /* out of range, < 0x10000 or > 0x10FFFF */ } /* -- cgit v0.12 From 032a198c31a28d021f89780cf07fad0c71a12557 Mon Sep 17 00:00:00 2001 From: oehhar Date: Thu, 30 Apr 2020 15:35:25 +0000 Subject: Win: use physical host DNS name instead cluster name. Ticket [da235271f1] --- win/tclWinSock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/win/tclWinSock.c b/win/tclWinSock.c index 1fa9f73..fdb7e12 100644 --- a/win/tclWinSock.c +++ b/win/tclWinSock.c @@ -347,7 +347,7 @@ InitializeHostName( DWORD length = sizeof(wbuf)/sizeof(WCHAR); Tcl_DString ds; - if (GetComputerNameExW(ComputerNameDnsFullyQualified, wbuf, &length) != 0) { + if (GetComputerNameExW(ComputerNamePhysicalDnsFullyQualified, wbuf, &length) != 0) { /* * Convert string from native to UTF then change to lowercase. */ -- cgit v0.12 From 62c00ac54a6f93ad1324d7e7aa5ef43623ca2415 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 1 May 2020 08:51:09 +0000 Subject: Fix [ed29806baf] by introducing TclUCS4Complete(). All other calls of Tcl_UtfToUniChar() are suspicious, because those cannot handle 4-byte UTF-8 sequences reliable. So, there's more work to do, but this part can already be backported to Tcl 8.6 and see where we get. --- generic/tclDecls.h | 2 +- generic/tclEncoding.c | 14 +++++++------- generic/tclInt.h | 5 ++++- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 4531be3..c713469 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -4181,7 +4181,7 @@ extern const TclStubs *tclStubsPtr; #if defined(USE_TCL_STUBS) && (TCL_UTF_MAX > 3) # undef Tcl_UtfCharComplete # define Tcl_UtfCharComplete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \ - ? 4 : tclStubsPtr->tcl_UtfCharComplete((src), (length))) + ? ((length) >= 4) : tclStubsPtr->tcl_UtfCharComplete((src), (length))) #endif #endif /* _TCLDECLS */ diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 4789b7f..422627b 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2300,7 +2300,7 @@ UtfToUtfProc( const char *srcStart, *srcEnd, *srcClose; const char *dstStart, *dstEnd; int result, numChars, charLimit = INT_MAX; - Tcl_UniChar *chPtr = (Tcl_UniChar *) statePtr; + int *chPtr = (int *) statePtr; if (flags & TCL_ENCODING_START) { *statePtr = 0; @@ -2321,7 +2321,7 @@ UtfToUtfProc( dstEnd = dst + dstLen - TCL_UTF_MAX; for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { - if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { + if ((src > srcClose) && (!TclUCS4Complete(src, srcEnd - src))) { /* * If there is more string to follow, this will ensure that the * last UTF-8 character in the source buffer hasn't been cut off. @@ -2349,9 +2349,9 @@ UtfToUtfProc( *dst++ = 0; src += 2; - } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { + } else if (!TclUCS4Complete(src, srcEnd - src)) { /* - * Always check before using TclUtfToUniChar. Not doing can so + * Always check before using TclUtfToUCS4. Not doing can so * cause it run beyond the end of the buffer! If we happen such an * incomplete char its bytes are made to represent themselves. */ @@ -2360,11 +2360,11 @@ UtfToUtfProc( src += 1; dst += Tcl_UniCharToUtf(*chPtr, dst); } else { - src += TclUtfToUniChar(src, chPtr); + src += TclUtfToUCS4(src, chPtr); if ((*chPtr | 0x7FF) == 0xDFFF) { /* A surrogate character is detected, handle especially */ - Tcl_UniChar low = *chPtr; - size_t len = (src <= srcEnd-3) ? Tcl_UtfToUniChar(src, &low) : 0; + int low = *chPtr; + size_t len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0; if (((low | 0x3FF) != 0xDFFF) || (*chPtr & 0x400)) { *dst++ = (char) (((*chPtr >> 12) | 0xE0) & 0xEF); *dst++ = (char) (((*chPtr >> 6) | 0x80) & 0xBF); diff --git a/generic/tclInt.h b/generic/tclInt.h index 2ff644e..5f660e3 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3252,8 +3252,11 @@ MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct); MODULE_SCOPE int TclUtfCount(int ch); #if TCL_UTF_MAX > 3 # define TclUtfToUCS4 Tcl_UtfToUniChar +# define TclUCS4Complete Tcl_UtfCharComplete #else - MODULE_SCOPE int TclUtfToUCS4(const char *src, int *ucs4Ptr); + MODULE_SCOPE int TclUtfToUCS4(const char *src, int *ucs4Ptr); +# define TclUCS4Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \ + ? ((length) >= 4) : Tcl_UtfCharComplete((src), (length))) #endif MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(ClientData clientData); MODULE_SCOPE Tcl_Obj * TclpFilesystemPathType(Tcl_Obj *pathPtr); -- cgit v0.12 From 87969cfb0ee1d53c7034f96f34cc443c7878c36c Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 1 May 2020 14:53:58 +0000 Subject: Never mind remark about TIP #573 in previous commit: Test failure was due to typo. Fixed now. --- generic/tclEncoding.c | 2 +- tests/encoding.test | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 444f99e..ae02821 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2367,7 +2367,7 @@ UtfToUtfProc( /* A surrogate character is detected, handle especially */ int low = *chPtr; size_t len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0; - if (((low & ~0x3FF) != 0xC00) || (*chPtr & 0x400)) { + if (((low & ~0x3FF) != 0xDC00) || (*chPtr & 0x400)) { *dst++ = (char) (((*chPtr >> 12) | 0xE0) & 0xEF); *dst++ = (char) (((*chPtr >> 6) | 0x80) & 0xBF); *dst++ = (char) ((*chPtr | 0x80) & 0xBF); diff --git a/tests/encoding.test b/tests/encoding.test index 04692b7..e313a29 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -36,7 +36,6 @@ proc runtests {} { testConstraint testencoding [llength [info commands testencoding]] testConstraint testbytestring [llength [info commands testbytestring]] testConstraint teststringbytes [llength [info commands teststringbytes]] -testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] testConstraint exec [llength [info commands exec]] testConstraint testgetencpath [llength [info commands testgetencpath]] @@ -322,7 +321,7 @@ test encoding-15.3 {UtfToUtfProc null character input} teststringbytes { binary scan [teststringbytes $y] H* z set z } c080 -test encoding-15.4 {UtfToUtfProc emoji character input} -constraints ucs2 -body { +test encoding-15.4 {UtfToUtfProc emoji character input} -body { set x \xED\xA0\xBD\xED\xB8\x82 set y [encoding convertfrom utf-8 \xED\xA0\xBD\xED\xB8\x82] list [string length $x] $y -- cgit v0.12 From cc90266615cb98853ebc61301119d1f7a3718d7a Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 2 May 2020 10:15:29 +0000 Subject: More fixes for [ed29806baf]. Not working yet. WIP --- generic/tclEncoding.c | 2 +- generic/tclInt.h | 7 ++++++- generic/tclUtf.c | 58 ++++++++++++++++++++++++++++++++++++--------------- tests/utf.test | 45 ++++++++++++++++++++------------------- 4 files changed, 71 insertions(+), 41 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 1584de0..5c7aab8 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2341,7 +2341,7 @@ UtfToUtfProc( *dst++ = 0; *chPtr = 0; /* reset surrogate handling */ src += 2; - } else if (!TclUCS4Complete(src, srcEnd - src)) { + } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { /* * Always check before using TclUtfToUniChar. Not doing can so * cause it run beyond the end of the buffer! If we happen such an diff --git a/generic/tclInt.h b/generic/tclInt.h index 593d878..5c46470 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3184,8 +3184,13 @@ MODULE_SCOPE int TclTrimRight(const char *bytes, int numBytes, const char *trim, int numTrim); MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct); MODULE_SCOPE int TclUtfToUCS4(const char *src, int *ucs4Ptr); +/* + * Bytes F0-F4 are start-bytes for 4-byte sequences. + * Byte 0xED can be the start-byte of an upper surrogate. In that case, + * TclUtfToUCS4() might read the lower surrogate following it too. + */ # define TclUCS4Complete(src, length) (((unsigned)(UCHAR(*(src)) - 0xF0) < 5) \ - ? ((length) >= 4) : Tcl_UtfCharComplete((src), (length))) + ? ((length) >= 4) : (UCHAR(*(src)) == 0xED) ? ((length) >= 6) : Tcl_UtfCharComplete((src), (length))) MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(ClientData clientData); MODULE_SCOPE Tcl_Obj * TclpFilesystemPathType(Tcl_Obj *pathPtr); MODULE_SCOPE int TclpDlopen(Tcl_Interp *interp, Tcl_Obj *pathPtr, diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 9ffbfba..9375a01 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -81,6 +81,28 @@ static const unsigned char totalBytes[256] = { 1,1,1,1,1,1,1,1,1,1,1 }; +static const unsigned char complete[256] = { + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +#if TCL_UTF_MAX > 4 + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +#else /* Tcl_UtfCharComplete() might point to 2nd byte of valid 4-byte sequence */ + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +#endif + 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +#if TCL_UTF_MAX > 3 + 4,4,4,4,4, +#else + 1,1,1,1,1, +#endif + 1,1,1,1,1,1,1,1,1,1,1 +}; + /* * Functions used only in this module. */ @@ -359,8 +381,8 @@ Tcl_UniCharToUtfDString( int Tcl_UtfToUniChar( - register const char *src, /* The UTF-8 string. */ - register Tcl_UniChar *chPtr)/* Filled with the Tcl_UniChar represented by + const char *src, /* The UTF-8 string. */ + Tcl_UniChar *chPtr)/* Filled with the Tcl_UniChar represented by * the UTF-8 string. */ { Tcl_UniChar byte; @@ -557,7 +579,7 @@ Tcl_UtfCharComplete( * a complete UTF-8 character. */ int length) /* Length of above string in bytes. */ { - return length >= totalBytes[UCHAR(*src)]; + return length >= complete[UCHAR(*src)]; } /* @@ -580,12 +602,12 @@ Tcl_UtfCharComplete( int Tcl_NumUtfChars( - register const char *src, /* The UTF-8 string to measure. */ + const char *src, /* The UTF-8 string to measure. */ int length) /* The length of the string in bytes, or -1 * for strlen(string). */ { Tcl_UniChar ch = 0; - register int i = 0; + int i = 0; /* * The separate implementations are faster. @@ -601,27 +623,29 @@ Tcl_NumUtfChars( } if (i < 0) i = INT_MAX; /* Bug [2738427] */ } else { - register const char *endPtr = src + length - TCL_UTF_MAX; + const char *endPtr = src + length - TCL_UTF_MAX; while (src < endPtr) { +#if TCL_UTF_MAX < 4 if (((unsigned)UCHAR(*src) - 0xF0) < 5) { /* treat F0 - F4 as single character */ ch = 0; src++; - } else { - src += TclUtfToUniChar(src, &ch); - } + } else +#endif + src += TclUtfToUniChar(src, &ch); i++; } endPtr += TCL_UTF_MAX; while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { +#if TCL_UTF_MAX < 4 if (((unsigned)UCHAR(*src) - 0xF0) < 5) { /* treat F0 - F4 as single character */ ch = 0; src++; - } else { - src += TclUtfToUniChar(src, &ch); - } + } else +#endif + src += TclUtfToUniChar(src, &ch); i++; } if (src < endPtr) { @@ -890,8 +914,8 @@ Tcl_UtfPrev( Tcl_UniChar Tcl_UniCharAtIndex( - register const char *src, /* The UTF-8 string to dereference. */ - register int index) /* The position of the desired character. */ + const char *src, /* The UTF-8 string to dereference. */ + int index) /* The position of the desired character. */ { Tcl_UniChar ch = 0; @@ -918,8 +942,8 @@ Tcl_UniCharAtIndex( const char * Tcl_UtfAtIndex( - register const char *src, /* The UTF-8 string. */ - register int index) /* The position of the desired character. */ + const char *src, /* The UTF-8 string. */ + int index) /* The position of the desired character. */ { Tcl_UniChar ch = 0; int len = 0; @@ -1191,7 +1215,7 @@ TclpUtfNcmp2( * fine in the strcmp manner. */ - register int result = 0; + int result = 0; for ( ; numBytes != 0; numBytes--, cs++, ct++) { if (*cs != *ct) { diff --git a/tests/utf.test b/tests/utf.test index 0929801..50351cb 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -29,6 +29,7 @@ testConstraint pre388 [eq \x741 A] testConstraint pairsTo4bytes [expr {[llength [info commands teststringbytes]] && [string length [teststringbytes \uD83D\uDCA9]] == 4}] +testConstraint teststringbytes [llength [info commands teststringbytes]] testConstraint testbytestring [llength [info commands testbytestring]] testConstraint testfindfirst [llength [info commands testfindfirst]] testConstraint testfindlast [llength [info commands testfindlast]] @@ -501,7 +502,7 @@ test utf-6.93.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} { } 1 test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext fullutf} { testutfnext \x80\x80\x80 -} 1 +} 3 test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { testutfnext \xA0\xA0\xA0\xA0 } 1 @@ -601,18 +602,18 @@ test utf-6.117.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { test utf-6.118 {Tcl_UtfNext, read limits} testutfnext { testutfnext \xA0G 0 } 0 -test utf-6.119 {Tcl_UtfNext, read limits} {testutfnext ucs2} { +test utf-6.119 {Tcl_UtfNext, read limits} testutfnext { testutfnext \xA0G 1 -} 1 -test utf-6.120 {Tcl_UtfNext, read limits} {testutfnext ucs2} { +} 0 +test utf-6.120 {Tcl_UtfNext, read limits} testutfnext { testutfnext \xA0\xA0 1 -} 1 -test utf-6.121 {Tcl_UtfNext, read limits} {testutfnext ucs2} { +} 0 +test utf-6.121 {Tcl_UtfNext, read limits} testutfnext { testutfnext \xA0\xA0G 2 -} 1 -test utf-6.122 {Tcl_UtfNext, read limits} {testutfnext ucs2} { +} 0 +test utf-6.122 {Tcl_UtfNext, read limits} testutfnext { testutfnext \xA0\xA0\xA0 2 -} 1 +} 0 test utf-6.123 {Tcl_UtfNext, read limits} testutfnext { testutfnext \xA0\xA0\xA0G 3 } 1 @@ -990,9 +991,9 @@ test utf-8.5.0 {Tcl_UniCharAtIndex: high surrogate} ucs2 { test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} ucs4 { string index \uD842 0 } "\uD842" -test utf-8.5.2 {Tcl_UniCharAtIndex: high surrogate} tip389 { - string index \uD842 0 -} "\uD842" +test utf-8.5.2 {Tcl_UniCharAtIndex: high surrogate} {teststringbytes tip389} { + teststringbytes [string index \uD842 0] +} \xF0 test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} { string index \uDC42 0 } "\uDC42" @@ -1002,18 +1003,18 @@ test utf-8.7.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { test utf-8.7.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \uD83D\uDE00G 0 } "\U1F600" -test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} tip389 { - string index \uD83D\uDE00G 0 -} "\U1F600" +test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} {teststringbytes tip389} { + teststringbytes [string index \uD83D\uDE00G 0] +} \xF0 test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index \uD83D\uDE00G 1 } "\uDE00" test utf-8.8.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \uD83D\uDE00G 1 } G -test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} tip389 { - string index \uD83D\uDE00G 1 -} {} +test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} {teststringbytes tip389} { + teststringbytes [string index \uD83D\uDE00G 1] +} \xED\xB8\x80 test utf-8.9.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index \uD83D\uDE00G 2 } G @@ -1029,9 +1030,9 @@ test utf-8.10.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { test utf-8.10.1 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs4} { string index \U1F600G 0 } "\U1F600" -test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389} { - string index \U1F600G 0 -} "\U1F600" +test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} {Uesc teststringbytes tip389} { + teststringbytes [string index \U1F600G 0] +} \xF0 test utf-8.11.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { string index \U1F600G 1 } G @@ -1040,7 +1041,7 @@ test utf-8.11.1 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs4} { } G test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389} { string index \U1F600G 1 -} {} +} \uDE00 test utf-8.12.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { string index \U1F600G 2 } {} -- cgit v0.12 From 797bc84ed2bc1f24a8adc1e42a91ef90d2c0c91f Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 2 May 2020 21:54:14 +0000 Subject: Seems almost correct. Still problem with "string index" for TCL_UTF_MAX>3 --- generic/tclUtf.c | 25 +++++------- tests/utf.test | 115 +++++++++++++++++++++++++++---------------------------- 2 files changed, 66 insertions(+), 74 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 9375a01..03a7ca9 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -64,13 +64,10 @@ static const unsigned char totalBytes[256] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -#if TCL_UTF_MAX != 4 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -#else /* Tcl_UtfCharComplete() might point to 2nd byte of valid 4-byte sequence */ +/* Tcl_UtfCharComplete() might point to 2nd byte of valid 4-byte sequence */ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, -#endif +/* End of "continuation byte section" */ 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, #if TCL_UTF_MAX > 3 @@ -80,7 +77,7 @@ static const unsigned char totalBytes[256] = { #endif 1,1,1,1,1,1,1,1,1,1,1 }; - + static const unsigned char complete[256] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, @@ -95,7 +92,7 @@ static const unsigned char complete[256] = { #endif 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, -#if TCL_UTF_MAX > 3 +#if TCL_UTF_MAX > 4 4,4,4,4,4, #else 1,1,1,1,1, @@ -626,26 +623,24 @@ Tcl_NumUtfChars( const char *endPtr = src + length - TCL_UTF_MAX; while (src < endPtr) { -#if TCL_UTF_MAX < 4 if (((unsigned)UCHAR(*src) - 0xF0) < 5) { /* treat F0 - F4 as single character */ ch = 0; src++; - } else -#endif - src += TclUtfToUniChar(src, &ch); + } else { + src += TclUtfToUniChar(src, &ch); + } i++; } endPtr += TCL_UTF_MAX; while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { -#if TCL_UTF_MAX < 4 if (((unsigned)UCHAR(*src) - 0xF0) < 5) { /* treat F0 - F4 as single character */ ch = 0; src++; - } else -#endif - src += TclUtfToUniChar(src, &ch); + } else { + src += TclUtfToUniChar(src, &ch); + } i++; } if (src < endPtr) { diff --git a/tests/utf.test b/tests/utf.test index 50351cb..71b4978 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -398,7 +398,7 @@ test utf-6.68 {Tcl_UtfNext} testutfnext { test utf-6.69.0 {Tcl_UtfNext} {testutfnext ucs2} { testutfnext \xF2\xA0\xA0\xA0 } 1 -test utf-6.69.1 {Tcl_UtfNext} {testutfnext fullutf} { +test utf-6.69.1 {Tcl_UtfNext} {testutfnext ucs4} { testutfnext \xF2\xA0\xA0\xA0 } 4 test utf-6.70 {Tcl_UtfNext} testutfnext { @@ -416,37 +416,37 @@ test utf-6.73 {Tcl_UtfNext} testutfnext { test utf-6.74.0 {Tcl_UtfNext} {testutfnext ucs2} { testutfnext \xF2\xA0\xA0\xA0G } 1 -test utf-6.74.1 {Tcl_UtfNext} {testutfnext fullutf} { +test utf-6.74.1 {Tcl_UtfNext} {testutfnext ucs4} { testutfnext \xF2\xA0\xA0\xA0G } 4 test utf-6.75.0 {Tcl_UtfNext} {testutfnext ucs2} { testutfnext \xF2\xA0\xA0\xA0\xA0 } 1 -test utf-6.75.1 {Tcl_UtfNext} {testutfnext fullutf} { +test utf-6.75.1 {Tcl_UtfNext} {testutfnext ucs4} { testutfnext \xF2\xA0\xA0\xA0\xA0 } 4 test utf-6.76.0 {Tcl_UtfNext} {testutfnext ucs2} { testutfnext \xF2\xA0\xA0\xA0\xD0 } 1 -test utf-6.76.1 {Tcl_UtfNext} {testutfnext fullutf} { +test utf-6.76.1 {Tcl_UtfNext} {testutfnext ucs4} { testutfnext \xF2\xA0\xA0\xA0\xD0 } 4 test utf-6.77.0 {Tcl_UtfNext} {testutfnext ucs2} { testutfnext \xF2\xA0\xA0\xA0\xE8 } 1 -test utf-6.77.1 {Tcl_UtfNext} {testutfnext fullutf} { +test utf-6.77.1 {Tcl_UtfNext} {testutfnext ucs4} { testutfnext \xF2\xA0\xA0\xA0\xE8 } 4 test utf-6.78.0 {Tcl_UtfNext} {testutfnext ucs2} { testutfnext \xF2\xA0\xA0\xA0\xF2 } 1 -test utf-6.78.1 {Tcl_UtfNext} {testutfnext fullutf} { +test utf-6.78.1 {Tcl_UtfNext} {testutfnext ucs4} { testutfnext \xF2\xA0\xA0\xA0\xF2 } 4 test utf-6.79.0 {Tcl_UtfNext} {testutfnext ucs2} { testutfnext \xF2\xA0\xA0\xA0G\xF8 } 1 -test utf-6.79.1 {Tcl_UtfNext} {testutfnext fullutf} { +test utf-6.79.1 {Tcl_UtfNext} {testutfnext ucs4} { testutfnext \xF2\xA0\xA0\xA0G\xF8 } 4 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext { @@ -473,7 +473,7 @@ test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext { test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext ucs2} { testutfnext \xF0\x90\x80\x80 } 1 -test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} { +test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext ucs4} { testutfnext \xF0\x90\x80\x80 } 4 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} testutfnext { @@ -485,7 +485,7 @@ test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} tes test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs2} { testutfnext \xF4\x8F\xBF\xBF } 1 -test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} { +test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs4} { testutfnext \xF4\x8F\xBF\xBF } 4 test utf-6.91.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs2} { @@ -497,12 +497,9 @@ test utf-6.91.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { testutfnext \xA0\xA0\xA0 } 1 -test utf-6.93.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext ucs2} { +test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { testutfnext \x80\x80\x80 } 1 -test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext fullutf} { - testutfnext \x80\x80\x80 -} 3 test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { testutfnext \xA0\xA0\xA0\xA0 } 1 @@ -554,64 +551,64 @@ test utf-6.109 {Tcl_UtfNext, read limits} testutfnext { test utf-6.110.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} { testutfnext \xF2\xA0\xA0\xA0G 1 } 1 -test utf-6.110.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { +test utf-6.110.1 {Tcl_UtfNext, read limits} {testutfnext ucs4} { testutfnext \xF2\xA0\xA0\xA0G 1 } 0 test utf-6.111.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} { testutfnext \xF2\xA0\xA0\xA0G 2 } 1 -test utf-6.111.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { +test utf-6.111.1 {Tcl_UtfNext, read limits} {testutfnext ucs4} { testutfnext \xF2\xA0\xA0\xA0G 2 } 0 test utf-6.112.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} { testutfnext \xF2\xA0\xA0\xA0G 3 } 1 -test utf-6.112.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { +test utf-6.112.1 {Tcl_UtfNext, read limits} {testutfnext ucs4} { testutfnext \xF2\xA0\xA0\xA0G 3 } 0 test utf-6.113.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} { testutfnext \xF2\xA0\xA0\xA0G 4 } 1 -test utf-6.113.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { +test utf-6.113.1 {Tcl_UtfNext, read limits} {testutfnext ucs4} { testutfnext \xF2\xA0\xA0\xA0G 4 } 4 test utf-6.114.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} { testutfnext \xF2\xA0\xA0\xA0\xA0 1 } 1 -test utf-6.114.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { +test utf-6.114.1 {Tcl_UtfNext, read limits} {testutfnext ucs4} { testutfnext \xF2\xA0\xA0\xA0\xA0 1 } 0 test utf-6.115.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} { testutfnext \xF2\xA0\xA0\xA0\xA0 2 } 1 -test utf-6.115.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { +test utf-6.115.1 {Tcl_UtfNext, read limits} {testutfnext ucs4} { testutfnext \xF2\xA0\xA0\xA0\xA0 2 } 0 test utf-6.116.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} { testutfnext \xF2\xA0\xA0\xA0\xA0 3 } 1 -test utf-6.116.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { +test utf-6.116.1 {Tcl_UtfNext, read limits} {testutfnext ucs4} { testutfnext \xF2\xA0\xA0\xA0\xA0 3 } 0 test utf-6.117.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} { testutfnext \xF2\xA0\xA0\xA0\xA0 4 } 1 -test utf-6.117.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { +test utf-6.117.1 {Tcl_UtfNext, read limits} {testutfnext ucs4} { testutfnext \xF2\xA0\xA0\xA0\xA0 4 } 4 test utf-6.118 {Tcl_UtfNext, read limits} testutfnext { testutfnext \xA0G 0 } 0 -test utf-6.119 {Tcl_UtfNext, read limits} testutfnext { +test utf-6.119 {Tcl_UtfNext, read limits} {testutfnext ucs2} { testutfnext \xA0G 1 } 0 -test utf-6.120 {Tcl_UtfNext, read limits} testutfnext { +test utf-6.120 {Tcl_UtfNext, read limits} {testutfnext ucs2} { testutfnext \xA0\xA0 1 } 0 -test utf-6.121 {Tcl_UtfNext, read limits} testutfnext { +test utf-6.121 {Tcl_UtfNext, read limits} {testutfnext ucs2} { testutfnext \xA0\xA0G 2 } 0 -test utf-6.122 {Tcl_UtfNext, read limits} testutfnext { +test utf-6.122 {Tcl_UtfNext, read limits} {testutfnext ucs2} { testutfnext \xA0\xA0\xA0 2 } 0 test utf-6.123 {Tcl_UtfNext, read limits} testutfnext { @@ -693,19 +690,19 @@ test utf-7.9.2 {Tcl_UtfPrev} testutfprev { test utf-7.10.0 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xF2\xA0 } 2 -test utf-7.10.1 {Tcl_UtfPrev} {testutfprev fullutf} { +test utf-7.10.1 {Tcl_UtfPrev} {testutfprev ucs4} { testutfprev A\xF2\xA0 } 1 test utf-7.10.2 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xF2\xA0\xA0\xA0 3 } 2 -test utf-7.10.3 {Tcl_UtfPrev} {testutfprev fullutf} { +test utf-7.10.3 {Tcl_UtfPrev} {testutfprev ucs4} { testutfprev A\xF2\xA0\xA0\xA0 3 } 1 test utf-7.10.4 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xF2\xA0\xF8\xA0 3 } 2 -test utf-7.10.5 {Tcl_UtfPrev} {testutfprev fullutf} { +test utf-7.10.5 {Tcl_UtfPrev} {testutfprev ucs4} { testutfprev A\xF2\xA0\xF8\xA0 3 } 1 test utf-7.11 {Tcl_UtfPrev} testutfprev { @@ -750,19 +747,19 @@ test utf-7.14.2 {Tcl_UtfPrev} testutfprev { test utf-7.15.0 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xF2\xA0\xA0 } 3 -test utf-7.15.1 {Tcl_UtfPrev} {testutfprev fullutf} { +test utf-7.15.1 {Tcl_UtfPrev} {testutfprev ucs4} { testutfprev A\xF2\xA0\xA0 } 1 test utf-7.15.1.0 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xF2\xA0\xA0\xA0 4 } 3 -test utf-7.15.1.1 {Tcl_UtfPrev} {testutfprev fullutf} { +test utf-7.15.1.1 {Tcl_UtfPrev} {testutfprev ucs4} { testutfprev A\xF2\xA0\xA0\xA0 4 } 1 test utf-7.15.2.0 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xF2\xA0\xA0\xF8 4 } 3 -test utf-7.15.2.1 {Tcl_UtfPrev} {testutfprev fullutf} { +test utf-7.15.2.1 {Tcl_UtfPrev} {testutfprev ucs4} { testutfprev A\xF2\xA0\xA0\xF8 4 } 1 test utf-7.16 {Tcl_UtfPrev} testutfprev { @@ -888,19 +885,19 @@ test utf-7.38 {Tcl_UtfPrev -- overlong sequence} testutfprev { test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs2} { testutfprev A\xF0\x90\x80\x80 } 2 -test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { +test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs4} { testutfprev A\xF0\x90\x80\x80 } 1 test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs2} { testutfprev A\xF0\x90\x80\x80 4 } 3 -test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { +test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs4} { testutfprev A\xF0\x90\x80\x80 4 } 1 test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs2} { testutfprev A\xF0\x90\x80\x80 3 } 2 -test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { +test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs4} { testutfprev A\xF0\x90\x80\x80 3 } 1 test utf-7.42 {Tcl_UtfPrev -- overlong sequence} testutfprev { @@ -933,19 +930,19 @@ test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} t test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs2} { testutfprev A\xF4\x8F\xBF\xBF } 2 -test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { +test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs4} { testutfprev A\xF4\x8F\xBF\xBF } 1 test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs2} { testutfprev A\xF4\x8F\xBF\xBF 4 } 3 -test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { +test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs4} { testutfprev A\xF4\x8F\xBF\xBF 4 } 1 test utf-7.48.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs2} { testutfprev A\xF4\x8F\xBF\xBF 3 } 2 -test utf-7.48.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { +test utf-7.48.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs4} { testutfprev A\xF4\x8F\xBF\xBF 3 } 1 test utf-7.48.6 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { @@ -978,37 +975,37 @@ test utf-8.1 {Tcl_UniCharAtIndex: index = 0} { } a test utf-8.2 {Tcl_UniCharAtIndex: index = 0} { string index \u4E4E\u25A 0 -} "\u4E4E" +} \u4E4E test utf-8.3 {Tcl_UniCharAtIndex: index > 0} { string index abcd 2 } c test utf-8.4 {Tcl_UniCharAtIndex: index > 0} { string index \u4E4E\u25A\xFF\u543 2 -} "\uFF" +} \uFF test utf-8.5.0 {Tcl_UniCharAtIndex: high surrogate} ucs2 { string index \uD842 0 -} "\uD842" -test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} ucs4 { - string index \uD842 0 -} "\uD842" +} \uD842 +test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} {teststringbytes ucs4} { + teststringbytes [string index \uD842 0] +} \xF0 test utf-8.5.2 {Tcl_UniCharAtIndex: high surrogate} {teststringbytes tip389} { teststringbytes [string index \uD842 0] } \xF0 test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} { string index \uDC42 0 -} "\uDC42" +} \uDC42 test utf-8.7.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index \uD83D\uDE00G 0 -} "\uD83D" +} \uD83D test utf-8.7.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \uD83D\uDE00G 0 -} "\U1F600" +} \U1F600 test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} {teststringbytes tip389} { teststringbytes [string index \uD83D\uDE00G 0] } \xF0 test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index \uD83D\uDE00G 1 -} "\uDE00" +} \uDE00 test utf-8.8.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \uD83D\uDE00G 1 } G @@ -1026,10 +1023,10 @@ test utf-8.9.2 {Tcl_UniCharAtIndex: Emoji} tip389 { } G test utf-8.10.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { string index \U1F600G 0 -} "\uFFFD" +} \uFFFD test utf-8.10.1 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs4} { string index \U1F600G 0 -} "\U1F600" +} \U1F600 test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} {Uesc teststringbytes tip389} { teststringbytes [string index \U1F600G 0] } \xF0 @@ -1057,22 +1054,22 @@ test utf-9.1 {Tcl_UtfAtIndex: index = 0} { } abc test utf-9.2 {Tcl_UtfAtIndex: index > 0} { string range \u4E4E\u25A\xFF\u543klmnop 1 5 -} "\u25A\xFF\u543kl" +} \u25A\xFF\u543kl test utf-9.3.0 {Tcl_UtfAtIndex: index = 0, Emoji} ucs2 { string range \uD83D\uDE00G 0 0 -} "\uD83D" +} \uD83D test utf-9.3.1 {Tcl_UtfAtIndex: index = 0, Emoji} ucs4 { string range \uD83D\uDE00G 0 0 -} "\U1F600" +} \U1F600 test utf-9.3.2 {Tcl_UtfAtIndex: index = 0, Emoji} tip389 { string range \uD83D\uDE00G 0 0 -} "\U1F600" +} \U1F600 test utf-9.4.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { string range \uD83D\uDE00G 1 1 -} "\uDE00" +} \uDE00 test utf-9.4.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { string range \uD83D\uDE00G 1 1 -} "G" +} G test utf-9.4.2 {Tcl_UtfAtIndex: index > 0, Emoji} tip389 { string range \uD83D\uDE00G 1 1 } {} @@ -1087,19 +1084,19 @@ test utf-9.5.2 {Tcl_UtfAtIndex: index > 0, Emoji} tip389 { } G test utf-9.6.0 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc ucs2} { string range \U1f600G 0 0 -} "\uFFFD" +} \uFFFD test utf-9.6.1 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc ucs4} { string range \U1f600G 0 0 -} "\U1F600" +} \U1F600 test utf-9.6.2 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc tip389} { string range \U1f600G 0 0 -} "\U1F600" +} \U1F600 test utf-9.7.0 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs2} { string range \U1f600G 1 1 } G test utf-9.7.1 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs4} { string range \U1f600G 1 1 -} "G" +} G test utf-9.7.2 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc tip389} { string range \U1f600G 1 1 } {} -- cgit v0.12 From 9501890b6c738830781eebe5d8bdcff2d6a0068c Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 2 May 2020 22:48:20 +0000 Subject: Join test-cases utf-6.93.0 and utf-6.93.1, which MUST give the same answer always for whatever testConstraints. Fix one invalid use of TclUCS4Complete(), and let TclUtfToUCS4() handle (invalid) 4-byte sequences. Test-case cleanup (removal of unnecessary quoting) --- generic/tclEncoding.c | 2 +- generic/tclUtf.c | 2 +- tests/utf.test | 59 ++++++++++++++++++++++++--------------------------- 3 files changed, 30 insertions(+), 33 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 1584de0..5c7aab8 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2341,7 +2341,7 @@ UtfToUtfProc( *dst++ = 0; *chPtr = 0; /* reset surrogate handling */ src += 2; - } else if (!TclUCS4Complete(src, srcEnd - src)) { + } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { /* * Always check before using TclUtfToUniChar. Not doing can so * cause it run beyond the end of the buffer! If we happen such an diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 9ffbfba..160e444 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -2360,7 +2360,7 @@ TclUtfToUCS4( len = TclUtfToUniChar(src, &ch); fullchar = ch; -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX <= 4 /* 4-byte UTF-8 is supported; decode surrogates */ if ((ch >= 0xD800) && len < 3) { diff --git a/tests/utf.test b/tests/utf.test index 0929801..2bfb9ea 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -496,10 +496,7 @@ test utf-6.91.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { testutfnext \xA0\xA0\xA0 } 1 -test utf-6.93.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext ucs2} { - testutfnext \x80\x80\x80 -} 1 -test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext fullutf} { +test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { testutfnext \x80\x80\x80 } 1 test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { @@ -977,37 +974,37 @@ test utf-8.1 {Tcl_UniCharAtIndex: index = 0} { } a test utf-8.2 {Tcl_UniCharAtIndex: index = 0} { string index \u4E4E\u25A 0 -} "\u4E4E" +} \u4E4E test utf-8.3 {Tcl_UniCharAtIndex: index > 0} { string index abcd 2 } c test utf-8.4 {Tcl_UniCharAtIndex: index > 0} { string index \u4E4E\u25A\xFF\u543 2 -} "\uFF" +} \uFF test utf-8.5.0 {Tcl_UniCharAtIndex: high surrogate} ucs2 { string index \uD842 0 -} "\uD842" +} \uD842 test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} ucs4 { string index \uD842 0 -} "\uD842" +} \uD842 test utf-8.5.2 {Tcl_UniCharAtIndex: high surrogate} tip389 { string index \uD842 0 -} "\uD842" +} \uD842 test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} { string index \uDC42 0 -} "\uDC42" +} \uDC42 test utf-8.7.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index \uD83D\uDE00G 0 -} "\uD83D" +} \uD83D test utf-8.7.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \uD83D\uDE00G 0 -} "\U1F600" +} \U1F600 test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} tip389 { string index \uD83D\uDE00G 0 -} "\U1F600" +} \U1F600 test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index \uD83D\uDE00G 1 -} "\uDE00" +} \uDE00 test utf-8.8.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \uD83D\uDE00G 1 } G @@ -1025,13 +1022,13 @@ test utf-8.9.2 {Tcl_UniCharAtIndex: Emoji} tip389 { } G test utf-8.10.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { string index \U1F600G 0 -} "\uFFFD" +} \uFFFD test utf-8.10.1 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs4} { string index \U1F600G 0 -} "\U1F600" +} \U1F600 test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389} { string index \U1F600G 0 -} "\U1F600" +} \U1F600 test utf-8.11.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { string index \U1F600G 1 } G @@ -1056,22 +1053,22 @@ test utf-9.1 {Tcl_UtfAtIndex: index = 0} { } abc test utf-9.2 {Tcl_UtfAtIndex: index > 0} { string range \u4E4E\u25A\xFF\u543klmnop 1 5 -} "\u25A\xFF\u543kl" +} \u25A\xFF\u543kl test utf-9.3.0 {Tcl_UtfAtIndex: index = 0, Emoji} ucs2 { string range \uD83D\uDE00G 0 0 -} "\uD83D" +} \uD83D test utf-9.3.1 {Tcl_UtfAtIndex: index = 0, Emoji} ucs4 { string range \uD83D\uDE00G 0 0 -} "\U1F600" +} \U1F600 test utf-9.3.2 {Tcl_UtfAtIndex: index = 0, Emoji} tip389 { string range \uD83D\uDE00G 0 0 -} "\U1F600" +} \U1F600 test utf-9.4.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { string range \uD83D\uDE00G 1 1 -} "\uDE00" +} \uDE00 test utf-9.4.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { string range \uD83D\uDE00G 1 1 -} "G" +} G test utf-9.4.2 {Tcl_UtfAtIndex: index > 0, Emoji} tip389 { string range \uD83D\uDE00G 1 1 } {} @@ -1086,19 +1083,19 @@ test utf-9.5.2 {Tcl_UtfAtIndex: index > 0, Emoji} tip389 { } G test utf-9.6.0 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc ucs2} { string range \U1f600G 0 0 -} "\uFFFD" +} \uFFFD test utf-9.6.1 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc ucs4} { string range \U1f600G 0 0 -} "\U1F600" +} \U1F600 test utf-9.6.2 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc tip389} { string range \U1f600G 0 0 -} "\U1F600" +} \U1F600 test utf-9.7.0 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs2} { string range \U1f600G 1 1 } G test utf-9.7.1 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs4} { string range \U1f600G 1 1 -} "G" +} G test utf-9.7.2 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc tip389} { string range \U1f600G 1 1 } {} @@ -1182,7 +1179,7 @@ bsCheck \uA 10 bsCheck \340 224 bsCheck \uA1 161 bsCheck \u4E21 20001 -bsCheck \741 225 pre388 ;# == \341 +bsCheck \741 225 pre388 ;# == \341 bsCheck \741 60 !pre388 ;# == \74 1 bsCheck \U 85 bsCheck \Uk 85 @@ -1344,7 +1341,7 @@ test utf-20.2 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} knownBug { set two [format %c 0x10000] set first [string compare $one $two] string range $one 0 0 - string range $two 0 0 + string range $two 0 0 set second [string compare $one $two] expr {($first == $second) ? "agree" : "disagree"} } agree @@ -1466,9 +1463,9 @@ UniCharCaseCmpTest < a b UniCharCaseCmpTest > b a UniCharCaseCmpTest > B a UniCharCaseCmpTest > aBcB abca -UniCharCaseCmpTest < \uFFFF [format %c 0x10000] ucs4 +UniCharCaseCmpTest < \uFFFF [format %c 0x10000] ucs4 UniCharCaseCmpTest < \uFFFF \U10000 {Uesc ucs4} -UniCharCaseCmpTest > [format %c 0x10000] \uFFFF ucs4 +UniCharCaseCmpTest > [format %c 0x10000] \uFFFF ucs4 UniCharCaseCmpTest > \U10000 \uFFFF {Uesc ucs4} -- cgit v0.12 From c17661c31e3f4fac5a70dd487b4c9b3372ee5e5b Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 3 May 2020 22:16:21 +0000 Subject: Re-join utf-6.93.0 and utf-6.93.1 (please disregard comment in previous commit, it was not correct). Perfectionalize TclUtfToUCS4()/TclUCS4Complete() and new (internal) function TclUCS4ToUtf(). They can help preventing bugs regarding splitting/joining surrogates. Used them in a few more places. --- generic/tclBinary.c | 16 ++++++------- generic/tclCmdMZ.c | 9 ++------ generic/tclInt.h | 9 +++++++- generic/tclParse.c | 23 ++++++++----------- generic/tclUtf.c | 66 +++++++++++++++++++++++++++++++++++++++++++---------- tests/utf.test | 5 +++- 6 files changed, 86 insertions(+), 42 deletions(-) diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 6306159..52ef457 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -1222,11 +1222,11 @@ BinaryFormatCmd( badField: { - Tcl_UniChar ch = 0; - char buf[TCL_UTF_MAX + 1] = ""; + int ch; + char buf[8] = ""; - TclUtfToUniChar(errorString, &ch); - buf[Tcl_UniCharToUtf(ch, buf)] = '\0'; + TclUtfToUCS4(errorString, &ch); + buf[TclUCS4ToUtf(ch, buf)] = '\0'; Tcl_SetObjResult(interp, Tcl_ObjPrintf( "bad field specifier \"%s\"", buf)); return TCL_ERROR; @@ -1592,11 +1592,11 @@ BinaryScanCmd( badField: { - Tcl_UniChar ch = 0; - char buf[TCL_UTF_MAX + 1] = ""; + int ch; + char buf[8] = ""; - TclUtfToUniChar(errorString, &ch); - buf[Tcl_UniCharToUtf(ch, buf)] = '\0'; + TclUtfToUCS4(errorString, &ch); + buf[TclUCS4ToUtf(ch, buf)] = '\0'; Tcl_SetObjResult(interp, Tcl_ObjPrintf( "bad field specifier \"%s\"", buf)); return TCL_ERROR; diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 162a5a6..011164b 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -1413,14 +1413,9 @@ StringIndexCmd( Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(&uch, 1)); } else { - char buf[TCL_UTF_MAX] = ""; + char buf[8] = ""; - length = Tcl_UniCharToUtf(ch, buf); -#if TCL_UTF_MAX > 3 - if ((ch >= 0xD800) && (length < 3)) { - length += Tcl_UniCharToUtf(-1, buf + length); - } -#endif + length = TclUCS4ToUtf(ch, buf); Tcl_SetObjResult(interp, Tcl_NewStringObj(buf, length)); } } diff --git a/generic/tclInt.h b/generic/tclInt.h index 593d878..6f024a6 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3184,8 +3184,15 @@ MODULE_SCOPE int TclTrimRight(const char *bytes, int numBytes, const char *trim, int numTrim); MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct); MODULE_SCOPE int TclUtfToUCS4(const char *src, int *ucs4Ptr); +MODULE_SCOPE int TclUCS4ToUtf(int, char *); + +/* + * Bytes F0-F4 are start-bytes for 4-byte sequences. + * Byte 0xED can be the start-byte of an upper surrogate. In that case, + * TclUtfToUCS4() might read the lower surrogate following it too. + */ # define TclUCS4Complete(src, length) (((unsigned)(UCHAR(*(src)) - 0xF0) < 5) \ - ? ((length) >= 4) : Tcl_UtfCharComplete((src), (length))) + ? ((length) >= 4) : (UCHAR(*(src)) == 0xED) ? ((length) >= 6) : Tcl_UtfCharComplete((src), (length))) MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(ClientData clientData); MODULE_SCOPE Tcl_Obj * TclpFilesystemPathType(Tcl_Obj *pathPtr); MODULE_SCOPE int TclpDlopen(Tcl_Interp *interp, Tcl_Obj *pathPtr, diff --git a/generic/tclParse.c b/generic/tclParse.c index 7beaeea..23a07cf 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -843,7 +843,6 @@ TclParseBackslash( * written there. */ { register const char *p = src+1; - Tcl_UniChar unichar = 0; int result; int count; char buf[TCL_UTF_MAX] = ""; @@ -943,7 +942,7 @@ TclParseBackslash( * No hexdigits -> This is just "U". */ result = 'U'; - } else if ((result | 0x7FF) == 0xDFFF) { + } else if ((result & ~0x7FF) == 0xD800) { /* Upper or lower surrogate, not allowed in this syntax. */ result = 0xFFFD; } @@ -991,16 +990,15 @@ TclParseBackslash( * #217987] test subst-3.2 */ - if (Tcl_UtfCharComplete(p, numBytes - 1)) { - count = TclUtfToUniChar(p, &unichar) + 1; /* +1 for '\' */ + if (TclUCS4Complete(p, numBytes - 1)) { + count = TclUtfToUCS4(p, &result) + 1; /* +1 for '\' */ } else { - char utfBytes[TCL_UTF_MAX]; + char utfBytes[8]; - memcpy(utfBytes, p, (size_t) (numBytes - 1)); + memcpy(utfBytes, p, numBytes - 1); utfBytes[numBytes - 1] = '\0'; - count = TclUtfToUniChar(utfBytes, &unichar) + 1; + count = TclUtfToUCS4(utfBytes, &result) + 1; } - result = unichar; break; } @@ -1008,13 +1006,12 @@ TclParseBackslash( if (readPtr != NULL) { *readPtr = count; } - count = Tcl_UniCharToUtf(result, dst); -#if TCL_UTF_MAX > 3 - if ((result >= 0xD800) && (count < 3)) { - count += Tcl_UniCharToUtf(-1, dst + count); +#if TCL_UTF_MAX < 4 + if (result > 0xFFFF) { + result = 0xFFFD; } #endif - return count; + return TclUCS4ToUtf(result, dst); } /* diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 160e444..a2080dd 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -2335,7 +2335,7 @@ TclUniCharMatch( * routine does not run off the end and dereference non-existent memory * looking for trail bytes. If the source buffer is known to be '\0' * terminated, this cannot happen. Otherwise, the caller should call - * Tcl_UtfCharComplete() before calling this routine to ensure that + * TclUCS4Complete() before calling this routine to ensure that * enough bytes remain in the string. * * Results: @@ -2354,26 +2354,68 @@ TclUtfToUCS4( int *ucs4Ptr) /* Filled with the UCS4 codepoint represented * by the UTF-8 string. */ { - int len, fullchar; Tcl_UniChar ch = 0; - - len = TclUtfToUniChar(src, &ch); - fullchar = ch; + int len = Tcl_UtfToUniChar(src, &ch); #if TCL_UTF_MAX <= 4 - /* 4-byte UTF-8 is supported; decode surrogates */ - - if ((ch >= 0xD800) && len < 3) { - len += Tcl_UtfToUniChar(src + len, &ch); - fullchar = (((fullchar & 0x3FF) << 10) | (ch & 0x3FF)) + 0x10000; + if ((ch & ~0x3FF) == 0xD800) { + Tcl_UniChar low = ch; + int len2 = Tcl_UtfToUniChar(src+len, &low); + if ((low & ~0x3FF) == 0xDC00) { + *ucs4Ptr = (((ch & 0x3FF) << 10) | (low & 0x3FF)) + 0x10000; + return len + len2; + } } #endif - - *ucs4Ptr = fullchar; + *ucs4Ptr = (int)ch; return len; } /* + *--------------------------------------------------------------------------- + * + * TclUCS4ToUtf -- + * + * Store the given Unicode character as a sequence of UTF-8 bytes in the + * provided buffer. Might output 6 bytes, if the code point > 0xFFFF. + * + * Results: + * The return values is the number of bytes in the buffer that were + * consumed. + * + * Side effects: + * None. + * + *--------------------------------------------------------------------------- + */ + +int +TclUCS4ToUtf( + int ch, /* Unicode character to be stored in the + * buffer. */ + char *buf) /* Buffer in which the UTF-8 representation of + * the Unicode character is stored. Buffer must be + * large enough to hold the UTF-8 character(s) + * (at most 6 bytes). */ +{ +#if TCL_UTF_MAX <= 4 + if (((unsigned)(ch - 0x10000) <= 0xFFFFF)) { + /* Spit out a 4-byte UTF-8 character or 2 x 3-byte UTF-8 characters, depending on Tcl + * version and/or TCL_UTF_MAX build value */ + int len = Tcl_UniCharToUtf(0xD800 | ((ch - 0x10000) >> 10), buf); + return len + Tcl_UniCharToUtf(0xDC00 | (ch & 0x7FF), buf + len); + } +#endif + if ((ch & ~0x7FF) == 0xD800) { + buf[2] = (char) ((ch | 0x80) & 0xBF); + buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF); + buf[0] = (char) ((ch >> 12) | 0xE0); + return 3; + } + return Tcl_UniCharToUtf(ch, buf); +} + +/* * Local Variables: * mode: c * c-basic-offset: 4 diff --git a/tests/utf.test b/tests/utf.test index 2bfb9ea..c0fed6f 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -496,7 +496,10 @@ test utf-6.91.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { testutfnext \xA0\xA0\xA0 } 1 -test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { +test utf-6.93.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext ucs2} { + testutfnext \x80\x80\x80 +} 1 +test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext fullutf} { testutfnext \x80\x80\x80 } 1 test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { -- cgit v0.12 From 8c3587a6e899c6fd12fd0563312c4a20c289d8fd Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 4 May 2020 08:35:44 +0000 Subject: (partial) fix for [9d0cb35bb2]: Various issues with core-8-6-branch, TCL_UTF_MAX=4. (even though TCL_UTF_MAX=4 is unsupported, it would be nice to make it work) Marked various test-cases as "knownBug", those work correctly in core-8-branch (8.7). The fix there could be backported. Low prio. --- generic/tclUtf.c | 34 ++++++++++++++++++---------------- tests/utf.test | 16 ++++++++-------- 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index a2080dd..ab3c577 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -359,8 +359,8 @@ Tcl_UniCharToUtfDString( int Tcl_UtfToUniChar( - register const char *src, /* The UTF-8 string. */ - register Tcl_UniChar *chPtr)/* Filled with the Tcl_UniChar represented by + const char *src, /* The UTF-8 string. */ + Tcl_UniChar *chPtr)/* Filled with the Tcl_UniChar represented by * the UTF-8 string. */ { Tcl_UniChar byte; @@ -580,12 +580,12 @@ Tcl_UtfCharComplete( int Tcl_NumUtfChars( - register const char *src, /* The UTF-8 string to measure. */ + const char *src, /* The UTF-8 string to measure. */ int length) /* The length of the string in bytes, or -1 * for strlen(string). */ { Tcl_UniChar ch = 0; - register int i = 0; + int i = 0; /* * The separate implementations are faster. @@ -601,27 +601,29 @@ Tcl_NumUtfChars( } if (i < 0) i = INT_MAX; /* Bug [2738427] */ } else { - register const char *endPtr = src + length - TCL_UTF_MAX; + const char *endPtr = src + length - TCL_UTF_MAX; while (src < endPtr) { +#if TCL_UTF_MAX < 4 if (((unsigned)UCHAR(*src) - 0xF0) < 5) { /* treat F0 - F4 as single character */ ch = 0; src++; - } else { - src += TclUtfToUniChar(src, &ch); - } + } else +#endif + src += TclUtfToUniChar(src, &ch); i++; } endPtr += TCL_UTF_MAX; while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { +#if TCL_UTF_MAX < 4 if (((unsigned)UCHAR(*src) - 0xF0) < 5) { /* treat F0 - F4 as single character */ ch = 0; src++; - } else { - src += TclUtfToUniChar(src, &ch); - } + } else +#endif + src += TclUtfToUniChar(src, &ch); i++; } if (src < endPtr) { @@ -890,8 +892,8 @@ Tcl_UtfPrev( Tcl_UniChar Tcl_UniCharAtIndex( - register const char *src, /* The UTF-8 string to dereference. */ - register int index) /* The position of the desired character. */ + const char *src, /* The UTF-8 string to dereference. */ + int index) /* The position of the desired character. */ { Tcl_UniChar ch = 0; @@ -918,8 +920,8 @@ Tcl_UniCharAtIndex( const char * Tcl_UtfAtIndex( - register const char *src, /* The UTF-8 string. */ - register int index) /* The position of the desired character. */ + const char *src, /* The UTF-8 string. */ + int index) /* The position of the desired character. */ { Tcl_UniChar ch = 0; int len = 0; @@ -1191,7 +1193,7 @@ TclpUtfNcmp2( * fine in the strcmp manner. */ - register int result = 0; + int result = 0; for ( ; numBytes != 0; numBytes--, cs++, ct++) { if (*cs != *ct) { diff --git a/tests/utf.test b/tests/utf.test index c0fed6f..a3c049d 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -499,9 +499,9 @@ test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testu test utf-6.93.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext ucs2} { testutfnext \x80\x80\x80 } 1 -test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext fullutf} { +test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext fullutf knownBug} { testutfnext \x80\x80\x80 -} 1 +} 3 test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { testutfnext \xA0\xA0\xA0\xA0 } 1 @@ -987,10 +987,10 @@ test utf-8.4 {Tcl_UniCharAtIndex: index > 0} { test utf-8.5.0 {Tcl_UniCharAtIndex: high surrogate} ucs2 { string index \uD842 0 } \uD842 -test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} ucs4 { +test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} {ucs4 knownBug} { string index \uD842 0 } \uD842 -test utf-8.5.2 {Tcl_UniCharAtIndex: high surrogate} tip389 { +test utf-8.5.2 {Tcl_UniCharAtIndex: high surrogate} {tip389 knownBug} { string index \uD842 0 } \uD842 test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} { @@ -1002,7 +1002,7 @@ test utf-8.7.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { test utf-8.7.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \uD83D\uDE00G 0 } \U1F600 -test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} tip389 { +test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} {tip389 knownBug} { string index \uD83D\uDE00G 0 } \U1F600 test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { @@ -1011,7 +1011,7 @@ test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { test utf-8.8.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \uD83D\uDE00G 1 } G -test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} tip389 { +test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} {tip389 knownBug} { string index \uD83D\uDE00G 1 } {} test utf-8.9.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { @@ -1029,7 +1029,7 @@ test utf-8.10.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { test utf-8.10.1 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs4} { string index \U1F600G 0 } \U1F600 -test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389} { +test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389 knownBug} { string index \U1F600G 0 } \U1F600 test utf-8.11.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { @@ -1038,7 +1038,7 @@ test utf-8.11.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { test utf-8.11.1 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs4} { string index \U1F600G 1 } G -test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389} { +test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389 knownBug} { string index \U1F600G 1 } {} test utf-8.12.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { -- cgit v0.12 From 7759441602f14b3772186ea0d87f61ffdafc8404 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 4 May 2020 12:31:51 +0000 Subject: New internal function TclGetUCS4() only available when TCL_UTF_MAX=4. This fixes all "knownBug" testcases related to tip389. --- generic/tclCmdMZ.c | 2 +- generic/tclExecute.c | 12 ++------ generic/tclInt.h | 7 ++++- generic/tclStringObj.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++-- generic/tclUtf.c | 6 +++- tests/utf.test | 14 ++++----- 6 files changed, 98 insertions(+), 21 deletions(-) diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 011164b..7516208 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -1401,7 +1401,7 @@ StringIndexCmd( } if ((index >= 0) && (index < length)) { - Tcl_UniChar ch = Tcl_GetUniChar(objv[1], index); + int ch = TclGetUCS4(objv[1], index); /* * If we have a ByteArray object, we're careful to generate a new diff --git a/generic/tclExecute.c b/generic/tclExecute.c index f38e752..eeb69de 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5571,16 +5571,10 @@ TEBCresume( objResultPtr = Tcl_NewStringObj((const char *) valuePtr->bytes+index, 1); } else { - char buf[4] = ""; - Tcl_UniChar ch = Tcl_GetUniChar(valuePtr, index); + char buf[8] = ""; + int ch = TclGetUCS4(valuePtr, index); - /* - * This could be: Tcl_NewUnicodeObj((const Tcl_UniChar *)&ch, 1) - * but creating the object as a string seems to be faster in - * practical use. - */ - - length = Tcl_UniCharToUtf(ch, buf); + length = TclUCS4ToUtf(ch, buf); objResultPtr = Tcl_NewStringObj(buf, length); } diff --git a/generic/tclInt.h b/generic/tclInt.h index 6f024a6..8983659 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3183,8 +3183,13 @@ MODULE_SCOPE int TclTrimLeft(const char *bytes, int numBytes, MODULE_SCOPE int TclTrimRight(const char *bytes, int numBytes, const char *trim, int numTrim); MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct); -MODULE_SCOPE int TclUtfToUCS4(const char *src, int *ucs4Ptr); +MODULE_SCOPE int TclUtfToUCS4(const char *, int *); MODULE_SCOPE int TclUCS4ToUtf(int, char *); +#if TCL_UTF_MAX == 4 + MODULE_SCOPE int TclGetUCS4(Tcl_Obj *, int); +#else + #define TclGetUCS4 Tcl_GetUniChar +#endif /* * Bytes F0-F4 are start-bytes for 4-byte sequences. diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 3ce8281..656d6ce 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -519,10 +519,10 @@ TclCheckEmptyString ( /* *---------------------------------------------------------------------- * - * Tcl_GetUniChar -- + * Tcl_GetUniChar/TclGetUCS4 -- * * Get the index'th Unicode character from the String object. If index - * is out of range, the result = 0xFFFD; + * is out of range, the result = 0xFFFD (Tcl_GetUniChar) resp. -1 (TclGetUCS4) * * Results: * Returns the index'th Unicode character in the Object. @@ -587,6 +587,80 @@ Tcl_GetUniChar( } return stringPtr->unicode[index]; } + +#if TCL_UTF_MAX == 4 +int +TclGetUCS4( + Tcl_Obj *objPtr, /* The object to get the Unicode charater + * from. */ + int index) /* Get the index'th Unicode character. */ +{ + String *stringPtr; + int ch, length; + + if (index < 0) { + return -1; + } + + /* + * Optimize the case where we're really dealing with a bytearray object + * we don't need to convert to a string to perform the indexing operation. + */ + + if (TclIsPureByteArray(objPtr)) { + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); + if (index >= length) { + return -1; + } + + return (int) bytes[index]; + } + + /* + * OK, need to work with the object as a string. + */ + + SetStringFromAny(NULL, objPtr); + stringPtr = GET_STRING(objPtr); + + if (stringPtr->hasUnicode == 0) { + /* + * If numChars is unknown, compute it. + */ + + if (stringPtr->numChars == -1) { + TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length); + } + if (stringPtr->numChars == objPtr->length) { + return (Tcl_UniChar) objPtr->bytes[index]; + } + FillUnicodeRep(objPtr); + stringPtr = GET_STRING(objPtr); + } + + if (index >= stringPtr->numChars) { + return -1; + } + ch = stringPtr->unicode[index]; +#if TCL_UTF_MAX <= 4 + /* See: bug [11ae2be95dac9417] */ + if ((ch & 0xF800) == 0xD800) { + if (ch & 0x400) { + if ((index > 0) + && ((stringPtr->unicode[index-1] & 0xFC00) == 0xD800)) { + ch = -1; /* low surrogate preceded by high surrogate */ + } + } else if ((++index < stringPtr->numChars) + && ((stringPtr->unicode[index] & 0xFC00) == 0xDC00)) { + /* high surrogate followed by low surrogate */ + ch = (((ch & 0x3FF) << 10) | + (stringPtr->unicode[index] & 0x3FF)) + 0x10000; + } + } +#endif + return ch; +} +#endif /* *---------------------------------------------------------------------- diff --git a/generic/tclUtf.c b/generic/tclUtf.c index ab3c577..9714204 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -2383,7 +2383,8 @@ TclUtfToUCS4( * * Results: * The return values is the number of bytes in the buffer that were - * consumed. + * consumed. If ch == -1, this function outputs 0 bytes (empty string), + * since TclGetUCS4 returns -1 for out-of-range indices. * * Side effects: * None. @@ -2414,6 +2415,9 @@ TclUCS4ToUtf( buf[0] = (char) ((ch >> 12) | 0xE0); return 3; } + if (ch == -1) { + return 0; + } return Tcl_UniCharToUtf(ch, buf); } diff --git a/tests/utf.test b/tests/utf.test index a3c049d..fd8231d 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -983,14 +983,14 @@ test utf-8.3 {Tcl_UniCharAtIndex: index > 0} { } c test utf-8.4 {Tcl_UniCharAtIndex: index > 0} { string index \u4E4E\u25A\xFF\u543 2 -} \uFF +} \xFF test utf-8.5.0 {Tcl_UniCharAtIndex: high surrogate} ucs2 { string index \uD842 0 } \uD842 -test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} {ucs4 knownBug} { +test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} {ucs4} { string index \uD842 0 } \uD842 -test utf-8.5.2 {Tcl_UniCharAtIndex: high surrogate} {tip389 knownBug} { +test utf-8.5.2 {Tcl_UniCharAtIndex: high surrogate} {tip389} { string index \uD842 0 } \uD842 test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} { @@ -1002,7 +1002,7 @@ test utf-8.7.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { test utf-8.7.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \uD83D\uDE00G 0 } \U1F600 -test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} {tip389 knownBug} { +test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} {tip389} { string index \uD83D\uDE00G 0 } \U1F600 test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { @@ -1011,7 +1011,7 @@ test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { test utf-8.8.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \uD83D\uDE00G 1 } G -test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} {tip389 knownBug} { +test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} {tip389} { string index \uD83D\uDE00G 1 } {} test utf-8.9.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { @@ -1029,7 +1029,7 @@ test utf-8.10.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { test utf-8.10.1 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs4} { string index \U1F600G 0 } \U1F600 -test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389 knownBug} { +test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389} { string index \U1F600G 0 } \U1F600 test utf-8.11.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { @@ -1038,7 +1038,7 @@ test utf-8.11.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { test utf-8.11.1 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs4} { string index \U1F600G 1 } G -test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389 knownBug} { +test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389} { string index \U1F600G 1 } {} test utf-8.12.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { -- cgit v0.12 From 413ea81a284c691dc5ed4ad48217370ce83f65f7 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 4 May 2020 14:17:23 +0000 Subject: More progress/simplification --- generic/tclUtf.c | 23 ++--------------------- tests/utf.test | 33 ++++++++++++--------------------- 2 files changed, 14 insertions(+), 42 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index c4b5305..b964b7e 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -70,25 +70,6 @@ static const unsigned char totalBytes[256] = { /* End of "continuation byte section" */ 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, -#if TCL_UTF_MAX > 3 - 4,4,4,4,4, -#else - 1,1,1,1,1, -#endif - 1,1,1,1,1,1,1,1,1,1,1 -}; - -static const unsigned char complete[256] = { - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -/* Tcl_UtfCharComplete() might point to 2nd byte of valid 4-byte sequence */ - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, -/* End of "continuation byte section" */ - 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, #if TCL_UTF_MAX > 4 4,4,4,4,4, #else @@ -183,7 +164,7 @@ Invalid( unsigned char byte = *src; int index; - if (byte % 0x04) { + if ((byte & 0xC3) != 0xC0) { /* Only lead bytes 0xC0, 0xE0, 0xF0, 0xF4 need examination */ return 0; } @@ -573,7 +554,7 @@ Tcl_UtfCharComplete( * a complete UTF-8 character. */ int length) /* Length of above string in bytes. */ { - return length >= complete[UCHAR(*src)]; + return length >= totalBytes[UCHAR(*src)]; } /* diff --git a/tests/utf.test b/tests/utf.test index c455078..3f74f6f 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -493,25 +493,16 @@ test utf-6.91.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs2} { test utf-6.91.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} { testutfnext \xF4\x90\x80\x80 } 1 -test utf-6.92.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext ucs2} { +test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { testutfnext \xA0\xA0\xA0 -} 1 -test utf-6.92.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext fullutf knownBug} { - testutfnext \xA0\xA0\xA0 -} 3 -test utf-6.93.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext ucs2} { - testutfnext \x80\x80\x80 } 3 -test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext fullutf knownBug} { +test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { testutfnext \x80\x80\x80 } 3 -test utf-6.94.0 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext ucs2} { - testutfnext \xA0\xA0\xA0\xA0 -} 1 -test utf-6.94.1 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext fullutf knownBug} { +test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { testutfnext \xA0\xA0\xA0\xA0 } 3 -test utf-6.95 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext ucs2} { +test utf-6.95 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { testutfnext \x80\x80\x80\x80 } 3 test utf-6.96 {Tcl_UtfNext, read limits} testutfnext { @@ -619,18 +610,18 @@ test utf-6.121 {Tcl_UtfNext, read limits} {testutfnext ucs2} { test utf-6.122 {Tcl_UtfNext, read limits} {testutfnext ucs2} { testutfnext \xA0\xA0\xA0 2 } 0 -test utf-6.123 {Tcl_UtfNext, read limits} {testutfnext ucs2} { +test utf-6.123 {Tcl_UtfNext, read limits} testutfnext { testutfnext \xA0\xA0\xA0G 3 -} 1 -test utf-6.124 {Tcl_UtfNext, read limits} {testutfnext ucs2} { +} 3 +test utf-6.124 {Tcl_UtfNext, read limits} testutfnext { testutfnext \xA0\xA0\xA0\xA0 3 -} 1 -test utf-6.125 {Tcl_UtfNext, read limits} {testutfnext ucs2} { +} 3 +test utf-6.125 {Tcl_UtfNext, read limits} testutfnext { testutfnext \xA0\xA0\xA0\xA0G 4 -} 1 -test utf-6.126 {Tcl_UtfNext, read limits} {testutfnext ucs2} { +} 3 +test utf-6.126 {Tcl_UtfNext, read limits} testutfnext { testutfnext \xA0\xA0\xA0\xA0\xA0 4 -} 1 +} 3 test utf-7.1 {Tcl_UtfPrev} testutfprev { testutfprev {} -- cgit v0.12 From 9ae5f06a2b8f68f611a804c7a2d7ee2e1f9ab759 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 5 May 2020 07:44:36 +0000 Subject: One more tip389 selector --- tests/string.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/string.test b/tests/string.test index e4c39d2..124bda7 100644 --- a/tests/string.test +++ b/tests/string.test @@ -24,7 +24,7 @@ catch [list package require -exact Tcltest [info patchlevel]] testConstraint testobj [expr {[info commands testobj] != {}}] testConstraint testindexobj [expr {[info commands testindexobj] != {}}] -testConstraint tip389 [expr {[string length \U010000] == 2}] +testConstraint utf16 [expr {[string length \U010000] == 2}] testConstraint testbytestring [llength [info commands testbytestring]] # Used for constraining memory leak tests @@ -1299,7 +1299,7 @@ test string-12.22 {string range, shimmering binary/index} { binary scan $s a* x string range $s $s end } 000000001 -test string-12.23 {string range, surrogates, bug [11ae2be95dac9417]} tip389 { +test string-12.23 {string range, surrogates, bug [11ae2be95dac9417]} utf16 { list [string range a\U100000b 1 1] [string range a\U100000b 2 2] [string range a\U100000b 3 3] } [list \U100000 {} b] -- cgit v0.12 From a2f768dce66fc3f48b9a095fdc121fc9cc20b23a Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 5 May 2020 11:44:32 +0000 Subject: Fix Tcl_UtfPrev() such that it can never go back more than TCL_UTF_MAX bytes. Already done correctly on core-8-6-branch, but this was never forwarded to core-8-branch. --- generic/tclUtf.c | 10 +++++++--- tests/utf.test | 26 +++++++++++++------------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 782657a..7b6ec63 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -815,13 +815,13 @@ Tcl_NumUtfChars( } if (i < 0) i = INT_MAX; /* Bug [2738427] */ } else { - const char *endPtr = src + length - 4; + const char *endPtr = src + length - TCL_UTF_MAX; while (src < endPtr) { src += TclUtfToUniChar(src, &ch); i++; } - endPtr += 4; + endPtr += TCL_UTF_MAX; while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { src += TclUtfToUniChar(src, &ch); i++; @@ -1065,7 +1065,7 @@ Tcl_UtfPrev( /* Continue the search backwards... */ look--; - } while (trailBytesSeen < 4); + } while (trailBytesSeen < TCL_UTF_MAX); /* * We've seen TCL_UTF_MAX trail bytes, so we know there will not be a @@ -1073,7 +1073,11 @@ Tcl_UtfPrev( * accepting the fallback (for TCL_UTF_MAX > 3) or just go back as * far as we can. */ +#if TCL_UTF_MAX > 3 return fallback; +#else + return src - TCL_UTF_MAX; +#endif } /* diff --git a/tests/utf.test b/tests/utf.test index 7079bbf..ae50b0e 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -661,49 +661,49 @@ test utf-7.17.2 {Tcl_UtfPrev} {testutfprev testbytestring} { } 3 test utf-7.18.0 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { testutfprev A[testbytestring \xA0\xA0\xA0] -} 3 +} 1 test utf-7.18.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xA0\xA0\xA0] } 3 test utf-7.18.2 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { testutfprev A[testbytestring \xA0\xA0\xA0\xA0] 4 -} 3 +} 1 test utf-7.18.3 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xA0\xA0\xA0\xA0] 4 } 3 test utf-7.18.4 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { testutfprev A[testbytestring \xA0\xA0\xA0\xF8] 4 -} 3 +} 1 test utf-7.18.5 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xA0\xA0\xA0\xF8] 4 } 3 test utf-7.19.0 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { testutfprev A[testbytestring \xF8\xA0\xA0\xA0] -} 4 +} 2 test utf-7.19.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF8\xA0\xA0\xA0] } 4 test utf-7.20.0 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { testutfprev A[testbytestring \xF2\xA0\xA0\xA0] -} 1 +} 2 test utf-7.20.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF2\xA0\xA0\xA0] } 1 test utf-7.21.0 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { testutfprev A\u8820[testbytestring \xA0] -} 4 +} 2 test utf-7.21.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A\u8820[testbytestring \xA0] } 4 test utf-7.22.0 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { testutfprev A[testbytestring \xD0\xA0\xA0\xA0] -} 4 +} 2 test utf-7.22.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xD0\xA0\xA0\xA0] } 4 test utf-7.23.0 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { testutfprev A[testbytestring \xA0\xA0\xA0\xA0] -} 4 +} 2 test utf-7.23.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xA0\xA0\xA0\xA0] } 4 @@ -730,7 +730,7 @@ test utf-7.28.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} } 1 test utf-7.29.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring utf16} { testutfprev A[testbytestring \xF0\x80\x80\x80] -} 4 +} 2 test utf-7.29.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF0\x80\x80\x80] } 4 @@ -763,7 +763,7 @@ test utf-7.38 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { } 1 test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring utf16} { testutfprev A[testbytestring \xF0\x90\x80\x80] -} 1 +} 2 test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF0\x90\x80\x80] } 1 @@ -793,7 +793,7 @@ test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestrin } 2 test utf-7.46.0 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring utf16} { testutfprev [testbytestring \xA0\xA0\xA0\xA0] -} 3 +} 1 test utf-7.46.1 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring ucs4} { testutfprev [testbytestring \xA0\xA0\xA0\xA0] } 3 @@ -808,7 +808,7 @@ test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} { } 0 test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring utf16} { testutfprev A\uDBFF\uDFFF -} 1 +} 2 test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs4} { testutfprev A\U10FFFF } 1 @@ -829,7 +829,7 @@ test utf-7.48.6 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbyte } 1 test utf-7.49.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring utf16} { testutfprev A[testbytestring \xF4\x90\x80\x80] -} 4 +} 2 test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF4\x90\x80\x80] } 4 -- cgit v0.12 From cc86f6e11e85d0a01675ec31e3677ad2e63cddc4 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 5 May 2020 13:23:17 +0000 Subject: Add 4 test-cases that could fool Tcl_UtfPrev (but ... actually they don't). Make sure that Tcl_UtfPrev() never reads more than 3 trail bytes (or 4 when TCL_UTF_MAX > 4). Those are the same limits as for Tcl_UtfNext() and Tcl_UtfToUniChar() --- generic/tclUtf.c | 2 +- tests/utf.test | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index ac87978..2439a54 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -855,7 +855,7 @@ Tcl_UtfPrev( /* Continue the search backwards... */ look--; - } while (trailBytesSeen < TCL_UTF_MAX); + } while (trailBytesSeen < ((TCL_UTF_MAX > 3) ? 4 : 3)); /* * We've seen TCL_UTF_MAX trail bytes, so we know there will not be a diff --git a/tests/utf.test b/tests/utf.test index e8fa603..ffe7896 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -968,6 +968,18 @@ test utf-7.49.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} test utf-7.49.6 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { testutfprev A\xF4\x90\x80\x80 2 } 1 +test utf-7.50.0 {Tcl_UtfPrev, 4-byte valid sequence with additional trail} {testutfprev ucs2} { + testutfprev \xF2\xA0\xA0\xA0\xA0 +} 2 +test utf-7.50.1 {Tcl_UtfPrev, 4-byte valid sequence with additional trail} {testutfprev fullutf} { + testutfprev \xF2\xA0\xA0\xA0\xA0 +} 4 +test utf-7.51.0 {Tcl_UtfPrev, 4-byte valid sequence with additional trail} {testutfprev ucs2} { + testutfprev \xF2\x80\x80\x80\x80 +} 2 +test utf-7.51.1 {Tcl_UtfPrev, 4-byte valid sequence with additional trail} {testutfprev fullutf} { + testutfprev \xF2\x80\x80\x80\x80 +} 4 test utf-8.1 {Tcl_UniCharAtIndex: index = 0} { string index abcd 0 -- cgit v0.12 From c6d9c4cee08c88cac3ba885b637147fe8808c62a Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 5 May 2020 15:56:14 +0000 Subject: Remove PROTO_FLAGS support from the Makefile, since it isn't used anywhere any more. --- unix/Makefile.in | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/unix/Makefile.in b/unix/Makefile.in index df585f5..2f1bb70 100644 --- a/unix/Makefile.in +++ b/unix/Makefile.in @@ -114,11 +114,6 @@ LDFLAGS_DEBUG = @LDFLAGS_DEBUG@ LDFLAGS_OPTIMIZE = @LDFLAGS_OPTIMIZE@ LDFLAGS = @LDFLAGS_DEFAULT@ @LDFLAGS@ -# To disable ANSI-C procedure prototypes reverse the comment characters on the -# following lines: -PROTO_FLAGS = -#PROTO_FLAGS = -DNO_PROTOTYPE - # If you use the setenv, putenv, or unsetenv procedures to modify environment # variables in your application and you'd like those modifications to appear # in the "env" Tcl variable, switch the comments on the two lines below so @@ -278,7 +273,7 @@ VALGRINDARGS = --tool=memcheck --num-callers=24 \ STUB_CC_SWITCHES = ${CFLAGS} ${CFLAGS_WARNING} ${SHLIB_CFLAGS} \ -I"${BUILD_DIR}" -I${UNIX_DIR} -I${GENERIC_DIR} -I${TOMMATH_DIR} \ -${AC_FLAGS} ${PROTO_FLAGS} ${ENV_FLAGS} ${EXTRA_CFLAGS} @EXTRA_CC_SWITCHES@ +${AC_FLAGS} ${ENV_FLAGS} ${EXTRA_CFLAGS} @EXTRA_CC_SWITCHES@ CC_SWITCHES = $(STUB_CC_SWITCHES) ${NO_DEPRECATED_FLAGS} -DMP_FIXED_CUTOFFS -DMP_NO_STDINT @@ -287,7 +282,7 @@ APP_CC_SWITCHES = $(CC_SWITCHES) @EXTRA_APP_CC_SWITCHES@ LIBS = @TCL_LIBS@ DEPEND_SWITCHES = ${CFLAGS} -I${UNIX_DIR} -I${GENERIC_DIR} \ -${AC_FLAGS} ${PROTO_FLAGS} ${EXTRA_CFLAGS} @EXTRA_CC_SWITCHES@ +${AC_FLAGS} ${EXTRA_CFLAGS} @EXTRA_CC_SWITCHES@ TCLSH_OBJS = tclAppInit.o -- cgit v0.12 From 96f7e93f0b671b1d35f78a5e3058f9a83e2caedc Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 5 May 2020 16:00:06 +0000 Subject: More usage of TclUtfToUCS4(), so we can use the whole Unicode range better in TCL_UTF_MAX>3 builds. --- generic/tclCmdIL.c | 10 +++++----- generic/tclInt.h | 1 + generic/tclUtf.c | 17 ++++++++--------- generic/tclUtil.c | 44 ++++++++++++++++++++++---------------------- 4 files changed, 36 insertions(+), 36 deletions(-) diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c index 7e685bd..3ec1c09 100644 --- a/generic/tclCmdIL.c +++ b/generic/tclCmdIL.c @@ -4370,7 +4370,7 @@ static int DictionaryCompare( const char *left, const char *right) /* The strings to compare. */ { - Tcl_UniChar uniLeft = 0, uniRight = 0, uniLeftLower, uniRightLower; + int uniLeft = 0, uniRight = 0, uniLeftLower, uniRightLower; int diff, zeros; int secondaryDiff = 0; @@ -4439,8 +4439,8 @@ DictionaryCompare( */ if ((*left != '\0') && (*right != '\0')) { - left += TclUtfToUniChar(left, &uniLeft); - right += TclUtfToUniChar(right, &uniRight); + left += TclUtfToUCS4(left, &uniLeft); + right += TclUtfToUCS4(right, &uniRight); /* * Convert both chars to lower for the comparison, because @@ -4449,8 +4449,8 @@ DictionaryCompare( * other interesting punctuations occur). */ - uniLeftLower = Tcl_UniCharToLower(uniLeft); - uniRightLower = Tcl_UniCharToLower(uniRight); + uniLeftLower = TclUCS4ToLower(uniLeft); + uniRightLower = TclUCS4ToLower(uniRight); } else { diff = UCHAR(*left) - UCHAR(*right); break; diff --git a/generic/tclInt.h b/generic/tclInt.h index 8983659..7fc06c8 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3185,6 +3185,7 @@ MODULE_SCOPE int TclTrimRight(const char *bytes, int numBytes, MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct); MODULE_SCOPE int TclUtfToUCS4(const char *, int *); MODULE_SCOPE int TclUCS4ToUtf(int, char *); +MODULE_SCOPE int TclUCS4ToLower(int ch); #if TCL_UTF_MAX == 4 MODULE_SCOPE int TclGetUCS4(Tcl_Obj *, int); #else diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 5e9b7a1..9792071 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -88,7 +88,6 @@ static const unsigned char totalBytes[256] = { static int UtfCount(int ch); static int Invalid(unsigned char *src); static int UCS4ToUpper(int ch); -static int UCS4ToLower(int ch); static int UCS4ToTitle(int ch); /* @@ -1078,7 +1077,7 @@ Tcl_UtfToLower( src = dst = str; while (*src) { len = TclUtfToUCS4(src, &ch); - lowChar = UCS4ToLower(ch); + lowChar = TclUCS4ToLower(ch); /* * To keep badly formed Utf strings from getting inflated by the @@ -1149,7 +1148,7 @@ Tcl_UtfToTitle( lowChar = ch; /* Special exception for Georgian Asomtavruli chars, no titlecase. */ if ((unsigned)(lowChar - 0x1C90) >= 0x30) { - lowChar = UCS4ToLower(lowChar); + lowChar = TclUCS4ToLower(lowChar); } if (len < UtfCount(lowChar) || ((lowChar & ~0x7FF) == 0xD800)) { @@ -1254,11 +1253,11 @@ Tcl_UtfNcmp( if (ch1 != ch2) { #if TCL_UTF_MAX == 4 /* Surrogates always report higher than non-surrogates */ - if (((ch1 & 0xFC00) == 0xD800)) { - if ((ch2 & 0xFC00) != 0xD800) { + if (((ch1 & ~0x3FF) == 0xD800)) { + if ((ch2 & ~0x3FF) != 0xD800) { return ch1; } - } else if ((ch2 & 0xFC00) == 0xD800) { + } else if ((ch2 & ~0x3FF) == 0xD800) { return -ch2; } #endif @@ -1427,8 +1426,8 @@ Tcl_UniCharToUpper( *---------------------------------------------------------------------- */ -static int -UCS4ToLower( +int +TclUCS4ToLower( int ch) /* Unicode character to convert. */ { if (!UNICODE_OUT_OF_RANGE(ch)) { @@ -1447,7 +1446,7 @@ Tcl_UniChar Tcl_UniCharToLower( int ch) /* Unicode character to convert. */ { - return (Tcl_UniChar) UCS4ToLower(ch); + return (Tcl_UniChar) TclUCS4ToLower(ch); } /* diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 13b0d55..b4a07bb 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -2155,7 +2155,7 @@ Tcl_StringCaseMatch( int nocase) /* 0 for case sensitive, 1 for insensitive */ { int p, charLen; - Tcl_UniChar ch1 = 0, ch2 = 0; + int ch1 = 0, ch2 = 0; while (1) { p = *pattern; @@ -2196,12 +2196,12 @@ Tcl_StringCaseMatch( */ if (UCHAR(*pattern) < 0x80) { - ch2 = (Tcl_UniChar) + ch2 = (int) (nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern)); } else { - Tcl_UtfToUniChar(pattern, &ch2); + TclUtfToUCS4(pattern, &ch2); if (nocase) { - ch2 = Tcl_UniCharToLower(ch2); + ch2 = TclUCS4ToLower(ch2); } } @@ -2215,8 +2215,8 @@ Tcl_StringCaseMatch( if ((p != '[') && (p != '?') && (p != '\\')) { if (nocase) { while (*str) { - charLen = TclUtfToUniChar(str, &ch1); - if (ch2==ch1 || ch2==Tcl_UniCharToLower(ch1)) { + charLen = TclUtfToUCS4(str, &ch1); + if (ch2==ch1 || ch2==TclUCS4ToLower(ch1)) { break; } str += charLen; @@ -2229,7 +2229,7 @@ Tcl_StringCaseMatch( */ while (*str) { - charLen = TclUtfToUniChar(str, &ch1); + charLen = TclUtfToUCS4(str, &ch1); if (ch2 == ch1) { break; } @@ -2243,7 +2243,7 @@ Tcl_StringCaseMatch( if (*str == '\0') { return 0; } - str += TclUtfToUniChar(str, &ch1); + str += TclUtfToUCS4(str, &ch1); } } @@ -2254,7 +2254,7 @@ Tcl_StringCaseMatch( if (p == '?') { pattern++; - str += TclUtfToUniChar(str, &ch1); + str += TclUtfToUCS4(str, &ch1); continue; } @@ -2265,17 +2265,17 @@ Tcl_StringCaseMatch( */ if (p == '[') { - Tcl_UniChar startChar = 0, endChar = 0; + int startChar = 0, endChar = 0; pattern++; if (UCHAR(*str) < 0x80) { - ch1 = (Tcl_UniChar) + ch1 = (int) (nocase ? tolower(UCHAR(*str)) : UCHAR(*str)); str++; } else { - str += Tcl_UtfToUniChar(str, &ch1); + str += TclUtfToUCS4(str, &ch1); if (nocase) { - ch1 = Tcl_UniCharToLower(ch1); + ch1 = TclUCS4ToLower(ch1); } } while (1) { @@ -2283,13 +2283,13 @@ Tcl_StringCaseMatch( return 0; } if (UCHAR(*pattern) < 0x80) { - startChar = (Tcl_UniChar) (nocase + startChar = (int) (nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern)); pattern++; } else { - pattern += Tcl_UtfToUniChar(pattern, &startChar); + pattern += TclUtfToUCS4(pattern, &startChar); if (nocase) { - startChar = Tcl_UniCharToLower(startChar); + startChar = TclUCS4ToLower(startChar); } } if (*pattern == '-') { @@ -2298,13 +2298,13 @@ Tcl_StringCaseMatch( return 0; } if (UCHAR(*pattern) < 0x80) { - endChar = (Tcl_UniChar) (nocase + endChar = (int) (nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern)); pattern++; } else { - pattern += Tcl_UtfToUniChar(pattern, &endChar); + pattern += TclUtfToUCS4(pattern, &endChar); if (nocase) { - endChar = Tcl_UniCharToLower(endChar); + endChar = TclUCS4ToLower(endChar); } } if (((startChar <= ch1) && (ch1 <= endChar)) @@ -2350,10 +2350,10 @@ Tcl_StringCaseMatch( * each string match. */ - str += TclUtfToUniChar(str, &ch1); - pattern += TclUtfToUniChar(pattern, &ch2); + str += TclUtfToUCS4(str, &ch1); + pattern += TclUtfToUCS4(pattern, &ch2); if (nocase) { - if (Tcl_UniCharToLower(ch1) != Tcl_UniCharToLower(ch2)) { + if (TclUCS4ToLower(ch1) != TclUCS4ToLower(ch2)) { return 0; } } else if (ch1 != ch2) { -- cgit v0.12 From dfc1625fc35649fafff8a6619598351d21ef8bb9 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 5 May 2020 16:23:56 +0000 Subject: Remove PROTO_FLAGS here too --- unix/Makefile.in | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/unix/Makefile.in b/unix/Makefile.in index 6654f25..08219ca 100644 --- a/unix/Makefile.in +++ b/unix/Makefile.in @@ -114,11 +114,6 @@ LDFLAGS_DEBUG = @LDFLAGS_DEBUG@ LDFLAGS_OPTIMIZE = @LDFLAGS_OPTIMIZE@ LDFLAGS = @LDFLAGS_DEFAULT@ @LDFLAGS@ -# To disable ANSI-C procedure prototypes reverse the comment characters on the -# following lines: -PROTO_FLAGS = -#PROTO_FLAGS = -DNO_PROTOTYPE - # If you use the setenv, putenv, or unsetenv procedures to modify environment # variables in your application and you'd like those modifications to appear # in the "env" Tcl variable, switch the comments on the two lines below so @@ -282,7 +277,7 @@ VALGRINDARGS = --tool=memcheck --num-callers=24 \ STUB_CC_SWITCHES = -I"${BUILD_DIR}" -I${UNIX_DIR} -I${GENERIC_DIR} -I${TOMMATH_DIR} \ ${CFLAGS} ${CFLAGS_WARNING} ${SHLIB_CFLAGS} \ - ${AC_FLAGS} ${PROTO_FLAGS} ${ENV_FLAGS} ${EXTRA_CFLAGS} \ + ${AC_FLAGS} ${ENV_FLAGS} ${EXTRA_CFLAGS} \ @EXTRA_CC_SWITCHES@ CC_SWITCHES = $(STUB_CC_SWITCHES) ${NO_DEPRECATED_FLAGS} -DMP_FIXED_CUTOFFS -DMP_NO_STDINT @@ -292,7 +287,7 @@ APP_CC_SWITCHES = $(CC_SWITCHES) @EXTRA_APP_CC_SWITCHES@ LIBS = @TCL_LIBS@ DEPEND_SWITCHES = ${CFLAGS} -I${UNIX_DIR} -I${GENERIC_DIR} \ - ${AC_FLAGS} ${PROTO_FLAGS} ${EXTRA_CFLAGS} @EXTRA_CC_SWITCHES@ + ${AC_FLAGS} ${EXTRA_CFLAGS} @EXTRA_CC_SWITCHES@ TCLSH_OBJS = tclAppInit.o -- cgit v0.12 From 74330f28739f9f3c3020e7245ccf6710251534ae Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 7 May 2020 11:17:26 +0000 Subject: Fix [fad64a857e76f98e]: "lsearch" provides wrong errorCode with bad -stride option --- generic/tclCmdIL.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c index 3241398..7938088 100644 --- a/generic/tclCmdIL.c +++ b/generic/tclCmdIL.c @@ -3305,7 +3305,7 @@ Tcl_LsearchObjCmd( if (groupSize < 1) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "stride length must be at least 1", -1)); - Tcl_SetErrorCode(interp, "TCL", "OPERATION", "LSORT", + Tcl_SetErrorCode(interp, "TCL", "OPERATION", "LSEARCH", "BADSTRIDE", NULL); result = TCL_ERROR; goto done; -- cgit v0.12 From 13faf8b1d4ae63413f8d1c301c422aeab3eb977b Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 7 May 2020 14:15:06 +0000 Subject: For TCL_UTF_MAX==4: Make sure that Tcl_UtfNext()/Tcl_UtfPrev() never move more than 3 bytes. This is more consistant with what Tcl 8.7 does too. For TCL_UTF_MAX==6: Make sure that Tcl_UtfNext()/Tcl_UtfPrev() never move more than 4 bytes. For TCL_UTF_MAX==3: No change. Introduce ucs2_utf16 test constraint, since many test results now become the same for ucs2 and utf16. --- generic/tclUtf.c | 12 ++--- tests/utf.test | 161 ++++++++++++++++++++++++++++--------------------------- 2 files changed, 87 insertions(+), 86 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 1d5cff5..46a18e4 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -73,7 +73,7 @@ static const unsigned char totalBytes[256] = { #endif 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, -#if TCL_UTF_MAX > 3 +#if TCL_UTF_MAX > 4 4,4,4,4,4, #else 1,1,1,1,1, @@ -887,18 +887,18 @@ Tcl_UtfPrev( /* Continue the search backwards... */ look--; - } while (trailBytesSeen < TCL_UTF_MAX); + } while (trailBytesSeen < ((TCL_UTF_MAX > 4) ? 4 : 3)); /* - * We've seen TCL_UTF_MAX trail bytes, so we know there will not be a + * We've seen 3 (or 4) trail bytes, so we know there will not be a * properly formed byte sequence to find, and we can stop looking, - * accepting the fallback (for TCL_UTF_MAX > 3) or just go back as + * accepting the fallback (for TCL_UTF_MAX > 4) or just go back as * far as we can. */ -#if TCL_UTF_MAX > 3 +#if TCL_UTF_MAX > 4 return fallback; #else - return src - TCL_UTF_MAX; + return src - 3; #endif } diff --git a/tests/utf.test b/tests/utf.test index ad69c50..0acbb76 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -21,6 +21,7 @@ testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] testConstraint utf16 [expr {[string length [format %c 0x10000]] == 2}] testConstraint ucs4 [expr {[testConstraint fullutf] && [string length [format %c 0x10000]] == 1}] +testConstraint ucs2_utf16 [expr {![testConstraint ucs4]}] testConstraint Uesc [expr {"\U0041" eq "A"}] testConstraint pre388 [expr {"\x741" eq "A"}] @@ -392,10 +393,10 @@ test utf-6.67 {Tcl_UtfNext} {testutfnext testbytestring} { test utf-6.68 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext [testbytestring \xF2\xA0\xA0]G } 1 -test utf-6.69.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} { +test utf-6.69.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { testutfnext [testbytestring \xF2\xA0\xA0\xA0] } 1 -test utf-6.69.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} { +test utf-6.69.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0\xA0\xA0] } 4 test utf-6.70 {Tcl_UtfNext} {testutfnext testbytestring} { @@ -410,40 +411,40 @@ test utf-6.72 {Tcl_UtfNext} {testutfnext testbytestring} { test utf-6.73 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext [testbytestring \xF2\xA0\xA0\xF8] } 1 -test utf-6.74.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} { +test utf-6.74.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { testutfnext [testbytestring \xF2\xA0\xA0\xA0]G } 1 -test utf-6.74.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} { +test utf-6.74.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0\xA0\xA0]G } 4 -test utf-6.75.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} { +test utf-6.75.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] } 1 -test utf-6.75.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} { +test utf-6.75.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] } 4 -test utf-6.76.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} { +test utf-6.76.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xD0] } 1 -test utf-6.76.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} { +test utf-6.76.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xD0] } 4 -test utf-6.77.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} { +test utf-6.77.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xE8] } 1 -test utf-6.77.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} { +test utf-6.77.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xE8] } 4 -test utf-6.78.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} { +test utf-6.78.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xF2] } 1 -test utf-6.78.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} { +test utf-6.78.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xF2] } 4 -test utf-6.79.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} { +test utf-6.79.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { testutfnext [testbytestring \xF2\xA0\xA0\xA0G\xF8] } 1 -test utf-6.79.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} { +test utf-6.79.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0\xA0\xA0G\xF8] } 4 test utf-6.80 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} { @@ -467,10 +468,10 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} { test utf-6.86 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} { testutfnext [testbytestring \xF0\x80\x80\x80] } 1 -test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring ucs2} { +test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring ucs2_utf16} { testutfnext [testbytestring \xF0\x90\x80\x80] } 1 -test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring fullutf} { +test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF0\x90\x80\x80] } 4 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring} { @@ -479,10 +480,10 @@ test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {test test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring} { testutfnext [testbytestring \x80\x80\x00] } 1 -test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs2} { +test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs2_utf16} { testutfnext [testbytestring \xF4\x8F\xBF\xBF] } 1 -test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} { +test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs4} { testutfnext [testbytestring \xF4\x8F\xBF\xBF] } 4 test utf-6.91.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring ucs2} { @@ -557,52 +558,52 @@ test utf-6.108 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { test utf-6.109 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { testutfnext [testbytestring \xE8\xA0\xA0\xA0] 3 } 3 -test utf-6.110.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { +test utf-6.110.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2_utf16} { testutfnext [testbytestring \xF2\xA0\xA0\xA0G] 1 } 1 -test utf-6.110.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring fullutf} { +test utf-6.110.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0\xA0\xA0G] 1 } 0 -test utf-6.111.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { +test utf-6.111.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2_utf16} { testutfnext [testbytestring \xF2\xA0\xA0\xA0G] 2 } 1 -test utf-6.111.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring fullutf} { +test utf-6.111.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0\xA0\xA0G] 2 } 0 -test utf-6.112.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { +test utf-6.112.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2_utf16} { testutfnext [testbytestring \xF2\xA0\xA0\xA0G] 3 } 1 -test utf-6.112.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring fullutf} { +test utf-6.112.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0\xA0\xA0G] 3 } 0 -test utf-6.113.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { +test utf-6.113.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2_utf16} { testutfnext [testbytestring \xF2\xA0\xA0\xA0G] 4 } 1 -test utf-6.113.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring fullutf} { +test utf-6.113.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0\xA0\xA0G] 4 } 4 -test utf-6.114.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { +test utf-6.114.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2_utf16} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] 1 } 1 -test utf-6.114.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring fullutf} { +test utf-6.114.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] 1 } 0 -test utf-6.115.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { +test utf-6.115.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2_utf16} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] 2 } 1 -test utf-6.115.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring fullutf} { +test utf-6.115.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] 2 } 0 -test utf-6.116.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { +test utf-6.116.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2_utf16} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] 3 } 1 -test utf-6.116.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring fullutf} { +test utf-6.116.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] 3 } 0 -test utf-6.117.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { +test utf-6.117.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2_utf16} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] 4 } 1 -test utf-6.117.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring fullutf} { +test utf-6.117.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] 4 } 4 test utf-6.118 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { @@ -696,22 +697,22 @@ test utf-7.9.1 {Tcl_UtfPrev} {testutfprev testbytestring} { test utf-7.9.2 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xF8\xA0\xF8\xA0] 3 } 2 -test utf-7.10.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { +test utf-7.10.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2_utf16} { testutfprev A[testbytestring \xF2\xA0] } 2 -test utf-7.10.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { +test utf-7.10.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF2\xA0] } 1 -test utf-7.10.2 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { +test utf-7.10.2 {Tcl_UtfPrev} {testutfprev testbytestring ucs2_utf16} { testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 3 } 2 -test utf-7.10.3 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { +test utf-7.10.3 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 3 } 1 -test utf-7.10.4 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { +test utf-7.10.4 {Tcl_UtfPrev} {testutfprev testbytestring ucs2_utf16} { testutfprev A[testbytestring \xF2\xA0\xF8\xA0] 3 } 2 -test utf-7.10.5 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { +test utf-7.10.5 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF2\xA0\xF8\xA0] 3 } 1 test utf-7.11 {Tcl_UtfPrev} {testutfprev testbytestring} { @@ -753,22 +754,22 @@ test utf-7.14.1 {Tcl_UtfPrev} {testutfprev testbytestring} { test utf-7.14.2 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xF8\xA0\xA0\xF8] 4 } 3 -test utf-7.15.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { +test utf-7.15.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2_utf16} { testutfprev A[testbytestring \xF2\xA0\xA0] } 3 -test utf-7.15.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { +test utf-7.15.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF2\xA0\xA0] } 1 -test utf-7.15.1.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { +test utf-7.15.1.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2_utf16} { testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 4 } 3 -test utf-7.15.1.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { +test utf-7.15.1.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 4 } 1 -test utf-7.15.2.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { +test utf-7.15.2.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2_utf16} { testutfprev A[testbytestring \xF2\xA0\xA0\xF8] 4 } 3 -test utf-7.15.2.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { +test utf-7.15.2.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF2\xA0\xA0\xF8] 4 } 1 test utf-7.16 {Tcl_UtfPrev} testutfprev { @@ -789,52 +790,52 @@ test utf-7.17.1 {Tcl_UtfPrev} {testutfprev testbytestring} { test utf-7.17.2 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xD0\xA0\xA0\xF8] 4 } 3 -test utf-7.18.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { +test utf-7.18.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2_utf16} { testutfprev A[testbytestring \xA0\xA0\xA0] } 1 -test utf-7.18.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { +test utf-7.18.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xA0\xA0\xA0] } 3 -test utf-7.18.2 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { +test utf-7.18.2 {Tcl_UtfPrev} {testutfprev testbytestring ucs2_utf16} { testutfprev A[testbytestring \xA0\xA0\xA0\xA0] 4 } 1 -test utf-7.18.3 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { +test utf-7.18.3 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xA0\xA0\xA0\xA0] 4 } 3 -test utf-7.18.4 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { +test utf-7.18.4 {Tcl_UtfPrev} {testutfprev testbytestring ucs2_utf16} { testutfprev A[testbytestring \xA0\xA0\xA0\xF8] 4 } 1 -test utf-7.18.5 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { +test utf-7.18.5 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xA0\xA0\xA0\xF8] 4 } 3 -test utf-7.19.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { +test utf-7.19.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2_utf16} { testutfprev A[testbytestring \xF8\xA0\xA0\xA0] } 2 -test utf-7.19.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { +test utf-7.19.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF8\xA0\xA0\xA0] } 4 -test utf-7.20.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { +test utf-7.20.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2_utf16} { testutfprev A[testbytestring \xF2\xA0\xA0\xA0] } 2 -test utf-7.20.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { +test utf-7.20.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF2\xA0\xA0\xA0] } 1 -test utf-7.21.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { +test utf-7.21.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2_utf16} { testutfprev A\u8820[testbytestring \xA0] } 2 -test utf-7.21.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { +test utf-7.21.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A\u8820[testbytestring \xA0] } 4 -test utf-7.22.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { +test utf-7.22.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2_utf16} { testutfprev A[testbytestring \xD0\xA0\xA0\xA0] } 2 -test utf-7.22.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { +test utf-7.22.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xD0\xA0\xA0\xA0] } 4 -test utf-7.23.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { +test utf-7.23.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2_utf16} { testutfprev A[testbytestring \xA0\xA0\xA0\xA0] } 2 -test utf-7.23.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { +test utf-7.23.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xA0\xA0\xA0\xA0] } 4 test utf-7.24 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { @@ -858,10 +859,10 @@ test utf-7.28 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { test utf-7.28.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { testutfprev A[testbytestring \xE0\x80\x80] 2 } 1 -test utf-7.29.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} { +test utf-7.29.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2_utf16} { testutfprev A[testbytestring \xF0\x80\x80\x80] } 2 -test utf-7.29.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring fullutf} { +test utf-7.29.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF0\x80\x80\x80] } 4 test utf-7.30 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { @@ -891,22 +892,22 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { test utf-7.38 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { testutfprev A[testbytestring \xE0\xA0\x80] 2 } 1 -test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} { +test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2_utf16} { testutfprev A[testbytestring \xF0\x90\x80\x80] } 2 -test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring fullutf} { +test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF0\x90\x80\x80] } 1 -test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} { +test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2_utf16} { testutfprev A[testbytestring \xF0\x90\x80\x80] 4 } 3 -test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring fullutf} { +test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF0\x90\x80\x80] 4 } 1 -test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} { +test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2_utf16} { testutfprev A[testbytestring \xF0\x90\x80\x80] 3 } 2 -test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring fullutf} { +test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF0\x90\x80\x80] 3 } 1 test utf-7.42 {Tcl_UtfPrev -- overlong sequence} testutfprev { @@ -921,10 +922,10 @@ test utf-7.44 {Tcl_UtfPrev -- no lead byte at start} testutfprev { test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} testutfprev { testutfprev [testbytestring \xA0\xA0\xA0] } 2 -test utf-7.46.0 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring ucs2} { +test utf-7.46.0 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring ucs2_utf16} { testutfprev [testbytestring \xA0\xA0\xA0\xA0] } 1 -test utf-7.46.1 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring fullutf} { +test utf-7.46.1 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring ucs4} { testutfprev [testbytestring \xA0\xA0\xA0\xA0] } 3 test utf-7.47 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev testbytestring} { @@ -936,31 +937,31 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {te test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev testbytestring} { testutfprev [testbytestring \xE8\xA0\x00] 2 } 0 -test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} { +test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2_utf16} { testutfprev A[testbytestring \xF4\x8F\xBF\xBF] } 2 -test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} { +test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF4\x8F\xBF\xBF] } 1 -test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} { +test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2_utf16} { testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 4 } 3 -test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} { +test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 4 } 1 -test utf-7.48.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} { +test utf-7.48.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2_utf16} { testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 3 } 2 -test utf-7.48.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} { +test utf-7.48.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 3 } 1 test utf-7.48.6 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 2 } 1 -test utf-7.49.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} { +test utf-7.49.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2_utf16} { testutfprev A[testbytestring \xF4\x90\x80\x80] } 2 -test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} { +test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF4\x90\x80\x80] } 4 test utf-7.49.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { -- cgit v0.12 From 8fc378853391cde228bf25c1491e9ba02ebf0f2c Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 7 May 2020 18:23:36 +0000 Subject: New approach to fixing the regression reported in [31aa44375d] builds on recent reforms. Older efforts aborted. --- generic/tclUtf.c | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 12eb637..8ae4b15 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -381,7 +381,7 @@ Tcl_UtfToUniChar( * characters representing themselves. */ -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX == 4 /* If *chPtr contains a high surrogate (produced by a previous * Tcl_UtfToUniChar() call) and the next 3 bytes are UTF-8 continuation * bytes, then we must produce a follow-up low surrogate. We only @@ -437,7 +437,7 @@ Tcl_UtfToUniChar( * Four-byte-character lead byte followed by at least two trail bytes. * We don't test the validity of 3th trail byte, see [ed29806ba] */ -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX == 4 Tcl_UniChar high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2) | ((src[2] & 0x3F) >> 4)) - 0x40; if (high < 0x400) { @@ -446,7 +446,7 @@ Tcl_UtfToUniChar( return 1; } /* out of range, < 0x10000 or > 0x10FFFF */ -#else +#elif TCL_UTF_MAX > 4 if ((src[3] & 0xC0) == 0x80) { *chPtr = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12) | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)); @@ -617,25 +617,11 @@ Tcl_NumUtfChars( */ while (src <= optPtr /* && Tcl_UtfCharComplete(src, endPtr - src) */ ) { -#if TCL_UTF_MAX < 4 - if (((unsigned)UCHAR(*src) - 0xF0) < 5) { - /* treat F0 - F4 as single character */ - ch = 0; - src++; - } else -#endif src += TclUtfToUniChar(src, &ch); i++; } /* Loop over the remaining string where call must happen */ while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { -#if TCL_UTF_MAX < 4 - if (((unsigned)UCHAR(*src) - 0xF0) < 5) { - /* treat F0 - F4 as single character */ - ch = 0; - src++; - } else -#endif src += TclUtfToUniChar(src, &ch); i++; } -- cgit v0.12 From 6d36267ee03ebbf37b1843d0602220bbc299f8e9 Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 7 May 2020 18:36:25 +0000 Subject: split and constrain the failing test. --- tests/encoding.test | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/encoding.test b/tests/encoding.test index 552c97f..84f9ae1 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -335,7 +335,12 @@ test encoding-15.4 {UtfToUtfProc emoji character input} -body { set y [encoding convertfrom utf-8 \xED\xA0\xBD\xED\xB8\x82] list [string length $x] $y } -result "6 \uD83D\uDE02" -test encoding-15.5 {UtfToUtfProc emoji character input} { +test encoding-15.5.0 {UtfToUtfProc emoji character input} ucs2 { + set x \xF0\x9F\x98\x82 + set y [encoding convertfrom utf-8 \xF0\x9F\x98\x82] + list [string length $x] $y +} "4 \xF0\x9F\x98\x82" +test encoding-15.5.1 {UtfToUtfProc emoji character input} fullutf { set x \xF0\x9F\x98\x82 set y [encoding convertfrom utf-8 \xF0\x9F\x98\x82] list [string length $x] $y -- cgit v0.12 From 2f5e983d7aaefadd8f9909a27c26d2d62d049f63 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 8 May 2020 08:09:42 +0000 Subject: More testcases that can run without "testbytestring" constraints. --- tests/utf.test | 72 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/tests/utf.test b/tests/utf.test index a9534f2..394fc48 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -483,10 +483,10 @@ test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {test test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring} { testutfnext [testbytestring \x80\x80\x00] } 1 -test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs2_utf16} { +test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring ucs2_utf16} { testutfnext [testbytestring \xF4\x8F\xBF\xBF] } 1 -test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs4} { +test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF4\x8F\xBF\xBF] } 4 test utf-6.91.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring ucs2} { @@ -519,23 +519,23 @@ test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {te test utf-6.95 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \x80\x80\x80\x80] } 1 -test utf-6.96 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { - testutfnext [testbytestring G] 0 +test utf-6.96 {Tcl_UtfNext, read limits} testutfnext { + testutfnext G 0 } 0 test utf-6.97 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { testutfnext [testbytestring \xA0] 0 } 0 -test utf-6.98 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { - testutfnext A[testbytestring G] 1 +test utf-6.98 {Tcl_UtfNext, read limits} testutfnext { + testutfnext AG 1 } 1 test utf-6.99 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { testutfnext A[testbytestring \xA0] 1 } 1 test utf-6.100 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { - testutfnext [testbytestring \xD0\xA0G] 1 + testutfnext [testbytestring \xD0\xA0]G 1 } 0 test utf-6.101 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { - testutfnext [testbytestring \xD0\xA0G] 2 + testutfnext [testbytestring \xD0\xA0]G 2 } 2 test utf-6.102 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { testutfnext [testbytestring \xD0\xA0\xA0] 1 @@ -543,47 +543,47 @@ test utf-6.102 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { test utf-6.103 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { testutfnext [testbytestring \xD0\xA0\xA0] 2 } 2 -test utf-6.104 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { - testutfnext [testbytestring \xE8\xA0\xA0G] 1 +test utf-6.104 {Tcl_UtfNext, read limits} testutfnext { + testutfnext \u8820G 1 } 0 -test utf-6.105 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { - testutfnext [testbytestring \xE8\xA0\xA0G] 2 +test utf-6.105 {Tcl_UtfNext, read limits} testutfnext { + testutfnext \u8820G 2 } 0 -test utf-6.106 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { - testutfnext [testbytestring \xE8\xA0\xA0G] 3 +test utf-6.106 {Tcl_UtfNext, read limits} testutfnext { + testutfnext \u8820G 3 } 3 test utf-6.107 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { - testutfnext [testbytestring \xE8\xA0\xA0\xA0] 1 + testutfnext \u8820[testbytestring \xA0] 1 } 0 test utf-6.108 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { - testutfnext [testbytestring \xE8\xA0\xA0\xA0] 2 + testutfnext \u8820[testbytestring \xA0] 2 } 0 test utf-6.109 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { - testutfnext [testbytestring \xE8\xA0\xA0\xA0] 3 + testutfnext \u8820[testbytestring \xA0] 3 } 3 test utf-6.110.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2_utf16} { - testutfnext [testbytestring \xF2\xA0\xA0\xA0G] 1 + testutfnext [testbytestring \xF2\xA0\xA0\xA0]G 1 } 1 test utf-6.110.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs4} { - testutfnext [testbytestring \xF2\xA0\xA0\xA0G] 1 + testutfnext [testbytestring \xF2\xA0\xA0\xA0]G 1 } 0 test utf-6.111.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2_utf16} { - testutfnext [testbytestring \xF2\xA0\xA0\xA0G] 2 + testutfnext [testbytestring \xF2\xA0\xA0\xA0]G 2 } 1 test utf-6.111.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs4} { - testutfnext [testbytestring \xF2\xA0\xA0\xA0G] 2 + testutfnext [testbytestring \xF2\xA0\xA0\xA0]G 2 } 0 test utf-6.112.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2_utf16} { - testutfnext [testbytestring \xF2\xA0\xA0\xA0G] 3 + testutfnext [testbytestring \xF2\xA0\xA0\xA0]G 3 } 1 test utf-6.112.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs4} { - testutfnext [testbytestring \xF2\xA0\xA0\xA0G] 3 + testutfnext [testbytestring \xF2\xA0\xA0\xA0]G 3 } 0 test utf-6.113.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2_utf16} { - testutfnext [testbytestring \xF2\xA0\xA0\xA0G] 4 + testutfnext [testbytestring \xF2\xA0\xA0\xA0]G 4 } 1 test utf-6.113.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs4} { - testutfnext [testbytestring \xF2\xA0\xA0\xA0G] 4 + testutfnext [testbytestring \xF2\xA0\xA0\xA0]G 4 } 4 test utf-6.114.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2_utf16} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] 1 @@ -610,28 +610,28 @@ test utf-6.117.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] 4 } 4 test utf-6.118 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { - testutfnext [testbytestring \xA0G] 0 + testutfnext [testbytestring \xA0]G 0 } 0 test utf-6.119 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { - testutfnext [testbytestring \xA0G] 1 + testutfnext [testbytestring \xA0]G 1 } 1 test utf-6.120 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xA0\xA0] 1 } 1 test utf-6.121 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { - testutfnext [testbytestring \xA0\xA0G] 2 + testutfnext [testbytestring \xA0\xA0]G 2 } 1 test utf-6.122 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xA0\xA0\xA0] 2 } 1 test utf-6.123 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { - testutfnext [testbytestring \xA0\xA0\xA0G] 3 + testutfnext [testbytestring \xA0\xA0\xA0]G 3 } 1 test utf-6.124 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xA0\xA0\xA0\xA0] 3 } 1 test utf-6.125 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { - testutfnext [testbytestring \xA0\xA0\xA0\xA0G] 4 + testutfnext [testbytestring \xA0\xA0\xA0\xA0]G 4 } 1 test utf-6.126 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xA0\xA0\xA0\xA0\xA0] 4 @@ -877,7 +877,7 @@ test utf-7.31 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { test utf-7.32 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { testutfprev A[testbytestring \xF0\x80\x80\x80] 2 } 1 -test utf-7.33 {Tcl_UtfPrev -- overlong sequence} {testutfprev} { +test utf-7.33 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\x00 } 1 test utf-7.34 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { @@ -913,16 +913,16 @@ test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF0\x90\x80\x80] 3 } 1 -test utf-7.42 {Tcl_UtfPrev -- overlong sequence} testutfprev { +test utf-7.42 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { testutfprev A[testbytestring \xF0\x90\x80\x80] 2 } 1 -test utf-7.43 {Tcl_UtfPrev -- no lead byte at start} testutfprev { +test utf-7.43 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring} { testutfprev [testbytestring \xA0] } 0 -test utf-7.44 {Tcl_UtfPrev -- no lead byte at start} testutfprev { +test utf-7.44 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring} { testutfprev [testbytestring \xA0\xA0] } 1 -test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} testutfprev { +test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring} { testutfprev [testbytestring \xA0\xA0\xA0] } 2 test utf-7.46.0 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring ucs2_utf16} { @@ -1479,7 +1479,7 @@ UniCharCaseCmpTest > \U10000 \uFFFF {Uesc ucs4} test utf-26.1 {Tcl_UniCharDString} -setup { testobj freeallvars -} -constraints {teststringobj} -cleanup { +} -constraints {teststringobj testbytestring} -cleanup { testobj freeallvars } -body { teststringobj set 1 foo -- cgit v0.12 From 0733f232745ac3cc9a3bd4913bd5ffb8b58378a5 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 10 May 2020 19:28:08 +0000 Subject: Tweak Invalid() function: No need for "return 0" twice in the function. For start bytes F0-F4, case TCL_UTF_MAX=4, Tcl_UtfToUniChar() reads 3 bytes but only advances 1 byte. So Tcl_UtfCharComplete() must make sure 3 bytes are available, not 1. Adapt Tcl_UtfCharComplete() accordingly. No change for TCL_UTF_MAX=[3|6] --- generic/tclUtf.c | 39 +++++++++++++++++++++++++++++++-------- tests/utf.test | 14 +++++++------- 2 files changed, 38 insertions(+), 15 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index c0de80a..5e0b2e0 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -81,6 +81,30 @@ static const unsigned char totalBytes[256] = { 1,1,1,1,1,1,1,1,1,1,1 }; +static const unsigned char complete[256] = { + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +#if TCL_UTF_MAX < 4 + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +#else /* Tcl_UtfCharComplete() might point to 2nd byte of valid 4-byte sequence */ + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +#endif + 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +#if TCL_UTF_MAX > 4 + 4,4,4,4,4, +#elif TCL_UTF_MAX < 4 + 1,1,1,1,1, +#else + 3,3,3,3,3, +#endif + 1,1,1,1,1,1,1,1,1,1,1 +}; + /* * Functions used only in this module. */ @@ -174,14 +198,13 @@ Invalid( unsigned char byte = UCHAR(*src); int index; - if ((byte & 0xC3) != 0xC0) { + if ((byte & 0xC3) == 0xC0) { /* Only lead bytes 0xC0, 0xE0, 0xF0, 0xF4 need examination */ - return 0; - } - index = (byte - 0xC0) >> 1; - if (UCHAR(src[1]) < bounds[index] || UCHAR(src[1]) > bounds[index+1]) { - /* Out of bounds - report invalid. */ - return 1; + index = (byte - 0xC0) >> 1; + if (UCHAR(src[1]) < bounds[index] || UCHAR(src[1]) > bounds[index+1]) { + /* Out of bounds - report invalid. */ + return 1; + } } return 0; } @@ -568,7 +591,7 @@ Tcl_UtfCharComplete( * a complete UTF-8 character. */ int length) /* Length of above string in bytes. */ { - return length >= totalBytes[UCHAR(*src)]; + return length >= complete[UCHAR(*src)]; } /* diff --git a/tests/utf.test b/tests/utf.test index 1a4b157..8745385 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -570,13 +570,13 @@ test utf-6.108 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { test utf-6.109 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { testutfnext \u8820[testbytestring \xA0] 3 } 3 -test utf-6.110.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2_utf16} { +test utf-6.110.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xF2\xA0\xA0\xA0]G 1 } 1 -test utf-6.110.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs4} { +test utf-6.110.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring fullutf} { testutfnext [testbytestring \xF2\xA0\xA0\xA0]G 1 } 0 -test utf-6.111.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2_utf16} { +test utf-6.111.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xF2\xA0\xA0\xA0]G 2 } 1 test utf-6.111.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs4} { @@ -594,16 +594,16 @@ test utf-6.113.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2_utf test utf-6.113.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0\xA0\xA0]G 4 } 4 -test utf-6.114.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2_utf16} { +test utf-6.114.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] 1 } 1 -test utf-6.114.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs4} { +test utf-6.114.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring fullutf} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] 1 } 0 -test utf-6.115.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2_utf16} { +test utf-6.115.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] 2 } 1 -test utf-6.115.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs4} { +test utf-6.115.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring fullutf} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] 2 } 0 test utf-6.116.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2_utf16} { -- cgit v0.12 From 594b85ce18c43c1a0665f90f702fa3d0da4659cf Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 10 May 2020 20:58:10 +0000 Subject: Demonstration for documentation bug, and suggestion for improved wording. More explanation will follow in the ticket. --- doc/Utf.3 | 4 +++- generic/tclUtf.c | 12 +----------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/doc/Utf.3 b/doc/Utf.3 index c8c6132..35f9327 100644 --- a/doc/Utf.3 +++ b/doc/Utf.3 @@ -290,7 +290,9 @@ characters. Behavior is undefined if a negative \fIindex\fR is given. \fBTcl_UtfAtIndex\fR returns a pointer to the specified character (not byte) \fIindex\fR in the UTF-8 string \fIsrc\fR. The source string must contain at least \fIindex\fR characters. This is equivalent to calling -\fBTcl_UtfNext\fR \fIindex\fR times. If a negative \fIindex\fR is given, +\fBTcl_UtfToUniChar\fR \fIindex\fR times, except if the index points to +a lower surrogate preceded by an upper surrogate: In that case, the returned +pointer will point just after the lower surrogate. If a negative \fIindex\fR is given, the return pointer points to the first character in the source string. .PP \fBTcl_UtfBackslash\fR is a utility procedure used by several of the Tcl diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 5951f68..dbcfd6d 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -1166,19 +1166,9 @@ Tcl_UtfAtIndex( const char *src, /* The UTF-8 string. */ int index) /* The position of the desired character. */ { - Tcl_UniChar ch = 0; - int len = 0; - while (index-- > 0) { - len = TclUtfToUniChar(src, &ch); - src += len; + src = Tcl_UtfNext(src); } -#if TCL_UTF_MAX <= 3 - if ((ch >= 0xD800) && (len < 3)) { - /* Index points at character following high Surrogate */ - src += TclUtfToUniChar(src, &ch); - } -#endif return src; } -- cgit v0.12 From 429a4171cb6436a84c7923dcac6ef6fe3460ce20 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 11 May 2020 07:39:02 +0000 Subject: occurance -> occurrence. --- ChangeLog.2000 | 2 +- generic/tclCmdIL.c | 4 ++-- generic/tclFileName.c | 2 +- generic/tclUtf.c | 6 +++--- tests/README | 6 +++--- tests/encoding.test | 2 +- tests/lsearch.test | 18 +++++++++--------- tests/utf.test | 2 +- tools/mkdepend.tcl | 2 +- 9 files changed, 22 insertions(+), 22 deletions(-) diff --git a/ChangeLog.2000 b/ChangeLog.2000 index 5b62351..7e78c19 100644 --- a/ChangeLog.2000 +++ b/ChangeLog.2000 @@ -1779,7 +1779,7 @@ * generic/tclCmdMZ.c: Fixed infinite loop bug with [regexp -all] [Bug: 4981]. - * tests/*.test: Changed all occurances of "namespace import + * tests/*.test: Changed all occurrences of "namespace import ::tcltest" to "namespace import -force ::tcltest" [Bug: 3948]. 2000-04-09 Brent Welch diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c index 3ec1c09..e97d495 100644 --- a/generic/tclCmdIL.c +++ b/generic/tclCmdIL.c @@ -3328,11 +3328,11 @@ Tcl_LsearchObjCmd( /* * Normally, binary search is written to stop when it finds a * match. If there are duplicates of an element in the list, - * our first match might not be the first occurance. + * our first match might not be the first occurrence. * Consider: 0 0 0 1 1 1 2 2 2 * * To maintain consistancy with standard lsearch semantics, we - * must find the leftmost occurance of the pattern in the + * must find the leftmost occurrence of the pattern in the * list. Thus we don't just stop searching here. This * variation means that a search always makes log n * comparisons (normal binary search might "get lucky" with an diff --git a/generic/tclFileName.c b/generic/tclFileName.c index 7afcdaf..8fb9f4d 100644 --- a/generic/tclFileName.c +++ b/generic/tclFileName.c @@ -2058,7 +2058,7 @@ TclGlob( * SkipToChar -- * * This function traverses a glob pattern looking for the next unquoted - * occurance of the specified character at the same braces nesting level. + * occurrence of the specified character at the same braces nesting level. * * Results: * Updates stringPtr to point to the matching character, or to the end of diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 5e0b2e0..a6dfb8a 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -653,7 +653,7 @@ Tcl_NumUtfChars( src += TclUtfToUniChar(src, &ch); } else { /* - * src points to incomplete UTF-8 sequence + * src points to incomplete UTF-8 sequence * Treat first byte as character and count it */ src++; @@ -669,7 +669,7 @@ Tcl_NumUtfChars( * * Tcl_UtfFindFirst -- * - * Returns a pointer to the first occurance of the given Unicode character + * Returns a pointer to the first occurrence of the given Unicode character * in the NULL-terminated UTF-8 string. The NULL terminator is considered * part of the UTF-8 string. Equivalent to Plan 9 utfrune(). * @@ -706,7 +706,7 @@ Tcl_UtfFindFirst( * * Tcl_UtfFindLast -- * - * Returns a pointer to the last occurance of the given Unicode character + * Returns a pointer to the last occurrence of the given Unicode character * in the NULL-terminated UTF-8 string. The NULL terminator is considered * part of the UTF-8 string. Equivalent to Plan 9 utfrrune(). * diff --git a/tests/README b/tests/README index ce2382e..e86100f 100644 --- a/tests/README +++ b/tests/README @@ -59,7 +59,7 @@ should correspond to the Tcl or C code file that they are testing. For example, the test file for the C file "tclCmdAH.c" is "cmdAH.test". Test files that contain black-box tests may not correspond to any Tcl or C code file so they should match the pattern -"*_bb.test". +"*_bb.test". Be sure your new test file can be run from any working directory. @@ -72,12 +72,12 @@ as well as an installation environment. If your test file contains tests that should not be run in one or more of those cases, please use the constraints mechanism to skip those tests. -4. Incompatibilities of package tcltest 2.1 with +4. Incompatibilities of package tcltest 2.1 with testing machinery of very old versions of Tcl: ------------------------------------------------ 1) Global variables such as VERBOSE, TESTS, and testConfig of the - old machinery correspond to the [configure -verbose], + old machinery correspond to the [configure -verbose], [configure -match], and [testConstraint] commands of tcltest 2.1, respectively. diff --git a/tests/encoding.test b/tests/encoding.test index efb1518..a2bfe5d 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -294,7 +294,7 @@ test encoding-12.2 {LoadTableEncoding: single-byte encoding} { append x [encoding convertfrom iso8859-3 ab\xD5g] } "ab\xd5gab\u120g" test encoding-12.3 {LoadTableEncoding: multi-byte encoding} { - set x [encoding convertto shiftjis ab\u4E4Eg] + set x [encoding convertto shiftjis ab\u4E4Eg] append x [encoding convertfrom shiftjis ab\x8c\xc1g] } "ab\x8c\xc1gab\u4e4eg" test encoding-12.4 {LoadTableEncoding: double-byte encoding} { diff --git a/tests/lsearch.test b/tests/lsearch.test index 5e8a1f8..7e6a345 100644 --- a/tests/lsearch.test +++ b/tests/lsearch.test @@ -149,14 +149,14 @@ test lsearch-5.2 {binary search} { } set res } $decreasingIntegers -test lsearch-5.3 {binary search finds leftmost occurances} { +test lsearch-5.3 {binary search finds leftmost occurrences} { set res {} for {set i 0} {$i < 10} {incr i} { lappend res [lsearch -integer -sorted $repeatingIncreasingIntegers $i] } set res } [list 0 5 10 15 20 25 30 35 40 45] -test lsearch-5.4 {binary search -decreasing finds leftmost occurances} { +test lsearch-5.4 {binary search -decreasing finds leftmost occurrences} { set res {} for {set i 9} {$i >= 0} {incr i -1} { lappend res [lsearch -sorted -integer -decreasing \ @@ -404,16 +404,16 @@ test lsearch-17.2 {lsearch -index option, basic functionality} { lsearch -index 1 -exact {{a c} {a b} {a a}} a } 2 test lsearch-17.3 {lsearch -index option, basic functionality} { - lsearch -index 1 -glob {{ab cb} {ab bb} {ab ab}} b* + lsearch -index 1 -glob {{ab cb} {ab bb} {ab ab}} b* } 1 test lsearch-17.4 {lsearch -index option, basic functionality} { lsearch -index 1 -regexp {{ab cb} {ab bb} {ab ab}} {[cb]b} -} 0 +} 0 test lsearch-17.5 {lsearch -index option, basic functionality} { lsearch -all -index 0 -exact {{a c} {a b} {d a}} a } {0 1} test lsearch-17.6 {lsearch -index option, basic functionality} { - lsearch -all -index 1 -glob {{ab cb} {ab bb} {db bx}} b* + lsearch -all -index 1 -glob {{ab cb} {ab bb} {db bx}} b* } {1 2} test lsearch-17.7 {lsearch -index option, basic functionality} { lsearch -all -index 1 -regexp {{ab cb} {ab bb} {ab ab}} {[cb]b} @@ -454,11 +454,11 @@ test lsearch-18.2 {lsearch -index option, list as index basic functionality} { lsearch -index {2 0} -exact {{{x x} {x b} {a d}} {{a c} {a b} {a a}}} a } 0 test lsearch-18.3 {lsearch -index option, list as index basic functionality} { - lsearch -index {1 1} -glob {{{ab cb} {ab bb} {ab ab}} {{ab cb} {ab bb} {ab ab}}} b* + lsearch -index {1 1} -glob {{{ab cb} {ab bb} {ab ab}} {{ab cb} {ab bb} {ab ab}}} b* } 0 test lsearch-18.4 {lsearch -index option, list as index basic functionality} { lsearch -index {0 1} -regexp {{{ab cb} {ab bb} {ab ab}} {{ab cb} {ab bb} {ab ab}}} {[cb]b} -} 0 +} 0 test lsearch-18.5 {lsearch -index option, list as index basic functionality} { lsearch -all -index {0 0} -exact {{{a c} {a b} {d a}} {{a c} {a b} {d a}}} a } {0 1} @@ -470,11 +470,11 @@ test lsearch-19.2 {lsearch -subindices option} { lsearch -subindices -index {2 0} -exact {{{x x} {x b} {a d}} {{a c} {a b} {a a}}} a } {0 2 0} test lsearch-19.3 {lsearch -subindices option} { - lsearch -subindices -index {1 1} -glob {{{ab cb} {ab bb} {ab ab}} {{ab cb} {ab bb} {ab ab}}} b* + lsearch -subindices -index {1 1} -glob {{{ab cb} {ab bb} {ab ab}} {{ab cb} {ab bb} {ab ab}}} b* } {0 1 1} test lsearch-19.4 {lsearch -subindices option} { lsearch -subindices -index {0 1} -regexp {{{ab cb} {ab bb} {ab ab}} {{ab cb} {ab bb} {ab ab}}} {[cb]b} -} {0 0 1} +} {0 0 1} test lsearch-19.5 {lsearch -subindices option} { lsearch -subindices -all -index {0 0} -exact {{{a c} {a b} {d a}} {{a c} {a b} {d a}}} a } {{0 0 0} {1 0 0}} diff --git a/tests/utf.test b/tests/utf.test index 8745385..1b427c9 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -179,7 +179,7 @@ test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars test test utf-4.12.2 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring utf16} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end } 2 -test utf-4.13 {Tcl_NumUtfChars: end of string} {testnumutfchars testbytestring} { +test utf-4.13 {Tcl_NumUtfChars: end of string} {testnumutfchars testbytestring} { testnumutfchars foobar[testbytestring \xF2\xC2\xA0] end } 8 diff --git a/tools/mkdepend.tcl b/tools/mkdepend.tcl index ecb2206..3d96a5e 100644 --- a/tools/mkdepend.tcl +++ b/tools/mkdepend.tcl @@ -98,7 +98,7 @@ proc readDepends {chan} { } else { # don't include ourselves as a dependency of ourself. if {![string compare $fname $target]} {continue} - # store in an array so multiple occurances are not counted. + # store in an array so multiple occurrences are not counted. set depends($target|$fname) "" } } -- cgit v0.12 From 783b10ecec013f3f096ca5cc2f56b628db6b2fce Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 11 May 2020 10:03:46 +0000 Subject: Tweak the Tcl_UtfPrev() implementation for TCL_UTF_MAX=4. This fixes 10 testcases in 4 groups (utf-7.10, utf-7.15, utf-7.40 and utf-7.48) , where Tcl_UtfPrev() didn't jump to the beginning of the UTF-8 character, even though there was no limitation which prevented that. So, this is actually a bug-fix for the TIP #389 implementation. --- generic/tclUtf.c | 2 +- tests/utf.test | 62 +++++++++++++++++++++++++------------------------------- 2 files changed, 29 insertions(+), 35 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index a6dfb8a..9596aed 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -857,7 +857,7 @@ Tcl_UtfPrev( * it (the fallback) is correct. */ - || (trailBytesSeen >= totalBytes[byte])) { + || (trailBytesSeen >= complete[byte])) { /* * That is, (1 + trailBytesSeen > needed). * We've examined more bytes than needed to complete diff --git a/tests/utf.test b/tests/utf.test index 1b427c9..309b277 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -173,12 +173,12 @@ test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfc test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs2} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end } 4 -test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs4} { - testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end -} 1 -test utf-4.12.2 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring utf16} { +test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring utf16} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end } 2 +test utf-4.12.2 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs4} { + testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end +} 1 test utf-4.13 {Tcl_NumUtfChars: end of string} {testnumutfchars testbytestring} { testnumutfchars foobar[testbytestring \xF2\xC2\xA0] end } 8 @@ -282,25 +282,19 @@ test utf-6.28 {Tcl_UtfNext} {testutfnext testbytestring} { test utf-6.29 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext [testbytestring \xE8\xF8] } 1 -test utf-6.30.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} { +test utf-6.30.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { testutfnext [testbytestring \xF2] } 1 -test utf-6.30.1 {Tcl_UtfNext} {testutfnext testbytestring utf16} { - testutfnext [testbytestring \xF2] -} 1 -test utf-6.30.2 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { +test utf-6.30.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2] } -1 test utf-6.31 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext [testbytestring \xF2]G } 1 -test utf-6.32.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} { - testutfnext [testbytestring \xF2\xA0] -} 1 -test utf-6.32.1 {Tcl_UtfNext} {testutfnext testbytestring utf16} { +test utf-6.32.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { testutfnext [testbytestring \xF2\xA0] } 1 -test utf-6.32.2 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { +test utf-6.32.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0] } -1 test utf-6.33 {Tcl_UtfNext} {testutfnext testbytestring} { @@ -709,22 +703,22 @@ test utf-7.9.1 {Tcl_UtfPrev} {testutfprev testbytestring} { test utf-7.9.2 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xF8\xA0\xF8\xA0] 3 } 2 -test utf-7.10.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2_utf16} { +test utf-7.10.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { testutfprev A[testbytestring \xF2\xA0] } 2 -test utf-7.10.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { +test utf-7.10.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { testutfprev A[testbytestring \xF2\xA0] } 1 -test utf-7.10.2 {Tcl_UtfPrev} {testutfprev testbytestring ucs2_utf16} { +test utf-7.10.2 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 3 } 2 -test utf-7.10.3 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { +test utf-7.10.3 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 3 } 1 -test utf-7.10.4 {Tcl_UtfPrev} {testutfprev testbytestring ucs2_utf16} { +test utf-7.10.4 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { testutfprev A[testbytestring \xF2\xA0\xF8\xA0] 3 } 2 -test utf-7.10.5 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { +test utf-7.10.5 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { testutfprev A[testbytestring \xF2\xA0\xF8\xA0] 3 } 1 test utf-7.11 {Tcl_UtfPrev} {testutfprev testbytestring} { @@ -766,22 +760,22 @@ test utf-7.14.1 {Tcl_UtfPrev} {testutfprev testbytestring} { test utf-7.14.2 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xF8\xA0\xA0\xF8] 4 } 3 -test utf-7.15.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2_utf16} { +test utf-7.15.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { testutfprev A[testbytestring \xF2\xA0\xA0] } 3 -test utf-7.15.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { +test utf-7.15.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { testutfprev A[testbytestring \xF2\xA0\xA0] } 1 -test utf-7.15.1.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2_utf16} { +test utf-7.15.2 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 4 } 3 -test utf-7.15.1.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { +test utf-7.15.3 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 4 } 1 -test utf-7.15.2.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2_utf16} { +test utf-7.15.4 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { testutfprev A[testbytestring \xF2\xA0\xA0\xF8] 4 } 3 -test utf-7.15.2.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { +test utf-7.15.5 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { testutfprev A[testbytestring \xF2\xA0\xA0\xF8] 4 } 1 test utf-7.16 {Tcl_UtfPrev} testutfprev { @@ -910,16 +904,16 @@ test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF0\x90\x80\x80] } 1 -test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2_utf16} { +test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} { testutfprev A[testbytestring \xF0\x90\x80\x80] 4 } 3 -test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs4} { +test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring fullutf} { testutfprev A[testbytestring \xF0\x90\x80\x80] 4 } 1 -test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2_utf16} { +test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} { testutfprev A[testbytestring \xF0\x90\x80\x80] 3 } 2 -test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs4} { +test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring fullutf} { testutfprev A[testbytestring \xF0\x90\x80\x80] 3 } 1 test utf-7.42 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { @@ -955,16 +949,16 @@ test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbyte test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF4\x8F\xBF\xBF] } 1 -test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2_utf16} { +test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} { testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 4 } 3 -test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs4} { +test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} { testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 4 } 1 -test utf-7.48.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2_utf16} { +test utf-7.48.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} { testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 3 } 2 -test utf-7.48.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs4} { +test utf-7.48.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} { testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 3 } 1 test utf-7.48.6 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { -- cgit v0.12 From 22648e5ebc9138abf5aba928db74520ac22f23ab Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 11 May 2020 10:41:30 +0000 Subject: Fix [d402ffe76]: Win32 potential crash when using main(). Thanks to Christian Werner for the Bug report and the Fix. --- generic/tclEnv.c | 11 +++++++++++ win/tclWinInit.c | 31 +++++++++++-------------------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/generic/tclEnv.c b/generic/tclEnv.c index 1f61e07..15dd8b5 100644 --- a/generic/tclEnv.c +++ b/generic/tclEnv.c @@ -127,6 +127,17 @@ TclSetupEnv( /*msg*/ 0, /*createPart1*/ 0, /*createPart2*/ 0, &arrayPtr); TclFindArrayPtrElements(varPtr, &namesHash); +#if defined(_WIN32) + if (tenviron == NULL) { + /* + * When we are started from main(), the _wenviron array could + * be NULL and will be initialized by the first _wgetenv() call. + */ + + (void) _wgetenv(L"WINDIR"); + } +#endif + /* * Go through the environment array and transfer its values into Tcl. At * the same time, remove those elements we add/update from the hash table diff --git a/win/tclWinInit.c b/win/tclWinInit.c index 4b3b6d4..b1dd0f3 100644 --- a/win/tclWinInit.c +++ b/win/tclWinInit.c @@ -154,7 +154,7 @@ TclpInitPlatform(void) * invoked. */ - TclWinInit(GetModuleHandle(NULL)); + TclWinInit(GetModuleHandleW(NULL)); #endif /* @@ -260,7 +260,7 @@ AppendEnvironment( { int pathc; WCHAR wBuf[MAX_PATH]; - char buf[MAX_PATH * TCL_UTF_MAX]; + char buf[MAX_PATH * 3]; Tcl_Obj *objPtr; Tcl_DString ds; const char **pathv; @@ -273,7 +273,7 @@ AppendEnvironment( for (shortlib = (char *) &lib[strlen(lib)-1]; shortlib>lib ; shortlib--) { if (*shortlib == '/') { - if ((unsigned)(shortlib - lib) == strlen(lib) - 1) { + if ((size_t)(shortlib - lib) == strlen(lib) - 1) { Tcl_Panic("last character in lib cannot be '/'"); } shortlib++; @@ -353,7 +353,7 @@ InitializeDefaultLibraryDir( { HMODULE hModule = TclWinGetTclInstance(); WCHAR wName[MAX_PATH + LIBRARY_SIZE]; - char name[(MAX_PATH + LIBRARY_SIZE) * TCL_UTF_MAX]; + char name[(MAX_PATH + LIBRARY_SIZE) * 3]; char *end, *p; if (GetModuleFileNameW(hModule, wName, MAX_PATH) == 0) { @@ -426,7 +426,7 @@ InitializeSourceLibraryDir( *lengthPtr = strlen(name); *valuePtr = (char *)ckalloc(*lengthPtr + 1); *encodingPtr = NULL; - memcpy(*valuePtr, name, (size_t) *lengthPtr + 1); + memcpy(*valuePtr, name, *lengthPtr + 1); } /* @@ -674,16 +674,6 @@ TclpSetVariables( *---------------------------------------------------------------------- */ -#if defined(_WIN32) -# define tenviron _wenviron -# define tenviron2utfdstr(tenvstr, len, dstr) \ - Tcl_WinTCharToUtf((TCHAR *)tenvstr, len, dstr) -#else -# define tenviron environ -# define tenviron2utfdstr(tenvstr, len, dstr) \ - Tcl_ExternalToUtfDString(NULL, tenvstr, len, dstr) -#endif - int TclpFindVariable( const char *name, /* Name of desired environment variable @@ -694,7 +684,8 @@ TclpFindVariable( * searches). */ { int i, length, result = -1; - register const char *env, *p1, *p2; + const WCHAR *env; + const char *p1, *p2; char *envUpper, *nameUpper; Tcl_DString envString; @@ -704,20 +695,20 @@ TclpFindVariable( length = strlen(name); nameUpper = (char *)ckalloc(length + 1); - memcpy(nameUpper, name, (size_t) length+1); + memcpy(nameUpper, name, length+1); Tcl_UtfToUpper(nameUpper); Tcl_DStringInit(&envString); - for (i = 0, env = (const char *)tenviron[i]; + for (i = 0, env = _wenviron[i]; env != NULL; - i++, env = (const char *)tenviron[i]) { + i++, env = _wenviron[i]) { /* * Chop the env string off after the equal sign, then Convert the name * to all upper case, so we do not have to convert all the characters * after the equal sign. */ - envUpper = tenviron2utfdstr(env, -1, &envString); + envUpper = Tcl_WinTCharToUtf((TCHAR *)env, -1, &envString); p1 = strchr(envUpper, '='); if (p1 == NULL) { continue; -- cgit v0.12