From 5999fb96666df992ea33a641af2fe1fec8b95587 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 14 Apr 2020 16:33:54 +0000 Subject: Missing .$noComp in string-20.8 test-case, we don't want duplicate test-case numbers. --- tests/string.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/string.test b/tests/string.test index 223251e..909bdfc 100644 --- a/tests/string.test +++ b/tests/string.test @@ -1827,7 +1827,7 @@ test string-20.7.$noComp {string trim on not valid utf-8 sequence (consider NTS lappend result [string map $m [run {string trim $b fox}]] lappend result [string map $m [run {string trim $b fo\x00}]] } [list {*}[lrepeat 3 fooUV] {*}[lrepeat 2 UV V]] -test string-20.8 {[c61818e4c9] [string trimright] fails when UtfPrev is ok} {testbytestring knownBug} { +test string-20.8.$noComp {[c61818e4c9] [string trimright] fails when UtfPrev is ok} {testbytestring knownBug} { set result {} set a [testbytestring \xE8\x80] set b foo$a -- cgit v0.12 From 0d2f5506580cc30d20fc28304998de48ef2660b1 Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 14 Apr 2020 17:45:34 +0000 Subject: Expose the failing tests. Fix string-20.7 by using an invalid single byte that is stable in its meaning across different Tcl versions. --- tests/string.test | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/string.test b/tests/string.test index 909bdfc..9c30caa 100644 --- a/tests/string.test +++ b/tests/string.test @@ -1814,11 +1814,11 @@ test string-20.5.$noComp {string trimright} { test string-20.6.$noComp {string trimright, unicode default} { run {string trimright ABC\u1361\x85\x00\xA0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u2028\u2029\u202F\u205F\u3000} } ABC\u1361 -test string-20.7.$noComp {string trim on not valid utf-8 sequence (consider NTS as continuation char), bug [c61818e4c9]} {testbytestring knownBug} { +test string-20.7.$noComp {string trim on not valid utf-8 sequence (consider NTS as continuation char), bug [c61818e4c9]} {testbytestring} { set result {} - set a [testbytestring \xC0\x80\x88] + set a [testbytestring \xC0\x80\xA0] set b foo$a - set m [list \x00 U \x88 V [testbytestring \x88] W] + set m [list \x00 U \xA0 V [testbytestring \xA0] W] lappend result [string map $m $b] lappend result [string map $m [run {string trimright $b x}]] lappend result [string map $m [run {string trimright $b \x00}]] @@ -1827,7 +1827,7 @@ test string-20.7.$noComp {string trim on not valid utf-8 sequence (consider NTS lappend result [string map $m [run {string trim $b fox}]] lappend result [string map $m [run {string trim $b fo\x00}]] } [list {*}[lrepeat 3 fooUV] {*}[lrepeat 2 UV V]] -test string-20.8.$noComp {[c61818e4c9] [string trimright] fails when UtfPrev is ok} {testbytestring knownBug} { +test string-20.8.$noComp {[c61818e4c9] [string trimright] fails when UtfPrev is ok} {testbytestring} { set result {} set a [testbytestring \xE8\x80] set b foo$a -- cgit v0.12 From 890dbba35a9366f68f9fbc67ae5bcd5baa3612d1 Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 14 Apr 2020 17:49:27 +0000 Subject: Same issue fixed for test string-20.8* --- tests/string.test | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/string.test b/tests/string.test index 9c30caa..98890f9 100644 --- a/tests/string.test +++ b/tests/string.test @@ -1829,17 +1829,17 @@ test string-20.7.$noComp {string trim on not valid utf-8 sequence (consider NTS } [list {*}[lrepeat 3 fooUV] {*}[lrepeat 2 UV V]] test string-20.8.$noComp {[c61818e4c9] [string trimright] fails when UtfPrev is ok} {testbytestring} { set result {} - set a [testbytestring \xE8\x80] + set a [testbytestring \xE8\xA0] set b foo$a - set m [list \xE8 U \x80 V [testbytestring \xE8] W [testbytestring \x80] X]] + set m [list \xE8 U \xA0 V [testbytestring \xE8] W [testbytestring \xA0] X]] lappend result [string map $m $b] lappend result [string map $m [run {string trimright $b x}]] lappend result [string map $m [run {string trimright $b \xE8}]] lappend result [string map $m [run {string trimright $b [testbytestring \xE8]}]] - lappend result [string map $m [run {string trimright $b \x80}]] - lappend result [string map $m [run {string trimright $b [testbytestring \x80]}]] - lappend result [string map $m [run {string trimright $b \xE8\x80}]] - lappend result [string map $m [run {string trimright $b [testbytestring \xE8\x80]}]] + lappend result [string map $m [run {string trimright $b \xA0}]] + lappend result [string map $m [run {string trimright $b [testbytestring \xA0]}]] + lappend result [string map $m [run {string trimright $b \xE8\xA0}]] + lappend result [string map $m [run {string trimright $b [testbytestring \xE8\xA0]}]] lappend result [string map $m [run {string trimright $b \u0000}]] } [list {*}[lrepeat 4 fooUV] {*}[lrepeat 2 fooU] {*}[lrepeat 2 foo] fooUV] -- cgit v0.12 From cf28c6b1dac27e2a0e39297fc096a656a598fa9b Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 14 Apr 2020 17:52:14 +0000 Subject: Keep the tests consistent. --- tests/string.test | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/string.test b/tests/string.test index 2fc719b..ccef0a7 100644 --- a/tests/string.test +++ b/tests/string.test @@ -1464,9 +1464,9 @@ test string-20.6 {string trimright, unicode default} { } {} test string-20.7 {string trim on not valid utf-8 sequence (consider NTS as continuation char), bug [c61818e4c9]} { set result {} - set a [bytestring \xc0\x80\x88] + set a [bytestring \xc0\x80\xA0] set b foo$a - set m [list \u0000 U \x88 V [bytestring \x88] W] + set m [list \u0000 U \xA0 V [bytestring \xA0] W] lappend result [string map $m $b] lappend result [string map $m [string trimright $b x]] lappend result [string map $m [string trimright $b \u0000]] @@ -1477,17 +1477,17 @@ test string-20.7 {string trim on not valid utf-8 sequence (consider NTS as conti } [list {*}[lrepeat 3 fooUV] {*}[lrepeat 2 UV V]] test string-20.8 {[c61818e4c9] [string trimright] fails when UtfPrev is ok} { set result {} - set a [bytestring \xE8\x80] + set a [bytestring \xE8\xA0] set b foo$a - set m [list \xE8 U \x80 V [bytestring \xE8] W [bytestring \x80] X]] + set m [list \xE8 U \xA0 V [bytestring \xE8] W [bytestring \xA0] X]] lappend result [string map $m $b] lappend result [string map $m [string trimright $b x]] lappend result [string map $m [string trimright $b \xE8]] lappend result [string map $m [string trimright $b [bytestring \xE8]]] - lappend result [string map $m [string trimright $b \x80]] - lappend result [string map $m [string trimright $b [bytestring \x80]]] - lappend result [string map $m [string trimright $b \xE8\x80]] - lappend result [string map $m [string trimright $b [bytestring \xE8\x80]]] + lappend result [string map $m [string trimright $b \xA0]] + lappend result [string map $m [string trimright $b [bytestring \xA0]]] + lappend result [string map $m [string trimright $b \xE8\xA0]] + lappend result [string map $m [string trimright $b [bytestring \xE8\xA0]]] lappend result [string map $m [string trimright $b \u0000]] } [list {*}[lrepeat 4 fooUV] {*}[lrepeat 2 fooU] {*}[lrepeat 2 foo] fooUV] -- cgit v0.12 From e9b1c88a4142a059a821fdd2f2b02272a53c9151 Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 14 Apr 2020 20:03:33 +0000 Subject: The function of Tcl_UtfNext() is to advance a pointer. There's nothing inherent in that task that requires decoding of the characters, but the implementation does that. Let's try a simpler solution for callers that do not need the content decoded. --- generic/tclUtf.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index fbdba4c..a03fa30 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -644,9 +644,19 @@ CONST char * Tcl_UtfNext( CONST char *src) /* The current location in the string. */ { - Tcl_UniChar ch; - - return src + TclUtfToUniChar(src, &ch); + int byte = *((unsigned char *) src); + int left = totalBytes[byte]; + + src++; + while (--left) { + byte = *((unsigned char *) src); + if ((byte & 0xC0) != 0x80) { + /* src points to non-trail byte; return it */ + return src; + } + src++; + } + return src; } /* -- cgit v0.12 From 279c54dbff724a62d6739a9cc71ba31a83325c98 Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 14 Apr 2020 20:18:55 +0000 Subject: Create and use an optimized macro TclUtfNext() for Tcl_UtfNext(). --- generic/tclInt.h | 3 +++ generic/tclUtil.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/generic/tclInt.h b/generic/tclInt.h index 15bc000..e92cd18 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3691,6 +3691,9 @@ MODULE_SCOPE void TclDbInitNewObj(Tcl_Obj *objPtr, CONST char *file, ((*(chPtr) = (unsigned char) *(str)), 1) \ : Tcl_UtfToUniChar(str, chPtr)) +#define TclUtfNext(src) \ + ((((unsigned char) *(src)) < 0xC0) ? src + 1 : Tcl_UtfNext(src)) + /* *---------------------------------------------------------------- * Macro that encapsulates the logic that determines when it is safe to diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 3dd9a32..e87cf83 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -1691,7 +1691,7 @@ TclTrim( * that we will not trim. Skip over it. */ if (numBytes > 0) { const char *first = bytes + trimLeft; - bytes = Tcl_UtfNext(first); + bytes = TclUtfNext(first); numBytes -= (bytes - first); if (numBytes > 0) { -- cgit v0.12 From 6df0ccf3997e397b860c47c770ba0fc31a2a9961 Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 14 Apr 2020 21:16:43 +0000 Subject: Replace calls of TclUtfToUniChar() with TclUtfNext() when caller has no decoding need. Failing test string-22.14 indicates something is still not quite right. Now that Tcl_NumUtfChars() is not paying decoding prices, we let it spend to properly protect against overflow. [2738427] --- generic/tclCompExpr.c | 5 ++--- generic/tclUtf.c | 19 ++++++------------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c index 27d7503..42321af 100644 --- a/generic/tclCompExpr.c +++ b/generic/tclCompExpr.c @@ -1801,7 +1801,6 @@ ParseLexeme( { const char *end; int scanned; - Tcl_UniChar ch; Tcl_Obj *literal = NULL; unsigned char byte; @@ -1979,12 +1978,12 @@ ParseLexeme( if (!TclIsBareword(*start) || *start == '_') { if (Tcl_UtfCharComplete(start, numBytes)) { - scanned = Tcl_UtfToUniChar(start, &ch); + scanned = TclUtfNext(start) - start; } else { char utfBytes[TCL_UTF_MAX]; memcpy(utfBytes, start, (size_t) numBytes); utfBytes[numBytes] = '\0'; - scanned = Tcl_UtfToUniChar(utfBytes, &ch); + scanned = TclUtfNext(utfBytes) - utfBytes; } *lexemePtr = INVALID; Tcl_DecrRefCount(literal); diff --git a/generic/tclUtf.c b/generic/tclUtf.c index a03fa30..25d52d0 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -504,7 +504,6 @@ Tcl_NumUtfChars( int length) /* The length of the string in bytes, or -1 * for strlen(string). */ { - Tcl_UniChar ch; register int i; /* @@ -516,21 +515,20 @@ Tcl_NumUtfChars( i = 0; if (length < 0) { - while (*src != '\0') { - src += TclUtfToUniChar(src, &ch); + while ((*src != '\0') && (i < INT_MAX)) { + src = TclUtfNext(src); i++; } - if (i < 0) i = INT_MAX; /* Bug [2738427] */ } else { register const char *endPtr = src + length - TCL_UTF_MAX; while (src < endPtr) { - src += TclUtfToUniChar(src, &ch); + src = TclUtfNext(src); i++; } endPtr += TCL_UTF_MAX; while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { - src += TclUtfToUniChar(src, &ch); + src = TclUtfNext(src); i++; } if (src < endPtr) { @@ -764,10 +762,7 @@ Tcl_UniCharAtIndex( { Tcl_UniChar ch; - while (index >= 0) { - index--; - src += TclUtfToUniChar(src, &ch); - } + TclUtfToUniChar(Tcl_UtfAtIndex(src, index), &ch); return ch; } @@ -793,11 +788,9 @@ Tcl_UtfAtIndex( register CONST char *src, /* The UTF-8 string. */ register int index) /* The position of the desired character. */ { - Tcl_UniChar ch; - while (index > 0) { index--; - src += TclUtfToUniChar(src, &ch); + src = TclUtfNext(src); } return src; } -- cgit v0.12 From e2b7ad1627665c99b128ff5a023e9f772fe467b4 Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 14 Apr 2020 21:25:33 +0000 Subject: Fix the bad logic in Tcl_UtfNext(). --- generic/tclUtf.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 25d52d0..7dd8598 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -644,17 +644,21 @@ Tcl_UtfNext( { int byte = *((unsigned char *) src); int left = totalBytes[byte]; + const char *next = src + 1; - src++; while (--left) { - byte = *((unsigned char *) src); + byte = *((unsigned char *) next); if ((byte & 0xC0) != 0x80) { - /* src points to non-trail byte; return it */ - return src; + /* + * src points to non-trail byte; We ran out of trail bytes + * before the needs of the lead bytes were satisfied. + * Let the (malformed) lead byte alone be a character + */ + return src + 1; } - src++; + next++; } - return src; + return next; } /* -- cgit v0.12 From 7aff227882dd8bfaa8972ecaf1e129bb9ef1e6e3 Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 14 Apr 2020 21:32:22 +0000 Subject: typo --- generic/tclUtf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 7dd8598..078ecf4 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -651,7 +651,7 @@ Tcl_UtfNext( if ((byte & 0xC0) != 0x80) { /* * src points to non-trail byte; We ran out of trail bytes - * before the needs of the lead bytes were satisfied. + * before the needs of the lead byte were satisfied. * Let the (malformed) lead byte alone be a character */ return src + 1; -- cgit v0.12 From f0f59ae8a31a818d78cb449dc4532762cfb2bb00 Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 14 Apr 2020 21:39:53 +0000 Subject: New testing command [testutfnext]. --- generic/tclTest.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index 31d3a7f..782b9a2 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -433,6 +433,7 @@ static int SimpleMatchInDirectory( Tcl_Interp *interp, Tcl_Obj *resultPtr, Tcl_Obj *dirPtr, const char *pattern, Tcl_GlobTypeData *types); +static Tcl_ObjCmdProc TestUtfNextCmd; static Tcl_ObjCmdProc TestUtfPrevCmd; static int TestNumUtfCharsCmd(ClientData clientData, Tcl_Interp *interp, int objc, @@ -697,8 +698,10 @@ Tcltest_Init( (ClientData) 0, NULL); Tcl_CreateObjCommand(interp, "testsetobjerrorcode", TestsetobjerrorcodeCmd, NULL, NULL); + Tcl_CreateObjCommand(interp, "testutfnext", + TestUtfNextCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testutfprev", - TestUtfPrevCmd, (ClientData) 0, NULL); + TestUtfPrevCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testnumutfchars", TestNumUtfCharsCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testfindfirst", @@ -7107,6 +7110,52 @@ SimpleListVolumes(void) } /* + * Used to check operations of Tcl_UtfNext. + * + * Usage: testutfnext $bytes $offset + */ + +static int +TestUtfNextCmd( + ClientData clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *const objv[]) +{ + int numBytes, offset = 0; + char *bytes; + const char *result; + Tcl_Obj *copy; + + if (objc < 2 || objc > 3) { + Tcl_WrongNumArgs(interp, 1, objv, "bytes ?offset?"); + return TCL_ERROR; + } + + bytes = (char *) Tcl_GetByteArrayFromObj(objv[1], &numBytes); + + if (objc == 3) { + if (TCL_OK != Tcl_GetIntFromObj(interp, objv[2], &offset)) { + return TCL_ERROR; + } + if (offset < 0) { + offset = 0; + } + if (offset > numBytes) { + offset = numBytes; + } + } + copy = Tcl_DuplicateObj(objv[1]); + bytes = (char *) Tcl_SetByteArrayLength(copy, numBytes+1); + bytes[numBytes] = '\0'; + + result = Tcl_UtfNext(bytes + offset); + Tcl_SetObjResult(interp, Tcl_NewIntObj(result - bytes)); + + Tcl_DecrRefCount(copy); + return TCL_OK; +} +/* * Used to check operations of Tcl_UtfPrev. * * Usage: testutfprev $bytes $offset @@ -7149,9 +7198,9 @@ TestUtfPrevCmd( bytes[numBytes] = '\0'; result = Tcl_UtfPrev(bytes + offset, bytes); + Tcl_SetObjResult(interp, Tcl_NewIntObj(result - bytes)); Tcl_DecrRefCount(copy); - Tcl_SetObjResult(interp, Tcl_NewIntObj(result - bytes)); return TCL_OK; } -- cgit v0.12 From 73e4fb471c479ab03b08349ea12ea4475c0d7c7f Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 14 Apr 2020 22:36:12 +0000 Subject: Try adding some quotes --- unix/configure | 2 +- unix/tcl.m4 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/unix/configure b/unix/configure index f28cc8c..53def0a 100755 --- a/unix/configure +++ b/unix/configure @@ -7328,7 +7328,7 @@ fi LDFLAGS="$LDFLAGS -Wl,--export-dynamic" if test $doRpath = yes; then - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' fi LD_SEARCH_FLAGS=${CC_SEARCH_FLAGS} diff --git a/unix/tcl.m4 b/unix/tcl.m4 index 585bbba..41767a1 100644 --- a/unix/tcl.m4 +++ b/unix/tcl.m4 @@ -1392,7 +1392,7 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ DL_LIBS="-ldl" LDFLAGS="$LDFLAGS -Wl,--export-dynamic" AS_IF([test $doRpath = yes], [ - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}']) + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"']) LD_SEARCH_FLAGS=${CC_SEARCH_FLAGS} AS_IF([test "`uname -m`" = "alpha"], [CFLAGS="$CFLAGS -mieee"]) AS_IF([test $do64bit = yes], [ -- cgit v0.12 From 9ddfa6bdf0e2e058f314917c81dab4ab40ec621c Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 15 Apr 2020 06:35:51 +0000 Subject: More double-quotes --- unix/configure | 22 +++++++++++----------- unix/tcl.m4 | 22 +++++++++++----------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/unix/configure b/unix/configure index 53def0a..627d10a 100755 --- a/unix/configure +++ b/unix/configure @@ -7099,7 +7099,7 @@ fi SHLIB_LD='${CC} -shared' if test $doRpath = yes; then - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' fi LD_SEARCH_FLAGS=${CC_SEARCH_FLAGS} @@ -7223,7 +7223,7 @@ esac if test $doRpath = yes; then - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' LD_SEARCH_FLAGS='-rpath ${LIB_RUNTIME_DIR}' fi @@ -7244,7 +7244,7 @@ esac if test $doRpath = yes; then - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' LD_SEARCH_FLAGS='-rpath ${LIB_RUNTIME_DIR}' fi @@ -7285,7 +7285,7 @@ esac if test $doRpath = yes; then - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' LD_SEARCH_FLAGS='-rpath ${LIB_RUNTIME_DIR}' fi @@ -7428,8 +7428,8 @@ fi LD_FLAGS="-Wl,--export-dynamic" if test $doRpath = yes; then - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' - LD_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' + LD_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' fi ;; @@ -7470,7 +7470,7 @@ fi DL_LIBS="" if test $doRpath = yes; then - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' fi LD_SEARCH_FLAGS=${CC_SEARCH_FLAGS} @@ -7500,7 +7500,7 @@ fi LDFLAGS="$LDFLAGS -export-dynamic" if test $doRpath = yes; then - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' fi LD_SEARCH_FLAGS=${CC_SEARCH_FLAGS} @@ -7524,8 +7524,8 @@ fi DL_LIBS="" if test $doRpath = yes; then - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' - LD_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' + LD_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' fi if test "${TCL_THREADS}" = "1"; then @@ -8114,7 +8114,7 @@ fi DL_LIBS="" if test $doRpath = yes; then - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' LD_SEARCH_FLAGS='-rpath ${LIB_RUNTIME_DIR}' fi diff --git a/unix/tcl.m4 b/unix/tcl.m4 index 41767a1..7d4185bb 100644 --- a/unix/tcl.m4 +++ b/unix/tcl.m4 @@ -1289,7 +1289,7 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ do64bit_ok=yes SHLIB_LD='${CC} -shared' AS_IF([test $doRpath = yes], [ - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}']) + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"']) LD_SEARCH_FLAGS=${CC_SEARCH_FLAGS} ;; *) @@ -1324,7 +1324,7 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ DL_LIBS="" AC_LIBOBJ(mkstemp) AS_IF([test $doRpath = yes], [ - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' LD_SEARCH_FLAGS='-rpath ${LIB_RUNTIME_DIR}']) ;; IRIX-6.*) @@ -1335,7 +1335,7 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ DL_LIBS="" AC_LIBOBJ(mkstemp) AS_IF([test $doRpath = yes], [ - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' LD_SEARCH_FLAGS='-rpath ${LIB_RUNTIME_DIR}']) AS_IF([test "$GCC" = yes], [ CFLAGS="$CFLAGS -mabi=n32" @@ -1361,7 +1361,7 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ DL_LIBS="" AC_LIBOBJ(mkstemp) AS_IF([test $doRpath = yes], [ - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' LD_SEARCH_FLAGS='-rpath ${LIB_RUNTIME_DIR}']) # Check to enable 64-bit flags for compiler/linker @@ -1424,8 +1424,8 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ DL_LIBS="-mshared -ldl" LD_FLAGS="-Wl,--export-dynamic" AS_IF([test $doRpath = yes], [ - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' - LD_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}']) + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' + LD_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"']) ;; MP-RAS-02*) SHLIB_CFLAGS="-K PIC" @@ -1463,7 +1463,7 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ DL_OBJS="tclLoadDl.o" DL_LIBS="" AS_IF([test $doRpath = yes], [ - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}']) + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"']) LD_SEARCH_FLAGS=${CC_SEARCH_FLAGS} SHARED_LIB_SUFFIX='${TCL_TRIM_DOTS}.so${SHLIB_VERSION}' LDFLAGS="-Wl,-export-dynamic" @@ -1487,7 +1487,7 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ DL_LIBS="" LDFLAGS="$LDFLAGS -export-dynamic" AS_IF([test $doRpath = yes], [ - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}']) + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"']) LD_SEARCH_FLAGS=${CC_SEARCH_FLAGS} AS_IF([test "${TCL_THREADS}" = "1"], [ # The -pthread needs to go in the CFLAGS, not LIBS @@ -1505,8 +1505,8 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ DL_OBJS="tclLoadDl.o" DL_LIBS="" AS_IF([test $doRpath = yes], [ - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' - LD_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}']) + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' + LD_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"']) AS_IF([test "${TCL_THREADS}" = "1"], [ # The -pthread needs to go in the LDFLAGS, not LIBS LIBS=`echo $LIBS | sed s/-pthread//` @@ -1710,7 +1710,7 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ DL_OBJS="tclLoadDl.o" DL_LIBS="" AS_IF([test $doRpath = yes], [ - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' LD_SEARCH_FLAGS='-rpath ${LIB_RUNTIME_DIR}']) AS_IF([test "$GCC" = yes], [CFLAGS="$CFLAGS -mieee"], [ CFLAGS="$CFLAGS -DHAVE_TZSET -std1 -ieee"]) -- cgit v0.12 From 59cfd3a1590de1f7e29a8246d32fa003ddcdf45e Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 15 Apr 2020 13:15:38 +0000 Subject: Update latest Xcode from 11.3 to 11.4 --- .travis.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 2a5f1a7..9af4494 100644 --- a/.travis.yml +++ b/.travis.yml @@ -124,9 +124,9 @@ matrix: - BUILD_DIR=unix - CFGOPT="--enable-symbols=mem" # Testing on Mac, various styles - - name: "macOS/Xcode 11.3/Shared" + - name: "macOS/Xcode 11.4/Shared" os: osx - osx_image: xcode11.3 + osx_image: xcode11.4 env: - BUILD_DIR=macosx install: [] @@ -134,9 +134,9 @@ matrix: - make all # The styles=develop avoids some weird problems on OSX - make test styles=develop - - name: "macOS/Xcode 11.3/Shared/Unix-like" + - name: "macOS/Xcode 11.4/Shared/Unix-like" os: osx - osx_image: xcode11.3 + osx_image: xcode11.4 env: - BUILD_DIR=unix # Older MacOS versions -- cgit v0.12 From 532ec4fa923534f592e04cc3c5679ce5771c684c Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 15 Apr 2020 14:42:41 +0000 Subject: Collection of coverage tests for Tcl_UtfNext. --- tests/utf.test | 246 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 244 insertions(+), 2 deletions(-) diff --git a/tests/utf.test b/tests/utf.test index 56ca1b9..b5358cc 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -111,8 +111,250 @@ test utf-5.2 {Tcl_UtfFindLast} testfindlast { testfindlast [bytestring "abcbc"] 98 } {bc} -test utf-6.1 {Tcl_UtfNext} { -} {} +testConstraint testutfnext [llength [info commands testutfnext]] + +test utf-6.1 {Tcl_UtfNext} testutfnext { + # This takes the pointer one past the terminating NUL. + # This is really an invalid call. + testutfnext {} +} 1 +test utf-6.2 {Tcl_UtfNext} testutfnext { + testutfnext A +} 1 +test utf-6.3 {Tcl_UtfNext} testutfnext { + testutfnext AA +} 1 +test utf-6.4 {Tcl_UtfNext} testutfnext { + testutfnext A\xA0 +} 1 +test utf-6.5 {Tcl_UtfNext} testutfnext { + testutfnext A\xD0 +} 1 +test utf-6.6 {Tcl_UtfNext} testutfnext { + testutfnext A\xE8 +} 1 +test utf-6.7 {Tcl_UtfNext} testutfnext { + testutfnext A\xF4 +} 1 +test utf-6.8 {Tcl_UtfNext} testutfnext { + testutfnext A\xF8 +} 1 +test utf-6.9 {Tcl_UtfNext} testutfnext { + testutfnext \xA0 +} 1 +test utf-6.10 {Tcl_UtfNext} testutfnext { + testutfnext \xA0G +} 1 +test utf-6.11 {Tcl_UtfNext} testutfnext { + testutfnext \xA0\xA0 +} 1 +test utf-6.12 {Tcl_UtfNext} testutfnext { + testutfnext \xA0\xD0 +} 1 +test utf-6.13 {Tcl_UtfNext} testutfnext { + testutfnext \xA0\xE8 +} 1 +test utf-6.14 {Tcl_UtfNext} testutfnext { + testutfnext \xA0\xF4 +} 1 +test utf-6.15 {Tcl_UtfNext} testutfnext { + testutfnext \xA0\xF8 +} 1 +test utf-6.16 {Tcl_UtfNext} testutfnext { + testutfnext \xD0 +} 1 +test utf-6.17 {Tcl_UtfNext} testutfnext { + testutfnext \xD0A +} 1 +test utf-6.18 {Tcl_UtfNext} testutfnext { + testutfnext \xD0\xA0 +} 2 +test utf-6.19 {Tcl_UtfNext} testutfnext { + testutfnext \xD0\xD0 +} 1 +test utf-6.20 {Tcl_UtfNext} testutfnext { + testutfnext \xD0\xE8 +} 1 +test utf-6.21 {Tcl_UtfNext} testutfnext { + testutfnext \xD0\xF4 +} 1 +test utf-6.22 {Tcl_UtfNext} testutfnext { + testutfnext \xD0\xF8 +} 1 +test utf-6.23 {Tcl_UtfNext} testutfnext { + testutfnext \xE8 +} 1 +test utf-6.24 {Tcl_UtfNext} testutfnext { + testutfnext \xE8A +} 1 +test utf-6.25 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0 +} 1 +test utf-6.26 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xD0 +} 1 +test utf-6.27 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xE8 +} 1 +test utf-6.28 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xF4 +} 1 +test utf-6.29 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xF8 +} 1 +test utf-6.30 {Tcl_UtfNext} testutfnext { + testutfnext \xF4 +} 1 +test utf-6.31 {Tcl_UtfNext} testutfnext { + testutfnext \xF4A +} 1 +test utf-6.32 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0 +} 1 +test utf-6.33 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xD0 +} 1 +test utf-6.34 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xE8 +} 1 +test utf-6.35 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xF4 +} 1 +test utf-6.36 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xF8 +} 1 +test utf-6.37 {Tcl_UtfNext} testutfnext { + testutfnext \xF8 +} 1 +test utf-6.38 {Tcl_UtfNext} testutfnext { + testutfnext \xF8A +} 1 +test utf-6.39 {Tcl_UtfNext} testutfnext { + testutfnext \xF8\xA0 +} 1 +test utf-6.40 {Tcl_UtfNext} testutfnext { + testutfnext \xF8\xD0 +} 1 +test utf-6.41 {Tcl_UtfNext} testutfnext { + testutfnext \xF8\xE8 +} 1 +test utf-6.42 {Tcl_UtfNext} testutfnext { + testutfnext \xF8\xF4 +} 1 +test utf-6.43 {Tcl_UtfNext} testutfnext { + testutfnext \xF8\xF8 +} 1 +test utf-6.44 {Tcl_UtfNext} testutfnext { + testutfnext \xD0\xA0G +} 2 +test utf-6.45 {Tcl_UtfNext} testutfnext { + testutfnext \xD0\xA0\xA0 +} 2 +test utf-6.46 {Tcl_UtfNext} testutfnext { + testutfnext \xD0\xA0\xD0 +} 2 +test utf-6.47 {Tcl_UtfNext} testutfnext { + testutfnext \xD0\xA0\xE8 +} 2 +test utf-6.48 {Tcl_UtfNext} testutfnext { + testutfnext \xD0\xA0\xF4 +} 2 +test utf-6.49 {Tcl_UtfNext} testutfnext { + testutfnext \xD0\xA0\xF8 +} 2 +test utf-6.50 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0G +} 1 +test utf-6.51 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0\xA0 +} 3 +test utf-6.52 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0\xD0 +} 1 +test utf-6.53 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0\xE8 +} 1 +test utf-6.54 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0\xF4 +} 1 +test utf-6.55 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0\xF8 +} 1 +test utf-6.56 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0G +} 1 +test utf-6.57 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0 +} 1 +test utf-6.58 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xD0 +} 1 +test utf-6.59 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xE8 +} 1 +test utf-6.60 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xF4 +} 1 +test utf-6.61 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xF8 +} 1 +test utf-6.62 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0\xA0G +} 3 +test utf-6.63 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0\xA0\xA0 +} 3 +test utf-6.64 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0\xA0\xD0 +} 3 +test utf-6.65 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0\xA0\xE8 +} 3 +test utf-6.66 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0\xA0\xF4 +} 3 +test utf-6.67 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0\xA0\xF8 +} 3 +test utf-6.68 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0G +} 1 +test utf-6.69 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0\xA0 +} 1 +test utf-6.70 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0\xD0 +} 1 +test utf-6.71 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0\xE8 +} 1 +test utf-6.71 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0\xF4 +} 1 +test utf-6.73 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0\xF8 +} 1 +test utf-6.74 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0\xA0G +} 1 +test utf-6.75 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0\xA0\xA0 +} 1 +test utf-6.76 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0\xA0\xD0 +} 1 +test utf-6.77 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0\xA0\xE8 +} 1 +test utf-6.78 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0\xA0\xF4 +} 1 +test utf-6.79 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0\xA0G\xF8 +} 1 + + + testConstraint testutfprev [llength [info commands testutfprev]] -- cgit v0.12 From e2a3a358c95196a7bf142d591fa5ef729b3b0d69 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 15 Apr 2020 16:42:32 +0000 Subject: Add test demonstrating that Tcl_UtfNext accepts overlong byte sequences, which is in conflict with what Tcl_UtfToUniChar does. --- tests/utf.test | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/utf.test b/tests/utf.test index b5358cc..a930aae 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -352,9 +352,9 @@ test utf-6.78 {Tcl_UtfNext} testutfnext { test utf-6.79 {Tcl_UtfNext} testutfnext { testutfnext \xF4\xA0\xA0\xA0G\xF8 } 1 - - - +test utf-6.80 {Tcl_UtfNext - overlong sequences} { + testutfnext \xC0\x81 +} 1 testConstraint testutfprev [llength [info commands testutfprev]] -- cgit v0.12 From fe893becb4e69049ea40d04930490b24784faeed Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 15 Apr 2020 17:05:52 +0000 Subject: New tests checking Tcl_UtfPrev handling of overlong sequences. Bug demonstrated. --- tests/utf.test | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/utf.test b/tests/utf.test index 56ca1b9..9982f81 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -275,6 +275,33 @@ test utf-7.22 {Tcl_UtfPrev} testutfprev { test utf-7.23 {Tcl_UtfPrev} testutfprev { testutfprev A\xA0\xA0\xA0\xA0 } 4 +test utf-7.24 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xC0\x81 +} 2 +test utf-7.25 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xC0\x81 2 +} 1 +test utf-7.26 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xE0\x80\x80 +} 3 +test utf-7.27 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xE0\x80\x80 3 +} 2 +test utf-7.28 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xE0\x80\x80 2 +} 1 +test utf-7.29 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xF0\x80\x80\x80 +} 4 +test utf-7.30 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xF0\x80\x80\x80 4 +} 3 +test utf-7.31 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xF0\x80\x80\x80 3 +} 2 +test utf-7.32 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xF0\x80\x80\x80 2 +} 1 test utf-8.1 {Tcl_UniCharAtIndex: index = 0} { string index abcd 0 -- cgit v0.12 From 951e89371632e61ed06ddd97b090f4e3b7c36851 Mon Sep 17 00:00:00 2001 From: sebres Date: Wed, 15 Apr 2020 18:59:00 +0000 Subject: TclTrimRight/TclTrimLeft: removed mixed declarations and code (forbidden in ISO C90, declaration-after-statement); restored mistakenly removed part of check-in [578c25b43a] (move Tcl_UniChar initializations out of the loop), but "ch2" belongs to the inner loop, so move it inside outer interation. --- generic/tclUtil.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/generic/tclUtil.c b/generic/tclUtil.c index d7d6134..2c20831 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -1686,12 +1686,12 @@ TclTrimRight( * rely on (trim[numTrim] == '\0'). */ { const char *pp, *p = bytes + numBytes; + Tcl_UniChar ch1 = 0; /* Empty strings -> nothing to do */ if ((numBytes == 0) || (numTrim == 0)) { return 0; } - Tcl_UniChar ch1 = 0, ch2 = 0; /* * Outer loop: iterate over string to be trimmed. @@ -1700,6 +1700,7 @@ TclTrimRight( do { const char *q = trim; int pInc = 0, bytesLeft = numTrim; + Tcl_UniChar ch2 = 0; pp = Tcl_UtfPrev(p, bytes); do { @@ -1765,6 +1766,7 @@ TclTrimLeft( * rely on (trim[numTrim] == '\0'). */ { const char *p = bytes; + Tcl_UniChar ch1 = 0; /* Empty strings -> nothing to do */ if ((numBytes == 0) || (numTrim == 0)) { @@ -1776,7 +1778,7 @@ TclTrimLeft( */ do { - Tcl_UniChar ch1 = 0; + Tcl_UniChar ch2 = 0; int pInc = TclUtfToUniChar(p, &ch1); const char *q = trim; int bytesLeft = numTrim; @@ -1786,7 +1788,6 @@ TclTrimLeft( */ do { - Tcl_UniChar ch2 = 0; int qInc = TclUtfToUniChar(q, &ch2); if (ch1 == ch2) { -- cgit v0.12 From 6422781fdc73ec84db06ad3611a4a1676b4cb2eb Mon Sep 17 00:00:00 2001 From: sebres Date: Wed, 15 Apr 2020 19:06:38 +0000 Subject: fixes bug [8af92dfb66]: resolve too earlier stop of inflate's cycle (if generating decompressed data on flush case of full buffer) --- generic/tclZlib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclZlib.c b/generic/tclZlib.c index aed38c3..002c6ae 100644 --- a/generic/tclZlib.c +++ b/generic/tclZlib.c @@ -3832,7 +3832,7 @@ ResultGenerate( if (((flush == Z_SYNC_FLUSH) && (e == Z_BUF_ERROR)) || (e == Z_STREAM_END) - || (e == Z_OK && cd->inStream.avail_out == 0)) { + || (e == Z_OK && written == 0)) { return TCL_OK; } -- cgit v0.12 From f480c2471b8d644bb8cf256efcd0f99507fb4d10 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 15 Apr 2020 19:33:20 +0000 Subject: New test command "testutfnext", not used yet in actual test-cases. Being merged up to higher branches. (Thanks, Don!) --- generic/tclTest.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++----- generic/tclUtf.c | 4 ++-- 2 files changed, 56 insertions(+), 7 deletions(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index 31d3a7f..ba25873 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -433,6 +433,7 @@ static int SimpleMatchInDirectory( Tcl_Interp *interp, Tcl_Obj *resultPtr, Tcl_Obj *dirPtr, const char *pattern, Tcl_GlobTypeData *types); +static Tcl_ObjCmdProc TestUtfNextCmd; static Tcl_ObjCmdProc TestUtfPrevCmd; static int TestNumUtfCharsCmd(ClientData clientData, Tcl_Interp *interp, int objc, @@ -697,8 +698,10 @@ Tcltest_Init( (ClientData) 0, NULL); Tcl_CreateObjCommand(interp, "testsetobjerrorcode", TestsetobjerrorcodeCmd, NULL, NULL); + Tcl_CreateObjCommand(interp, "testutfnext", + TestUtfNextCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testutfprev", - TestUtfPrevCmd, (ClientData) 0, NULL); + TestUtfPrevCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testnumutfchars", TestNumUtfCharsCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testfindfirst", @@ -7107,6 +7110,52 @@ SimpleListVolumes(void) } /* + * Used to check operations of Tcl_UtfNext. + * + * Usage: testutfnext $bytes $offset + */ + +static int +TestUtfNextCmd( + ClientData clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *const objv[]) +{ + int numBytes, offset = 0; + char *bytes; + const char *result; + Tcl_Obj *copy; + + if (objc < 2 || objc > 3) { + Tcl_WrongNumArgs(interp, 1, objv, "bytes ?offset?"); + return TCL_ERROR; + } + + bytes = (char *) Tcl_GetByteArrayFromObj(objv[1], &numBytes); + + if (objc == 3) { + if (TCL_OK != TclGetIntForIndex(interp, objv[2], numBytes, &offset)) { + return TCL_ERROR; + } + if (offset < 0) { + offset = 0; + } + if (offset > numBytes) { + offset = numBytes; + } + } + copy = Tcl_DuplicateObj(objv[1]); + bytes = (char *) Tcl_SetByteArrayLength(copy, numBytes+1); + bytes[numBytes] = '\0'; + + result = Tcl_UtfNext(bytes + offset); + Tcl_SetObjResult(interp, Tcl_NewIntObj(result - bytes)); + + Tcl_DecrRefCount(copy); + return TCL_OK; +} +/* * Used to check operations of Tcl_UtfPrev. * * Usage: testutfprev $bytes $offset @@ -7123,16 +7172,16 @@ TestUtfPrevCmd( char *bytes; const char *result; Tcl_Obj *copy; - + if (objc < 2 || objc > 3) { Tcl_WrongNumArgs(interp, 1, objv, "bytes ?offset?"); return TCL_ERROR; } bytes = (char *) Tcl_GetByteArrayFromObj(objv[1], &numBytes); - + if (objc == 3) { - if (TCL_OK != Tcl_GetIntFromObj(interp, objv[2], &offset)) { + if (TCL_OK != TclGetIntForIndex(interp, objv[2], numBytes, &offset)) { return TCL_ERROR; } if (offset < 0) { @@ -7149,9 +7198,9 @@ TestUtfPrevCmd( bytes[numBytes] = '\0'; result = Tcl_UtfPrev(bytes + offset, bytes); + Tcl_SetObjResult(interp, Tcl_NewIntObj(result - bytes)); Tcl_DecrRefCount(copy); - Tcl_SetObjResult(interp, Tcl_NewIntObj(result - bytes)); return TCL_OK; } diff --git a/generic/tclUtf.c b/generic/tclUtf.c index fbdba4c..eb9c057 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -666,7 +666,7 @@ Tcl_UtfNext( * starts a character when characters are read starting at start and * that character might include the byte src[-1]. The routine will * examine only those bytes in the range that might be returned. - * It will not examine the byte *src, and because of that cannot + * It will not examine the byte *src, and because of that cannot * determine for certain in all circumstances whether the character * that begins with the returned pointer will or will not include * the byte src[-1]. In the scenario, where src points to the end of @@ -681,7 +681,7 @@ Tcl_UtfNext( * prevented from running past the beginning of the string. * * In a string where all characters are complete and properly formed, - * and the value of src points to the first byte of a character, + * and the value of src points to the first byte of a character, * repeated Tcl_UtfPrev calls will step to the starting bytes of * characters, one character at a time. Within those limitations, * Tcl_UtfPrev and Tcl_UtfNext are inverses. If either condition cannot -- cgit v0.12 From 8393f3aaad44f60614833b054b2aa554c20f6473 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 15 Apr 2020 20:12:18 +0000 Subject: Use TclGetBytesFromObj() in testcases in stead of Tcl_GetByteArrayFromObj(), since we only want to handle proper bytearrays. --- generic/tclTest.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index c872bd0..53ec544 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -6836,7 +6836,10 @@ TestUtfNextCmd( return TCL_ERROR; } - bytes = (char *) Tcl_GetByteArrayFromObj(objv[1], &numBytes); + bytes = (char *) TclGetBytesFromObj(interp, objv[1], &numBytes); + if (bytes == NULL) { + return TCL_ERROR; + } if (objc == 3) { if (TCL_OK != Tcl_GetIntForIndex(interp, objv[2], numBytes, &offset)) { @@ -6882,7 +6885,10 @@ TestUtfPrevCmd( return TCL_ERROR; } - bytes = (char *) Tcl_GetByteArrayFromObj(objv[1], &numBytes); + bytes = (char *) TclGetBytesFromObj(interp, objv[1], &numBytes); + if (bytes == NULL) { + return TCL_ERROR; + } if (objc == 3) { if (TCL_OK != Tcl_GetIntForIndex(interp, objv[2], numBytes, &offset)) { -- cgit v0.12 From e1e00509b78441a068cf98453c40e9af82ab8104 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 15 Apr 2020 21:48:57 +0000 Subject: Rework Tcl_UtfPrev so it properly handles overlong sequences. --- generic/tclUtf.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 113 insertions(+), 19 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index fbdba4c..a50a7fc 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -703,31 +703,125 @@ Tcl_UtfPrev( CONST char *src, /* A location in a UTF-8 string. */ CONST char *start) /* Pointer to the beginning of the string */ { - CONST char *look; - int i, byte; - - src--; - look = src; - for (i = 0; i < TCL_UTF_MAX; i++) { - if (look < start) { - if (src < start) { - src = start; - } - break; - } - byte = *((unsigned char *) look); + int trailBytesSeen = 0; /* How many trail bytes have been verified? */ + CONST char *fallback = src - 1; + /* If we cannot find a lead byte that might + * start a prefix of a valid UTF byte sequence, + * we will fallback to a one-byte back step */ + unsigned char *look = (unsigned char *)fallback; + /* Start search at the fallback position */ + + /* Quick boundary case exit. */ + if (fallback <= start) { + return start; + } + + do { + unsigned char byte = look[0]; + if (byte < 0x80) { - break; + /* + * Single byte character. Either this is a correct previous + * character, or it is followed by at least one trail byte + * which indicates a malformed sequence. In either case the + * correct result is to return the fallback. + */ + return fallback; } if (byte >= 0xC0) { - if (totalBytes[byte] <= i) { - break; + /* Non-trail byte; May be multibyte lead. */ + + if ((trailBytesSeen == 0) + /* + * We've seen no trailing context to use to check + * anything. From what we know, this non-trail byte + * is a prefix of a previous character, and accepting + * it (the fallback) is correct. + */ + + || (trailBytesSeen >= totalBytes[byte])) { + /* + * That is, (1 + trailBytesSeen > needed). + * We've examined more bytes than needed to complete + * this lead byte. No matter about well-formedness or + * validity, the sequence starting with this lead byte + * will never include the fallback location, so we must + * return the fallback location. + * + * EXAMPLE: bytes = "ab\C0\x80\x81def"; + * Tcl_UtfPrev(bytes+5, bytes); + * + * When we get here, look == bytes+2, trailBytesSeen == 2, + * needed = 2, and we need to return bytes+4 that points to + * the malformed \x81. + */ + return fallback; } - return look; + + /* + * trailBytesSeen > 0, so we can examine look[1] safely. + * Use that capability to screen out overlong sequences. + */ + + switch (byte) { + case 0xC0: + if (look[1] == 0x80) { + /* Valid sequence: \xC0\x80 for \u0000 */ + return (CONST char *)look; + } + /* Reject overlong: \xC0\x81 - \xC0\xBF */ + return fallback; + case 0xC1: + /* Reject overlong: \xC1\x80 - \xC1\xBF */ + return fallback; + case 0xE0: + if (look[1] < 0xA0) { + /* Reject overlong: \xE0\x80\x80 - \xE0\x9F\xBF */ + return fallback; + } + /* Valid sequence: \xE0\xA0\x80 for \u0800 , etc. */ + return (CONST char *)look; +#if TCL_UTF_MAX > 3 + case 0xF0: + if (look[1] < 0x90) { + /* Reject overlong: \xF0\x80\x80\x80 - \xF0\x8F\xBF\xBF */ + return fallback; + } + /* Valid sequence: \xF0\x90\x80\x80 for \U10000 , etc. */ + return (CONST char *)look; +#endif + default: + /* All other lead bytes lead only valid sequences */ + return (CONST char *)look; + } + } + + /* We saw a trail byte. */ + trailBytesSeen++; + + if ((CONST char *)look == start) { + /* + * Do not read before the start of the string + * + * If we get here, we've examined bytes at every location + * >= start and < src and all of them are trail bytes, + * including (*start). We need to return our fallback + * and exit this loop before we run past the start of the string. + */ + return fallback; } + + /* Continue the search backwards... */ look--; - } - return src; + } while (trailBytesSeen < TCL_UTF_MAX); + + /* + * We've seen TCL_UTF_MAX trail bytes, so we know there will not be a + * properly formed byte sequence to find, and we can stop looking, + * accepting the fallback. + */ + + return fallback; } /* -- cgit v0.12 From 6c579332631ddb93103506d954e17eda60d2d747 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 15 Apr 2020 22:14:33 +0000 Subject: More test coverage --- tests/utf.test | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/tests/utf.test b/tests/utf.test index 9982f81..62b85a7 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -302,6 +302,48 @@ test utf-7.31 {Tcl_UtfPrev -- overlong sequence} testutfprev { test utf-7.32 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xF0\x80\x80\x80 2 } 1 +test utf-7.33 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xC0\x80 +} 1 +test utf-7.34 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xC1\x80 +} 2 +test utf-7.35 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xC2\x80 +} 1 +test utf-7.36 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xE0\xA0\x80 +} 1 +test utf-7.37 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xE0\xA0\x80 3 +} 1 +test utf-7.38 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xE0\xA0\x80 2 +} 1 +test utf-7.39 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xF0\x90\x80\x80 +} 4 +test utf-7.40 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xF0\x90\x80\x80 4 +} 3 +test utf-7.41 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xF0\x90\x80\x80 3 +} 2 +test utf-7.42 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xF0\x90\x80\x80 2 +} 1 +test utf-7.43 {Tcl_UtfPrev -- no lead byte at start} testutfprev { + testutfprev \xA0 +} 0 +test utf-7.44 {Tcl_UtfPrev -- no lead byte at start} testutfprev { + testutfprev \xA0\xA0 +} 1 +test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} testutfprev { + testutfprev \xA0\xA0\xA0 +} 2 +test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} testutfprev { + testutfprev \xA0\xA0\xA0\xA0 +} 3 test utf-8.1 {Tcl_UniCharAtIndex: index = 0} { string index abcd 0 -- cgit v0.12 From 1f6e0698a6ecceec504384a0d228e56b2c1aba42 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 15 Apr 2020 22:28:18 +0000 Subject: Use test existence to shorten comment. --- generic/tclUtf.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index a50a7fc..6003b75 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -746,14 +746,7 @@ Tcl_UtfPrev( * this lead byte. No matter about well-formedness or * validity, the sequence starting with this lead byte * will never include the fallback location, so we must - * return the fallback location. - * - * EXAMPLE: bytes = "ab\C0\x80\x81def"; - * Tcl_UtfPrev(bytes+5, bytes); - * - * When we get here, look == bytes+2, trailBytesSeen == 2, - * needed = 2, and we need to return bytes+4 that points to - * the malformed \x81. + * return the fallback location. See test utf-7.17 */ return fallback; } -- cgit v0.12 From d491ca6385bb8c8e630dea43cc287b5a881233ce Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 15 Apr 2020 22:39:53 +0000 Subject: Refactor the Overlong test into a utility routine. --- generic/tclUtf.c | 85 +++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 56 insertions(+), 29 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 6003b75..f3b2097 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -81,6 +81,7 @@ static CONST unsigned char totalBytes[256] = { */ static int UtfCount(int ch); +static int Overlong(unsigned char *src); /* *--------------------------------------------------------------------------- @@ -115,7 +116,59 @@ UtfCount( #endif return 3; } + +/* + *--------------------------------------------------------------------------- + * + * Overlong -- + * + * Utility routine to report whether /src/ points to the start of an + * overlong byte sequence that should be rejected. + * + * Results: + * A boolean. + *--------------------------------------------------------------------------- + */ +INLINE static int +Overlong( + unsigned char *src) /* Points to lead byte of a UTF-8 byte + * sequence. Caller guarantees it is safe + * to read src[0] and src[1]. */ +{ + switch (*src) { + case 0xC0: + if (src[1] == 0x80) { + /* Valid sequence: \xC0\x80 for \u0000 */ + return 0; + } + /* Reject overlong: \xC0\x81 - \xC0\xBF */ + return 1; + case 0xC1: + /* Reject overlong: \xC1\x80 - \xC1\xBF */ + return 1; + case 0xE0: + if (src[1] < 0xA0) { + /* Reject overlong: \xE0\x80\x80 - \xE0\x9F\xBF */ + return 1; + } + /* Valid sequence: \xE0\xA0\x80 for \u0800 , etc. */ + return 0; +#if TCL_UTF_MAX > 3 + case 0xF0: + if (src[1] < 0x90) { + /* Reject overlong: \xF0\x80\x80\x80 - \xF0\x8F\xBF\xBF */ + return 1; + } + /* Valid sequence: \xF0\x90\x80\x80 for \U10000 , etc. */ + return 0 +#endif + default: + /* All other lead bytes lead only valid sequences */ + return 0; + } +} + /* *--------------------------------------------------------------------------- * @@ -756,37 +809,11 @@ Tcl_UtfPrev( * Use that capability to screen out overlong sequences. */ - switch (byte) { - case 0xC0: - if (look[1] == 0x80) { - /* Valid sequence: \xC0\x80 for \u0000 */ - return (CONST char *)look; - } - /* Reject overlong: \xC0\x81 - \xC0\xBF */ + if (Overlong(look)) { + /* Reject */ return fallback; - case 0xC1: - /* Reject overlong: \xC1\x80 - \xC1\xBF */ - return fallback; - case 0xE0: - if (look[1] < 0xA0) { - /* Reject overlong: \xE0\x80\x80 - \xE0\x9F\xBF */ - return fallback; - } - /* Valid sequence: \xE0\xA0\x80 for \u0800 , etc. */ - return (CONST char *)look; -#if TCL_UTF_MAX > 3 - case 0xF0: - if (look[1] < 0x90) { - /* Reject overlong: \xF0\x80\x80\x80 - \xF0\x8F\xBF\xBF */ - return fallback; - } - /* Valid sequence: \xF0\x90\x80\x80 for \U10000 , etc. */ - return (CONST char *)look; -#endif - default: - /* All other lead bytes lead only valid sequences */ - return (CONST char *)look; } + return (CONST char *)look; } /* We saw a trail byte. */ -- cgit v0.12 From 3e46af6402992817bf01c9e6bc423b4c3a2d5b82 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 16 Apr 2020 10:07:03 +0000 Subject: Add (internal) stub entries for TclRegisterLiteral and TclStaticPackage. Not used in any test-cases yet, but could be used in extensions for testing. Backported from 8.6. New test commands "testbytestring" and "teststringbytes". Also backported from 8.6. Not used in any test-cases yet. --- generic/tclCompile.h | 2 - generic/tclInt.decls | 13 +- generic/tclIntDecls.h | 39 ++- generic/tclLiteral.c | 212 ++++++------ generic/tclStubInit.c | 8 +- generic/tclTest.c | 870 ++++++++++++++++++++++++-------------------------- 6 files changed, 593 insertions(+), 551 deletions(-) diff --git a/generic/tclCompile.h b/generic/tclCompile.h index ee994d7..9ee60c3 100644 --- a/generic/tclCompile.h +++ b/generic/tclCompile.h @@ -938,8 +938,6 @@ MODULE_SCOPE void TclPrintObject(FILE *outFile, Tcl_Obj *objPtr, int maxChars); MODULE_SCOPE void TclPrintSource(FILE *outFile, CONST char *string, int maxChars); -MODULE_SCOPE int TclRegisterLiteral(CompileEnv *envPtr, - char *bytes, int length, int flags); static inline void TclPreserveByteCode( diff --git a/generic/tclInt.decls b/generic/tclInt.decls index 93a436a..892f977 100644 --- a/generic/tclInt.decls +++ b/generic/tclInt.decls @@ -945,7 +945,18 @@ declare 249 { int *decpt, int *signum, char **endPtr) } -declare 259 { +# Allow extensions for optimization +declare 251 { + int TclRegisterLiteral(void *envPtr, + char *bytes, int length, int flags) +} + +declare 257 { + void TclStaticPackage(Tcl_Interp *interp, const char *pkgName, + Tcl_PackageInitProc *initProc, Tcl_PackageInitProc *safeInitProc) +} + +declare 260 { void TclUnusedStubEntry(void) } diff --git a/generic/tclIntDecls.h b/generic/tclIntDecls.h index fe23e77..4d98d00 100644 --- a/generic/tclIntDecls.h +++ b/generic/tclIntDecls.h @@ -1057,17 +1057,30 @@ EXTERN char * TclDoubleDigits(double dv, int ndigits, int flags, int *decpt, int *signum, char **endPtr); #endif /* Slot 250 is reserved */ -/* Slot 251 is reserved */ +#ifndef TclRegisterLiteral_TCL_DECLARED +#define TclRegisterLiteral_TCL_DECLARED +/* 251 */ +EXTERN int TclRegisterLiteral(VOID *envPtr, char *bytes, + int length, int flags); +#endif /* Slot 252 is reserved */ /* Slot 253 is reserved */ /* Slot 254 is reserved */ /* Slot 255 is reserved */ /* Slot 256 is reserved */ -/* Slot 257 is reserved */ +#ifndef TclStaticPackage_TCL_DECLARED +#define TclStaticPackage_TCL_DECLARED +/* 257 */ +EXTERN void TclStaticPackage(Tcl_Interp *interp, + CONST char *pkgName, + Tcl_PackageInitProc *initProc, + Tcl_PackageInitProc *safeInitProc); +#endif /* Slot 258 is reserved */ +/* Slot 259 is reserved */ #ifndef TclUnusedStubEntry_TCL_DECLARED #define TclUnusedStubEntry_TCL_DECLARED -/* 259 */ +/* 260 */ EXTERN void TclUnusedStubEntry(void); #endif @@ -1326,15 +1339,16 @@ typedef struct TclIntStubs { VOID *reserved248; char * (*tclDoubleDigits) (double dv, int ndigits, int flags, int *decpt, int *signum, char **endPtr); /* 249 */ VOID *reserved250; - VOID *reserved251; + int (*tclRegisterLiteral) (VOID *envPtr, char *bytes, int length, int flags); /* 251 */ VOID *reserved252; VOID *reserved253; VOID *reserved254; VOID *reserved255; VOID *reserved256; - VOID *reserved257; + void (*tclStaticPackage) (Tcl_Interp *interp, CONST char *pkgName, Tcl_PackageInitProc *initProc, Tcl_PackageInitProc *safeInitProc); /* 257 */ VOID *reserved258; - void (*tclUnusedStubEntry) (void); /* 259 */ + VOID *reserved259; + void (*tclUnusedStubEntry) (void); /* 260 */ } TclIntStubs; extern TclIntStubs *tclIntStubsPtr; @@ -2065,17 +2079,24 @@ extern TclIntStubs *tclIntStubsPtr; (tclIntStubsPtr->tclDoubleDigits) /* 249 */ #endif /* Slot 250 is reserved */ -/* Slot 251 is reserved */ +#ifndef TclRegisterLiteral +#define TclRegisterLiteral \ + (tclIntStubsPtr->tclRegisterLiteral) /* 251 */ +#endif /* Slot 252 is reserved */ /* Slot 253 is reserved */ /* Slot 254 is reserved */ /* Slot 255 is reserved */ /* Slot 256 is reserved */ -/* Slot 257 is reserved */ +#ifndef TclStaticPackage +#define TclStaticPackage \ + (tclIntStubsPtr->tclStaticPackage) /* 257 */ +#endif /* Slot 258 is reserved */ +/* Slot 259 is reserved */ #ifndef TclUnusedStubEntry #define TclUnusedStubEntry \ - (tclIntStubsPtr->tclUnusedStubEntry) /* 259 */ + (tclIntStubsPtr->tclUnusedStubEntry) /* 260 */ #endif #endif /* defined(USE_TCL_STUBS) && !defined(USE_TCL_STUB_PROCS) */ diff --git a/generic/tclLiteral.c b/generic/tclLiteral.c index b6c45ac..6aa6c5a 100644 --- a/generic/tclLiteral.c +++ b/generic/tclLiteral.c @@ -31,7 +31,7 @@ static int AddLocalLiteralEntry(CompileEnv *envPtr, Tcl_Obj *objPtr, int localHash); static void ExpandLocalLiteralArray(CompileEnv *envPtr); -static unsigned int HashString(const char *bytes, int length); +static unsigned HashString(const char *string, int length); #ifdef TCL_COMPILE_DEBUG static LiteralEntry * LookupLiteralEntry(Tcl_Interp *interp, Tcl_Obj *objPtr); @@ -58,7 +58,7 @@ static void RebuildLiteralTable(LiteralTable *tablePtr); void TclInitLiteralTable( - register LiteralTable *tablePtr) + LiteralTable *tablePtr) /* Pointer to table structure, which is * supplied by the caller. */ { @@ -131,7 +131,7 @@ TclDeleteLiteralTable( objPtr = entryPtr->objPtr; TclDecrRefCount(objPtr); nextPtr = entryPtr->nextPtr; - ckfree((char *) entryPtr); + ckfree((char *)entryPtr); entryPtr = nextPtr; } } @@ -141,7 +141,7 @@ TclDeleteLiteralTable( */ if (tablePtr->buckets != tablePtr->staticBuckets) { - ckfree((char *) tablePtr->buckets); + ckfree((char *)tablePtr->buckets); } } @@ -157,16 +157,16 @@ TclDeleteLiteralTable( * * Results: * The literal object. If it was created in this call *newPtr is set to - * 1, else 0. NULL is returned if newPtr==NULL and no literal is found. + * 1, else 0. NULL is returned if newPtr==NULL and no literal is found. * * Side effects: - * Increments the ref count of the global LiteralEntry since the caller - * now holds a reference. - * If LITERAL_ON_HEAP is set in flags, this function is given ownership - * of the string: if an object is created then its string representation - * is set directly from string, otherwise the string is freed. Typically, - * a caller sets LITERAL_ON_HEAP if "string" is an already heap-allocated - * buffer holding the result of backslash substitutions. + * Increments the ref count of the global LiteralEntry since the caller + * now holds a reference. If LITERAL_ON_HEAP is set in flags, this + * function is given ownership of the string: if an object is created + * then its string representation is set directly from string, otherwise + * the string is freed. Typically, a caller sets LITERAL_ON_HEAP if + * "string" is an already heap-allocated buffer holding the result of + * backslash substitutions. * *---------------------------------------------------------------------- */ @@ -174,15 +174,17 @@ TclDeleteLiteralTable( Tcl_Obj * TclCreateLiteral( Interp *iPtr, - char *bytes, - int length, - unsigned int hash, /* The string's hash. If -1, it will be computed here */ + char *bytes, /* The start of the string. Note that this is + * not a NUL-terminated string. */ + int length, /* Number of bytes in the string. */ + unsigned hash, /* The string's hash. If -1, it will be + * computed here. */ int *newPtr, Namespace *nsPtr, int flags, LiteralEntry **globalPtrPtr) { - LiteralTable *globalTablePtr = &(iPtr->literalTable); + LiteralTable *globalTablePtr = &iPtr->literalTable; LiteralEntry *globalPtr; int globalHash; Tcl_Obj *objPtr; @@ -191,7 +193,7 @@ TclCreateLiteral( * Is it in the interpreter's global literal table? */ - if (hash == (unsigned int) -1) { + if (hash == (unsigned) -1) { hash = HashString(bytes, length); } globalHash = (hash & globalTablePtr->mask); @@ -231,7 +233,7 @@ TclCreateLiteral( } } if (!newPtr) { - if (flags & LITERAL_ON_HEAP) { + if ((flags & LITERAL_ON_HEAP)) { ckfree(bytes); } return NULL; @@ -244,7 +246,7 @@ TclCreateLiteral( TclNewObj(objPtr); Tcl_IncrRefCount(objPtr); - if (flags & LITERAL_ON_HEAP) { + if ((flags & LITERAL_ON_HEAP)) { objPtr->bytes = bytes; objPtr->length = length; } else { @@ -253,12 +255,12 @@ TclCreateLiteral( #ifdef TCL_COMPILE_DEBUG if (LookupLiteralEntry((Tcl_Interp *) iPtr, objPtr) != NULL) { - Tcl_Panic("TclRegisterLiteral: literal \"%.*s\" found globally but shouldn't be", - (length>60? 60 : length), bytes); + Tcl_Panic("%s: literal \"%.*s\" found globally but shouldn't be", + "TclRegisterLiteral", (length>60? 60 : length), bytes); } #endif - globalPtr = (LiteralEntry *) ckalloc((unsigned) sizeof(LiteralEntry)); + globalPtr = (LiteralEntry *)ckalloc(sizeof(LiteralEntry)); globalPtr->objPtr = objPtr; globalPtr->refCount = 1; globalPtr->nsPtr = nsPtr; @@ -291,8 +293,8 @@ TclCreateLiteral( } } if (!found) { - Tcl_Panic("TclRegisterLiteral: literal \"%.*s\" wasn't global", - (length>60? 60 : length), bytes); + Tcl_Panic("%s: literal \"%.*s\" wasn't global", + "TclRegisterLiteral", (length>60? 60 : length), bytes); } } #endif /*TCL_COMPILE_DEBUG*/ @@ -340,9 +342,9 @@ TclCreateLiteral( int TclRegisterLiteral( - CompileEnv *envPtr, /* Points to the CompileEnv in whose object + void *ePtr, /* Points to the CompileEnv in whose object * array an object is found or created. */ - register char *bytes, /* Points to string for which to find or + char *bytes, /* Points to string for which to find or * create an object in CompileEnv's object * array. */ int length, /* Number of bytes in the string. If < 0, the @@ -351,14 +353,15 @@ TclRegisterLiteral( int flags) /* If LITERAL_ON_HEAP then the caller already * malloc'd bytes and ownership is passed to * this function. If LITERAL_NS_SCOPE then - * the literal shouldnot be shared accross + * the literal should not be shared accross * namespaces. */ { + CompileEnv *envPtr = ePtr; Interp *iPtr = envPtr->iPtr; - LiteralTable *localTablePtr = &(envPtr->localLitTable); + LiteralTable *localTablePtr = &envPtr->localLitTable; LiteralEntry *globalPtr, *localPtr; Tcl_Obj *objPtr; - unsigned int hash; + unsigned hash; int localHash, objIndex, new; Namespace *nsPtr; @@ -379,7 +382,7 @@ TclRegisterLiteral( if ((objPtr->length == length) && ((length == 0) || ((objPtr->bytes[0] == bytes[0]) && (memcmp(objPtr->bytes, bytes, (unsigned) length) == 0)))) { - if (flags & LITERAL_ON_HEAP) { + if ((flags & LITERAL_ON_HEAP)) { ckfree(bytes); } objIndex = (localPtr - envPtr->literalArrayPtr); @@ -444,14 +447,14 @@ static LiteralEntry * LookupLiteralEntry( Tcl_Interp *interp, /* Interpreter for which objPtr was created to * hold a literal. */ - register Tcl_Obj *objPtr) /* Points to a Tcl object holding a literal + Tcl_Obj *objPtr) /* Points to a Tcl object holding a literal * that was previously created by a call to * TclRegisterLiteral. */ { Interp *iPtr = (Interp *) interp; - LiteralTable *globalTablePtr = &(iPtr->literalTable); - register LiteralEntry *entryPtr; - char *bytes; + LiteralTable *globalTablePtr = &iPtr->literalTable; + LiteralEntry *entryPtr; + const char *bytes; int length, globalHash; bytes = TclGetStringFromObj(objPtr, &length); @@ -490,18 +493,18 @@ void TclHideLiteral( Tcl_Interp *interp, /* Interpreter for which objPtr was created to * hold a literal. */ - register CompileEnv *envPtr,/* Points to CompileEnv whose literal array + CompileEnv *envPtr,/* Points to CompileEnv whose literal array * contains the entry being hidden. */ int index) /* The index of the entry in the literal * array. */ { LiteralEntry **nextPtrPtr, *entryPtr, *lPtr; - LiteralTable *localTablePtr = &(envPtr->localLitTable); + LiteralTable *localTablePtr = &envPtr->localLitTable; int localHash, length; - char *bytes; + const char *bytes; Tcl_Obj *newObjPtr; - lPtr = &(envPtr->literalArrayPtr[index]); + lPtr = &envPtr->literalArrayPtr[index]; /* * To avoid unwanted sharing we need to copy the object and remove it from @@ -553,14 +556,14 @@ TclHideLiteral( int TclAddLiteralObj( - register CompileEnv *envPtr,/* Points to CompileEnv in whose literal array + CompileEnv *envPtr,/* Points to CompileEnv in whose literal array * the object is to be inserted. */ Tcl_Obj *objPtr, /* The object to insert into the array. */ LiteralEntry **litPtrPtr) /* The location where the pointer to the new * literal entry should be stored. May be * NULL. */ { - register LiteralEntry *lPtr; + LiteralEntry *lPtr; int objIndex; if (envPtr->literalArrayNext >= envPtr->literalArrayEnd) { @@ -569,7 +572,7 @@ TclAddLiteralObj( objIndex = envPtr->literalArrayNext; envPtr->literalArrayNext++; - lPtr = &(envPtr->literalArrayPtr[objIndex]); + lPtr = &envPtr->literalArrayPtr[objIndex]; lPtr->objPtr = objPtr; Tcl_IncrRefCount(objPtr); lPtr->refCount = -1; /* i.e., unused */ @@ -595,19 +598,19 @@ TclAddLiteralObj( * * Side effects: * Expands the literal array if necessary. May rebuild the hash bucket - * array of the CompileEnv's literal array if it becomes too large. + * array of the CompileEnv's literal array if it becomes too large. * *---------------------------------------------------------------------- */ static int AddLocalLiteralEntry( - register CompileEnv *envPtr,/* Points to CompileEnv in whose literal array + CompileEnv *envPtr,/* Points to CompileEnv in whose literal array * the object is to be inserted. */ - Tcl_Obj *objPtr, /* The literal to add to the CompileEnv. */ + Tcl_Obj *objPtr, /* The literal to add to the CompileEnv. */ int localHash) /* Hash value for the literal's string. */ { - register LiteralTable *localTablePtr = &(envPtr->localLitTable); + LiteralTable *localTablePtr = &envPtr->localLitTable; LiteralEntry *localPtr; int objIndex; @@ -648,8 +651,8 @@ AddLocalLiteralEntry( if (!found) { bytes = Tcl_GetStringFromObj(objPtr, &length); - Tcl_Panic("AddLocalLiteralEntry: literal \"%.*s\" wasn't found locally", - (length>60? 60 : length), bytes); + Tcl_Panic("%s: literal \"%.*s\" wasn't found locally", + "AddLocalLiteralEntry", (length>60? 60 : length), bytes); } } #endif /*TCL_COMPILE_DEBUG*/ @@ -679,7 +682,7 @@ AddLocalLiteralEntry( static void ExpandLocalLiteralArray( - register CompileEnv *envPtr)/* Points to the CompileEnv whose object array + CompileEnv *envPtr)/* Points to the CompileEnv whose object array * must be enlarged. */ { /* @@ -687,7 +690,7 @@ ExpandLocalLiteralArray( * 0 and (envPtr->literalArrayNext - 1) [inclusive]. */ - LiteralTable *localTablePtr = &(envPtr->localLitTable); + LiteralTable *localTablePtr = &envPtr->localLitTable; int currElems = envPtr->literalArrayNext; size_t currBytes = (currElems * sizeof(LiteralEntry)); LiteralEntry *currArrayPtr = envPtr->literalArrayPtr; @@ -701,14 +704,15 @@ ExpandLocalLiteralArray( } if (envPtr->mallocedLiteralArray) { - newArrayPtr = (LiteralEntry *) ckrealloc( + newArrayPtr = (LiteralEntry *)ckrealloc( (char *)currArrayPtr, newSize); } else { /* * envPtr->literalArrayPtr isn't a ckalloc'd pointer, so we must - * code a ckrealloc equivalent for ourselves + * code a ckrealloc equivalent for ourselves. */ - newArrayPtr = (LiteralEntry *) ckalloc(newSize); + + newArrayPtr = (LiteralEntry *)ckalloc(newSize); memcpy(newArrayPtr, currArrayPtr, currBytes); envPtr->mallocedLiteralArray = 1; } @@ -761,16 +765,21 @@ void TclReleaseLiteral( Tcl_Interp *interp, /* Interpreter for which objPtr was created to * hold a literal. */ - register Tcl_Obj *objPtr) /* Points to a literal object that was + Tcl_Obj *objPtr) /* Points to a literal object that was * previously created by a call to * TclRegisterLiteral. */ { Interp *iPtr = (Interp *) interp; - LiteralTable *globalTablePtr = &(iPtr->literalTable); - register LiteralEntry *entryPtr, *prevPtr; - char *bytes; + LiteralTable *globalTablePtr; + LiteralEntry *entryPtr, *prevPtr; + const char *bytes; int length, index; + if (iPtr == NULL) { + goto done; + } + + globalTablePtr = &iPtr->literalTable; bytes = TclGetStringFromObj(objPtr, &length); index = (HashString(bytes, length) & globalTablePtr->mask); @@ -797,7 +806,7 @@ TclReleaseLiteral( } else { prevPtr->nextPtr = entryPtr->nextPtr; } - ckfree((char *) entryPtr); + ckfree((char *)entryPtr); globalTablePtr->numEntries--; TclDecrRefCount(objPtr); @@ -814,6 +823,7 @@ TclReleaseLiteral( * Remove the reference corresponding to the local literal table entry. */ + done: Tcl_DecrRefCount(objPtr); } @@ -836,11 +846,11 @@ TclReleaseLiteral( static unsigned int HashString( - register const char *bytes, /* String for which to compute hash value. */ + const char *bytes, /* String for which to compute hash value. */ int length) /* Number of bytes in the string. */ { - register unsigned int result; - register int i; + unsigned int result; + int i; /* * I tried a zillion different hash functions and asked many other people @@ -850,12 +860,26 @@ HashString( * following reasons: * * 1. Multiplying by 10 is perfect for keys that are decimal strings, and - * multiplying by 9 is just about as good. + * multiplying by 9 is just about as good. * 2. Times-9 is (shift-left-3) plus (old). This means that each - * character's bits hang around in the low-order bits of the hash value - * for ever, plus they spread fairly rapidly up to the high-order bits - * to fill out the hash value. This seems works well both for decimal - * and non-decimal strings. + * character's bits hang around in the low-order bits of the hash value + * for ever, plus they spread fairly rapidly up to the high-order bits + * to fill out the hash value. This seems works well both for decimal + * and non-decimal strings. + * + * Note that this function is very weak against malicious strings; it's + * very easy to generate multiple keys that have the same hashcode. On the + * other hand, that hardly ever actually occurs and this function *is* + * very cheap, even by comparison with industry-standard hashes like FNV. + * If real strength of hash is required though, use a custom hash based on + * Bob Jenkins's lookup3(), but be aware that it's significantly slower. + * Tcl scripts tend to not have a big issue in this area, and literals + * mostly aren't looked up by name anyway. + * + * See also HashStringKey in tclHash.c. + * See also TclObjHashKey in tclObj.c. + * + * See [tcl-Feature Request #2958832] */ result = 0; @@ -885,14 +909,14 @@ HashString( static void RebuildLiteralTable( - register LiteralTable *tablePtr) + LiteralTable *tablePtr) /* Local or global table to enlarge. */ { LiteralEntry **oldBuckets; - register LiteralEntry **oldChainPtr, **newChainPtr; - register LiteralEntry *entryPtr; + LiteralEntry **oldChainPtr, **newChainPtr; + LiteralEntry *entryPtr; LiteralEntry **bucketPtr; - char *bytes; + const char *bytes; unsigned int oldSize; int count, index, length; @@ -915,8 +939,8 @@ RebuildLiteralTable( } tablePtr->numBuckets *= 4; - tablePtr->buckets = (LiteralEntry **) ckalloc((unsigned) - (tablePtr->numBuckets * sizeof(LiteralEntry *))); + tablePtr->buckets = (LiteralEntry **)ckalloc( + tablePtr->numBuckets * sizeof(LiteralEntry *)); for (count=tablePtr->numBuckets, newChainPtr=tablePtr->buckets; count>0 ; count--, newChainPtr++) { *newChainPtr = NULL; @@ -934,7 +958,7 @@ RebuildLiteralTable( index = (HashString(bytes, length) & tablePtr->mask); *oldChainPtr = entryPtr->nextPtr; - bucketPtr = &(tablePtr->buckets[index]); + bucketPtr = &tablePtr->buckets[index]; entryPtr->nextPtr = *bucketPtr; *bucketPtr = entryPtr; } @@ -945,7 +969,7 @@ RebuildLiteralTable( */ if (oldBuckets != tablePtr->staticBuckets) { - ckfree((char *) oldBuckets); + ckfree((char *)oldBuckets); } } @@ -975,7 +999,7 @@ TclLiteralStats( #define NUM_COUNTERS 10 int count[NUM_COUNTERS], overflow, i, j; double average, tmp; - register LiteralEntry *entryPtr; + LiteralEntry *entryPtr; char *result, *p; /* @@ -1007,7 +1031,7 @@ TclLiteralStats( * Print out the histogram and a few other pieces of information. */ - result = (char *) ckalloc((unsigned) ((NUM_COUNTERS*60) + 300)); + result = (char *)ckalloc(NUM_COUNTERS*60 + 300); sprintf(result, "%d entries in table, %d buckets\n", tablePtr->numEntries, tablePtr->numBuckets); p = result + strlen(result); @@ -1046,10 +1070,10 @@ TclVerifyLocalLiteralTable( CompileEnv *envPtr) /* Points to CompileEnv whose literal table is * to be validated. */ { - register LiteralTable *localTablePtr = &(envPtr->localLitTable); - register LiteralEntry *localPtr; + LiteralTable *localTablePtr = &envPtr->localLitTable; + LiteralEntry *localPtr; char *bytes; - register int i; + int i; int length, count; count = 0; @@ -1059,23 +1083,26 @@ TclVerifyLocalLiteralTable( count++; if (localPtr->refCount != -1) { bytes = Tcl_GetStringFromObj(localPtr->objPtr, &length); - Tcl_Panic("TclVerifyLocalLiteralTable: local literal \"%.*s\" had bad refCount %d", + Tcl_Panic("%s: local literal \"%.*s\" had bad refCount %d", + "TclVerifyLocalLiteralTable", (length>60? 60 : length), bytes, localPtr->refCount); } if (LookupLiteralEntry((Tcl_Interp *) envPtr->iPtr, localPtr->objPtr) == NULL) { bytes = Tcl_GetStringFromObj(localPtr->objPtr, &length); - Tcl_Panic("TclVerifyLocalLiteralTable: local literal \"%.*s\" is not global", - (length>60? 60 : length), bytes); + Tcl_Panic("%s: local literal \"%.*s\" is not global", + "TclVerifyLocalLiteralTable", (length>60? 60 : length), bytes); } if (localPtr->objPtr->bytes == NULL) { - Tcl_Panic("TclVerifyLocalLiteralTable: literal has NULL string rep"); + Tcl_Panic("%s: literal has NULL string rep", + "TclVerifyLocalLiteralTable"); } } } if (count != localTablePtr->numEntries) { - Tcl_Panic("TclVerifyLocalLiteralTable: local literal table had %d entries, should be %d", - count, localTablePtr->numEntries); + Tcl_Panic("%s: local literal table had %d entries, should be %d", + "TclVerifyLocalLiteralTable", count, + localTablePtr->numEntries); } } @@ -1100,10 +1127,10 @@ TclVerifyGlobalLiteralTable( Interp *iPtr) /* Points to interpreter whose global literal * table is to be validated. */ { - register LiteralTable *globalTablePtr = &(iPtr->literalTable); - register LiteralEntry *globalPtr; + LiteralTable *globalTablePtr = &iPtr->literalTable; + LiteralEntry *globalPtr; char *bytes; - register int i; + int i; int length, count; count = 0; @@ -1113,17 +1140,20 @@ TclVerifyGlobalLiteralTable( count++; if (globalPtr->refCount < 1) { bytes = Tcl_GetStringFromObj(globalPtr->objPtr, &length); - Tcl_Panic("TclVerifyGlobalLiteralTable: global literal \"%.*s\" had bad refCount %d", + Tcl_Panic("%s: global literal \"%.*s\" had bad refCount %d", + "TclVerifyGlobalLiteralTable", (length>60? 60 : length), bytes, globalPtr->refCount); } if (globalPtr->objPtr->bytes == NULL) { - Tcl_Panic("TclVerifyGlobalLiteralTable: literal has NULL string rep"); + Tcl_Panic("%s: literal has NULL string rep", + "TclVerifyGlobalLiteralTable"); } } } if (count != globalTablePtr->numEntries) { - Tcl_Panic("TclVerifyGlobalLiteralTable: global literal table had %d entries, should be %d", - count, globalTablePtr->numEntries); + Tcl_Panic("%s: global literal table had %d entries, should be %d", + "TclVerifyGlobalLiteralTable", count, + globalTablePtr->numEntries); } } #endif /*TCL_COMPILE_DEBUG*/ diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index 67cb68e..d6f1da9 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -36,6 +36,7 @@ #undef TclWinGetServByName #undef TclWinGetSockOpt #undef TclWinSetSockOpt +#define TclStaticPackage Tcl_StaticPackage #define TclUnusedStubEntry NULL /* @@ -551,15 +552,16 @@ TclIntStubs tclIntStubs = { NULL, /* 248 */ TclDoubleDigits, /* 249 */ NULL, /* 250 */ - NULL, /* 251 */ + TclRegisterLiteral, /* 251 */ NULL, /* 252 */ NULL, /* 253 */ NULL, /* 254 */ NULL, /* 255 */ NULL, /* 256 */ - NULL, /* 257 */ + TclStaticPackage, /* 257 */ NULL, /* 258 */ - TclUnusedStubEntry, /* 259 */ + NULL, /* 259 */ + TclUnusedStubEntry, /* 260 */ }; TclIntPlatStubs tclIntPlatStubs = { diff --git a/generic/tclTest.c b/generic/tclTest.c index ba25873..6e0fbed 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -61,7 +61,7 @@ typedef struct TestAsyncHandler { /* Next is list of handlers. */ } TestAsyncHandler; -TCL_DECLARE_MUTEX(asyncTestMutex); +TCL_DECLARE_MUTEX(asyncTestMutex) static TestAsyncHandler *firstHandler = NULL; @@ -84,7 +84,7 @@ static Tcl_Trace cmdTrace; * TestdelCmd: */ -typedef struct DelCmd { +typedef struct { Tcl_Interp *interp; /* Interpreter in which command exists. */ char *deleteCmd; /* Script to execute when command is deleted. * Malloc'ed. */ @@ -95,7 +95,7 @@ typedef struct DelCmd { * command. */ -typedef struct TclEncoding { +typedef struct { Tcl_Interp *interp; char *toUtfCmd; char *fromUtfCmd; @@ -151,10 +151,8 @@ static void CleanupTestSetassocdataTests( ClientData clientData, Tcl_Interp *interp); static void CmdDelProc1(ClientData clientData); static void CmdDelProc2(ClientData clientData); -static int CmdProc1(ClientData clientData, - Tcl_Interp *interp, int argc, const char **argv); -static int CmdProc2(ClientData clientData, - Tcl_Interp *interp, int argc, const char **argv); +static Tcl_CmdProc CmdProc1; +static Tcl_CmdProc CmdProc2; static void CmdTraceDeleteProc( ClientData clientData, Tcl_Interp *interp, int level, char *command, Tcl_CmdProc *cmdProc, @@ -164,16 +162,11 @@ static void CmdTraceProc(ClientData clientData, Tcl_Interp *interp, int level, char *command, Tcl_CmdProc *cmdProc, ClientData cmdClientData, int argc, char **argv); -static int CreatedCommandProc( - ClientData clientData, Tcl_Interp *interp, - int argc, const char **argv); -static int CreatedCommandProc2( - ClientData clientData, Tcl_Interp *interp, - int argc, const char **argv); +static Tcl_CmdProc CreatedCommandProc; +static Tcl_CmdProc CreatedCommandProc2; static void DelCallbackProc(ClientData clientData, Tcl_Interp *interp); -static int DelCmdProc(ClientData clientData, - Tcl_Interp *interp, int argc, const char **argv); +static Tcl_CmdProc DelCmdProc; static void DelDeleteProc(ClientData clientData); static void EncodingFreeProc(ClientData clientData); static int EncodingToUtfProc(ClientData clientData, @@ -188,14 +181,10 @@ static int EncodingFromUtfProc(ClientData clientData, int *dstCharsPtr); static void ExitProcEven(ClientData clientData); static void ExitProcOdd(ClientData clientData); -static int GetTimesCmd(ClientData clientData, - Tcl_Interp *interp, int argc, const char **argv); +static Tcl_ObjCmdProc GetTimesObjCmd; static void MainLoop(void); -static int NoopCmd(ClientData clientData, - Tcl_Interp *interp, int argc, const char **argv); -static int NoopObjCmd(ClientData clientData, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); +static Tcl_CmdProc NoopCmd; +static Tcl_ObjCmdProc NoopObjCmd; static int ObjTraceProc(ClientData clientData, Tcl_Interp *interp, int level, const char *command, Tcl_Command commandToken, int objc, @@ -206,13 +195,9 @@ static void SpecialFree(char *blockPtr); static int StaticInitProc(Tcl_Interp *interp); #undef USE_OBSOLETE_FS_HOOKS #ifdef USE_OBSOLETE_FS_HOOKS -static int TestaccessprocCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestopenfilechannelprocCmd( - ClientData dummy, Tcl_Interp *interp, int argc, - const char **argv); -static int TeststatprocCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); +static Tcl_CmdProc TestaccessprocCmd; +static Tcl_CmdProc TestopenfilechannelprocCmd; +static Tcl_CmdProc TeststatprocCmd; static int PretendTclpAccess(const char *path, int mode); static int TestAccessProc1(const char *path, int mode); static int TestAccessProc2(const char *path, int mode); @@ -234,264 +219,160 @@ static int TestStatProc1(const char *path, struct stat *buf); static int TestStatProc2(const char *path, struct stat *buf); static int TestStatProc3(const char *path, struct stat *buf); #endif -static int TestasyncCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestcmdinfoCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestcmdtokenCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestcmdtraceCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestconcatobjCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestcreatecommandCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestdcallCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestdelCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestdelassocdataCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestdoubledigitsObjCmd(ClientData dummy, - Tcl_Interp* interp, - int objc, Tcl_Obj* const objv[]); -static int TestdstringCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestencodingObjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestevalexObjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestevalobjvObjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TesteventObjCmd(ClientData unused, - Tcl_Interp *interp, int argc, - Tcl_Obj *const objv[]); +static Tcl_CmdProc TestasyncCmd; +static Tcl_ObjCmdProc TestbytestringObjCmd; +static Tcl_ObjCmdProc TeststringbytesObjCmd; +static Tcl_CmdProc TestcmdinfoCmd; +static Tcl_CmdProc TestcmdtokenCmd; +static Tcl_CmdProc TestcmdtraceCmd; +static Tcl_CmdProc TestconcatobjCmd; +static Tcl_CmdProc TestcreatecommandCmd; +static Tcl_CmdProc TestdcallCmd; +static Tcl_CmdProc TestdelCmd; +static Tcl_CmdProc TestdelassocdataCmd; +static Tcl_ObjCmdProc TestdoubledigitsObjCmd; +static Tcl_CmdProc TestdstringCmd; +static Tcl_ObjCmdProc TestencodingObjCmd; +static Tcl_ObjCmdProc TestevalexObjCmd; +static Tcl_ObjCmdProc TestevalobjvObjCmd; +static Tcl_ObjCmdProc TesteventObjCmd; static int TesteventProc(Tcl_Event *event, int flags); static int TesteventDeleteProc(Tcl_Event *event, ClientData clientData); -static int TestexithandlerCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestexprlongCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestexprlongobjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestexprdoubleCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestexprdoubleobjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestexprparserObjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestexprstringCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestfileCmd(ClientData dummy, - Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]); -static int TestfilelinkCmd(ClientData dummy, - Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]); -static int TestfeventCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestgetassocdataCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestgetintCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestgetplatformCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestgetvarfullnameCmd( - ClientData dummy, Tcl_Interp *interp, - int objc, Tcl_Obj *const objv[]); -static int TestinterpdeleteCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestlinkCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestlocaleCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); +static Tcl_CmdProc TestexithandlerCmd; +static Tcl_CmdProc TestexprlongCmd; +static Tcl_ObjCmdProc TestexprlongobjCmd; +static Tcl_CmdProc TestexprdoubleCmd; +static Tcl_ObjCmdProc TestexprdoubleobjCmd; +static Tcl_ObjCmdProc TestexprparserObjCmd; +static Tcl_CmdProc TestexprstringCmd; +static Tcl_ObjCmdProc TestfileCmd; +static Tcl_ObjCmdProc TestfilelinkCmd; +static Tcl_CmdProc TestfeventCmd; +static Tcl_CmdProc TestgetassocdataCmd; +static Tcl_CmdProc TestgetintCmd; +static Tcl_CmdProc TestgetplatformCmd; +static Tcl_ObjCmdProc TestgetvarfullnameCmd; +static Tcl_CmdProc TestinterpdeleteCmd; +static Tcl_CmdProc TestlinkCmd; +static Tcl_ObjCmdProc TestlocaleCmd; static int TestMathFunc(ClientData clientData, Tcl_Interp *interp, Tcl_Value *args, Tcl_Value *resultPtr); static int TestMathFunc2(ClientData clientData, Tcl_Interp *interp, Tcl_Value *args, Tcl_Value *resultPtr); -static int TestmainthreadCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestsetmainloopCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestexitmainloopCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestpanicCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestparserObjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestparsevarObjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestparsevarnameObjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestregexpObjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestreturnObjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static void TestregexpXflags(char *string, +static Tcl_CmdProc TestmainthreadCmd; +static Tcl_CmdProc TestsetmainloopCmd; +static Tcl_CmdProc TestexitmainloopCmd; +static Tcl_CmdProc TestpanicCmd; +static Tcl_ObjCmdProc TestparserObjCmd; +static Tcl_ObjCmdProc TestparsevarObjCmd; +static Tcl_ObjCmdProc TestparsevarnameObjCmd; +static Tcl_ObjCmdProc TestregexpObjCmd; +static Tcl_ObjCmdProc TestreturnObjCmd; +static void TestregexpXflags(const char *string, int length, int *cflagsPtr, int *eflagsPtr); -static int TestsaveresultCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); +static Tcl_ObjCmdProc TestsaveresultCmd; static void TestsaveresultFree(char *blockPtr); -static int TestsetassocdataCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestsetCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int Testset2Cmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestseterrorcodeCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestsetobjerrorcodeCmd( - ClientData dummy, Tcl_Interp *interp, - int objc, Tcl_Obj *const objv[]); -static int TestsetplatformCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TeststaticpkgCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TesttranslatefilenameCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestupvarCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestWrongNumArgsObjCmd( - ClientData clientData, Tcl_Interp *interp, - int objc, Tcl_Obj *const objv[]); -static int TestGetIndexFromObjStructObjCmd( - ClientData clientData, Tcl_Interp *interp, - int objc, Tcl_Obj *const objv[]); -static int TestChannelCmd(ClientData clientData, - Tcl_Interp *interp, int argc, const char **argv); -static int TestChannelEventCmd(ClientData clientData, - Tcl_Interp *interp, int argc, const char **argv); -static int TestFilesystemObjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestSimpleFilesystemObjCmd( - ClientData dummy, Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); +static Tcl_CmdProc TestsetassocdataCmd; +static Tcl_CmdProc TestsetCmd; +static Tcl_CmdProc Testset2Cmd; +static Tcl_CmdProc TestseterrorcodeCmd; +static Tcl_ObjCmdProc TestsetobjerrorcodeCmd; +static Tcl_CmdProc TestsetplatformCmd; +static Tcl_CmdProc TeststaticpkgCmd; +static Tcl_CmdProc TesttranslatefilenameCmd; +static Tcl_CmdProc TestupvarCmd; +static Tcl_ObjCmdProc TestWrongNumArgsObjCmd; +static Tcl_ObjCmdProc TestGetIndexFromObjStructObjCmd; +static Tcl_CmdProc TestChannelCmd; +static Tcl_CmdProc TestChannelEventCmd; +static Tcl_ObjCmdProc TestFilesystemObjCmd; +static Tcl_ObjCmdProc TestSimpleFilesystemObjCmd; static void TestReport(const char *cmd, Tcl_Obj *arg1, Tcl_Obj *arg2); static Tcl_Obj * TestReportGetNativePath(Tcl_Obj *pathPtr); -static int TestReportStat(Tcl_Obj *path, Tcl_StatBuf *buf); -static int TestReportAccess(Tcl_Obj *path, int mode); -static Tcl_Channel TestReportOpenFileChannel( - Tcl_Interp *interp, Tcl_Obj *fileName, - int mode, int permissions); -static int TestReportMatchInDirectory(Tcl_Interp *interp, - Tcl_Obj *resultPtr, Tcl_Obj *dirPtr, - const char *pattern, Tcl_GlobTypeData *types); -static int TestReportChdir(Tcl_Obj *dirName); -static int TestReportLstat(Tcl_Obj *path, Tcl_StatBuf *buf); -static int TestReportCopyFile(Tcl_Obj *src, Tcl_Obj *dst); -static int TestReportDeleteFile(Tcl_Obj *path); -static int TestReportRenameFile(Tcl_Obj *src, Tcl_Obj *dst); -static int TestReportCreateDirectory(Tcl_Obj *path); -static int TestReportCopyDirectory(Tcl_Obj *src, - Tcl_Obj *dst, Tcl_Obj **errorPtr); -static int TestReportRemoveDirectory(Tcl_Obj *path, - int recursive, Tcl_Obj **errorPtr); -static int TestReportLoadFile(Tcl_Interp *interp, - Tcl_Obj *fileName, Tcl_LoadHandle *handlePtr, - Tcl_FSUnloadFileProc **unloadProcPtr); -static Tcl_Obj * TestReportLink(Tcl_Obj *path, - Tcl_Obj *to, int linkType); -static const char ** TestReportFileAttrStrings( - Tcl_Obj *fileName, Tcl_Obj **objPtrRef); -static int TestReportFileAttrsGet(Tcl_Interp *interp, - int index, Tcl_Obj *fileName, Tcl_Obj **objPtrRef); -static int TestReportFileAttrsSet(Tcl_Interp *interp, - int index, Tcl_Obj *fileName, Tcl_Obj *objPtr); -static int TestReportUtime(Tcl_Obj *fileName, - struct utimbuf *tval); -static int TestReportNormalizePath(Tcl_Interp *interp, - Tcl_Obj *pathPtr, int nextCheckpoint); -static int TestReportInFilesystem(Tcl_Obj *pathPtr, ClientData *clientDataPtr); -static void TestReportFreeInternalRep(ClientData clientData); -static ClientData TestReportDupInternalRep(ClientData clientData); - -static int SimpleStat(Tcl_Obj *path, Tcl_StatBuf *buf); -static int SimpleAccess(Tcl_Obj *path, int mode); -static Tcl_Channel SimpleOpenFileChannel(Tcl_Interp *interp, - Tcl_Obj *fileName, int mode, int permissions); -static Tcl_Obj * SimpleListVolumes(void); -static int SimplePathInFilesystem( - Tcl_Obj *pathPtr, ClientData *clientDataPtr); +static Tcl_FSStatProc TestReportStat; +static Tcl_FSAccessProc TestReportAccess; +static Tcl_FSOpenFileChannelProc TestReportOpenFileChannel; +static Tcl_FSMatchInDirectoryProc TestReportMatchInDirectory; +static Tcl_FSChdirProc TestReportChdir; +static Tcl_FSLstatProc TestReportLstat; +static Tcl_FSCopyFileProc TestReportCopyFile; +static Tcl_FSDeleteFileProc TestReportDeleteFile; +static Tcl_FSRenameFileProc TestReportRenameFile; +static Tcl_FSCreateDirectoryProc TestReportCreateDirectory; +static Tcl_FSCopyDirectoryProc TestReportCopyDirectory; +static Tcl_FSRemoveDirectoryProc TestReportRemoveDirectory; +static int TestReportLoadFile(Tcl_Interp *interp, Tcl_Obj *pathPtr, + Tcl_LoadHandle *handlePtr, Tcl_FSUnloadFileProc **unloadProcPtr); +static Tcl_FSLinkProc TestReportLink; +static Tcl_FSFileAttrStringsProc TestReportFileAttrStrings; +static Tcl_FSFileAttrsGetProc TestReportFileAttrsGet; +static Tcl_FSFileAttrsSetProc TestReportFileAttrsSet; +static Tcl_FSUtimeProc TestReportUtime; +static Tcl_FSNormalizePathProc TestReportNormalizePath; +static Tcl_FSPathInFilesystemProc TestReportInFilesystem; +static Tcl_FSFreeInternalRepProc TestReportFreeInternalRep; +static Tcl_FSDupInternalRepProc TestReportDupInternalRep; + +static Tcl_FSStatProc SimpleStat; +static Tcl_FSAccessProc SimpleAccess; +static Tcl_FSOpenFileChannelProc SimpleOpenFileChannel; +static Tcl_FSListVolumesProc SimpleListVolumes; +static Tcl_FSPathInFilesystemProc SimplePathInFilesystem; static Tcl_Obj * SimpleRedirect(Tcl_Obj *pathPtr); -static int SimpleMatchInDirectory( - Tcl_Interp *interp, Tcl_Obj *resultPtr, - Tcl_Obj *dirPtr, const char *pattern, - Tcl_GlobTypeData *types); +static Tcl_FSMatchInDirectoryProc SimpleMatchInDirectory; static Tcl_ObjCmdProc TestUtfNextCmd; static Tcl_ObjCmdProc TestUtfPrevCmd; -static int TestNumUtfCharsCmd(ClientData clientData, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestFindFirstCmd(ClientData clientData, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestFindLastCmd(ClientData clientData, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestHashSystemHashCmd(ClientData clientData, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -#if defined(HAVE_CPUID) || defined(__WIN32__) -static int TestcpuidCmd (ClientData dummy, - Tcl_Interp* interp, int objc, - Tcl_Obj *CONST objv[]); +static Tcl_ObjCmdProc TestNumUtfCharsCmd; +static Tcl_ObjCmdProc TestFindFirstCmd; +static Tcl_ObjCmdProc TestFindLastCmd; +static Tcl_ObjCmdProc TestHashSystemHashCmd; +#if defined(HAVE_CPUID) || defined(_WIN32) +static Tcl_ObjCmdProc TestcpuidCmd; #endif static Tcl_Filesystem testReportingFilesystem = { "reporting", sizeof(Tcl_Filesystem), TCL_FILESYSTEM_VERSION_1, - &TestReportInFilesystem, /* path in */ - &TestReportDupInternalRep, - &TestReportFreeInternalRep, + TestReportInFilesystem, /* path in */ + TestReportDupInternalRep, + TestReportFreeInternalRep, NULL, /* native to norm */ NULL, /* convert to native */ - &TestReportNormalizePath, + TestReportNormalizePath, NULL, /* path type */ NULL, /* separator */ - &TestReportStat, - &TestReportAccess, - &TestReportOpenFileChannel, - &TestReportMatchInDirectory, - &TestReportUtime, - &TestReportLink, + TestReportStat, + TestReportAccess, + TestReportOpenFileChannel, + TestReportMatchInDirectory, + TestReportUtime, + TestReportLink, NULL /* list volumes */, - &TestReportFileAttrStrings, - &TestReportFileAttrsGet, - &TestReportFileAttrsSet, - &TestReportCreateDirectory, - &TestReportRemoveDirectory, - &TestReportDeleteFile, - &TestReportCopyFile, - &TestReportRenameFile, - &TestReportCopyDirectory, - &TestReportLstat, - (Tcl_FSLoadFileProc *) &TestReportLoadFile, + TestReportFileAttrStrings, + TestReportFileAttrsGet, + TestReportFileAttrsSet, + TestReportCreateDirectory, + TestReportRemoveDirectory, + TestReportDeleteFile, + TestReportCopyFile, + TestReportRenameFile, + TestReportCopyDirectory, + TestReportLstat, + (Tcl_FSLoadFileProc *) TestReportLoadFile, NULL /* cwd */, - &TestReportChdir + TestReportChdir }; static Tcl_Filesystem simpleFilesystem = { "simple", sizeof(Tcl_Filesystem), TCL_FILESYSTEM_VERSION_1, - &SimplePathInFilesystem, + SimplePathInFilesystem, NULL, NULL, /* No internal to normalized, since we don't create any @@ -505,14 +386,14 @@ static Tcl_Filesystem simpleFilesystem = { NULL, NULL, NULL, - &SimpleStat, - &SimpleAccess, - &SimpleOpenFileChannel, - &SimpleMatchInDirectory, + SimpleStat, + SimpleAccess, + SimpleOpenFileChannel, + SimpleMatchInDirectory, NULL, /* We choose not to support symbolic links inside our vfs's */ NULL, - &SimpleListVolumes, + SimpleListVolumes, NULL, NULL, NULL, @@ -569,14 +450,19 @@ Tcltest_Init( { Tcl_ValueType t3ArgTypes[2]; - Tcl_Obj *listPtr; - Tcl_Obj **objv; + Tcl_Obj **objv, *objPtr; int objc, index; static const char *const specialOptions[] = { "-appinitprocerror", "-appinitprocdeleteinterp", "-appinitprocclosestderr", "-appinitprocsetrcfile", NULL }; + if (Tcl_InitStubs(interp, "8.5", 0) == NULL) { + return TCL_ERROR; + } + if (Tcl_TomMath_InitStubs(interp, "8.5") == NULL) { + return TCL_ERROR; + } /* TIP #268: Full patchlevel instead of just major.minor */ if (Tcl_PkgProvide(interp, "Tcltest", TCL_PATCH_LEVEL) == TCL_ERROR) { @@ -587,115 +473,117 @@ Tcltest_Init( * Create additional commands and math functions for testing Tcl. */ - Tcl_CreateCommand(interp, "gettimes", GetTimesCmd, (ClientData) 0, NULL); - Tcl_CreateCommand(interp, "noop", NoopCmd, (ClientData) 0, NULL); - Tcl_CreateObjCommand(interp, "noop", NoopObjCmd, (ClientData) 0, NULL); + Tcl_CreateObjCommand(interp, "gettimes", GetTimesObjCmd, NULL, NULL); + Tcl_CreateCommand(interp, "noop", NoopCmd, NULL, NULL); + Tcl_CreateObjCommand(interp, "noop", NoopObjCmd, NULL, NULL); + Tcl_CreateObjCommand(interp, "testbytestring", TestbytestringObjCmd, NULL, NULL); + Tcl_CreateObjCommand(interp, "teststringbytes", TeststringbytesObjCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testwrongnumargs", TestWrongNumArgsObjCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateObjCommand(interp, "testfilesystem", TestFilesystemObjCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateObjCommand(interp, "testsimplefilesystem", TestSimpleFilesystemObjCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateObjCommand(interp, "testgetindexfromobjstruct", - TestGetIndexFromObjStructObjCmd, (ClientData) 0, NULL); + TestGetIndexFromObjStructObjCmd, NULL, NULL); #ifdef USE_OBSOLETE_FS_HOOKS - Tcl_CreateCommand(interp, "testaccessproc", TestaccessprocCmd, (ClientData) 0, + Tcl_CreateCommand(interp, "testaccessproc", TestaccessprocCmd, NULL, NULL); Tcl_CreateCommand(interp, "testopenfilechannelproc", - TestopenfilechannelprocCmd, (ClientData) 0, NULL); - Tcl_CreateCommand(interp, "teststatproc", TeststatprocCmd, (ClientData) 0, + TestopenfilechannelprocCmd, NULL, NULL); + Tcl_CreateCommand(interp, "teststatproc", TeststatprocCmd, NULL, NULL); #endif - Tcl_CreateCommand(interp, "testasync", TestasyncCmd, (ClientData) 0, NULL); + Tcl_CreateCommand(interp, "testasync", TestasyncCmd, NULL, NULL); Tcl_CreateCommand(interp, "testchannel", TestChannelCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateCommand(interp, "testchannelevent", TestChannelEventCmd, - (ClientData) 0, NULL); - Tcl_CreateCommand(interp, "testcmdtoken", TestcmdtokenCmd, (ClientData) 0, + NULL, NULL); + Tcl_CreateCommand(interp, "testcmdtoken", TestcmdtokenCmd, NULL, NULL); - Tcl_CreateCommand(interp, "testcmdinfo", TestcmdinfoCmd, (ClientData) 0, + Tcl_CreateCommand(interp, "testcmdinfo", TestcmdinfoCmd, NULL, NULL); Tcl_CreateCommand(interp, "testcmdtrace", TestcmdtraceCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateCommand(interp, "testconcatobj", TestconcatobjCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateCommand(interp, "testcreatecommand", TestcreatecommandCmd, - (ClientData) 0, NULL); - Tcl_CreateCommand(interp, "testdcall", TestdcallCmd, (ClientData) 0, NULL); - Tcl_CreateCommand(interp, "testdel", TestdelCmd, (ClientData) 0, NULL); + NULL, NULL); + Tcl_CreateCommand(interp, "testdcall", TestdcallCmd, NULL, NULL); + Tcl_CreateCommand(interp, "testdel", TestdelCmd, NULL, NULL); Tcl_CreateCommand(interp, "testdelassocdata", TestdelassocdataCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateObjCommand(interp, "testdoubledigits", TestdoubledigitsObjCmd, NULL, NULL); Tcl_DStringInit(&dstring); - Tcl_CreateCommand(interp, "testdstring", TestdstringCmd, (ClientData) 0, + Tcl_CreateCommand(interp, "testdstring", TestdstringCmd, NULL, NULL); - Tcl_CreateObjCommand(interp, "testencoding", TestencodingObjCmd, (ClientData) 0, + Tcl_CreateObjCommand(interp, "testencoding", TestencodingObjCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testevalex", TestevalexObjCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateObjCommand(interp, "testevalobjv", TestevalobjvObjCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateObjCommand(interp, "testevent", TesteventObjCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateCommand(interp, "testexithandler", TestexithandlerCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateCommand(interp, "testexprlong", TestexprlongCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateObjCommand(interp, "testexprlongobj", TestexprlongobjCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateCommand(interp, "testexprdouble", TestexprdoubleCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateObjCommand(interp, "testexprdoubleobj", TestexprdoubleobjCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateObjCommand(interp, "testexprparser", TestexprparserObjCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateCommand(interp, "testexprstring", TestexprstringCmd, - (ClientData) 0, NULL); - Tcl_CreateCommand(interp, "testfevent", TestfeventCmd, (ClientData) 0, + NULL, NULL); + Tcl_CreateCommand(interp, "testfevent", TestfeventCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testfilelink", TestfilelinkCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateObjCommand(interp, "testfile", TestfileCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateObjCommand(interp, "testhashsystemhash", - TestHashSystemHashCmd, (ClientData) 0, NULL); + TestHashSystemHashCmd, NULL, NULL); Tcl_CreateCommand(interp, "testgetassocdata", TestgetassocdataCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateCommand(interp, "testgetint", TestgetintCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateCommand(interp, "testgetplatform", TestgetplatformCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateObjCommand(interp, "testgetvarfullname", - TestgetvarfullnameCmd, (ClientData) 0, NULL); + TestgetvarfullnameCmd, NULL, NULL); Tcl_CreateCommand(interp, "testinterpdelete", TestinterpdeleteCmd, - (ClientData) 0, NULL); - Tcl_CreateCommand(interp, "testlink", TestlinkCmd, (ClientData) 0, NULL); - Tcl_CreateObjCommand(interp, "testlocale", TestlocaleCmd, (ClientData) 0, + NULL, NULL); + Tcl_CreateCommand(interp, "testlink", TestlinkCmd, NULL, NULL); + Tcl_CreateObjCommand(interp, "testlocale", TestlocaleCmd, NULL, NULL); - Tcl_CreateCommand(interp, "testpanic", TestpanicCmd, (ClientData) 0, NULL); + Tcl_CreateCommand(interp, "testpanic", TestpanicCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testparser", TestparserObjCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateObjCommand(interp, "testparsevar", TestparsevarObjCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateObjCommand(interp, "testparsevarname", TestparsevarnameObjCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateObjCommand(interp, "testregexp", TestregexpObjCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateObjCommand(interp, "testreturn", TestreturnObjCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateObjCommand(interp, "testsaveresult", TestsaveresultCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateCommand(interp, "testsetassocdata", TestsetassocdataCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateCommand(interp, "testsetnoerr", TestsetCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateCommand(interp, "testseterr", TestsetCmd, (ClientData) TCL_LEAVE_ERR_MSG, NULL); Tcl_CreateCommand(interp, "testset2", Testset2Cmd, (ClientData) TCL_LEAVE_ERR_MSG, NULL); Tcl_CreateCommand(interp, "testseterrorcode", TestseterrorcodeCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateObjCommand(interp, "testsetobjerrorcode", TestsetobjerrorcodeCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testutfnext", @@ -709,28 +597,28 @@ Tcltest_Init( Tcl_CreateObjCommand(interp, "testfindlast", TestFindLastCmd, NULL, NULL); Tcl_CreateCommand(interp, "testsetplatform", TestsetplatformCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateCommand(interp, "teststaticpkg", TeststaticpkgCmd, - (ClientData) 0, NULL); + NULL, NULL); Tcl_CreateCommand(interp, "testtranslatefilename", - TesttranslatefilenameCmd, (ClientData) 0, NULL); - Tcl_CreateCommand(interp, "testupvar", TestupvarCmd, (ClientData) 0, NULL); + TesttranslatefilenameCmd, NULL, NULL); + Tcl_CreateCommand(interp, "testupvar", TestupvarCmd, NULL, NULL); Tcl_CreateMathFunc(interp, "T1", 0, NULL, TestMathFunc, (ClientData) 123); Tcl_CreateMathFunc(interp, "T2", 0, NULL, TestMathFunc, (ClientData) 345); - Tcl_CreateCommand(interp, "testmainthread", TestmainthreadCmd, (ClientData) 0, + Tcl_CreateCommand(interp, "testmainthread", TestmainthreadCmd, NULL, NULL); Tcl_CreateCommand(interp, "testsetmainloop", TestsetmainloopCmd, - (ClientData) NULL, NULL); + NULL, NULL); Tcl_CreateCommand(interp, "testexitmainloop", TestexitmainloopCmd, - (ClientData) NULL, NULL); -#if defined(HAVE_CPUID) || defined(__WIN32__) + NULL, NULL); +#if defined(HAVE_CPUID) || defined(_WIN32) Tcl_CreateObjCommand(interp, "testcpuid", TestcpuidCmd, - (ClientData) 0, NULL); + NULL, NULL); #endif t3ArgTypes[0] = TCL_EITHER; t3ArgTypes[1] = TCL_EITHER; Tcl_CreateMathFunc(interp, "T3", 2, t3ArgTypes, TestMathFunc2, - (ClientData) 0); + NULL); #ifdef TCL_THREADS if (TclThread_Init(interp) != TCL_OK) { @@ -742,9 +630,9 @@ Tcltest_Init( * Check for special options used in ../tests/main.test */ - listPtr = Tcl_GetVar2Ex(interp, "argv", NULL, TCL_GLOBAL_ONLY); - if (listPtr != NULL) { - if (Tcl_ListObjGetElements(interp, listPtr, &objc, &objv) != TCL_OK) { + objPtr = Tcl_GetVar2Ex(interp, "argv", NULL, TCL_GLOBAL_ONLY); + if (objPtr != NULL) { + if (Tcl_ListObjGetElements(interp, objPtr, &objc, &objv) != TCL_OK) { return TCL_ERROR; } if (objc && (Tcl_GetIndexFromObj(NULL, objv[0], specialOptions, NULL, @@ -806,19 +694,19 @@ TestasyncCmd( TestAsyncHandler *asyncPtr, *prevPtr; int id, code; static int nextId = 1; - char buf[TCL_INTEGER_SPACE]; + (void)dummy; if (argc < 2) { wrongNumArgs: - Tcl_SetResult(interp, "wrong # args", TCL_STATIC); + Tcl_AppendResult(interp, "wrong # args", NULL); return TCL_ERROR; } if (strcmp(argv[1], "create") == 0) { if (argc != 3) { goto wrongNumArgs; } - asyncPtr = (TestAsyncHandler *) ckalloc(sizeof(TestAsyncHandler)); - asyncPtr->command = ckalloc(strlen(argv[2]) + 1); + asyncPtr = (TestAsyncHandler *)ckalloc(sizeof(TestAsyncHandler)); + asyncPtr->command = (char *)ckalloc(strlen(argv[2]) + 1); strcpy(asyncPtr->command, argv[2]); Tcl_MutexLock(&asyncTestMutex); asyncPtr->id = nextId; @@ -828,8 +716,7 @@ TestasyncCmd( asyncPtr->nextPtr = firstHandler; firstHandler = asyncPtr; Tcl_MutexUnlock(&asyncTestMutex); - TclFormatInt(buf, asyncPtr->id); - Tcl_SetResult(interp, buf, TCL_VOLATILE); + Tcl_SetObjResult(interp, Tcl_NewIntObj(asyncPtr->id)); } else if (strcmp(argv[1], "delete") == 0) { if (argc == 2) { Tcl_MutexLock(&asyncTestMutex); @@ -838,7 +725,7 @@ TestasyncCmd( firstHandler = asyncPtr->nextPtr; Tcl_AsyncDelete(asyncPtr->handler); ckfree(asyncPtr->command); - ckfree((char *) asyncPtr); + ckfree((char *)asyncPtr); } Tcl_MutexUnlock(&asyncTestMutex); return TCL_OK; @@ -862,7 +749,7 @@ TestasyncCmd( } Tcl_AsyncDelete(asyncPtr->handler); ckfree(asyncPtr->command); - ckfree((char *) asyncPtr); + ckfree((char *)asyncPtr); break; } Tcl_MutexUnlock(&asyncTestMutex); @@ -874,7 +761,7 @@ TestasyncCmd( || (Tcl_GetInt(interp, argv[4], &code) != TCL_OK)) { return TCL_ERROR; } - Tcl_MutexLock(&asyncTestMutex); + Tcl_MutexLock(&asyncTestMutex); for (asyncPtr = firstHandler; asyncPtr != NULL; asyncPtr = asyncPtr->nextPtr) { if (asyncPtr->id == id) { @@ -882,8 +769,8 @@ TestasyncCmd( break; } } - Tcl_MutexUnlock(&asyncTestMutex); - Tcl_SetResult(interp, (char *)argv[3], TCL_VOLATILE); + Tcl_SetObjResult(interp, Tcl_NewStringObj(argv[3], -1)); + Tcl_MutexUnlock(&asyncTestMutex); return code; #ifdef TCL_THREADS } else if (strcmp(argv[1], "marklater") == 0) { @@ -899,9 +786,9 @@ TestasyncCmd( if (asyncPtr->id == id) { Tcl_ThreadId threadID; if (Tcl_CreateThread(&threadID, AsyncThreadProc, - (ClientData) INT2PTR(id), TCL_THREAD_STACK_DEFAULT, + INT2PTR(id), TCL_THREAD_STACK_DEFAULT, TCL_THREAD_NOFLAGS) != TCL_OK) { - Tcl_SetResult(interp, "can't create thread", TCL_STATIC); + Tcl_AppendResult(interp, "can't create thread", NULL); Tcl_MutexUnlock(&asyncTestMutex); return TCL_ERROR; } @@ -938,8 +825,10 @@ AsyncHandlerProc( Tcl_MutexLock(&asyncTestMutex); for (asyncPtr = firstHandler; asyncPtr != NULL; - asyncPtr = asyncPtr->nextPtr) { - if (asyncPtr->id == id) break; + asyncPtr = asyncPtr->nextPtr) { + if (asyncPtr->id == id) { + break; + } } Tcl_MutexUnlock(&asyncTestMutex); @@ -1033,6 +922,7 @@ TestcmdinfoCmd( const char **argv) /* Argument strings. */ { Tcl_CmdInfo info; + (void)dummy; if (argc != 3) { Tcl_AppendResult(interp, "wrong # args: should be \"", argv[0], @@ -1048,7 +938,7 @@ TestcmdinfoCmd( Tcl_DStringResult(interp, &delString); } else if (strcmp(argv[1], "get") == 0) { if (Tcl_GetCommandInfo(interp, argv[2], &info) ==0) { - Tcl_SetResult(interp, "??", TCL_STATIC); + Tcl_AppendResult(interp, "??", NULL); return TCL_OK; } if (info.proc == CmdProc1) { @@ -1079,13 +969,13 @@ TestcmdinfoCmd( info.proc = CmdProc2; info.clientData = (ClientData) "new_command_data"; info.objProc = NULL; - info.objClientData = (ClientData) NULL; + info.objClientData = NULL; info.deleteProc = CmdDelProc2; info.deleteData = (ClientData) "new_delete_data"; if (Tcl_SetCommandInfo(interp, argv[2], &info) == 0) { - Tcl_SetResult(interp, "0", TCL_STATIC); + Tcl_SetObjResult(interp, Tcl_NewIntObj(0)); } else { - Tcl_SetResult(interp, "1", TCL_STATIC); + Tcl_SetObjResult(interp, Tcl_NewIntObj(1)); } } else { Tcl_AppendResult(interp, "bad option \"", argv[1], @@ -1238,7 +1128,7 @@ TestcmdtraceCmd( if (strcmp(argv[1], "tracetest") == 0) { Tcl_DStringInit(&buffer); cmdTrace = Tcl_CreateTrace(interp, 50000, - (Tcl_CmdTraceProc *) CmdTraceProc, (ClientData) &buffer); + (Tcl_CmdTraceProc *) CmdTraceProc, &buffer); result = Tcl_Eval(interp, argv[2]); if (result == TCL_OK) { Tcl_ResetResult(interp); @@ -1255,13 +1145,13 @@ TestcmdtraceCmd( */ cmdTrace = Tcl_CreateTrace(interp, 50000, - (Tcl_CmdTraceProc *) CmdTraceDeleteProc, (ClientData) NULL); + (Tcl_CmdTraceProc *) CmdTraceDeleteProc, NULL); Tcl_Eval(interp, argv[2]); } else if (strcmp(argv[1], "leveltest") == 0) { Interp *iPtr = (Interp *) interp; Tcl_DStringInit(&buffer); cmdTrace = Tcl_CreateTrace(interp, iPtr->numLevels + 4, - (Tcl_CmdTraceProc *) CmdTraceProc, (ClientData) &buffer); + (Tcl_CmdTraceProc *) CmdTraceProc, &buffer); result = Tcl_Eval(interp, argv[2]); if (result == TCL_OK) { Tcl_ResetResult(interp); @@ -1432,12 +1322,12 @@ TestcreatecommandCmd( } if (strcmp(argv[1], "create") == 0) { Tcl_CreateCommand(interp, "test_ns_basic::createdcommand", - CreatedCommandProc, (ClientData) NULL, NULL); + CreatedCommandProc, NULL, NULL); } else if (strcmp(argv[1], "delete") == 0) { Tcl_DeleteCommand(interp, "test_ns_basic::createdcommand"); } else if (strcmp(argv[1], "create2") == 0) { Tcl_CreateCommand(interp, "value:at:", - CreatedCommandProc2, (ClientData) NULL, NULL); + CreatedCommandProc2, NULL, NULL); } else if (strcmp(argv[1], "delete2") == 0) { Tcl_DeleteCommand(interp, "value:at:"); } else { @@ -1594,9 +1484,9 @@ TestdelCmd( return TCL_ERROR; } - dPtr = (DelCmd *) ckalloc(sizeof(DelCmd)); + dPtr = (DelCmd *)ckalloc(sizeof(DelCmd)); dPtr->interp = interp; - dPtr->deleteCmd = (char *) ckalloc((unsigned) (strlen(argv[3]) + 1)); + dPtr->deleteCmd = (char *)ckalloc(strlen(argv[3]) + 1); strcpy(dPtr->deleteCmd, argv[3]); Tcl_CreateCommand(slave, argv[2], DelCmdProc, (ClientData) dPtr, @@ -1615,7 +1505,7 @@ DelCmdProc( Tcl_AppendResult(interp, dPtr->deleteCmd, NULL); ckfree(dPtr->deleteCmd); - ckfree((char *) dPtr); + ckfree((char *)dPtr); return TCL_OK; } @@ -1623,12 +1513,12 @@ static void DelDeleteProc( ClientData clientData) /* String command to evaluate. */ { - DelCmd *dPtr = (DelCmd *) clientData; + DelCmd *dPtr = (DelCmd *)clientData; Tcl_Eval(dPtr->interp, dPtr->deleteCmd); Tcl_ResetResult(dPtr->interp); ckfree(dPtr->deleteCmd); - ckfree((char *) dPtr); + ckfree((char *)dPtr); } /* @@ -1830,13 +1720,13 @@ TestdstringCmd( } else if (strcmp(argv[2], "staticlarge") == 0) { Tcl_SetResult(interp, "first0 first1 first2 first3 first4 first5 first6 first7 first8 first9\nsecond0 second1 second2 second3 second4 second5 second6 second7 second8 second9\nthird0 third1 third2 third3 third4 third5 third6 third7 third8 third9\nfourth0 fourth1 fourth2 fourth3 fourth4 fourth5 fourth6 fourth7 fourth8 fourth9\nfifth0 fifth1 fifth2 fifth3 fifth4 fifth5 fifth6 fifth7 fifth8 fifth9\nsixth0 sixth1 sixth2 sixth3 sixth4 sixth5 sixth6 sixth7 sixth8 sixth9\nseventh0 seventh1 seventh2 seventh3 seventh4 seventh5 seventh6 seventh7 seventh8 seventh9\n", TCL_STATIC); } else if (strcmp(argv[2], "free") == 0) { - Tcl_SetResult(interp, (char *) ckalloc(100), TCL_DYNAMIC); - strcpy(interp->result, "This is a malloc-ed string"); + char *s = ckalloc(100); + strcpy(s, "This is a malloc-ed string"); + Tcl_SetResult(interp, s, TCL_DYNAMIC); } else if (strcmp(argv[2], "special") == 0) { - interp->result = (char *) ckalloc(100); - interp->result += 4; - interp->freeProc = SpecialFree; - strcpy(interp->result, "This is a specially-allocated string"); + char *s = (char*)ckalloc(100) + 16; + strcpy(s, "This is a specially-allocated string"); + Tcl_SetResult(interp, s, SpecialFree); } else { Tcl_AppendResult(interp, "bad gresult option \"", argv[2], "\": must be staticsmall, staticlarge, free, or special", @@ -1845,13 +1735,11 @@ TestdstringCmd( } Tcl_DStringGetResult(interp, &dstring); } else if (strcmp(argv[1], "length") == 0) { - char buf[TCL_INTEGER_SPACE]; if (argc != 2) { goto wrongNumArgs; } - TclFormatInt(buf, Tcl_DStringLength(&dstring)); - Tcl_SetResult(interp, buf, TCL_VOLATILE); + Tcl_SetObjResult(interp, Tcl_NewIntObj(Tcl_DStringLength(&dstring))); } else if (strcmp(argv[1], "result") == 0) { if (argc != 2) { goto wrongNumArgs; @@ -1887,7 +1775,7 @@ TestdstringCmd( static void SpecialFree(blockPtr) char *blockPtr; /* Block to free. */ { - ckfree(blockPtr - 4); + ckfree(blockPtr - 16); } /* @@ -1917,7 +1805,7 @@ TestencodingObjCmd( { Tcl_Encoding encoding; int index, length; - char *string; + const char *string; TclEncoding *encodingPtr; static const char *optionStrings[] = { "create", "delete", NULL @@ -1938,15 +1826,15 @@ TestencodingObjCmd( if (objc != 5) { return TCL_ERROR; } - encodingPtr = (TclEncoding *) ckalloc(sizeof(TclEncoding)); + encodingPtr = (TclEncoding *)ckalloc(sizeof(TclEncoding)); encodingPtr->interp = interp; string = Tcl_GetStringFromObj(objv[3], &length); - encodingPtr->toUtfCmd = (char *) ckalloc((unsigned) (length + 1)); + encodingPtr->toUtfCmd = (char *)ckalloc(length + 1); memcpy(encodingPtr->toUtfCmd, string, (unsigned) length + 1); string = Tcl_GetStringFromObj(objv[4], &length); - encodingPtr->fromUtfCmd = (char *) ckalloc((unsigned) (length + 1)); + encodingPtr->fromUtfCmd = (char *)ckalloc(length + 1); memcpy(encodingPtr->fromUtfCmd, string, (unsigned) (length + 1)); string = Tcl_GetStringFromObj(objv[2], &length); @@ -2041,12 +1929,11 @@ static void EncodingFreeProc( ClientData clientData) /* ClientData associated with type. */ { - TclEncoding *encodingPtr; + TclEncoding *encodingPtr = (TclEncoding *)clientData; - encodingPtr = (TclEncoding *) clientData; - ckfree((char *) encodingPtr->toUtfCmd); - ckfree((char *) encodingPtr->fromUtfCmd); - ckfree((char *) encodingPtr); + ckfree((char *)encodingPtr->toUtfCmd); + ckfree((char *)encodingPtr->fromUtfCmd); + ckfree((char *)encodingPtr); } /* @@ -2074,11 +1961,11 @@ TestevalexObjCmd( Tcl_Obj *const objv[]) /* Argument objects. */ { int length, flags; - char *script; + const char *script; flags = 0; if (objc == 3) { - char *global = Tcl_GetStringFromObj(objv[2], &length); + const char *global = Tcl_GetString(objv[2]); if (strcmp(global, "global") != 0) { Tcl_AppendResult(interp, "bad value \"", global, "\": must be global", NULL); @@ -2184,7 +2071,7 @@ TesteventObjCmd( TestEvent *ev; /* Event to be queued */ if (objc < 2) { - Tcl_WrongNumArgs(interp, 1, objv, "subcommand ?args?"); + Tcl_WrongNumArgs(interp, 1, objv, "subcommand ?arg ...?"); return TCL_ERROR; } if (Tcl_GetIndexFromObj(interp, objv[1], subcommands, "subcommand", @@ -2201,7 +2088,7 @@ TesteventObjCmd( "position specifier", TCL_EXACT, &posIndex) != TCL_OK) { return TCL_ERROR; } - ev = (TestEvent *) ckalloc(sizeof(TestEvent)); + ev = (TestEvent *)ckalloc(sizeof(TestEvent)); ev->header.proc = TesteventProc; ev->header.nextPtr = NULL; ev->interp = interp; @@ -2302,17 +2189,17 @@ TesteventDeleteProc( * to remove */ { TestEvent *ev; /* Event to examine */ - char *evNameStr; + const char *evNameStr; Tcl_Obj *targetName; /* Name of the event(s) to delete */ - char *targetNameStr; + const char *targetNameStr; if (event->proc != TesteventProc) { return 0; } targetName = (Tcl_Obj *) clientData; - targetNameStr = (char *) Tcl_GetStringFromObj(targetName, NULL); + targetNameStr = (char *) Tcl_GetString(targetName); ev = (TestEvent *) event; - evNameStr = Tcl_GetStringFromObj(ev->tag, NULL); + evNameStr = Tcl_GetString(ev->tag); if (strcmp(evNameStr, targetNameStr) == 0) { Tcl_DecrRefCount(ev->tag); Tcl_DecrRefCount(ev->command); @@ -2821,7 +2708,7 @@ TestlinkCmd( static unsigned char ucharVar = 130; static short shortVar = 3000; static unsigned short ushortVar = 60000; - static unsigned int uintVar = 0xbeeffeed; + static unsigned int uintVar = 0xBEEFFEED; static long longVar = 123456789L; static unsigned long ulongVar = 3456789012UL; static float floatVar = 4.5; @@ -3059,7 +2946,7 @@ TestlinkCmd( if (strcmp(argv[5], "-") == 0) { stringVar = NULL; } else { - stringVar = (char *) ckalloc((unsigned) (strlen(argv[5]) + 1)); + stringVar = (char *)ckalloc(strlen(argv[5]) + 1); strcpy(stringVar, argv[5]); } } @@ -3166,7 +3053,7 @@ TestlinkCmd( if (strcmp(argv[5], "-") == 0) { stringVar = NULL; } else { - stringVar = (char *) ckalloc((unsigned) (strlen(argv[5]) + 1)); + stringVar = (char *)ckalloc(strlen(argv[5]) + 1); strcpy(stringVar, argv[5]); } Tcl_UpdateLinkedVar(interp, "string"); @@ -3281,13 +3168,13 @@ TestlocaleCmd( Tcl_Obj *const objv[]) /* The argument objects. */ { int index; - char *locale; + const char *locale; static const char *optionStrings[] = { - "ctype", "numeric", "time", "collate", "monetary", + "ctype", "numeric", "time", "collate", "monetary", "all", NULL }; - static CONST int lcTypes[] = { + static const int lcTypes[] = { LC_CTYPE, LC_NUMERIC, LC_TIME, LC_COLLATE, LC_MONETARY, LC_ALL }; @@ -3478,7 +3365,7 @@ CleanupTestSetassocdataTests( ClientData clientData, /* Data to be released. */ Tcl_Interp *interp) /* Interpreter being deleted. */ { - ckfree((char *) clientData); + ckfree((char *)clientData); } /* @@ -3505,7 +3392,7 @@ TestparserObjCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* The argument objects. */ { - char *script; + const char *script; int length, dummy; Tcl_Parse parse; @@ -3561,7 +3448,7 @@ TestexprparserObjCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* The argument objects. */ { - char *script; + const char *script; int length, dummy; Tcl_Parse parse; @@ -3749,7 +3636,7 @@ TestparsevarnameObjCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* The argument objects. */ { - char *script; + const char *script; int append, length, dummy; Tcl_Parse parse; @@ -3817,7 +3704,7 @@ TestregexpObjCmd( int i, ii, indices, stringLength, match, about; int hasxflags, cflags, eflags; Tcl_RegExp regExpr; - char *string; + const char *string; Tcl_Obj *objPtr; Tcl_RegExpInfo info; static const char *options[] = { @@ -3840,7 +3727,7 @@ TestregexpObjCmd( hasxflags = 0; for (i = 1; i < objc; i++) { - char *name; + const char *name; int index; name = Tcl_GetString(objv[i]); @@ -3885,7 +3772,7 @@ TestregexpObjCmd( endOfForLoop: if (objc - i < hasxflags + 2 - about) { Tcl_WrongNumArgs(interp, 1, objv, - "?switches? exp string ?matchVar? ?subMatchVar subMatchVar ...?"); + "?-switch ...? exp string ?matchVar? ?subMatchVar ...?"); return TCL_ERROR; } objc -= i; @@ -3925,7 +3812,7 @@ TestregexpObjCmd( Tcl_SetIntObj(Tcl_GetObjResult(interp), 0); if (objc > 2 && (cflags®_EXPECT) && indices) { - char *varName; + const char *varName; const char *value; int start, end; char resinfo[TCL_INTEGER_SPACE * 2]; @@ -3940,7 +3827,7 @@ TestregexpObjCmd( return TCL_ERROR; } } else if (cflags & TCL_REG_CANMATCH) { - char *varName; + const char *varName; const char *value; char resinfo[TCL_INTEGER_SPACE * 2]; @@ -4044,7 +3931,7 @@ TestregexpObjCmd( static void TestregexpXflags( - char *string, /* The string of flags. */ + const char *string, /* The string of flags. */ int length, /* The length of the string in bytes. */ int *cflagsPtr, /* compile flags word */ int *eflagsPtr) /* exec flags word */ @@ -4177,7 +4064,7 @@ TestsetassocdataCmd( return TCL_ERROR; } - buf = ckalloc((unsigned) strlen(argv[2]) + 1); + buf = ckalloc(strlen(argv[2]) + 1); strcpy(buf, argv[2]); /* @@ -4419,8 +4306,26 @@ TestseterrorcodeCmd( Tcl_SetResult(interp, "too many args", TCL_STATIC); return TCL_ERROR; } - Tcl_SetErrorCode(interp, argv[1], argv[2], argv[3], argv[4], - argv[5], NULL); + switch (argc) { + case 1: + Tcl_SetErrorCode(interp, "NONE", NULL); + break; + case 2: + Tcl_SetErrorCode(interp, argv[1], NULL); + break; + case 3: + Tcl_SetErrorCode(interp, argv[1], argv[2], NULL); + break; + case 4: + Tcl_SetErrorCode(interp, argv[1], argv[2], argv[3], NULL); + break; + case 5: + Tcl_SetErrorCode(interp, argv[1], argv[2], argv[3], argv[4], NULL); + break; + case 6: + Tcl_SetErrorCode(interp, argv[1], argv[2], argv[3], argv[4], + argv[5], NULL); + } return TCL_ERROR; } @@ -4485,7 +4390,7 @@ TestfeventCmd( if (argc < 2) { Tcl_AppendResult(interp, "wrong # args: should be \"", argv[0], - " option ?arg arg ...?", NULL); + " option ?arg ...?", NULL); return TCL_ERROR; } if (strcmp(argv[1], "cmd") == 0) { @@ -4551,7 +4456,7 @@ TestpanicCmd( int argc, /* Number of arguments. */ const char **argv) /* Argument strings. */ { - const char *argString; + char *argString; /* * Put the arguments into a var args structure @@ -4560,11 +4465,11 @@ TestpanicCmd( argString = Tcl_Merge(argc-1, argv+1); Tcl_Panic("%s", argString); - ckfree((char *)argString); + ckfree(argString); return TCL_OK; } - + static int TestfileCmd( ClientData dummy, /* Not used. */ @@ -4574,7 +4479,7 @@ TestfileCmd( { int force, i, j, result; Tcl_Obj *error = NULL; - char *subcmd; + const char *subcmd; if (argc < 3) { return TCL_ERROR; @@ -4654,7 +4559,7 @@ TestgetvarfullnameCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* The argument objects. */ { - char *name, *arg; + const char *name, *arg; int flags = 0; Tcl_Namespace *namespacePtr; Tcl_CallFrame *framePtr; @@ -4710,7 +4615,7 @@ TestgetvarfullnameCmd( /* *---------------------------------------------------------------------- * - * GetTimesCmd -- + * GetTimesObjCmd -- * * This procedure implements the "gettimes" command. It is used for * computing the time needed for various basic operations such as reading @@ -4726,11 +4631,11 @@ TestgetvarfullnameCmd( */ static int -GetTimesCmd( +GetTimesObjCmd( ClientData unused, /* Unused. */ Tcl_Interp *interp, /* The current interpreter. */ - int argc, /* The number of arguments. */ - const char **argv) /* The argument strings. */ + int objc, /* Number of arguments. (not used)*/ + Tcl_Obj *const dummy[]) /* The argument objects (not used). */ { Interp *iPtr = (Interp *) interp; int i, n; @@ -4739,13 +4644,15 @@ GetTimesCmd( Tcl_Obj *objPtr, **objv; const char *s; char newString[TCL_INTEGER_SPACE]; + (void)objc; + (void)dummy; /* alloc & free 100000 times */ fprintf(stderr, "alloc & free 100000 6 word items\n"); Tcl_GetTime(&start); for (i = 0; i < 100000; i++) { - objPtr = (Tcl_Obj *) ckalloc(sizeof(Tcl_Obj)); - ckfree((char *) objPtr); + objPtr = (Tcl_Obj *)ckalloc(sizeof(Tcl_Obj)); + ckfree((char *)objPtr); } Tcl_GetTime(&stop); timePer = (stop.sec - start.sec)*1000000 + (stop.usec - start.usec); @@ -4766,7 +4673,7 @@ GetTimesCmd( fprintf(stderr, "free 5000 6 word items\n"); Tcl_GetTime(&start); for (i = 0; i < 5000; i++) { - ckfree((char *) objv[i]); + ckfree((char *)objv[i]); } Tcl_GetTime(&stop); timePer = (stop.sec - start.sec)*1000000 + (stop.usec - start.usec); @@ -4792,7 +4699,7 @@ GetTimesCmd( Tcl_GetTime(&stop); timePer = (stop.sec - start.sec)*1000000 + (stop.usec - start.usec); fprintf(stderr, " %.3f usec per Tcl_DecrRefCount\n", timePer/5000); - ckfree((char *) objv); + ckfree((char *)objv); /* TclGetString 100000 times */ fprintf(stderr, "TclGetStringFromObj of \"12345\" 100000 times\n"); @@ -4944,6 +4851,79 @@ NoopObjCmd( /* *---------------------------------------------------------------------- * + * TeststringbytesObjCmd -- + * Returns bytearray value of the bytes in argument string rep + * + * Results: + * Returns the TCL_OK result code. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +static int +TeststringbytesObjCmd( + ClientData dummy, + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* The argument objects. */ +{ + int n; + const unsigned char *p; + (void)dummy; + + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, "value"); + return TCL_ERROR; + } + p = (const unsigned char *)Tcl_GetStringFromObj(objv[1], &n); + Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(p, n)); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * TestbytestringObjCmd -- + * + * This object-based procedure constructs a string which can + * possibly contain invalid UTF-8 bytes. + * + * Results: + * Returns the TCL_OK result code. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +static int +TestbytestringObjCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* The argument objects. */ +{ + int n = 0; + const char *p; + (void)dummy; + + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, "bytearray"); + return TCL_ERROR; + } + + p = (const char *)Tcl_GetByteArrayFromObj(objv[1], &n); + Tcl_SetObjResult(interp, Tcl_NewStringObj(p, n)); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * * TestsetCmd -- * * Implements the "testset{err,noerr}" cmds that are used when testing @@ -4958,11 +4938,10 @@ NoopObjCmd( *---------------------------------------------------------------------- */ - /* ARGSUSED */ static int TestsetCmd( ClientData data, /* Additional flags for Get/SetVar2. */ - register Tcl_Interp *interp,/* Current interpreter. */ + Tcl_Interp *interp,/* Current interpreter. */ int argc, /* Number of arguments. */ const char **argv) /* Argument strings. */ { @@ -4994,7 +4973,7 @@ TestsetCmd( static int Testset2Cmd( ClientData data, /* Additional flags for Get/SetVar2. */ - register Tcl_Interp *interp,/* Current interpreter. */ + Tcl_Interp *interp,/* Current interpreter. */ int argc, /* Number of arguments. */ const char **argv) /* Argument strings. */ { @@ -5041,14 +5020,14 @@ Testset2Cmd( *---------------------------------------------------------------------- */ - /* ARGSUSED */ static int TestsaveresultCmd( ClientData dummy, /* Not used. */ - register Tcl_Interp *interp,/* Current interpreter. */ + Tcl_Interp *interp,/* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* The argument objects. */ { + Interp* iPtr = (Interp*) interp; int discard, result, index; Tcl_SavedResult state; Tcl_Obj *objPtr; @@ -5091,7 +5070,7 @@ TestsaveresultCmd( break; } case RESULT_DYNAMIC: - Tcl_SetResult(interp, "dynamic result", TestsaveresultFree); + Tcl_SetResult(interp, (char *)"dynamic result", TestsaveresultFree); break; case RESULT_OBJECT: objPtr = Tcl_NewStringObj("object result", -1); @@ -5117,7 +5096,7 @@ TestsaveresultCmd( switch ((enum options) index) { case RESULT_DYNAMIC: { - int present = interp->freeProc == TestsaveresultFree; + int present = iPtr->freeProc == TestsaveresultFree; int called = freeCount; Tcl_AppendElement(interp, called ? "called" : "notCalled"); @@ -5394,7 +5373,7 @@ TestmainthreadCmd( * A main loop set by TestsetmainloopCmd below. * * Results: - * None. + * None. * * Side effects: * Event handlers could do anything. @@ -5432,7 +5411,7 @@ MainLoop(void) static int TestsetmainloopCmd( ClientData dummy, /* Not used. */ - register Tcl_Interp *interp,/* Current interpreter. */ + Tcl_Interp *interp,/* Current interpreter. */ int argc, /* Number of arguments. */ const char **argv) /* Argument strings. */ { @@ -5461,7 +5440,7 @@ TestsetmainloopCmd( static int TestexitmainloopCmd( ClientData dummy, /* Not used. */ - register Tcl_Interp *interp,/* Current interpreter. */ + Tcl_Interp *interp,/* Current interpreter. */ int argc, /* Number of arguments. */ const char **argv) /* Argument strings. */ { @@ -5836,7 +5815,7 @@ TestChannelCmd( *nextPtrPtr = curPtr->nextPtr; curPtr->nextPtr = NULL; chan = curPtr->chan; - ckfree((char *) curPtr); + ckfree((char *)curPtr); break; } } @@ -6076,8 +6055,8 @@ TestChannelCmd( return TCL_ERROR; } - TclFormatInt(buf, (long)(size_t)Tcl_GetChannelThread(chan)); - Tcl_AppendResult(interp, buf, NULL); + Tcl_SetObjResult(interp, Tcl_NewWideIntObj( + (Tcl_WideInt) (size_t) Tcl_GetChannelThread(chan))); return TCL_OK; } @@ -6362,7 +6341,7 @@ TestChannelEventCmd( Tcl_DeleteChannelHandler((Tcl_Channel) chanPtr, TclChannelEventScriptInvoker, (ClientData) esPtr); Tcl_DecrRefCount(esPtr->scriptPtr); - ckfree((char *) esPtr); + ckfree((char *)esPtr); return TCL_OK; } @@ -6403,7 +6382,7 @@ TestChannelEventCmd( Tcl_DeleteChannelHandler((Tcl_Channel) chanPtr, TclChannelEventScriptInvoker, (ClientData) esPtr); Tcl_DecrRefCount(esPtr->scriptPtr); - ckfree((char *) esPtr); + ckfree((char *)esPtr); } statePtr->scriptRecordPtr = NULL; return TCL_OK; @@ -6479,7 +6458,7 @@ TestWrongNumArgsObjCmd( Tcl_Obj *const objv[]) /* Argument objects. */ { int i, length; - char *msg; + const char *msg; if (objc < 3) { /* @@ -6534,7 +6513,7 @@ TestGetIndexFromObjStructObjCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - const char *ary[] = { + const char *const ary[] = { "a", "b", "c", "d", "e", "f", NULL, NULL }; int idx,target; @@ -6589,7 +6568,7 @@ TestFilesystemObjCmd( Tcl_Obj *const objv[]) { int res, boolVal; - char *msg; + const char *msg; if (objc != 2) { Tcl_WrongNumArgs(interp, 1, objv, "boolean"); @@ -6605,7 +6584,7 @@ TestFilesystemObjCmd( res = Tcl_FSUnregister(&testReportingFilesystem); msg = (res == TCL_OK) ? "unregistered" : "failed"; } - Tcl_SetResult(interp, msg, TCL_VOLATILE); + Tcl_SetObjResult(interp, Tcl_NewStringObj(msg , -1)); return res; } @@ -6961,7 +6940,7 @@ TestSimpleFilesystemObjCmd( Tcl_Obj *const objv[]) { int res, boolVal; - char *msg; + const char *msg; if (objc != 2) { Tcl_WrongNumArgs(interp, 1, objv, "boolean"); @@ -6977,7 +6956,7 @@ TestSimpleFilesystemObjCmd( res = Tcl_FSUnregister(&simpleFilesystem); msg = (res == TCL_OK) ? "unregistered" : "failed"; } - Tcl_SetResult(interp, msg, TCL_VOLATILE); + Tcl_SetObjResult(interp, Tcl_NewStringObj(msg , -1)); return res; } @@ -7277,7 +7256,7 @@ TestFindLastCmd( return TCL_OK; } -#if defined(HAVE_CPUID) || defined(__WIN32__) +#if defined(HAVE_CPUID) || defined(_WIN32) /* *---------------------------------------------------------------------- * @@ -7339,7 +7318,7 @@ TestcpuidCmd( static int TestHashSystemHashCmd( - ClientData clientData, + ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]) @@ -7351,6 +7330,7 @@ TestHashSystemHashCmd( Tcl_HashTable hash; Tcl_HashEntry *hPtr; int i, isNew, limit = 100; + (void)dummy; if (objc>1 && Tcl_GetIntFromObj(interp, objv[1], &limit)!=TCL_OK) { return TCL_ERROR; @@ -7365,14 +7345,14 @@ TestHashSystemHashCmd( } for (i=0 ; ibytes != NULL) { - ckfree((char *) list1Ptr->bytes); + ckfree((char *)list1Ptr->bytes); list1Ptr->bytes = NULL; } list2Ptr = Tcl_NewStringObj("eeny meeny", -1); Tcl_ListObjLength(NULL, list2Ptr, &len); if (list2Ptr->bytes != NULL) { - ckfree((char *) list2Ptr->bytes); + ckfree((char *)list2Ptr->bytes); list2Ptr->bytes = NULL; } -- cgit v0.12 From a0c779ad224a47f5fe74dd94960022dfbd2b7aa4 Mon Sep 17 00:00:00 2001 From: sebres Date: Thu, 16 Apr 2020 12:55:47 +0000 Subject: zlib, *BO*: fixes possible segfault (or buffer overrun), for instance if limit (max 65K) is set larger as allocated buffer for inflate-input (default 4K) --- generic/tclZlib.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/generic/tclZlib.c b/generic/tclZlib.c index 002c6ae..ed29ff9 100644 --- a/generic/tclZlib.c +++ b/generic/tclZlib.c @@ -3027,7 +3027,9 @@ ZlibTransformInput( * reading over the border. */ - readBytes = Tcl_ReadRaw(cd->parent, cd->inBuffer, cd->readAheadLimit); + readBytes = Tcl_ReadRaw(cd->parent, cd->inBuffer, + cd->readAheadLimit <= cd->inAllocated ? + cd->readAheadLimit : cd->inAllocated); /* * Three cases here: -- cgit v0.12 From c6eae962e7f9f3c91353bcc12415a110885d12f5 Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 16 Apr 2020 16:22:34 +0000 Subject: When we reject overlong sequences, \xC1 is as invalid a lead byte as \xFF. --- generic/tclUtf.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 5ba8ff5..556a5a2 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -66,7 +66,7 @@ static CONST unsigned char totalBytes[256] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, #if TCL_UTF_MAX > 3 4,4,4,4,4, @@ -123,7 +123,9 @@ UtfCount( * Overlong -- * * Utility routine to report whether /src/ points to the start of an - * overlong byte sequence that should be rejected. + * overlong byte sequence that should be rejected. Caller guarantees + * /src/ points to a byte that can lead a multi-byte sequence, and + * that src[0] and src[1] are readable. * * Results: * A boolean. @@ -132,9 +134,7 @@ UtfCount( INLINE static int Overlong( - unsigned char *src) /* Points to lead byte of a UTF-8 byte - * sequence. Caller guarantees it is safe - * to read src[0] and src[1]. */ + unsigned char *src) /* Points to lead byte of a UTF-8 byte sequence */ { switch (*src) { case 0xC0: @@ -144,9 +144,6 @@ Overlong( } /* Reject overlong: \xC0\x81 - \xC0\xBF */ return 1; - case 0xC1: - /* Reject overlong: \xC1\x80 - \xC1\xBF */ - return 1; case 0xE0: if (src[1] < 0xA0) { /* Reject overlong: \xE0\x80\x80 - \xE0\x9F\xBF */ -- cgit v0.12 From 0f4abadd9600089ea03eb89b0aa5533592a8259a Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 16 Apr 2020 17:30:29 +0000 Subject: Convert Overlong() to use a lookup table. --- generic/tclUtf.c | 50 +++++++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 556a5a2..f8d89f8 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -124,46 +124,50 @@ UtfCount( * * Utility routine to report whether /src/ points to the start of an * overlong byte sequence that should be rejected. Caller guarantees - * /src/ points to a byte that can lead a multi-byte sequence, and - * that src[0] and src[1] are readable. + * that src[0] and src[1] are readable, and + * + * (src[0] >= 0xC0) && (src[0] != 0xC1) + * (src[1] >= 0x80) && (src[1] < 0xC0) + * (src[0] < ((TCL_UTF_MAX > 3) ? 0xF8 : 0xF0)) * * Results: * A boolean. *--------------------------------------------------------------------------- */ +static CONST unsigned char overlong[3] = { + 0x80, /* \xD0 -- all sequences valid */ + 0xA0, /* \xE0\x80 through \xE0\x9F are invalid prefixes */ +#if TCL_UTF_MAX > 3 + 0x90 /* \xF0\x80 through \xF0\x8F are invalid prefixes */ +#else + 0xC0 /* Not used, but reject all again for safety. */ +#endif +}; + INLINE static int Overlong( unsigned char *src) /* Points to lead byte of a UTF-8 byte sequence */ { - switch (*src) { - case 0xC0: + unsigned char byte = *src; + + if (byte % 0x10) { + /* Only lead bytes 0xC0, 0xE0, 0xF0 need examination */ + return 0; + } + if (byte == 0xC0) { if (src[1] == 0x80) { /* Valid sequence: \xC0\x80 for \u0000 */ return 0; } /* Reject overlong: \xC0\x81 - \xC0\xBF */ return 1; - case 0xE0: - if (src[1] < 0xA0) { - /* Reject overlong: \xE0\x80\x80 - \xE0\x9F\xBF */ - return 1; - } - /* Valid sequence: \xE0\xA0\x80 for \u0800 , etc. */ - return 0; -#if TCL_UTF_MAX > 3 - case 0xF0: - if (src[1] < 0x90) { - /* Reject overlong: \xF0\x80\x80\x80 - \xF0\x8F\xBF\xBF */ - return 1; - } - /* Valid sequence: \xF0\x90\x80\x80 for \U10000 , etc. */ - return 0 -#endif - default: - /* All other lead bytes lead only valid sequences */ - return 0; } + if (src[1] < overlong[(byte >> 4) - 0x0D]) { + /* Reject overlong */ + return 1; + } + return 0; } /* -- cgit v0.12 From 2b4c279e0ba71a2727bb76740ba90ea2f800fc90 Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 16 Apr 2020 18:14:55 +0000 Subject: Create and use macro TclUtfPrev for Tcl_UtfPrev. --- generic/tclCmdMZ.c | 2 +- generic/tclInt.h | 5 +++++ generic/tclStringObj.c | 6 +++--- generic/tclUtil.c | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 6515d98..f6bdd3e 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -2406,7 +2406,7 @@ StringStartCmd( break; } - next = Tcl_UtfPrev(p, string); + next = TclUtfPrev(p, string); do { next += delta; delta = TclUtfToUniChar(next, &ch); diff --git a/generic/tclInt.h b/generic/tclInt.h index 15bc000..3dc3d1d 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3691,6 +3691,11 @@ MODULE_SCOPE void TclDbInitNewObj(Tcl_Obj *objPtr, CONST char *file, ((*(chPtr) = (unsigned char) *(str)), 1) \ : Tcl_UtfToUniChar(str, chPtr)) +#define TclUtfPrev(src, start) \ + (((src) < (start)+2) ? (start) : \ + ((unsigned char) *(src - 1)) < 0x80 ? (src)-1 : \ + Tcl_UtfPrev(src, start)) + /* *---------------------------------------------------------------- * Macro that encapsulates the logic that determines when it is safe to diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index c3c85dc..d62fc72 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -1139,10 +1139,10 @@ Tcl_AppendLimitedToObj( } eLen = strlen(ellipsis); while (eLen > limit) { - eLen = Tcl_UtfPrev(ellipsis+eLen, ellipsis) - ellipsis; + eLen = TclUtfPrev(ellipsis+eLen, ellipsis) - ellipsis; } - toCopy = Tcl_UtfPrev(bytes+limit+1-eLen, bytes) - bytes; + toCopy = TclUtfPrev(bytes+limit+1-eLen, bytes) - bytes; } /* @@ -2585,7 +2585,7 @@ AppendPrintfToObjVA( * multi-byte characters. */ - q = Tcl_UtfPrev(end, bytes); + q = TclUtfPrev(end, bytes); if (!Tcl_UtfCharComplete(q, (int)(end - q))) { end = q; } diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 3dd9a32..50922bf 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -1554,7 +1554,7 @@ TclTrimRight( const char *q = trim; int pInc = 0, bytesLeft = numTrim; - pp = Tcl_UtfPrev(p, bytes); + pp = TclUtfPrev(p, bytes); do { pp += pInc; pInc = TclUtfToUniChar(pp, &ch1); -- cgit v0.12 From e6faa58e6df3292b2c0735ba4921af4be0e215fa Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 16 Apr 2020 18:40:40 +0000 Subject: More tests and fix for overlong handling in revised Tcl_UtfNext. --- generic/tclUtf.c | 3 +++ tests/utf.test | 23 ++++++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index e41e7a5..00ca94e 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -712,6 +712,9 @@ Tcl_UtfNext( } next++; } + if (Overlong(src)) { + return src + 1; + } return next; } diff --git a/tests/utf.test b/tests/utf.test index 72165f9..02b7002 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -352,9 +352,30 @@ test utf-6.78 {Tcl_UtfNext} testutfnext { test utf-6.79 {Tcl_UtfNext} testutfnext { testutfnext \xF4\xA0\xA0\xA0G\xF8 } 1 -test utf-6.80 {Tcl_UtfNext - overlong sequences} { +test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext { + testutfnext \xC0\x80 +} 2 +test utf-6.81 {Tcl_UtfNext - overlong sequences} testutfnext { testutfnext \xC0\x81 } 1 +test utf-6.82 {Tcl_UtfNext - overlong sequences} testutfnext { + testutfnext \xC1\x80 +} 1 +test utf-6.83 {Tcl_UtfNext - overlong sequences} testutfnext { + testutfnext \xC2\x80 +} 2 +test utf-6.84 {Tcl_UtfNext - overlong sequences} testutfnext { + testutfnext \xE0\x80\x80 +} 1 +test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext { + testutfnext \xE0\xA0\x80 +} 3 +test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext { + testutfnext \xF0\x80\x80\x80 +} 1 +test utf-6.87 {Tcl_UtfNext - overlong sequences} testutfnext { + testutfnext \xF0\x90\x80\x80 +} 1 testConstraint testutfprev [llength [info commands testutfprev]] -- cgit v0.12 From 443928c10f1ac94e6a6adfafb478eb9fa09ac39a Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 16 Apr 2020 18:42:37 +0000 Subject: compiler warning --- generic/tclUtf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 00ca94e..91e9c73 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -712,7 +712,7 @@ Tcl_UtfNext( } next++; } - if (Overlong(src)) { + if (Overlong((unsigned char *)src)) { return src + 1; } return next; -- cgit v0.12 From 60d5424069845124e51d4032f295d913a17454a1 Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 16 Apr 2020 19:02:40 +0000 Subject: More detailed comments. --- generic/tclUtf.c | 70 +++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 24 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 91e9c73..67603af 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -678,13 +678,35 @@ Tcl_UtfFindLast( * * Tcl_UtfNext -- * - * Given a pointer to some current location in a UTF-8 string, move - * forward one character. The caller must ensure that they are not asking - * for the next character after the last character in the string. + * The aim of this routine is to provide a way to iterate forward + * through a UTF-8 string. The caller is expected to pass a non-NULL + * pointer argument /src/ which points to a location within a string. + * (*src) will be read, so /src/ must not point to an unreadable + * location past the end of the string. If /src/ points to the + * beginning of a complete, well-formed and valid UTF_8 byte sequence + * of no more than TCL_UTF_MAX bytes, Tcl_UtfNext returns the pointer + * just past the end of that sequence. In any other circumstance, + * Tcl_UtfNext returns /src/+1. + * + * Because this routine always returns a value > /src/, it is useful + * as a forward iterator that will always make progress. If the string + * is NUL-terminated, Tcl_UtfNext will not read beyond the terminating + * NUL character. If it is not NUL-terminated, the caller must make + * use of the companion routine Tcl_UtfCharComplete to test whether + * there is risk that Tcl_UtfNext will read beyond the end of the string. + * Tcl_UtfNext will never read more than TCL_UTF_MAX bytes. + * + * In a string where all characters are complete and properly formed, + * and /src/ points to the first byte of a character, repeated + * Tcl_UtfNext calls will step to the starting bytes of characters, one + * character at a time. Within those limitations, Tcl_UtfPrev and + * Tcl_UtfNext are inverses. If either condition cannot be met, + * Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and the + * caller will have to take greater care. * * Results: - * The return value is the pointer to the next character in the UTF-8 - * string. + * A pointer to the start of the next character in the string (or to + * the end of the string) as described above. * * Side effects: * None. @@ -725,37 +747,37 @@ Tcl_UtfNext( * * The aim of this routine is to provide a way to move backward * through a UTF-8 string. The caller is expected to pass non-NULL - * pointer arguments start and src. start points to the beginning - * of a string, and src >= start points to a location within (or just - * past the end) of the string. This routine always returns a - * pointer within the string (>= start). When (src == start), it - * returns start. When (src > start), it returns a pointer (< src) - * and (>= src - TCL_UTF_MAX). Subject to these constraints, the - * routine returns a pointer to the earliest byte in the string that - * starts a character when characters are read starting at start and + * pointer arguments /start/ and /src/. /start/ points to the beginning + * of a string, and /src/ (>= /start/) points to a location within (or + * just past the end) of the string. This routine always returns a + * pointer within the string (>= /start/). When (/src/ == /start/), + * it returns /start/. When (/src/ > /start/), it returns a pointer + * (< /src/) and (>= /src/ - TCL_UTF_MAX). Subject to these constraints, + * the routine returns a pointer to the earliest byte in the string that + * starts a character when characters are read starting at /start/ and * that character might include the byte src[-1]. The routine will * examine only those bytes in the range that might be returned. - * It will not examine the byte *src, and because of that cannot + * It will not examine the byte (*src), and because of that cannot * determine for certain in all circumstances whether the character * that begins with the returned pointer will or will not include - * the byte src[-1]. In the scenario, where src points to the end of - * a buffer being filled, the returned pointer point to either the + * the byte src[-1]. In the scenario where /src/ points to the end of + * a buffer being filled, the returned pointer points to either the * final complete character in the string or to the earliest byte * that might start an incomplete character waiting for more bytes to * complete. * - * Because this routine always returns a value < src until the point - * it is forced to return start, it is useful as a backward iterator + * Because this routine always returns a value < /src/ until the point + * it is forced to return /start/, it is useful as a backward iterator * through a string that will always make progress and always be * prevented from running past the beginning of the string. * * In a string where all characters are complete and properly formed, - * and the value of src points to the first byte of a character, - * repeated Tcl_UtfPrev calls will step to the starting bytes of - * characters, one character at a time. Within those limitations, - * Tcl_UtfPrev and Tcl_UtfNext are inverses. If either condition cannot - * be met, Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and - * the caller will have to take greater care. + * and /src/ points to the first byte of a character, repeated + * Tcl_UtfPrev calls will step to the starting bytes of characters, one + * character at a time. Within those limitations, Tcl_UtfPrev and + * Tcl_UtfNext are inverses. If either condition cannot be met, + * Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and the + * caller will have to take greater care. * * Results: * A pointer to the start of a character in the string as described -- cgit v0.12 From c2479a465e6bef08275d47d0277deda87e6e014e Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 16 Apr 2020 19:04:47 +0000 Subject: delete merge litter --- generic/tclTest.c | 1 - 1 file changed, 1 deletion(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index b8507bf..6e0fbed 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -325,7 +325,6 @@ static Tcl_FSPathInFilesystemProc SimplePathInFilesystem; static Tcl_Obj * SimpleRedirect(Tcl_Obj *pathPtr); static Tcl_FSMatchInDirectoryProc SimpleMatchInDirectory; static Tcl_ObjCmdProc TestUtfNextCmd; -static Tcl_ObjCmdProc TestUtfNextCmd; static Tcl_ObjCmdProc TestUtfPrevCmd; static Tcl_ObjCmdProc TestNumUtfCharsCmd; static Tcl_ObjCmdProc TestFindFirstCmd; -- cgit v0.12 From 66197fff215f60690a444b3f2af67a0c3c87c8af Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 16 Apr 2020 19:39:36 +0000 Subject: Improve the docs for Tcl_UtfNext. --- doc/Utf.3 | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/doc/Utf.3 b/doc/Utf.3 index 87d1318..cb82699 100644 --- a/doc/Utf.3 +++ b/doc/Utf.3 @@ -217,11 +217,20 @@ returns a pointer to the last occurrence of the Tcl_UniChar \fIch\fR in the null-terminated UTF-8 string \fIsrc\fR. The null terminator is considered part of the UTF-8 string. .PP -Given \fIsrc\fR, a pointer to some location in a UTF-8 string, -\fBTcl_UtfNext\fR returns a pointer to the next UTF-8 character in the -string. The caller must not ask for the next character after the last -character in the string if the string is not terminated by a null -character. +\fBTcl_UtfNext\fR is used to step forward through a UTF-8 string. +If the UTF-8 string is made up entirely of complete, well-formed, and +valid character byte sequences, and \fIsrc\fR points to the lead byte +of one of those sequences, then repeated calls of \fBTcl_UtfNext\fR will +return pointers to the lead bytes of each character in the string, one +character at a time. In any other circumstance, \fBTcl_UtfNext\fR +returns \fIsrc\fR+1. \fBTcl_UtfNext\fR will always read \fIsrc[0]\fR +and may read as many following bytes (up to a total of \fBTCL_UTF_MAX\fR) +as needed to find the end of the byte sequence. If the string is +\fBNUL\fR-terminated, \fBTcl_UtfNext\fR will not read beyond the terminating +\fBNUL\fR byte. If not, the caller must use the companion routine +\fBTcl_UtfCharComplete\fR to determine whether there is any risk +\fBTcl_UtfNext\fR might read beyond the readable memory occupied +by the string. .PP \fBTcl_UtfPrev\fR is used to step backward through but not beyond the UTF-8 string that begins at \fIstart\fR. If the UTF-8 string is made -- cgit v0.12 From 1924a68fc604e0939f0222a3fe68aa1c35deec7a Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 16 Apr 2020 20:24:49 +0000 Subject: Add "knownBug" testcase, showing a situation in which Tcl_UtfNext doesn't behave as described in the documentation --- tests/utf.test | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/utf.test b/tests/utf.test index 1b7b409..8742a5e 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -153,8 +153,13 @@ test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} { testfindlast [testbytestring "abcbc"] 98 } {bc} +testConstraint testutfnext [llength [info commands testutfnext]] + test utf-6.1 {Tcl_UtfNext} { } {} +test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext knownBug} { + testutfnext \xE8\xA0\xA0 1 +} 3 testConstraint testutfprev [llength [info commands testutfprev]] -- cgit v0.12 From ae8350301e553be8131de6a4cc47b223fb405386 Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 16 Apr 2020 20:28:15 +0000 Subject: merge litter --- generic/tclUtf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 67603af..b5c430b 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -558,7 +558,7 @@ Tcl_NumUtfChars( int length) /* The length of the string in bytes, or -1 * for strlen(string). */ { - register int i; + register int i = 0; /* * The separate implementations are faster. @@ -567,7 +567,6 @@ Tcl_NumUtfChars( * single-byte char case specially. */ - i = 0; if (length < 0) { while ((*src != '\0') && (i < INT_MAX)) { src = TclUtfNext(src); -- cgit v0.12 From d168b44ff69105f80110a01cda37279becc68dc5 Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 16 Apr 2020 20:59:11 +0000 Subject: Adjust test results and implementation for Tcl 8.6 current support of 4-byte sequences in a TCL_UTF_MAX=3 build. --- generic/tclUtf.c | 4 ++-- tests/utf.test | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index cd57d12..d6ba15c 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -897,10 +897,10 @@ Tcl_UtfPrev( /* Continue the search backwards... */ look--; - } while (trailBytesSeen < TCL_UTF_MAX); + } while (trailBytesSeen < /* was TCL_UTF_MAX */ 4); /* - * We've seen TCL_UTF_MAX trail bytes, so we know there will not be a + * We've seen 4 (was TCL_UTF_MAX) trail bytes, so we know there will not be a * properly formed byte sequence to find, and we can stop looking, * accepting the fallback. */ diff --git a/tests/utf.test b/tests/utf.test index 3a2911f..76cf3fe 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -363,7 +363,7 @@ test utf-6.68 {Tcl_UtfNext} testutfnext { } 1 test utf-6.69 {Tcl_UtfNext} testutfnext { testutfnext \xF4\xA0\xA0\xA0 -} 1 +} 4 test utf-6.70 {Tcl_UtfNext} testutfnext { testutfnext \xF4\xA0\xA0\xD0 } 1 @@ -378,22 +378,22 @@ test utf-6.73 {Tcl_UtfNext} testutfnext { } 1 test utf-6.74 {Tcl_UtfNext} testutfnext { testutfnext \xF4\xA0\xA0\xA0G -} 1 +} 4 test utf-6.75 {Tcl_UtfNext} testutfnext { testutfnext \xF4\xA0\xA0\xA0\xA0 -} 1 +} 4 test utf-6.76 {Tcl_UtfNext} testutfnext { testutfnext \xF4\xA0\xA0\xA0\xD0 -} 1 +} 4 test utf-6.77 {Tcl_UtfNext} testutfnext { testutfnext \xF4\xA0\xA0\xA0\xE8 -} 1 +} 4 test utf-6.78 {Tcl_UtfNext} testutfnext { testutfnext \xF4\xA0\xA0\xA0\xF4 -} 1 +} 4 test utf-6.79 {Tcl_UtfNext} testutfnext { testutfnext \xF4\xA0\xA0\xA0G\xF8 -} 1 +} 4 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext { testutfnext \xC0\x80 } 2 -- cgit v0.12 From 335f3c8fb761a9499b9df27bce308a6f631dc082 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 16 Apr 2020 22:06:54 +0000 Subject: Fix more test-cases for TCL_UTF_MAX=3 --- generic/tclUtf.c | 18 ++++++++++-------- tests/utf.test | 2 +- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index d6ba15c..b37d55a 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -82,7 +82,7 @@ static const unsigned char complete[256] = { 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, #endif - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, #if TCL_UTF_MAX > 4 4,4,4,4,4, @@ -589,8 +589,9 @@ Tcl_NumUtfChars( int length) /* The length of the string in bytes, or -1 * for strlen(string). */ { - register int i = 0; - + Tcl_UniChar ch = 0; + int i = 0; + /* * The separate implementations are faster. * @@ -600,19 +601,20 @@ Tcl_NumUtfChars( if (length < 0) { while ((*src != '\0') && (i < INT_MAX)) { - src = TclUtfNext(src); + src += Tcl_UtfToUniChar(src, &ch); i++; } + if (i < 0) i = INT_MAX; /* Bug [2738427] */ } else { - register const char *endPtr = src + length - TCL_UTF_MAX; + const char *endPtr = src + length - TCL_UTF_MAX; while (src < endPtr) { - src = TclUtfNext(src); + src += Tcl_UtfToUniChar(src, &ch); i++; } endPtr += TCL_UTF_MAX; while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { - src = TclUtfNext(src); + src += Tcl_UtfToUniChar(src, &ch); i++; } if (src < endPtr) { @@ -958,7 +960,7 @@ Tcl_UtfAtIndex( register const char *src, /* The UTF-8 string. */ register int index) /* The position of the desired character. */ { -#if 0 +#if 1 /* The Tcl 8.6 implementation */ Tcl_UniChar ch = 0; int len = 0; diff --git a/tests/utf.test b/tests/utf.test index 76cf3fe..b5d1f5c 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -418,7 +418,7 @@ test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext { test utf-6.87 {Tcl_UtfNext - overlong sequences} testutfnext { testutfnext \xF0\x90\x80\x80 } 1 -test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} { +test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext knownBug} { testutfnext \xE8\xA0\xA0 1 } 3 -- cgit v0.12 From 1045aefa35184677d8bede823e91e80cc17db5ee Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 16 Apr 2020 22:19:15 +0000 Subject: Fix build for TCL_UTF_MAX=4. Mark some failing tests with "knownBug". Those still need to be fixed! --- generic/tclUtf.c | 2 +- tests/utf.test | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index b37d55a..0768992 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -972,7 +972,7 @@ Tcl_UtfAtIndex( #if TCL_UTF_MAX == 4 if ((ch >= 0xD800) && (len < 3)) { /* Index points at character following high Surrogate */ - src = TclUtfToUniChar(src, &ch); + src += TclUtfToUniChar(src, &ch); } #endif return src; diff --git a/tests/utf.test b/tests/utf.test index b5d1f5c..a730feb 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -415,7 +415,7 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext { test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext { testutfnext \xF0\x80\x80\x80 } 1 -test utf-6.87 {Tcl_UtfNext - overlong sequences} testutfnext { +test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext knownBug} { # Doesn't work with TCL_UTF_MAX>3 testutfnext \xF0\x90\x80\x80 } 1 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext knownBug} { @@ -628,13 +628,13 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence} testutfprev { test utf-7.38 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\xA0\x80 2 } 1 -test utf-7.39 {Tcl_UtfPrev -- overlong sequence} testutfprev { +test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev knownBug} { # Doesn't work with TCL_UTF_MAX>3 testutfprev A\xF0\x90\x80\x80 } 4 -test utf-7.40 {Tcl_UtfPrev -- overlong sequence} testutfprev { +test utf-7.40 {Tcl_UtfPrev -- overlong sequence} {testutfprev knownBug} { # Doesn't work with TCL_UTF_MAX>3 testutfprev A\xF0\x90\x80\x80 4 } 3 -test utf-7.41 {Tcl_UtfPrev -- overlong sequence} testutfprev { +test utf-7.41 {Tcl_UtfPrev -- overlong sequence} {testutfprev knownBug} { # Doesn't work with TCL_UTF_MAX>3 testutfprev A\xF0\x90\x80\x80 3 } 2 test utf-7.42 {Tcl_UtfPrev -- overlong sequence} testutfprev { -- cgit v0.12 From 882fcc12b24d44674254eabaacfe15be718f3b73 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 17 Apr 2020 03:54:50 +0000 Subject: Fix the bad tests utf-2.11 and utf-6.88 that expected the wrong results. Also reconcile the merge from 8.5 to the new decoupling of bytesequence counts from indexed code unit couints. Docs still need an update. --- generic/tclUtf.c | 50 ++++++++++++++++++++------------------------------ tests/utf.test | 4 ++-- 2 files changed, 22 insertions(+), 32 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index d6ba15c..24fd418 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -589,6 +589,7 @@ Tcl_NumUtfChars( int length) /* The length of the string in bytes, or -1 * for strlen(string). */ { + const char *next; register int i = 0; /* @@ -600,20 +601,23 @@ Tcl_NumUtfChars( if (length < 0) { while ((*src != '\0') && (i < INT_MAX)) { - src = TclUtfNext(src); - i++; + next = TclUtfNext(src); + i += 1 + ((next - src) > 3); + src = next; } } else { register const char *endPtr = src + length - TCL_UTF_MAX; while (src < endPtr) { - src = TclUtfNext(src); - i++; + next = TclUtfNext(src); + i += 1 + ((next - src) > 3); + src = next; } endPtr += TCL_UTF_MAX; while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { - src = TclUtfNext(src); - i++; + next = TclUtfNext(src); + i += 1 + ((next - src) > 3); + src = next; } if (src < endPtr) { i += endPtr - src; @@ -958,33 +962,19 @@ Tcl_UtfAtIndex( register const char *src, /* The UTF-8 string. */ register int index) /* The position of the desired character. */ { -#if 0 -/* The Tcl 8.6 implementation */ - Tcl_UniChar ch = 0; - int len = 0; - while (index-- > 0) { - len = TclUtfToUniChar(src, &ch); - src += len; - } -#if TCL_UTF_MAX == 4 - if ((ch >= 0xD800) && (len < 3)) { - /* Index points at character following high Surrogate */ - src = TclUtfToUniChar(src, &ch); - } -#endif - return src; -#else -/* The Tcl 8.5 implementation */ - while (index > 0) { - index--; - src = TclUtfNext(src); /* NOTE: counts each valid byte sequence - * as one character, maybe including those - * that will get stored as two UCS-2 units - * in the UTF-16 encoding. */ + const char *next = TclUtfNext(src); + + /* + * 4-byte sequences generate two UCS-2 code units in the + * UTF-16 representation, so in the current indexing scheme + * we need to account for an extra index (total of two). + */ + index -= ((next - src) > 3); + + src = next; } return src; -#endif } /* diff --git a/tests/utf.test b/tests/utf.test index 76cf3fe..dd94c54 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -96,7 +96,7 @@ test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} t } {4} test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} testbytestring { string length [testbytestring "\xF4\x90\x80\x80"] -} {4} +} {2} test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestring { string length [testbytestring "\xF8\xA2\xA2\xA2\xA2"] } {5} @@ -420,7 +420,7 @@ test utf-6.87 {Tcl_UtfNext - overlong sequences} testutfnext { } 1 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} { testutfnext \xE8\xA0\xA0 1 -} 3 +} 2 testConstraint testutfprev [llength [info commands testutfprev]] -- cgit v0.12 From e1f67c4c92c08a2f8dfa465b14b877c7572822ec Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 17 Apr 2020 04:08:46 +0000 Subject: Bring the single-byte marker for invalid lead byte \xC1 into the complete table --- generic/tclUtf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 24fd418..fdf2e32 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -82,7 +82,7 @@ static const unsigned char complete[256] = { 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, #endif - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, #if TCL_UTF_MAX > 4 4,4,4,4,4, -- cgit v0.12 From 700b782299b5ac82c829eae5725cdc7b577390a6 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 17 Apr 2020 04:17:51 +0000 Subject: enable the tests on a bug fix branch --- tests/utf.test | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/utf.test b/tests/utf.test index b89be25..2004625 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -415,7 +415,7 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext { test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext { testutfnext \xF0\x80\x80\x80 } 1 -test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext knownBug} { # Doesn't work with TCL_UTF_MAX>3 +test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext} { # Doesn't work with TCL_UTF_MAX>3 testutfnext \xF0\x90\x80\x80 } 1 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} { @@ -628,13 +628,13 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence} testutfprev { test utf-7.38 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\xA0\x80 2 } 1 -test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev knownBug} { # Doesn't work with TCL_UTF_MAX>3 +test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev} { # Doesn't work with TCL_UTF_MAX>3 testutfprev A\xF0\x90\x80\x80 } 4 -test utf-7.40 {Tcl_UtfPrev -- overlong sequence} {testutfprev knownBug} { # Doesn't work with TCL_UTF_MAX>3 +test utf-7.40 {Tcl_UtfPrev -- overlong sequence} {testutfprev} { # Doesn't work with TCL_UTF_MAX>3 testutfprev A\xF0\x90\x80\x80 4 } 3 -test utf-7.41 {Tcl_UtfPrev -- overlong sequence} {testutfprev knownBug} { # Doesn't work with TCL_UTF_MAX>3 +test utf-7.41 {Tcl_UtfPrev -- overlong sequence} {testutfprev} { # Doesn't work with TCL_UTF_MAX>3 testutfprev A\xF0\x90\x80\x80 3 } 2 test utf-7.42 {Tcl_UtfPrev -- overlong sequence} testutfprev { -- cgit v0.12 From c998be1b068916a8009a76867ed4eaed8938d19b Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 17 Apr 2020 04:45:26 +0000 Subject: When supporting 4-byte sequences, make sure the Overlong test does too, and make sure the test results reflect it. --- generic/tclUtf.c | 2 +- tests/utf.test | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index fdf2e32..e637263 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -154,7 +154,7 @@ UtfCount( static CONST unsigned char overlong[3] = { 0x80, /* \xD0 -- all sequences valid */ 0xA0, /* \xE0\x80 through \xE0\x9F are invalid prefixes */ -#if TCL_UTF_MAX > 3 +#if TCL_UTF_MAX >= 3 0x90 /* \xF0\x80 through \xF0\x8F are invalid prefixes */ #else 0xC0 /* Not used, but reject all again for safety. */ diff --git a/tests/utf.test b/tests/utf.test index 2004625..e8b1d51 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -630,13 +630,13 @@ test utf-7.38 {Tcl_UtfPrev -- overlong sequence} testutfprev { } 1 test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev} { # Doesn't work with TCL_UTF_MAX>3 testutfprev A\xF0\x90\x80\x80 -} 4 +} 1 test utf-7.40 {Tcl_UtfPrev -- overlong sequence} {testutfprev} { # Doesn't work with TCL_UTF_MAX>3 testutfprev A\xF0\x90\x80\x80 4 -} 3 +} 1 test utf-7.41 {Tcl_UtfPrev -- overlong sequence} {testutfprev} { # Doesn't work with TCL_UTF_MAX>3 testutfprev A\xF0\x90\x80\x80 3 -} 2 +} 1 test utf-7.42 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xF0\x90\x80\x80 2 } 1 -- cgit v0.12 From 1edc023972ada28abd9649c5e153f0705ba1268d Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 17 Apr 2020 04:51:48 +0000 Subject: more test fixes --- tests/utf.test | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/utf.test b/tests/utf.test index e8b1d51..c584ea1 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -415,9 +415,9 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext { test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext { testutfnext \xF0\x80\x80\x80 } 1 -test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext} { # Doesn't work with TCL_UTF_MAX>3 +test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext} { testutfnext \xF0\x90\x80\x80 -} 1 +} 4 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} { testutfnext \xE8\xA0\xA0 1 } 2 @@ -628,13 +628,13 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence} testutfprev { test utf-7.38 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\xA0\x80 2 } 1 -test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev} { # Doesn't work with TCL_UTF_MAX>3 +test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev} { testutfprev A\xF0\x90\x80\x80 } 1 -test utf-7.40 {Tcl_UtfPrev -- overlong sequence} {testutfprev} { # Doesn't work with TCL_UTF_MAX>3 +test utf-7.40 {Tcl_UtfPrev -- overlong sequence} {testutfprev} { testutfprev A\xF0\x90\x80\x80 4 } 1 -test utf-7.41 {Tcl_UtfPrev -- overlong sequence} {testutfprev} { # Doesn't work with TCL_UTF_MAX>3 +test utf-7.41 {Tcl_UtfPrev -- overlong sequence} {testutfprev} { testutfprev A\xF0\x90\x80\x80 3 } 1 test utf-7.42 {Tcl_UtfPrev -- overlong sequence} testutfprev { -- cgit v0.12 From ad85391b59eba72ecf8c0af3e83f800c61e52f26 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 17 Apr 2020 05:14:43 +0000 Subject: When supporting 4-byte sequences even with TCL_UTF_MAX = 3, need to paramterize a few things differently. (utf-4.11 failures). --- generic/tclUtf.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index e637263..4b5d500 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -84,11 +84,7 @@ static const unsigned char complete[256] = { #endif 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, -#if TCL_UTF_MAX > 4 4,4,4,4,4, -#else - 3,3,3,3,3, /* Tcl_UtfCharComplete() only checks TCL_UTF_MAX bytes */ -#endif 1,1,1,1,1,1,1,1,1,1,1 }; @@ -606,14 +602,14 @@ Tcl_NumUtfChars( src = next; } } else { - register const char *endPtr = src + length - TCL_UTF_MAX; + register const char *endPtr = src + length - /*TCL_UTF_MAX*/ 4; while (src < endPtr) { next = TclUtfNext(src); i += 1 + ((next - src) > 3); src = next; } - endPtr += TCL_UTF_MAX; + endPtr += /*TCL_UTF_MAX*/ 4; while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { next = TclUtfNext(src); i += 1 + ((next - src) > 3); -- cgit v0.12 From b4ecc6b747780c309fbf05d525ecb3c5f74f3ff2 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 17 Apr 2020 09:47:01 +0000 Subject: More test-cases --- tests/utf.test | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/utf.test b/tests/utf.test index 02b7002..7b7b5c2 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -376,6 +376,18 @@ test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext { test utf-6.87 {Tcl_UtfNext - overlong sequences} testutfnext { testutfnext \xF0\x90\x80\x80 } 1 +test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} { + testutfnext \xA0\xA0 +} 1 +test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} { + testutfnext \xE8\xA0\xA0 1 +} 2 +test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} { + testutfnext \x80\x80 +} 1 +test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} { + testutfnext \xF0\x80\x80 1 +} 2 testConstraint testutfprev [llength [info commands testutfprev]] @@ -460,6 +472,9 @@ test utf-7.11.1 {Tcl_UtfPrev} testutfprev { test utf-7.11.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8\xA0\xF8\xA0 3 } 1 +test utf-7.11.3 {Tcl_UtfPrev} testutfprev { + testutfprev A\xE8\xA0\xF8 3 +} 1 test utf-7.12 {Tcl_UtfPrev} testutfprev { testutfprev A\xD0\xA0 } 1 @@ -548,9 +563,15 @@ test utf-7.26 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\x80\x80 } 3 test utf-7.27 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xE0\x80 +} 2 +test utf-7.27.1 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\x80\x80 3 } 2 test utf-7.28 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xE0 +} 1 +test utf-7.28.1 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\x80\x80 2 } 1 test utf-7.29 {Tcl_UtfPrev -- overlong sequence} testutfprev { @@ -607,6 +628,15 @@ test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} testutfprev { test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} testutfprev { testutfprev \xA0\xA0\xA0\xA0 } 3 +test utf-7.47 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev} { + testutfprev \xE8\xA0 +} 0 +test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev} { + testutfprev \xE8\xA0\xA0 2 +} 0 +test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} { + testutfprev \xE8\xA0\x00 2 +} 0 test utf-8.1 {Tcl_UniCharAtIndex: index = 0} { string index abcd 0 -- cgit v0.12 From 4de1b924a8f4f41a574d576427acfea490cd06ca Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 17 Apr 2020 11:02:19 +0000 Subject: Fix implementation of Tcl_UtfAtIndex() for TCL_UTF_MAX=6 (There's no test-case for this in the core-8-6-branch, but there is in core-8-branch). --- generic/tclUtf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 94fa5f6..1ba474e 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -971,14 +971,16 @@ Tcl_UtfAtIndex( register int index) /* The position of the desired character. */ { while (index-- > 0) { - const char *next = TclUtfNext(src); + const char *next = TclUtfNext(src); +#if TCL_UTF_MAX <= 4 /* * 4-byte sequences generate two UCS-2 code units in the * UTF-16 representation, so in the current indexing scheme * we need to account for an extra index (total of two). */ index -= ((next - src) > 3); +#endif src = next; } -- cgit v0.12 From 072e295b8042e53de6d6a51acfeec054a5125588 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 17 Apr 2020 12:20:23 +0000 Subject: Add test-case for Tcl_UtfNext/Tcl_UtfPrev. About 15 of them give the wrong answer, but - at least - this way we record the current state, to be compared with the new implementation coming soon. --- tests/utf.test | 366 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 364 insertions(+), 2 deletions(-) diff --git a/tests/utf.test b/tests/utf.test index 9744703..9bf104a 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -152,8 +152,283 @@ test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} { testfindlast [testbytestring "abcbc"] 98 } {bc} -test utf-6.1 {Tcl_UtfNext} { -} {} +testConstraint testutfnext [llength [info commands testutfnext]] + +test utf-6.1 {Tcl_UtfNext} testutfnext { + # This takes the pointer one past the terminating NUL. + # This is really an invalid call. + testutfnext {} +} 1 +test utf-6.2 {Tcl_UtfNext} testutfnext { + testutfnext A +} 1 +test utf-6.3 {Tcl_UtfNext} testutfnext { + testutfnext AA +} 1 +test utf-6.4 {Tcl_UtfNext} testutfnext { + testutfnext A\xA0 +} 1 +test utf-6.5 {Tcl_UtfNext} testutfnext { + testutfnext A\xD0 +} 1 +test utf-6.6 {Tcl_UtfNext} testutfnext { + testutfnext A\xE8 +} 1 +test utf-6.7 {Tcl_UtfNext} testutfnext { + testutfnext A\xF4 +} 1 +test utf-6.8 {Tcl_UtfNext} testutfnext { + testutfnext A\xF8 +} 1 +test utf-6.9 {Tcl_UtfNext} testutfnext { + testutfnext \xA0 +} 1 +test utf-6.10 {Tcl_UtfNext} testutfnext { + testutfnext \xA0G +} 1 +test utf-6.11 {Tcl_UtfNext} testutfnext { + testutfnext \xA0\xA0 +} 1 +test utf-6.12 {Tcl_UtfNext} testutfnext { + testutfnext \xA0\xD0 +} 1 +test utf-6.13 {Tcl_UtfNext} testutfnext { + testutfnext \xA0\xE8 +} 1 +test utf-6.14 {Tcl_UtfNext} testutfnext { + testutfnext \xA0\xF4 +} 1 +test utf-6.15 {Tcl_UtfNext} testutfnext { + testutfnext \xA0\xF8 +} 1 +test utf-6.16 {Tcl_UtfNext} testutfnext { + testutfnext \xD0 +} 1 +test utf-6.17 {Tcl_UtfNext} testutfnext { + testutfnext \xD0A +} 1 +test utf-6.18 {Tcl_UtfNext} testutfnext { + testutfnext \xD0\xA0 +} 2 +test utf-6.19 {Tcl_UtfNext} testutfnext { + testutfnext \xD0\xD0 +} 1 +test utf-6.20 {Tcl_UtfNext} testutfnext { + testutfnext \xD0\xE8 +} 1 +test utf-6.21 {Tcl_UtfNext} testutfnext { + testutfnext \xD0\xF4 +} 1 +test utf-6.22 {Tcl_UtfNext} testutfnext { + testutfnext \xD0\xF8 +} 1 +test utf-6.23 {Tcl_UtfNext} testutfnext { + testutfnext \xE8 +} 1 +test utf-6.24 {Tcl_UtfNext} testutfnext { + testutfnext \xE8A +} 1 +test utf-6.25 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0 +} 1 +test utf-6.26 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xD0 +} 1 +test utf-6.27 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xE8 +} 1 +test utf-6.28 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xF4 +} 1 +test utf-6.29 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xF8 +} 1 +test utf-6.30 {Tcl_UtfNext} testutfnext { + testutfnext \xF4 +} 1 +test utf-6.31 {Tcl_UtfNext} testutfnext { + testutfnext \xF4A +} 1 +test utf-6.32 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0 +} 1 +test utf-6.33 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xD0 +} 1 +test utf-6.34 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xE8 +} 1 +test utf-6.35 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xF4 +} 1 +test utf-6.36 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xF8 +} 1 +test utf-6.37 {Tcl_UtfNext} testutfnext { + testutfnext \xF8 +} 1 +test utf-6.38 {Tcl_UtfNext} testutfnext { + testutfnext \xF8A +} 1 +test utf-6.39 {Tcl_UtfNext} testutfnext { + testutfnext \xF8\xA0 +} 1 +test utf-6.40 {Tcl_UtfNext} testutfnext { + testutfnext \xF8\xD0 +} 1 +test utf-6.41 {Tcl_UtfNext} testutfnext { + testutfnext \xF8\xE8 +} 1 +test utf-6.42 {Tcl_UtfNext} testutfnext { + testutfnext \xF8\xF4 +} 1 +test utf-6.43 {Tcl_UtfNext} testutfnext { + testutfnext \xF8\xF8 +} 1 +test utf-6.44 {Tcl_UtfNext} testutfnext { + testutfnext \xD0\xA0G +} 2 +test utf-6.45 {Tcl_UtfNext} testutfnext { + testutfnext \xD0\xA0\xA0 +} 2 +test utf-6.46 {Tcl_UtfNext} testutfnext { + testutfnext \xD0\xA0\xD0 +} 2 +test utf-6.47 {Tcl_UtfNext} testutfnext { + testutfnext \xD0\xA0\xE8 +} 2 +test utf-6.48 {Tcl_UtfNext} testutfnext { + testutfnext \xD0\xA0\xF4 +} 2 +test utf-6.49 {Tcl_UtfNext} testutfnext { + testutfnext \xD0\xA0\xF8 +} 2 +test utf-6.50 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0G +} 1 +test utf-6.51 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0\xA0 +} 3 +test utf-6.52 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0\xD0 +} 1 +test utf-6.53 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0\xE8 +} 1 +test utf-6.54 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0\xF4 +} 1 +test utf-6.55 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0\xF8 +} 1 +test utf-6.56 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0G +} 1 +test utf-6.57 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0 +} 1 +test utf-6.58 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xD0 +} 1 +test utf-6.59 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xE8 +} 1 +test utf-6.60 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xF4 +} 1 +test utf-6.61 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xF8 +} 1 +test utf-6.62 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0\xA0G +} 3 +test utf-6.63 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0\xA0\xA0 +} 3 +test utf-6.64 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0\xA0\xD0 +} 3 +test utf-6.65 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0\xA0\xE8 +} 3 +test utf-6.66 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0\xA0\xF4 +} 3 +test utf-6.67 {Tcl_UtfNext} testutfnext { + testutfnext \xE8\xA0\xA0\xF8 +} 3 +test utf-6.68 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0G +} 1 +test utf-6.69 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0\xA0 +} 1 +test utf-6.70 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0\xD0 +} 1 +test utf-6.71 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0\xE8 +} 1 +test utf-6.71 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0\xF4 +} 1 +test utf-6.73 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0\xF8 +} 1 +test utf-6.74 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0\xA0G +} 1 +test utf-6.75 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0\xA0\xA0 +} 1 +test utf-6.76 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0\xA0\xD0 +} 1 +test utf-6.77 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0\xA0\xE8 +} 1 +test utf-6.78 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0\xA0\xF4 +} 1 +test utf-6.79 {Tcl_UtfNext} testutfnext { + testutfnext \xF4\xA0\xA0\xA0G\xF8 +} 1 +test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext { + testutfnext \xC0\x80 +} 2 +test utf-6.81 {Tcl_UtfNext - overlong sequences} testutfnext { + testutfnext \xC0\x81 +} 1 +test utf-6.82 {Tcl_UtfNext - overlong sequences} testutfnext { + testutfnext \xC1\x80 +} 1 +test utf-6.83 {Tcl_UtfNext - overlong sequences} testutfnext { + testutfnext \xC2\x80 +} 2 +test utf-6.84 {Tcl_UtfNext - overlong sequences} testutfnext { + testutfnext \xE0\x80\x80 +} 1 +test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext { + testutfnext \xE0\xA0\x80 +} 3 +test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext { + testutfnext \xF0\x80\x80\x80 +} 1 +test utf-6.87 {Tcl_UtfNext - overlong sequences} testutfnext { + testutfnext \xF0\x90\x80\x80 +} 4 +test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} testutfnext { + testutfnext \xA0\xA0 +} 1 +test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} testutfnext { + testutfnext \xE8\xA0\xA0 1 +} 2 +test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} testutfnext { + testutfnext \x80\x80 +} 1 +test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} testutfnext { + testutfnext \xF0\x80\x80 1 +} 2 testConstraint testutfprev [llength [info commands testutfprev]] @@ -238,6 +513,9 @@ test utf-7.11.1 {Tcl_UtfPrev} testutfprev { test utf-7.11.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8\xA0\xF8\xA0 3 } 1 +test utf-7.11.3 {Tcl_UtfPrev} testutfprev { + testutfprev A\xE8\xA0\xF8 3 +} 1 test utf-7.12 {Tcl_UtfPrev} testutfprev { testutfprev A\xD0\xA0 } 1 @@ -316,6 +594,90 @@ test utf-7.22 {Tcl_UtfPrev} testutfprev { test utf-7.23 {Tcl_UtfPrev} testutfprev { testutfprev A\xA0\xA0\xA0\xA0 } 4 +test utf-7.24 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xC0\x81 +} 1 +test utf-7.25 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xC0\x81 2 +} 1 +test utf-7.26 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xE0\x80\x80 +} 1 +test utf-7.27 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xE0\x80 +} 1 +test utf-7.27.1 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xE0\x80\x80 3 +} 1 +test utf-7.28 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xE0 +} 1 +test utf-7.28.1 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xE0\x80\x80 2 +} 1 +test utf-7.29 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xF0\x80\x80\x80 +} 1 +test utf-7.30 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xF0\x80\x80\x80 4 +} 1 +test utf-7.31 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xF0\x80\x80\x80 3 +} 1 +test utf-7.32 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xF0\x80\x80\x80 2 +} 1 +test utf-7.33 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xC0\x80 +} 1 +test utf-7.34 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xC1\x80 +} 1 +test utf-7.35 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xC2\x80 +} 1 +test utf-7.36 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xE0\xA0\x80 +} 1 +test utf-7.37 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xE0\xA0\x80 3 +} 1 +test utf-7.38 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xE0\xA0\x80 2 +} 1 +test utf-7.39 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xF0\x90\x80\x80 +} 1 +test utf-7.40 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xF0\x90\x80\x80 4 +} 1 +test utf-7.41 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xF0\x90\x80\x80 3 +} 1 +test utf-7.42 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xF0\x90\x80\x80 2 +} 1 +test utf-7.43 {Tcl_UtfPrev -- no lead byte at start} testutfprev { + testutfprev \xA0 +} 0 +test utf-7.44 {Tcl_UtfPrev -- no lead byte at start} testutfprev { + testutfprev \xA0\xA0 +} 1 +test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} testutfprev { + testutfprev \xA0\xA0\xA0 +} 2 +test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} testutfprev { + testutfprev \xA0\xA0\xA0\xA0 +} 3 +test utf-7.47 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} testutfprev { + testutfprev \xE8\xA0 +} 0 +test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} testutfprev { + testutfprev \xE8\xA0\xA0 2 +} 0 +test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} testutfprev { + testutfprev \xE8\xA0\x00 2 +} 0 test utf-8.1 {Tcl_UniCharAtIndex: index = 0} { string index abcd 0 -- cgit v0.12 From aea602a07ae4eba9adb21f2d11c7f96b5f632f79 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 17 Apr 2020 12:49:32 +0000 Subject: Clean-up some unnecessary spacing. --- compat/unistd.h | 2 +- generic/tclCompile.h | 2 +- generic/tclStubInit.c | 2 +- generic/tclStubLib.c | 2 +- generic/tclTomMath.h | 10 +++++----- libtommath/tommath.h | 12 ++++++------ libtommath/tommath_superclass.h | 4 ++-- unix/tclUnixFCmd.c | 2 +- unix/tclUnixFile.c | 2 +- unix/tclUnixPort.h | 6 +++--- unix/tclUnixThrd.h | 2 +- unix/tclUnixTime.c | 4 ++-- unix/tclXtTest.c | 2 +- win/tclWinInt.h | 34 +++++++++++++++++----------------- 14 files changed, 43 insertions(+), 43 deletions(-) diff --git a/compat/unistd.h b/compat/unistd.h index 2de5bd0..a8f14f2 100644 --- a/compat/unistd.h +++ b/compat/unistd.h @@ -20,7 +20,7 @@ #define NULL 0 #endif -/* +/* * Strict POSIX stuff goes here. Extensions go down below, in the ifndef * _POSIX_SOURCE section. */ diff --git a/generic/tclCompile.h b/generic/tclCompile.h index 9ee60c3..ccfa4be 100644 --- a/generic/tclCompile.h +++ b/generic/tclCompile.h @@ -1257,7 +1257,7 @@ MODULE_SCOPE int TclWordKnownAtCompileTime(Tcl_Token *tokenPtr, * If the second macro is defined, logging to file starts immediately, * otherwise only after the first call to [tcl::dtrace]. Note that the debug * probe data is always computed, even when it is not logged to file. - * + * * Defining the third macro enables debug logging of inst probes (disabled * by default due to the significant performance impact). */ diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index d6f1da9..1a83752 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -1,4 +1,4 @@ -/* +/* * tclStubInit.c -- * * This file contains the initializers for the Tcl stub vectors. diff --git a/generic/tclStubLib.c b/generic/tclStubLib.c index 31fc865..c895ffe 100644 --- a/generic/tclStubLib.c +++ b/generic/tclStubLib.c @@ -142,7 +142,7 @@ TclTomMathInitializeStubs( const char* packageName = "tcl::tommath"; const char* errMsg = NULL; ClientData pkgClientData = NULL; - const char* actualVersion = + const char* actualVersion = tclStubsPtr->tcl_PkgRequireEx(interp, packageName, version, exact, &pkgClientData); TclTomMathStubs* stubsPtr = (TclTomMathStubs*) pkgClientData; if (actualVersion == NULL) { diff --git a/generic/tclTomMath.h b/generic/tclTomMath.h index b219405..1e455fe 100644 --- a/generic/tclTomMath.h +++ b/generic/tclTomMath.h @@ -80,10 +80,10 @@ extern "C" { #define DIGIT_BIT 60 #else /* this is the default case, 28-bit digits */ - + /* this is to make porting into LibTomCrypt easier :-) */ #ifndef CRYPT - #if defined(_MSC_VER) || defined(__BORLANDC__) + #if defined(_MSC_VER) || defined(__BORLANDC__) typedef unsigned __int64 ulong64; typedef signed __int64 long64; #else @@ -98,21 +98,21 @@ extern "C" { #endif typedef ulong64 mp_word; -#ifdef MP_31BIT +#ifdef MP_31BIT /* this is an extension that uses 31-bit digits */ #define DIGIT_BIT 31 #else /* default case is 28-bit digits, defines MP_28BIT as a handy macro to test */ #define DIGIT_BIT 28 #define MP_28BIT -#endif +#endif #endif /* define heap macros */ #if 0 /* these are macros in tclTomMathDecls.h */ #ifndef CRYPT /* default to libc stuff */ - #ifndef XMALLOC + #ifndef XMALLOC #define XMALLOC malloc #define XFREE free #define XREALLOC realloc diff --git a/libtommath/tommath.h b/libtommath/tommath.h index df460f6..babb478 100644 --- a/libtommath/tommath.h +++ b/libtommath/tommath.h @@ -36,7 +36,7 @@ extern "C" { /* detect 64-bit mode if possible */ -#if defined(__x86_64__) +#if defined(__x86_64__) #if !(defined(MP_64BIT) && defined(MP_16BIT) && defined(MP_8BIT)) #define MP_64BIT #endif @@ -69,10 +69,10 @@ extern "C" { #define DIGIT_BIT 60 #else /* this is the default case, 28-bit digits */ - + /* this is to make porting into LibTomCrypt easier :-) */ #ifndef CRYPT - #if defined(_MSC_VER) || defined(__BORLANDC__) + #if defined(_MSC_VER) || defined(__BORLANDC__) typedef unsigned __int64 ulong64; typedef signed __int64 long64; #else @@ -84,20 +84,20 @@ extern "C" { typedef unsigned long mp_digit; typedef ulong64 mp_word; -#ifdef MP_31BIT +#ifdef MP_31BIT /* this is an extension that uses 31-bit digits */ #define DIGIT_BIT 31 #else /* default case is 28-bit digits, defines MP_28BIT as a handy macro to test */ #define DIGIT_BIT 28 #define MP_28BIT -#endif +#endif #endif /* define heap macros */ #ifndef CRYPT /* default to libc stuff */ - #ifndef XMALLOC + #ifndef XMALLOC #define XMALLOC malloc #define XFREE free #define XREALLOC realloc diff --git a/libtommath/tommath_superclass.h b/libtommath/tommath_superclass.h index e3926df..c4ec19f 100644 --- a/libtommath/tommath_superclass.h +++ b/libtommath/tommath_superclass.h @@ -60,9 +60,9 @@ #undef BN_FAST_MP_INVMOD_C /* To safely undefine these you have to make sure your RSA key won't exceed the Comba threshold - * which is roughly 255 digits [7140 bits for 32-bit machines, 15300 bits for 64-bit machines] + * which is roughly 255 digits [7140 bits for 32-bit machines, 15300 bits for 64-bit machines] * which means roughly speaking you can handle upto 2536-bit RSA keys with these defined without - * trouble. + * trouble. */ #undef BN_S_MP_MUL_DIGS_C #undef BN_S_MP_SQR_C diff --git a/unix/tclUnixFCmd.c b/unix/tclUnixFCmd.c index 7360c32..a582223 100644 --- a/unix/tclUnixFCmd.c +++ b/unix/tclUnixFCmd.c @@ -1051,7 +1051,7 @@ TraverseUnixTree( unsigned short pathlen = ent->fts_pathlen - sourceLen; int type; Tcl_StatBuf *statBufPtr = NULL; - + if (info == FTS_DNR || info == FTS_ERR || info == FTS_NS) { errfile = ent->fts_path; break; diff --git a/unix/tclUnixFile.c b/unix/tclUnixFile.c index 038cbf8..3b09799 100644 --- a/unix/tclUnixFile.c +++ b/unix/tclUnixFile.c @@ -332,7 +332,7 @@ TclpMatchInDirectory( matchHiddenPat = (pattern[0] == '.') || ((pattern[0] == '\\') && (pattern[1] == '.')); - matchHidden = matchHiddenPat + matchHidden = matchHiddenPat || (types && (types->perm & TCL_GLOB_PERM_HIDDEN)); while ((entryPtr = TclOSreaddir(d)) != NULL) { /* INTL: Native. */ Tcl_DString utfDs; diff --git a/unix/tclUnixPort.h b/unix/tclUnixPort.h index 086dd91..0e1bce8 100644 --- a/unix/tclUnixPort.h +++ b/unix/tclUnixPort.h @@ -590,8 +590,8 @@ extern char **environ; /* *--------------------------------------------------------------------------- - * The following macros and declarations represent the interface between - * generic and unix-specific parts of Tcl. Some of the macros may override + * The following macros and declarations represent the interface between + * generic and unix-specific parts of Tcl. Some of the macros may override * functions declared in tclInt.h. *--------------------------------------------------------------------------- */ @@ -608,7 +608,7 @@ typedef int socklen_t; #endif /* - * The following macros have trivial definitions, allowing generic code to + * The following macros have trivial definitions, allowing generic code to * address platform-specific issues. */ diff --git a/unix/tclUnixThrd.h b/unix/tclUnixThrd.h index 6a73132..f03b530 100644 --- a/unix/tclUnixThrd.h +++ b/unix/tclUnixThrd.h @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. */ - + #ifndef _TCLUNIXTHRD #define _TCLUNIXTHRD diff --git a/unix/tclUnixTime.c b/unix/tclUnixTime.c index 4860876..a6572f9 100644 --- a/unix/tclUnixTime.c +++ b/unix/tclUnixTime.c @@ -228,7 +228,7 @@ TclpWideClicksToNanoseconds( #ifdef MAC_OSX_TCL static mach_timebase_info_data_t tb; static uint64_t maxClicksForUInt64; - + if (!tb.denom) { mach_timebase_info(&tb); maxClicksForUInt64 = UINT64_MAX / tb.numer; @@ -251,7 +251,7 @@ TclpWideClicksToNanoseconds( * * TclpWideClickInMicrosec -- * - * This procedure return scale to convert click values from the + * This procedure return scale to convert click values from the * TclpGetWideClicks native resolution to microsecond resolution * and back. * diff --git a/unix/tclXtTest.c b/unix/tclXtTest.c index 8437f2a..1393dfe 100644 --- a/unix/tclXtTest.c +++ b/unix/tclXtTest.c @@ -1,4 +1,4 @@ -/* +/* * tclXtTest.c -- * * Contains commands for Xt notifier specific tests on Unix. diff --git a/win/tclWinInt.h b/win/tclWinInt.h index af6619f..39790a0 100644 --- a/win/tclWinInt.h +++ b/win/tclWinInt.h @@ -59,7 +59,7 @@ typedef struct TCLEXCEPTION_REGISTRATION { #endif /* - * The following structure keeps track of whether we are using the + * The following structure keeps track of whether we are using the * multi-byte or the wide-character interfaces to the operating system. * System calls should be made through the following function table. */ @@ -76,10 +76,10 @@ typedef struct TclWinProcs { TCHAR *(WINAPI *charLowerProc)(TCHAR *); BOOL (WINAPI *copyFileProc)(CONST TCHAR *, CONST TCHAR *, BOOL); BOOL (WINAPI *createDirectoryProc)(CONST TCHAR *, LPSECURITY_ATTRIBUTES); - HANDLE (WINAPI *createFileProc)(CONST TCHAR *, DWORD, DWORD, + HANDLE (WINAPI *createFileProc)(CONST TCHAR *, DWORD, DWORD, LPSECURITY_ATTRIBUTES, DWORD, DWORD, HANDLE); - BOOL (WINAPI *createProcessProc)(CONST TCHAR *, TCHAR *, - LPSECURITY_ATTRIBUTES, LPSECURITY_ATTRIBUTES, BOOL, DWORD, + BOOL (WINAPI *createProcessProc)(CONST TCHAR *, TCHAR *, + LPSECURITY_ATTRIBUTES, LPSECURITY_ATTRIBUTES, BOOL, DWORD, LPVOID, CONST TCHAR *, LPSTARTUPINFOA, LPPROCESS_INFORMATION); BOOL (WINAPI *deleteFileProc)(CONST TCHAR *); HANDLE (WINAPI *findFirstFileProc)(CONST TCHAR *, WIN32_FIND_DATAT *); @@ -87,35 +87,35 @@ typedef struct TclWinProcs { BOOL (WINAPI *getComputerNameProc)(WCHAR *, LPDWORD); DWORD (WINAPI *getCurrentDirectoryProc)(DWORD, WCHAR *); DWORD (WINAPI *getFileAttributesProc)(CONST TCHAR *); - DWORD (WINAPI *getFullPathNameProc)(CONST TCHAR *, DWORD nBufferLength, + DWORD (WINAPI *getFullPathNameProc)(CONST TCHAR *, DWORD nBufferLength, WCHAR *, TCHAR **); DWORD (WINAPI *getModuleFileNameProc)(HMODULE, WCHAR *, int); - DWORD (WINAPI *getShortPathNameProc)(CONST TCHAR *, WCHAR *, DWORD); - UINT (WINAPI *getTempFileNameProc)(CONST TCHAR *, CONST TCHAR *, UINT, + DWORD (WINAPI *getShortPathNameProc)(CONST TCHAR *, WCHAR *, DWORD); + UINT (WINAPI *getTempFileNameProc)(CONST TCHAR *, CONST TCHAR *, UINT, WCHAR *); DWORD (WINAPI *getTempPathProc)(DWORD, WCHAR *); - BOOL (WINAPI *getVolumeInformationProc)(CONST TCHAR *, WCHAR *, DWORD, + BOOL (WINAPI *getVolumeInformationProc)(CONST TCHAR *, WCHAR *, DWORD, LPDWORD, LPDWORD, LPDWORD, WCHAR *, DWORD); HINSTANCE (WINAPI *loadLibraryExProc)(CONST TCHAR *, HANDLE, DWORD); TCHAR (WINAPI *lstrcpyProc)(WCHAR *, CONST TCHAR *); BOOL (WINAPI *moveFileProc)(CONST TCHAR *, CONST TCHAR *); BOOL (WINAPI *removeDirectoryProc)(CONST TCHAR *); - DWORD (WINAPI *searchPathProc)(CONST TCHAR *, CONST TCHAR *, + DWORD (WINAPI *searchPathProc)(CONST TCHAR *, CONST TCHAR *, CONST TCHAR *, DWORD, WCHAR *, TCHAR **); BOOL (WINAPI *setCurrentDirectoryProc)(CONST TCHAR *); BOOL (WINAPI *setFileAttributesProc)(CONST TCHAR *, DWORD); - /* + /* * These two function pointers will only be set when * Tcl_FindExecutable is called. If you don't ever call that * function, the application will crash whenever WinTcl tries to call * functions through these null pointers. That is not a bug in Tcl * -- Tcl_FindExecutable is obligatory in recent Tcl releases. */ - BOOL (WINAPI *getFileAttributesExProc)(CONST TCHAR *, + BOOL (WINAPI *getFileAttributesExProc)(CONST TCHAR *, GET_FILEEX_INFO_LEVELS, LPVOID); - BOOL (WINAPI *createHardLinkProc)(CONST TCHAR*, CONST TCHAR*, + BOOL (WINAPI *createHardLinkProc)(CONST TCHAR*, CONST TCHAR*, LPSECURITY_ATTRIBUTES); - + /* deleted INT (__cdecl *utimeProc)(CONST TCHAR*, struct _utimbuf *); */ /* These two are also NULL at start; see comment above */ HANDLE (WINAPI *findFirstFileExProc)(CONST TCHAR*, UINT, @@ -123,7 +123,7 @@ typedef struct TclWinProcs { LPVOID, DWORD); BOOL (WINAPI *getVolumeNameForVMPProc)(CONST TCHAR*, TCHAR*, DWORD); DWORD (WINAPI *getLongPathNameProc)(CONST TCHAR*, TCHAR*, DWORD); - /* + /* * These six are for the security sdk to get correct file * permissions on NT, 2000, XP, etc. On 95,98,ME they are * always null. @@ -131,9 +131,9 @@ typedef struct TclWinProcs { BOOL (WINAPI *getFileSecurityProc)(LPCTSTR lpFileName, SECURITY_INFORMATION RequestedInformation, PSECURITY_DESCRIPTOR pSecurityDescriptor, - DWORD nLength, + DWORD nLength, LPDWORD lpnLengthNeeded); - BOOL (WINAPI *impersonateSelfProc) (SECURITY_IMPERSONATION_LEVEL + BOOL (WINAPI *impersonateSelfProc) (SECURITY_IMPERSONATION_LEVEL ImpersonationLevel); BOOL (WINAPI *openThreadTokenProc) (HANDLE ThreadHandle, DWORD DesiredAccess, BOOL OpenAsSelf, @@ -191,7 +191,7 @@ MODULE_SCOPE HANDLE TclWinSerialOpen(HANDLE handle, CONST TCHAR *name, DWORD access); MODULE_SCOPE int TclWinSymLinkCopyDirectory(CONST TCHAR* LinkOriginal, CONST TCHAR* LinkCopy); -MODULE_SCOPE int TclWinSymLinkDelete(CONST TCHAR* LinkOriginal, +MODULE_SCOPE int TclWinSymLinkDelete(CONST TCHAR* LinkOriginal, int linkOnly); #if defined(TCL_THREADS) && defined(USE_THREAD_ALLOC) MODULE_SCOPE void TclWinFreeAllocCache(void); -- cgit v0.12 From 54b62ddd622894072bea921ed8c3a5af0d7d3de9 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 17 Apr 2020 13:01:35 +0000 Subject: Unbreak shared windows build. Remove some ARGSUSED usage. --- generic/tclTest.c | 49 ++++++++++++++++++++----------------------------- unix/tclUnixChan.c | 6 ------ unix/tclUnixPipe.c | 4 ---- win/tclWinPipe.c | 1 - win/tclWinSock.c | 3 --- 5 files changed, 20 insertions(+), 43 deletions(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index 6e0fbed..8c29aa7 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -457,12 +457,6 @@ Tcltest_Init( "-appinitprocclosestderr", "-appinitprocsetrcfile", NULL }; - if (Tcl_InitStubs(interp, "8.5", 0) == NULL) { - return TCL_ERROR; - } - if (Tcl_TomMath_InitStubs(interp, "8.5") == NULL) { - return TCL_ERROR; - } /* TIP #268: Full patchlevel instead of just major.minor */ if (Tcl_PkgProvide(interp, "Tcltest", TCL_PATCH_LEVEL) == TCL_ERROR) { @@ -683,7 +677,6 @@ Tcltest_Init( *---------------------------------------------------------------------- */ - /* ARGSUSED */ static int TestasyncCmd( ClientData dummy, /* Not used. */ @@ -913,7 +906,7 @@ AsyncThreadProc( *---------------------------------------------------------------------- */ - /* ARGSUSED */ + static int TestcmdinfoCmd( ClientData dummy, /* Not used. */ @@ -985,7 +978,6 @@ TestcmdinfoCmd( return TCL_OK; } - /*ARGSUSED*/ static int CmdProc1( ClientData clientData, /* String to return. */ @@ -997,7 +989,6 @@ CmdProc1( return TCL_OK; } - /*ARGSUSED*/ static int CmdProc2( ClientData clientData, /* String to return. */ @@ -1044,7 +1035,7 @@ CmdDelProc2( *---------------------------------------------------------------------- */ - /* ARGSUSED */ + static int TestcmdtokenCmd( ClientData dummy, /* Not used. */ @@ -1108,7 +1099,7 @@ TestcmdtokenCmd( *---------------------------------------------------------------------- */ - /* ARGSUSED */ + static int TestcmdtraceCmd( ClientData dummy, /* Not used. */ @@ -1398,7 +1389,7 @@ CreatedCommandProc2( *---------------------------------------------------------------------- */ - /* ARGSUSED */ + static int TestdcallCmd( ClientData dummy, /* Not used. */ @@ -1463,7 +1454,7 @@ DelCallbackProc( *---------------------------------------------------------------------- */ - /* ARGSUSED */ + static int TestdelCmd( ClientData dummy, /* Not used. */ @@ -1668,7 +1659,7 @@ TestdoubledigitsObjCmd(ClientData unused, *---------------------------------------------------------------------- */ - /* ARGSUSED */ + static int TestdstringCmd( ClientData dummy, /* Not used. */ @@ -1795,7 +1786,7 @@ static void SpecialFree(blockPtr) *---------------------------------------------------------------------- */ - /* ARGSUSED */ + static int TestencodingObjCmd( ClientData dummy, /* Not used. */ @@ -2650,7 +2641,7 @@ TestgetplatformCmd( *---------------------------------------------------------------------- */ - /* ARGSUSED */ + static int TestinterpdeleteCmd( ClientData dummy, /* Not used. */ @@ -2691,7 +2682,7 @@ TestinterpdeleteCmd( *---------------------------------------------------------------------- */ - /* ARGSUSED */ + static int TestlinkCmd( ClientData dummy, /* Not used. */ @@ -3222,7 +3213,7 @@ TestlocaleCmd( *---------------------------------------------------------------------- */ - /* ARGSUSED */ + static int TestMathFunc( ClientData clientData, /* Integer value to return. */ @@ -3252,7 +3243,7 @@ TestMathFunc( *---------------------------------------------------------------------- */ - /* ARGSUSED */ + static int TestMathFunc2( ClientData clientData, /* Integer value to return. */ @@ -3359,7 +3350,7 @@ TestMathFunc2( * *---------------------------------------------------------------------- */ - /* ARGSUSED */ + static void CleanupTestSetassocdataTests( ClientData clientData, /* Data to be released. */ @@ -3693,7 +3684,7 @@ TestparsevarnameObjCmd( *---------------------------------------------------------------------- */ - /* ARGSUSED */ + static int TestregexpObjCmd( ClientData dummy, /* Not used. */ @@ -4019,7 +4010,7 @@ TestregexpXflags( *---------------------------------------------------------------------- */ - /* ARGSUSED */ + static int TestreturnObjCmd( ClientData dummy, /* Not used. */ @@ -4241,7 +4232,7 @@ TesttranslatefilenameCmd( *---------------------------------------------------------------------- */ - /* ARGSUSED */ + static int TestupvarCmd( ClientData dummy, /* Not used. */ @@ -4294,7 +4285,7 @@ TestupvarCmd( *---------------------------------------------------------------------- */ - /* ARGSUSED */ + static int TestseterrorcodeCmd( ClientData dummy, /* Not used. */ @@ -4347,7 +4338,7 @@ TestseterrorcodeCmd( *---------------------------------------------------------------------- */ - /* ARGSUSED */ + static int TestsetobjerrorcodeCmd( ClientData dummy, /* Not used. */ @@ -4376,7 +4367,7 @@ TestsetobjerrorcodeCmd( *---------------------------------------------------------------------- */ - /* ARGSUSED */ + static int TestfeventCmd( ClientData clientData, /* Not used. */ @@ -5768,7 +5759,7 @@ TestOpenFileChannelProc3( *---------------------------------------------------------------------- */ - /* ARGSUSED */ + static int TestChannelCmd( ClientData clientData, /* Not used. */ @@ -6237,7 +6228,7 @@ TestChannelCmd( *---------------------------------------------------------------------- */ - /* ARGSUSED */ + static int TestChannelEventCmd( ClientData dummy, /* Not used. */ diff --git a/unix/tclUnixChan.c b/unix/tclUnixChan.c index 9cac4ae..d3207e2 100644 --- a/unix/tclUnixChan.c +++ b/unix/tclUnixChan.c @@ -344,7 +344,6 @@ static Tcl_ChannelType tcpChannelType = { *---------------------------------------------------------------------- */ - /* ARGSUSED */ static int FileBlockModeProc( ClientData instanceData, /* File state. */ @@ -1840,7 +1839,6 @@ Tcl_MakeFileChannel( *---------------------------------------------------------------------- */ - /* ARGSUSED */ static int TcpBlockModeProc( ClientData instanceData, /* Socket state. */ @@ -1938,7 +1936,6 @@ WaitForConnect( *---------------------------------------------------------------------- */ - /* ARGSUSED */ static int TcpInputProc( ClientData instanceData, /* Socket state. */ @@ -2033,7 +2030,6 @@ TcpOutputProc( *---------------------------------------------------------------------- */ - /* ARGSUSED */ static int TcpCloseProc( ClientData instanceData, /* The socket to close. */ @@ -2331,7 +2327,6 @@ TcpWatchProc( *---------------------------------------------------------------------- */ - /* ARGSUSED */ static int TcpGetHandleProc( ClientData instanceData, /* The socket state. */ @@ -2853,7 +2848,6 @@ Tcl_OpenTcpServer( *---------------------------------------------------------------------- */ - /* ARGSUSED */ static void TcpAccept( ClientData data, /* Callback token. */ diff --git a/unix/tclUnixPipe.c b/unix/tclUnixPipe.c index d0a5e53..f2a2daa 100644 --- a/unix/tclUnixPipe.c +++ b/unix/tclUnixPipe.c @@ -410,7 +410,6 @@ TclpCloseFile( *--------------------------------------------------------------------------- */ - /* ARGSUSED */ int TclpCreateProcess( Tcl_Interp *interp, /* Interpreter in which to leave errors that @@ -889,7 +888,6 @@ TclGetAndDetachPids( *---------------------------------------------------------------------- */ - /* ARGSUSED */ static int PipeBlockModeProc( ClientData instanceData, /* Pipe state. */ @@ -933,7 +931,6 @@ PipeBlockModeProc( *---------------------------------------------------------------------- */ - /* ARGSUSED */ static int PipeCloseProc( ClientData instanceData, /* The pipe to close. */ @@ -1231,7 +1228,6 @@ Tcl_WaitPid( *---------------------------------------------------------------------- */ - /* ARGSUSED */ int Tcl_PidObjCmd( ClientData dummy, /* Not used. */ diff --git a/win/tclWinPipe.c b/win/tclWinPipe.c index e33273b..cdb955f 100644 --- a/win/tclWinPipe.c +++ b/win/tclWinPipe.c @@ -2782,7 +2782,6 @@ TclWinAddProcess( *---------------------------------------------------------------------- */ - /* ARGSUSED */ int Tcl_PidObjCmd( ClientData dummy, /* Not used. */ diff --git a/win/tclWinSock.c b/win/tclWinSock.c index 8f565b9..882aa4a 100644 --- a/win/tclWinSock.c +++ b/win/tclWinSock.c @@ -358,7 +358,6 @@ InitSockets(void) *---------------------------------------------------------------------- */ - /* ARGSUSED */ static int SocketsEnabled(void) { @@ -387,7 +386,6 @@ SocketsEnabled(void) *---------------------------------------------------------------------- */ - /* ARGSUSED */ static void SocketExitHandler( ClientData clientData) /* Not used. */ @@ -786,7 +784,6 @@ TcpBlockProc( *---------------------------------------------------------------------- */ - /* ARGSUSED */ static int TcpCloseProc( ClientData instanceData, /* The socket to close. */ -- cgit v0.12 From d569ea2c8be5fab1d16b51b2bdb862b5fe5a9283 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 17 Apr 2020 13:31:06 +0000 Subject: Add mem-debug build to Travis. Update Xcode from 8 to 8.3, 9 to 9.2 and 11.3 to 11.4 --- .travis.yml | 59 +++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 10 deletions(-) diff --git a/.travis.yml b/.travis.yml index 02e3102..e10ca7c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,5 @@ sudo: false language: c - matrix: include: # Testing on Linux with various compilers @@ -24,6 +23,13 @@ matrix: env: - BUILD_DIR=unix - CFGOPT="--enable-symbols" + - name: "Linux/GCC/Mem-Debug" + os: linux + dist: bionic + compiler: gcc + env: + - BUILD_DIR=unix + - CFGOPT="--enable-symbols=mem" # Older versions of GCC... - name: "Linux/GCC 7/Shared" os: linux @@ -82,15 +88,17 @@ matrix: env: - BUILD_DIR=unix - CFGOPT="--enable-symbols" -# Testing on Mac, various styles - - name: "macOS/Xcode 11.3/Shared/Unix-like" - os: osx - osx_image: xcode11.3 + - name: "Linux/Clang/Mem-Debug" + os: linux + dist: bionic + compiler: clang env: - BUILD_DIR=unix - - name: "macOS/Xcode 11.3/Shared" + - CFGOPT="--enable-symbols=mem" +# Testing on Mac, various styles + - name: "macOS/Xcode 11.4/Shared" os: osx - osx_image: xcode11.3 + osx_image: xcode11.4 env: - BUILD_DIR=macosx install: [] @@ -98,6 +106,12 @@ matrix: - make all # The styles=develop avoids some weird problems on OSX - make test styles=develop + - name: "macOS/Xcode 11.4/Shared/Unix-like" + os: osx + osx_image: xcode11.4 + env: + - BUILD_DIR=unix +# Older MacOS versions - name: "macOS/Xcode 11/Shared" os: osx osx_image: xcode11 @@ -114,14 +128,14 @@ matrix: script: *mactest - name: "macOS/Xcode 9/Shared" os: osx - osx_image: xcode9 + osx_image: xcode9.2 env: - BUILD_DIR=macosx install: [] script: *mactest - name: "macOS/Xcode 8/Shared" os: osx - osx_image: xcode8 + osx_image: xcode8.3 env: - BUILD_DIR=macosx install: [] @@ -197,6 +211,15 @@ matrix: script: - cmd.exe //C vcvarsall.bat x64 '&&' nmake 'OPTS=symbols,threads' '-f' makefile.vc all tcltest - cmd.exe //C vcvarsall.bat x64 '&&' nmake 'OPTS=symbols,threads' '-f' makefile.vc test + - name: "Windows/MSVC/Mem-Debug" + os: windows + compiler: cl + env: *vcenv + before_install: *vcpreinst + install: [] + script: + - cmd.exe //C vcvarsall.bat x64 '&&' nmake 'STATS=memdbg OPTS=threads' '-f' makefile.vc all tcltest + - cmd.exe //C vcvarsall.bat x64 '&&' nmake 'STATS=memdbg OPTS=threads' '-f' makefile.vc test # Test on Windows with MSVC native (32-bit) - name: "Windows/MSVC-x86/Shared" os: windows @@ -225,6 +248,15 @@ matrix: script: - cmd.exe //C vcvarsall.bat x86 '&&' nmake 'OPTS=symbols,threads' '-f' makefile.vc all tcltest - cmd.exe //C vcvarsall.bat x86 '&&' nmake 'OPTS=symbols,threads' '-f' makefile.vc test + - name: "Windows/MSVC-x86/Mem-Debug" + os: windows + compiler: cl + env: *vcenv + before_install: *vcpreinst + install: [] + script: + - cmd.exe //C vcvarsall.bat x86 '&&' nmake 'STATS=memdbg OPTS=threads' '-f' makefile.vc all tcltest + - cmd.exe //C vcvarsall.bat x86 '&&' nmake 'STATS=memdbg OPTS=threads' '-f' makefile.vc test # Test on Windows with GCC native - name: "Windows/GCC/Shared" os: windows @@ -233,7 +265,7 @@ matrix: - BUILD_DIR=win - CFGOPT="--enable-64bit --enable-threads" before_install: &makepreinst - - choco install make + - choco install -y make - cd ${BUILD_DIR} - name: "Windows/GCC/Static" os: windows @@ -271,6 +303,13 @@ matrix: - BUILD_DIR=win - CFGOPT="--enable-threads --enable-symbols" before_install: *makepreinst + - name: "Windows/GCC-x86/Mem-Debug" + os: windows + compiler: gcc + env: + - BUILD_DIR=win + - CFGOPT="--enable-threads --enable-symbols=mem" + before_install: *makepreinst before_install: - cd ${BUILD_DIR} install: -- cgit v0.12 From 4b0626ddd7e1f7450781deb2508d94a98c8db93d Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 17 Apr 2020 16:56:51 +0000 Subject: Bring back the test utf-2.11; it fails in a TCL_UTF_MAX=4 build. --- tests/utf.test | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/utf.test b/tests/utf.test index 7b7b5c2..a22dafe 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -13,6 +13,8 @@ if {[lsearch [namespace children] ::tcltest] == -1} { namespace import -force ::tcltest::* } +testConstraint testbytestring [llength [info commands testbytestring]] + catch {unset x} test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} { @@ -59,6 +61,12 @@ test utf-2.8 {Tcl_UtfToUniChar: longer UTF sequences not supported} { string length [bytestring "\xF4\xA2\xA2\xA2"] } {4} +test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, invalid} testbytestring { + # Would decode to U+110000 but that is outside the Unicode range. + string length [testbytestring "\xF4\x90\x80\x80"] +} {4} + + test utf-3.1 {Tcl_UtfCharComplete} { } {} -- cgit v0.12 From 1e5043ff451573bf735f6aec84208af7f0c24cc2 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 17 Apr 2020 20:15:07 +0000 Subject: Backport a collection of tests for consistency between branches. --- tests/utf.test | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tests/utf.test b/tests/utf.test index a22dafe..ff4f4a9 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -57,15 +57,22 @@ test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} { test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} { string length [bytestring "\xE4\xb9\x8e"] } {1} -test utf-2.8 {Tcl_UtfToUniChar: longer UTF sequences not supported} { - string length [bytestring "\xF4\xA2\xA2\xA2"] +test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring} -body { + string length [testbytestring "\xF0\x90\x80\x80"] +} -result {4} +test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring} -body { + string length [testbytestring "\xF4\x8F\xBF\xBF"] +} -result {4} +test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring { + string length [testbytestring "\xF0\x8F\xBF\xBF"] } {4} - -test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, invalid} testbytestring { +test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} testbytestring { # Would decode to U+110000 but that is outside the Unicode range. string length [testbytestring "\xF4\x90\x80\x80"] } {4} - +test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestring { + string length [testbytestring "\xF8\xA2\xA2\xA2\xA2"] +} {5} test utf-3.1 {Tcl_UtfCharComplete} { } {} -- cgit v0.12 From 7067acad796b0536c589101c3f61fbae9fd268aa Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 17 Apr 2020 20:23:49 +0000 Subject: Corrections for many tests, changing lead byte \xF4 to \xF2. The tested sequences were always intended to be valid 4-byte sequences. Also a few errors with greedy \xHHHH . --- tests/utf.test | 92 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/tests/utf.test b/tests/utf.test index ff4f4a9..7953a68 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -149,7 +149,7 @@ test utf-6.6 {Tcl_UtfNext} testutfnext { testutfnext A\xE8 } 1 test utf-6.7 {Tcl_UtfNext} testutfnext { - testutfnext A\xF4 + testutfnext A\xF2 } 1 test utf-6.8 {Tcl_UtfNext} testutfnext { testutfnext A\xF8 @@ -170,7 +170,7 @@ test utf-6.13 {Tcl_UtfNext} testutfnext { testutfnext \xA0\xE8 } 1 test utf-6.14 {Tcl_UtfNext} testutfnext { - testutfnext \xA0\xF4 + testutfnext \xA0\xF2 } 1 test utf-6.15 {Tcl_UtfNext} testutfnext { testutfnext \xA0\xF8 @@ -179,7 +179,7 @@ test utf-6.16 {Tcl_UtfNext} testutfnext { testutfnext \xD0 } 1 test utf-6.17 {Tcl_UtfNext} testutfnext { - testutfnext \xD0A + testutfnext \xD0G } 1 test utf-6.18 {Tcl_UtfNext} testutfnext { testutfnext \xD0\xA0 @@ -191,7 +191,7 @@ test utf-6.20 {Tcl_UtfNext} testutfnext { testutfnext \xD0\xE8 } 1 test utf-6.21 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xF4 + testutfnext \xD0\xF2 } 1 test utf-6.22 {Tcl_UtfNext} testutfnext { testutfnext \xD0\xF8 @@ -200,7 +200,7 @@ test utf-6.23 {Tcl_UtfNext} testutfnext { testutfnext \xE8 } 1 test utf-6.24 {Tcl_UtfNext} testutfnext { - testutfnext \xE8A + testutfnext \xE8G } 1 test utf-6.25 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xA0 @@ -212,37 +212,37 @@ test utf-6.27 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xE8 } 1 test utf-6.28 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xF4 + testutfnext \xE8\xF2 } 1 test utf-6.29 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xF8 } 1 test utf-6.30 {Tcl_UtfNext} testutfnext { - testutfnext \xF4 + testutfnext \xF2 } 1 test utf-6.31 {Tcl_UtfNext} testutfnext { - testutfnext \xF4A + testutfnext \xF2G } 1 test utf-6.32 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0 + testutfnext \xF2\xA0 } 1 test utf-6.33 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xD0 + testutfnext \xF2\xD0 } 1 test utf-6.34 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xE8 + testutfnext \xF2\xE8 } 1 test utf-6.35 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xF4 + testutfnext \xF2\xF2 } 1 test utf-6.36 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xF8 + testutfnext \xF2\xF8 } 1 test utf-6.37 {Tcl_UtfNext} testutfnext { testutfnext \xF8 } 1 test utf-6.38 {Tcl_UtfNext} testutfnext { - testutfnext \xF8A + testutfnext \xF8G } 1 test utf-6.39 {Tcl_UtfNext} testutfnext { testutfnext \xF8\xA0 @@ -254,7 +254,7 @@ test utf-6.41 {Tcl_UtfNext} testutfnext { testutfnext \xF8\xE8 } 1 test utf-6.42 {Tcl_UtfNext} testutfnext { - testutfnext \xF8\xF4 + testutfnext \xF8\xF2 } 1 test utf-6.43 {Tcl_UtfNext} testutfnext { testutfnext \xF8\xF8 @@ -272,7 +272,7 @@ test utf-6.47 {Tcl_UtfNext} testutfnext { testutfnext \xD0\xA0\xE8 } 2 test utf-6.48 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0\xF4 + testutfnext \xD0\xA0\xF2 } 2 test utf-6.49 {Tcl_UtfNext} testutfnext { testutfnext \xD0\xA0\xF8 @@ -290,28 +290,28 @@ test utf-6.53 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xA0\xE8 } 1 test utf-6.54 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xF4 + testutfnext \xE8\xA0\xF2 } 1 test utf-6.55 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xA0\xF8 } 1 test utf-6.56 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0G + testutfnext \xF2\xA0G } 1 test utf-6.57 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0 + testutfnext \xF2\xA0\xA0 } 1 test utf-6.58 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xD0 + testutfnext \xF2\xA0\xD0 } 1 test utf-6.59 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xE8 + testutfnext \xF2\xA0\xE8 } 1 test utf-6.60 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xF4 + testutfnext \xF2\xA0\xF2 } 1 test utf-6.61 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xF8 + testutfnext \xF2\xA0\xF8 } 1 test utf-6.62 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xA0\xA0G @@ -326,46 +326,46 @@ test utf-6.65 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xA0\xA0\xE8 } 3 test utf-6.66 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0\xF4 + testutfnext \xE8\xA0\xA0\xF2 } 3 test utf-6.67 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xA0\xA0\xF8 } 3 test utf-6.68 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0G + testutfnext \xF2\xA0\xA0G } 1 test utf-6.69 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xA0 + testutfnext \xF2\xA0\xA0\xA0 } 1 test utf-6.70 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xD0 + testutfnext \xF2\xA0\xA0\xD0 } 1 test utf-6.71 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xE8 + testutfnext \xF2\xA0\xA0\xE8 } 1 test utf-6.71 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xF4 + testutfnext \xF2\xA0\xA0\xF2 } 1 test utf-6.73 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xF8 + testutfnext \xF2\xA0\xA0\xF8 } 1 test utf-6.74 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xA0G + testutfnext \xF2\xA0\xA0\xA0G } 1 test utf-6.75 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xA0\xA0 + testutfnext \xF2\xA0\xA0\xA0\xA0 } 1 test utf-6.76 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xA0\xD0 + testutfnext \xF2\xA0\xA0\xA0\xD0 } 1 test utf-6.77 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xA0\xE8 + testutfnext \xF2\xA0\xA0\xA0\xE8 } 1 test utf-6.78 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xA0\xF4 + testutfnext \xF2\xA0\xA0\xA0\xF2 } 1 test utf-6.79 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xA0G\xF8 + testutfnext \xF2\xA0\xA0\xA0G\xF8 } 1 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext { testutfnext \xC0\x80 @@ -425,13 +425,13 @@ test utf-7.4.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xF8\xA0\xA0 2 } 1 test utf-7.5 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4 + testutfprev A\xF2 } 1 test utf-7.5.1 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0\xA0\xA0 2 + testutfprev A\xF2\xA0\xA0\xA0 2 } 1 test utf-7.5.2 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xF8\xA0\xA0 2 + testutfprev A\xF2\xF8\xA0\xA0 2 } 1 test utf-7.6 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8 @@ -470,13 +470,13 @@ test utf-7.9.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xF8\xA0 3 } 2 test utf-7.10 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0 + testutfprev A\xF2\xA0 } 2 test utf-7.10.1 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0\xA0\xA0 3 + testutfprev A\xF2\xA0\xA0\xA0 3 } 2 test utf-7.10.2 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0\xF8\xA0 3 + testutfprev A\xF2\xA0\xF8\xA0 3 } 2 test utf-7.11 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8\xA0 @@ -518,13 +518,13 @@ test utf-7.14.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xA0\xF8 4 } 3 test utf-7.15 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0\xA0 + testutfprev A\xF2\xA0\xA0 } 3 test utf-7.15.1 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0\xA0\xA0 4 + testutfprev A\xF2\xA0\xA0\xA0 4 } 3 test utf-7.15.2 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0\xA0\xF8 4 + testutfprev A\xF2\xA0\xA0\xF8 4 } 3 test utf-7.16 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8\xA0\xA0 @@ -557,7 +557,7 @@ test utf-7.19 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xA0\xA0 } 4 test utf-7.20 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0\xA0\xA0 + testutfprev A\xF2\xA0\xA0\xA0 } 4 test utf-7.21 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8\xA0\xA0\xA0 -- cgit v0.12 From e19b1f1306ccd36f01270f8594f9315dbdf39846 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 17 Apr 2020 20:38:38 +0000 Subject: [493dccc2de] Coverage that Tcl_UtfPrev also checks the upper range validity. --- tests/utf.test | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/utf.test b/tests/utf.test index 7953a68..6d87928 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -652,6 +652,30 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {te test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} { testutfprev \xE8\xA0\x00 2 } 0 +test utf-7.48 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x8F\xBF\xBF +} 4 +test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x8F\xBF\xBF 4 +} 3 +test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x8F\xBF\xBF 3 +} 2 +test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x8F\xBF\xBF 2 +} 1 +test utf-7.49 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x90\x80\x80 +} 4 +test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x90\x80\x80 4 +} 3 +test utf-7.49.2 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x90\x80\x80 3 +} 2 +test utf-7.49.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x90\x80\x80 2 +} 1 test utf-8.1 {Tcl_UniCharAtIndex: index = 0} { string index abcd 0 -- cgit v0.12 From 4520aa1ca30a7b09dc9cfc4bc9007aa262793711 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 17 Apr 2020 21:03:30 +0000 Subject: More tests explicitly for Tcl_UtfNext near validity boundary U+110000 --- tests/utf.test | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/utf.test b/tests/utf.test index 6d87928..01e0bb2 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -403,6 +403,12 @@ test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {te test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} { testutfnext \xF0\x80\x80 1 } 2 +test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext { + testutfnext \xF4\x8F\xBF\xBF +} 1 +test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext { + testutfnext \xF4\x90\x80\x80 +} 1 testConstraint testutfprev [llength [info commands testutfprev]] -- cgit v0.12 From aa9bb7f9e401573bc8c79e8336fdb74636b2702f Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 17 Apr 2020 21:07:09 +0000 Subject: [493dccc2de] Revise sequence validity check to reject out of range decodes too. --- generic/tclUtf.c | 53 +++++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index b5c430b..1883804 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -81,7 +81,7 @@ static CONST unsigned char totalBytes[256] = { */ static int UtfCount(int ch); -static int Overlong(unsigned char *src); +static int Invalid(unsigned char *src); /* *--------------------------------------------------------------------------- @@ -120,51 +120,52 @@ UtfCount( /* *--------------------------------------------------------------------------- * - * Overlong -- + * Invalid -- * * Utility routine to report whether /src/ points to the start of an - * overlong byte sequence that should be rejected. Caller guarantees - * that src[0] and src[1] are readable, and + * invald byte sequence that should be rejected. This might be because + * it is an overlong encoding, or because it encodes something out of + * the proper range. Caller guarantees that src[0] and src[1] are + * readable, and * * (src[0] >= 0xC0) && (src[0] != 0xC1) * (src[1] >= 0x80) && (src[1] < 0xC0) - * (src[0] < ((TCL_UTF_MAX > 3) ? 0xF8 : 0xF0)) + * (src[0] < ((TCL_UTF_MAX > 3) ? 0xF5 : 0xF0)) * * Results: * A boolean. *--------------------------------------------------------------------------- */ -static CONST unsigned char overlong[3] = { - 0x80, /* \xD0 -- all sequences valid */ - 0xA0, /* \xE0\x80 through \xE0\x9F are invalid prefixes */ +static CONST unsigned char bounds[28] = { + 0x80, 0x80, /* \xC0 accepts \x80 only */ + 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, + 0x80, 0xBF, /* (\xC4 - \xDC) -- all sequences valid */ + 0xA0, 0xBF, /* \xE0\x80 through \xE0\x9F are invalid prefixes */ + 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, /* (\xE4 - \xEC) -- all valid */ #if TCL_UTF_MAX > 3 - 0x90 /* \xF0\x80 through \xF0\x8F are invalid prefixes */ + 0x90, 0xBF, /* \xF0\x80 through \xF0\x8F are invalid prefixes */ + 0x80, 0x8F /* \xF4\x90 and higher are invalid prefixes */ #else - 0xC0 /* Not used, but reject all again for safety. */ + 0xC0, 0xBF, /* Not used, but reject all again for safety. */ + 0xC0, 0xBF /* Not used, but reject all again for safety. */ #endif }; INLINE static int -Overlong( +Invalid( unsigned char *src) /* Points to lead byte of a UTF-8 byte sequence */ { unsigned char byte = *src; + int index; - if (byte % 0x10) { - /* Only lead bytes 0xC0, 0xE0, 0xF0 need examination */ + if (byte % 0x04) { + /* Only lead bytes 0xC0, 0xE0, 0xF0, 0xF4 need examination */ return 0; } - if (byte == 0xC0) { - if (src[1] == 0x80) { - /* Valid sequence: \xC0\x80 for \u0000 */ - return 0; - } - /* Reject overlong: \xC0\x81 - \xC0\xBF */ - return 1; - } - if (src[1] < overlong[(byte >> 4) - 0x0D]) { - /* Reject overlong */ + index = (byte - 0xC0) >> 1; + if (src[1] < bounds[index] || src[1] > bounds[index+1]) { + /* Out of bounds - report invalid. */ return 1; } return 0; @@ -733,7 +734,7 @@ Tcl_UtfNext( } next++; } - if (Overlong((unsigned char *)src)) { + if (Invalid((unsigned char *)src)) { return src + 1; } return next; @@ -843,10 +844,10 @@ Tcl_UtfPrev( /* * trailBytesSeen > 0, so we can examine look[1] safely. - * Use that capability to screen out overlong sequences. + * Use that capability to screen out invalid sequences. */ - if (Overlong(look)) { + if (Invalid(look)) { /* Reject */ return fallback; } -- cgit v0.12 From ec1723eeadcf9efe52b0f81a65d683dff9b160c5 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 18 Apr 2020 12:46:54 +0000 Subject: Update documentation of Tcl_UtfPrev/Tcl_UtfNext back to how it was. Will be updated later, when implementation is ready and agreed upon. --- doc/Utf.3 | 37 +++++++++++------------------ generic/tclUtf.c | 72 ++++++++++++++++++++------------------------------------ 2 files changed, 39 insertions(+), 70 deletions(-) diff --git a/doc/Utf.3 b/doc/Utf.3 index cb82699..334fa6f 100644 --- a/doc/Utf.3 +++ b/doc/Utf.3 @@ -3,7 +3,7 @@ '\" '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. -'\" +'\" .TH Utf 3 "8.1" Tcl "Tcl Library Procedures" .so man.macros .BS @@ -13,7 +13,7 @@ Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UniCharToUtfDString, Tcl_Ut .nf \fB#include \fR .sp -typedef ... Tcl_UniChar; +typedef ... \fBTcl_UniChar\fR; .sp int \fBTcl_UniCharToUtf\fR(\fIch, buf\fR) @@ -48,7 +48,7 @@ int int \fBTcl_UtfCharComplete\fR(\fIsrc, length\fR) .sp -int +int \fBTcl_NumUtfChars\fR(\fIsrc, length\fR) .sp const char * @@ -109,7 +109,7 @@ Pointer to the beginning of a UTF-8 string. .AP int index in The index of a character (not byte) in the UTF-8 string. .AP int *readPtr out -If non-NULL, filled with the number of bytes in the backslash sequence, +If non-NULL, filled with the number of bytes in the backslash sequence, including the backslash character. .AP char *dst out Buffer in which the bytes represented by the backslash sequence are stored. @@ -141,8 +141,8 @@ source buffer is long enough such that this routine does not run off the end and dereference non-existent or random memory; if the source buffer is known to be null-terminated, this will not happen. If the input is not in proper UTF-8 format, \fBTcl_UtfToUniChar\fR will store the first -byte of \fIsrc\fR in \fI*chPtr\fR as a Tcl_UniChar between 0x0000 and -0x00FF and return 1. +byte of \fIsrc\fR in \fI*chPtr\fR as a Tcl_UniChar between 0x0080 and +0x00FF and return 1. .PP \fBTcl_UniCharToUtfDString\fR converts the given Unicode string to UTF-8, storing the result in a previously initialized \fBTcl_DString\fR. @@ -210,27 +210,18 @@ length is negative, all bytes up to the first null byte are used. \fBTcl_UtfFindFirst\fR corresponds to \fBstrchr\fR for UTF-8 strings. It returns a pointer to the first occurrence of the Tcl_UniChar \fIch\fR in the null-terminated UTF-8 string \fIsrc\fR. The null terminator is -considered part of the UTF-8 string. +considered part of the UTF-8 string. .PP \fBTcl_UtfFindLast\fR corresponds to \fBstrrchr\fR for UTF-8 strings. It returns a pointer to the last occurrence of the Tcl_UniChar \fIch\fR in the null-terminated UTF-8 string \fIsrc\fR. The null terminator is -considered part of the UTF-8 string. +considered part of the UTF-8 string. .PP -\fBTcl_UtfNext\fR is used to step forward through a UTF-8 string. -If the UTF-8 string is made up entirely of complete, well-formed, and -valid character byte sequences, and \fIsrc\fR points to the lead byte -of one of those sequences, then repeated calls of \fBTcl_UtfNext\fR will -return pointers to the lead bytes of each character in the string, one -character at a time. In any other circumstance, \fBTcl_UtfNext\fR -returns \fIsrc\fR+1. \fBTcl_UtfNext\fR will always read \fIsrc[0]\fR -and may read as many following bytes (up to a total of \fBTCL_UTF_MAX\fR) -as needed to find the end of the byte sequence. If the string is -\fBNUL\fR-terminated, \fBTcl_UtfNext\fR will not read beyond the terminating -\fBNUL\fR byte. If not, the caller must use the companion routine -\fBTcl_UtfCharComplete\fR to determine whether there is any risk -\fBTcl_UtfNext\fR might read beyond the readable memory occupied -by the string. +Given \fIsrc\fR, a pointer to some location in a UTF-8 string, +\fBTcl_UtfNext\fR returns a pointer to the next UTF-8 character in the +string. The caller must not ask for the next character after the last +character in the string if the string is not terminated by a null +character. .PP \fBTcl_UtfPrev\fR is used to step backward through but not beyond the UTF-8 string that begins at \fIstart\fR. If the UTF-8 string is made @@ -262,7 +253,7 @@ characters. Behavior is undefined if a negative \fIindex\fR is given. .PP \fBTcl_UtfAtIndex\fR returns a pointer to the specified character (not byte) \fIindex\fR in the UTF-8 string \fIsrc\fR. The source string must -contain at least \fIindex\fR characters. This is equivalent to calling +contain at least \fIindex\fR characters. This is equivalent to calling \fBTcl_UtfNext\fR \fIindex\fR times. If a negative \fIindex\fR is given, the return pointer points to the first character in the source string. .PP diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 1883804..64ee0a8 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -678,35 +678,13 @@ Tcl_UtfFindLast( * * Tcl_UtfNext -- * - * The aim of this routine is to provide a way to iterate forward - * through a UTF-8 string. The caller is expected to pass a non-NULL - * pointer argument /src/ which points to a location within a string. - * (*src) will be read, so /src/ must not point to an unreadable - * location past the end of the string. If /src/ points to the - * beginning of a complete, well-formed and valid UTF_8 byte sequence - * of no more than TCL_UTF_MAX bytes, Tcl_UtfNext returns the pointer - * just past the end of that sequence. In any other circumstance, - * Tcl_UtfNext returns /src/+1. - * - * Because this routine always returns a value > /src/, it is useful - * as a forward iterator that will always make progress. If the string - * is NUL-terminated, Tcl_UtfNext will not read beyond the terminating - * NUL character. If it is not NUL-terminated, the caller must make - * use of the companion routine Tcl_UtfCharComplete to test whether - * there is risk that Tcl_UtfNext will read beyond the end of the string. - * Tcl_UtfNext will never read more than TCL_UTF_MAX bytes. - * - * In a string where all characters are complete and properly formed, - * and /src/ points to the first byte of a character, repeated - * Tcl_UtfNext calls will step to the starting bytes of characters, one - * character at a time. Within those limitations, Tcl_UtfPrev and - * Tcl_UtfNext are inverses. If either condition cannot be met, - * Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and the - * caller will have to take greater care. + * Given a pointer to some current location in a UTF-8 string, move + * forward one character. The caller must ensure that they are not asking + * for the next character after the last character in the string. * * Results: - * A pointer to the start of the next character in the string (or to - * the end of the string) as described above. + * The return value is the pointer to the next character in the UTF-8 + * string. * * Side effects: * None. @@ -747,37 +725,37 @@ Tcl_UtfNext( * * The aim of this routine is to provide a way to move backward * through a UTF-8 string. The caller is expected to pass non-NULL - * pointer arguments /start/ and /src/. /start/ points to the beginning - * of a string, and /src/ (>= /start/) points to a location within (or - * just past the end) of the string. This routine always returns a - * pointer within the string (>= /start/). When (/src/ == /start/), - * it returns /start/. When (/src/ > /start/), it returns a pointer - * (< /src/) and (>= /src/ - TCL_UTF_MAX). Subject to these constraints, - * the routine returns a pointer to the earliest byte in the string that - * starts a character when characters are read starting at /start/ and + * pointer arguments start and src. start points to the beginning + * of a string, and src >= start points to a location within (or just + * past the end) of the string. This routine always returns a + * pointer within the string (>= start). When (src == start), it + * returns start. When (src > start), it returns a pointer (< src) + * and (>= src - TCL_UTF_MAX). Subject to these constraints, the + * routine returns a pointer to the earliest byte in the string that + * starts a character when characters are read starting at start and * that character might include the byte src[-1]. The routine will * examine only those bytes in the range that might be returned. - * It will not examine the byte (*src), and because of that cannot + * It will not examine the byte *src, and because of that cannot * determine for certain in all circumstances whether the character * that begins with the returned pointer will or will not include - * the byte src[-1]. In the scenario where /src/ points to the end of - * a buffer being filled, the returned pointer points to either the + * the byte src[-1]. In the scenario, where src points to the end of + * a buffer being filled, the returned pointer point to either the * final complete character in the string or to the earliest byte * that might start an incomplete character waiting for more bytes to * complete. * - * Because this routine always returns a value < /src/ until the point - * it is forced to return /start/, it is useful as a backward iterator + * Because this routine always returns a value < src until the point + * it is forced to return start, it is useful as a backward iterator * through a string that will always make progress and always be * prevented from running past the beginning of the string. * * In a string where all characters are complete and properly formed, - * and /src/ points to the first byte of a character, repeated - * Tcl_UtfPrev calls will step to the starting bytes of characters, one - * character at a time. Within those limitations, Tcl_UtfPrev and - * Tcl_UtfNext are inverses. If either condition cannot be met, - * Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and the - * caller will have to take greater care. + * and the value of src points to the first byte of a character, + * repeated Tcl_UtfPrev calls will step to the starting bytes of + * characters, one character at a time. Within those limitations, + * Tcl_UtfPrev and Tcl_UtfNext are inverses. If either condition cannot + * be met, Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and + * the caller will have to take greater care. * * Results: * A pointer to the start of a character in the string as described @@ -887,7 +865,7 @@ Tcl_UtfPrev( * * Tcl_UniCharAtIndex -- * - * Returns the Unicode character represented at the specified character + * Returns the Tcl_UniChar represented at the specified character * (not byte) position in the UTF-8 string. * * Results: -- cgit v0.12 From 6f00fef31d332688308f392fd5df4cab98d05161 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 18 Apr 2020 13:47:06 +0000 Subject: Fix [c574e50a3b30e76f]: CRASH: utf-2.[89] in 8.5 built with TCL_UTF_MAX=4 --- generic/regcustom.h | 2 +- generic/tcl.h | 2 +- generic/tclUtf.c | 84 +---------------- tests/utf.test | 259 ++++++++++++++++++++++++++-------------------------- tests/util.test | 1 + 5 files changed, 132 insertions(+), 216 deletions(-) diff --git a/generic/regcustom.h b/generic/regcustom.h index 57a2d47..ac33087 100644 --- a/generic/regcustom.h +++ b/generic/regcustom.h @@ -97,7 +97,7 @@ typedef int celt; /* Type to hold chr, or NOCELT */ #define NOCELT (-1) /* Celt value which is not valid chr */ #define CHR(c) (UCHAR(c)) /* Turn char literal into chr literal */ #define DIGITVAL(c) ((c)-'0') /* Turn chr digit into its value */ -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 #define CHRBITS 32 /* Bits in a chr; must not use sizeof */ #define CHR_MIN 0x00000000 /* Smallest and largest chr; the value */ #define CHR_MAX 0xffffffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */ diff --git a/generic/tcl.h b/generic/tcl.h index 7378a8f..d7d064c 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2148,7 +2148,7 @@ typedef struct Tcl_Parse { * reflected in regcustom.h. */ -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 /* * unsigned int isn't 100% accurate as it should be a strict 4-byte value * (perhaps wchar_t). 64-bit systems may have troubles. The size of this diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 64ee0a8..3741d70 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -209,30 +209,6 @@ Tcl_UniCharToUtf( return 2; } if (ch <= 0xFFFF) { -#if TCL_UTF_MAX == 4 - if ((ch & 0xF800) == 0xD800) { - if (ch & 0x0400) { - /* Low surrogate */ - if (((buf[0] & 0xF8) == 0xF0) && ((buf[1] & 0xC0) == 0x80) - && ((buf[2] & 0xCF) == 0)) { - /* Previous Tcl_UniChar was a High surrogate, so combine */ - buf[3] = (char) ((ch & 0x3F) | 0x80); - buf[2] |= (char) (((ch >> 6) & 0x0F) | 0x80); - return 4; - } - /* Previous Tcl_UniChar was not a High surrogate, so just output */ - } else { - /* High surrogate */ - ch += 0x40; - /* Fill buffer with specific 3-byte (invalid) byte combination, - so following Low surrogate can recognize it and combine */ - buf[2] = (char) ((ch << 4) & 0x30); - buf[1] = (char) (((ch >> 2) & 0x3F) | 0x80); - buf[0] = (char) (((ch >> 8) & 0x07) | 0xF0); - return 0; - } - } -#endif goto three; } @@ -321,15 +297,6 @@ Tcl_UniCharToUtfDString( * Tcl_UtfCharComplete() before calling this routine to ensure that * enough bytes remain in the string. * - * If TCL_UTF_MAX == 4, special handling of Surrogate pairs is done: - * For any UTF-8 string containing a character outside of the BMP, the - * first call to this function will fill *chPtr with the high surrogate - * and generate a return value of 0. Calling Tcl_UtfToUniChar again - * will produce the low surrogate and a return value of 4. Because *chPtr - * is used to remember whether the high surrogate is already produced, it - * is recommended to initialize the variable it points to as 0 before - * the first call to Tcl_UtfToUniChar is done. - * * Results: * *chPtr is filled with the Tcl_UniChar, and the return value is the * number of bytes from the UTF-8 string that were consumed. @@ -402,34 +369,15 @@ Tcl_UtfToUniChar( /* * Four-byte-character lead byte followed by three trail bytes. */ -#if TCL_UTF_MAX == 4 - Tcl_UniChar surrogate; - - byte = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12) - | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)) - 0x10000; - surrogate = (Tcl_UniChar) (0xD800 + (byte >> 10)); - if (byte & 0x100000) { - /* out of range, < 0x10000 or > 0x10ffff */ - } else if (*chPtr != surrogate) { - /* produce high surrogate, but don't advance source pointer */ - *chPtr = surrogate; - return 0; - } else { - /* produce low surrogate, and advance source pointer */ - *chPtr = (Tcl_UniChar) (0xDC00 | (byte & 0x3FF)); - return 4; - } -#else *chPtr = (Tcl_UniChar) (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12) | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)); if ((unsigned)(*chPtr - 0x10000) <= 0xFFFFF) { return 4; } -#endif } /* - * A four-byte-character lead-byte not followed by two trail-bytes + * A four-byte-character lead-byte not followed by three trail-bytes * represents itself. */ } @@ -1230,16 +1178,6 @@ Tcl_UtfNcmp( cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); if (ch1 != ch2) { -#if TCL_UTF_MAX == 4 - /* Surrogates always report higher than non-surrogates */ - if (((ch1 & 0xFC00) == 0xD800)) { - if ((ch2 & 0xFC00) != 0xD800) { - return ch1; - } - } else if ((ch2 & 0xFC00) == 0xD800) { - return -ch2; - } -#endif return (ch1 - ch2); } } @@ -1280,16 +1218,6 @@ Tcl_UtfNcasecmp( cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); if (ch1 != ch2) { -#if TCL_UTF_MAX == 4 - /* Surrogates always report higher than non-surrogates */ - if (((ch1 & 0xFC00) == 0xD800)) { - if ((ch2 & 0xFC00) != 0xD800) { - return ch1; - } - } else if ((ch2 & 0xFC00) == 0xD800) { - return -ch2; - } -#endif ch1 = Tcl_UniCharToLower(ch1); ch2 = Tcl_UniCharToLower(ch2); if (ch1 != ch2) { @@ -1329,16 +1257,6 @@ TclUtfCasecmp( cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); if (ch1 != ch2) { -#if TCL_UTF_MAX == 4 - /* Surrogates always report higher than non-surrogates */ - if (((ch1 & 0xFC00) == 0xD800)) { - if ((ch2 & 0xFC00) != 0xD800) { - return ch1; - } - } else if ((ch2 & 0xFC00) == 0xD800) { - return -ch2; - } -#endif ch1 = Tcl_UniCharToLower(ch1); ch2 = Tcl_UniCharToLower(ch2); if (ch1 != ch2) { diff --git a/tests/utf.test b/tests/utf.test index 01e0bb2..9a55729 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -13,54 +13,61 @@ if {[lsearch [namespace children] ::tcltest] == -1} { namespace import -force ::tcltest::* } +testConstraint compat85 [expr {[format %c 0x010000] == "\uFFFD"}] testConstraint testbytestring [llength [info commands testbytestring]] +testConstraint testfindfirst [llength [info commands testfindfirst]] +testConstraint testfindlast [llength [info commands testfindlast]] +testConstraint testnumutfchars [llength [info commands testnumutfchars]] +testConstraint teststringobj [llength [info commands teststringobj]] +testConstraint testutfnext [llength [info commands testutfnext]] +testConstraint testutfprev [llength [info commands testutfprev]] catch {unset x} -test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} { +test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} testbytestring { set x \x01 -} [bytestring "\x01"] -test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} { +} [testbytestring "\x01"] +test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} testbytestring { set x "\x00" -} [bytestring "\xc0\x80"] -test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} { +} [testbytestring "\xC0\x80"] +test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} testbytestring { set x "\xe0" -} [bytestring "\xc3\xa0"] -test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} { - set x "\u4e4e" -} [bytestring "\xe4\xb9\x8e"] -test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} { +} [testbytestring "\xC3\xA0"] +test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} testbytestring { + set x "\u4E4E" +} [testbytestring "\xE4\xB9\x8E"] +test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} testbytestring { format %c 0x110000 -} [bytestring "\xef\xbf\xbd"] -test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} { +} [testbytestring "\xEF\xBF\xBD"] +test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring { format %c -1 -} [bytestring "\xef\xbf\xbd"] +} [testbytestring "\xEF\xBF\xBD"] test utf-2.1 {Tcl_UtfToUniChar: low ascii} { string length "abc" } {3} -test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} { - string length [bytestring "\x82\x83\x84"] +test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} testbytestring { + string length [testbytestring "\x82\x83\x84"] } {3} -test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} { - string length [bytestring "\xC2"] +test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} testbytestring { + string length [testbytestring "\xC2"] } {1} -test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} { - string length [bytestring "\xC2\xa2"] +test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} testbytestring { + string length [testbytestring "\xC2\xA2"] } {1} -test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} { - string length [bytestring "\xE2"] +test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} testbytestring { + string length [testbytestring "\xE2"] } {1} -test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} { - string length [bytestring "\xE2\xA2"] +test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} testbytestring { + string length [testbytestring "\xE2\xA2"] } {2} -test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} { - string length [bytestring "\xE4\xb9\x8e"] +test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring { + string length [testbytestring "\xE4\xb9\x8E"] } {1} -test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring} -body { +test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body { string length [testbytestring "\xF0\x90\x80\x80"] } -result {4} -test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring} -body { +test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body { string length [testbytestring "\xF4\x8F\xBF\xBF"] } -result {4} test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring { @@ -77,57 +84,51 @@ test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestr test utf-3.1 {Tcl_UtfCharComplete} { } {} -testConstraint testnumutfchars [llength [info commands testnumutfchars]] -testConstraint testfindfirst [llength [info commands testfindfirst]] -testConstraint testfindlast [llength [info commands testfindlast]] - test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars { testnumutfchars "" } {0} -test utf-4.2 {Tcl_NumUtfChars: length 1} testnumutfchars { - testnumutfchars [bytestring "\xC2\xA2"] +test utf-4.2 {Tcl_NumUtfChars: length 1} {testnumutfchars testbytestring} { + testnumutfchars [testbytestring "\xC2\xA2"] } {1} -test utf-4.3 {Tcl_NumUtfChars: long string} testnumutfchars { - testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] +test utf-4.3 {Tcl_NumUtfChars: long string} {testnumutfchars testbytestring} { + testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8e\uA2\u4E4E"] } {7} -test utf-4.4 {Tcl_NumUtfChars: #u0000} testnumutfchars { - testnumutfchars [bytestring "\xC0\x80"] +test utf-4.4 {Tcl_NumUtfChars: #u0000} {testnumutfchars testbytestring} { + testnumutfchars [testbytestring "\xC0\x80"] } {1} test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars { testnumutfchars "" 0 } {0} -test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} testnumutfchars { - testnumutfchars [bytestring "\xC2\xA2"] 1 +test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {testnumutfchars testbytestring} { + testnumutfchars [testbytestring "\xC2\xA2"] 1 } {1} -test utf-4.7 {Tcl_NumUtfChars: long string, calc len} testnumutfchars { - testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] 10 +test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {testnumutfchars testbytestring} { + testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\u4E4E"] 10 } {7} -test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} testnumutfchars { - testnumutfchars [bytestring "\xC0\x80"] 1 +test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {testnumutfchars testbytestring} { + testnumutfchars [testbytestring "\xC0\x80"] 1 } {1} # Bug [2738427]: Tcl_NumUtfChars(...) no overflow check -test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} testnumutfchars { - testnumutfchars [bytestring "\xE2\x82\xAC"] 2 +test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} {testnumutfchars testbytestring} { + testnumutfchars [testbytestring "\xE2\x82\xAC"] 2 } {2} -test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} testnumutfchars { - testnumutfchars [bytestring "\x00"] 2 +test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars testbytestring} { + testnumutfchars [testbytestring "\x00"] 2 } {2} -test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} testnumutfchars { - testnumutfchars [bytestring \xf0\x9f\x92\xa9] 3 +test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} { + testnumutfchars [testbytestring \xf0\x9f\x92\xA9] 3 } {3} -test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} testnumutfchars { - testnumutfchars [bytestring \xf0\x9f\x92\xa9] 4 +test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring compat85} { + testnumutfchars [testbytestring \xf0\x9f\x92\xA9] 4 } {4} -test utf-5.1 {Tcl_UtfFindFirst} testfindfirst { - testfindfirst [bytestring "abcbc"] 98 +test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} { + testfindfirst [testbytestring "abcbc"] 98 } {bcbc} -test utf-5.2 {Tcl_UtfFindLast} testfindlast { - testfindlast [bytestring "abcbc"] 98 +test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} { + testfindlast [testbytestring "abcbc"] 98 } {bc} -testConstraint testutfnext [llength [info commands testutfnext]] - test utf-6.1 {Tcl_UtfNext} testutfnext { # This takes the pointer one past the terminating NUL. # This is really an invalid call. @@ -334,7 +335,7 @@ test utf-6.67 {Tcl_UtfNext} testutfnext { test utf-6.68 {Tcl_UtfNext} testutfnext { testutfnext \xF2\xA0\xA0G } 1 -test utf-6.69 {Tcl_UtfNext} testutfnext { +test utf-6.69 {Tcl_UtfNext} {testutfnext compat85} { testutfnext \xF2\xA0\xA0\xA0 } 1 test utf-6.70 {Tcl_UtfNext} testutfnext { @@ -349,22 +350,22 @@ test utf-6.71 {Tcl_UtfNext} testutfnext { test utf-6.73 {Tcl_UtfNext} testutfnext { testutfnext \xF2\xA0\xA0\xF8 } 1 -test utf-6.74 {Tcl_UtfNext} testutfnext { +test utf-6.74 {Tcl_UtfNext} {testutfnext compat85} { testutfnext \xF2\xA0\xA0\xA0G } 1 -test utf-6.75 {Tcl_UtfNext} testutfnext { +test utf-6.75 {Tcl_UtfNext} {testutfnext compat85} { testutfnext \xF2\xA0\xA0\xA0\xA0 } 1 -test utf-6.76 {Tcl_UtfNext} testutfnext { +test utf-6.76 {Tcl_UtfNext} {testutfnext compat85} { testutfnext \xF2\xA0\xA0\xA0\xD0 } 1 -test utf-6.77 {Tcl_UtfNext} testutfnext { +test utf-6.77 {Tcl_UtfNext} {testutfnext compat85} { testutfnext \xF2\xA0\xA0\xA0\xE8 } 1 -test utf-6.78 {Tcl_UtfNext} testutfnext { +test utf-6.78 {Tcl_UtfNext} {testutfnext compat85} { testutfnext \xF2\xA0\xA0\xA0\xF2 } 1 -test utf-6.79 {Tcl_UtfNext} testutfnext { +test utf-6.79 {Tcl_UtfNext} {testutfnext compat85} { testutfnext \xF2\xA0\xA0\xA0G\xF8 } 1 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext { @@ -388,7 +389,7 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext { test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext { testutfnext \xF0\x80\x80\x80 } 1 -test utf-6.87 {Tcl_UtfNext - overlong sequences} testutfnext { +test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext compat85} { testutfnext \xF0\x90\x80\x80 } 1 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} { @@ -403,15 +404,13 @@ test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {te test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} { testutfnext \xF0\x80\x80 1 } 2 -test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext { +test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext compat85} { testutfnext \xF4\x8F\xBF\xBF } 1 test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext { testutfnext \xF4\x90\x80\x80 } 1 -testConstraint testutfprev [llength [info commands testutfprev]] - test utf-7.1 {Tcl_UtfPrev} testutfprev { testutfprev {} } 0 @@ -475,13 +474,13 @@ test utf-7.9.1 {Tcl_UtfPrev} testutfprev { test utf-7.9.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xF8\xA0 3 } 2 -test utf-7.10 {Tcl_UtfPrev} testutfprev { +test utf-7.10 {Tcl_UtfPrev} {testutfprev compat85} { testutfprev A\xF2\xA0 } 2 -test utf-7.10.1 {Tcl_UtfPrev} testutfprev { +test utf-7.10.1 {Tcl_UtfPrev} {testutfprev compat85} { testutfprev A\xF2\xA0\xA0\xA0 3 } 2 -test utf-7.10.2 {Tcl_UtfPrev} testutfprev { +test utf-7.10.2 {Tcl_UtfPrev} {testutfprev compat85} { testutfprev A\xF2\xA0\xF8\xA0 3 } 2 test utf-7.11 {Tcl_UtfPrev} testutfprev { @@ -523,13 +522,13 @@ test utf-7.14.1 {Tcl_UtfPrev} testutfprev { test utf-7.14.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xA0\xF8 4 } 3 -test utf-7.15 {Tcl_UtfPrev} testutfprev { +test utf-7.15 {Tcl_UtfPrev} {testutfprev compat85} { testutfprev A\xF2\xA0\xA0 } 3 -test utf-7.15.1 {Tcl_UtfPrev} testutfprev { +test utf-7.15.1 {Tcl_UtfPrev} {testutfprev compat85} { testutfprev A\xF2\xA0\xA0\xA0 4 } 3 -test utf-7.15.2 {Tcl_UtfPrev} testutfprev { +test utf-7.15.2 {Tcl_UtfPrev} {testutfprev compat85} { testutfprev A\xF2\xA0\xA0\xF8 4 } 3 test utf-7.16 {Tcl_UtfPrev} testutfprev { @@ -562,7 +561,7 @@ test utf-7.18.2 {Tcl_UtfPrev} testutfprev { test utf-7.19 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xA0\xA0 } 4 -test utf-7.20 {Tcl_UtfPrev} testutfprev { +test utf-7.20 {Tcl_UtfPrev} {testutfprev compat85} { testutfprev A\xF2\xA0\xA0\xA0 } 4 test utf-7.21 {Tcl_UtfPrev} testutfprev { @@ -622,16 +621,16 @@ test utf-7.36 {Tcl_UtfPrev -- overlong sequence} testutfprev { test utf-7.37 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\xA0\x80 3 } 1 -test utf-7.38 {Tcl_UtfPrev -- overlong sequence} testutfprev { +test utf-7.38 {Tcl_UtfPrev -- overlong sequence} {testutfprev compat85} { testutfprev A\xE0\xA0\x80 2 } 1 -test utf-7.39 {Tcl_UtfPrev -- overlong sequence} testutfprev { +test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev compat85} { testutfprev A\xF0\x90\x80\x80 } 4 -test utf-7.40 {Tcl_UtfPrev -- overlong sequence} testutfprev { +test utf-7.40 {Tcl_UtfPrev -- overlong sequence} {testutfprev compat85} { testutfprev A\xF0\x90\x80\x80 4 } 3 -test utf-7.41 {Tcl_UtfPrev -- overlong sequence} testutfprev { +test utf-7.41 {Tcl_UtfPrev -- overlong sequence} {testutfprev compat85} { testutfprev A\xF0\x90\x80\x80 3 } 2 test utf-7.42 {Tcl_UtfPrev -- overlong sequence} testutfprev { @@ -658,13 +657,13 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {te test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} { testutfprev \xE8\xA0\x00 2 } 0 -test utf-7.48 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { +test utf-7.48 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} { testutfprev A\xF4\x8F\xBF\xBF } 4 -test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { +test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} { testutfprev A\xF4\x8F\xBF\xBF 4 } 3 -test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { +test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} { testutfprev A\xF4\x8F\xBF\xBF 3 } 2 test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { @@ -708,18 +707,18 @@ test utf-10.1 {Tcl_UtfBackslash: dst == NULL} { set x \n } { } -test utf-10.2 {Tcl_UtfBackslash: \u subst} { - set x \ua2 -} [bytestring "\xc2\xa2"] -test utf-10.3 {Tcl_UtfBackslash: longer \u subst} { - set x \u4e21 -} [bytestring "\xe4\xb8\xa1"] -test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} { - set x \u4e2k -} "[bytestring \xd3\xa2]k" -test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} { - set x \u4e216 -} "[bytestring \xe4\xb8\xa1]6" +test utf-10.2 {Tcl_UtfBackslash: \u subst} testbytestring { + set x \uA2 +} [testbytestring "\xC2\xA2"] +test utf-10.3 {Tcl_UtfBackslash: longer \u subst} testbytestring { + set x \u4E21 +} [testbytestring "\xE4\xB8\xA1"] +test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} testbytestring { + set x \u4E2k +} "[testbytestring \xD3\xA2]k" +test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} testbytestring { + set x \u4E216 +} "[testbytestring \xE4\xB8\xA1]6" proc bsCheck {char num} { global errNum test utf-10.$errNum {backslash substitution} { @@ -774,11 +773,11 @@ test utf-11.2 {Tcl_UtfToUpper} { string toupper abc } ABC test utf-11.3 {Tcl_UtfToUpper} { - string toupper \u00e3ab -} \u00c3AB + string toupper \u00E3ab +} \u00C3AB test utf-11.4 {Tcl_UtfToUpper} { - string toupper \u01e3ab -} \u01e2AB + string toupper \u01E3ab +} \u01E2AB test utf-12.1 {Tcl_UtfToLower} { string tolower {} @@ -787,11 +786,11 @@ test utf-12.2 {Tcl_UtfToLower} { string tolower ABC } abc test utf-12.3 {Tcl_UtfToLower} { - string tolower \u00c3AB -} \u00e3ab + string tolower \u00C3AB +} \u00E3ab test utf-12.4 {Tcl_UtfToLower} { - string tolower \u01e2AB -} \u01e3ab + string tolower \u01E2AB +} \u01E3ab test utf-13.1 {Tcl_UtfToTitle} { string totitle {} @@ -800,11 +799,11 @@ test utf-13.2 {Tcl_UtfToTitle} { string totitle abc } Abc test utf-13.3 {Tcl_UtfToTitle} { - string totitle \u00e3ab -} \u00c3ab + string totitle \u00E3ab +} \u00C3ab test utf-13.4 {Tcl_UtfToTitle} { - string totitle \u01f3ab -} \u01f2ab + string totitle \u01F3ab +} \u01F2ab test utf-14.1 {Tcl_UtfNcasecmp} { string compare -nocase a b @@ -823,7 +822,7 @@ test utf-15.1 {Tcl_UniCharToUpper, negative delta} { string toupper aA } AA test utf-15.2 {Tcl_UniCharToUpper, positive delta} { - string toupper \u0178\u00ff + string toupper \u0178\xFF } \u0178\u0178 test utf-15.3 {Tcl_UniCharToUpper, no delta} { string toupper ! @@ -833,24 +832,24 @@ test utf-16.1 {Tcl_UniCharToLower, negative delta} { string tolower aA } aa test utf-16.2 {Tcl_UniCharToLower, positive delta} { - string tolower \u0178\u00ff\uA78D\u01c5 -} \u00ff\u00ff\u0265\u01c6 + string tolower \u0178\xFF\uA78D\u01C5 +} \xFF\xFF\u0265\u01C6 test utf-17.1 {Tcl_UniCharToLower, no delta} { string tolower ! } ! test utf-18.1 {Tcl_UniCharToTitle, add one for title} { - string totitle \u01c4 -} \u01c5 + string totitle \u01C4 +} \u01C5 test utf-18.2 {Tcl_UniCharToTitle, subtract one for title} { - string totitle \u01c6 -} \u01c5 + string totitle \u01C6 +} \u01C5 test utf-18.3 {Tcl_UniCharToTitle, subtract delta for title (positive)} { - string totitle \u017f + string totitle \u017F } \u0053 test utf-18.4 {Tcl_UniCharToTitle, subtract delta for title (negative)} { - string totitle \u00ff + string totitle \xFF } \u0178 test utf-18.5 {Tcl_UniCharToTitle, no delta} { string totitle ! @@ -865,15 +864,15 @@ test utf-20.1 {TclUniCharNcmp} { test utf-21.1 {TclUniCharIsAlnum} { # this returns 1 with Unicode 7 compliance - string is alnum \u1040\u021f\u0220 + string is alnum \u1040\u021F\u0220 } {1} test utf-21.2 {unicode alnum char in regc_locale.c} { # this returns 1 with Unicode 7 compliance - list [regexp {^[[:alnum:]]+$} \u1040\u021f\u0220] [regexp {^\w+$} \u1040\u021f\u0220_\u203f\u2040\u2054\ufe33\ufe34\ufe4d\ufe4e\ufe4f\uff3f] + list [regexp {^[[:alnum:]]+$} \u1040\u021F\u0220] [regexp {^\w+$} \u1040\u021F\u0220_\u203F\u2040\u2054\uFE33\uFE34\uFE4D\uFE4E\uFE4F\uFF3F] } {1 1} test utf-21.3 {unicode print char in regc_locale.c} { # this returns 1 with Unicode 7 compliance - regexp {^[[:print:]]+$} \ufbc1 + regexp {^[[:print:]]+$} \uFBC1 } 1 test utf-21.4 {TclUniCharIsGraph} { # [Bug 3464428] @@ -885,11 +884,11 @@ test utf-21.5 {unicode graph char in regc_locale.c} { } {1} test utf-21.6 {TclUniCharIsGraph} { # [Bug 3464428] - string is graph \u00a0 + string is graph \xA0 } {0} test utf-21.7 {unicode graph char in regc_locale.c} { # [Bug 3464428] - regexp {[[:graph:]]} \u0020\u00a0\u2028\u2029 + regexp {[[:graph:]]} \x20\xA0\u2028\u2029 } {0} test utf-21.8 {TclUniCharIsPrint} { # [Bug 3464428] @@ -905,49 +904,47 @@ test utf-21.10 {unicode print char in regc_locale.c} { } {0} test utf-21.11 {TclUniCharIsControl} { # [Bug 3464428] - string is control \u0000\u001f\u00ad\u0605\u061c\u180e\u2066\ufeff + string is control \x00\x1F\xad\u0605\u061C\u180E\u2066\uFEFF } {1} test utf-21.12 {unicode control char in regc_locale.c} { # [Bug 3464428], [Bug a876646efe] - regexp {^[[:cntrl:]]*$} \u0000\u001f\u00ad\u0605\u061c\u180e\u2066\ufeff + regexp {^[[:cntrl:]]*$} \x00\x1F\xad\u0605\u061C\u180E\u2066\uFEFF } {1} test utf-22.1 {TclUniCharIsWordChar} { string wordend "xyz123_bar fg" 0 } 10 test utf-22.2 {TclUniCharIsWordChar} { - string wordend "x\u5080z123_bar\u203c fg" 0 + string wordend "x\u5080z123_bar\u203C fg" 0 } 10 test utf-23.1 {TclUniCharIsAlpha} { # this returns 1 with Unicode 7 compliance - string is alpha \u021f\u0220\u037f\u052f + string is alpha \u021F\u0220\u037F\u052F } {1} test utf-23.2 {unicode alpha char in regc_locale.c} { # this returns 1 with Unicode 7 compliance - regexp {^[[:alpha:]]+$} \u021f\u0220\u037f\u052f + regexp {^[[:alpha:]]+$} \u021F\u0220\u037F\u052F } {1} test utf-24.1 {TclUniCharIsDigit} { # this returns 1 with Unicode 7 compliance - string is digit \u1040\uabf0 + string is digit \u1040\uABF0 } {1} test utf-24.2 {unicode digit char in regc_locale.c} { # this returns 1 with Unicode 7 compliance - list [regexp {^[[:digit:]]+$} \u1040\uabf0] [regexp {^\d+$} \u1040\uabf0] + list [regexp {^[[:digit:]]+$} \u1040\uABF0] [regexp {^\d+$} \u1040\uABF0] } {1 1} test utf-24.3 {TclUniCharIsSpace} { # this returns 1 with Unicode 7 compliance - string is space \u1680\u180e\u202f + string is space \u1680\u180E\u202F } {1} test utf-24.4 {unicode space char in regc_locale.c} { # this returns 1 with Unicode 7 compliance - list [regexp {^[[:space:]]+$} \u1680\u180e\u202f] [regexp {^\s+$} \u1680\u180e\u202f] + list [regexp {^[[:space:]]+$} \u1680\u180E\u202F] [regexp {^\s+$} \u1680\u180E\u202F] } {1 1} -testConstraint teststringobj [llength [info commands teststringobj]] - test utf-25.1 {Tcl_UniCharNcasecmp} -constraints teststringobj \ -setup { testobj freeallvars diff --git a/tests/util.test b/tests/util.test index 85c06dd..a483de1 100644 --- a/tests/util.test +++ b/tests/util.test @@ -15,6 +15,7 @@ if {[lsearch [namespace children] ::tcltest] == -1} { testConstraint testdstring [llength [info commands testdstring]] testConstraint testconcatobj [llength [info commands testconcatobj]] testConstraint testdoubledigits [llength [info commands testdoubledigits]] +testConstraint compat85 [expr {[format %c 0x010000] == "\uFFFD"}] # Big test for correct ordering of data in [expr] -- cgit v0.12 From ea9467702aecb854ba8cd803edbb38c4590aa928 Mon Sep 17 00:00:00 2001 From: dgp Date: Sat, 18 Apr 2020 15:02:51 +0000 Subject: Make TCL_UTF_MAX=4 build test clean again. --- generic/tcl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tcl.h b/generic/tcl.h index d7d064c..7378a8f 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2148,7 +2148,7 @@ typedef struct Tcl_Parse { * reflected in regcustom.h. */ -#if TCL_UTF_MAX > 3 +#if TCL_UTF_MAX > 4 /* * unsigned int isn't 100% accurate as it should be a strict 4-byte value * (perhaps wchar_t). 64-bit systems may have troubles. The size of this -- cgit v0.12 From f866e98a39dc53d4864e3b04119b7dc2fd65078d Mon Sep 17 00:00:00 2001 From: dgp Date: Sat, 18 Apr 2020 15:11:22 +0000 Subject: regexp engine has to agree about the sizeof(Tcl_UniChar). --- generic/regcustom.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/regcustom.h b/generic/regcustom.h index ac33087..57a2d47 100644 --- a/generic/regcustom.h +++ b/generic/regcustom.h @@ -97,7 +97,7 @@ typedef int celt; /* Type to hold chr, or NOCELT */ #define NOCELT (-1) /* Celt value which is not valid chr */ #define CHR(c) (UCHAR(c)) /* Turn char literal into chr literal */ #define DIGITVAL(c) ((c)-'0') /* Turn chr digit into its value */ -#if TCL_UTF_MAX > 3 +#if TCL_UTF_MAX > 4 #define CHRBITS 32 /* Bits in a chr; must not use sizeof */ #define CHR_MIN 0x00000000 /* Smallest and largest chr; the value */ #define CHR_MAX 0xffffffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */ -- cgit v0.12 From 4f3cc7f661e8ae301fd9b4aaf7a4c66d94897ec3 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 18 Apr 2020 19:54:08 +0000 Subject: Clean-up testcases: Constant use of uppercase in hex-values. Use "testbytestring" in stead of "bytestring". Mark tests not working with TCL_UTF_MAX>3 with "compat85" --- tests/utf.test | 278 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 137 insertions(+), 141 deletions(-) diff --git a/tests/utf.test b/tests/utf.test index 01e0bb2..189b85d 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -13,54 +13,60 @@ if {[lsearch [namespace children] ::tcltest] == -1} { namespace import -force ::tcltest::* } +testConstraint compat85 [expr {[format %c 0x010000] == "\uFFFD"}] testConstraint testbytestring [llength [info commands testbytestring]] +testConstraint testfindfirst [llength [info commands testfindfirst]] +testConstraint testfindlast [llength [info commands testfindlast]] +testConstraint testnumutfchars [llength [info commands testnumutfchars]] +testConstraint teststringobj [llength [info commands teststringobj]] +testConstraint testutfnext [llength [info commands testutfnext]] +testConstraint testutfprev [llength [info commands testutfprev]] catch {unset x} -test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} { - set x \x01 -} [bytestring "\x01"] -test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} { - set x "\x00" -} [bytestring "\xc0\x80"] -test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} { - set x "\xe0" -} [bytestring "\xc3\xa0"] -test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} { - set x "\u4e4e" -} [bytestring "\xe4\xb9\x8e"] -test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} { - format %c 0x110000 -} [bytestring "\xef\xbf\xbd"] -test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} { - format %c -1 -} [bytestring "\xef\xbf\xbd"] - +test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} testbytestring { + expr {"\x01" eq [testbytestring "\x01"]} +} 1 +test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} testbytestring { + expr {"\x00" eq [testbytestring "\xC0\x80"]} +} 1 +test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} testbytestring { + expr {"\xE0" eq [testbytestring "\xC3\xA0"]} +} 1 +test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} testbytestring { + expr {"\u4E4E" eq [testbytestring "\xE4\xB9\x8E"]} +} 1 +test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} testbytestring { + expr {[format %c 0x110000] eq [testbytestring "\xEF\xBF\xBD"]} +} 1 +test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring { + expr {[format %c -1] eq [testbytestring "\xEF\xBF\xBD"]} +} 1 test utf-2.1 {Tcl_UtfToUniChar: low ascii} { string length "abc" } {3} -test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} { - string length [bytestring "\x82\x83\x84"] +test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} testbytestring { + string length [testbytestring "\x82\x83\x84"] } {3} -test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} { - string length [bytestring "\xC2"] +test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} testbytestring { + string length [testbytestring "\xC2"] } {1} -test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} { - string length [bytestring "\xC2\xa2"] +test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} testbytestring { + string length [testbytestring "\xC2\xA2"] } {1} -test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} { - string length [bytestring "\xE2"] +test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} testbytestring { + string length [testbytestring "\xE2"] } {1} -test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} { - string length [bytestring "\xE2\xA2"] +test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} testbytestring { + string length [testbytestring "\xE2\xA2"] } {2} -test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} { - string length [bytestring "\xE4\xb9\x8e"] +test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring { + string length [testbytestring "\xE4\xB9\x8E"] } {1} -test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring} -body { +test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body { string length [testbytestring "\xF0\x90\x80\x80"] } -result {4} -test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring} -body { +test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body { string length [testbytestring "\xF4\x8F\xBF\xBF"] } -result {4} test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring { @@ -77,57 +83,51 @@ test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestr test utf-3.1 {Tcl_UtfCharComplete} { } {} -testConstraint testnumutfchars [llength [info commands testnumutfchars]] -testConstraint testfindfirst [llength [info commands testfindfirst]] -testConstraint testfindlast [llength [info commands testfindlast]] - test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars { testnumutfchars "" } {0} -test utf-4.2 {Tcl_NumUtfChars: length 1} testnumutfchars { - testnumutfchars [bytestring "\xC2\xA2"] +test utf-4.2 {Tcl_NumUtfChars: length 1} {testnumutfchars testbytestring} { + testnumutfchars [testbytestring "\xC2\xA2"] } {1} -test utf-4.3 {Tcl_NumUtfChars: long string} testnumutfchars { - testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] +test utf-4.3 {Tcl_NumUtfChars: long string} {testnumutfchars testbytestring} { + testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"] } {7} -test utf-4.4 {Tcl_NumUtfChars: #u0000} testnumutfchars { - testnumutfchars [bytestring "\xC0\x80"] +test utf-4.4 {Tcl_NumUtfChars: #u0000} {testnumutfchars testbytestring} { + testnumutfchars [testbytestring "\xC0\x80"] } {1} test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars { testnumutfchars "" 0 } {0} -test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} testnumutfchars { - testnumutfchars [bytestring "\xC2\xA2"] 1 +test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {testnumutfchars testbytestring} { + testnumutfchars [testbytestring "\xC2\xA2"] 1 } {1} -test utf-4.7 {Tcl_NumUtfChars: long string, calc len} testnumutfchars { - testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] 10 +test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {testnumutfchars testbytestring} { + testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"] 10 } {7} -test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} testnumutfchars { - testnumutfchars [bytestring "\xC0\x80"] 1 +test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {testnumutfchars testbytestring} { + testnumutfchars [testbytestring "\xC0\x80"] 1 } {1} # Bug [2738427]: Tcl_NumUtfChars(...) no overflow check -test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} testnumutfchars { - testnumutfchars [bytestring "\xE2\x82\xAC"] 2 +test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} {testnumutfchars testbytestring} { + testnumutfchars [testbytestring "\xE2\x82\xAC"] 2 } {2} -test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} testnumutfchars { - testnumutfchars [bytestring "\x00"] 2 +test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars testbytestring} { + testnumutfchars [testbytestring "\x00"] 2 } {2} -test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} testnumutfchars { - testnumutfchars [bytestring \xf0\x9f\x92\xa9] 3 +test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} { + testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 3 } {3} -test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} testnumutfchars { - testnumutfchars [bytestring \xf0\x9f\x92\xa9] 4 +test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring compat85} { + testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4 } {4} -test utf-5.1 {Tcl_UtfFindFirst} testfindfirst { - testfindfirst [bytestring "abcbc"] 98 +test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} { + testfindfirst [testbytestring "abcbc"] 98 } {bcbc} -test utf-5.2 {Tcl_UtfFindLast} testfindlast { - testfindlast [bytestring "abcbc"] 98 +test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} { + testfindlast [testbytestring "abcbc"] 98 } {bc} -testConstraint testutfnext [llength [info commands testutfnext]] - test utf-6.1 {Tcl_UtfNext} testutfnext { # This takes the pointer one past the terminating NUL. # This is really an invalid call. @@ -334,7 +334,7 @@ test utf-6.67 {Tcl_UtfNext} testutfnext { test utf-6.68 {Tcl_UtfNext} testutfnext { testutfnext \xF2\xA0\xA0G } 1 -test utf-6.69 {Tcl_UtfNext} testutfnext { +test utf-6.69 {Tcl_UtfNext} {testutfnext compat85} { testutfnext \xF2\xA0\xA0\xA0 } 1 test utf-6.70 {Tcl_UtfNext} testutfnext { @@ -349,22 +349,22 @@ test utf-6.71 {Tcl_UtfNext} testutfnext { test utf-6.73 {Tcl_UtfNext} testutfnext { testutfnext \xF2\xA0\xA0\xF8 } 1 -test utf-6.74 {Tcl_UtfNext} testutfnext { +test utf-6.74 {Tcl_UtfNext} {testutfnext compat85} { testutfnext \xF2\xA0\xA0\xA0G } 1 -test utf-6.75 {Tcl_UtfNext} testutfnext { +test utf-6.75 {Tcl_UtfNext} {testutfnext compat85} { testutfnext \xF2\xA0\xA0\xA0\xA0 } 1 -test utf-6.76 {Tcl_UtfNext} testutfnext { +test utf-6.76 {Tcl_UtfNext} {testutfnext compat85} { testutfnext \xF2\xA0\xA0\xA0\xD0 } 1 -test utf-6.77 {Tcl_UtfNext} testutfnext { +test utf-6.77 {Tcl_UtfNext} {testutfnext compat85} { testutfnext \xF2\xA0\xA0\xA0\xE8 } 1 -test utf-6.78 {Tcl_UtfNext} testutfnext { +test utf-6.78 {Tcl_UtfNext} {testutfnext compat85} { testutfnext \xF2\xA0\xA0\xA0\xF2 } 1 -test utf-6.79 {Tcl_UtfNext} testutfnext { +test utf-6.79 {Tcl_UtfNext} {testutfnext compat85} { testutfnext \xF2\xA0\xA0\xA0G\xF8 } 1 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext { @@ -388,7 +388,7 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext { test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext { testutfnext \xF0\x80\x80\x80 } 1 -test utf-6.87 {Tcl_UtfNext - overlong sequences} testutfnext { +test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext compat85} { testutfnext \xF0\x90\x80\x80 } 1 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} { @@ -403,15 +403,13 @@ test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {te test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} { testutfnext \xF0\x80\x80 1 } 2 -test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext { +test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext compat85} { testutfnext \xF4\x8F\xBF\xBF } 1 test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext { testutfnext \xF4\x90\x80\x80 } 1 -testConstraint testutfprev [llength [info commands testutfprev]] - test utf-7.1 {Tcl_UtfPrev} testutfprev { testutfprev {} } 0 @@ -475,13 +473,13 @@ test utf-7.9.1 {Tcl_UtfPrev} testutfprev { test utf-7.9.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xF8\xA0 3 } 2 -test utf-7.10 {Tcl_UtfPrev} testutfprev { +test utf-7.10 {Tcl_UtfPrev} {testutfprev compat85} { testutfprev A\xF2\xA0 } 2 -test utf-7.10.1 {Tcl_UtfPrev} testutfprev { +test utf-7.10.1 {Tcl_UtfPrev} {testutfprev compat85} { testutfprev A\xF2\xA0\xA0\xA0 3 } 2 -test utf-7.10.2 {Tcl_UtfPrev} testutfprev { +test utf-7.10.2 {Tcl_UtfPrev} {testutfprev compat85} { testutfprev A\xF2\xA0\xF8\xA0 3 } 2 test utf-7.11 {Tcl_UtfPrev} testutfprev { @@ -523,13 +521,13 @@ test utf-7.14.1 {Tcl_UtfPrev} testutfprev { test utf-7.14.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xA0\xF8 4 } 3 -test utf-7.15 {Tcl_UtfPrev} testutfprev { +test utf-7.15 {Tcl_UtfPrev} {testutfprev compat85} { testutfprev A\xF2\xA0\xA0 } 3 -test utf-7.15.1 {Tcl_UtfPrev} testutfprev { +test utf-7.15.1 {Tcl_UtfPrev} {testutfprev compat85} { testutfprev A\xF2\xA0\xA0\xA0 4 } 3 -test utf-7.15.2 {Tcl_UtfPrev} testutfprev { +test utf-7.15.2 {Tcl_UtfPrev} {testutfprev compat85} { testutfprev A\xF2\xA0\xA0\xF8 4 } 3 test utf-7.16 {Tcl_UtfPrev} testutfprev { @@ -562,7 +560,7 @@ test utf-7.18.2 {Tcl_UtfPrev} testutfprev { test utf-7.19 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xA0\xA0 } 4 -test utf-7.20 {Tcl_UtfPrev} testutfprev { +test utf-7.20 {Tcl_UtfPrev} {testutfprev compat85} { testutfprev A\xF2\xA0\xA0\xA0 } 4 test utf-7.21 {Tcl_UtfPrev} testutfprev { @@ -622,16 +620,16 @@ test utf-7.36 {Tcl_UtfPrev -- overlong sequence} testutfprev { test utf-7.37 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\xA0\x80 3 } 1 -test utf-7.38 {Tcl_UtfPrev -- overlong sequence} testutfprev { +test utf-7.38 {Tcl_UtfPrev -- overlong sequence} {testutfprev compat85} { testutfprev A\xE0\xA0\x80 2 } 1 -test utf-7.39 {Tcl_UtfPrev -- overlong sequence} testutfprev { +test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev compat85} { testutfprev A\xF0\x90\x80\x80 } 4 -test utf-7.40 {Tcl_UtfPrev -- overlong sequence} testutfprev { +test utf-7.40 {Tcl_UtfPrev -- overlong sequence} {testutfprev compat85} { testutfprev A\xF0\x90\x80\x80 4 } 3 -test utf-7.41 {Tcl_UtfPrev -- overlong sequence} testutfprev { +test utf-7.41 {Tcl_UtfPrev -- overlong sequence} {testutfprev compat85} { testutfprev A\xF0\x90\x80\x80 3 } 2 test utf-7.42 {Tcl_UtfPrev -- overlong sequence} testutfprev { @@ -658,13 +656,13 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {te test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} { testutfprev \xE8\xA0\x00 2 } 0 -test utf-7.48 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { +test utf-7.48 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} { testutfprev A\xF4\x8F\xBF\xBF } 4 -test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { +test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} { testutfprev A\xF4\x8F\xBF\xBF 4 } 3 -test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { +test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} { testutfprev A\xF4\x8F\xBF\xBF 3 } 2 test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { @@ -708,18 +706,18 @@ test utf-10.1 {Tcl_UtfBackslash: dst == NULL} { set x \n } { } -test utf-10.2 {Tcl_UtfBackslash: \u subst} { - set x \ua2 -} [bytestring "\xc2\xa2"] -test utf-10.3 {Tcl_UtfBackslash: longer \u subst} { - set x \u4e21 -} [bytestring "\xe4\xb8\xa1"] -test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} { - set x \u4e2k -} "[bytestring \xd3\xa2]k" -test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} { - set x \u4e216 -} "[bytestring \xe4\xb8\xa1]6" +test utf-10.2 {Tcl_UtfBackslash: \u subst} testbytestring { + expr {"\uA2" eq [testbytestring "\xC2\xA2"]} +} 1 +test utf-10.3 {Tcl_UtfBackslash: longer \u subst} testbytestring { + expr {"\u4E21" eq [testbytestring "\xE4\xB8\xA1"]} +} 1 +test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} testbytestring { + expr {"\u4E2k" eq "[testbytestring \xD3\xA2]k"} +} 1 +test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} testbytestring { + expr {"\u4E216" eq [testbytestring "\xE4\xB8\xA1"]6} +} 1 proc bsCheck {char num} { global errNum test utf-10.$errNum {backslash substitution} { @@ -774,11 +772,11 @@ test utf-11.2 {Tcl_UtfToUpper} { string toupper abc } ABC test utf-11.3 {Tcl_UtfToUpper} { - string toupper \u00e3ab -} \u00c3AB + string toupper \xE3gh +} \xC3GH test utf-11.4 {Tcl_UtfToUpper} { - string toupper \u01e3ab -} \u01e2AB + string toupper \u01E3ab +} \u01E2AB test utf-12.1 {Tcl_UtfToLower} { string tolower {} @@ -787,11 +785,11 @@ test utf-12.2 {Tcl_UtfToLower} { string tolower ABC } abc test utf-12.3 {Tcl_UtfToLower} { - string tolower \u00c3AB -} \u00e3ab + string tolower \xC3GH +} \xE3gh test utf-12.4 {Tcl_UtfToLower} { - string tolower \u01e2AB -} \u01e3ab + string tolower \u01E2AB +} \u01E3ab test utf-13.1 {Tcl_UtfToTitle} { string totitle {} @@ -800,11 +798,11 @@ test utf-13.2 {Tcl_UtfToTitle} { string totitle abc } Abc test utf-13.3 {Tcl_UtfToTitle} { - string totitle \u00e3ab -} \u00c3ab + string totitle \xE3GH +} \xC3gh test utf-13.4 {Tcl_UtfToTitle} { - string totitle \u01f3ab -} \u01f2ab + string totitle \u01F3AB +} \u01F2ab test utf-14.1 {Tcl_UtfNcasecmp} { string compare -nocase a b @@ -823,7 +821,7 @@ test utf-15.1 {Tcl_UniCharToUpper, negative delta} { string toupper aA } AA test utf-15.2 {Tcl_UniCharToUpper, positive delta} { - string toupper \u0178\u00ff + string toupper \u0178\xFF } \u0178\u0178 test utf-15.3 {Tcl_UniCharToUpper, no delta} { string toupper ! @@ -833,24 +831,24 @@ test utf-16.1 {Tcl_UniCharToLower, negative delta} { string tolower aA } aa test utf-16.2 {Tcl_UniCharToLower, positive delta} { - string tolower \u0178\u00ff\uA78D\u01c5 -} \u00ff\u00ff\u0265\u01c6 + string tolower \u0178\xFF\uA78D\u01C5 +} \xFF\xFF\u0265\u01C6 test utf-17.1 {Tcl_UniCharToLower, no delta} { string tolower ! } ! test utf-18.1 {Tcl_UniCharToTitle, add one for title} { - string totitle \u01c4 -} \u01c5 + string totitle \u01C4 +} \u01C5 test utf-18.2 {Tcl_UniCharToTitle, subtract one for title} { - string totitle \u01c6 -} \u01c5 + string totitle \u01C6 +} \u01C5 test utf-18.3 {Tcl_UniCharToTitle, subtract delta for title (positive)} { - string totitle \u017f -} \u0053 + string totitle \u017F +} \x53 test utf-18.4 {Tcl_UniCharToTitle, subtract delta for title (negative)} { - string totitle \u00ff + string totitle \xFF } \u0178 test utf-18.5 {Tcl_UniCharToTitle, no delta} { string totitle ! @@ -865,15 +863,15 @@ test utf-20.1 {TclUniCharNcmp} { test utf-21.1 {TclUniCharIsAlnum} { # this returns 1 with Unicode 7 compliance - string is alnum \u1040\u021f\u0220 + string is alnum \u1040\u021F\u0220 } {1} test utf-21.2 {unicode alnum char in regc_locale.c} { # this returns 1 with Unicode 7 compliance - list [regexp {^[[:alnum:]]+$} \u1040\u021f\u0220] [regexp {^\w+$} \u1040\u021f\u0220_\u203f\u2040\u2054\ufe33\ufe34\ufe4d\ufe4e\ufe4f\uff3f] + list [regexp {^[[:alnum:]]+$} \u1040\u021F\u0220] [regexp {^\w+$} \u1040\u021F\u0220_\u203F\u2040\u2054\uFE33\uFE34\uFE4D\uFE4E\uFE4F\uFF3F] } {1 1} test utf-21.3 {unicode print char in regc_locale.c} { # this returns 1 with Unicode 7 compliance - regexp {^[[:print:]]+$} \ufbc1 + regexp {^[[:print:]]+$} \uFBC1 } 1 test utf-21.4 {TclUniCharIsGraph} { # [Bug 3464428] @@ -885,69 +883,67 @@ test utf-21.5 {unicode graph char in regc_locale.c} { } {1} test utf-21.6 {TclUniCharIsGraph} { # [Bug 3464428] - string is graph \u00a0 + string is graph \xA0 } {0} test utf-21.7 {unicode graph char in regc_locale.c} { # [Bug 3464428] - regexp {[[:graph:]]} \u0020\u00a0\u2028\u2029 + regexp {[[:graph:]]} \x20\xA0\u2028\u2029 } {0} test utf-21.8 {TclUniCharIsPrint} { # [Bug 3464428] - string is print \u0009 + string is print \x09 } {0} test utf-21.9 {unicode print char in regc_locale.c} { # [Bug 3464428] - regexp {[[:print:]]} \u0009 + regexp {[[:print:]]} \x09 } {0} test utf-21.10 {unicode print char in regc_locale.c} { # [Bug 3464428] - regexp {[[:print:]]} \u0009 + regexp {[[:print:]]} \x09 } {0} test utf-21.11 {TclUniCharIsControl} { # [Bug 3464428] - string is control \u0000\u001f\u00ad\u0605\u061c\u180e\u2066\ufeff + string is control \x00\x1F\xAD\u0605\u061C\u180E\u2066\uFEFF } {1} test utf-21.12 {unicode control char in regc_locale.c} { # [Bug 3464428], [Bug a876646efe] - regexp {^[[:cntrl:]]*$} \u0000\u001f\u00ad\u0605\u061c\u180e\u2066\ufeff + regexp {^[[:cntrl:]]*$} \x00\x1F\xAD\u0605\u061C\u180E\u2066\uFEFF } {1} test utf-22.1 {TclUniCharIsWordChar} { string wordend "xyz123_bar fg" 0 } 10 test utf-22.2 {TclUniCharIsWordChar} { - string wordend "x\u5080z123_bar\u203c fg" 0 + string wordend "x\u5080z123_bar\u203C fg" 0 } 10 test utf-23.1 {TclUniCharIsAlpha} { # this returns 1 with Unicode 7 compliance - string is alpha \u021f\u0220\u037f\u052f + string is alpha \u021F\u0220\u037F\u052F } {1} test utf-23.2 {unicode alpha char in regc_locale.c} { # this returns 1 with Unicode 7 compliance - regexp {^[[:alpha:]]+$} \u021f\u0220\u037f\u052f + regexp {^[[:alpha:]]+$} \u021F\u0220\u037F\u052F } {1} test utf-24.1 {TclUniCharIsDigit} { # this returns 1 with Unicode 7 compliance - string is digit \u1040\uabf0 + string is digit \u1040\uABF0 } {1} test utf-24.2 {unicode digit char in regc_locale.c} { # this returns 1 with Unicode 7 compliance - list [regexp {^[[:digit:]]+$} \u1040\uabf0] [regexp {^\d+$} \u1040\uabf0] + list [regexp {^[[:digit:]]+$} \u1040\uABF0] [regexp {^\d+$} \u1040\uABF0] } {1 1} test utf-24.3 {TclUniCharIsSpace} { # this returns 1 with Unicode 7 compliance - string is space \u1680\u180e\u202f + string is space \u1680\u180E\u202F } {1} test utf-24.4 {unicode space char in regc_locale.c} { # this returns 1 with Unicode 7 compliance - list [regexp {^[[:space:]]+$} \u1680\u180e\u202f] [regexp {^\s+$} \u1680\u180e\u202f] + list [regexp {^[[:space:]]+$} \u1680\u180E\u202F] [regexp {^\s+$} \u1680\u180E\u202F] } {1 1} -testConstraint teststringobj [llength [info commands teststringobj]] - test utf-25.1 {Tcl_UniCharNcasecmp} -constraints teststringobj \ -setup { testobj freeallvars -- cgit v0.12 From 62e02d2ea67073cbb810cf1710214254f00a3a0c Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 18 Apr 2020 20:23:26 +0000 Subject: More test-case cleanup --- tests/utf.test | 173 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 99 insertions(+), 74 deletions(-) diff --git a/tests/utf.test b/tests/utf.test index 9bf104a..febeb50 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -17,12 +17,16 @@ if {[lsearch [namespace children] ::tcltest] == -1} { catch [list package require -exact Tcltest [info patchlevel]] testConstraint testbytestring [llength [info commands testbytestring]] +testConstraint tip389 [expr {[string length \U010000] == 2}] +testConstraint testfindfirst [llength [info commands testfindfirst]] +testConstraint testfindlast [llength [info commands testfindlast]] +testConstraint testnumutfchars [llength [info commands testnumutfchars]] +testConstraint teststringobj [llength [info commands teststringobj]] +testConstraint testutfnext [llength [info commands testutfnext]] +testConstraint testutfprev [llength [info commands testutfprev]] catch {unset x} -# Some tests require support for 4-byte UTF-8 sequences -testConstraint tip389 [expr {[string length \U010000] == 2}] - test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} testbytestring { expr {"\x01" eq [testbytestring "\x01"]} } 1 @@ -94,6 +98,7 @@ test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} t string length [testbytestring "\xF0\x8F\xBF\xBF"] } {4} test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} testbytestring { + # Would decode to U+110000 but that is outside the Unicode range. string length [testbytestring "\xF4\x90\x80\x80"] } {4} test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestring { @@ -103,10 +108,6 @@ test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestr test utf-3.1 {Tcl_UtfCharComplete} { } {} -testConstraint testnumutfchars [llength [info commands testnumutfchars]] -testConstraint testfindfirst [llength [info commands testfindfirst]] -testConstraint testfindlast [llength [info commands testfindlast]] - test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars { testnumutfchars "" } {0} @@ -152,8 +153,6 @@ test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} { testfindlast [testbytestring "abcbc"] 98 } {bc} -testConstraint testutfnext [llength [info commands testutfnext]] - test utf-6.1 {Tcl_UtfNext} testutfnext { # This takes the pointer one past the terminating NUL. # This is really an invalid call. @@ -175,7 +174,7 @@ test utf-6.6 {Tcl_UtfNext} testutfnext { testutfnext A\xE8 } 1 test utf-6.7 {Tcl_UtfNext} testutfnext { - testutfnext A\xF4 + testutfnext A\xF2 } 1 test utf-6.8 {Tcl_UtfNext} testutfnext { testutfnext A\xF8 @@ -196,7 +195,7 @@ test utf-6.13 {Tcl_UtfNext} testutfnext { testutfnext \xA0\xE8 } 1 test utf-6.14 {Tcl_UtfNext} testutfnext { - testutfnext \xA0\xF4 + testutfnext \xA0\xF2 } 1 test utf-6.15 {Tcl_UtfNext} testutfnext { testutfnext \xA0\xF8 @@ -205,7 +204,7 @@ test utf-6.16 {Tcl_UtfNext} testutfnext { testutfnext \xD0 } 1 test utf-6.17 {Tcl_UtfNext} testutfnext { - testutfnext \xD0A + testutfnext \xD0G } 1 test utf-6.18 {Tcl_UtfNext} testutfnext { testutfnext \xD0\xA0 @@ -217,7 +216,7 @@ test utf-6.20 {Tcl_UtfNext} testutfnext { testutfnext \xD0\xE8 } 1 test utf-6.21 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xF4 + testutfnext \xD0\xF2 } 1 test utf-6.22 {Tcl_UtfNext} testutfnext { testutfnext \xD0\xF8 @@ -226,7 +225,7 @@ test utf-6.23 {Tcl_UtfNext} testutfnext { testutfnext \xE8 } 1 test utf-6.24 {Tcl_UtfNext} testutfnext { - testutfnext \xE8A + testutfnext \xE8G } 1 test utf-6.25 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xA0 @@ -238,37 +237,37 @@ test utf-6.27 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xE8 } 1 test utf-6.28 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xF4 + testutfnext \xE8\xF2 } 1 test utf-6.29 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xF8 } 1 test utf-6.30 {Tcl_UtfNext} testutfnext { - testutfnext \xF4 + testutfnext \xF2 } 1 test utf-6.31 {Tcl_UtfNext} testutfnext { - testutfnext \xF4A + testutfnext \xF2A } 1 test utf-6.32 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0 + testutfnext \xF2\xA0 } 1 test utf-6.33 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xD0 + testutfnext \xF2\xD0 } 1 test utf-6.34 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xE8 + testutfnext \xF2\xE8 } 1 test utf-6.35 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xF4 + testutfnext \xF2\xF2 } 1 test utf-6.36 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xF8 + testutfnext \xF2\xF8 } 1 test utf-6.37 {Tcl_UtfNext} testutfnext { testutfnext \xF8 } 1 test utf-6.38 {Tcl_UtfNext} testutfnext { - testutfnext \xF8A + testutfnext \xF8G } 1 test utf-6.39 {Tcl_UtfNext} testutfnext { testutfnext \xF8\xA0 @@ -280,7 +279,7 @@ test utf-6.41 {Tcl_UtfNext} testutfnext { testutfnext \xF8\xE8 } 1 test utf-6.42 {Tcl_UtfNext} testutfnext { - testutfnext \xF8\xF4 + testutfnext \xF8\xF2 } 1 test utf-6.43 {Tcl_UtfNext} testutfnext { testutfnext \xF8\xF8 @@ -298,7 +297,7 @@ test utf-6.47 {Tcl_UtfNext} testutfnext { testutfnext \xD0\xA0\xE8 } 2 test utf-6.48 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0\xF4 + testutfnext \xD0\xA0\xF2 } 2 test utf-6.49 {Tcl_UtfNext} testutfnext { testutfnext \xD0\xA0\xF8 @@ -316,28 +315,28 @@ test utf-6.53 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xA0\xE8 } 1 test utf-6.54 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xF4 + testutfnext \xE8\xA0\xF2 } 1 test utf-6.55 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xA0\xF8 } 1 test utf-6.56 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0G + testutfnext \xF2\xA0G } 1 test utf-6.57 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0 + testutfnext \xF2\xA0\xA0 } 1 test utf-6.58 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xD0 + testutfnext \xF2\xA0\xD0 } 1 test utf-6.59 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xE8 + testutfnext \xF2\xA0\xE8 } 1 test utf-6.60 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xF4 + testutfnext \xF2\xA0\xF2 } 1 test utf-6.61 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xF8 + testutfnext \xF2\xA0\xF8 } 1 test utf-6.62 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xA0\xA0G @@ -352,47 +351,47 @@ test utf-6.65 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xA0\xA0\xE8 } 3 test utf-6.66 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0\xF4 + testutfnext \xE8\xA0\xA0\xF2 } 3 test utf-6.67 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xA0\xA0\xF8 } 3 test utf-6.68 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0G + testutfnext \xF2\xA0\xA0G } 1 test utf-6.69 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xA0 -} 1 + testutfnext \xF2\xA0\xA0\xA0 +} 4 test utf-6.70 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xD0 + testutfnext \xF2\xA0\xA0\xD0 } 1 test utf-6.71 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xE8 + testutfnext \xF2\xA0\xA0\xE8 } 1 test utf-6.71 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xF4 + testutfnext \xF2\xA0\xA0\xF4 } 1 test utf-6.73 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xF8 + testutfnext \xF2\xA0\xA0\xF8 } 1 test utf-6.74 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xA0G -} 1 + testutfnext \xF2\xA0\xA0\xA0G +} 4 test utf-6.75 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xA0\xA0 -} 1 + testutfnext \xF2\xA0\xA0\xA0\xA0 +} 4 test utf-6.76 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xA0\xD0 -} 1 + testutfnext \xF2\xA0\xA0\xA0\xD0 +} 4 test utf-6.77 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xA0\xE8 -} 1 + testutfnext \xF2\xA0\xA0\xA0\xE8 +} 4 test utf-6.78 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xA0\xF4 -} 1 + testutfnext \xF2\xA0\xA0\xA0\xF2 +} 4 test utf-6.79 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xA0G\xF8 -} 1 + testutfnext \xF2\xA0\xA0\xA0G\xF8 +} 4 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext { testutfnext \xC0\x80 } 2 @@ -429,8 +428,12 @@ test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} tes test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} testutfnext { testutfnext \xF0\x80\x80 1 } 2 - -testConstraint testutfprev [llength [info commands testutfprev]] +test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext { + testutfnext \xF4\x8F\xBF\xBF +} 4 +test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext { + testutfnext \xF4\x90\x80\x80 +} 1 test utf-7.1 {Tcl_UtfPrev} testutfprev { testutfprev {} @@ -451,13 +454,13 @@ test utf-7.4.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xF8\xA0\xA0 2 } 1 test utf-7.5 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4 + testutfprev A\xF2 } 1 test utf-7.5.1 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0\xA0\xA0 2 + testutfprev A\xF2\xA0\xA0\xA0 2 } 1 test utf-7.5.2 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xF8\xA0\xA0 2 + testutfprev A\xF2\xF8\xA0\xA0 2 } 1 test utf-7.6 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8 @@ -496,13 +499,13 @@ test utf-7.9.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xF8\xA0 3 } 2 test utf-7.10 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0 + testutfprev A\xF2\xA0 } 1 test utf-7.10.1 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0\xA0\xA0 3 + testutfprev A\xF2\xA0\xA0\xA0 3 } 1 test utf-7.10.2 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0\xF8\xA0 3 + testutfprev A\xF2\xA0\xF8\xA0 3 } 1 test utf-7.11 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8\xA0 @@ -544,13 +547,13 @@ test utf-7.14.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xA0\xF8 4 } 3 test utf-7.15 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0\xA0 + testutfprev A\xF2\xA0\xA0 } 1 test utf-7.15.1 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0\xA0\xA0 4 + testutfprev A\xF2\xA0\xA0\xA0 4 } 1 test utf-7.15.2 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0\xA0\xF8 4 + testutfprev A\xF2\xA0\xA0\xF8 4 } 1 test utf-7.16 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8\xA0\xA0 @@ -583,7 +586,7 @@ test utf-7.19 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xA0\xA0 } 4 test utf-7.20 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0\xA0\xA0 + testutfprev A\xF2\xA0\xA0\xA0 } 1 test utf-7.21 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8\xA0\xA0\xA0 @@ -678,6 +681,30 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} tes test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} testutfprev { testutfprev \xE8\xA0\x00 2 } 0 +test utf-7.48 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x8F\xBF\xBF +} 1 +test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x8F\xBF\xBF 4 +} 1 +test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x8F\xBF\xBF 3 +} 1 +test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x8F\xBF\xBF 2 +} 1 +test utf-7.49 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x90\x80\x80 +} 1 +test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x90\x80\x80 4 +} 1 +test utf-7.49.2 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x90\x80\x80 3 +} 1 +test utf-7.49.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x90\x80\x80 2 +} 1 test utf-8.1 {Tcl_UniCharAtIndex: index = 0} { string index abcd 0 @@ -800,11 +827,11 @@ test utf-11.2 {Tcl_UtfToUpper} { string toupper abc } ABC test utf-11.3 {Tcl_UtfToUpper} { - string toupper \xE3AB -} \xC3AB + string toupper \xE3gh +} \xC3GH test utf-11.4 {Tcl_UtfToUpper} { - string toupper \u01E3AB -} \u01E2AB + string toupper \u01E3gh +} \u01E2GH test utf-11.5 {Tcl_UtfToUpper Georgian (new in Unicode 11)} { string toupper \u10D0\u1C90 } \u1C90\u1C90 @@ -819,8 +846,8 @@ test utf-12.2 {Tcl_UtfToLower} { string tolower ABC } abc test utf-12.3 {Tcl_UtfToLower} { - string tolower \xC3AB -} \xE3ab + string tolower \xC3GH +} \xE3gh test utf-12.4 {Tcl_UtfToLower} { string tolower \u01E2AB } \u01E3ab @@ -838,8 +865,8 @@ test utf-13.2 {Tcl_UtfToTitle} { string totitle abc } Abc test utf-13.3 {Tcl_UtfToTitle} { - string totitle \xE3AB -} \xC3ab + string totitle \xE3GH +} \xC3gh test utf-13.4 {Tcl_UtfToTitle} { string totitle \u01F3AB } \u01F2ab @@ -995,8 +1022,6 @@ test utf-24.4 {unicode space char in regc_locale.c} { list [regexp {^[[:space:]]+$} \x85\u1680\u180E\u200B\u202F\u2060] [regexp {^\s+$} \x85\u1680\u180E\u200B\u202F\u2060] } {1 1} -testConstraint teststringobj [llength [info commands teststringobj]] - test utf-25.1 {Tcl_UniCharNcasecmp} -constraints teststringobj \ -setup { testobj freeallvars -- cgit v0.12 From 4551b215909f0c208728bd8fa5f39e07b6370dde Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 19 Apr 2020 09:12:54 +0000 Subject: 4 more testcases, which could detect future regressions in Emoji handling --- tests/utf.test | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/utf.test b/tests/utf.test index febeb50..150e395 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -115,7 +115,7 @@ test utf-4.2 {Tcl_NumUtfChars: length 1} {testnumutfchars testbytestring} { testnumutfchars [testbytestring "\xC2\xA2"] } {1} test utf-4.3 {Tcl_NumUtfChars: long string} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"] + testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E"] } {7} test utf-4.4 {Tcl_NumUtfChars: #u0000} {testnumutfchars testbytestring} { testnumutfchars [testbytestring "\xC0\x80"] @@ -724,6 +724,12 @@ test utf-8.5 {Tcl_UniCharAtIndex: high surrogate} { test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} { string index \uDC42 0 } "\uDC42" +test utf-8.7 {Tcl_UniCharAtIndex: Emoji} { + string index \U1F600 0 +} "\U1F600" +test utf-8.8 {Tcl_UniCharAtIndex: Emoji} { + string index \U1F600 1 +} {} test utf-9.1 {Tcl_UtfAtIndex: index = 0} { string range abcd 0 2 @@ -731,6 +737,12 @@ test utf-9.1 {Tcl_UtfAtIndex: index = 0} { test utf-9.2 {Tcl_UtfAtIndex: index > 0} { string range \u4E4E\u25A\xFF\u543klmnop 1 5 } "\u25A\xFF\u543kl" +test utf-9.3 {Tcl_UtfAtIndex: index = 0, Emoji} { + string range \U1F600G 0 0 +} "\U1F600" +test utf-9.4 {Tcl_UtfAtIndex: index > 0, Emoji} tip389 { + string range \U1F600G 1 1 +} {} test utf-10.1 {Tcl_UtfBackslash: dst == NULL} { -- cgit v0.12 From 942bd1ddd961886f38b16577614a77f473bc1239 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 19 Apr 2020 09:57:08 +0000 Subject: Proposed fix for [27944a3661]: Taming test utf-6.88. This fix is not optimized, it still uses TclUtfToUniChar() in its implementation. But optimizing work is on its way, hopefully, coming through 8.5 .. 8.6 .. and up. --- generic/tclUtf.c | 18 ++++++++++++++---- tests/utf.test | 22 +++++++++++++++++----- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 2a04414..b4f760f 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -852,9 +852,11 @@ Tcl_UtfFindLast( * * Tcl_UtfNext -- * - * Given a pointer to some current location in a UTF-8 string, move - * forward one character. The caller must ensure that they are not asking - * for the next character after the last character in the string. + * Given a pointer to some location in a UTF-8 string, Tcl_UtfNext + * returns a pointer to the next UTF-8 character in the string. + * The caller must not ask for the next character after the last + * character in the string if the string is not terminated by a null + * character. * * Results: * The return value is the pointer to the next character in the UTF-8 @@ -871,7 +873,15 @@ Tcl_UtfNext( const char *src) /* The current location in the string. */ { Tcl_UniChar ch = 0; - int len = TclUtfToUniChar(src, &ch); + int len; + + if (((*src) & 0xC0) == 0x80) { + if ((((*++src) & 0xC0) == 0x80) && (((*++src) & 0xC0) == 0x80)) { + ++src; + } + return src; + } + len = TclUtfToUniChar(src, &ch); #if TCL_UTF_MAX <= 3 if ((ch >= 0xD800) && (len < 3)) { diff --git a/tests/utf.test b/tests/utf.test index 150e395..f3633bd 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -187,7 +187,7 @@ test utf-6.10 {Tcl_UtfNext} testutfnext { } 1 test utf-6.11 {Tcl_UtfNext} testutfnext { testutfnext \xA0\xA0 -} 1 +} 2 test utf-6.12 {Tcl_UtfNext} testutfnext { testutfnext \xA0\xD0 } 1 @@ -418,22 +418,34 @@ test utf-6.87 {Tcl_UtfNext - overlong sequences} testutfnext { } 4 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} testutfnext { testutfnext \xA0\xA0 -} 1 +} 2 test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} testutfnext { testutfnext \xE8\xA0\xA0 1 -} 2 +} 3 test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} testutfnext { testutfnext \x80\x80 -} 1 +} 2 test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} testutfnext { testutfnext \xF0\x80\x80 1 -} 2 +} 3 test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext { testutfnext \xF4\x8F\xBF\xBF } 4 test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext { testutfnext \xF4\x90\x80\x80 } 1 +test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { + testutfnext \xA0\xA0\xA0 +} 3 +test utf-6.92.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { + testutfnext \xF2\xA0\xA0\xA0 1 +} 4 +test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { + testutfnext \x80\x80\x80 +} 3 +test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { + testutfnext \xF0\x80\x80\x80 1 +} 4 test utf-7.1 {Tcl_UtfPrev} testutfprev { testutfprev {} -- cgit v0.12 From 1a343cd043776b8acc3c4a047a10556c70f077dd Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 19 Apr 2020 19:37:05 +0000 Subject: More test-cases. Fix wrong quoting in testcase utf-10.5 --- tests/utf.test | 52 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 8 deletions(-) diff --git a/tests/utf.test b/tests/utf.test index 189b85d..946aa83 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -42,6 +42,7 @@ test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} testbytestring { test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring { expr {[format %c -1] eq [testbytestring "\xEF\xBF\xBD"]} } 1 + test utf-2.1 {Tcl_UtfToUniChar: low ascii} { string length "abc" } {3} @@ -90,7 +91,7 @@ test utf-4.2 {Tcl_NumUtfChars: length 1} {testnumutfchars testbytestring} { testnumutfchars [testbytestring "\xC2\xA2"] } {1} test utf-4.3 {Tcl_NumUtfChars: long string} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"] + testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E"] } {7} test utf-4.4 {Tcl_NumUtfChars: #u0000} {testnumutfchars testbytestring} { testnumutfchars [testbytestring "\xC0\x80"] @@ -693,6 +694,18 @@ test utf-8.3 {Tcl_UniCharAtIndex: index > 0} { test utf-8.4 {Tcl_UniCharAtIndex: index > 0} { string index \u4E4E\u25A\xFF\u543 2 } "\uFF" +test utf-8.5 {Tcl_UniCharAtIndex: high surrogate} { + string index \uD842 0 +} "\uD842" +test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} { + string index \uDC42 0 +} "\uDC42" +test utf-8.7 {Tcl_UniCharAtIndex: Emoji} compat85 { + string index \uD83D\uDE00 0 +} "\uD83D" +test utf-8.8 {Tcl_UniCharAtIndex: Emoji} compat85 { + string index \uD83D\uDE00 1 +} "\uDE00" test utf-9.1 {Tcl_UtfAtIndex: index = 0} { string range abcd 0 2 @@ -700,6 +713,12 @@ test utf-9.1 {Tcl_UtfAtIndex: index = 0} { test utf-9.2 {Tcl_UtfAtIndex: index > 0} { string range \u4E4E\u25A\xFF\u543klmnop 1 5 } "\u25A\xFF\u543kl" +test utf-9.3 {Tcl_UtfAtIndex: index = 0, Emoji} compat85 { + string range \uD83D\uDE00G 0 0 +} "\uD83D" +test utf-9.4 {Tcl_UtfAtIndex: index > 0, Emoji} compat85 { + string range \uD83D\uDE00G 1 1 +} "\uDE00" test utf-10.1 {Tcl_UtfBackslash: dst == NULL} { @@ -716,7 +735,7 @@ test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} testbytestring { expr {"\u4E2k" eq "[testbytestring \xD3\xA2]k"} } 1 test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} testbytestring { - expr {"\u4E216" eq [testbytestring "\xE4\xB8\xA1"]6} + expr {"\u4E216" eq "[testbytestring \xE4\xB8\xA1]6"} } 1 proc bsCheck {char num} { global errNum @@ -775,8 +794,8 @@ test utf-11.3 {Tcl_UtfToUpper} { string toupper \xE3gh } \xC3GH test utf-11.4 {Tcl_UtfToUpper} { - string toupper \u01E3ab -} \u01E2AB + string toupper \u01E3gh +} \u01E2GH test utf-12.1 {Tcl_UtfToLower} { string tolower {} @@ -788,8 +807,14 @@ test utf-12.3 {Tcl_UtfToLower} { string tolower \xC3GH } \xE3gh test utf-12.4 {Tcl_UtfToLower} { - string tolower \u01E2AB -} \u01E3ab + string tolower \u01E2GH +} \u01E3gh +test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} { + string tolower \u10D0\u1C90 +} \u10D0\u10D0 +test utf-12.6 {Tcl_UtfToUpper low/high surrogate)} { + string tolower \uDC24\uD824 +} \uDC24\uD824 test utf-13.1 {Tcl_UtfToTitle} { string totitle {} @@ -803,6 +828,15 @@ test utf-13.3 {Tcl_UtfToTitle} { test utf-13.4 {Tcl_UtfToTitle} { string totitle \u01F3AB } \u01F2ab +test utf-13.5 {Tcl_UtfToTitle Georgian (new in Unicode 11)} { + string totitle \u10D0\u1C90 +} \u10D0\u1C90 +test utf-13.6 {Tcl_UtfToTitle Georgian (new in Unicode 11)} { + string totitle \u1C90\u10D0 +} \u1C90\u10D0 +test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} { + string totitle \uDC24\uD824 +} \uDC24\uD824 test utf-14.1 {Tcl_UtfNcasecmp} { string compare -nocase a b @@ -854,9 +888,11 @@ test utf-18.5 {Tcl_UniCharToTitle, no delta} { string totitle ! } ! -test utf-19.1 {TclUniCharLen} { +test utf-19.1 {TclUniCharLen} -body { list [regexp \\d abc456def foo] $foo -} {1 4} +} -cleanup { + unset -nocomplain foo +} -result {1 4} test utf-20.1 {TclUniCharNcmp} { } {} -- cgit v0.12 From 9bcd48e5ffd32d1858e4d0f90a4eaee550ede17f Mon Sep 17 00:00:00 2001 From: dgp Date: Sun, 19 Apr 2020 22:02:10 +0000 Subject: typo --- tests/utf.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utf.test b/tests/utf.test index 946aa83..07863b9 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -103,7 +103,7 @@ test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {testnumutfchars testbytestri testnumutfchars [testbytestring "\xC2\xA2"] 1 } {1} test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"] 10 + testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E"] 10 } {7} test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {testnumutfchars testbytestring} { testnumutfchars [testbytestring "\xC0\x80"] 1 -- cgit v0.12 From bb5381f946565a91e146910d62c56b40c02c5193 Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 20 Apr 2020 05:35:54 +0000 Subject: Reconcile tests to the 8.5 branch history. --- tests/utf.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utf.test b/tests/utf.test index 07863b9..1ca3647 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -621,7 +621,7 @@ test utf-7.36 {Tcl_UtfPrev -- overlong sequence} testutfprev { test utf-7.37 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\xA0\x80 3 } 1 -test utf-7.38 {Tcl_UtfPrev -- overlong sequence} {testutfprev compat85} { +test utf-7.38 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\xA0\x80 2 } 1 test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev compat85} { -- cgit v0.12 From 534db753aefcbe8cbdbec69611e9c6e31ea3deec Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 20 Apr 2020 06:45:47 +0000 Subject: Backport the encoding fix for source-7.2 in TCL_UTF_MAX=6 build. --- generic/tclEncoding.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 6c16827..5a9d2d5 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2470,20 +2470,33 @@ UtfToUnicodeProc( if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; - } + } src += TclUtfToUniChar(src, &ch); /* * Need to handle this in a way that won't cause misalignment * by casting dst to a Tcl_UniChar. [Bug 1122671] - * XXX: This hard-codes the assumed size of Tcl_UniChar as 2. */ #ifdef WORDS_BIGENDIAN +#if TCL_UTF_MAX > 4 + *dst++ = (ch >> 24); + *dst++ = ((ch >> 16) & 0xFF); + *dst++ = ((ch >> 8) & 0xFF); + *dst++ = (ch & 0xFF); +#else *dst++ = (ch >> 8); *dst++ = (ch & 0xFF); +#endif +#else +#if TCL_UTF_MAX > 4 + *dst++ = (ch & 0xFF); + *dst++ = ((ch >> 8) & 0xFF); + *dst++ = ((ch >> 16) & 0xFF); + *dst++ = (ch >> 24); #else *dst++ = (ch & 0xFF); *dst++ = (ch >> 8); #endif +#endif } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; -- cgit v0.12 From 0424b820bc8101075ba4673a8d07df870348f134 Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 20 Apr 2020 07:34:28 +0000 Subject: Backport the fix for encoding-16.1 in a TCL_UTF_MAX=6 build. --- generic/tclEncoding.c | 240 +++++++++++++++++++++++++++----------------------- 1 file changed, 128 insertions(+), 112 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 5a9d2d5..da03055 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -83,7 +83,7 @@ typedef struct TableEncodingData { } TableEncodingData; /* - * The following structures is the clientData for a dynamically-loaded, + * Each of the following structures is the clientData for a dynamically-loaded * escape-driven encoding that is itself comprised of other simpler encodings. * An example is "iso-2022-jp", which uses escape sequences to switch between * ascii, jis0208, jis0212, gb2312, and ksc5601. Note that "escape-driven" @@ -117,8 +117,8 @@ typedef struct EscapeEncodingData { * 0. */ int numSubTables; /* Length of following array. */ EscapeSubTable subTables[1];/* Information about each EscapeSubTable used - * by this encoding type. The actual size will - * be as large as necessary to hold all + * by this encoding type. The actual size is + * as large as necessary to hold all * EscapeSubTables. */ } EscapeEncodingData; @@ -156,7 +156,7 @@ static ProcessGlobalValue encodingFileMap = { * A list of directories making up the "library path". Historically this * search path has served many uses, but the only one remaining is a base for * the encodingSearchPath above. If the application does not explicitly set - * the encodingSearchPath, then it will be initialized by appending /encoding + * the encodingSearchPath, then it is initialized by appending /encoding * to each directory in this "libraryPath". */ @@ -177,7 +177,7 @@ TCL_DECLARE_MUTEX(encodingMutex) /* * The following are used to hold the default and current system encodings. * If NULL is passed to one of the conversion routines, the current setting of - * the system encoding will be used to perform the conversion. + * the system encoding is used to perform the conversion. */ static Tcl_Encoding defaultEncoding; @@ -429,9 +429,8 @@ TclGetLibraryPath(void) * Keeps the per-thread copy of the library path current with changes to * the global copy. * - * NOTE: this routine returns void, so there's no way to report the error - * that searchPath is not a valid list. In that case, this routine will - * silently do nothing. + * Since the result of this routine is void, if searchPath is not a valid + * list this routine silently does nothing. * *---------------------------------------------------------------------- */ @@ -453,17 +452,16 @@ TclSetLibraryPath( * * FillEncodingFileMap -- * - * Called to bring the encoding file map in sync with the current value - * of the encoding search path. + * Called to update the encoding file map with the current value + * of the encoding search path. * - * Scan the directories on the encoding search path, find the *.enc - * files, and store the found pathnames in a map associated with the - * encoding name. + * Finds *.end files in the directories on the encoding search path and + * stores the found pathnames in a map associated with the encoding name. * - * In particular, if $dir is on the encoding search path, and the file - * $dir/foo.enc is found, then store a "foo" -> $dir entry in the map. - * Later, any need for the "foo" encoding will quickly * be able to - * construct the $dir/foo.enc pathname for reading the encoding data. + * If $dir is on the encoding search path and the file $dir/foo.enc is + * found, stores a "foo" -> $dir entry in the map. if the "foo" encoding + * is needed later, the $dir/foo.enc name can be quickly constructed in + * order to read the encoding data. * * Results: * None. @@ -544,19 +542,24 @@ void TclInitEncodingSubsystem(void) { Tcl_EncodingType type; + union { + char c; + short s; + } isLe; if (encodingsInitialized) { return; } + isLe.s = 1; Tcl_MutexLock(&encodingMutex); Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS); Tcl_MutexUnlock(&encodingMutex); /* - * Create a few initial encodings. Note that the UTF-8 to UTF-8 - * translation is not a no-op, because it will turn a stream of improperly - * formed UTF-8 into a properly formed stream. + * Create a few initial encodings. UTF-8 to UTF-8 translation is not a + * no-op because it turns a stream of improperly formed UTF-8 into a + * properly formed stream. */ type.encodingName = "identity"; @@ -583,7 +586,7 @@ TclInitEncodingSubsystem(void) type.fromUtfProc = UtfToUnicodeProc; type.freeProc = NULL; type.nullSize = 2; - type.clientData = NULL; + type.clientData = INT2PTR(isLe.c); Tcl_CreateEncoding(&type); /* @@ -755,11 +758,7 @@ Tcl_SetDefaultEncodingDir( * interp was NULL. * * Side effects: - * The new encoding type is entered into a table visible to all - * interpreters, keyed off the encoding's name. For each call to this - * function, there should eventually be a call to Tcl_FreeEncoding, so - * that the database can be cleaned up when encodings aren't needed - * anymore. + * LoadEncodingFile is called if necessary. * *------------------------------------------------------------------------- */ @@ -797,15 +796,15 @@ Tcl_GetEncoding( * * Tcl_FreeEncoding -- * - * This function is called to release an encoding allocated by - * Tcl_CreateEncoding() or Tcl_GetEncoding(). + * Releases an encoding allocated by Tcl_CreateEncoding() or + * Tcl_GetEncoding(). * * Results: * None. * * Side effects: * The reference count associated with the encoding is decremented and - * the encoding may be deleted if nothing is using it anymore. + * the encoding is deleted if nothing is using it anymore. * *--------------------------------------------------------------------------- */ @@ -824,13 +823,14 @@ Tcl_FreeEncoding( * * FreeEncoding -- * - * This function is called to release an encoding by functions that - * already have the encodingMutex. + * Decrements the reference count of an encoding. The caller must hold + * encodingMutes. * * Results: * None. * * Side effects: + * Releases the resource for an encoding if it is now unused. * The reference count associated with the encoding is decremented and * the encoding may be deleted if nothing is using it anymore. * @@ -850,16 +850,17 @@ FreeEncoding( if (encodingPtr->refCount<=0) { Tcl_Panic("FreeEncoding: refcount problem !!!"); } - encodingPtr->refCount--; - if (encodingPtr->refCount == 0) { + if (encodingPtr->refCount-- <= 1) { if (encodingPtr->freeProc != NULL) { (*encodingPtr->freeProc)(encodingPtr->clientData); } if (encodingPtr->hPtr != NULL) { Tcl_DeleteHashEntry(encodingPtr->hPtr); } - ckfree((char *) encodingPtr->name); - ckfree((char *) encodingPtr); + if (encodingPtr->name) { + ckfree((char *)encodingPtr->name); + } + ckfree((char *)encodingPtr); } } @@ -1020,23 +1021,22 @@ Tcl_SetSystemEncoding( * * Tcl_CreateEncoding -- * - * This function is called to define a new encoding and the functions - * that are used to convert between the specified encoding and Unicode. + * Defines a new encoding, along with the functions that are used to + * convert to and from Unicode. * * Results: * Returns a token that represents the encoding. If an encoding with the * same name already existed, the old encoding token remains valid and - * continues to behave as it used to, and will eventually be garbage - * collected when the last reference to it goes away. Any subsequent - * calls to Tcl_GetEncoding with the specified name will retrieve the - * most recent encoding token. + * continues to behave as it used to, and is eventually garbage collected + * when the last reference to it goes away. Any subsequent calls to + * Tcl_GetEncoding with the specified name retrieve the most recent + * encoding token. * * Side effects: - * The new encoding type is entered into a table visible to all - * interpreters, keyed off the encoding's name. For each call to this - * function, there should eventually be a call to Tcl_FreeEncoding, so - * that the database can be cleaned up when encodings aren't needed - * anymore. + * A new record having the name of the encoding is entered into a table of + * encodings visible to all interpreters. For each call to this function, + * there should eventually be a call to Tcl_FreeEncoding, which cleans + * deletes the record in the table when an encoding is no longer needed. * *--------------------------------------------------------------------------- */ @@ -1258,10 +1258,9 @@ Tcl_ExternalToUtf( * * Tcl_UtfToExternalDString -- * - * Convert a source buffer from UTF-8 into the specified encoding. If any + * Convert a source buffer from UTF-8 to the specified encoding. If any * of the bytes in the source buffer are invalid or cannot be represented - * in the target encoding, a default fallback character will be - * substituted. + * in the target encoding, a default fallback character is substituted. * * Results: * The converted bytes are stored in the DString, which is then NULL @@ -1570,13 +1569,13 @@ OpenEncodingFileChannel( * the data. * * Results: - * The return value is the newly loaded Encoding, or NULL if the file - * didn't exist of was in the incorrect format. If NULL was returned, an - * error message is left in interp's result object, unless interp was - * NULL. + * The return value is the newly loaded Tcl_Encoding or NULL if the file + * didn't exist or could not be processed. If NULL is returned and interp + * is not NULL, an error message is left in interp's result object. * * Side effects: - * File read from disk. + * A corresponding encoding file might be read from persistent storage, in + * which case LoadTableEncoding is called. * *--------------------------------------------------------------------------- */ @@ -1584,8 +1583,8 @@ OpenEncodingFileChannel( static Tcl_Encoding LoadEncodingFile( Tcl_Interp *interp, /* Interp for error reporting, if not NULL. */ - const char *name) /* The name of the encoding file on disk and - * also the name for new encoding. */ + const char *name) /* The name of both the encoding file + * and the new encoding. */ { Tcl_Channel chan = NULL; Tcl_Encoding encoding = NULL; @@ -1637,27 +1636,27 @@ LoadEncodingFile( * * LoadTableEncoding -- * - * Helper function for LoadEncodingTable(). Loads a table to that - * converts between Unicode and some other encoding and creates an - * encoding (using a TableEncoding structure) from that information. + * Helper function for LoadEncodingFile(). Creates a Tcl_EncodingType + * structure along with its corresponding TableEncodingData structure, and + * passes it to Tcl_Createncoding. * - * File contains binary data, but begins with a marker to indicate - * byte-ordering, so that same binary file can be read on either endian - * platforms. + * The file contains binary data but begins with a marker to indicate + * byte-ordering so a single binary file can be read on big or + * little-endian systems. * * Results: - * The return value is the new encoding, or NULL if the encoding could - * not be created (because the file contained invalid data). + * Returns the new Tcl_Encoding, or NULL if it could could + * not be created because the file contained invalid data. * * Side effects: - * None. + * See Tcl_CreateEncoding(). * *------------------------------------------------------------------------- */ static Tcl_Encoding LoadTableEncoding( - const char *name, /* Name for new encoding. */ + const char *name, /* Name of the new encoding. */ int type, /* Type of encoding (ENCODING_?????). */ Tcl_Channel chan) /* File containing new encoding. */ { @@ -1769,10 +1768,10 @@ LoadTableEncoding( } /* - * Invert toUnicode array to produce the fromUnicode array. Performs a + * Invert the toUnicode array to produce the fromUnicode array. Performs a * single malloc to get the memory for the array and all the pages needed - * by the array. While reading in the toUnicode array, we remembered what - * pages that would be needed for the fromUnicode array. + * by the array. While reading in the toUnicode array remember what + * pages are needed for the fromUnicode array. */ if (symbol) { @@ -1814,8 +1813,8 @@ LoadTableEncoding( if (type == ENCODING_MULTIBYTE) { /* * If multibyte encodings don't have a backslash character, define - * one. Otherwise, on Windows, native file names won't work because - * the backslash in the file name will map to the unknown character + * one. Otherwise, on Windows, native file names don't work because + * the backslash in the file name maps to the unknown character * (question mark) when converting from UTF-8 to external encoding. */ @@ -1829,13 +1828,13 @@ LoadTableEncoding( unsigned short *page; /* - * Make a special symbol encoding that not only maps the symbol - * characters from their Unicode code points down into page 0, but - * also ensure that the characters on page 0 map to themselves. This - * is so that a symbol font can be used to display a simple string - * like "abcd" and have alpha, beta, chi, delta show up, rather than - * have "unknown" chars show up because strictly speaking the symbol - * font doesn't have glyphs for those low ascii chars. + * Make a special symbol encoding that maps each symbol character from + * its Unicode code point down into page 0, and also ensure that each + * characters on page 0 maps to itself so that a symbol font can be + * used to display a simple string like "abcd" and have alpha, beta, + * chi, delta show up, rather than have "unknown" chars show up because + * strictly speaking the symbol font doesn't have glyphs for those low + * ASCII chars. */ page = dataPtr->fromUnicode[0]; @@ -1939,7 +1938,7 @@ LoadTableEncoding( static Tcl_Encoding LoadEscapeEncoding( - const char *name, /* Name for new encoding. */ + const char *name, /* Name of the new encoding. */ Tcl_Channel chan) /* File containing new encoding. */ { int i; @@ -2318,7 +2317,7 @@ UtfToUtfProc( * * UnicodeToUtfProc -- * - * Convert from Unicode to UTF-8. + * Convert from UTF-16 to UTF-8. * * Results: * Returns TCL_OK if conversion was successful. @@ -2331,7 +2330,7 @@ UtfToUtfProc( static int UnicodeToUtfProc( - ClientData clientData, /* Not used. */ + ClientData clientData, /* != NULL means LE, == NUL means BE */ const char *src, /* Source string in Unicode. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ @@ -2359,13 +2358,19 @@ UnicodeToUtfProc( const char *srcStart, *srcEnd; char *dstEnd, *dstStart; int result, numChars; - Tcl_UniChar ch; + unsigned short ch; result = TCL_OK; - if ((srcLen % sizeof(Tcl_UniChar)) != 0) { + + /* check alignment with utf-16 (2 == sizeof(UTF-16)) */ + if ((srcLen % 2) != 0) { + result = TCL_CONVERT_MULTIBYTE; + srcLen--; + } + /* If last code point is a high surrogate, we cannot handle that yet */ + if ((srcLen >= 2) && ((src[srcLen - (clientData?1:2)] & 0xFC) == 0xD8)) { result = TCL_CONVERT_MULTIBYTE; - srcLen /= sizeof(Tcl_UniChar); - srcLen *= sizeof(Tcl_UniChar); + srcLen-= 2; } srcStart = src; @@ -2379,17 +2384,21 @@ UnicodeToUtfProc( result = TCL_CONVERT_NOSPACE; break; } + if (clientData) { + ch = (src[1] & 0xFF) << 8 | (src[0] & 0xFF); + } else { + ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF); + } /* - * Special case for 1-byte utf chars for speed. Make sure we - * work with Tcl_UniChar-size data. + * Special case for 1-byte utf chars for speed. Make sure we work with + * unsigned short-size data. */ - ch = *(Tcl_UniChar *)src; if (ch && ch < 0x80) { *dst++ = (ch & 0xFF); } else { dst += Tcl_UniCharToUtf(ch, dst); } - src += sizeof(Tcl_UniChar); + src += sizeof(unsigned short); } *srcReadPtr = src - srcStart; @@ -2403,7 +2412,7 @@ UnicodeToUtfProc( * * UtfToUnicodeProc -- * - * Convert from UTF-8 to Unicode. + * Convert from UTF-8 to UTF-16. * * Results: * Returns TCL_OK if conversion was successful. @@ -2416,8 +2425,7 @@ UnicodeToUtfProc( static int UtfToUnicodeProc( - ClientData clientData, /* TableEncodingData that specifies - * encoding. */ + ClientData clientData, /* != NULL means LE, == NUL means BE */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ @@ -2444,7 +2452,7 @@ UtfToUnicodeProc( { const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; int result, numChars; - Tcl_UniChar ch; + Tcl_UniChar ch = 0; srcStart = src; srcEnd = src + srcLen; @@ -2476,27 +2484,37 @@ UtfToUnicodeProc( * Need to handle this in a way that won't cause misalignment * by casting dst to a Tcl_UniChar. [Bug 1122671] */ -#ifdef WORDS_BIGENDIAN + if (clientData) { #if TCL_UTF_MAX > 4 - *dst++ = (ch >> 24); - *dst++ = ((ch >> 16) & 0xFF); - *dst++ = ((ch >> 8) & 0xFF); - *dst++ = (ch & 0xFF); + if (ch <= 0xFFFF) { + *dst++ = (ch & 0xFF); + *dst++ = (ch >> 8); + } else { + *dst++ = (((ch - 0x10000) >> 10) & 0xFF); + *dst++ = (((ch - 0x10000) >> 18) & 0x3) | 0xD8; + *dst++ = (ch & 0xFF); + *dst++ = ((ch & 0x3) >> 8) | 0xDC; + } #else - *dst++ = (ch >> 8); - *dst++ = (ch & 0xFF); + *dst++ = (ch & 0xFF); + *dst++ = (ch >> 8); #endif -#else + } else { #if TCL_UTF_MAX > 4 - *dst++ = (ch & 0xFF); - *dst++ = ((ch >> 8) & 0xFF); - *dst++ = ((ch >> 16) & 0xFF); - *dst++ = (ch >> 24); + if (ch <= 0xFFFF) { + *dst++ = (ch >> 8); + *dst++ = (ch & 0xFF); + } else { + *dst++ = ((ch & 0x3) >> 8) | 0xDC; + *dst++ = (ch & 0xFF); + *dst++ = (((ch - 0x10000) >> 18) & 0x3) | 0xD8; + *dst++ = (((ch - 0x10000) >> 10) & 0xFF); + } #else - *dst++ = (ch & 0xFF); - *dst++ = (ch >> 8); -#endif + *dst++ = (ch >> 8); + *dst++ = (ch & 0xFF); #endif + } } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; @@ -2899,7 +2917,6 @@ Iso88591FromUtfProc( result = TCL_CONVERT_UNKNOWN; break; } - /* * Plunge on, using '?' as a fallback character. */ @@ -3387,14 +3404,13 @@ EscapeFromUtfProc( * * EscapeFreeProc -- * - * This function is invoked when an EscapeEncodingData encoding is - * deleted. It deletes the memory used by the encoding. + * Frees resources used by the encoding. * * Results: * None. * * Side effects: - * Memory freed. + * Memory is freed. * *--------------------------------------------------------------------------- */ -- cgit v0.12 From 2958d5196de3452ea46a083603d4ce1dc0d05d2a Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 20 Apr 2020 07:50:22 +0000 Subject: Move the needed apt package in .travis.yml to the top, so they can be shared between the images. --- .travis.yml | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/.travis.yml b/.travis.yml index e10ca7c..5672c0b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,15 @@ sudo: false language: c +addons: + apt: + packages: + - binutils-mingw-w64-i686 + - binutils-mingw-w64-x86-64 + - gcc-mingw-w64 + - gcc-mingw-w64-base + - gcc-mingw-w64-i686 + - gcc-mingw-w64-x86-64 + - gcc-multilib matrix: include: # Testing on Linux with various compilers @@ -146,13 +156,6 @@ matrix: os: linux dist: bionic compiler: x86_64-w64-mingw32-gcc - addons: - apt: - packages: - - gcc-mingw-w64-base - - binutils-mingw-w64-x86-64 - - gcc-mingw-w64-x86-64 - - gcc-mingw-w64 env: - BUILD_DIR=win - CFGOPT="--host=x86_64-w64-mingw32 --enable-64bit --enable-threads" @@ -167,14 +170,6 @@ matrix: os: linux dist: bionic compiler: i686-w64-mingw32-gcc - addons: - apt: - packages: - - gcc-mingw-w64-base - - binutils-mingw-w64-i686 - - gcc-mingw-w64-i686 - - gcc-mingw-w64 - - gcc-multilib env: - BUILD_DIR=win - CFGOPT="--host=i686-w64-mingw32 --enable-threads" -- cgit v0.12 From eb263e1e057977a19cf246eeda844bb4bd31342a Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 20 Apr 2020 11:30:31 +0000 Subject: Code cleanup (more typecasts), making it more compatible with -Wc++-compat. Less use of /* ARGUSED */. --- generic/tclEncoding.c | 116 ++++++++++++++++++++++---------------------------- generic/tclUtil.c | 61 +++++++++++++------------- generic/tclZlib.c | 64 ++++++++++++++-------------- win/tclWinPipe.c | 1 - 4 files changed, 114 insertions(+), 128 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index b879ec8..e012570 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -18,7 +18,7 @@ typedef size_t (LengthProc)(const char *src); * convert between various character sets and UTF-8. */ -typedef struct Encoding { +typedef struct { char *name; /* Name of encoding. Malloced because (1) hash * table entry that owns this encoding may be * freed prior to this encoding being freed, @@ -57,7 +57,7 @@ typedef struct Encoding { * encoding. */ -typedef struct TableEncodingData { +typedef struct { int fallback; /* Character (in this encoding) to substitute * when this encoding cannot represent a UTF-8 * character. */ @@ -91,7 +91,7 @@ typedef struct TableEncodingData { * for switching character sets. */ -typedef struct EscapeSubTable { +typedef struct { unsigned sequenceLen; /* Length of following string. */ char sequence[16]; /* Escape code that marks this encoding. */ char name[32]; /* Name for encoding. */ @@ -100,7 +100,7 @@ typedef struct EscapeSubTable { * yet. */ } EscapeSubTable; -typedef struct EscapeEncodingData { +typedef struct { int fallback; /* Character (in this encoding) to substitute * when this encoding cannot represent a UTF-8 * character. */ @@ -195,35 +195,25 @@ static unsigned short emptyPage[256]; * Functions used only in this module. */ -static int BinaryProc(ClientData clientData, - const char *src, int srcLen, int flags, - Tcl_EncodingState *statePtr, char *dst, int dstLen, - int *srcReadPtr, int *dstWrotePtr, - int *dstCharsPtr); -static void DupEncodingIntRep(Tcl_Obj *srcPtr, Tcl_Obj *dupPtr); -static void EscapeFreeProc(ClientData clientData); -static int EscapeFromUtfProc(ClientData clientData, - const char *src, int srcLen, int flags, - Tcl_EncodingState *statePtr, char *dst, int dstLen, - int *srcReadPtr, int *dstWrotePtr, - int *dstCharsPtr); -static int EscapeToUtfProc(ClientData clientData, - const char *src, int srcLen, int flags, - Tcl_EncodingState *statePtr, char *dst, int dstLen, - int *srcReadPtr, int *dstWrotePtr, - int *dstCharsPtr); -static void FillEncodingFileMap(void); -static void FreeEncoding(Tcl_Encoding encoding); -static void FreeEncodingIntRep(Tcl_Obj *objPtr); -static Encoding * GetTableEncoding(EscapeEncodingData *dataPtr, - int state); -static Tcl_Encoding LoadEncodingFile(Tcl_Interp *interp, const char *name); -static Tcl_Encoding LoadTableEncoding(const char *name, int type, - Tcl_Channel chan); -static Tcl_Encoding LoadEscapeEncoding(const char *name, Tcl_Channel chan); -static Tcl_Channel OpenEncodingFileChannel(Tcl_Interp *interp, - const char *name); -static void TableFreeProc(ClientData clientData); +static Tcl_EncodingConvertProc BinaryProc; +static Tcl_DupInternalRepProc DupEncodingIntRep; +static Tcl_EncodingFreeProc EscapeFreeProc; +static Tcl_EncodingConvertProc EscapeFromUtfProc; +static Tcl_EncodingConvertProc EscapeToUtfProc; +static void FillEncodingFileMap(void); +static void FreeEncoding(Tcl_Encoding encoding); +static Tcl_FreeInternalRepProc FreeEncodingIntRep; +static Encoding * GetTableEncoding(EscapeEncodingData *dataPtr, + int state); +static Tcl_Encoding LoadEncodingFile(Tcl_Interp *interp, + const char *name); +static Tcl_Encoding LoadTableEncoding(const char *name, int type, + Tcl_Channel chan); +static Tcl_Encoding LoadEscapeEncoding(const char *name, + Tcl_Channel chan); +static Tcl_Channel OpenEncodingFileChannel(Tcl_Interp *interp, + const char *name); +static Tcl_EncodingFreeProc TableFreeProc; static int TableFromUtfProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, @@ -335,7 +325,7 @@ static void FreeEncodingIntRep( Tcl_Obj *objPtr) { - Tcl_FreeEncoding(objPtr->internalRep.twoPtrValue.ptr1); + Tcl_FreeEncoding((Tcl_Encoding)objPtr->internalRep.twoPtrValue.ptr1); objPtr->typePtr = NULL; } @@ -596,14 +586,14 @@ TclInitEncodingSubsystem(void) * code to duplicate the structure of a table encoding here. */ - dataPtr = ckalloc(sizeof(TableEncodingData)); + dataPtr = (TableEncodingData *)ckalloc(sizeof(TableEncodingData)); memset(dataPtr, 0, sizeof(TableEncodingData)); dataPtr->fallback = '?'; size = 256*(sizeof(unsigned short *) + sizeof(unsigned short)); - dataPtr->toUnicode = ckalloc(size); + dataPtr->toUnicode = (unsigned short **)ckalloc(size); memset(dataPtr->toUnicode, 0, size); - dataPtr->fromUnicode = ckalloc(size); + dataPtr->fromUnicode = (unsigned short **)ckalloc(size); memset(dataPtr->fromUnicode, 0, size); dataPtr->toUnicode[0] = (unsigned short *) (dataPtr->toUnicode + 256); @@ -669,7 +659,7 @@ TclFinalizeEncodingSubsystem(void) * cleaned up. */ - FreeEncoding(Tcl_GetHashValue(hPtr)); + FreeEncoding((Tcl_Encoding)Tcl_GetHashValue(hPtr)); hPtr = Tcl_FirstHashEntry(&encodingTable, &search); } @@ -778,7 +768,7 @@ Tcl_GetEncoding( hPtr = Tcl_FindHashEntry(&encodingTable, name); if (hPtr != NULL) { - encodingPtr = Tcl_GetHashValue(hPtr); + encodingPtr = (Encoding *)Tcl_GetHashValue(hPtr); encodingPtr->refCount++; Tcl_MutexUnlock(&encodingMutex); return (Tcl_Encoding) encodingPtr; @@ -926,7 +916,7 @@ Tcl_GetEncodingNames( Tcl_MutexLock(&encodingMutex); for (hPtr = Tcl_FirstHashEntry(&encodingTable, &search); hPtr != NULL; hPtr = Tcl_NextHashEntry(&search)) { - Encoding *encodingPtr = Tcl_GetHashValue(hPtr); + Encoding *encodingPtr = (Encoding *)Tcl_GetHashValue(hPtr); Tcl_CreateHashEntry(&table, Tcl_NewStringObj(encodingPtr->name, -1), &dummy); @@ -1056,13 +1046,13 @@ Tcl_CreateEncoding( * reference goes away. */ - encodingPtr = Tcl_GetHashValue(hPtr); + encodingPtr = (Encoding *)Tcl_GetHashValue(hPtr); encodingPtr->hPtr = NULL; } - name = ckalloc(strlen(typePtr->encodingName) + 1); + name = (char *)ckalloc(strlen(typePtr->encodingName) + 1); - encodingPtr = ckalloc(sizeof(Encoding)); + encodingPtr = (Encoding *)ckalloc(sizeof(Encoding)); encodingPtr->name = strcpy(name, typePtr->encodingName); encodingPtr->toUtfProc = typePtr->toUtfProc; encodingPtr->fromUtfProc = typePtr->fromUtfProc; @@ -1740,7 +1730,7 @@ LoadTableEncoding( #undef PAGESIZE #define PAGESIZE (256 * sizeof(unsigned short)) - dataPtr = ckalloc(sizeof(TableEncodingData)); + dataPtr = (TableEncodingData *)ckalloc(sizeof(TableEncodingData)); memset(dataPtr, 0, sizeof(TableEncodingData)); dataPtr->fallback = fallback; @@ -1752,7 +1742,7 @@ LoadTableEncoding( */ size = 256 * sizeof(unsigned short *) + numPages * PAGESIZE; - dataPtr->toUnicode = ckalloc(size); + dataPtr->toUnicode = (unsigned short **)ckalloc(size); memset(dataPtr->toUnicode, 0, size); pageMemPtr = (unsigned short *) (dataPtr->toUnicode + 256); @@ -1813,7 +1803,7 @@ LoadTableEncoding( } } size = 256 * sizeof(unsigned short *) + numPages * PAGESIZE; - dataPtr->fromUnicode = ckalloc(size); + dataPtr->fromUnicode = (unsigned short **)ckalloc(size); memset(dataPtr->fromUnicode, 0, size); pageMemPtr = (unsigned short *) (dataPtr->fromUnicode + 256); @@ -2051,7 +2041,7 @@ LoadEscapeEncoding( size = sizeof(EscapeEncodingData) - sizeof(EscapeSubTable) + Tcl_DStringLength(&escapeData); - dataPtr = ckalloc(size); + dataPtr = (EscapeEncodingData *)ckalloc(size); dataPtr->initLen = strlen(init); memcpy(dataPtr->init, init, dataPtr->initLen + 1); dataPtr->finalLen = strlen(final); @@ -2661,7 +2651,7 @@ TableToUtfProc( Tcl_UniChar ch = 0; const unsigned short *const *toUnicode; const unsigned short *pageZero; - TableEncodingData *dataPtr = clientData; + TableEncodingData *dataPtr = (TableEncodingData *)clientData; if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; @@ -2772,7 +2762,7 @@ TableFromUtfProc( const char *dstStart, *dstEnd, *prefixBytes; Tcl_UniChar ch = 0; int result, len, word, numChars; - TableEncodingData *dataPtr = clientData; + TableEncodingData *dataPtr = (TableEncodingData *)clientData; const unsigned short *const *fromUnicode; result = TCL_OK; @@ -2979,11 +2969,9 @@ Iso88591FromUtfProc( { const char *srcStart, *srcEnd, *srcClose; const char *dstStart, *dstEnd; - int result, numChars; + int result = TCL_OK, numChars; Tcl_UniChar ch = 0; - result = TCL_OK; - srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; @@ -3067,7 +3055,7 @@ TableFreeProc( ClientData clientData) /* TableEncodingData that specifies * encoding. */ { - TableEncodingData *dataPtr = clientData; + TableEncodingData *dataPtr = (TableEncodingData *)clientData; /* * Make sure we aren't freeing twice on shutdown. [Bug 219314] @@ -3125,7 +3113,7 @@ EscapeToUtfProc( * correspond to the bytes stored in the * output buffer. */ { - EscapeEncodingData *dataPtr = clientData; + EscapeEncodingData *dataPtr = (EscapeEncodingData *)clientData; const char *prefixBytes, *tablePrefixBytes, *srcStart, *srcEnd; const unsigned short *const *tableToUnicode; const Encoding *encodingPtr; @@ -3136,8 +3124,8 @@ EscapeToUtfProc( charLimit = *dstCharsPtr; } result = TCL_OK; - tablePrefixBytes = NULL; /* lint. */ - tableToUnicode = NULL; /* lint. */ + tablePrefixBytes = NULL; + tableToUnicode = NULL; prefixBytes = dataPtr->prefixBytes; encodingPtr = NULL; @@ -3261,7 +3249,7 @@ EscapeToUtfProc( TableEncodingData *tableDataPtr; encodingPtr = GetTableEncoding(dataPtr, state); - tableDataPtr = encodingPtr->clientData; + tableDataPtr = (TableEncodingData *)encodingPtr->clientData; tablePrefixBytes = tableDataPtr->prefixBytes; tableToUnicode = (const unsigned short *const*) tableDataPtr->toUnicode; @@ -3339,7 +3327,7 @@ EscapeFromUtfProc( * correspond to the bytes stored in the * output buffer. */ { - EscapeEncodingData *dataPtr = clientData; + EscapeEncodingData *dataPtr = (EscapeEncodingData *)clientData; const Encoding *encodingPtr; const char *srcStart, *srcEnd, *srcClose; const char *dstStart, *dstEnd; @@ -3380,7 +3368,7 @@ EscapeFromUtfProc( } encodingPtr = GetTableEncoding(dataPtr, state); - tableDataPtr = encodingPtr->clientData; + tableDataPtr = (const TableEncodingData *)encodingPtr->clientData; tablePrefixBytes = tableDataPtr->prefixBytes; tableFromUnicode = (const unsigned short *const *) tableDataPtr->fromUnicode; @@ -3408,7 +3396,7 @@ EscapeFromUtfProc( oldState = state; for (state = 0; state < dataPtr->numSubTables; state++) { encodingPtr = GetTableEncoding(dataPtr, state); - tableDataPtr = encodingPtr->clientData; + tableDataPtr = (const TableEncodingData *)encodingPtr->clientData; word = tableDataPtr->fromUnicode[(ch >> 8)][ch & 0xFF]; if (word != 0) { break; @@ -3422,7 +3410,7 @@ EscapeFromUtfProc( break; } encodingPtr = GetTableEncoding(dataPtr, state); - tableDataPtr = encodingPtr->clientData; + tableDataPtr = (const TableEncodingData *)encodingPtr->clientData; word = tableDataPtr->fallback; } @@ -3528,7 +3516,7 @@ EscapeFreeProc( ClientData clientData) /* EscapeEncodingData that specifies * encoding. */ { - EscapeEncodingData *dataPtr = clientData; + EscapeEncodingData *dataPtr = (EscapeEncodingData *)clientData; EscapeSubTable *subTablePtr; int i; @@ -3693,8 +3681,8 @@ InitializeEncodingSearchPath( bytes = Tcl_GetStringFromObj(searchPathObj, &numBytes); *lengthPtr = numBytes; - *valuePtr = ckalloc(numBytes + 1); - memcpy(*valuePtr, bytes, (size_t) numBytes + 1); + *valuePtr = (char *)ckalloc(numBytes + 1); + memcpy(*valuePtr, bytes, numBytes + 1); Tcl_DecrRefCount(searchPathObj); } diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 4139804..13b0d55 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -577,7 +577,7 @@ FindElement( const char *limit; /* Points just after list/dict's last byte. */ int openBraces = 0; /* Brace nesting level during parse. */ int inQuotes = 0; - int size = 0; /* lint. */ + int size = 0; int numChars; int literal = 1; const char *p2; @@ -869,7 +869,7 @@ Tcl_SplitList( size = TclMaxListLength(list, -1, &end) + 1; length = end - list; - argv = ckalloc((size * sizeof(char *)) + length + 1); + argv = (const char **)ckalloc((size * sizeof(char *)) + length + 1); for (i = 0, p = ((char *) argv) + size*sizeof(char *); *list != 0; i++) { @@ -1577,7 +1577,7 @@ Tcl_Merge( */ if (argc == 0) { - result = ckalloc(1); + result = (char *)ckalloc(1); result[0] = '\0'; return result; } @@ -1589,7 +1589,7 @@ Tcl_Merge( if (argc <= LOCAL_SIZE) { flagPtr = localFlags; } else { - flagPtr = ckalloc(argc); + flagPtr = (char *)ckalloc(argc); } for (i = 0; i < argc; i++) { flagPtr[i] = ( i ? TCL_DONT_QUOTE_HASH : 0 ); @@ -1607,7 +1607,7 @@ Tcl_Merge( * Pass two: copy into the result area. */ - result = ckalloc(bytesNeeded); + result = (char *)ckalloc(bytesNeeded); dst = result; for (i = 0; i < argc; i++) { flagPtr[i] |= ( i ? TCL_DONT_QUOTE_HASH : 0 ); @@ -1931,7 +1931,7 @@ Tcl_Concat( * All element bytes + (argc - 1) spaces + 1 terminating NULL. */ - result = ckalloc(bytesNeeded + argc); + result = (char *)ckalloc(bytesNeeded + argc); for (p = result, i = 0; i < argc; i++) { int triml, trimr, elemLength; @@ -2666,7 +2666,7 @@ Tcl_DStringAppend( if (newSize >= dsPtr->spaceAvl) { dsPtr->spaceAvl = newSize * 2; if (dsPtr->string == dsPtr->staticSpace) { - char *newString = ckalloc(dsPtr->spaceAvl); + char *newString = (char *)ckalloc(dsPtr->spaceAvl); memcpy(newString, dsPtr->string, dsPtr->length); dsPtr->string = newString; @@ -2679,7 +2679,7 @@ Tcl_DStringAppend( offset = bytes - dsPtr->string; } - dsPtr->string = ckrealloc(dsPtr->string, dsPtr->spaceAvl); + dsPtr->string = (char *)ckrealloc(dsPtr->string, dsPtr->spaceAvl); if (offset >= 0) { bytes = dsPtr->string + offset; @@ -2797,7 +2797,7 @@ Tcl_DStringAppendElement( if (newSize >= dsPtr->spaceAvl) { dsPtr->spaceAvl = newSize * 2; if (dsPtr->string == dsPtr->staticSpace) { - char *newString = ckalloc(dsPtr->spaceAvl); + char *newString = (char *)ckalloc(dsPtr->spaceAvl); memcpy(newString, dsPtr->string, dsPtr->length); dsPtr->string = newString; @@ -2810,7 +2810,7 @@ Tcl_DStringAppendElement( offset = element - dsPtr->string; } - dsPtr->string = ckrealloc(dsPtr->string, dsPtr->spaceAvl); + dsPtr->string = (char *)ckrealloc(dsPtr->string, dsPtr->spaceAvl); if (offset >= 0) { element = dsPtr->string + offset; @@ -2884,12 +2884,12 @@ Tcl_DStringSetLength( dsPtr->spaceAvl = length + 1; } if (dsPtr->string == dsPtr->staticSpace) { - char *newString = ckalloc(dsPtr->spaceAvl); + char *newString = (char *)ckalloc(dsPtr->spaceAvl); memcpy(newString, dsPtr->string, dsPtr->length); dsPtr->string = newString; } else { - dsPtr->string = ckrealloc(dsPtr->string, dsPtr->spaceAvl); + dsPtr->string = (char *)ckrealloc(dsPtr->string, dsPtr->spaceAvl); } } dsPtr->length = length; @@ -3034,7 +3034,7 @@ Tcl_DStringGetResult( dsPtr->string = iPtr->result; dsPtr->spaceAvl = dsPtr->length+1; } else { - dsPtr->string = ckalloc(dsPtr->length+1); + dsPtr->string = (char *)ckalloc(dsPtr->length+1); memcpy(dsPtr->string, iPtr->result, dsPtr->length+1); iPtr->freeProc(iPtr->result); } @@ -3045,7 +3045,7 @@ Tcl_DStringGetResult( dsPtr->string = dsPtr->staticSpace; dsPtr->spaceAvl = TCL_DSTRING_STATIC_SIZE; } else { - dsPtr->string = ckalloc(dsPtr->length+1); + dsPtr->string = (char *)ckalloc(dsPtr->length+1); dsPtr->spaceAvl = dsPtr->length + 1; } memcpy(dsPtr->string, iPtr->result, dsPtr->length+1); @@ -3203,7 +3203,7 @@ Tcl_PrintDouble( int signum; char *digits; char *end; - int *precisionPtr = Tcl_GetThreadData(&precisionKey, sizeof(int)); + int *precisionPtr = (int *)Tcl_GetThreadData(&precisionKey, sizeof(int)); /* * Handle NaN. @@ -3365,7 +3365,6 @@ Tcl_PrintDouble( *---------------------------------------------------------------------- */ - /* ARGSUSED */ char * TclPrecTraceProc( ClientData clientData, /* Not used. */ @@ -3376,7 +3375,7 @@ TclPrecTraceProc( { Tcl_Obj *value; int prec; - int *precisionPtr = Tcl_GetThreadData(&precisionKey, (int) sizeof(int)); + int *precisionPtr = (int *)Tcl_GetThreadData(&precisionKey, sizeof(int)); /* * If the variable is unset, then recreate the trace. @@ -3724,8 +3723,8 @@ UpdateStringOfEndOffset( buffer[len++] = '-'; len += TclFormatInt(buffer+len, -(objPtr->internalRep.longValue)); } - objPtr->bytes = ckalloc((unsigned) len+1); - memcpy(objPtr->bytes, buffer, (unsigned) len+1); + objPtr->bytes = (char *)ckalloc(len+1); + memcpy(objPtr->bytes, buffer, len+1); objPtr->length = len; } @@ -3734,14 +3733,14 @@ UpdateStringOfEndOffset( * * GetEndOffsetFromObj -- * - * Look for a string of the form "end[+-]offset" and convert it to an - * internal representation holding the offset. + * Look for a string of the form "end[+-]offset" and convert it to an + * internal representation holding the offset. * * Results: - * Tcl return code. + * Tcl return code. * * Side effects: - * May store a Tcl_ObjType. + * May store a Tcl_ObjType. * *---------------------------------------------------------------------- */ @@ -4086,7 +4085,7 @@ ClearHash( for (hPtr = Tcl_FirstHashEntry(tablePtr, &search); hPtr != NULL; hPtr = Tcl_NextHashEntry(&search)) { - Tcl_Obj *objPtr = Tcl_GetHashValue(hPtr); + Tcl_Obj *objPtr = (Tcl_Obj *)Tcl_GetHashValue(hPtr); Tcl_DecrRefCount(objPtr); Tcl_DeleteHashEntry(hPtr); @@ -4116,10 +4115,10 @@ GetThreadHash( Tcl_ThreadDataKey *keyPtr) { Tcl_HashTable **tablePtrPtr = - Tcl_GetThreadData(keyPtr, sizeof(Tcl_HashTable *)); + (Tcl_HashTable **)Tcl_GetThreadData(keyPtr, sizeof(Tcl_HashTable *)); if (NULL == *tablePtrPtr) { - *tablePtrPtr = ckalloc(sizeof(Tcl_HashTable)); + *tablePtrPtr = (Tcl_HashTable *)ckalloc(sizeof(Tcl_HashTable)); Tcl_CreateThreadExitHandler(FreeThreadHash, *tablePtrPtr); Tcl_InitHashTable(*tablePtrPtr, TCL_ONE_WORD_KEYS); } @@ -4144,7 +4143,7 @@ static void FreeThreadHash( ClientData clientData) { - Tcl_HashTable *tablePtr = clientData; + Tcl_HashTable *tablePtr = (Tcl_HashTable *)clientData; ClearHash(tablePtr); Tcl_DeleteHashTable(tablePtr); @@ -4166,7 +4165,7 @@ static void FreeProcessGlobalValue( ClientData clientData) { - ProcessGlobalValue *pgvPtr = clientData; + ProcessGlobalValue *pgvPtr = (ProcessGlobalValue *)clientData; pgvPtr->epoch++; pgvPtr->numBytes = 0; @@ -4214,7 +4213,7 @@ TclSetProcessGlobalValue( Tcl_CreateExitHandler(FreeProcessGlobalValue, pgvPtr); } bytes = Tcl_GetStringFromObj(newValue, &pgvPtr->numBytes); - pgvPtr->value = ckalloc(pgvPtr->numBytes + 1); + pgvPtr->value = (char *)ckalloc(pgvPtr->numBytes + 1); memcpy(pgvPtr->value, bytes, pgvPtr->numBytes + 1); if (pgvPtr->encoding) { Tcl_FreeEncoding(pgvPtr->encoding); @@ -4278,7 +4277,7 @@ TclGetProcessGlobalValue( Tcl_DStringLength(&native), &newValue); Tcl_DStringFree(&native); ckfree(pgvPtr->value); - pgvPtr->value = ckalloc(Tcl_DStringLength(&newValue) + 1); + pgvPtr->value = (char *)ckalloc(Tcl_DStringLength(&newValue) + 1); memcpy(pgvPtr->value, Tcl_DStringValue(&newValue), Tcl_DStringLength(&newValue) + 1); Tcl_DStringFree(&newValue); @@ -4328,7 +4327,7 @@ TclGetProcessGlobalValue( Tcl_SetHashValue(hPtr, value); Tcl_IncrRefCount(value); } - return Tcl_GetHashValue(hPtr); + return (Tcl_Obj *)Tcl_GetHashValue(hPtr); } /* diff --git a/generic/tclZlib.c b/generic/tclZlib.c index ed29ff9..86fda86 100644 --- a/generic/tclZlib.c +++ b/generic/tclZlib.c @@ -593,7 +593,7 @@ SetInflateDictionary( int length; unsigned char *bytes = Tcl_GetByteArrayFromObj(compDictObj, &length); - return inflateSetDictionary(strm, bytes, (unsigned) length); + return inflateSetDictionary(strm, bytes, length); } return Z_OK; } @@ -607,7 +607,7 @@ SetDeflateDictionary( int length; unsigned char *bytes = Tcl_GetByteArrayFromObj(compDictObj, &length); - return deflateSetDictionary(strm, bytes, (unsigned) length); + return deflateSetDictionary(strm, bytes, length); } return Z_OK; } @@ -623,7 +623,7 @@ Deflate( int e; strm->next_out = (Bytef *) bufferPtr; - strm->avail_out = (unsigned) bufferSize; + strm->avail_out = bufferSize; e = deflate(strm, flush); if (writtenPtr != NULL) { *writtenPtr = bufferSize - strm->avail_out; @@ -697,7 +697,7 @@ Tcl_ZlibStreamInit( case TCL_ZLIB_FORMAT_GZIP: wbits = WBITS_GZIP; if (dictObj) { - gzHeaderPtr = ckalloc(sizeof(GzipHeader)); + gzHeaderPtr = (GzipHeader *)ckalloc(sizeof(GzipHeader)); memset(gzHeaderPtr, 0, sizeof(GzipHeader)); if (GenerateHeader(interp, dictObj, gzHeaderPtr, NULL) != TCL_OK) { @@ -731,7 +731,7 @@ Tcl_ZlibStreamInit( break; case TCL_ZLIB_FORMAT_GZIP: wbits = WBITS_GZIP; - gzHeaderPtr = ckalloc(sizeof(GzipHeader)); + gzHeaderPtr = (GzipHeader *)ckalloc(sizeof(GzipHeader)); memset(gzHeaderPtr, 0, sizeof(GzipHeader)); gzHeaderPtr->header.name = (Bytef *) gzHeaderPtr->nativeFilenameBuf; @@ -757,7 +757,7 @@ Tcl_ZlibStreamInit( " TCL_ZLIB_STREAM_INFLATE"); } - zshPtr = ckalloc(sizeof(ZlibStreamHandle)); + zshPtr = (ZlibStreamHandle *)ckalloc(sizeof(ZlibStreamHandle)); zshPtr->interp = interp; zshPtr->mode = mode; zshPtr->format = format; @@ -884,7 +884,7 @@ static void ZlibStreamCmdDelete( ClientData cd) { - ZlibStreamHandle *zshPtr = cd; + ZlibStreamHandle *zshPtr = (ZlibStreamHandle *)cd; zshPtr->cmd = NULL; ZlibStreamCleanup(zshPtr); @@ -1231,7 +1231,7 @@ Tcl_ZlibStreamPut( if (outSize > BUFFER_SIZE_LIMIT) { outSize = BUFFER_SIZE_LIMIT; } - dataTmp = ckalloc(outSize); + dataTmp = (char *)ckalloc(outSize); while (1) { e = Deflate(&zshPtr->stream, dataTmp, outSize, flush, &toStore); @@ -1265,7 +1265,7 @@ Tcl_ZlibStreamPut( if (outSize < BUFFER_SIZE_LIMIT) { outSize = BUFFER_SIZE_LIMIT; /* There may be *lots* of data left to output... */ - dataTmp = ckrealloc(dataTmp, outSize); + dataTmp = (char *)ckrealloc(dataTmp, outSize); } } @@ -1750,10 +1750,10 @@ Tcl_ZlibInflate( if (gzipHeaderDictObj) { headerPtr = &header; memset(headerPtr, 0, sizeof(gz_header)); - nameBuf = ckalloc(MAXPATHLEN); + nameBuf = (char *)ckalloc(MAXPATHLEN); header.name = (Bytef *) nameBuf; header.name_max = MAXPATHLEN - 1; - commentBuf = ckalloc(MAX_COMMENT_LEN); + commentBuf = (char *)ckalloc(MAX_COMMENT_LEN); header.comment = (Bytef *) commentBuf; header.comm_max = MAX_COMMENT_LEN - 1; } @@ -1858,7 +1858,7 @@ Tcl_ZlibInflate( if (headerPtr != NULL) { ExtractHeader(&header, gzipHeaderDictObj); SetValue(gzipHeaderDictObj, "size", - Tcl_NewLongObj((long) stream.total_out)); + Tcl_NewLongObj(stream.total_out)); ckfree(nameBuf); ckfree(commentBuf); } @@ -1894,7 +1894,7 @@ Tcl_ZlibCRC32( int len) { /* Nothing much to do, just wrap the crc32(). */ - return crc32(crc, (Bytef *) buf, (unsigned) len); + return crc32(crc, (Bytef *) buf, len); } unsigned int @@ -1903,7 +1903,7 @@ Tcl_ZlibAdler32( const unsigned char *buf, int len) { - return adler32(adler, (Bytef *) buf, (unsigned) len); + return adler32(adler, (Bytef *) buf, len); } /* @@ -2519,7 +2519,7 @@ ZlibStreamCmd( int objc, Tcl_Obj *const objv[]) { - Tcl_ZlibStream zstream = cd; + Tcl_ZlibStream zstream = (Tcl_ZlibStream)cd; int command, count, code; Tcl_Obj *obj; static const char *const cmds[] = { @@ -2645,7 +2645,7 @@ ZlibStreamAddCmd( int objc, Tcl_Obj *const objv[]) { - Tcl_ZlibStream zstream = cd; + Tcl_ZlibStream zstream = (Tcl_ZlibStream)cd; int index, code, buffersize = -1, flush = -1, i; Tcl_Obj *obj, *compDictObj = NULL; static const char *const add_options[] = { @@ -2769,7 +2769,7 @@ ZlibStreamPutCmd( int objc, Tcl_Obj *const objv[]) { - Tcl_ZlibStream zstream = cd; + Tcl_ZlibStream zstream = (Tcl_ZlibStream)cd; int index, flush = -1, i; Tcl_Obj *compDictObj = NULL; static const char *const put_options[] = { @@ -2858,7 +2858,7 @@ ZlibStreamHeaderCmd( int objc, Tcl_Obj *const objv[]) { - ZlibStreamHandle *zshPtr = cd; + ZlibStreamHandle *zshPtr = (ZlibStreamHandle *)cd; Tcl_Obj *resultObj; if (objc != 2) { @@ -2895,7 +2895,7 @@ ZlibTransformClose( ClientData instanceData, Tcl_Interp *interp) { - ZlibChannelData *cd = instanceData; + ZlibChannelData *cd = (ZlibChannelData *)instanceData; int e, written, result = TCL_OK; /* @@ -2988,7 +2988,7 @@ ZlibTransformInput( int toRead, int *errorCodePtr) { - ZlibChannelData *cd = instanceData; + ZlibChannelData *cd = (ZlibChannelData *)instanceData; Tcl_DriverInputProc *inProc = Tcl_ChannelInputProc(Tcl_GetChannelType(cd->parent)); int readBytes, gotBytes, copied; @@ -3027,7 +3027,7 @@ ZlibTransformInput( * reading over the border. */ - readBytes = Tcl_ReadRaw(cd->parent, cd->inBuffer, + readBytes = Tcl_ReadRaw(cd->parent, cd->inBuffer, cd->readAheadLimit <= cd->inAllocated ? cd->readAheadLimit : cd->inAllocated); @@ -3104,7 +3104,7 @@ ZlibTransformOutput( int toWrite, int *errorCodePtr) { - ZlibChannelData *cd = instanceData; + ZlibChannelData *cd = (ZlibChannelData *)instanceData; Tcl_DriverOutputProc *outProc = Tcl_ChannelOutputProc(Tcl_GetChannelType(cd->parent)); int e, produced; @@ -3225,7 +3225,7 @@ ZlibTransformSetOption( /* not used */ const char *optionName, const char *value) { - ZlibChannelData *cd = instanceData; + ZlibChannelData *cd = (ZlibChannelData *)instanceData; Tcl_DriverSetOptionProc *setOptionProc = Tcl_ChannelSetOptionProc(Tcl_GetChannelType(cd->parent)); static const char *compressChanOptions = "dictionary flush"; @@ -3338,7 +3338,7 @@ ZlibTransformGetOption( const char *optionName, Tcl_DString *dsPtr) { - ZlibChannelData *cd = instanceData; + ZlibChannelData *cd = (ZlibChannelData *)instanceData; Tcl_DriverGetOptionProc *getOptionProc = Tcl_ChannelGetOptionProc(Tcl_GetChannelType(cd->parent)); static const char *compressChanOptions = "checksum dictionary"; @@ -3456,7 +3456,7 @@ ZlibTransformWatch( ClientData instanceData, int mask) { - ZlibChannelData *cd = instanceData; + ZlibChannelData *cd = (ZlibChannelData *)instanceData; Tcl_DriverWatchProc *watchProc; /* @@ -3479,7 +3479,7 @@ ZlibTransformEventHandler( ClientData instanceData, int interestMask) { - ZlibChannelData *cd = instanceData; + ZlibChannelData *cd = (ZlibChannelData *)instanceData; ZlibTransformEventTimerKill(cd); return interestMask; @@ -3499,7 +3499,7 @@ static void ZlibTransformTimerRun( ClientData clientData) { - ZlibChannelData *cd = clientData; + ZlibChannelData *cd = (ZlibChannelData *)clientData; cd->timer = NULL; Tcl_NotifyChannel(cd->chan, TCL_READABLE); @@ -3522,7 +3522,7 @@ ZlibTransformGetHandle( int direction, ClientData *handlePtr) { - ZlibChannelData *cd = instanceData; + ZlibChannelData *cd = (ZlibChannelData *)instanceData; return Tcl_GetChannelHandle(cd->parent, direction, handlePtr); } @@ -3542,7 +3542,7 @@ ZlibTransformBlockMode( ClientData instanceData, int mode) { - ZlibChannelData *cd = instanceData; + ZlibChannelData *cd = (ZlibChannelData *)instanceData; if (mode == TCL_MODE_NONBLOCKING) { cd->flags |= ASYNC; @@ -3592,7 +3592,7 @@ ZlibStackChannelTransform( * dictionary (not dictObj!) to use if * necessary. */ { - ZlibChannelData *cd = ckalloc(sizeof(ZlibChannelData)); + ZlibChannelData *cd = (ZlibChannelData *)ckalloc(sizeof(ZlibChannelData)); Tcl_Channel chan; int wbits = 0; @@ -3652,7 +3652,7 @@ ZlibStackChannelTransform( goto error; } cd->inAllocated = DEFAULT_BUFFER_SIZE; - cd->inBuffer = ckalloc(cd->inAllocated); + cd->inBuffer = (char *)ckalloc(cd->inAllocated); if (cd->flags & IN_HEADER) { if (inflateGetHeader(&cd->inStream, &cd->inHeader.header) != Z_OK) { goto error; @@ -3669,7 +3669,7 @@ ZlibStackChannelTransform( goto error; } cd->outAllocated = DEFAULT_BUFFER_SIZE; - cd->outBuffer = ckalloc(cd->outAllocated); + cd->outBuffer = (char *)ckalloc(cd->outAllocated); if (cd->flags & OUT_HEADER) { if (deflateSetHeader(&cd->outStream, &cd->outHeader.header) != Z_OK) { goto error; diff --git a/win/tclWinPipe.c b/win/tclWinPipe.c index 69c38d4..04c371e 100644 --- a/win/tclWinPipe.c +++ b/win/tclWinPipe.c @@ -2752,7 +2752,6 @@ TclWinAddProcess( *---------------------------------------------------------------------- */ - /* ARGSUSED */ int Tcl_PidObjCmd( ClientData dummy, /* Not used. */ -- cgit v0.12 From effab20437479dde3420488467f7ab773743d849 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 20 Apr 2020 11:56:27 +0000 Subject: (cherry-pick): Proposed fix for [27944a3661]: Taming test utf-6.88. --- generic/tclUtf.c | 38 ++++++++++++-------------------------- tests/utf.test | 22 +++++++++++++++++----- 2 files changed, 29 insertions(+), 31 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 1ba474e..aa949ca 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -716,31 +716,11 @@ Tcl_UtfFindLast( * * Tcl_UtfNext -- * - * The aim of this routine is to provide a way to iterate forward - * through a UTF-8 string. The caller is expected to pass a non-NULL - * pointer argument /src/ which points to a location within a string. - * (*src) will be read, so /src/ must not point to an unreadable - * location past the end of the string. If /src/ points to the - * beginning of a complete, well-formed and valid UTF_8 byte sequence - * of no more than TCL_UTF_MAX bytes, Tcl_UtfNext returns the pointer - * just past the end of that sequence. In any other circumstance, - * Tcl_UtfNext returns /src/+1. - * - * Because this routine always returns a value > /src/, it is useful - * as a forward iterator that will always make progress. If the string - * is NUL-terminated, Tcl_UtfNext will not read beyond the terminating - * NUL character. If it is not NUL-terminated, the caller must make - * use of the companion routine Tcl_UtfCharComplete to test whether - * there is risk that Tcl_UtfNext will read beyond the end of the string. - * Tcl_UtfNext will never read more than TCL_UTF_MAX bytes. - * - * In a string where all characters are complete and properly formed, - * and /src/ points to the first byte of a character, repeated - * Tcl_UtfNext calls will step to the starting bytes of characters, one - * character at a time. Within those limitations, Tcl_UtfPrev and - * Tcl_UtfNext are inverses. If either condition cannot be met, - * Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and the - * caller will have to take greater care. + * Given a pointer to some location in a UTF-8 string, Tcl_UtfNext + * returns a pointer to the next UTF-8 character in the string. + * The caller must not ask for the next character after the last + * character in the string if the string is not terminated by a null + * character. * * Results: * A pointer to the start of the next character in the string (or to @@ -760,13 +740,19 @@ Tcl_UtfNext( int left = totalBytes[byte]; const char *next = src + 1; + if (((*src) & 0xC0) == 0x80) { + if ((((*++src) & 0xC0) == 0x80) && (((*++src) & 0xC0) == 0x80)) { + ++src; + } + return src; + } + while (--left) { byte = *((unsigned char *) next); if ((byte & 0xC0) != 0x80) { /* * src points to non-trail byte; We ran out of trail bytes * before the needs of the lead byte were satisfied. - * Let the (malformed) lead byte alone be a character */ return src + 1; } diff --git a/tests/utf.test b/tests/utf.test index 0ba2b85..f56fabc 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -189,7 +189,7 @@ test utf-6.10 {Tcl_UtfNext} testutfnext { } 1 test utf-6.11 {Tcl_UtfNext} testutfnext { testutfnext \xA0\xA0 -} 1 +} 2 test utf-6.12 {Tcl_UtfNext} testutfnext { testutfnext \xA0\xD0 } 1 @@ -420,18 +420,30 @@ test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext} { } 4 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} { testutfnext \xA0\xA0 -} 1 +} 2 test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} { testutfnext \xE8\xA0\xA0 1 -} 2 +} 3 test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} { testutfnext \x80\x80 -} 1 +} 2 test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} { testutfnext \xF0\x80\x80 1 -} 2 +} 3 testConstraint testutfprev [llength [info commands testutfprev]] +test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { + testutfnext \xA0\xA0\xA0 +} 3 +test utf-6.92.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { + testutfnext \xF2\xA0\xA0\xA0 1 +} 4 +test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { + testutfnext \x80\x80\x80 +} 3 +test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { + testutfnext \xF0\x80\x80\x80 1 +} 4 test utf-7.1 {Tcl_UtfPrev} testutfprev { testutfprev {} -- cgit v0.12 From 4f2621d9d59b2df9183fc4a90bb530dbccd18fc2 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 20 Apr 2020 15:20:55 +0000 Subject: Proposed fix for [c11e0c5ce4]: Regression in Tcl_UtfCharComplete. --- generic/tclUtf.c | 32 ++++++------- tests/utf.test | 136 ++++++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 123 insertions(+), 45 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index aa949ca..842744d 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -64,17 +64,6 @@ static const unsigned char totalBytes[256] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1 -}; - -static const unsigned char complete[256] = { - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, #if TCL_UTF_MAX > 4 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, @@ -84,7 +73,11 @@ static const unsigned char complete[256] = { #endif 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, - 4,4,4,4,4, +#if TCL_UTF_MAX > 3 + 4,4,4,4,4, +#else + 1,1,1,1,1, +#endif 1,1,1,1,1,1,1,1,1,1,1 }; @@ -558,7 +551,7 @@ Tcl_UtfCharComplete( * a complete UTF-8 character. */ int length) /* Length of above string in bytes. */ { - return length >= complete[(unsigned char)*src]; + return length >= totalBytes[(unsigned char)*src]; } /* @@ -606,7 +599,7 @@ Tcl_NumUtfChars( src = next; } } else { - register const char *endPtr = src + length - /*TCL_UTF_MAX*/ 4; + register const char *endPtr = src + length - TCL_UTF_MAX; while (src < endPtr) { next = TclUtfNext(src); @@ -617,7 +610,7 @@ Tcl_NumUtfChars( #endif src = next; } - endPtr += /*TCL_UTF_MAX*/ 4; + endPtr += TCL_UTF_MAX; while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { next = TclUtfNext(src); #if TCL_UTF_MAX > 4 @@ -895,15 +888,18 @@ Tcl_UtfPrev( /* Continue the search backwards... */ look--; - } while (trailBytesSeen < /* was TCL_UTF_MAX */ 4); + } while (trailBytesSeen < TCL_UTF_MAX); /* - * We've seen 4 (was TCL_UTF_MAX) trail bytes, so we know there will not be a + * We've seen TCL_UTF_MAX trail bytes, so we know there will not be a * properly formed byte sequence to find, and we can stop looking, * accepting the fallback. */ - +#if TCL_UTF_MAX < 4 + return src - TCL_UTF_MAX; +#else return fallback; +#endif } /* diff --git a/tests/utf.test b/tests/utf.test index f56fabc..3301dde 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -21,6 +21,7 @@ testConstraint testbytestring [llength [info commands testbytestring]] catch {unset x} # Some tests require support for 4-byte UTF-8 sequences +testConstraint smallutf [expr {[format %c 0x010000] == "\uFFFD"}] testConstraint fullutf [expr {[format %c 0x010000] != "\uFFFD"}] testConstraint tip389 [expr {[string length \U010000] == 2}] @@ -361,7 +362,10 @@ test utf-6.67 {Tcl_UtfNext} testutfnext { test utf-6.68 {Tcl_UtfNext} testutfnext { testutfnext \xF4\xA0\xA0G } 1 -test utf-6.69 {Tcl_UtfNext} testutfnext { +test utf-6.69 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF4\xA0\xA0\xA0 +} 1 +test utf-6.69.1 {Tcl_UtfNext} {testutfnext fullutf} { testutfnext \xF4\xA0\xA0\xA0 } 4 test utf-6.70 {Tcl_UtfNext} testutfnext { @@ -376,22 +380,40 @@ test utf-6.71 {Tcl_UtfNext} testutfnext { test utf-6.73 {Tcl_UtfNext} testutfnext { testutfnext \xF4\xA0\xA0\xF8 } 1 -test utf-6.74 {Tcl_UtfNext} testutfnext { +test utf-6.74 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF4\xA0\xA0\xA0G +} 1 +test utf-6.74.1 {Tcl_UtfNext} {testutfnext fullutf} { testutfnext \xF4\xA0\xA0\xA0G } 4 -test utf-6.75 {Tcl_UtfNext} testutfnext { +test utf-6.75 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF4\xA0\xA0\xA0\xA0 +} 1 +test utf-6.75.1 {Tcl_UtfNext} {testutfnext fullutf} { testutfnext \xF4\xA0\xA0\xA0\xA0 } 4 -test utf-6.76 {Tcl_UtfNext} testutfnext { +test utf-6.76 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF4\xA0\xA0\xA0\xD0 +} 1 +test utf-6.76.1 {Tcl_UtfNext} {testutfnext fullutf} { testutfnext \xF4\xA0\xA0\xA0\xD0 } 4 -test utf-6.77 {Tcl_UtfNext} testutfnext { +test utf-6.77 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF4\xA0\xA0\xA0\xE8 +} 1 +test utf-6.77.1 {Tcl_UtfNext} {testutfnext fullutf} { testutfnext \xF4\xA0\xA0\xA0\xE8 } 4 -test utf-6.78 {Tcl_UtfNext} testutfnext { +test utf-6.78 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF4\xA0\xA0\xA0\xF4 +} 1 +test utf-6.78.1 {Tcl_UtfNext} {testutfnext fullutf} { testutfnext \xF4\xA0\xA0\xA0\xF4 } 4 -test utf-6.79 {Tcl_UtfNext} testutfnext { +test utf-6.79 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF4\xA0\xA0\xA0G\xF8 +} 1 +test utf-6.79.1 {Tcl_UtfNext} {testutfnext fullutf} { testutfnext \xF4\xA0\xA0\xA0G\xF8 } 4 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext { @@ -415,7 +437,10 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext { test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext { testutfnext \xF0\x80\x80\x80 } 1 -test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext} { +test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext smallutf} { + testutfnext \xF0\x90\x80\x80 +} 1 +test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} { testutfnext \xF0\x90\x80\x80 } 4 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} { @@ -508,13 +533,22 @@ test utf-7.9.1 {Tcl_UtfPrev} testutfprev { test utf-7.9.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xF8\xA0 3 } 2 -test utf-7.10 {Tcl_UtfPrev} testutfprev { +test utf-7.10 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF4\xA0 +} 2 +test utf-7.10.1 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF4\xA0\xA0\xA0 3 +} 2 +test utf-7.10.2 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF4\xA0\xF8\xA0 3 +} 2 +test utf-7.10 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF4\xA0 } 1 -test utf-7.10.1 {Tcl_UtfPrev} testutfprev { +test utf-7.10.1 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF4\xA0\xA0\xA0 3 } 1 -test utf-7.10.2 {Tcl_UtfPrev} testutfprev { +test utf-7.10.2 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF4\xA0\xF8\xA0 3 } 1 test utf-7.11 {Tcl_UtfPrev} testutfprev { @@ -556,13 +590,22 @@ test utf-7.14.1 {Tcl_UtfPrev} testutfprev { test utf-7.14.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xA0\xF8 4 } 3 -test utf-7.15 {Tcl_UtfPrev} testutfprev { +test utf-7.15 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF4\xA0\xA0 +} 3 +test utf-7.15.1 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF4\xA0\xA0\xA0 4 +} 3 +test utf-7.15.2 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF4\xA0\xA0\xF8 4 +} 3 +test utf-7.15.3 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF4\xA0\xA0 } 1 -test utf-7.15.1 {Tcl_UtfPrev} testutfprev { +test utf-7.15.4 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF4\xA0\xA0\xA0 4 } 1 -test utf-7.15.2 {Tcl_UtfPrev} testutfprev { +test utf-7.15.5 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF4\xA0\xA0\xF8 4 } 1 test utf-7.16 {Tcl_UtfPrev} testutfprev { @@ -583,28 +626,52 @@ test utf-7.17.1 {Tcl_UtfPrev} testutfprev { test utf-7.17.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xD0\xA0\xA0\xF8 4 } 3 -test utf-7.18 {Tcl_UtfPrev} testutfprev { +test utf-7.18 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xA0\xA0\xA0 +} 1 +test utf-7.18.1 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xA0\xA0\xA0\xA0 4 +} 1 +test utf-7.18.2 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xA0\xA0\xA0\xF8 4 +} 1 +test utf-7.18.3 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xA0\xA0\xA0 } 3 -test utf-7.18.1 {Tcl_UtfPrev} testutfprev { +test utf-7.18.4 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xA0\xA0\xA0\xA0 4 } 3 -test utf-7.18.2 {Tcl_UtfPrev} testutfprev { +test utf-7.18.5 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xA0\xA0\xA0\xF8 4 } 3 -test utf-7.19 {Tcl_UtfPrev} testutfprev { +test utf-7.19 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF8\xA0\xA0\xA0 +} 2 +test utf-7.19.1 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF8\xA0\xA0\xA0 } 4 -test utf-7.20 {Tcl_UtfPrev} testutfprev { +test utf-7.20 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF4\xA0\xA0\xA0 +} 2 +test utf-7.20.1 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF4\xA0\xA0\xA0 } 1 -test utf-7.21 {Tcl_UtfPrev} testutfprev { +test utf-7.21 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xE8\xA0\xA0\xA0 +} 2 +test utf-7.21.1 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xE8\xA0\xA0\xA0 } 4 -test utf-7.22 {Tcl_UtfPrev} testutfprev { +test utf-7.22 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xD0\xA0\xA0\xA0 +} 2 +test utf-7.22.1 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xD0\xA0\xA0\xA0 } 4 -test utf-7.23 {Tcl_UtfPrev} testutfprev { +test utf-7.23 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xA0\xA0\xA0\xA0 +} 2 +test utf-7.23.1 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xA0\xA0\xA0\xA0 } 4 test utf-7.24 {Tcl_UtfPrev -- overlong sequence} testutfprev { @@ -628,7 +695,10 @@ test utf-7.28 {Tcl_UtfPrev -- overlong sequence} testutfprev { test utf-7.28.1 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\x80\x80 2 } 1 -test utf-7.29 {Tcl_UtfPrev -- overlong sequence} testutfprev { +test utf-7.29 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { + testutfprev A\xF0\x80\x80\x80 +} 2 +test utf-7.29.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { testutfprev A\xF0\x80\x80\x80 } 4 test utf-7.30 {Tcl_UtfPrev -- overlong sequence} testutfprev { @@ -658,13 +728,22 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence} testutfprev { test utf-7.38 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\xA0\x80 2 } 1 -test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev} { +test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { + testutfprev A\xF0\x90\x80\x80 +} 2 +test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { testutfprev A\xF0\x90\x80\x80 } 1 -test utf-7.40 {Tcl_UtfPrev -- overlong sequence} {testutfprev} { +test utf-7.40 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { + testutfprev A\xF0\x90\x80\x80 4 +} 3 +test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { testutfprev A\xF0\x90\x80\x80 4 } 1 -test utf-7.41 {Tcl_UtfPrev -- overlong sequence} {testutfprev} { +test utf-7.41 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { + testutfprev A\xF0\x90\x80\x80 3 +} 2 +test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { testutfprev A\xF0\x90\x80\x80 3 } 1 test utf-7.42 {Tcl_UtfPrev -- overlong sequence} testutfprev { @@ -679,7 +758,10 @@ test utf-7.44 {Tcl_UtfPrev -- no lead byte at start} testutfprev { test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} testutfprev { testutfprev \xA0\xA0\xA0 } 2 -test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} testutfprev { +test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} {testutfprev smallutf} { + testutfprev \xA0\xA0\xA0\xA0 +} 1 +test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} {testutfprev fullutf} { testutfprev \xA0\xA0\xA0\xA0 } 3 test utf-7.47 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev} { -- cgit v0.12 From 059d5cc1803ee62dea1b7fd7e42e75453ce27264 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 20 Apr 2020 20:44:24 +0000 Subject: Add test-cases handling TclGetBytesFromObj() ( actually Tcl_UtfToUniChar too) --- tests/binary.test | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/binary.test b/tests/binary.test index a777b2a..b06afe0 100644 --- a/tests/binary.test +++ b/tests/binary.test @@ -16,6 +16,7 @@ if {[lsearch [namespace children] ::tcltest] == -1} { } testConstraint bigEndian [expr {$tcl_platform(byteOrder) eq "bigEndian"}] testConstraint littleEndian [expr {$tcl_platform(byteOrder) eq "littleEndian"}] +testConstraint testbytestring [llength [info commands testbytestring]] # Big test for correct ordering of data in [expr] proc testIEEE {} { @@ -2941,7 +2942,19 @@ test binary-79.2 {Tcl_SetByteArrayLength} testsetbytearraylength { testsetbytearraylength [string cat \u0141 B C] 1 } A - +test binary-80.1 {TclGetBytesFromObj} -constraints testbytestring -returnCodes 1 -body { + testbytestring "\u4E4E" +} -result "expected byte sequence but character 0 was '\u4E4E' (U+004E4E)" +test binary-80.2 {TclGetBytesFromObj} -constraints testbytestring -returnCodes 1 -body { + testbytestring [testbytestring "\x00\xA0\xA0\xA0\xE4\xB9\x8E"] +} -result "expected byte sequence but character 4 was '\u4E4E' (U+004E4E)" +test binary-80.3 {TclGetBytesFromObj} -constraints testbytestring -returnCodes 1 -body { + testbytestring [testbytestring "\xC0\x80\xA0\xA0\xA0\xE4\xB9\x8E"] +} -result "expected byte sequence but character 4 was '\u4E4E' (U+004E4E)" +test binary-80.4 {TclGetBytesFromObj} -constraints testbytestring -returnCodes 1 -body { + testbytestring [testbytestring "\xC0\x80\xA0\xA0\xA0\xF0\x9F\x98\x81"] +} -result "expected byte sequence but character 4 was '\U01F601' (U+01F601)" + # ---------------------------------------------------------------------- # cleanup -- cgit v0.12 From 206022e9799361a82f91780bace269e514fb27bf Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 20 Apr 2020 21:54:31 +0000 Subject: Teach Tcl_UtfPrev() that 0xC1 is _always_ an invallid byte. Test-case utf-7.34. Make sure that Tcl_UtfCharComplete(src, TCL_UTF_MAX) always returns 1, for whatever bytes, since that's the maximum number of bytes Tcl_UtfToUniChar() can read in a single call. --- generic/tclUtf.c | 20 ++++++++++++++------ tests/utf.test | 2 +- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 2a04414..c018472 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -64,12 +64,14 @@ static const unsigned char totalBytes[256] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +/* Tcl_UtfCharComplete() might point to 2nd byte of valid 4-byte sequence */ + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +/* End of "continuation byte section" */ + 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1 }; - + static const unsigned char complete[256] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, @@ -79,8 +81,14 @@ static const unsigned char complete[256] = { 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* End of "continuation byte section" */ - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1 + 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +#if TCL_UTF_MAX > 3 + 4,4,4,4,4, +#else + 1,1,1,1,1, +#endif + 1,1,1,1,1,1,1,1,1,1,1 }; /* diff --git a/tests/utf.test b/tests/utf.test index 150e395..a12cc73 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -635,7 +635,7 @@ test utf-7.33 {Tcl_UtfPrev -- overlong sequence} testutfprev { } 1 test utf-7.34 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xC1\x80 -} 1 +} 2 test utf-7.35 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xC2\x80 } 1 -- cgit v0.12 From 58cf4db1ccb0602f9bd023ecc4e56830aea2453a Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 20 Apr 2020 22:35:39 +0000 Subject: Tie together the TCL_UTF_MAX=4 and TCL_UTF_MAX=6 builds to mean the same thing on the 8.5 branch -- use internal UCS-4 storage. --- generic/regcustom.h | 2 +- generic/tcl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/generic/regcustom.h b/generic/regcustom.h index 57a2d47..ac33087 100644 --- a/generic/regcustom.h +++ b/generic/regcustom.h @@ -97,7 +97,7 @@ typedef int celt; /* Type to hold chr, or NOCELT */ #define NOCELT (-1) /* Celt value which is not valid chr */ #define CHR(c) (UCHAR(c)) /* Turn char literal into chr literal */ #define DIGITVAL(c) ((c)-'0') /* Turn chr digit into its value */ -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 #define CHRBITS 32 /* Bits in a chr; must not use sizeof */ #define CHR_MIN 0x00000000 /* Smallest and largest chr; the value */ #define CHR_MAX 0xffffffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */ diff --git a/generic/tcl.h b/generic/tcl.h index 7378a8f..d7d064c 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2148,7 +2148,7 @@ typedef struct Tcl_Parse { * reflected in regcustom.h. */ -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 /* * unsigned int isn't 100% accurate as it should be a strict 4-byte value * (perhaps wchar_t). 64-bit systems may have troubles. The size of this -- cgit v0.12 From 4c9bc32c393e100ae9caf7f06e57c798f96ada6d Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 20 Apr 2020 23:00:30 +0000 Subject: Pair every compat85 test with a fullutf test so that we cover all variants. --- tests/utf.test | 137 ++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 107 insertions(+), 30 deletions(-) diff --git a/tests/utf.test b/tests/utf.test index 1ca3647..1c79f32 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -13,7 +13,9 @@ if {[lsearch [namespace children] ::tcltest] == -1} { namespace import -force ::tcltest::* } -testConstraint compat85 [expr {[format %c 0x010000] == "\uFFFD"}] +testConstraint compat85 [expr {[format %c 0x010000] eq "\uFFFD"}] +testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] + testConstraint testbytestring [llength [info commands testbytestring]] testConstraint testfindfirst [llength [info commands testfindfirst]] testConstraint testfindlast [llength [info commands testfindlast]] @@ -64,12 +66,18 @@ test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} testbytestrin test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring { string length [testbytestring "\xE4\xB9\x8E"] } {1} -test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body { +test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body { string length [testbytestring "\xF0\x90\x80\x80"] } -result {4} -test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body { +test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring fullutf} -body { + string length [testbytestring "\xF0\x90\x80\x80"] +} -result {1} +test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body { string length [testbytestring "\xF4\x8F\xBF\xBF"] } -result {4} +test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring fullutf} -body { + string length [testbytestring "\xF4\x8F\xBF\xBF"] +} -result {1} test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring { string length [testbytestring "\xF0\x8F\xBF\xBF"] } {4} @@ -118,9 +126,12 @@ test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 3 } {3} -test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring compat85} { +test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring compat85} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4 } {4} +test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring fullutf} { + testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4 +} {1} test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} { testfindfirst [testbytestring "abcbc"] 98 @@ -335,9 +346,12 @@ test utf-6.67 {Tcl_UtfNext} testutfnext { test utf-6.68 {Tcl_UtfNext} testutfnext { testutfnext \xF2\xA0\xA0G } 1 -test utf-6.69 {Tcl_UtfNext} {testutfnext compat85} { +test utf-6.69.0 {Tcl_UtfNext} {testutfnext compat85} { testutfnext \xF2\xA0\xA0\xA0 } 1 +test utf-6.69.1 {Tcl_UtfNext} {testutfnext fullutf} { + testutfnext \xF2\xA0\xA0\xA0 +} 4 test utf-6.70 {Tcl_UtfNext} testutfnext { testutfnext \xF2\xA0\xA0\xD0 } 1 @@ -350,24 +364,42 @@ test utf-6.71 {Tcl_UtfNext} testutfnext { test utf-6.73 {Tcl_UtfNext} testutfnext { testutfnext \xF2\xA0\xA0\xF8 } 1 -test utf-6.74 {Tcl_UtfNext} {testutfnext compat85} { +test utf-6.74.0 {Tcl_UtfNext} {testutfnext compat85} { testutfnext \xF2\xA0\xA0\xA0G } 1 -test utf-6.75 {Tcl_UtfNext} {testutfnext compat85} { +test utf-6.74.1 {Tcl_UtfNext} {testutfnext fullutf} { + testutfnext \xF2\xA0\xA0\xA0G +} 4 +test utf-6.75.0 {Tcl_UtfNext} {testutfnext compat85} { testutfnext \xF2\xA0\xA0\xA0\xA0 } 1 -test utf-6.76 {Tcl_UtfNext} {testutfnext compat85} { +test utf-6.75.1 {Tcl_UtfNext} {testutfnext fullutf} { + testutfnext \xF2\xA0\xA0\xA0\xA0 +} 4 +test utf-6.76.0 {Tcl_UtfNext} {testutfnext compat85} { testutfnext \xF2\xA0\xA0\xA0\xD0 } 1 -test utf-6.77 {Tcl_UtfNext} {testutfnext compat85} { +test utf-6.76.1 {Tcl_UtfNext} {testutfnext fullutf} { + testutfnext \xF2\xA0\xA0\xA0\xD0 +} 4 +test utf-6.77.0 {Tcl_UtfNext} {testutfnext compat85} { testutfnext \xF2\xA0\xA0\xA0\xE8 } 1 -test utf-6.78 {Tcl_UtfNext} {testutfnext compat85} { +test utf-6.77.1 {Tcl_UtfNext} {testutfnext fullutf} { + testutfnext \xF2\xA0\xA0\xA0\xE8 +} 4 +test utf-6.78.0 {Tcl_UtfNext} {testutfnext compat85} { testutfnext \xF2\xA0\xA0\xA0\xF2 } 1 -test utf-6.79 {Tcl_UtfNext} {testutfnext compat85} { +test utf-6.78.1 {Tcl_UtfNext} {testutfnext fullutf} { + testutfnext \xF2\xA0\xA0\xA0\xF2 +} 4 +test utf-6.79.0 {Tcl_UtfNext} {testutfnext compat85} { testutfnext \xF2\xA0\xA0\xA0G\xF8 } 1 +test utf-6.79.1 {Tcl_UtfNext} {testutfnext fullutf} { + testutfnext \xF2\xA0\xA0\xA0G\xF8 +} 4 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext { testutfnext \xC0\x80 } 2 @@ -389,9 +421,12 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext { test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext { testutfnext \xF0\x80\x80\x80 } 1 -test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext compat85} { +test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext compat85} { testutfnext \xF0\x90\x80\x80 } 1 +test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} { + testutfnext \xF0\x90\x80\x80 +} 4 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} { testutfnext \xA0\xA0 } 1 @@ -404,9 +439,12 @@ test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {te test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} { testutfnext \xF0\x80\x80 1 } 2 -test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext compat85} { +test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext compat85} { testutfnext \xF4\x8F\xBF\xBF } 1 +test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} { + testutfnext \xF4\x8F\xBF\xBF +} 4 test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext { testutfnext \xF4\x90\x80\x80 } 1 @@ -474,15 +512,24 @@ test utf-7.9.1 {Tcl_UtfPrev} testutfprev { test utf-7.9.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xF8\xA0 3 } 2 -test utf-7.10 {Tcl_UtfPrev} {testutfprev compat85} { +test utf-7.10.0 {Tcl_UtfPrev} {testutfprev compat85} { testutfprev A\xF2\xA0 } 2 -test utf-7.10.1 {Tcl_UtfPrev} {testutfprev compat85} { +test utf-7.10.1 {Tcl_UtfPrev} {testutfprev fullutf} { + testutfprev A\xF2\xA0 +} 1 +test utf-7.10.1.0 {Tcl_UtfPrev} {testutfprev compat85} { testutfprev A\xF2\xA0\xA0\xA0 3 } 2 -test utf-7.10.2 {Tcl_UtfPrev} {testutfprev compat85} { +test utf-7.10.1.1 {Tcl_UtfPrev} {testutfprev fullutf} { + testutfprev A\xF2\xA0\xA0\xA0 3 +} 1 +test utf-7.10.2.0 {Tcl_UtfPrev} {testutfprev compat85} { testutfprev A\xF2\xA0\xF8\xA0 3 } 2 +test utf-7.10.2.1 {Tcl_UtfPrev} {testutfprev fullutf} { + testutfprev A\xF2\xA0\xF8\xA0 3 +} 1 test utf-7.11 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8\xA0 } 1 @@ -522,15 +569,24 @@ test utf-7.14.1 {Tcl_UtfPrev} testutfprev { test utf-7.14.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xA0\xF8 4 } 3 -test utf-7.15 {Tcl_UtfPrev} {testutfprev compat85} { +test utf-7.15.0 {Tcl_UtfPrev} {testutfprev compat85} { testutfprev A\xF2\xA0\xA0 } 3 -test utf-7.15.1 {Tcl_UtfPrev} {testutfprev compat85} { +test utf-7.15.1 {Tcl_UtfPrev} {testutfprev fullutf} { + testutfprev A\xF2\xA0\xA0 +} 1 +test utf-7.15.1.0 {Tcl_UtfPrev} {testutfprev compat85} { testutfprev A\xF2\xA0\xA0\xA0 4 } 3 -test utf-7.15.2 {Tcl_UtfPrev} {testutfprev compat85} { +test utf-7.15.1.1 {Tcl_UtfPrev} {testutfprev fullutf} { + testutfprev A\xF2\xA0\xA0\xA0 4 +} 1 +test utf-7.15.2.0 {Tcl_UtfPrev} {testutfprev compat85} { testutfprev A\xF2\xA0\xA0\xF8 4 } 3 +test utf-7.15.2.1 {Tcl_UtfPrev} {testutfprev fullutf} { + testutfprev A\xF2\xA0\xA0\xF8 4 +} 1 test utf-7.16 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8\xA0\xA0 } 1 @@ -561,9 +617,12 @@ test utf-7.18.2 {Tcl_UtfPrev} testutfprev { test utf-7.19 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xA0\xA0 } 4 -test utf-7.20 {Tcl_UtfPrev} {testutfprev compat85} { +test utf-7.20.0 {Tcl_UtfPrev} {testutfprev compat85} { testutfprev A\xF2\xA0\xA0\xA0 } 4 +test utf-7.20.1 {Tcl_UtfPrev} {testutfprev fullutf} { + testutfprev A\xF2\xA0\xA0\xA0 +} 1 test utf-7.21 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8\xA0\xA0\xA0 } 4 @@ -624,15 +683,24 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence} testutfprev { test utf-7.38 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\xA0\x80 2 } 1 -test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev compat85} { +test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev compat85} { testutfprev A\xF0\x90\x80\x80 } 4 -test utf-7.40 {Tcl_UtfPrev -- overlong sequence} {testutfprev compat85} { +test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { + testutfprev A\xF0\x90\x80\x80 +} 1 +test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev compat85} { testutfprev A\xF0\x90\x80\x80 4 } 3 -test utf-7.41 {Tcl_UtfPrev -- overlong sequence} {testutfprev compat85} { +test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { + testutfprev A\xF0\x90\x80\x80 4 +} 1 +test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev compat85} { testutfprev A\xF0\x90\x80\x80 3 } 2 +test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { + testutfprev A\xF0\x90\x80\x80 3 +} 1 test utf-7.42 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xF0\x90\x80\x80 2 } 1 @@ -657,15 +725,24 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {te test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} { testutfprev \xE8\xA0\x00 2 } 0 -test utf-7.48 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} { +test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} { testutfprev A\xF4\x8F\xBF\xBF } 4 -test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} { +test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { + testutfprev A\xF4\x8F\xBF\xBF +} 1 +test utf-7.48.1.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} { testutfprev A\xF4\x8F\xBF\xBF 4 } 3 -test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} { +test utf-7.48.1.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { + testutfprev A\xF4\x8F\xBF\xBF 4 +} 1 +test utf-7.48.2.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} { testutfprev A\xF4\x8F\xBF\xBF 3 } 2 +test utf-7.48.2.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { + testutfprev A\xF4\x8F\xBF\xBF 3 +} 1 test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { testutfprev A\xF4\x8F\xBF\xBF 2 } 1 @@ -700,10 +777,10 @@ test utf-8.5 {Tcl_UniCharAtIndex: high surrogate} { test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} { string index \uDC42 0 } "\uDC42" -test utf-8.7 {Tcl_UniCharAtIndex: Emoji} compat85 { +test utf-8.7 {Tcl_UniCharAtIndex: Emoji} { string index \uD83D\uDE00 0 } "\uD83D" -test utf-8.8 {Tcl_UniCharAtIndex: Emoji} compat85 { +test utf-8.8 {Tcl_UniCharAtIndex: Emoji} { string index \uD83D\uDE00 1 } "\uDE00" @@ -713,10 +790,10 @@ test utf-9.1 {Tcl_UtfAtIndex: index = 0} { test utf-9.2 {Tcl_UtfAtIndex: index > 0} { string range \u4E4E\u25A\xFF\u543klmnop 1 5 } "\u25A\xFF\u543kl" -test utf-9.3 {Tcl_UtfAtIndex: index = 0, Emoji} compat85 { +test utf-9.3 {Tcl_UtfAtIndex: index = 0, Emoji} { string range \uD83D\uDE00G 0 0 } "\uD83D" -test utf-9.4 {Tcl_UtfAtIndex: index > 0, Emoji} compat85 { +test utf-9.4 {Tcl_UtfAtIndex: index > 0, Emoji} { string range \uD83D\uDE00G 1 1 } "\uDE00" -- cgit v0.12 From d8bc590eef94f9e9ec24150cf8208f38638290ef Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 21 Apr 2020 02:50:23 +0000 Subject: Revert the backport to tclEncoding.c that seems to redefine the "unicode" encoding to mean UTF-16. Don't want that behavior change in 8.5. --- generic/tclEncoding.c | 240 +++++++++++++++++++++++--------------------------- 1 file changed, 112 insertions(+), 128 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index da03055..5a9d2d5 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -83,7 +83,7 @@ typedef struct TableEncodingData { } TableEncodingData; /* - * Each of the following structures is the clientData for a dynamically-loaded + * The following structures is the clientData for a dynamically-loaded, * escape-driven encoding that is itself comprised of other simpler encodings. * An example is "iso-2022-jp", which uses escape sequences to switch between * ascii, jis0208, jis0212, gb2312, and ksc5601. Note that "escape-driven" @@ -117,8 +117,8 @@ typedef struct EscapeEncodingData { * 0. */ int numSubTables; /* Length of following array. */ EscapeSubTable subTables[1];/* Information about each EscapeSubTable used - * by this encoding type. The actual size is - * as large as necessary to hold all + * by this encoding type. The actual size will + * be as large as necessary to hold all * EscapeSubTables. */ } EscapeEncodingData; @@ -156,7 +156,7 @@ static ProcessGlobalValue encodingFileMap = { * A list of directories making up the "library path". Historically this * search path has served many uses, but the only one remaining is a base for * the encodingSearchPath above. If the application does not explicitly set - * the encodingSearchPath, then it is initialized by appending /encoding + * the encodingSearchPath, then it will be initialized by appending /encoding * to each directory in this "libraryPath". */ @@ -177,7 +177,7 @@ TCL_DECLARE_MUTEX(encodingMutex) /* * The following are used to hold the default and current system encodings. * If NULL is passed to one of the conversion routines, the current setting of - * the system encoding is used to perform the conversion. + * the system encoding will be used to perform the conversion. */ static Tcl_Encoding defaultEncoding; @@ -429,8 +429,9 @@ TclGetLibraryPath(void) * Keeps the per-thread copy of the library path current with changes to * the global copy. * - * Since the result of this routine is void, if searchPath is not a valid - * list this routine silently does nothing. + * NOTE: this routine returns void, so there's no way to report the error + * that searchPath is not a valid list. In that case, this routine will + * silently do nothing. * *---------------------------------------------------------------------- */ @@ -452,16 +453,17 @@ TclSetLibraryPath( * * FillEncodingFileMap -- * - * Called to update the encoding file map with the current value - * of the encoding search path. + * Called to bring the encoding file map in sync with the current value + * of the encoding search path. * - * Finds *.end files in the directories on the encoding search path and - * stores the found pathnames in a map associated with the encoding name. + * Scan the directories on the encoding search path, find the *.enc + * files, and store the found pathnames in a map associated with the + * encoding name. * - * If $dir is on the encoding search path and the file $dir/foo.enc is - * found, stores a "foo" -> $dir entry in the map. if the "foo" encoding - * is needed later, the $dir/foo.enc name can be quickly constructed in - * order to read the encoding data. + * In particular, if $dir is on the encoding search path, and the file + * $dir/foo.enc is found, then store a "foo" -> $dir entry in the map. + * Later, any need for the "foo" encoding will quickly * be able to + * construct the $dir/foo.enc pathname for reading the encoding data. * * Results: * None. @@ -542,24 +544,19 @@ void TclInitEncodingSubsystem(void) { Tcl_EncodingType type; - union { - char c; - short s; - } isLe; if (encodingsInitialized) { return; } - isLe.s = 1; Tcl_MutexLock(&encodingMutex); Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS); Tcl_MutexUnlock(&encodingMutex); /* - * Create a few initial encodings. UTF-8 to UTF-8 translation is not a - * no-op because it turns a stream of improperly formed UTF-8 into a - * properly formed stream. + * Create a few initial encodings. Note that the UTF-8 to UTF-8 + * translation is not a no-op, because it will turn a stream of improperly + * formed UTF-8 into a properly formed stream. */ type.encodingName = "identity"; @@ -586,7 +583,7 @@ TclInitEncodingSubsystem(void) type.fromUtfProc = UtfToUnicodeProc; type.freeProc = NULL; type.nullSize = 2; - type.clientData = INT2PTR(isLe.c); + type.clientData = NULL; Tcl_CreateEncoding(&type); /* @@ -758,7 +755,11 @@ Tcl_SetDefaultEncodingDir( * interp was NULL. * * Side effects: - * LoadEncodingFile is called if necessary. + * The new encoding type is entered into a table visible to all + * interpreters, keyed off the encoding's name. For each call to this + * function, there should eventually be a call to Tcl_FreeEncoding, so + * that the database can be cleaned up when encodings aren't needed + * anymore. * *------------------------------------------------------------------------- */ @@ -796,15 +797,15 @@ Tcl_GetEncoding( * * Tcl_FreeEncoding -- * - * Releases an encoding allocated by Tcl_CreateEncoding() or - * Tcl_GetEncoding(). + * This function is called to release an encoding allocated by + * Tcl_CreateEncoding() or Tcl_GetEncoding(). * * Results: * None. * * Side effects: * The reference count associated with the encoding is decremented and - * the encoding is deleted if nothing is using it anymore. + * the encoding may be deleted if nothing is using it anymore. * *--------------------------------------------------------------------------- */ @@ -823,14 +824,13 @@ Tcl_FreeEncoding( * * FreeEncoding -- * - * Decrements the reference count of an encoding. The caller must hold - * encodingMutes. + * This function is called to release an encoding by functions that + * already have the encodingMutex. * * Results: * None. * * Side effects: - * Releases the resource for an encoding if it is now unused. * The reference count associated with the encoding is decremented and * the encoding may be deleted if nothing is using it anymore. * @@ -850,17 +850,16 @@ FreeEncoding( if (encodingPtr->refCount<=0) { Tcl_Panic("FreeEncoding: refcount problem !!!"); } - if (encodingPtr->refCount-- <= 1) { + encodingPtr->refCount--; + if (encodingPtr->refCount == 0) { if (encodingPtr->freeProc != NULL) { (*encodingPtr->freeProc)(encodingPtr->clientData); } if (encodingPtr->hPtr != NULL) { Tcl_DeleteHashEntry(encodingPtr->hPtr); } - if (encodingPtr->name) { - ckfree((char *)encodingPtr->name); - } - ckfree((char *)encodingPtr); + ckfree((char *) encodingPtr->name); + ckfree((char *) encodingPtr); } } @@ -1021,22 +1020,23 @@ Tcl_SetSystemEncoding( * * Tcl_CreateEncoding -- * - * Defines a new encoding, along with the functions that are used to - * convert to and from Unicode. + * This function is called to define a new encoding and the functions + * that are used to convert between the specified encoding and Unicode. * * Results: * Returns a token that represents the encoding. If an encoding with the * same name already existed, the old encoding token remains valid and - * continues to behave as it used to, and is eventually garbage collected - * when the last reference to it goes away. Any subsequent calls to - * Tcl_GetEncoding with the specified name retrieve the most recent - * encoding token. + * continues to behave as it used to, and will eventually be garbage + * collected when the last reference to it goes away. Any subsequent + * calls to Tcl_GetEncoding with the specified name will retrieve the + * most recent encoding token. * * Side effects: - * A new record having the name of the encoding is entered into a table of - * encodings visible to all interpreters. For each call to this function, - * there should eventually be a call to Tcl_FreeEncoding, which cleans - * deletes the record in the table when an encoding is no longer needed. + * The new encoding type is entered into a table visible to all + * interpreters, keyed off the encoding's name. For each call to this + * function, there should eventually be a call to Tcl_FreeEncoding, so + * that the database can be cleaned up when encodings aren't needed + * anymore. * *--------------------------------------------------------------------------- */ @@ -1258,9 +1258,10 @@ Tcl_ExternalToUtf( * * Tcl_UtfToExternalDString -- * - * Convert a source buffer from UTF-8 to the specified encoding. If any + * Convert a source buffer from UTF-8 into the specified encoding. If any * of the bytes in the source buffer are invalid or cannot be represented - * in the target encoding, a default fallback character is substituted. + * in the target encoding, a default fallback character will be + * substituted. * * Results: * The converted bytes are stored in the DString, which is then NULL @@ -1569,13 +1570,13 @@ OpenEncodingFileChannel( * the data. * * Results: - * The return value is the newly loaded Tcl_Encoding or NULL if the file - * didn't exist or could not be processed. If NULL is returned and interp - * is not NULL, an error message is left in interp's result object. + * The return value is the newly loaded Encoding, or NULL if the file + * didn't exist of was in the incorrect format. If NULL was returned, an + * error message is left in interp's result object, unless interp was + * NULL. * * Side effects: - * A corresponding encoding file might be read from persistent storage, in - * which case LoadTableEncoding is called. + * File read from disk. * *--------------------------------------------------------------------------- */ @@ -1583,8 +1584,8 @@ OpenEncodingFileChannel( static Tcl_Encoding LoadEncodingFile( Tcl_Interp *interp, /* Interp for error reporting, if not NULL. */ - const char *name) /* The name of both the encoding file - * and the new encoding. */ + const char *name) /* The name of the encoding file on disk and + * also the name for new encoding. */ { Tcl_Channel chan = NULL; Tcl_Encoding encoding = NULL; @@ -1636,27 +1637,27 @@ LoadEncodingFile( * * LoadTableEncoding -- * - * Helper function for LoadEncodingFile(). Creates a Tcl_EncodingType - * structure along with its corresponding TableEncodingData structure, and - * passes it to Tcl_Createncoding. + * Helper function for LoadEncodingTable(). Loads a table to that + * converts between Unicode and some other encoding and creates an + * encoding (using a TableEncoding structure) from that information. * - * The file contains binary data but begins with a marker to indicate - * byte-ordering so a single binary file can be read on big or - * little-endian systems. + * File contains binary data, but begins with a marker to indicate + * byte-ordering, so that same binary file can be read on either endian + * platforms. * * Results: - * Returns the new Tcl_Encoding, or NULL if it could could - * not be created because the file contained invalid data. + * The return value is the new encoding, or NULL if the encoding could + * not be created (because the file contained invalid data). * * Side effects: - * See Tcl_CreateEncoding(). + * None. * *------------------------------------------------------------------------- */ static Tcl_Encoding LoadTableEncoding( - const char *name, /* Name of the new encoding. */ + const char *name, /* Name for new encoding. */ int type, /* Type of encoding (ENCODING_?????). */ Tcl_Channel chan) /* File containing new encoding. */ { @@ -1768,10 +1769,10 @@ LoadTableEncoding( } /* - * Invert the toUnicode array to produce the fromUnicode array. Performs a + * Invert toUnicode array to produce the fromUnicode array. Performs a * single malloc to get the memory for the array and all the pages needed - * by the array. While reading in the toUnicode array remember what - * pages are needed for the fromUnicode array. + * by the array. While reading in the toUnicode array, we remembered what + * pages that would be needed for the fromUnicode array. */ if (symbol) { @@ -1813,8 +1814,8 @@ LoadTableEncoding( if (type == ENCODING_MULTIBYTE) { /* * If multibyte encodings don't have a backslash character, define - * one. Otherwise, on Windows, native file names don't work because - * the backslash in the file name maps to the unknown character + * one. Otherwise, on Windows, native file names won't work because + * the backslash in the file name will map to the unknown character * (question mark) when converting from UTF-8 to external encoding. */ @@ -1828,13 +1829,13 @@ LoadTableEncoding( unsigned short *page; /* - * Make a special symbol encoding that maps each symbol character from - * its Unicode code point down into page 0, and also ensure that each - * characters on page 0 maps to itself so that a symbol font can be - * used to display a simple string like "abcd" and have alpha, beta, - * chi, delta show up, rather than have "unknown" chars show up because - * strictly speaking the symbol font doesn't have glyphs for those low - * ASCII chars. + * Make a special symbol encoding that not only maps the symbol + * characters from their Unicode code points down into page 0, but + * also ensure that the characters on page 0 map to themselves. This + * is so that a symbol font can be used to display a simple string + * like "abcd" and have alpha, beta, chi, delta show up, rather than + * have "unknown" chars show up because strictly speaking the symbol + * font doesn't have glyphs for those low ascii chars. */ page = dataPtr->fromUnicode[0]; @@ -1938,7 +1939,7 @@ LoadTableEncoding( static Tcl_Encoding LoadEscapeEncoding( - const char *name, /* Name of the new encoding. */ + const char *name, /* Name for new encoding. */ Tcl_Channel chan) /* File containing new encoding. */ { int i; @@ -2317,7 +2318,7 @@ UtfToUtfProc( * * UnicodeToUtfProc -- * - * Convert from UTF-16 to UTF-8. + * Convert from Unicode to UTF-8. * * Results: * Returns TCL_OK if conversion was successful. @@ -2330,7 +2331,7 @@ UtfToUtfProc( static int UnicodeToUtfProc( - ClientData clientData, /* != NULL means LE, == NUL means BE */ + ClientData clientData, /* Not used. */ const char *src, /* Source string in Unicode. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ @@ -2358,19 +2359,13 @@ UnicodeToUtfProc( const char *srcStart, *srcEnd; char *dstEnd, *dstStart; int result, numChars; - unsigned short ch; + Tcl_UniChar ch; result = TCL_OK; - - /* check alignment with utf-16 (2 == sizeof(UTF-16)) */ - if ((srcLen % 2) != 0) { - result = TCL_CONVERT_MULTIBYTE; - srcLen--; - } - /* If last code point is a high surrogate, we cannot handle that yet */ - if ((srcLen >= 2) && ((src[srcLen - (clientData?1:2)] & 0xFC) == 0xD8)) { + if ((srcLen % sizeof(Tcl_UniChar)) != 0) { result = TCL_CONVERT_MULTIBYTE; - srcLen-= 2; + srcLen /= sizeof(Tcl_UniChar); + srcLen *= sizeof(Tcl_UniChar); } srcStart = src; @@ -2384,21 +2379,17 @@ UnicodeToUtfProc( result = TCL_CONVERT_NOSPACE; break; } - if (clientData) { - ch = (src[1] & 0xFF) << 8 | (src[0] & 0xFF); - } else { - ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF); - } /* - * Special case for 1-byte utf chars for speed. Make sure we work with - * unsigned short-size data. + * Special case for 1-byte utf chars for speed. Make sure we + * work with Tcl_UniChar-size data. */ + ch = *(Tcl_UniChar *)src; if (ch && ch < 0x80) { *dst++ = (ch & 0xFF); } else { dst += Tcl_UniCharToUtf(ch, dst); } - src += sizeof(unsigned short); + src += sizeof(Tcl_UniChar); } *srcReadPtr = src - srcStart; @@ -2412,7 +2403,7 @@ UnicodeToUtfProc( * * UtfToUnicodeProc -- * - * Convert from UTF-8 to UTF-16. + * Convert from UTF-8 to Unicode. * * Results: * Returns TCL_OK if conversion was successful. @@ -2425,7 +2416,8 @@ UnicodeToUtfProc( static int UtfToUnicodeProc( - ClientData clientData, /* != NULL means LE, == NUL means BE */ + ClientData clientData, /* TableEncodingData that specifies + * encoding. */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ @@ -2452,7 +2444,7 @@ UtfToUnicodeProc( { const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; int result, numChars; - Tcl_UniChar ch = 0; + Tcl_UniChar ch; srcStart = src; srcEnd = src + srcLen; @@ -2484,37 +2476,27 @@ UtfToUnicodeProc( * Need to handle this in a way that won't cause misalignment * by casting dst to a Tcl_UniChar. [Bug 1122671] */ - if (clientData) { +#ifdef WORDS_BIGENDIAN #if TCL_UTF_MAX > 4 - if (ch <= 0xFFFF) { - *dst++ = (ch & 0xFF); - *dst++ = (ch >> 8); - } else { - *dst++ = (((ch - 0x10000) >> 10) & 0xFF); - *dst++ = (((ch - 0x10000) >> 18) & 0x3) | 0xD8; - *dst++ = (ch & 0xFF); - *dst++ = ((ch & 0x3) >> 8) | 0xDC; - } + *dst++ = (ch >> 24); + *dst++ = ((ch >> 16) & 0xFF); + *dst++ = ((ch >> 8) & 0xFF); + *dst++ = (ch & 0xFF); #else - *dst++ = (ch & 0xFF); - *dst++ = (ch >> 8); + *dst++ = (ch >> 8); + *dst++ = (ch & 0xFF); #endif - } else { +#else #if TCL_UTF_MAX > 4 - if (ch <= 0xFFFF) { - *dst++ = (ch >> 8); - *dst++ = (ch & 0xFF); - } else { - *dst++ = ((ch & 0x3) >> 8) | 0xDC; - *dst++ = (ch & 0xFF); - *dst++ = (((ch - 0x10000) >> 18) & 0x3) | 0xD8; - *dst++ = (((ch - 0x10000) >> 10) & 0xFF); - } + *dst++ = (ch & 0xFF); + *dst++ = ((ch >> 8) & 0xFF); + *dst++ = ((ch >> 16) & 0xFF); + *dst++ = (ch >> 24); #else - *dst++ = (ch >> 8); - *dst++ = (ch & 0xFF); + *dst++ = (ch & 0xFF); + *dst++ = (ch >> 8); +#endif #endif - } } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; @@ -2917,6 +2899,7 @@ Iso88591FromUtfProc( result = TCL_CONVERT_UNKNOWN; break; } + /* * Plunge on, using '?' as a fallback character. */ @@ -3404,13 +3387,14 @@ EscapeFromUtfProc( * * EscapeFreeProc -- * - * Frees resources used by the encoding. + * This function is invoked when an EscapeEncodingData encoding is + * deleted. It deletes the memory used by the encoding. * * Results: * None. * * Side effects: - * Memory is freed. + * Memory freed. * *--------------------------------------------------------------------------- */ -- cgit v0.12 From 5ccd380c46e3e74f3273ecfa83b0686bca5e8056 Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 21 Apr 2020 02:57:41 +0000 Subject: We've settled on using (TCL_UTF_MAX > 3) to indicate 4-byte Tcl_UniChar. --- generic/tclEncoding.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 5a9d2d5..66bec44 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2477,7 +2477,7 @@ UtfToUnicodeProc( * by casting dst to a Tcl_UniChar. [Bug 1122671] */ #ifdef WORDS_BIGENDIAN -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 *dst++ = (ch >> 24); *dst++ = ((ch >> 16) & 0xFF); *dst++ = ((ch >> 8) & 0xFF); @@ -2487,7 +2487,7 @@ UtfToUnicodeProc( *dst++ = (ch & 0xFF); #endif #else -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 *dst++ = (ch & 0xFF); *dst++ = ((ch >> 8) & 0xFF); *dst++ = ((ch >> 16) & 0xFF); -- cgit v0.12 From 108ee434235c4aed8698d7624c7b7c8bce4dae55 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 21 Apr 2020 07:03:46 +0000 Subject: Add more test-cases for TCL_UTF_MAX>3 --- generic/tclUtf.c | 2 +- tests/utf.test | 341 ++++++++++++++++++++++++++++++++++--------------------- 2 files changed, 210 insertions(+), 133 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 842744d..35a98a1 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -74,7 +74,7 @@ static const unsigned char totalBytes[256] = { 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, #if TCL_UTF_MAX > 3 - 4,4,4,4,4, + 4,4,4,4,4, #else 1,1,1,1,1, #endif diff --git a/tests/utf.test b/tests/utf.test index 3301dde..35772ae 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -16,15 +16,20 @@ if {[lsearch [namespace children] ::tcltest] == -1} { ::tcltest::loadTestedCommands catch [list package require -exact Tcltest [info patchlevel]] -testConstraint testbytestring [llength [info commands testbytestring]] - -catch {unset x} - -# Some tests require support for 4-byte UTF-8 sequences testConstraint smallutf [expr {[format %c 0x010000] == "\uFFFD"}] testConstraint fullutf [expr {[format %c 0x010000] != "\uFFFD"}] testConstraint tip389 [expr {[string length \U010000] == 2}] +testConstraint testbytestring [llength [info commands testbytestring]] +testConstraint testfindfirst [llength [info commands testfindfirst]] +testConstraint testfindlast [llength [info commands testfindlast]] +testConstraint testnumutfchars [llength [info commands testnumutfchars]] +testConstraint teststringobj [llength [info commands teststringobj]] +testConstraint testutfnext [llength [info commands testutfnext]] +testConstraint testutfprev [llength [info commands testutfprev]] + +catch {unset x} + test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} testbytestring { expr {"\x01" eq [testbytestring "\x01"]} } 1 @@ -96,6 +101,7 @@ test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} t string length [testbytestring "\xF0\x8F\xBF\xBF"] } {4} test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} {testbytestring knownBug} {# Doesn't work with any TCL_UTF_MAX value + # Would decode to U+110000 but that is outside the Unicode range. string length [testbytestring "\xF4\x90\x80\x80"] } {4} test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestring { @@ -105,10 +111,6 @@ test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestr test utf-3.1 {Tcl_UtfCharComplete} { } {} -testConstraint testnumutfchars [llength [info commands testnumutfchars]] -testConstraint testfindfirst [llength [info commands testfindfirst]] -testConstraint testfindlast [llength [info commands testfindlast]] - test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars { testnumutfchars "" } {0} @@ -116,7 +118,7 @@ test utf-4.2 {Tcl_NumUtfChars: length 1} {testnumutfchars testbytestring} { testnumutfchars [testbytestring "\xC2\xA2"] } {1} test utf-4.3 {Tcl_NumUtfChars: long string} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"] + testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E"] } {7} test utf-4.4 {Tcl_NumUtfChars: #u0000} {testnumutfchars testbytestring} { testnumutfchars [testbytestring "\xC0\x80"] @@ -125,13 +127,13 @@ test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars { testnumutfchars "" 0 } {0} test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "\xC2\xA2"] 2 + testnumutfchars [testbytestring "\xC2\xA2"] 1 } {1} test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"] 10 + testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E"] 10 } {7} test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "\xC0\x80"] 2 + testnumutfchars [testbytestring "\xC0\x80"] 1 } {1} # Bug [2738427]: Tcl_NumUtfChars(...) no overflow check test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} {testnumutfchars testbytestring} { @@ -141,10 +143,13 @@ test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars testnumutfchars [testbytestring "\x00"] 2 } {2} test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring \xf0\x9f\x92\xa9] 3 + testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 3 } {3} -test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring tip389} { - testnumutfchars [testbytestring \xf0\x9f\x92\xa9] 4 +test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring smallutf} { + testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4 +} {4} +test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring tip389} { + testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4 } {2} test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} { @@ -154,8 +159,6 @@ test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} { testfindlast [testbytestring "abcbc"] 98 } {bc} -testConstraint testutfnext [llength [info commands testutfnext]] - test utf-6.1 {Tcl_UtfNext} testutfnext { # This takes the pointer one past the terminating NUL. # This is really an invalid call. @@ -177,7 +180,7 @@ test utf-6.6 {Tcl_UtfNext} testutfnext { testutfnext A\xE8 } 1 test utf-6.7 {Tcl_UtfNext} testutfnext { - testutfnext A\xF4 + testutfnext A\xF2 } 1 test utf-6.8 {Tcl_UtfNext} testutfnext { testutfnext A\xF8 @@ -198,7 +201,7 @@ test utf-6.13 {Tcl_UtfNext} testutfnext { testutfnext \xA0\xE8 } 1 test utf-6.14 {Tcl_UtfNext} testutfnext { - testutfnext \xA0\xF4 + testutfnext \xA0\xF2 } 1 test utf-6.15 {Tcl_UtfNext} testutfnext { testutfnext \xA0\xF8 @@ -207,7 +210,7 @@ test utf-6.16 {Tcl_UtfNext} testutfnext { testutfnext \xD0 } 1 test utf-6.17 {Tcl_UtfNext} testutfnext { - testutfnext \xD0A + testutfnext \xD0G } 1 test utf-6.18 {Tcl_UtfNext} testutfnext { testutfnext \xD0\xA0 @@ -219,7 +222,7 @@ test utf-6.20 {Tcl_UtfNext} testutfnext { testutfnext \xD0\xE8 } 1 test utf-6.21 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xF4 + testutfnext \xD0\xF2 } 1 test utf-6.22 {Tcl_UtfNext} testutfnext { testutfnext \xD0\xF8 @@ -228,7 +231,7 @@ test utf-6.23 {Tcl_UtfNext} testutfnext { testutfnext \xE8 } 1 test utf-6.24 {Tcl_UtfNext} testutfnext { - testutfnext \xE8A + testutfnext \xE8G } 1 test utf-6.25 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xA0 @@ -240,37 +243,37 @@ test utf-6.27 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xE8 } 1 test utf-6.28 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xF4 + testutfnext \xE8\xF2 } 1 test utf-6.29 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xF8 } 1 test utf-6.30 {Tcl_UtfNext} testutfnext { - testutfnext \xF4 + testutfnext \xF2 } 1 test utf-6.31 {Tcl_UtfNext} testutfnext { - testutfnext \xF4A + testutfnext \xF2G } 1 test utf-6.32 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0 + testutfnext \xF2\xA0 } 1 test utf-6.33 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xD0 + testutfnext \xF2\xD0 } 1 test utf-6.34 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xE8 + testutfnext \xF2\xE8 } 1 test utf-6.35 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xF4 + testutfnext \xF2\xF2 } 1 test utf-6.36 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xF8 + testutfnext \xF2\xF8 } 1 test utf-6.37 {Tcl_UtfNext} testutfnext { testutfnext \xF8 } 1 test utf-6.38 {Tcl_UtfNext} testutfnext { - testutfnext \xF8A + testutfnext \xF8G } 1 test utf-6.39 {Tcl_UtfNext} testutfnext { testutfnext \xF8\xA0 @@ -282,7 +285,7 @@ test utf-6.41 {Tcl_UtfNext} testutfnext { testutfnext \xF8\xE8 } 1 test utf-6.42 {Tcl_UtfNext} testutfnext { - testutfnext \xF8\xF4 + testutfnext \xF8\xF2 } 1 test utf-6.43 {Tcl_UtfNext} testutfnext { testutfnext \xF8\xF8 @@ -300,7 +303,7 @@ test utf-6.47 {Tcl_UtfNext} testutfnext { testutfnext \xD0\xA0\xE8 } 2 test utf-6.48 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0\xF4 + testutfnext \xD0\xA0\xF2 } 2 test utf-6.49 {Tcl_UtfNext} testutfnext { testutfnext \xD0\xA0\xF8 @@ -318,28 +321,28 @@ test utf-6.53 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xA0\xE8 } 1 test utf-6.54 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xF4 + testutfnext \xE8\xA0\xF2 } 1 test utf-6.55 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xA0\xF8 } 1 test utf-6.56 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0G + testutfnext \xF2\xA0G } 1 test utf-6.57 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0 + testutfnext \xF2\xA0\xA0 } 1 test utf-6.58 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xD0 + testutfnext \xF2\xA0\xD0 } 1 test utf-6.59 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xE8 + testutfnext \xF2\xA0\xE8 } 1 test utf-6.60 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xF4 + testutfnext \xF2\xA0\xF2 } 1 test utf-6.61 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xF8 + testutfnext \xF2\xA0\xF8 } 1 test utf-6.62 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xA0\xA0G @@ -354,67 +357,67 @@ test utf-6.65 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xA0\xA0\xE8 } 3 test utf-6.66 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0\xF4 + testutfnext \xE8\xA0\xA0\xF2 } 3 test utf-6.67 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xA0\xA0\xF8 } 3 test utf-6.68 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0G + testutfnext \xF2\xA0\xA0G } 1 -test utf-6.69 {Tcl_UtfNext} {testutfnext smallutf} { - testutfnext \xF4\xA0\xA0\xA0 +test utf-6.69.0 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF2\xA0\xA0\xA0 } 1 test utf-6.69.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF4\xA0\xA0\xA0 + testutfnext \xF2\xA0\xA0\xA0 } 4 test utf-6.70 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xD0 + testutfnext \xF2\xA0\xA0\xD0 } 1 test utf-6.71 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xE8 + testutfnext \xF2\xA0\xA0\xE8 } 1 test utf-6.71 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xF4 + testutfnext \xF2\xA0\xA0\xF2 } 1 test utf-6.73 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xF8 + testutfnext \xF2\xA0\xA0\xF8 } 1 -test utf-6.74 {Tcl_UtfNext} {testutfnext smallutf} { - testutfnext \xF4\xA0\xA0\xA0G +test utf-6.74.0 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF2\xA0\xA0\xA0G } 1 test utf-6.74.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF4\xA0\xA0\xA0G + testutfnext \xF2\xA0\xA0\xA0G } 4 -test utf-6.75 {Tcl_UtfNext} {testutfnext smallutf} { - testutfnext \xF4\xA0\xA0\xA0\xA0 +test utf-6.75.0 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF2\xA0\xA0\xA0\xA0 } 1 test utf-6.75.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF4\xA0\xA0\xA0\xA0 + testutfnext \xF2\xA0\xA0\xA0\xA0 } 4 -test utf-6.76 {Tcl_UtfNext} {testutfnext smallutf} { - testutfnext \xF4\xA0\xA0\xA0\xD0 +test utf-6.76.0 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF2\xA0\xA0\xA0\xD0 } 1 test utf-6.76.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF4\xA0\xA0\xA0\xD0 + testutfnext \xF2\xA0\xA0\xA0\xD0 } 4 -test utf-6.77 {Tcl_UtfNext} {testutfnext smallutf} { - testutfnext \xF4\xA0\xA0\xA0\xE8 +test utf-6.77.0 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF2\xA0\xA0\xA0\xE8 } 1 test utf-6.77.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF4\xA0\xA0\xA0\xE8 + testutfnext \xF2\xA0\xA0\xA0\xE8 } 4 -test utf-6.78 {Tcl_UtfNext} {testutfnext smallutf} { - testutfnext \xF4\xA0\xA0\xA0\xF4 +test utf-6.78.0 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF2\xA0\xA0\xA0\xF2 } 1 test utf-6.78.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF4\xA0\xA0\xA0\xF4 + testutfnext \xF2\xA0\xA0\xA0\xF2 } 4 -test utf-6.79 {Tcl_UtfNext} {testutfnext smallutf} { - testutfnext \xF4\xA0\xA0\xA0G\xF8 +test utf-6.79.0 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF2\xA0\xA0\xA0G\xF8 } 1 test utf-6.79.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF4\xA0\xA0\xA0G\xF8 + testutfnext \xF2\xA0\xA0\xA0G\xF8 } 4 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext { testutfnext \xC0\x80 @@ -437,10 +440,10 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext { test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext { testutfnext \xF0\x80\x80\x80 } 1 -test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext smallutf} { +test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext smallutf} { testutfnext \xF0\x90\x80\x80 } 1 -test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} { +test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} { testutfnext \xF0\x90\x80\x80 } 4 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} { @@ -455,8 +458,18 @@ test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {te test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} { testutfnext \xF0\x80\x80 1 } 3 - -testConstraint testutfprev [llength [info commands testutfprev]] +test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext smallutf} { + testutfnext \xF4\x8F\xBF\xBF +} 1 +test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} { + testutfnext \xF4\x8F\xBF\xBF +} 4 +test utf-6.91.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext smallutf} { + testutfnext \xF4\x90\x80\x80 +} 1 +test utf-6.91.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} { + testutfnext \xF4\x90\x80\x80 +} 4 test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { testutfnext \xA0\xA0\xA0 } 3 @@ -489,13 +502,13 @@ test utf-7.4.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xF8\xA0\xA0 2 } 1 test utf-7.5 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4 + testutfprev A\xF2 } 1 test utf-7.5.1 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0\xA0\xA0 2 + testutfprev A\xF2\xA0\xA0\xA0 2 } 1 test utf-7.5.2 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xF8\xA0\xA0 2 + testutfprev A\xF2\xF8\xA0\xA0 2 } 1 test utf-7.6 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8 @@ -533,23 +546,23 @@ test utf-7.9.1 {Tcl_UtfPrev} testutfprev { test utf-7.9.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xF8\xA0 3 } 2 -test utf-7.10 {Tcl_UtfPrev} {testutfprev smallutf} { - testutfprev A\xF4\xA0 +test utf-7.10.0 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF2\xA0 } 2 -test utf-7.10.1 {Tcl_UtfPrev} {testutfprev smallutf} { - testutfprev A\xF4\xA0\xA0\xA0 3 -} 2 -test utf-7.10.2 {Tcl_UtfPrev} {testutfprev smallutf} { - testutfprev A\xF4\xA0\xF8\xA0 3 -} 2 -test utf-7.10 {Tcl_UtfPrev} {testutfprev fullutf} { - testutfprev A\xF4\xA0 -} 1 test utf-7.10.1 {Tcl_UtfPrev} {testutfprev fullutf} { - testutfprev A\xF4\xA0\xA0\xA0 3 + testutfprev A\xF2\xA0 } 1 -test utf-7.10.2 {Tcl_UtfPrev} {testutfprev fullutf} { - testutfprev A\xF4\xA0\xF8\xA0 3 +test utf-7.10.1.0 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF2\xA0\xA0\xA0 3 +} 2 +test utf-7.10.1.1 {Tcl_UtfPrev} {testutfprev fullutf} { + testutfprev A\xF2\xA0\xA0\xA0 3 +} 1 +test utf-7.10.2.0 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF2\xA0\xF8\xA0 3 +} 2 +test utf-7.10.2.1 {Tcl_UtfPrev} {testutfprev fullutf} { + testutfprev A\xF2\xA0\xF8\xA0 3 } 1 test utf-7.11 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8\xA0 @@ -590,23 +603,23 @@ test utf-7.14.1 {Tcl_UtfPrev} testutfprev { test utf-7.14.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xA0\xF8 4 } 3 -test utf-7.15 {Tcl_UtfPrev} {testutfprev smallutf} { - testutfprev A\xF4\xA0\xA0 -} 3 -test utf-7.15.1 {Tcl_UtfPrev} {testutfprev smallutf} { - testutfprev A\xF4\xA0\xA0\xA0 4 +test utf-7.15.0 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF2\xA0\xA0 } 3 -test utf-7.15.2 {Tcl_UtfPrev} {testutfprev smallutf} { - testutfprev A\xF4\xA0\xA0\xF8 4 -} 3 -test utf-7.15.3 {Tcl_UtfPrev} {testutfprev fullutf} { - testutfprev A\xF4\xA0\xA0 +test utf-7.15.1 {Tcl_UtfPrev} {testutfprev fullutf} { + testutfprev A\xF2\xA0\xA0 } 1 -test utf-7.15.4 {Tcl_UtfPrev} {testutfprev fullutf} { - testutfprev A\xF4\xA0\xA0\xA0 4 +test utf-7.15.1.0 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF2\xA0\xA0\xA0 4 +} 3 +test utf-7.15.1.1 {Tcl_UtfPrev} {testutfprev fullutf} { + testutfprev A\xF2\xA0\xA0\xA0 4 } 1 -test utf-7.15.5 {Tcl_UtfPrev} {testutfprev fullutf} { - testutfprev A\xF4\xA0\xA0\xF8 4 +test utf-7.15.2.0 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF2\xA0\xA0\xF8 4 +} 3 +test utf-7.15.2.1 {Tcl_UtfPrev} {testutfprev fullutf} { + testutfprev A\xF2\xA0\xA0\xF8 4 } 1 test utf-7.16 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8\xA0\xA0 @@ -728,19 +741,19 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence} testutfprev { test utf-7.38 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\xA0\x80 2 } 1 -test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { +test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { testutfprev A\xF0\x90\x80\x80 } 2 test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { testutfprev A\xF0\x90\x80\x80 } 1 -test utf-7.40 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { +test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { testutfprev A\xF0\x90\x80\x80 4 } 3 test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { testutfprev A\xF0\x90\x80\x80 4 } 1 -test utf-7.41 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { +test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { testutfprev A\xF0\x90\x80\x80 3 } 2 test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { @@ -773,6 +786,48 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {te test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} { testutfprev \xE8\xA0\x00 2 } 0 +test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} { + testutfprev A\xF4\x8F\xBF\xBF +} 2 +test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { + testutfprev A\xF4\x8F\xBF\xBF +} 1 +test utf-7.48.1.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} { + testutfprev A\xF4\x8F\xBF\xBF 4 +} 3 +test utf-7.48.1.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { + testutfprev A\xF4\x8F\xBF\xBF 4 +} 1 +test utf-7.48.2.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} { + testutfprev A\xF4\x8F\xBF\xBF 3 +} 2 +test utf-7.48.2.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { + testutfprev A\xF4\x8F\xBF\xBF 3 +} 1 +test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x8F\xBF\xBF 2 +} 1 +test utf-7.49.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} { + testutfprev A\xF4\x90\x80\x80 +} 2 +test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { + testutfprev A\xF4\x90\x80\x80 +} 1 +test utf-7.49.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} { + testutfprev A\xF4\x90\x80\x80 4 +} 3 +test utf-7.49.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { + testutfprev A\xF4\x90\x80\x80 4 +} 1 +test utf-7.49.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} { + testutfprev A\xF4\x90\x80\x80 3 +} 2 +test utf-7.49.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { + testutfprev A\xF4\x90\x80\x80 3 +} 1 +test utf-7.49.6 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x90\x80\x80 2 +} 1 test utf-8.1 {Tcl_UniCharAtIndex: index = 0} { string index abcd 0 @@ -786,6 +841,18 @@ test utf-8.3 {Tcl_UniCharAtIndex: index > 0} { test utf-8.4 {Tcl_UniCharAtIndex: index > 0} { string index \u4E4E\u25A\xFF\u543 2 } "\uFF" +test utf-8.5 {Tcl_UniCharAtIndex: high surrogate} smallutf { + string index \uD842 0 +} "\uD842" +test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} { + string index \uDC42 0 +} "\uDC42" +test utf-8.7 {Tcl_UniCharAtIndex: Emoji} smallutf { + string index \uD83D\uDE00 0 +} "\uD83D" +test utf-8.8 {Tcl_UniCharAtIndex: Emoji} { + string index \uD83D\uDE00 1 +} "\uDE00" test utf-9.1 {Tcl_UtfAtIndex: index = 0} { string range abcd 0 2 @@ -793,6 +860,12 @@ test utf-9.1 {Tcl_UtfAtIndex: index = 0} { test utf-9.2 {Tcl_UtfAtIndex: index > 0} { string range \u4E4E\u25A\xFF\u543klmnop 1 5 } "\u25A\xFF\u543kl" +test utf-9.3 {Tcl_UtfAtIndex: index = 0, Emoji} smallutf { + string range \uD83D\uDE00G 0 0 +} "\uD83D" +test utf-9.4 {Tcl_UtfAtIndex: index > 0, Emoji} smallutf { + string range \uD83D\uDE00G 1 1 +} "\uDE00" test utf-10.1 {Tcl_UtfBackslash: dst == NULL} { @@ -891,11 +964,11 @@ test utf-11.2 {Tcl_UtfToUpper} { string toupper abc } ABC test utf-11.3 {Tcl_UtfToUpper} { - string toupper \u00E3AB -} \u00C3AB + string toupper \xE3gh +} \xC3GH test utf-11.4 {Tcl_UtfToUpper} { - string toupper \u01E3AB -} \u01E2AB + string toupper \u01E3gh +} \u01E2GH test utf-11.5 {Tcl_UtfToUpper Georgian (new in Unicode 11)} { string toupper \u10D0\u1C90 } \u1C90\u1C90 @@ -907,14 +980,17 @@ test utf-12.2 {Tcl_UtfToLower} { string tolower ABC } abc test utf-12.3 {Tcl_UtfToLower} { - string tolower \u00C3AB -} \u00E3ab + string tolower \xC3GH +} \xE3gh test utf-12.4 {Tcl_UtfToLower} { - string tolower \u01E2AB -} \u01E3ab + string tolower \u01E2GH +} \u01E3gh test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} { string tolower \u10D0\u1C90 } \u10D0\u10D0 +test utf-12.6 {Tcl_UtfToUpper low/high surrogate)} smallutf { + string tolower \uDC24\uD824 +} \uDC24\uD824 test utf-13.1 {Tcl_UtfToTitle} { string totitle {} @@ -923,8 +999,8 @@ test utf-13.2 {Tcl_UtfToTitle} { string totitle abc } Abc test utf-13.3 {Tcl_UtfToTitle} { - string totitle \u00E3AB -} \u00C3ab + string totitle \xE3GH +} \xC3gh test utf-13.4 {Tcl_UtfToTitle} { string totitle \u01F3AB } \u01F2ab @@ -934,6 +1010,9 @@ test utf-13.5 {Tcl_UtfToTitle Georgian (new in Unicode 11)} { test utf-13.6 {Tcl_UtfToTitle Georgian (new in Unicode 11)} { string totitle \u1C90\u10D0 } \u1C90\u10D0 +test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} smallutf { + string totitle \uDC24\uD824 +} \uDC24\uD824 test utf-14.1 {Tcl_UtfNcasecmp} { string compare -nocase a b @@ -952,7 +1031,7 @@ test utf-15.1 {Tcl_UniCharToUpper, negative delta} { string toupper aA } AA test utf-15.2 {Tcl_UniCharToUpper, positive delta} { - string toupper \u0178\u00FF + string toupper \u0178\xFF } \u0178\u0178 test utf-15.3 {Tcl_UniCharToUpper, no delta} { string toupper ! @@ -962,8 +1041,8 @@ test utf-16.1 {Tcl_UniCharToLower, negative delta} { string tolower aA } aa test utf-16.2 {Tcl_UniCharToLower, positive delta} { - string tolower \u0178\u00FF\uA78D\u01C5 -} \u00FF\u00FF\u0265\u01C6 + string tolower \u0178\xFF\uA78D\u01C5 +} \xFF\xFF\u0265\u01C6 test utf-17.1 {Tcl_UniCharToLower, no delta} { string tolower ! @@ -977,9 +1056,9 @@ test utf-18.2 {Tcl_UniCharToTitle, subtract one for title} { } \u01C5 test utf-18.3 {Tcl_UniCharToTitle, subtract delta for title (positive)} { string totitle \u017F -} \u0053 +} \x53 test utf-18.4 {Tcl_UniCharToTitle, subtract delta for title (negative)} { - string totitle \u00FF + string totitle \xFF } \u0178 test utf-18.5 {Tcl_UniCharToTitle, no delta} { string totitle ! @@ -1016,31 +1095,31 @@ test utf-21.5 {unicode graph char in regc_locale.c} { } {1} test utf-21.6 {TclUniCharIsGraph} { # [Bug 3464428] - string is graph \u00A0 + string is graph \xA0 } {0} test utf-21.7 {unicode graph char in regc_locale.c} { # [Bug 3464428] - regexp {[[:graph:]]} \u0020\u00A0\u2028\u2029 + regexp {[[:graph:]]} \x20\xA0\u2028\u2029 } {0} test utf-21.8 {TclUniCharIsPrint} { # [Bug 3464428] - string is print \u0009 + string is print \x09 } {0} test utf-21.9 {unicode print char in regc_locale.c} { # [Bug 3464428] - regexp {[[:print:]]} \u0009 + regexp {[[:print:]]} \x09 } {0} test utf-21.10 {unicode print char in regc_locale.c} { # [Bug 3464428] - regexp {[[:print:]]} \u0009 + regexp {[[:print:]]} \x09 } {0} test utf-21.11 {TclUniCharIsControl} { # [Bug 3464428] - string is control \u0000\u001F\u00AD\u0605\u061C\u180E\u2066\uFEFF + string is control \x00\x1F\xAD\u0605\u061C\u180E\u2066\uFEFF } {1} test utf-21.12 {unicode control char in regc_locale.c} { # [Bug 3464428], [Bug a876646efe] - regexp {^[[:cntrl:]]*$} \u0000\u001F\u00AD\u0605\u061C\u180E\u2066\uFEFF + regexp {^[[:cntrl:]]*$} \x00\x1F\xAD\u0605\u061C\u180E\u2066\uFEFF } {1} test utf-22.1 {TclUniCharIsWordChar} { @@ -1070,15 +1149,13 @@ test utf-24.2 {unicode digit char in regc_locale.c} { test utf-24.3 {TclUniCharIsSpace} { # this returns 1 with Unicode 7/TIP 413 compliance - string is space \u0085\u1680\u180E\u200B\u202F\u2060 + string is space \x85\u1680\u180E\u200B\u202F\u2060 } {1} test utf-24.4 {unicode space char in regc_locale.c} { # this returns 1 with Unicode 7/TIP 413 compliance - list [regexp {^[[:space:]]+$} \u0085\u1680\u180E\u200B\u202F\u2060] [regexp {^\s+$} \u0085\u1680\u180E\u200B\u202F\u2060] + list [regexp {^[[:space:]]+$} \x85\u1680\u180E\u200B\u202F\u2060] [regexp {^\s+$} \x85\u1680\u180E\u200B\u202F\u2060] } {1 1} -testConstraint teststringobj [llength [info commands teststringobj]] - test utf-25.1 {Tcl_UniCharNcasecmp} -constraints teststringobj \ -setup { testobj freeallvars -- cgit v0.12 From 92e4d3dffcb47d7c89fd04f7f55803f903bc16b8 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 21 Apr 2020 07:26:38 +0000 Subject: Wrong indent in comment --- generic/tclUtf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 6908985..8954f6b 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -863,7 +863,7 @@ Tcl_UtfFindLast( * Given a pointer to some location in a UTF-8 string, Tcl_UtfNext * returns a pointer to the next UTF-8 character in the string. * The caller must not ask for the next character after the last - * character in the string if the string is not terminated by a null + * character in the string if the string is not terminated by a null * character. * * Results: -- cgit v0.12 From e6d30d023d0709ee78fbe25603c9ee44017b3b5c Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 21 Apr 2020 09:49:19 +0000 Subject: More test cleanup --- generic/tclUtf.c | 4 ++-- tests/utf.test | 24 ++++++++++++++++++------ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index c018472..d753321 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -563,7 +563,7 @@ Tcl_UtfToChar16( *chPtr = byte; return 1; } - + /* *--------------------------------------------------------------------------- * @@ -910,7 +910,7 @@ Tcl_UtfNext( * determine for certain in all circumstances whether the character * that begins with the returned pointer will or will not include * the byte src[-1]. In the scenario, where src points to the end of - * a buffer being filled, the returned pointer point to either the + * a buffer being filled, the returned pointer points to either the * final complete character in the string or to the earliest byte * that might start an incomplete character waiting for more bytes to * complete. diff --git a/tests/utf.test b/tests/utf.test index a12cc73..d4f45e9 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -140,10 +140,10 @@ test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars testnumutfchars [testbytestring "\x00"] end+1 } {2} test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring \xf0\x9f\x92\xa9] end-1 + testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end-1 } {3} test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring tip389} { - testnumutfchars [testbytestring \xf0\x9f\x92\xa9] end + testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end } {2} test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} { @@ -246,7 +246,7 @@ test utf-6.30 {Tcl_UtfNext} testutfnext { testutfnext \xF2 } 1 test utf-6.31 {Tcl_UtfNext} testutfnext { - testutfnext \xF2A + testutfnext \xF2G } 1 test utf-6.32 {Tcl_UtfNext} testutfnext { testutfnext \xF2\xA0 @@ -369,7 +369,7 @@ test utf-6.71 {Tcl_UtfNext} testutfnext { testutfnext \xF2\xA0\xA0\xE8 } 1 test utf-6.71 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0\xF4 + testutfnext \xF2\xA0\xA0\xF2 } 1 test utf-6.73 {Tcl_UtfNext} testutfnext { testutfnext \xF2\xA0\xA0\xF8 @@ -434,6 +434,18 @@ test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext { test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext { testutfnext \xF4\x90\x80\x80 } 1 +test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { + testutfnext \xA0\xA0\xA0 +} 1 +test utf-6.92.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { + testutfnext \xF2\xA0\xA0\xA0 1 +} 2 +test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { + testutfnext \x80\x80\x80 +} 1 +test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { + testutfnext \xF0\x80\x80\x80 1 +} 2 test utf-7.1 {Tcl_UtfPrev} testutfprev { testutfprev {} @@ -861,8 +873,8 @@ test utf-12.3 {Tcl_UtfToLower} { string tolower \xC3GH } \xE3gh test utf-12.4 {Tcl_UtfToLower} { - string tolower \u01E2AB -} \u01E3ab + string tolower \u01E2GH +} \u01E3gh test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} { string tolower \u10D0\u1C90 } \u10D0\u10D0 -- cgit v0.12 From 4a16a0e6462d7095a904b8c016ac1937b4cc0bdf Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 21 Apr 2020 10:14:40 +0000 Subject: Fix corner-case in TIP #542 implementation: For extensions compiled with TCL_UTF_MAX=4 while Tcl is built with TCL_UTF_MAX=3, Tcl_UtfCharComplete() might give the wrong result. --- generic/tclDecls.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 890114a..4531be3 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -4178,4 +4178,10 @@ extern const TclStubs *tclStubsPtr; #define Tcl_Close(interp, chan) Tcl_CloseEx(interp, chan, 0) #endif +#if defined(USE_TCL_STUBS) && (TCL_UTF_MAX > 3) +# undef Tcl_UtfCharComplete +# define Tcl_UtfCharComplete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \ + ? 4 : tclStubsPtr->tcl_UtfCharComplete((src), (length))) +#endif + #endif /* _TCLDECLS */ -- cgit v0.12 From 941ef44a3fce68b1dc81abb397a80f209b2ca982 Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 21 Apr 2020 14:52:48 +0000 Subject: Revert the other encoding system backport. The blocking and failing tests are illustrations of existing tickets [1004065] and [1122671], recording that the encoding machinery hardcodes assumptions in multiple places that sizeof(Tcl_UniChar) == 2. Closing the segfault bug fix should not be hostage to fixing those old bugs. --- generic/tclEncoding.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 66bec44..6c16827 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2470,33 +2470,20 @@ UtfToUnicodeProc( if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; - } + } src += TclUtfToUniChar(src, &ch); /* * Need to handle this in a way that won't cause misalignment * by casting dst to a Tcl_UniChar. [Bug 1122671] + * XXX: This hard-codes the assumed size of Tcl_UniChar as 2. */ #ifdef WORDS_BIGENDIAN -#if TCL_UTF_MAX > 3 - *dst++ = (ch >> 24); - *dst++ = ((ch >> 16) & 0xFF); - *dst++ = ((ch >> 8) & 0xFF); - *dst++ = (ch & 0xFF); -#else *dst++ = (ch >> 8); *dst++ = (ch & 0xFF); -#endif -#else -#if TCL_UTF_MAX > 3 - *dst++ = (ch & 0xFF); - *dst++ = ((ch >> 8) & 0xFF); - *dst++ = ((ch >> 16) & 0xFF); - *dst++ = (ch >> 24); #else *dst++ = (ch & 0xFF); *dst++ = (ch >> 8); #endif -#endif } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; -- cgit v0.12 From d48bca33242b3f10d21a25a6c6a91c27ae707b96 Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 21 Apr 2020 15:41:01 +0000 Subject: Move testing command [testsize] from Windows to generic. Extend it to report sizeof(Tcl_UniChar). --- generic/tclTest.c | 31 +++++++++++++++++++++++++++++++ win/tclWinTest.c | 28 ---------------------------- 2 files changed, 31 insertions(+), 28 deletions(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index 8c29aa7..b9fd204 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -281,6 +281,7 @@ static Tcl_CmdProc Testset2Cmd; static Tcl_CmdProc TestseterrorcodeCmd; static Tcl_ObjCmdProc TestsetobjerrorcodeCmd; static Tcl_CmdProc TestsetplatformCmd; +static Tcl_ObjCmdProc TestsizeCmd; static Tcl_CmdProc TeststaticpkgCmd; static Tcl_CmdProc TesttranslatefilenameCmd; static Tcl_CmdProc TestupvarCmd; @@ -592,6 +593,7 @@ Tcltest_Init( TestFindLastCmd, NULL, NULL); Tcl_CreateCommand(interp, "testsetplatform", TestsetplatformCmd, NULL, NULL); + Tcl_CreateObjCommand(interp, "testsize", TestsizeCmd, NULL, NULL); Tcl_CreateCommand(interp, "teststaticpkg", TeststaticpkgCmd, NULL, NULL); Tcl_CreateCommand(interp, "testtranslatefilename", @@ -4122,6 +4124,35 @@ TestsetplatformCmd( return TCL_OK; } +static int +TestsizeCmd( + ClientData clientData, /* Unused */ + Tcl_Interp* interp, /* Tcl interpreter */ + int objc, /* Parameter count */ + Tcl_Obj *const * objv) /* Parameter vector */ +{ + if (objc != 2) { + goto syntax; + } + if (strcmp(Tcl_GetString(objv[1]), "time_t") == 0) { + Tcl_SetObjResult(interp, Tcl_NewWideIntObj(sizeof(time_t))); + return TCL_OK; + } + if (strcmp(Tcl_GetString(objv[1]), "st_mtime") == 0) { + Tcl_StatBuf *statPtr; + Tcl_SetObjResult(interp, Tcl_NewWideIntObj(sizeof(statPtr->st_mtime))); + return TCL_OK; + } + if (strcmp(Tcl_GetString(objv[1]), "unichar") == 0) { + Tcl_SetObjResult(interp, Tcl_NewWideIntObj(sizeof(Tcl_UniChar))); + return TCL_OK; + } + +syntax: + Tcl_WrongNumArgs(interp, 1, objv, "time_t|st_mtime|unichar"); + return TCL_ERROR; +} + /* *---------------------------------------------------------------------- * diff --git a/win/tclWinTest.c b/win/tclWinTest.c index 04878fe..7f49b63 100644 --- a/win/tclWinTest.c +++ b/win/tclWinTest.c @@ -39,8 +39,6 @@ static int TestwinclockCmd(ClientData dummy, Tcl_Interp* interp, int objc, Tcl_Obj *const objv[]); static int TestwinsleepCmd(ClientData dummy, Tcl_Interp* interp, int objc, Tcl_Obj *const objv[]); -static int TestSizeCmd(ClientData dummy, Tcl_Interp* interp, - int objc, Tcl_Obj *const objv[]); static Tcl_ObjCmdProc TestExceptionCmd; static int TestplatformChmod(const char *nativePath, int pmode); static int TestchmodCmd(ClientData dummy, @@ -78,7 +76,6 @@ TclplatformtestInit( Tcl_CreateObjCommand(interp, "testwinclock", TestwinclockCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testwinsleep", TestwinsleepCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testexcept", TestExceptionCmd, NULL, NULL); - Tcl_CreateObjCommand(interp, "testsize", TestSizeCmd, NULL, NULL); return TCL_OK; } @@ -312,31 +309,6 @@ TestwinsleepCmd( return TCL_OK; } -static int -TestSizeCmd( - ClientData clientData, /* Unused */ - Tcl_Interp* interp, /* Tcl interpreter */ - int objc, /* Parameter count */ - Tcl_Obj *const * objv) /* Parameter vector */ -{ - if (objc != 2) { - goto syntax; - } - if (strcmp(Tcl_GetString(objv[1]), "time_t") == 0) { - Tcl_SetObjResult(interp, Tcl_NewWideIntObj(sizeof(time_t))); - return TCL_OK; - } - if (strcmp(Tcl_GetString(objv[1]), "st_mtime") == 0) { - Tcl_StatBuf *statPtr; - Tcl_SetObjResult(interp, Tcl_NewWideIntObj(sizeof(statPtr->st_mtime))); - return TCL_OK; - } - -syntax: - Tcl_WrongNumArgs(interp, 1, objv, "time_t|st_mtime"); - return TCL_ERROR; -} - /* *---------------------------------------------------------------------- * -- cgit v0.12 From 1588b8475d8a1378e6e9504e10913d756d84983b Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 21 Apr 2020 15:52:03 +0000 Subject: Use new testing command to constrain tests to (sizeof(Tcl_UniChar) == 2) until bugs are fixed when (sizeof(Tcl_UniChar == 4). --- tests/chanio.test | 4 +++- tests/encoding.test | 6 +++++- tests/io.test | 5 ++++- tests/source.test | 5 ++++- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/tests/chanio.test b/tests/chanio.test index 5fae431..c2f561b 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -29,6 +29,8 @@ namespace eval ::tcl::test::io { variable msg variable expected + testConstraint ucs2 [expr { [llength [info commands testsize]] && + ([testsize unichar] == 2) }] testConstraint testchannel [llength [info commands testchannel]] testConstraint exec [llength [info commands exec]] testConstraint openpipe 1 @@ -875,7 +877,7 @@ test chan-io-6.44 {Tcl_GetsObj: input saw cr, not followed by cr} {stdio testcha chan close $f set x } [list "bbbbbbbbbbbbbbb" 15 "123456789abcdef" 1 4 "abcd" 0 3 "efg"] -test chan-io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} {stdio testchannel openpipe fileevent} { +test chan-io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} {stdio testchannel openpipe fileevent ucs2} { # Tcl_ExternalToUtf() set f [open "|[list [interpreter] $path(cat)]" w+] diff --git a/tests/encoding.test b/tests/encoding.test index 8722a93..ad55e26 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -32,6 +32,10 @@ proc runtests {} { testConstraint testencoding [llength [info commands testencoding]] testConstraint exec [llength [info commands exec]] +testConstraint ucs2 [expr { [llength [info commands testsize]] && + ([testsize unichar] == 2) }] + + # TclInitEncodingSubsystem is tested by the rest of this file # TclFinalizeEncodingSubsystem is not currently tested @@ -316,7 +320,7 @@ test encoding-15.3 {UtfToUtfProc null character input} { list [string bytelength $x] [string bytelength $y] $z } {1 2 c080} -test encoding-16.1 {UnicodeToUtfProc} { +test encoding-16.1 {UnicodeToUtfProc} ucs2 { set val [encoding convertfrom unicode NN] list $val [format %x [scan $val %c]] } "\u4e4e 4e4e" diff --git a/tests/io.test b/tests/io.test index 04fa1d2..1c18576 100644 --- a/tests/io.test +++ b/tests/io.test @@ -29,6 +29,9 @@ namespace eval ::tcl::test::io { variable msg variable expected +testConstraint ucs2 [expr { [llength [info commands testsize]] && + ([testsize unichar] == 2) }] + testConstraint testchannel [llength [info commands testchannel]] testConstraint exec [llength [info commands exec]] testConstraint openpipe 1 @@ -910,7 +913,7 @@ test io-6.44 {Tcl_GetsObj: input saw cr, not followed by cr} {stdio testchannel close $f set x } [list "bbbbbbbbbbbbbbb" 15 "123456789abcdef" 1 4 "abcd" 0 3 "efg"] -test io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} {stdio testchannel openpipe fileevent} { +test io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} {stdio testchannel openpipe fileevent ucs2} { # Tcl_ExternalToUtf() set f [open "|[list [interpreter] $path(cat)]" w+] diff --git a/tests/source.test b/tests/source.test index dc3c2d8..8511004 100644 --- a/tests/source.test +++ b/tests/source.test @@ -20,6 +20,9 @@ if {[catch {package require tcltest 2.1}]} { namespace eval ::tcl::test::source { namespace import ::tcltest::* +testConstraint ucs2 [expr { [llength [info commands testsize]] && + ([testsize unichar] == 2) }] + test source-1.1 {source command} -setup { set x "old x value" set y "old y value" @@ -232,7 +235,7 @@ test source-7.1 {source -encoding test} -setup { } -cleanup { removeFile source.file } -result correct -test source-7.2 {source -encoding test} -setup { +test source-7.2 {source -encoding test} -constraints ucs2 -setup { # This tests for bad interactions between [source -encoding] # and use of the Control-Z character (\u001A) as a cross-platform # EOF character by [source]. Here we write out and the [source] a -- cgit v0.12 From 4da0e252257e24143039784510363d066545be27 Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 21 Apr 2020 15:56:34 +0000 Subject: remove merge litter --- tests/util.test | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/util.test b/tests/util.test index a483de1..85c06dd 100644 --- a/tests/util.test +++ b/tests/util.test @@ -15,7 +15,6 @@ if {[lsearch [namespace children] ::tcltest] == -1} { testConstraint testdstring [llength [info commands testdstring]] testConstraint testconcatobj [llength [info commands testconcatobj]] testConstraint testdoubledigits [llength [info commands testdoubledigits]] -testConstraint compat85 [expr {[format %c 0x010000] == "\uFFFD"}] # Big test for correct ordering of data in [expr] -- cgit v0.12 From ce76e24a88d8c6c8abfd5da63402691c072e697b Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 21 Apr 2020 19:45:34 +0000 Subject: Improve the "testutfnext" command. It can now accept both bytes and strings, and it will test whether src[-1] is read without needing test-variations for it. --- generic/tclTest.c | 48 +++++++------ tests/utf.test | 208 +++++++++++++++++++++++++++--------------------------- 2 files changed, 131 insertions(+), 125 deletions(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index b9fd204..7a531b4 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -7113,7 +7113,7 @@ SimpleListVolumes(void) /* * Used to check operations of Tcl_UtfNext. * - * Usage: testutfnext $bytes $offset + * Usage: testutfnext -bytestring $bytes */ static int @@ -7123,37 +7123,43 @@ TestUtfNextCmd( int objc, Tcl_Obj *const objv[]) { - int numBytes, offset = 0; + int numBytes; char *bytes; - const char *result; - Tcl_Obj *copy; + const char *result, *first; + char buffer[32]; + static const char tobetested[] = "\xFF\xFE\xF4\xF2\xF0\xEF\xE8\xE3\xE2\xE1\xE0\xC2\xC1\xC0\x82"; + const char *p = tobetested; + + if (objc != 3 || strcmp(Tcl_GetString(objv[1]), "-bytestring")) { + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, "?-bytestring? bytes"); + return TCL_ERROR; + } + bytes = Tcl_GetStringFromObj(objv[1], &numBytes); + } else { + bytes = (char *) Tcl_GetByteArrayFromObj(objv[2], &numBytes); + } - if (objc < 2 || objc > 3) { - Tcl_WrongNumArgs(interp, 1, objv, "bytes ?offset?"); + if (numBytes > sizeof(buffer)-2) { + Tcl_AppendResult(interp, "\"testutfnext\" can only handle 30 bytes", NULL); return TCL_ERROR; } - bytes = (char *) Tcl_GetByteArrayFromObj(objv[1], &numBytes); + memcpy(buffer + 1, bytes, numBytes); + buffer[0] = buffer[numBytes + 1] = '\x00'; - if (objc == 3) { - if (TCL_OK != TclGetIntForIndex(interp, objv[2], numBytes, &offset)) { + first = Tcl_UtfNext(buffer + 1); + while ((buffer[0] = *p++) != '\0') { + /* Run Tcl_UtfNext with many more possible bytes at src[-1], all should give the same result */ + result = Tcl_UtfNext(buffer + 1); + if (first != result) { + Tcl_AppendResult(interp, "Tcl_UtfNext is not supposed to read src[-1]", NULL); return TCL_ERROR; } - if (offset < 0) { - offset = 0; - } - if (offset > numBytes) { - offset = numBytes; - } } - copy = Tcl_DuplicateObj(objv[1]); - bytes = (char *) Tcl_SetByteArrayLength(copy, numBytes+1); - bytes[numBytes] = '\0'; - result = Tcl_UtfNext(bytes + offset); - Tcl_SetObjResult(interp, Tcl_NewIntObj(result - bytes)); + Tcl_SetObjResult(interp, Tcl_NewIntObj(result - buffer - 1)); - Tcl_DecrRefCount(copy); return TCL_OK; } /* diff --git a/tests/utf.test b/tests/utf.test index 1c79f32..0a81ae3 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -143,7 +143,7 @@ test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} { test utf-6.1 {Tcl_UtfNext} testutfnext { # This takes the pointer one past the terminating NUL. # This is really an invalid call. - testutfnext {} + testutfnext -bytestring {} } 1 test utf-6.2 {Tcl_UtfNext} testutfnext { testutfnext A @@ -152,301 +152,301 @@ test utf-6.3 {Tcl_UtfNext} testutfnext { testutfnext AA } 1 test utf-6.4 {Tcl_UtfNext} testutfnext { - testutfnext A\xA0 + testutfnext -bytestring A\xA0 } 1 test utf-6.5 {Tcl_UtfNext} testutfnext { - testutfnext A\xD0 + testutfnext -bytestring A\xD0 } 1 test utf-6.6 {Tcl_UtfNext} testutfnext { - testutfnext A\xE8 + testutfnext -bytestring A\xE8 } 1 test utf-6.7 {Tcl_UtfNext} testutfnext { - testutfnext A\xF2 + testutfnext -bytestring A\xF2 } 1 test utf-6.8 {Tcl_UtfNext} testutfnext { - testutfnext A\xF8 + testutfnext -bytestring A\xF8 } 1 test utf-6.9 {Tcl_UtfNext} testutfnext { - testutfnext \xA0 + testutfnext -bytestring \xA0 } 1 test utf-6.10 {Tcl_UtfNext} testutfnext { - testutfnext \xA0G + testutfnext -bytestring \xA0G } 1 test utf-6.11 {Tcl_UtfNext} testutfnext { - testutfnext \xA0\xA0 + testutfnext -bytestring \xA0\xA0 } 1 test utf-6.12 {Tcl_UtfNext} testutfnext { - testutfnext \xA0\xD0 + testutfnext -bytestring \xA0\xD0 } 1 test utf-6.13 {Tcl_UtfNext} testutfnext { - testutfnext \xA0\xE8 + testutfnext -bytestring \xA0\xE8 } 1 test utf-6.14 {Tcl_UtfNext} testutfnext { - testutfnext \xA0\xF2 + testutfnext -bytestring \xA0\xF2 } 1 test utf-6.15 {Tcl_UtfNext} testutfnext { - testutfnext \xA0\xF8 + testutfnext -bytestring \xA0\xF8 } 1 test utf-6.16 {Tcl_UtfNext} testutfnext { - testutfnext \xD0 + testutfnext -bytestring \xD0 } 1 test utf-6.17 {Tcl_UtfNext} testutfnext { - testutfnext \xD0G + testutfnext -bytestring \xD0G } 1 test utf-6.18 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0 + testutfnext -bytestring \xD0\xA0 } 2 test utf-6.19 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xD0 + testutfnext -bytestring \xD0\xD0 } 1 test utf-6.20 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xE8 + testutfnext -bytestring \xD0\xE8 } 1 test utf-6.21 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xF2 + testutfnext -bytestring \xD0\xF2 } 1 test utf-6.22 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xF8 + testutfnext -bytestring \xD0\xF8 } 1 test utf-6.23 {Tcl_UtfNext} testutfnext { - testutfnext \xE8 + testutfnext -bytestring \xE8 } 1 test utf-6.24 {Tcl_UtfNext} testutfnext { - testutfnext \xE8G + testutfnext -bytestring \xE8G } 1 test utf-6.25 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0 + testutfnext -bytestring \xE8\xA0 } 1 test utf-6.26 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xD0 + testutfnext -bytestring \xE8\xD0 } 1 test utf-6.27 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xE8 + testutfnext -bytestring \xE8\xE8 } 1 test utf-6.28 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xF2 + testutfnext -bytestring \xE8\xF2 } 1 test utf-6.29 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xF8 + testutfnext -bytestring \xE8\xF8 } 1 test utf-6.30 {Tcl_UtfNext} testutfnext { - testutfnext \xF2 + testutfnext -bytestring \xF2 } 1 test utf-6.31 {Tcl_UtfNext} testutfnext { - testutfnext \xF2G + testutfnext -bytestring \xF2G } 1 test utf-6.32 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0 + testutfnext -bytestring \xF2\xA0 } 1 test utf-6.33 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xD0 + testutfnext -bytestring \xF2\xD0 } 1 test utf-6.34 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xE8 + testutfnext -bytestring \xF2\xE8 } 1 test utf-6.35 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xF2 + testutfnext -bytestring \xF2\xF2 } 1 test utf-6.36 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xF8 + testutfnext -bytestring \xF2\xF8 } 1 test utf-6.37 {Tcl_UtfNext} testutfnext { - testutfnext \xF8 + testutfnext -bytestring \xF8 } 1 test utf-6.38 {Tcl_UtfNext} testutfnext { - testutfnext \xF8G + testutfnext -bytestring \xF8G } 1 test utf-6.39 {Tcl_UtfNext} testutfnext { - testutfnext \xF8\xA0 + testutfnext -bytestring \xF8\xA0 } 1 test utf-6.40 {Tcl_UtfNext} testutfnext { - testutfnext \xF8\xD0 + testutfnext -bytestring \xF8\xD0 } 1 test utf-6.41 {Tcl_UtfNext} testutfnext { - testutfnext \xF8\xE8 + testutfnext -bytestring \xF8\xE8 } 1 test utf-6.42 {Tcl_UtfNext} testutfnext { - testutfnext \xF8\xF2 + testutfnext -bytestring \xF8\xF2 } 1 test utf-6.43 {Tcl_UtfNext} testutfnext { - testutfnext \xF8\xF8 + testutfnext -bytestring \xF8\xF8 } 1 test utf-6.44 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0G + testutfnext -bytestring \xD0\xA0G } 2 test utf-6.45 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0\xA0 + testutfnext -bytestring \xD0\xA0\xA0 } 2 test utf-6.46 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0\xD0 + testutfnext -bytestring \xD0\xA0\xD0 } 2 test utf-6.47 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0\xE8 + testutfnext -bytestring \xD0\xA0\xE8 } 2 test utf-6.48 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0\xF2 + testutfnext -bytestring \xD0\xA0\xF2 } 2 test utf-6.49 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0\xF8 + testutfnext -bytestring \xD0\xA0\xF8 } 2 test utf-6.50 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0G + testutfnext -bytestring \xE8\xA0G } 1 test utf-6.51 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0 + testutfnext -bytestring \xE8\xA0\xA0 } 3 test utf-6.52 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xD0 + testutfnext -bytestring \xE8\xA0\xD0 } 1 test utf-6.53 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xE8 + testutfnext -bytestring \xE8\xA0\xE8 } 1 test utf-6.54 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xF2 + testutfnext -bytestring \xE8\xA0\xF2 } 1 test utf-6.55 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xF8 + testutfnext -bytestring \xE8\xA0\xF8 } 1 test utf-6.56 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0G + testutfnext -bytestring \xF2\xA0G } 1 test utf-6.57 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0 + testutfnext -bytestring \xF2\xA0\xA0 } 1 test utf-6.58 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xD0 + testutfnext -bytestring \xF2\xA0\xD0 } 1 test utf-6.59 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xE8 + testutfnext -bytestring \xF2\xA0\xE8 } 1 test utf-6.60 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xF2 + testutfnext -bytestring \xF2\xA0\xF2 } 1 test utf-6.61 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xF8 + testutfnext -bytestring \xF2\xA0\xF8 } 1 test utf-6.62 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0G + testutfnext -bytestring \xE8\xA0\xA0G } 3 test utf-6.63 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0\xA0 + testutfnext -bytestring \xE8\xA0\xA0\xA0 } 3 test utf-6.64 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0\xD0 + testutfnext -bytestring \xE8\xA0\xA0\xD0 } 3 test utf-6.65 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0\xE8 + testutfnext -bytestring \xE8\xA0\xA0\xE8 } 3 test utf-6.66 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0\xF2 + testutfnext -bytestring \xE8\xA0\xA0\xF2 } 3 test utf-6.67 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0\xF8 + testutfnext -bytestring \xE8\xA0\xA0\xF8 } 3 test utf-6.68 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0G + testutfnext -bytestring \xF2\xA0\xA0G } 1 test utf-6.69.0 {Tcl_UtfNext} {testutfnext compat85} { - testutfnext \xF2\xA0\xA0\xA0 + testutfnext -bytestring \xF2\xA0\xA0\xA0 } 1 test utf-6.69.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF2\xA0\xA0\xA0 + testutfnext -bytestring \xF2\xA0\xA0\xA0 } 4 test utf-6.70 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0\xD0 + testutfnext -bytestring \xF2\xA0\xA0\xD0 } 1 test utf-6.71 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0\xE8 + testutfnext -bytestring \xF2\xA0\xA0\xE8 } 1 test utf-6.71 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0\xF2 + testutfnext -bytestring \xF2\xA0\xA0\xF2 } 1 test utf-6.73 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0\xF8 + testutfnext -bytestring \xF2\xA0\xA0\xF8 } 1 test utf-6.74.0 {Tcl_UtfNext} {testutfnext compat85} { - testutfnext \xF2\xA0\xA0\xA0G + testutfnext -bytestring \xF2\xA0\xA0\xA0G } 1 test utf-6.74.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF2\xA0\xA0\xA0G + testutfnext -bytestring \xF2\xA0\xA0\xA0G } 4 test utf-6.75.0 {Tcl_UtfNext} {testutfnext compat85} { - testutfnext \xF2\xA0\xA0\xA0\xA0 + testutfnext -bytestring \xF2\xA0\xA0\xA0\xA0 } 1 test utf-6.75.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF2\xA0\xA0\xA0\xA0 + testutfnext -bytestring \xF2\xA0\xA0\xA0\xA0 } 4 test utf-6.76.0 {Tcl_UtfNext} {testutfnext compat85} { - testutfnext \xF2\xA0\xA0\xA0\xD0 + testutfnext -bytestring \xF2\xA0\xA0\xA0\xD0 } 1 test utf-6.76.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF2\xA0\xA0\xA0\xD0 + testutfnext -bytestring \xF2\xA0\xA0\xA0\xD0 } 4 test utf-6.77.0 {Tcl_UtfNext} {testutfnext compat85} { - testutfnext \xF2\xA0\xA0\xA0\xE8 + testutfnext -bytestring \xF2\xA0\xA0\xA0\xE8 } 1 test utf-6.77.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF2\xA0\xA0\xA0\xE8 + testutfnext -bytestring \xF2\xA0\xA0\xA0\xE8 } 4 test utf-6.78.0 {Tcl_UtfNext} {testutfnext compat85} { - testutfnext \xF2\xA0\xA0\xA0\xF2 + testutfnext -bytestring \xF2\xA0\xA0\xA0\xF2 } 1 test utf-6.78.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF2\xA0\xA0\xA0\xF2 + testutfnext -bytestring \xF2\xA0\xA0\xA0\xF2 } 4 test utf-6.79.0 {Tcl_UtfNext} {testutfnext compat85} { - testutfnext \xF2\xA0\xA0\xA0G\xF8 + testutfnext -bytestring \xF2\xA0\xA0\xA0G\xF8 } 1 test utf-6.79.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF2\xA0\xA0\xA0G\xF8 + testutfnext -bytestring \xF2\xA0\xA0\xA0G\xF8 } 4 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xC0\x80 + testutfnext -bytestring \xC0\x80 } 2 test utf-6.81 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xC0\x81 + testutfnext -bytestring \xC0\x81 } 1 test utf-6.82 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xC1\x80 + testutfnext -bytestring \xC1\x80 } 1 test utf-6.83 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xC2\x80 + testutfnext -bytestring \xC2\x80 } 2 test utf-6.84 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xE0\x80\x80 + testutfnext -bytestring \xE0\x80\x80 } 1 test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xE0\xA0\x80 + testutfnext -bytestring \xE0\xA0\x80 } 3 test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xF0\x80\x80\x80 + testutfnext -bytestring \xF0\x80\x80\x80 } 1 test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext compat85} { - testutfnext \xF0\x90\x80\x80 + testutfnext -bytestring \xF0\x90\x80\x80 } 1 test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} { - testutfnext \xF0\x90\x80\x80 + testutfnext -bytestring \xF0\x90\x80\x80 } 4 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} { - testutfnext \xA0\xA0 + testutfnext -bytestring \xA0\xA0 } 1 -test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} { - testutfnext \xE8\xA0\xA0 1 -} 2 test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} { - testutfnext \x80\x80 + testutfnext -bytestring \x80\x80 } 1 -test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} { - testutfnext \xF0\x80\x80 1 -} 2 test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext compat85} { - testutfnext \xF4\x8F\xBF\xBF + testutfnext -bytestring \xF4\x8F\xBF\xBF } 1 test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} { - testutfnext \xF4\x8F\xBF\xBF + testutfnext -bytestring \xF4\x8F\xBF\xBF } 4 test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext { - testutfnext \xF4\x90\x80\x80 + testutfnext -bytestring \xF4\x90\x80\x80 +} 1 +test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { + testutfnext -bytestring \xA0\xA0\xA0 +} 1 +test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { + testutfnext -bytestring \x80\x80\x80 } 1 test utf-7.1 {Tcl_UtfPrev} testutfprev { -- cgit v0.12 From 03d9b705d06f83bbad47974b284c87ea9dcb1960 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 21 Apr 2020 21:41:15 +0000 Subject: Improved "testutfnext" command --- generic/tclTest.c | 54 ++++++++------- tests/utf.test | 198 +++++++++++++++++++++++++----------------------------- 2 files changed, 123 insertions(+), 129 deletions(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index 8c3f2cf..008fdb7 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -6811,7 +6811,7 @@ SimpleListVolumes(void) /* * Used to check operations of Tcl_UtfNext. * - * Usage: testutfnext $bytes $offset + * Usage: testutfnext -bytestring $bytes */ static int @@ -6821,40 +6821,46 @@ TestUtfNextCmd( int objc, Tcl_Obj *const objv[]) { - int numBytes, offset = 0; + int numBytes; char *bytes; - const char *result; - Tcl_Obj *copy; - - if (objc < 2 || objc > 3) { - Tcl_WrongNumArgs(interp, 1, objv, "bytes ?offset?"); - return TCL_ERROR; + const char *result, *first; + char buffer[32]; + static const char tobetested[] = "\xFF\xFE\xF4\xF2\xF0\xEF\xE8\xE3\xE2\xE1\xE0\xC2\xC1\xC0\x82"; + const char *p = tobetested; + + if (objc != 3 || strcmp(Tcl_GetString(objv[1]), "-bytestring")) { + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, "?-bytestring? bytes"); + return TCL_ERROR; + } + bytes = Tcl_GetStringFromObj(objv[1], &numBytes); + } else { + bytes = (char *) TclGetBytesFromObj(interp, objv[2], &numBytes); + if (bytes == NULL) { + return TCL_ERROR; + } } - bytes = (char *) TclGetBytesFromObj(interp, objv[1], &numBytes); - if (bytes == NULL) { + if (numBytes > (int)sizeof(buffer)-2) { + Tcl_AppendResult(interp, "\"testutfnext\" can only handle 30 bytes", NULL); return TCL_ERROR; } - if (objc == 3) { - if (TCL_OK != Tcl_GetIntForIndex(interp, objv[2], numBytes, &offset)) { + memcpy(buffer + 1, bytes, numBytes); + buffer[0] = buffer[numBytes + 1] = '\x00'; + + first = Tcl_UtfNext(buffer + 1); + while ((buffer[0] = *p++) != '\0') { + /* Run Tcl_UtfNext with many more possible bytes at src[-1], all should give the same result */ + result = Tcl_UtfNext(buffer + 1); + if (first != result) { + Tcl_AppendResult(interp, "Tcl_UtfNext is not supposed to read src[-1]", NULL); return TCL_ERROR; } - if (offset < 0) { - offset = 0; - } - if (offset > numBytes) { - offset = numBytes; - } } - copy = Tcl_DuplicateObj(objv[1]); - bytes = (char *) Tcl_SetByteArrayLength(copy, numBytes+1); - bytes[numBytes] = '\0'; - result = Tcl_UtfNext(bytes + offset); - Tcl_SetObjResult(interp, Tcl_NewIntObj(result - bytes)); + Tcl_SetObjResult(interp, Tcl_NewIntObj(first - buffer - 1)); - Tcl_DecrRefCount(copy); return TCL_OK; } /* diff --git a/tests/utf.test b/tests/utf.test index d4f45e9..af1e553 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -156,296 +156,284 @@ test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} { test utf-6.1 {Tcl_UtfNext} testutfnext { # This takes the pointer one past the terminating NUL. # This is really an invalid call. - testutfnext {} + testutfnext -bytestring {} } 1 test utf-6.2 {Tcl_UtfNext} testutfnext { - testutfnext A + testutfnext -bytestring A } 1 test utf-6.3 {Tcl_UtfNext} testutfnext { - testutfnext AA + testutfnext -bytestring AA } 1 test utf-6.4 {Tcl_UtfNext} testutfnext { - testutfnext A\xA0 + testutfnext -bytestring A\xA0 } 1 test utf-6.5 {Tcl_UtfNext} testutfnext { - testutfnext A\xD0 + testutfnext -bytestring A\xD0 } 1 test utf-6.6 {Tcl_UtfNext} testutfnext { - testutfnext A\xE8 + testutfnext -bytestring A\xE8 } 1 test utf-6.7 {Tcl_UtfNext} testutfnext { - testutfnext A\xF2 + testutfnext -bytestring A\xF2 } 1 test utf-6.8 {Tcl_UtfNext} testutfnext { - testutfnext A\xF8 + testutfnext -bytestring A\xF8 } 1 test utf-6.9 {Tcl_UtfNext} testutfnext { - testutfnext \xA0 + testutfnext -bytestring \xA0 } 1 test utf-6.10 {Tcl_UtfNext} testutfnext { - testutfnext \xA0G + testutfnext -bytestring \xA0G } 1 test utf-6.11 {Tcl_UtfNext} testutfnext { - testutfnext \xA0\xA0 + testutfnext -bytestring \xA0\xA0 } 1 test utf-6.12 {Tcl_UtfNext} testutfnext { - testutfnext \xA0\xD0 + testutfnext -bytestring \xA0\xD0 } 1 test utf-6.13 {Tcl_UtfNext} testutfnext { - testutfnext \xA0\xE8 + testutfnext -bytestring \xA0\xE8 } 1 test utf-6.14 {Tcl_UtfNext} testutfnext { - testutfnext \xA0\xF2 + testutfnext -bytestring \xA0\xF2 } 1 test utf-6.15 {Tcl_UtfNext} testutfnext { - testutfnext \xA0\xF8 + testutfnext -bytestring \xA0\xF8 } 1 test utf-6.16 {Tcl_UtfNext} testutfnext { - testutfnext \xD0 + testutfnext -bytestring \xD0 } 1 test utf-6.17 {Tcl_UtfNext} testutfnext { - testutfnext \xD0G + testutfnext -bytestring \xD0G } 1 test utf-6.18 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0 + testutfnext -bytestring \xD0\xA0 } 2 test utf-6.19 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xD0 + testutfnext -bytestring \xD0\xD0 } 1 test utf-6.20 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xE8 + testutfnext -bytestring \xD0\xE8 } 1 test utf-6.21 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xF2 + testutfnext -bytestring \xD0\xF2 } 1 test utf-6.22 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xF8 + testutfnext -bytestring \xD0\xF8 } 1 test utf-6.23 {Tcl_UtfNext} testutfnext { - testutfnext \xE8 + testutfnext -bytestring \xE8 } 1 test utf-6.24 {Tcl_UtfNext} testutfnext { - testutfnext \xE8G + testutfnext -bytestring \xE8G } 1 test utf-6.25 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0 + testutfnext -bytestring \xE8\xA0 } 1 test utf-6.26 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xD0 + testutfnext -bytestring \xE8\xD0 } 1 test utf-6.27 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xE8 + testutfnext -bytestring \xE8\xE8 } 1 test utf-6.28 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xF2 + testutfnext -bytestring \xE8\xF2 } 1 test utf-6.29 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xF8 + testutfnext -bytestring \xE8\xF8 } 1 test utf-6.30 {Tcl_UtfNext} testutfnext { - testutfnext \xF2 + testutfnext -bytestring \xF2 } 1 test utf-6.31 {Tcl_UtfNext} testutfnext { - testutfnext \xF2G + testutfnext -bytestring \xF2G } 1 test utf-6.32 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0 + testutfnext -bytestring \xF2\xA0 } 1 test utf-6.33 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xD0 + testutfnext -bytestring \xF2\xD0 } 1 test utf-6.34 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xE8 + testutfnext -bytestring \xF2\xE8 } 1 test utf-6.35 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xF2 + testutfnext -bytestring \xF2\xF2 } 1 test utf-6.36 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xF8 + testutfnext -bytestring \xF2\xF8 } 1 test utf-6.37 {Tcl_UtfNext} testutfnext { - testutfnext \xF8 + testutfnext -bytestring \xF8 } 1 test utf-6.38 {Tcl_UtfNext} testutfnext { - testutfnext \xF8G + testutfnext -bytestring \xF8G } 1 test utf-6.39 {Tcl_UtfNext} testutfnext { - testutfnext \xF8\xA0 + testutfnext -bytestring \xF8\xA0 } 1 test utf-6.40 {Tcl_UtfNext} testutfnext { - testutfnext \xF8\xD0 + testutfnext -bytestring \xF8\xD0 } 1 test utf-6.41 {Tcl_UtfNext} testutfnext { - testutfnext \xF8\xE8 + testutfnext -bytestring \xF8\xE8 } 1 test utf-6.42 {Tcl_UtfNext} testutfnext { - testutfnext \xF8\xF2 + testutfnext -bytestring \xF8\xF2 } 1 test utf-6.43 {Tcl_UtfNext} testutfnext { - testutfnext \xF8\xF8 + testutfnext -bytestring \xF8\xF8 } 1 test utf-6.44 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0G + testutfnext -bytestring \xD0\xA0G } 2 test utf-6.45 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0\xA0 + testutfnext -bytestring \xD0\xA0\xA0 } 2 test utf-6.46 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0\xD0 + testutfnext -bytestring \xD0\xA0\xD0 } 2 test utf-6.47 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0\xE8 + testutfnext -bytestring \xD0\xA0\xE8 } 2 test utf-6.48 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0\xF2 + testutfnext -bytestring \xD0\xA0\xF2 } 2 test utf-6.49 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0\xF8 + testutfnext -bytestring \xD0\xA0\xF8 } 2 test utf-6.50 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0G + testutfnext -bytestring \xE8\xA0G } 1 test utf-6.51 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0 + testutfnext -bytestring \xE8\xA0\xA0 } 3 test utf-6.52 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xD0 + testutfnext -bytestring \xE8\xA0\xD0 } 1 test utf-6.53 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xE8 + testutfnext -bytestring \xE8\xA0\xE8 } 1 test utf-6.54 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xF2 + testutfnext -bytestring \xE8\xA0\xF2 } 1 test utf-6.55 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xF8 + testutfnext -bytestring \xE8\xA0\xF8 } 1 test utf-6.56 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0G + testutfnext -bytestring \xF2\xA0G } 1 test utf-6.57 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0 + testutfnext -bytestring \xF2\xA0\xA0 } 1 test utf-6.58 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xD0 + testutfnext -bytestring \xF2\xA0\xD0 } 1 test utf-6.59 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xE8 + testutfnext -bytestring \xF2\xA0\xE8 } 1 test utf-6.60 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xF2 + testutfnext -bytestring \xF2\xA0\xF2 } 1 test utf-6.61 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xF8 + testutfnext -bytestring \xF2\xA0\xF8 } 1 test utf-6.62 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0G + testutfnext -bytestring \xE8\xA0\xA0G } 3 test utf-6.63 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0\xA0 + testutfnext -bytestring \xE8\xA0\xA0\xA0 } 3 test utf-6.64 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0\xD0 + testutfnext -bytestring \xE8\xA0\xA0\xD0 } 3 test utf-6.65 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0\xE8 + testutfnext -bytestring \xE8\xA0\xA0\xE8 } 3 test utf-6.66 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0\xF2 + testutfnext -bytestring \xE8\xA0\xA0\xF2 } 3 test utf-6.67 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0\xF8 + testutfnext -bytestring \xE8\xA0\xA0\xF8 } 3 test utf-6.68 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0G + testutfnext -bytestring \xF2\xA0\xA0G } 1 test utf-6.69 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0\xA0 + testutfnext -bytestring \xF2\xA0\xA0\xA0 } 4 test utf-6.70 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0\xD0 + testutfnext -bytestring \xF2\xA0\xA0\xD0 } 1 test utf-6.71 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0\xE8 + testutfnext -bytestring \xF2\xA0\xA0\xE8 } 1 test utf-6.71 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0\xF2 + testutfnext -bytestring \xF2\xA0\xA0\xF2 } 1 test utf-6.73 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0\xF8 + testutfnext -bytestring \xF2\xA0\xA0\xF8 } 1 test utf-6.74 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0\xA0G + testutfnext -bytestring \xF2\xA0\xA0\xA0G } 4 test utf-6.75 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0\xA0\xA0 + testutfnext -bytestring \xF2\xA0\xA0\xA0\xA0 } 4 test utf-6.76 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0\xA0\xD0 + testutfnext -bytestring \xF2\xA0\xA0\xA0\xD0 } 4 test utf-6.77 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0\xA0\xE8 + testutfnext -bytestring \xF2\xA0\xA0\xA0\xE8 } 4 test utf-6.78 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0\xA0\xF2 + testutfnext -bytestring \xF2\xA0\xA0\xA0\xF2 } 4 test utf-6.79 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0\xA0G\xF8 + testutfnext -bytestring \xF2\xA0\xA0\xA0G\xF8 } 4 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xC0\x80 + testutfnext -bytestring \xC0\x80 } 2 test utf-6.81 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xC0\x81 + testutfnext -bytestring \xC0\x81 } 1 test utf-6.82 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xC1\x80 + testutfnext -bytestring \xC1\x80 } 1 test utf-6.83 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xC2\x80 + testutfnext -bytestring \xC2\x80 } 2 test utf-6.84 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xE0\x80\x80 + testutfnext -bytestring \xE0\x80\x80 } 1 test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xE0\xA0\x80 + testutfnext -bytestring \xE0\xA0\x80 } 3 test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xF0\x80\x80\x80 + testutfnext -bytestring \xF0\x80\x80\x80 } 1 test utf-6.87 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xF0\x90\x80\x80 + testutfnext -bytestring \xF0\x90\x80\x80 } 4 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} testutfnext { - testutfnext \xA0\xA0 + testutfnext -bytestring \xA0\xA0 } 1 -test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} testutfnext { - testutfnext \xE8\xA0\xA0 1 -} 2 test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} testutfnext { - testutfnext \x80\x80 + testutfnext -bytestring \x80\x80 } 1 -test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} testutfnext { - testutfnext \xF0\x80\x80 1 -} 2 test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext { - testutfnext \xF4\x8F\xBF\xBF + testutfnext -bytestring \xF4\x8F\xBF\xBF } 4 test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext { - testutfnext \xF4\x90\x80\x80 + testutfnext -bytestring \xF4\x90\x80\x80 } 1 test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { - testutfnext \xA0\xA0\xA0 + testutfnext -bytestring \xA0\xA0\xA0 } 1 -test utf-6.92.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { - testutfnext \xF2\xA0\xA0\xA0 1 -} 2 test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { - testutfnext \x80\x80\x80 + testutfnext -bytestring \x80\x80\x80 } 1 -test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { - testutfnext \xF0\x80\x80\x80 1 -} 2 test utf-7.1 {Tcl_UtfPrev} testutfprev { testutfprev {} -- cgit v0.12 From 070fa22acbfab9614a41de82797ab8709a43ae36 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 22 Apr 2020 10:02:06 +0000 Subject: Determine "testConstraint ucs2" without the need for a testcommand. Rename "compat85" testConstraint to "ucs2", because that's what it actually is. --- tests/chanio.test | 3 +-- tests/encoding.test | 3 +-- tests/io.test | 3 +-- tests/source.test | 3 +-- tests/utf.test | 52 ++++++++++++++++++++++++++-------------------------- 5 files changed, 30 insertions(+), 34 deletions(-) diff --git a/tests/chanio.test b/tests/chanio.test index c2f561b..db4544c 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -29,8 +29,7 @@ namespace eval ::tcl::test::io { variable msg variable expected - testConstraint ucs2 [expr { [llength [info commands testsize]] && - ([testsize unichar] == 2) }] + testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] testConstraint testchannel [llength [info commands testchannel]] testConstraint exec [llength [info commands exec]] testConstraint openpipe 1 diff --git a/tests/encoding.test b/tests/encoding.test index ad55e26..af325c1 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -32,8 +32,7 @@ proc runtests {} { testConstraint testencoding [llength [info commands testencoding]] testConstraint exec [llength [info commands exec]] -testConstraint ucs2 [expr { [llength [info commands testsize]] && - ([testsize unichar] == 2) }] +testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] # TclInitEncodingSubsystem is tested by the rest of this file diff --git a/tests/io.test b/tests/io.test index 1c18576..06be982 100644 --- a/tests/io.test +++ b/tests/io.test @@ -29,8 +29,7 @@ namespace eval ::tcl::test::io { variable msg variable expected -testConstraint ucs2 [expr { [llength [info commands testsize]] && - ([testsize unichar] == 2) }] +testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] testConstraint testchannel [llength [info commands testchannel]] testConstraint exec [llength [info commands exec]] diff --git a/tests/source.test b/tests/source.test index 8511004..877921e 100644 --- a/tests/source.test +++ b/tests/source.test @@ -20,8 +20,7 @@ if {[catch {package require tcltest 2.1}]} { namespace eval ::tcl::test::source { namespace import ::tcltest::* -testConstraint ucs2 [expr { [llength [info commands testsize]] && - ([testsize unichar] == 2) }] +testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] test source-1.1 {source command} -setup { set x "old x value" diff --git a/tests/utf.test b/tests/utf.test index 0a81ae3..fd4e396 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -13,7 +13,7 @@ if {[lsearch [namespace children] ::tcltest] == -1} { namespace import -force ::tcltest::* } -testConstraint compat85 [expr {[format %c 0x010000] eq "\uFFFD"}] +testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] testConstraint testbytestring [llength [info commands testbytestring]] @@ -66,13 +66,13 @@ test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} testbytestrin test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring { string length [testbytestring "\xE4\xB9\x8E"] } {1} -test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body { +test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring ucs2} -body { string length [testbytestring "\xF0\x90\x80\x80"] } -result {4} test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring fullutf} -body { string length [testbytestring "\xF0\x90\x80\x80"] } -result {1} -test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body { +test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring ucs2} -body { string length [testbytestring "\xF4\x8F\xBF\xBF"] } -result {4} test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring fullutf} -body { @@ -126,7 +126,7 @@ test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 3 } {3} -test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring compat85} { +test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs2} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4 } {4} test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring fullutf} { @@ -346,7 +346,7 @@ test utf-6.67 {Tcl_UtfNext} testutfnext { test utf-6.68 {Tcl_UtfNext} testutfnext { testutfnext -bytestring \xF2\xA0\xA0G } 1 -test utf-6.69.0 {Tcl_UtfNext} {testutfnext compat85} { +test utf-6.69.0 {Tcl_UtfNext} {testutfnext ucs2} { testutfnext -bytestring \xF2\xA0\xA0\xA0 } 1 test utf-6.69.1 {Tcl_UtfNext} {testutfnext fullutf} { @@ -364,37 +364,37 @@ test utf-6.71 {Tcl_UtfNext} testutfnext { test utf-6.73 {Tcl_UtfNext} testutfnext { testutfnext -bytestring \xF2\xA0\xA0\xF8 } 1 -test utf-6.74.0 {Tcl_UtfNext} {testutfnext compat85} { +test utf-6.74.0 {Tcl_UtfNext} {testutfnext ucs2} { testutfnext -bytestring \xF2\xA0\xA0\xA0G } 1 test utf-6.74.1 {Tcl_UtfNext} {testutfnext fullutf} { testutfnext -bytestring \xF2\xA0\xA0\xA0G } 4 -test utf-6.75.0 {Tcl_UtfNext} {testutfnext compat85} { +test utf-6.75.0 {Tcl_UtfNext} {testutfnext ucs2} { testutfnext -bytestring \xF2\xA0\xA0\xA0\xA0 } 1 test utf-6.75.1 {Tcl_UtfNext} {testutfnext fullutf} { testutfnext -bytestring \xF2\xA0\xA0\xA0\xA0 } 4 -test utf-6.76.0 {Tcl_UtfNext} {testutfnext compat85} { +test utf-6.76.0 {Tcl_UtfNext} {testutfnext ucs2} { testutfnext -bytestring \xF2\xA0\xA0\xA0\xD0 } 1 test utf-6.76.1 {Tcl_UtfNext} {testutfnext fullutf} { testutfnext -bytestring \xF2\xA0\xA0\xA0\xD0 } 4 -test utf-6.77.0 {Tcl_UtfNext} {testutfnext compat85} { +test utf-6.77.0 {Tcl_UtfNext} {testutfnext ucs2} { testutfnext -bytestring \xF2\xA0\xA0\xA0\xE8 } 1 test utf-6.77.1 {Tcl_UtfNext} {testutfnext fullutf} { testutfnext -bytestring \xF2\xA0\xA0\xA0\xE8 } 4 -test utf-6.78.0 {Tcl_UtfNext} {testutfnext compat85} { +test utf-6.78.0 {Tcl_UtfNext} {testutfnext ucs2} { testutfnext -bytestring \xF2\xA0\xA0\xA0\xF2 } 1 test utf-6.78.1 {Tcl_UtfNext} {testutfnext fullutf} { testutfnext -bytestring \xF2\xA0\xA0\xA0\xF2 } 4 -test utf-6.79.0 {Tcl_UtfNext} {testutfnext compat85} { +test utf-6.79.0 {Tcl_UtfNext} {testutfnext ucs2} { testutfnext -bytestring \xF2\xA0\xA0\xA0G\xF8 } 1 test utf-6.79.1 {Tcl_UtfNext} {testutfnext fullutf} { @@ -421,7 +421,7 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext { test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext { testutfnext -bytestring \xF0\x80\x80\x80 } 1 -test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext compat85} { +test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext ucs2} { testutfnext -bytestring \xF0\x90\x80\x80 } 1 test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} { @@ -433,7 +433,7 @@ test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {test test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} { testutfnext -bytestring \x80\x80 } 1 -test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext compat85} { +test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs2} { testutfnext -bytestring \xF4\x8F\xBF\xBF } 1 test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} { @@ -512,19 +512,19 @@ test utf-7.9.1 {Tcl_UtfPrev} testutfprev { test utf-7.9.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xF8\xA0 3 } 2 -test utf-7.10.0 {Tcl_UtfPrev} {testutfprev compat85} { +test utf-7.10.0 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xF2\xA0 } 2 test utf-7.10.1 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF2\xA0 } 1 -test utf-7.10.1.0 {Tcl_UtfPrev} {testutfprev compat85} { +test utf-7.10.1.0 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xF2\xA0\xA0\xA0 3 } 2 test utf-7.10.1.1 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF2\xA0\xA0\xA0 3 } 1 -test utf-7.10.2.0 {Tcl_UtfPrev} {testutfprev compat85} { +test utf-7.10.2.0 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xF2\xA0\xF8\xA0 3 } 2 test utf-7.10.2.1 {Tcl_UtfPrev} {testutfprev fullutf} { @@ -569,19 +569,19 @@ test utf-7.14.1 {Tcl_UtfPrev} testutfprev { test utf-7.14.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xA0\xF8 4 } 3 -test utf-7.15.0 {Tcl_UtfPrev} {testutfprev compat85} { +test utf-7.15.0 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xF2\xA0\xA0 } 3 test utf-7.15.1 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF2\xA0\xA0 } 1 -test utf-7.15.1.0 {Tcl_UtfPrev} {testutfprev compat85} { +test utf-7.15.1.0 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xF2\xA0\xA0\xA0 4 } 3 test utf-7.15.1.1 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF2\xA0\xA0\xA0 4 } 1 -test utf-7.15.2.0 {Tcl_UtfPrev} {testutfprev compat85} { +test utf-7.15.2.0 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xF2\xA0\xA0\xF8 4 } 3 test utf-7.15.2.1 {Tcl_UtfPrev} {testutfprev fullutf} { @@ -617,7 +617,7 @@ test utf-7.18.2 {Tcl_UtfPrev} testutfprev { test utf-7.19 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xA0\xA0 } 4 -test utf-7.20.0 {Tcl_UtfPrev} {testutfprev compat85} { +test utf-7.20.0 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xF2\xA0\xA0\xA0 } 4 test utf-7.20.1 {Tcl_UtfPrev} {testutfprev fullutf} { @@ -683,19 +683,19 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence} testutfprev { test utf-7.38 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\xA0\x80 2 } 1 -test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev compat85} { +test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs2} { testutfprev A\xF0\x90\x80\x80 } 4 test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { testutfprev A\xF0\x90\x80\x80 } 1 -test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev compat85} { +test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs2} { testutfprev A\xF0\x90\x80\x80 4 } 3 test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { testutfprev A\xF0\x90\x80\x80 4 } 1 -test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev compat85} { +test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs2} { testutfprev A\xF0\x90\x80\x80 3 } 2 test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { @@ -725,19 +725,19 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {te test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} { testutfprev \xE8\xA0\x00 2 } 0 -test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} { +test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs2} { testutfprev A\xF4\x8F\xBF\xBF } 4 test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { testutfprev A\xF4\x8F\xBF\xBF } 1 -test utf-7.48.1.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} { +test utf-7.48.1.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs2} { testutfprev A\xF4\x8F\xBF\xBF 4 } 3 test utf-7.48.1.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { testutfprev A\xF4\x8F\xBF\xBF 4 } 1 -test utf-7.48.2.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} { +test utf-7.48.2.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs2} { testutfprev A\xF4\x8F\xBF\xBF 3 } 2 test utf-7.48.2.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { -- cgit v0.12 From 04bb9b292c26675655414e30f8257deca1bbe097 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 22 Apr 2020 16:00:36 +0000 Subject: Attempt to fix [1004065] for TCL_UTF_MAX=4. Disallow building Tcl with TCL_UTF_MAX>4 --- .travis.yml | 13 ------------- generic/tclEncoding.c | 16 ++++++++++------ generic/tclInt.h | 4 ++++ tests/chanio.test | 3 +-- tests/encoding.test | 5 +---- tests/io.test | 4 +--- tests/source.test | 2 +- 7 files changed, 18 insertions(+), 29 deletions(-) diff --git a/.travis.yml b/.travis.yml index 63c1645..b3ac5f9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,19 +26,6 @@ matrix: env: - BUILD_DIR=unix - CFGOPT=CFLAGS=-DTCL_UTF_MAX=4 - script: - - make all tcltest - - make test TESTFLAGS="-file utf.test" - - name: "Linux/GCC/Shared: UTF_MAX=6" - os: linux - dist: bionic - compiler: gcc - env: - - BUILD_DIR=unix - - CFGOPT=CFLAGS=-DTCL_UTF_MAX=6 - script: - - make all tcltest - - make test TESTFLAGS="-file utf.test" - name: "Linux/GCC/Static" os: linux dist: bionic diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 6c16827..d948189 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2359,13 +2359,12 @@ UnicodeToUtfProc( const char *srcStart, *srcEnd; char *dstEnd, *dstStart; int result, numChars; - Tcl_UniChar ch; + unsigned short ch; result = TCL_OK; - if ((srcLen % sizeof(Tcl_UniChar)) != 0) { + if ((srcLen & 1) != 0) { result = TCL_CONVERT_MULTIBYTE; - srcLen /= sizeof(Tcl_UniChar); - srcLen *= sizeof(Tcl_UniChar); + srcLen--; } srcStart = src; @@ -2383,13 +2382,13 @@ UnicodeToUtfProc( * Special case for 1-byte utf chars for speed. Make sure we * work with Tcl_UniChar-size data. */ - ch = *(Tcl_UniChar *)src; + ch = *(unsigned short *)src; if (ch && ch < 0x80) { *dst++ = (ch & 0xFF); } else { dst += Tcl_UniCharToUtf(ch, dst); } - src += sizeof(Tcl_UniChar); + src += sizeof(unsigned short); } *srcReadPtr = src - srcStart; @@ -2477,6 +2476,11 @@ UtfToUnicodeProc( * by casting dst to a Tcl_UniChar. [Bug 1122671] * XXX: This hard-codes the assumed size of Tcl_UniChar as 2. */ +#if TCL_UTF_MAX > 3 + if (ch & ~0xFFFF) { + ch = 0xFFFD; + } else +#endif #ifdef WORDS_BIGENDIAN *dst++ = (ch >> 8); *dst++ = (ch & 0xFF); diff --git a/generic/tclInt.h b/generic/tclInt.h index 3fa14e7..23a398f 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -102,6 +102,10 @@ typedef int ptrdiff_t; #define NO_WIDE_TYPE #endif +#if (TCL_UTF_MAX != 3) && (TCL_UTF_MAX != 4) +# error "Tcl 8.5 can only be built with TCL_UTF_MAX=3 or TCL_UTF_MAX=4" +#endif + /* * Macros used to cast between pointers and integers (e.g. when storing an int * in ClientData), on 64-bit architectures they avoid gcc warning about "cast diff --git a/tests/chanio.test b/tests/chanio.test index db4544c..5fae431 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -29,7 +29,6 @@ namespace eval ::tcl::test::io { variable msg variable expected - testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] testConstraint testchannel [llength [info commands testchannel]] testConstraint exec [llength [info commands exec]] testConstraint openpipe 1 @@ -876,7 +875,7 @@ test chan-io-6.44 {Tcl_GetsObj: input saw cr, not followed by cr} {stdio testcha chan close $f set x } [list "bbbbbbbbbbbbbbb" 15 "123456789abcdef" 1 4 "abcd" 0 3 "efg"] -test chan-io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} {stdio testchannel openpipe fileevent ucs2} { +test chan-io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} {stdio testchannel openpipe fileevent} { # Tcl_ExternalToUtf() set f [open "|[list [interpreter] $path(cat)]" w+] diff --git a/tests/encoding.test b/tests/encoding.test index af325c1..8722a93 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -32,9 +32,6 @@ proc runtests {} { testConstraint testencoding [llength [info commands testencoding]] testConstraint exec [llength [info commands exec]] -testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] - - # TclInitEncodingSubsystem is tested by the rest of this file # TclFinalizeEncodingSubsystem is not currently tested @@ -319,7 +316,7 @@ test encoding-15.3 {UtfToUtfProc null character input} { list [string bytelength $x] [string bytelength $y] $z } {1 2 c080} -test encoding-16.1 {UnicodeToUtfProc} ucs2 { +test encoding-16.1 {UnicodeToUtfProc} { set val [encoding convertfrom unicode NN] list $val [format %x [scan $val %c]] } "\u4e4e 4e4e" diff --git a/tests/io.test b/tests/io.test index 06be982..04fa1d2 100644 --- a/tests/io.test +++ b/tests/io.test @@ -29,8 +29,6 @@ namespace eval ::tcl::test::io { variable msg variable expected -testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] - testConstraint testchannel [llength [info commands testchannel]] testConstraint exec [llength [info commands exec]] testConstraint openpipe 1 @@ -912,7 +910,7 @@ test io-6.44 {Tcl_GetsObj: input saw cr, not followed by cr} {stdio testchannel close $f set x } [list "bbbbbbbbbbbbbbb" 15 "123456789abcdef" 1 4 "abcd" 0 3 "efg"] -test io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} {stdio testchannel openpipe fileevent ucs2} { +test io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} {stdio testchannel openpipe fileevent} { # Tcl_ExternalToUtf() set f [open "|[list [interpreter] $path(cat)]" w+] diff --git a/tests/source.test b/tests/source.test index 877921e..1df5ac4 100644 --- a/tests/source.test +++ b/tests/source.test @@ -234,7 +234,7 @@ test source-7.1 {source -encoding test} -setup { } -cleanup { removeFile source.file } -result correct -test source-7.2 {source -encoding test} -constraints ucs2 -setup { +test source-7.2 {source -encoding test} -setup { # This tests for bad interactions between [source -encoding] # and use of the Control-Z character (\u001A) as a cross-platform # EOF character by [source]. Here we write out and the [source] a -- cgit v0.12 From 55d47dd21e8786cc598822fb20b1bf38721eca40 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 22 Apr 2020 16:06:39 +0000 Subject: Oopsee (but not really crucial) --- generic/tclEncoding.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index d948189..ac9f667 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2479,7 +2479,7 @@ UtfToUnicodeProc( #if TCL_UTF_MAX > 3 if (ch & ~0xFFFF) { ch = 0xFFFD; - } else + } #endif #ifdef WORDS_BIGENDIAN *dst++ = (ch >> 8); -- cgit v0.12 From 39736552afb862bfd716daf58321449acd40fc63 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 22 Apr 2020 16:12:41 +0000 Subject: Restored a test constraint to tolerate [1004065] (maybe just in time to not need it anymore). --- tests/chanio.test | 5 +++-- tests/encoding.test | 6 +++--- tests/io.test | 5 +++-- tests/source.test | 5 +++-- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tests/chanio.test b/tests/chanio.test index db4544c..ce39997 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -29,7 +29,8 @@ namespace eval ::tcl::test::io { variable msg variable expected - testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] + testConstraint twobyteunit [expr {[llength [info commands testsize]] && + ([testsize unichar] == 2)}] testConstraint testchannel [llength [info commands testchannel]] testConstraint exec [llength [info commands exec]] testConstraint openpipe 1 @@ -876,7 +877,7 @@ test chan-io-6.44 {Tcl_GetsObj: input saw cr, not followed by cr} {stdio testcha chan close $f set x } [list "bbbbbbbbbbbbbbb" 15 "123456789abcdef" 1 4 "abcd" 0 3 "efg"] -test chan-io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} {stdio testchannel openpipe fileevent ucs2} { +test chan-io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} {stdio testchannel openpipe fileevent twobyteunit} { # Tcl_ExternalToUtf() set f [open "|[list [interpreter] $path(cat)]" w+] diff --git a/tests/encoding.test b/tests/encoding.test index af325c1..7313093 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -32,8 +32,8 @@ proc runtests {} { testConstraint testencoding [llength [info commands testencoding]] testConstraint exec [llength [info commands exec]] -testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] - +testConstraint twobyteunit [expr {[llength [info commands testsize]] && + ([testsize unichar] == 2)}] # TclInitEncodingSubsystem is tested by the rest of this file # TclFinalizeEncodingSubsystem is not currently tested @@ -319,7 +319,7 @@ test encoding-15.3 {UtfToUtfProc null character input} { list [string bytelength $x] [string bytelength $y] $z } {1 2 c080} -test encoding-16.1 {UnicodeToUtfProc} ucs2 { +test encoding-16.1 {UnicodeToUtfProc} twobyteunit { set val [encoding convertfrom unicode NN] list $val [format %x [scan $val %c]] } "\u4e4e 4e4e" diff --git a/tests/io.test b/tests/io.test index 06be982..82d978e 100644 --- a/tests/io.test +++ b/tests/io.test @@ -29,7 +29,8 @@ namespace eval ::tcl::test::io { variable msg variable expected -testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] +testConstraint twobyteunit [expr {[llength [info commands testsize]] && + ([testsize unichar] == 2)}] testConstraint testchannel [llength [info commands testchannel]] testConstraint exec [llength [info commands exec]] @@ -912,7 +913,7 @@ test io-6.44 {Tcl_GetsObj: input saw cr, not followed by cr} {stdio testchannel close $f set x } [list "bbbbbbbbbbbbbbb" 15 "123456789abcdef" 1 4 "abcd" 0 3 "efg"] -test io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} {stdio testchannel openpipe fileevent ucs2} { +test io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} {stdio testchannel openpipe fileevent twobyteunit} { # Tcl_ExternalToUtf() set f [open "|[list [interpreter] $path(cat)]" w+] diff --git a/tests/source.test b/tests/source.test index 877921e..15ff08f 100644 --- a/tests/source.test +++ b/tests/source.test @@ -20,7 +20,8 @@ if {[catch {package require tcltest 2.1}]} { namespace eval ::tcl::test::source { namespace import ::tcltest::* -testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] +testConstraint twobyteunit [expr {[llength [info commands testsize]] && + ([testsize unichar] == 2)}] test source-1.1 {source command} -setup { set x "old x value" @@ -234,7 +235,7 @@ test source-7.1 {source -encoding test} -setup { } -cleanup { removeFile source.file } -result correct -test source-7.2 {source -encoding test} -constraints ucs2 -setup { +test source-7.2 {source -encoding test} -constraints twobyteunit -setup { # This tests for bad interactions between [source -encoding] # and use of the Control-Z character (\u001A) as a cross-platform # EOF character by [source]. Here we write out and the [source] a -- cgit v0.12 From 9c0475426c2cec95a034d9393e7d2cf27dd029e2 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 22 Apr 2020 16:27:16 +0000 Subject: Another ucs2 testContraint no longer used --- tests/source.test | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/source.test b/tests/source.test index 1df5ac4..dc3c2d8 100644 --- a/tests/source.test +++ b/tests/source.test @@ -20,8 +20,6 @@ if {[catch {package require tcltest 2.1}]} { namespace eval ::tcl::test::source { namespace import ::tcltest::* -testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] - test source-1.1 {source command} -setup { set x "old x value" set y "old y value" -- cgit v0.12 From 9c04c8ce98e2cb1c58d3be6d07bba35b24975443 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 22 Apr 2020 20:49:33 +0000 Subject: Eliminate the -bytestring option of [testutfnext]. No caller needs anything else. --- generic/tclTest.c | 12 ++-- tests/utf.test | 200 +++++++++++++++++++++++++++--------------------------- 2 files changed, 104 insertions(+), 108 deletions(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index 7a531b4..65599be 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -7130,15 +7130,11 @@ TestUtfNextCmd( static const char tobetested[] = "\xFF\xFE\xF4\xF2\xF0\xEF\xE8\xE3\xE2\xE1\xE0\xC2\xC1\xC0\x82"; const char *p = tobetested; - if (objc != 3 || strcmp(Tcl_GetString(objv[1]), "-bytestring")) { - if (objc != 2) { - Tcl_WrongNumArgs(interp, 1, objv, "?-bytestring? bytes"); - return TCL_ERROR; - } - bytes = Tcl_GetStringFromObj(objv[1], &numBytes); - } else { - bytes = (char *) Tcl_GetByteArrayFromObj(objv[2], &numBytes); + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, "bytes"); + return TCL_ERROR; } + bytes = (char *) Tcl_GetByteArrayFromObj(objv[1], &numBytes); if (numBytes > sizeof(buffer)-2) { Tcl_AppendResult(interp, "\"testutfnext\" can only handle 30 bytes", NULL); diff --git a/tests/utf.test b/tests/utf.test index fd4e396..beba98c 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -143,7 +143,7 @@ test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} { test utf-6.1 {Tcl_UtfNext} testutfnext { # This takes the pointer one past the terminating NUL. # This is really an invalid call. - testutfnext -bytestring {} + testutfnext {} } 1 test utf-6.2 {Tcl_UtfNext} testutfnext { testutfnext A @@ -152,301 +152,301 @@ test utf-6.3 {Tcl_UtfNext} testutfnext { testutfnext AA } 1 test utf-6.4 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring A\xA0 + testutfnext A\xA0 } 1 test utf-6.5 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring A\xD0 + testutfnext A\xD0 } 1 test utf-6.6 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring A\xE8 + testutfnext A\xE8 } 1 test utf-6.7 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring A\xF2 + testutfnext A\xF2 } 1 test utf-6.8 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring A\xF8 + testutfnext A\xF8 } 1 test utf-6.9 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0 + testutfnext \xA0 } 1 test utf-6.10 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0G + testutfnext \xA0G } 1 test utf-6.11 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0\xA0 + testutfnext \xA0\xA0 } 1 test utf-6.12 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0\xD0 + testutfnext \xA0\xD0 } 1 test utf-6.13 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0\xE8 + testutfnext \xA0\xE8 } 1 test utf-6.14 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0\xF2 + testutfnext \xA0\xF2 } 1 test utf-6.15 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0\xF8 + testutfnext \xA0\xF8 } 1 test utf-6.16 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0 + testutfnext \xD0 } 1 test utf-6.17 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0G + testutfnext \xD0G } 1 test utf-6.18 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0 + testutfnext \xD0\xA0 } 2 test utf-6.19 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xD0 + testutfnext \xD0\xD0 } 1 test utf-6.20 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xE8 + testutfnext \xD0\xE8 } 1 test utf-6.21 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xF2 + testutfnext \xD0\xF2 } 1 test utf-6.22 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xF8 + testutfnext \xD0\xF8 } 1 test utf-6.23 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8 + testutfnext \xE8 } 1 test utf-6.24 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8G + testutfnext \xE8G } 1 test utf-6.25 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0 + testutfnext \xE8\xA0 } 1 test utf-6.26 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xD0 + testutfnext \xE8\xD0 } 1 test utf-6.27 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xE8 + testutfnext \xE8\xE8 } 1 test utf-6.28 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xF2 + testutfnext \xE8\xF2 } 1 test utf-6.29 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xF8 + testutfnext \xE8\xF8 } 1 test utf-6.30 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2 + testutfnext \xF2 } 1 test utf-6.31 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2G + testutfnext \xF2G } 1 test utf-6.32 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0 + testutfnext \xF2\xA0 } 1 test utf-6.33 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xD0 + testutfnext \xF2\xD0 } 1 test utf-6.34 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xE8 + testutfnext \xF2\xE8 } 1 test utf-6.35 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xF2 + testutfnext \xF2\xF2 } 1 test utf-6.36 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xF8 + testutfnext \xF2\xF8 } 1 test utf-6.37 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8 + testutfnext \xF8 } 1 test utf-6.38 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8G + testutfnext \xF8G } 1 test utf-6.39 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8\xA0 + testutfnext \xF8\xA0 } 1 test utf-6.40 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8\xD0 + testutfnext \xF8\xD0 } 1 test utf-6.41 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8\xE8 + testutfnext \xF8\xE8 } 1 test utf-6.42 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8\xF2 + testutfnext \xF8\xF2 } 1 test utf-6.43 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8\xF8 + testutfnext \xF8\xF8 } 1 test utf-6.44 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0G + testutfnext \xD0\xA0G } 2 test utf-6.45 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0\xA0 + testutfnext \xD0\xA0\xA0 } 2 test utf-6.46 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0\xD0 + testutfnext \xD0\xA0\xD0 } 2 test utf-6.47 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0\xE8 + testutfnext \xD0\xA0\xE8 } 2 test utf-6.48 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0\xF2 + testutfnext \xD0\xA0\xF2 } 2 test utf-6.49 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0\xF8 + testutfnext \xD0\xA0\xF8 } 2 test utf-6.50 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0G + testutfnext \xE8\xA0G } 1 test utf-6.51 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0 + testutfnext \xE8\xA0\xA0 } 3 test utf-6.52 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xD0 + testutfnext \xE8\xA0\xD0 } 1 test utf-6.53 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xE8 + testutfnext \xE8\xA0\xE8 } 1 test utf-6.54 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xF2 + testutfnext \xE8\xA0\xF2 } 1 test utf-6.55 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xF8 + testutfnext \xE8\xA0\xF8 } 1 test utf-6.56 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0G + testutfnext \xF2\xA0G } 1 test utf-6.57 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xA0 + testutfnext \xF2\xA0\xA0 } 1 test utf-6.58 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xD0 + testutfnext \xF2\xA0\xD0 } 1 test utf-6.59 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xE8 + testutfnext \xF2\xA0\xE8 } 1 test utf-6.60 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xF2 + testutfnext \xF2\xA0\xF2 } 1 test utf-6.61 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xF8 + testutfnext \xF2\xA0\xF8 } 1 test utf-6.62 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0G + testutfnext \xE8\xA0\xA0G } 3 test utf-6.63 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0\xA0 + testutfnext \xE8\xA0\xA0\xA0 } 3 test utf-6.64 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0\xD0 + testutfnext \xE8\xA0\xA0\xD0 } 3 test utf-6.65 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0\xE8 + testutfnext \xE8\xA0\xA0\xE8 } 3 test utf-6.66 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0\xF2 + testutfnext \xE8\xA0\xA0\xF2 } 3 test utf-6.67 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0\xF8 + testutfnext \xE8\xA0\xA0\xF8 } 3 test utf-6.68 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xA0G + testutfnext \xF2\xA0\xA0G } 1 test utf-6.69.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext -bytestring \xF2\xA0\xA0\xA0 + testutfnext \xF2\xA0\xA0\xA0 } 1 test utf-6.69.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext -bytestring \xF2\xA0\xA0\xA0 + testutfnext \xF2\xA0\xA0\xA0 } 4 test utf-6.70 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xA0\xD0 + testutfnext \xF2\xA0\xA0\xD0 } 1 test utf-6.71 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xA0\xE8 + testutfnext \xF2\xA0\xA0\xE8 } 1 test utf-6.71 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xA0\xF2 + testutfnext \xF2\xA0\xA0\xF2 } 1 test utf-6.73 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xA0\xF8 + testutfnext \xF2\xA0\xA0\xF8 } 1 test utf-6.74.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext -bytestring \xF2\xA0\xA0\xA0G + testutfnext \xF2\xA0\xA0\xA0G } 1 test utf-6.74.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext -bytestring \xF2\xA0\xA0\xA0G + testutfnext \xF2\xA0\xA0\xA0G } 4 test utf-6.75.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xA0 + testutfnext \xF2\xA0\xA0\xA0\xA0 } 1 test utf-6.75.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xA0 + testutfnext \xF2\xA0\xA0\xA0\xA0 } 4 test utf-6.76.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xD0 + testutfnext \xF2\xA0\xA0\xA0\xD0 } 1 test utf-6.76.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xD0 + testutfnext \xF2\xA0\xA0\xA0\xD0 } 4 test utf-6.77.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xE8 + testutfnext \xF2\xA0\xA0\xA0\xE8 } 1 test utf-6.77.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xE8 + testutfnext \xF2\xA0\xA0\xA0\xE8 } 4 test utf-6.78.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xF2 + testutfnext \xF2\xA0\xA0\xA0\xF2 } 1 test utf-6.78.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xF2 + testutfnext \xF2\xA0\xA0\xA0\xF2 } 4 test utf-6.79.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext -bytestring \xF2\xA0\xA0\xA0G\xF8 + testutfnext \xF2\xA0\xA0\xA0G\xF8 } 1 test utf-6.79.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext -bytestring \xF2\xA0\xA0\xA0G\xF8 + testutfnext \xF2\xA0\xA0\xA0G\xF8 } 4 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xC0\x80 + testutfnext \xC0\x80 } 2 test utf-6.81 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xC0\x81 + testutfnext \xC0\x81 } 1 test utf-6.82 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xC1\x80 + testutfnext \xC1\x80 } 1 test utf-6.83 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xC2\x80 + testutfnext \xC2\x80 } 2 test utf-6.84 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xE0\x80\x80 + testutfnext \xE0\x80\x80 } 1 test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xE0\xA0\x80 + testutfnext \xE0\xA0\x80 } 3 test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xF0\x80\x80\x80 + testutfnext \xF0\x80\x80\x80 } 1 test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext ucs2} { - testutfnext -bytestring \xF0\x90\x80\x80 + testutfnext \xF0\x90\x80\x80 } 1 test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} { - testutfnext -bytestring \xF0\x90\x80\x80 + testutfnext \xF0\x90\x80\x80 } 4 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} { - testutfnext -bytestring \xA0\xA0 + testutfnext \xA0\xA0 } 1 test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} { - testutfnext -bytestring \x80\x80 + testutfnext \x80\x80 } 1 test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs2} { - testutfnext -bytestring \xF4\x8F\xBF\xBF + testutfnext \xF4\x8F\xBF\xBF } 1 test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} { - testutfnext -bytestring \xF4\x8F\xBF\xBF + testutfnext \xF4\x8F\xBF\xBF } 4 test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext { - testutfnext -bytestring \xF4\x90\x80\x80 + testutfnext \xF4\x90\x80\x80 } 1 test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { - testutfnext -bytestring \xA0\xA0\xA0 + testutfnext \xA0\xA0\xA0 } 1 test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { - testutfnext -bytestring \x80\x80\x80 + testutfnext \x80\x80\x80 } 1 test utf-7.1 {Tcl_UtfPrev} testutfprev { -- cgit v0.12 From 7f1a39781fafdf8fc0e10176a7fed4e0713a8866 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 22 Apr 2020 21:28:17 +0000 Subject: Add optional second argument to [testutfnext] that can limit how many bytes are permitted to be read. Needed to test protection of buffer overruns. --- generic/tclTest.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index 65599be..76a827a 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -7123,27 +7123,54 @@ TestUtfNextCmd( int objc, Tcl_Obj *const objv[]) { - int numBytes; + int numBytes; /* Number of bytes supplied in the test string */ + int offset; /* Number of bytes we are permitted to read */ char *bytes; const char *result, *first; char buffer[32]; static const char tobetested[] = "\xFF\xFE\xF4\xF2\xF0\xEF\xE8\xE3\xE2\xE1\xE0\xC2\xC1\xC0\x82"; const char *p = tobetested; - if (objc != 2) { - Tcl_WrongNumArgs(interp, 1, objv, "bytes"); + if (objc < 2 || objc > 3) { + Tcl_WrongNumArgs(interp, 1, objv, "bytes ?numBytes?"); return TCL_ERROR; } + bytes = (char *) Tcl_GetByteArrayFromObj(objv[1], &numBytes); + offset = numBytes + TCL_UTF_MAX; /* If no constraint is given, allow + * the terminating NUL to limit + * operations. */ + + if (objc == 3) { + if (TCL_OK != TclGetIntForIndex(interp, objv[2], numBytes, &offset)) { + return TCL_ERROR; + } + if (offset < 0) { + offset = 0; + } + if (offset > numBytes + TCL_UTF_MAX) { + offset = numBytes + TCL_UTF_MAX; + } + } + if (numBytes > sizeof(buffer)-2) { - Tcl_AppendResult(interp, "\"testutfnext\" can only handle 30 bytes", NULL); + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "\"testutfnext\" can only handle %d bytes", + sizeof(buffer) - 2)); return TCL_ERROR; } memcpy(buffer + 1, bytes, numBytes); buffer[0] = buffer[numBytes + 1] = '\x00'; + if (!Tcl_UtfCharComplete(buffer + 1, offset)) { + /* Cannot scan a complete sequence from the data */ + + Tcl_SetObjResult(interp, Tcl_NewIntObj(0)); + return TCL_OK; + } + first = Tcl_UtfNext(buffer + 1); while ((buffer[0] = *p++) != '\0') { /* Run Tcl_UtfNext with many more possible bytes at src[-1], all should give the same result */ -- cgit v0.12 From 05fe66d4ee8a8ae3847782fba4717855d1961ceb Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 22 Apr 2020 22:13:53 +0000 Subject: (cherry-pick): Update documentation of Tcl_UtfPrev/Tcl_UtfNext back to how it was. Will be updated later, when implementation is ready and agreed upon. --- doc/Utf.3 | 21 +++++------------ generic/tclUtf.c | 70 +++++++++++++++++++------------------------------------- 2 files changed, 30 insertions(+), 61 deletions(-) diff --git a/doc/Utf.3 b/doc/Utf.3 index 55ef80d..e9bfaa7 100644 --- a/doc/Utf.3 +++ b/doc/Utf.3 @@ -141,7 +141,7 @@ source buffer is long enough such that this routine does not run off the end and dereference non-existent or random memory; if the source buffer is known to be null-terminated, this will not happen. If the input is not in proper UTF-8 format, \fBTcl_UtfToUniChar\fR will store the first -byte of \fIsrc\fR in \fI*chPtr\fR as a Tcl_UniChar between 0x0000 and +byte of \fIsrc\fR in \fI*chPtr\fR as a Tcl_UniChar between 0x0080 and 0x00FF and return 1. .PP \fBTcl_UniCharToUtfDString\fR converts the given Unicode string @@ -217,20 +217,11 @@ returns a pointer to the last occurrence of the Tcl_UniChar \fIch\fR in the null-terminated UTF-8 string \fIsrc\fR. The null terminator is considered part of the UTF-8 string. .PP -\fBTcl_UtfNext\fR is used to step forward through a UTF-8 string. -If the UTF-8 string is made up entirely of complete, well-formed, and -valid character byte sequences, and \fIsrc\fR points to the lead byte -of one of those sequences, then repeated calls of \fBTcl_UtfNext\fR will -return pointers to the lead bytes of each character in the string, one -character at a time. In any other circumstance, \fBTcl_UtfNext\fR -returns \fIsrc\fR+1. \fBTcl_UtfNext\fR will always read \fIsrc[0]\fR -and may read as many following bytes (up to a total of \fBTCL_UTF_MAX\fR) -as needed to find the end of the byte sequence. If the string is -\fBNUL\fR-terminated, \fBTcl_UtfNext\fR will not read beyond the terminating -\fBNUL\fR byte. If not, the caller must use the companion routine -\fBTcl_UtfCharComplete\fR to determine whether there is any risk -\fBTcl_UtfNext\fR might read beyond the readable memory occupied -by the string. +Given \fIsrc\fR, a pointer to some location in a UTF-8 string, +\fBTcl_UtfNext\fR returns a pointer to the next UTF-8 character in the +string. The caller must not ask for the next character after the last +character in the string if the string is not terminated by a null +character. .PP \fBTcl_UtfPrev\fR is used to step backward through but not beyond the UTF-8 string that begins at \fIstart\fR. If the UTF-8 string is made diff --git a/generic/tclUtf.c b/generic/tclUtf.c index a5e4fd4..e7048ee 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -717,35 +717,13 @@ Tcl_UtfFindLast( * * Tcl_UtfNext -- * - * The aim of this routine is to provide a way to iterate forward - * through a UTF-8 string. The caller is expected to pass a non-NULL - * pointer argument /src/ which points to a location within a string. - * (*src) will be read, so /src/ must not point to an unreadable - * location past the end of the string. If /src/ points to the - * beginning of a complete, well-formed and valid UTF_8 byte sequence - * of no more than TCL_UTF_MAX bytes, Tcl_UtfNext returns the pointer - * just past the end of that sequence. In any other circumstance, - * Tcl_UtfNext returns /src/+1. - * - * Because this routine always returns a value > /src/, it is useful - * as a forward iterator that will always make progress. If the string - * is NUL-terminated, Tcl_UtfNext will not read beyond the terminating - * NUL character. If it is not NUL-terminated, the caller must make - * use of the companion routine Tcl_UtfCharComplete to test whether - * there is risk that Tcl_UtfNext will read beyond the end of the string. - * Tcl_UtfNext will never read more than TCL_UTF_MAX bytes. - * - * In a string where all characters are complete and properly formed, - * and /src/ points to the first byte of a character, repeated - * Tcl_UtfNext calls will step to the starting bytes of characters, one - * character at a time. Within those limitations, Tcl_UtfPrev and - * Tcl_UtfNext are inverses. If either condition cannot be met, - * Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and the - * caller will have to take greater care. + * Given a pointer to some current location in a UTF-8 string, move + * forward one character. The caller must ensure that they are not asking + * for the next character after the last character in the string. * * Results: - * A pointer to the start of the next character in the string (or to - * the end of the string) as described above. + * The return value is the pointer to the next character in the UTF-8 + * string. * * Side effects: * None. @@ -786,37 +764,37 @@ Tcl_UtfNext( * * The aim of this routine is to provide a way to move backward * through a UTF-8 string. The caller is expected to pass non-NULL - * pointer arguments /start/ and /src/. /start/ points to the beginning - * of a string, and /src/ (>= /start/) points to a location within (or - * just past the end) of the string. This routine always returns a - * pointer within the string (>= /start/). When (/src/ == /start/), - * it returns /start/. When (/src/ > /start/), it returns a pointer - * (< /src/) and (>= /src/ - TCL_UTF_MAX). Subject to these constraints, - * the routine returns a pointer to the earliest byte in the string that - * starts a character when characters are read starting at /start/ and + * pointer arguments start and src. start points to the beginning + * of a string, and src >= start points to a location within (or just + * past the end) of the string. This routine always returns a + * pointer within the string (>= start). When (src == start), it + * returns start. When (src > start), it returns a pointer (< src) + * and (>= src - TCL_UTF_MAX). Subject to these constraints, the + * routine returns a pointer to the earliest byte in the string that + * starts a character when characters are read starting at start and * that character might include the byte src[-1]. The routine will * examine only those bytes in the range that might be returned. - * It will not examine the byte (*src), and because of that cannot + * It will not examine the byte *src, and because of that cannot * determine for certain in all circumstances whether the character * that begins with the returned pointer will or will not include - * the byte src[-1]. In the scenario where /src/ points to the end of - * a buffer being filled, the returned pointer points to either the + * the byte src[-1]. In the scenario, where src points to the end of + * a buffer being filled, the returned pointer point to either the * final complete character in the string or to the earliest byte * that might start an incomplete character waiting for more bytes to * complete. * - * Because this routine always returns a value < /src/ until the point - * it is forced to return /start/, it is useful as a backward iterator + * Because this routine always returns a value < src until the point + * it is forced to return start, it is useful as a backward iterator * through a string that will always make progress and always be * prevented from running past the beginning of the string. * * In a string where all characters are complete and properly formed, - * and /src/ points to the first byte of a character, repeated - * Tcl_UtfPrev calls will step to the starting bytes of characters, one - * character at a time. Within those limitations, Tcl_UtfPrev and - * Tcl_UtfNext are inverses. If either condition cannot be met, - * Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and the - * caller will have to take greater care. + * and the value of src points to the first byte of a character, + * repeated Tcl_UtfPrev calls will step to the starting bytes of + * characters, one character at a time. Within those limitations, + * Tcl_UtfPrev and Tcl_UtfNext are inverses. If either condition cannot + * be met, Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and + * the caller will have to take greater care. * * Results: * A pointer to the start of a character in the string as described -- cgit v0.12 From 3b1c00722a2401b6c4a6f80044364f6365da9749 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 22 Apr 2020 22:18:53 +0000 Subject: Collection of tests checking read limit protections calling Tcl_UtfNext. --- generic/tclTest.c | 8 ++-- tests/utf.test | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+), 4 deletions(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index 76a827a..d00c852 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -7138,7 +7138,7 @@ TestUtfNextCmd( bytes = (char *) Tcl_GetByteArrayFromObj(objv[1], &numBytes); - offset = numBytes + TCL_UTF_MAX; /* If no constraint is given, allow + offset = numBytes +TCL_UTF_MAX -1; /* If no constraint is given, allow * the terminating NUL to limit * operations. */ @@ -7149,12 +7149,12 @@ TestUtfNextCmd( if (offset < 0) { offset = 0; } - if (offset > numBytes + TCL_UTF_MAX) { - offset = numBytes + TCL_UTF_MAX; + if (offset > numBytes +TCL_UTF_MAX -1) { + offset = numBytes +TCL_UTF_MAX -1; } } - if (numBytes > sizeof(buffer)-2) { + if (numBytes > sizeof(buffer) - 2) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "\"testutfnext\" can only handle %d bytes", sizeof(buffer) - 2)); diff --git a/tests/utf.test b/tests/utf.test index beba98c..5058cbf 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -448,6 +448,123 @@ test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} tes test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { testutfnext \x80\x80\x80 } 1 +test utf-6.94 {Tcl_UtfNext, read limits} testutfnext { + testutfnext G 0 +} 0 +test utf-6.95 {Tcl_UtfNext, read limits} testutfnext { + testutfnext \xA0 0 +} 0 +test utf-6.96 {Tcl_UtfNext, read limits} testutfnext { + testutfnext AG 1 +} 1 +test utf-6.97 {Tcl_UtfNext, read limits} testutfnext { + testutfnext A\xA0 1 +} 1 +test utf-6.98 {Tcl_UtfNext, read limits} testutfnext { + testutfnext \xD0\xA0G 1 +} 0 +test utf-6.99 {Tcl_UtfNext, read limits} testutfnext { + testutfnext \xD0\xA0G 2 +} 2 +test utf-6.100 {Tcl_UtfNext, read limits} testutfnext { + testutfnext \xD0\xA0\xA0 1 +} 0 +test utf-6.101 {Tcl_UtfNext, read limits} testutfnext { + testutfnext \xD0\xA0\xA0 2 +} 2 +test utf-6.102 {Tcl_UtfNext, read limits} testutfnext { + testutfnext \xE8\xA0\xA0G 1 +} 0 +test utf-6.103 {Tcl_UtfNext, read limits} testutfnext { + testutfnext \xE8\xA0\xA0G 2 +} 0 +test utf-6.104 {Tcl_UtfNext, read limits} testutfnext { + testutfnext \xE8\xA0\xA0G 3 +} 3 +test utf-6.105 {Tcl_UtfNext, read limits} testutfnext { + testutfnext \xE8\xA0\xA0\xA0 1 +} 0 +test utf-6.106 {Tcl_UtfNext, read limits} testutfnext { + testutfnext \xE8\xA0\xA0\xA0 2 +} 0 +test utf-6.107 {Tcl_UtfNext, read limits} testutfnext { + testutfnext \xE8\xA0\xA0\xA0 3 +} 3 +test utf-6.108.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} { + testutfnext \xF2\xA0\xA0\xA0G 1 +} 1 +test utf-6.108.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { + testutfnext \xF2\xA0\xA0\xA0G 1 +} 0 +test utf-6.109.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} { + testutfnext \xF2\xA0\xA0\xA0G 2 +} 1 +test utf-6.109.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { + testutfnext \xF2\xA0\xA0\xA0G 2 +} 0 +test utf-6.110.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} { + testutfnext \xF2\xA0\xA0\xA0G 3 +} 1 +test utf-6.110.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { + testutfnext \xF2\xA0\xA0\xA0G 3 +} 0 +test utf-6.111.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} { + testutfnext \xF2\xA0\xA0\xA0G 4 +} 1 +test utf-6.111.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { + testutfnext \xF2\xA0\xA0\xA0G 4 +} 4 +test utf-6.112.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} { + testutfnext \xF2\xA0\xA0\xA0\xA0 1 +} 1 +test utf-6.112.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { + testutfnext \xF2\xA0\xA0\xA0\xA0 1 +} 0 +test utf-6.113.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} { + testutfnext \xF2\xA0\xA0\xA0\xA0 2 +} 1 +test utf-6.113.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { + testutfnext \xF2\xA0\xA0\xA0\xA0 2 +} 0 +test utf-6.114.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} { + testutfnext \xF2\xA0\xA0\xA0\xA0 3 +} 1 +test utf-6.114.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { + testutfnext \xF2\xA0\xA0\xA0\xA0 3 +} 0 +test utf-6.115.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} { + testutfnext \xF2\xA0\xA0\xA0\xA0 4 +} 1 +test utf-6.115.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { + testutfnext \xF2\xA0\xA0\xA0\xA0 4 +} 4 +test utf-6.116 {Tcl_UtfNext, read limits} testutfnext { + testutfnext \xA0G 0 +} 0 +test utf-6.117 {Tcl_UtfNext, read limits} testutfnext { + testutfnext \xA0G 1 +} 1 +test utf-6.118 {Tcl_UtfNext, read limits} testutfnext { + testutfnext \xA0\xA0 1 +} 1 +test utf-6.119 {Tcl_UtfNext, read limits} testutfnext { + testutfnext \xA0\xA0G 2 +} 1 +test utf-6.120 {Tcl_UtfNext, read limits} testutfnext { + testutfnext \xA0\xA0\xA0 2 +} 1 +test utf-6.121 {Tcl_UtfNext, read limits} testutfnext { + testutfnext \xA0\xA0\xA0G 3 +} 1 +test utf-6.122 {Tcl_UtfNext, read limits} testutfnext { + testutfnext \xA0\xA0\xA0\xA0 3 +} 1 +test utf-6.121 {Tcl_UtfNext, read limits} testutfnext { + testutfnext \xA0\xA0\xA0\xA0G 4 +} 1 +test utf-6.122 {Tcl_UtfNext, read limits} testutfnext { + testutfnext \xA0\xA0\xA0\xA0\xA0 4 +} 1 test utf-7.1 {Tcl_UtfPrev} testutfprev { testutfprev {} -- cgit v0.12 From 6852f6d630fb0cbb7521dc006203efb9697bc815 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 22 Apr 2020 22:31:39 +0000 Subject: test number reuse --- tests/utf.test | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/utf.test b/tests/utf.test index 5058cbf..2c8a001 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -358,7 +358,7 @@ test utf-6.70 {Tcl_UtfNext} testutfnext { test utf-6.71 {Tcl_UtfNext} testutfnext { testutfnext \xF2\xA0\xA0\xE8 } 1 -test utf-6.71 {Tcl_UtfNext} testutfnext { +test utf-6.72 {Tcl_UtfNext} testutfnext { testutfnext \xF2\xA0\xA0\xF2 } 1 test utf-6.73 {Tcl_UtfNext} testutfnext { @@ -559,10 +559,10 @@ test utf-6.121 {Tcl_UtfNext, read limits} testutfnext { test utf-6.122 {Tcl_UtfNext, read limits} testutfnext { testutfnext \xA0\xA0\xA0\xA0 3 } 1 -test utf-6.121 {Tcl_UtfNext, read limits} testutfnext { +test utf-6.123 {Tcl_UtfNext, read limits} testutfnext { testutfnext \xA0\xA0\xA0\xA0G 4 } 1 -test utf-6.122 {Tcl_UtfNext, read limits} testutfnext { +test utf-6.124 {Tcl_UtfNext, read limits} testutfnext { testutfnext \xA0\xA0\xA0\xA0\xA0 4 } 1 -- cgit v0.12 From d016b9e8e12b19aeb60ac6654100f0a3cfdb4b05 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 23 Apr 2020 12:19:42 +0000 Subject: Testcase cleanup --- tests/utf.test | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/utf.test b/tests/utf.test index 3af70c4..cb650f4 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -16,9 +16,9 @@ if {[lsearch [namespace children] ::tcltest] == -1} { ::tcltest::loadTestedCommands catch [list package require -exact Tcltest [info patchlevel]] -testConstraint ucs2 [expr {[format %c 0x010000] == "\uFFFD"}] -testConstraint fullutf [expr {[format %c 0x010000] != "\uFFFD"}] -testConstraint tip389 [expr {[string length \U010000] == 2}] +testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] +testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] +testConstraint tip389 [expr {[string length \U010000] eq 2}] testConstraint testbytestring [llength [info commands testbytestring]] testConstraint testfindfirst [llength [info commands testfindfirst]] @@ -377,7 +377,7 @@ test utf-6.70 {Tcl_UtfNext} testutfnext { test utf-6.71 {Tcl_UtfNext} testutfnext { testutfnext -bytestring \xF2\xA0\xA0\xE8 } 1 -test utf-6.71 {Tcl_UtfNext} testutfnext { +test utf-6.72 {Tcl_UtfNext} testutfnext { testutfnext -bytestring \xF2\xA0\xA0\xF2 } 1 test utf-6.73 {Tcl_UtfNext} testutfnext { @@ -1064,7 +1064,7 @@ test utf-20.1 {TclUniCharNcmp} { test utf-21.1 {TclUniCharIsAlnum} { # this returns 1 with Unicode 7 compliance string is alnum \u1040\u021F\u0220 -} {1} +} 1 test utf-21.2 {unicode alnum char in regc_locale.c} { # this returns 1 with Unicode 7 compliance list [regexp {^[[:alnum:]]+$} \u1040\u021F\u0220] [regexp {^\w+$} \u1040\u021F\u0220_\u203F\u2040\u2054\uFE33\uFE34\uFE4D\uFE4E\uFE4F\uFF3F] -- cgit v0.12 From b48319b304980c06ca5dd093770f8234eb8dbec5 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 23 Apr 2020 12:34:38 +0000 Subject: Testcase cleanup --- tests/utf.test | 99 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 50 insertions(+), 49 deletions(-) diff --git a/tests/utf.test b/tests/utf.test index a64ce7a..79f866a 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -16,8 +16,9 @@ if {[lsearch [namespace children] ::tcltest] == -1} { ::tcltest::loadTestedCommands catch [list package require -exact Tcltest [info patchlevel]] +testConstraint tip389 [expr {[string length \U010000] eq 2}] + testConstraint testbytestring [llength [info commands testbytestring]] -testConstraint tip389 [expr {[string length \U010000] == 2}] testConstraint testfindfirst [llength [info commands testfindfirst]] testConstraint testfindlast [llength [info commands testfindlast]] testConstraint testnumutfchars [llength [info commands testnumutfchars]] @@ -45,9 +46,9 @@ test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} testbytestring { test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring { expr {[format %c -1] eq [testbytestring "\xEF\xBF\xBD"]} } 1 -test utf-1.7 {Tcl_UniCharToUtf: 4 byte sequences} -constraints testbytestring -body { +test utf-1.7 {Tcl_UniCharToUtf: 4 byte sequences} testbytestring { expr {"\U014E4E" eq [testbytestring "\xF0\x94\xB9\x8E"]} -} -result 1 +} 1 test utf-1.8 {Tcl_UniCharToUtf: 3 byte sequence, high surrogate} testbytestring { expr {"\uD842" eq [testbytestring "\xED\xA1\x82"]} } 1 @@ -69,89 +70,89 @@ test utf-1.13 {Tcl_UniCharToUtf: Invalid surrogate} testbytestring { test utf-2.1 {Tcl_UtfToUniChar: low ascii} { string length "abc" -} {3} +} 3 test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} testbytestring { string length [testbytestring "\x82\x83\x84"] -} {3} +} 3 test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} testbytestring { string length [testbytestring "\xC2"] -} {1} +} 1 test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} testbytestring { string length [testbytestring "\xC2\xA2"] -} {1} +} 1 test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} testbytestring { string length [testbytestring "\xE2"] -} {1} +} 1 test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} testbytestring { string length [testbytestring "\xE2\xA2"] -} {2} +} 2 test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring { string length [testbytestring "\xE4\xB9\x8E"] -} {1} -test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {tip389 testbytestring} -body { +} 1 +test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {tip389 testbytestring} { string length [testbytestring "\xF0\x90\x80\x80"] -} -result {2} -test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {tip389 testbytestring} -body { +} 2 +test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {tip389 testbytestring} { string length [testbytestring "\xF4\x8F\xBF\xBF"] -} -result {2} +} 2 test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring { string length [testbytestring "\xF0\x8F\xBF\xBF"] -} {4} +} 4 test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} testbytestring { # Would decode to U+110000 but that is outside the Unicode range. string length [testbytestring "\xF4\x90\x80\x80"] -} {4} +} 4 test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestring { string length [testbytestring "\xF8\xA2\xA2\xA2\xA2"] -} {5} +} 5 test utf-3.1 {Tcl_UtfCharComplete} { } {} test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars { testnumutfchars "" -} {0} +} 0 test utf-4.2 {Tcl_NumUtfChars: length 1} {testnumutfchars testbytestring} { testnumutfchars [testbytestring "\xC2\xA2"] -} {1} +} 1 test utf-4.3 {Tcl_NumUtfChars: long string} {testnumutfchars testbytestring} { testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E"] -} {7} +} 7 test utf-4.4 {Tcl_NumUtfChars: #u0000} {testnumutfchars testbytestring} { testnumutfchars [testbytestring "\xC0\x80"] -} {1} +} 1 test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars { testnumutfchars "" 0 -} {0} +} 0 test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {testnumutfchars testbytestring} { testnumutfchars [testbytestring "\xC2\xA2"] end -} {1} +} 1 test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {testnumutfchars testbytestring} { testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"] end -} {7} +} 7 test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {testnumutfchars testbytestring} { testnumutfchars [testbytestring "\xC0\x80"] end -} {1} +} 1 # Bug [2738427]: Tcl_NumUtfChars(...) no overflow check test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} {testnumutfchars testbytestring} { testnumutfchars [testbytestring "\xE2\x82\xAC"] end-1 -} {2} +} 2 test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars testbytestring} { testnumutfchars [testbytestring "\x00"] end+1 -} {2} +} 2 test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end-1 -} {3} +} 3 test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring tip389} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end -} {2} +} 2 test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} { testfindfirst [testbytestring "abcbc"] 98 -} {bcbc} +} bcbc test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} { testfindlast [testbytestring "abcbc"] 98 -} {bc} +} bc test utf-6.1 {Tcl_UtfNext} testutfnext { # This takes the pointer one past the terminating NUL. @@ -368,7 +369,7 @@ test utf-6.70 {Tcl_UtfNext} testutfnext { test utf-6.71 {Tcl_UtfNext} testutfnext { testutfnext -bytestring \xF2\xA0\xA0\xE8 } 1 -test utf-6.71 {Tcl_UtfNext} testutfnext { +test utf-6.72 {Tcl_UtfNext} testutfnext { testutfnext -bytestring \xF2\xA0\xA0\xF2 } 1 test utf-6.73 {Tcl_UtfNext} testutfnext { @@ -708,13 +709,13 @@ test utf-7.49.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { test utf-8.1 {Tcl_UniCharAtIndex: index = 0} { string index abcd 0 -} {a} +} a test utf-8.2 {Tcl_UniCharAtIndex: index = 0} { string index \u4E4E\u25A 0 } "\u4E4E" test utf-8.3 {Tcl_UniCharAtIndex: index > 0} { string index abcd 2 -} {c} +} c test utf-8.4 {Tcl_UniCharAtIndex: index > 0} { string index \u4E4E\u25A\xFF\u543 2 } "\uFF" @@ -733,7 +734,7 @@ test utf-8.8 {Tcl_UniCharAtIndex: Emoji} { test utf-9.1 {Tcl_UtfAtIndex: index = 0} { string range abcd 0 2 -} {abc} +} abc test utf-9.2 {Tcl_UtfAtIndex: index > 0} { string range \u4E4E\u25A\xFF\u543klmnop 1 5 } "\u25A\xFF\u543kl" @@ -954,7 +955,7 @@ test utf-20.1 {TclUniCharNcmp} { test utf-21.1 {TclUniCharIsAlnum} { # this returns 1 with Unicode 7 compliance string is alnum \u1040\u021F\u0220 -} {1} +} 1 test utf-21.2 {unicode alnum char in regc_locale.c} { # this returns 1 with Unicode 7 compliance list [regexp {^[[:alnum:]]+$} \u1040\u021F\u0220] [regexp {^\w+$} \u1040\u021F\u0220_\u203F\u2040\u2054\uFE33\uFE34\uFE4D\uFE4E\uFE4F\uFF3F] @@ -966,39 +967,39 @@ test utf-21.3 {unicode print char in regc_locale.c} { test utf-21.4 {TclUniCharIsGraph} { # [Bug 3464428] string is graph \u0120 -} {1} +} 1 test utf-21.5 {unicode graph char in regc_locale.c} { # [Bug 3464428] regexp {^[[:graph:]]+$} \u0120 -} {1} +} 1 test utf-21.6 {TclUniCharIsGraph} { # [Bug 3464428] string is graph \xA0 -} {0} +} 0 test utf-21.7 {unicode graph char in regc_locale.c} { # [Bug 3464428] regexp {[[:graph:]]} \x20\xA0\u2028\u2029 -} {0} +} 0 test utf-21.8 {TclUniCharIsPrint} { # [Bug 3464428] string is print \x09 -} {0} +} 0 test utf-21.9 {unicode print char in regc_locale.c} { # [Bug 3464428] regexp {[[:print:]]} \x09 -} {0} +} 0 test utf-21.10 {unicode print char in regc_locale.c} { # [Bug 3464428] regexp {[[:print:]]} \x09 -} {0} +} 0 test utf-21.11 {TclUniCharIsControl} { # [Bug 3464428] string is control \x00\x1F\xAD\u0605\u061C\u180E\u2066\uFEFF -} {1} +} 1 test utf-21.12 {unicode control char in regc_locale.c} { # [Bug 3464428], [Bug a876646efe] regexp {^[[:cntrl:]]*$} \x00\x1F\xAD\u0605\u061C\u180E\u2066\uFEFF -} {1} +} 1 test utf-22.1 {TclUniCharIsWordChar} { string wordend "xyz123_bar fg" 0 @@ -1010,16 +1011,16 @@ test utf-22.2 {TclUniCharIsWordChar} { test utf-23.1 {TclUniCharIsAlpha} { # this returns 1 with Unicode 7 compliance string is alpha \u021F\u0220\u037F\u052F -} {1} +} 1 test utf-23.2 {unicode alpha char in regc_locale.c} { # this returns 1 with Unicode 7 compliance regexp {^[[:alpha:]]+$} \u021F\u0220\u037F\u052F -} {1} +} 1 test utf-24.1 {TclUniCharIsDigit} { # this returns 1 with Unicode 7 compliance string is digit \u1040\uABF0 -} {1} +} 1 test utf-24.2 {unicode digit char in regc_locale.c} { # this returns 1 with Unicode 7 compliance list [regexp {^[[:digit:]]+$} \u1040\uABF0] [regexp {^\d+$} \u1040\uABF0] @@ -1028,7 +1029,7 @@ test utf-24.2 {unicode digit char in regc_locale.c} { test utf-24.3 {TclUniCharIsSpace} { # this returns 1 with Unicode 7/TIP 413 compliance string is space \x85\u1680\u180E\u200B\u202F\u2060 -} {1} +} 1 test utf-24.4 {unicode space char in regc_locale.c} { # this returns 1 with Unicode 7/TIP 413 compliance list [regexp {^[[:space:]]+$} \x85\u1680\u180E\u200B\u202F\u2060] [regexp {^\s+$} \x85\u1680\u180E\u200B\u202F\u2060] -- cgit v0.12 From 2ca7ab9af0d59c9907dde3d844e1785d33df4812 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 23 Apr 2020 13:53:54 +0000 Subject: Fix TclUtfNext() macro. Use it in tclTest.c, so such a mistake can be detected next time. --- generic/tclInt.h | 2 +- generic/tclTest.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/generic/tclInt.h b/generic/tclInt.h index 11c9ec8..780ea30 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -4472,7 +4472,7 @@ MODULE_SCOPE void TclDbInitNewObj(Tcl_Obj *objPtr, const char *file, Tcl_UtfPrev(src, start)) #define TclUtfNext(src) \ - ((((unsigned char) *(src)) < 0xC0) ? src + 1 : Tcl_UtfNext(src)) + ((((unsigned char) *(src)) < 0x80) ? src + 1 : Tcl_UtfNext(src)) /* *---------------------------------------------------------------- diff --git a/generic/tclTest.c b/generic/tclTest.c index 539d188..856e9ea 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -6739,10 +6739,10 @@ TestUtfNextCmd( memcpy(buffer + 1, bytes, numBytes); buffer[0] = buffer[numBytes + 1] = '\x00'; - first = Tcl_UtfNext(buffer + 1); + first = TclUtfNext(buffer + 1); while ((buffer[0] = *p++) != '\0') { /* Run Tcl_UtfNext with many more possible bytes at src[-1], all should give the same result */ - result = Tcl_UtfNext(buffer + 1); + result = TclUtfNext(buffer + 1); if (first != result) { Tcl_AppendResult(interp, "Tcl_UtfNext is not supposed to read src[-1]", NULL); return TCL_ERROR; @@ -6795,7 +6795,7 @@ TestUtfPrevCmd( bytes = (char *) Tcl_SetByteArrayLength(copy, numBytes+1); bytes[numBytes] = '\0'; - result = Tcl_UtfPrev(bytes + offset, bytes); + result = TclUtfPrev(bytes + offset, bytes); Tcl_SetObjResult(interp, Tcl_NewIntObj(result - bytes)); Tcl_DecrRefCount(copy); -- cgit v0.12 From a9e7b242c6177d483c19da4f233e2792677300f7 Mon Sep 17 00:00:00 2001 From: sebres Date: Thu, 23 Apr 2020 14:52:10 +0000 Subject: documentation: descibes the empty list creation (with reserved space) where objv is NULL, like Tcl_NewListObj(n, NULL) --- doc/ListObj.3 | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/ListObj.3 b/doc/ListObj.3 index c0cc109..a4f2a09 100644 --- a/doc/ListObj.3 +++ b/doc/ListObj.3 @@ -138,7 +138,9 @@ create a new object or modify an existing object to hold the \fIobjc\fR elements of the array referenced by \fIobjv\fR where each element is a pointer to a Tcl object. If \fIobjc\fR is less than or equal to zero, -they return an empty object. +they return an empty object. If \fIobjv\fR is NULL, the resulting list +contains 0 elements, with reserved space in an internal representation +for \fIobjc\fR more elements (to avoid its reallocation later). The new object's string representation is left invalid. The two procedures increment the reference counts of the elements in \fIobjc\fR since the list object now refers to them. -- cgit v0.12 From 6eb9a20757397e7b55ba203a87ecb54afe7f563d Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 23 Apr 2020 19:04:57 +0000 Subject: Argument conditions for Invalid() call were not always satisfied. --- generic/tclUtf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 3741d70..550d528 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -660,7 +660,7 @@ Tcl_UtfNext( } next++; } - if (Invalid((unsigned char *)src)) { + if ((next == src + 1) || Invalid((unsigned char *)src)) { return src + 1; } return next; -- cgit v0.12 From e7186db8a96017cbfe8baf62cb3a23ce279c1bb0 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 23 Apr 2020 19:07:08 +0000 Subject: Fix regression in Tcl_NumUtfChars, caused by this commit: [6596c4af31e29b5d]. Expectations of failing tests was adapted later, that's why this was missed. Lesson: Tcl_UtfNext() is _not_ just an optimized replacement for Tcl_UtfToUniChar(). Sorry, but this change it just to dangerous! Tcl_UniCharAtIndex() and Tcl_UtfAtIndex() most likely have the same regression when fead with invalid byte-sequences, therefore reverted those too. HOLD ON! These regressions are equally the result of [5c322bbd51]. It takes both changes to cause the failing tests. We need to argue about which change was the wrong one. --- generic/tclUtf.c | 95 ++++++++++++++++---------------------------------------- tests/utf.test | 2 +- 2 files changed, 28 insertions(+), 69 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 0e11e0e..e095555 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -579,7 +579,7 @@ Tcl_NumUtfChars( int length) /* The length of the string in bytes, or -1 * for strlen(string). */ { - const char *next; + Tcl_UniChar ch = 0; register int i = 0; /* @@ -590,36 +590,22 @@ Tcl_NumUtfChars( */ if (length < 0) { - while ((*src != '\0') && (i < INT_MAX)) { - next = TclUtfNext(src); -#if TCL_UTF_MAX > 4 + while (*src != '\0') { + src += TclUtfToUniChar(src, &ch); i++; -#else - i += 1 + ((next - src) > 3); -#endif - src = next; } + if (i < 0) i = INT_MAX; /* Bug [2738427] */ } else { register const char *endPtr = src + length - TCL_UTF_MAX; while (src < endPtr) { - next = TclUtfNext(src); -#if TCL_UTF_MAX > 4 + src += TclUtfToUniChar(src, &ch); i++; -#else - i += 1 + ((next - src) > 3); -#endif - src = next; } endPtr += TCL_UTF_MAX; while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { - next = TclUtfNext(src); -#if TCL_UTF_MAX > 4 + src += TclUtfToUniChar(src, &ch); i++; -#else - i += 1 + ((next - src) > 3); -#endif - src = next; } if (src < endPtr) { i += endPtr - src; @@ -762,43 +748,15 @@ Tcl_UtfNext( * * Tcl_UtfPrev -- * - * The aim of this routine is to provide a way to move backward - * through a UTF-8 string. The caller is expected to pass non-NULL - * pointer arguments start and src. start points to the beginning - * of a string, and src >= start points to a location within (or just - * past the end) of the string. This routine always returns a - * pointer within the string (>= start). When (src == start), it - * returns start. When (src > start), it returns a pointer (< src) - * and (>= src - TCL_UTF_MAX). Subject to these constraints, the - * routine returns a pointer to the earliest byte in the string that - * starts a character when characters are read starting at start and - * that character might include the byte src[-1]. The routine will - * examine only those bytes in the range that might be returned. - * It will not examine the byte *src, and because of that cannot - * determine for certain in all circumstances whether the character - * that begins with the returned pointer will or will not include - * the byte src[-1]. In the scenario, where src points to the end of - * a buffer being filled, the returned pointer points to either the - * final complete character in the string or to the earliest byte - * that might start an incomplete character waiting for more bytes to - * complete. - * - * Because this routine always returns a value < src until the point - * it is forced to return start, it is useful as a backward iterator - * through a string that will always make progress and always be - * prevented from running past the beginning of the string. - * - * In a string where all characters are complete and properly formed, - * and the value of src points to the first byte of a character, - * repeated Tcl_UtfPrev calls will step to the starting bytes of - * characters, one character at a time. Within those limitations, - * Tcl_UtfPrev and Tcl_UtfNext are inverses. If either condition cannot - * be met, Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and - * the caller will have to take greater care. + * Given a pointer to some current location in a UTF-8 string, move + * backwards one character. This works correctly when the pointer is in + * the middle of a UTF-8 character. * * Results: - * A pointer to the start of a character in the string as described - * above. + * The return value is a pointer to the previous character in the UTF-8 + * string. If the current location was already at the beginning of the + * string, the return value will also be a pointer to the beginning of + * the string. * * Side effects: * None. @@ -927,7 +885,9 @@ Tcl_UniCharAtIndex( { Tcl_UniChar ch = 0; - TclUtfToUniChar(Tcl_UtfAtIndex(src, index), &ch); + while (index-- >= 0) { + src += TclUtfToUniChar(src, &ch); + } return ch; } @@ -953,20 +913,19 @@ Tcl_UtfAtIndex( register const char *src, /* The UTF-8 string. */ register int index) /* The position of the desired character. */ { - while (index-- > 0) { - const char *next = TclUtfNext(src); - -#if TCL_UTF_MAX <= 4 - /* - * 4-byte sequences generate two UCS-2 code units in the - * UTF-16 representation, so in the current indexing scheme - * we need to account for an extra index (total of two). - */ - index -= ((next - src) > 3); -#endif + Tcl_UniChar ch = 0; + int len = 0; - src = next; + while (index-- > 0) { + len = TclUtfToUniChar(src, &ch); + src += len; } +#if TCL_UTF_MAX == 4 + if ((ch >= 0xD800) && (len < 3)) { + /* Index points at character following high Surrogate */ + src += TclUtfToUniChar(src, &ch); + } +#endif return src; } diff --git a/tests/utf.test b/tests/utf.test index cb650f4..3f20ace 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -147,7 +147,7 @@ test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfc } 3 test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs2} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4 -} 4 +} 2 test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring tip389} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4 } 2 -- cgit v0.12 From 7d743964557590063e80992e255c4d5eeaeb0349 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 23 Apr 2020 19:14:29 +0000 Subject: Revert change in ParseLexeme() too --- generic/tclCompExpr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c index ed4e958..9c7ab8d 100644 --- a/generic/tclCompExpr.c +++ b/generic/tclCompExpr.c @@ -1885,6 +1885,7 @@ ParseLexeme( { const char *end; int scanned; + Tcl_UniChar ch = 0; Tcl_Obj *literal = NULL; unsigned char byte; @@ -2063,13 +2064,13 @@ ParseLexeme( if (!TclIsBareword(*start) || *start == '_') { if (Tcl_UtfCharComplete(start, numBytes)) { - scanned = TclUtfNext(start) - start; + scanned = TclUtfToUniChar(start, &ch); } else { char utfBytes[TCL_UTF_MAX]; memcpy(utfBytes, start, (size_t) numBytes); utfBytes[numBytes] = '\0'; - scanned = TclUtfNext(utfBytes) - utfBytes; + scanned = TclUtfToUniChar(utfBytes, &ch); } *lexemePtr = INVALID; Tcl_DecrRefCount(literal); -- cgit v0.12 From 4ca994daf1016eb4a36b7f9a7a16fc1f7df5b5a3 Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 23 Apr 2020 20:22:59 +0000 Subject: Demonstrate that the failing tests on the 8.6 branch tip can equally well be solved by backing out the recent changes associated with [27944a3661]. --- generic/tclUtf.c | 11 ++--------- tests/utf.test | 10 +++++----- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 0e11e0e..422c501 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -68,8 +68,8 @@ static const unsigned char totalBytes[256] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, #else /* Tcl_UtfCharComplete() might point to 2nd byte of valid 4-byte sequence */ - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, #endif 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, @@ -733,13 +733,6 @@ Tcl_UtfNext( int left = totalBytes[UCHAR(*src)]; const char *next = src + 1; - if (((*src) & 0xC0) == 0x80) { - if ((((*++src) & 0xC0) == 0x80) && (((*++src) & 0xC0) == 0x80)) { - ++src; - } - return src; - } - while (--left) { if ((*next & 0xC0) != 0x80) { /* diff --git a/tests/utf.test b/tests/utf.test index cb650f4..84f3f38 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -193,7 +193,7 @@ test utf-6.10 {Tcl_UtfNext} testutfnext { } 1 test utf-6.11 {Tcl_UtfNext} testutfnext { testutfnext -bytestring \xA0\xA0 -} 2 +} 1 test utf-6.12 {Tcl_UtfNext} testutfnext { testutfnext -bytestring \xA0\xD0 } 1 @@ -448,10 +448,10 @@ test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} { } 4 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} testutfnext { testutfnext -bytestring \xA0\xA0 -} 2 +} 1 test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} testutfnext { testutfnext -bytestring \x80\x80 -} 2 +} 1 test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs2} { testutfnext -bytestring \xF4\x8F\xBF\xBF } 1 @@ -466,10 +466,10 @@ test utf-6.91.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} } 1 test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { testutfnext -bytestring \xA0\xA0\xA0 -} 3 +} 1 test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { testutfnext -bytestring \x80\x80\x80 -} 3 +} 1 test utf-7.1 {Tcl_UtfPrev} testutfprev { testutfprev {} -- cgit v0.12 From 423df1f681b111dff8a5633b5aaa6e0049aaeddf Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 24 Apr 2020 12:26:09 +0000 Subject: Fix GCC warning in MemDebug mode: format not a string literal and no format arguments [-Wformat-security] --- generic/tclCkalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclCkalloc.c b/generic/tclCkalloc.c index 5263e82..b6616cd 100644 --- a/generic/tclCkalloc.c +++ b/generic/tclCkalloc.c @@ -187,7 +187,7 @@ TclDumpMemoryInfo(ClientData clientData, int flags) maximum_malloc_packets, (unsigned long)maximum_bytes_malloced); if (flags == 0) { - fprintf((FILE *)clientData, buf); + fprintf((FILE *)clientData, "%s", buf); } else { /* Assume objPtr to append to */ Tcl_AppendToObj((Tcl_Obj *) clientData, buf, -1); -- cgit v0.12 From 88661ac0e967d6c68f8ecdb76646b652214fe25b Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 24 Apr 2020 15:10:23 +0000 Subject: Add protections against overflow in Unicode values. Backported from 8.6. Also remove some out-of-date comments. --- generic/regc_lex.c | 22 ++++++-- generic/tclEncoding.c | 13 +---- generic/tclInt.h | 2 - generic/tclParse.c | 152 +++++++++++++++++++++++++++++--------------------- tests/encoding.test | 14 +++-- 5 files changed, 115 insertions(+), 88 deletions(-) diff --git a/generic/regc_lex.c b/generic/regc_lex.c index dab061a..f49c162 100644 --- a/generic/regc_lex.c +++ b/generic/regc_lex.c @@ -63,7 +63,7 @@ /* - lexstart - set up lexical stuff, scan leading options - ^ static VOID lexstart(struct vars *); + ^ static void lexstart(struct vars *); */ static void lexstart( @@ -89,7 +89,7 @@ lexstart( /* - prefixes - implement various special prefixes - ^ static VOID prefixes(struct vars *); + ^ static void prefixes(struct vars *); */ static void prefixes( @@ -207,7 +207,7 @@ prefixes( - lexnest - "call a subroutine", interpolating string at the lexical level * Note, this is not a very general facility. There are a number of * implicit assumptions about what sorts of strings can be subroutines. - ^ static VOID lexnest(struct vars *, const chr *, const chr *); + ^ static void lexnest(struct vars *, const chr *, const chr *); */ static void lexnest( @@ -288,7 +288,7 @@ static const chr brbackw[] = { /* \w within brackets */ /* - lexword - interpolate a bracket expression for word characters * Possibly ought to inquire whether there is a "word" character class. - ^ static VOID lexword(struct vars *); + ^ static void lexword(struct vars *); */ static void lexword( @@ -755,6 +755,7 @@ lexescape( struct vars *v) { chr c; + int i; static const chr alert[] = { CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t') }; @@ -831,17 +832,26 @@ lexescape( RETV(PLAIN, CHR('\t')); break; case CHR('u'): - c = lexdigits(v, 16, 4, 4); + c = (uchr) lexdigits(v, 16, 1, 4); if (ISERR()) { FAILW(REG_EESCAPE); } RETV(PLAIN, c); break; case CHR('U'): - c = lexdigits(v, 16, 8, 8); + i = lexdigits(v, 16, 8, 8); if (ISERR()) { FAILW(REG_EESCAPE); } +#if CHRBITS > 16 + if ((unsigned)i > 0x10FFFF) { + i = 0xFFFD; + } +#else + if ((unsigned)i & ~0xFFFF) { + i = 0xFFFD; + } +#endif RETV(PLAIN, c); break; case CHR('v'): diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index ac9f667..0824c4f 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2471,11 +2471,6 @@ UtfToUnicodeProc( break; } src += TclUtfToUniChar(src, &ch); - /* - * Need to handle this in a way that won't cause misalignment - * by casting dst to a Tcl_UniChar. [Bug 1122671] - * XXX: This hard-codes the assumed size of Tcl_UniChar as 2. - */ #if TCL_UTF_MAX > 3 if (ch & ~0xFFFF) { ch = 0xFFFD; @@ -2685,12 +2680,8 @@ TableFromUtfProc( len = TclUtfToUniChar(src, &ch); #if TCL_UTF_MAX > 3 - /* - * This prevents a crash condition. More evaluation is required for - * full support of int Tcl_UniChar. [Bug 1004065] - */ - - if (ch & 0xffff0000) { + /* Unicode chars > +U0FFFF cannot be represented in any table encoding */ + if (ch & ~0xFFFF) { word = 0; } else #endif diff --git a/generic/tclInt.h b/generic/tclInt.h index 3fa14e7..b6d6a88 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2650,8 +2650,6 @@ MODULE_SCOPE int TclObjUnsetVar2(Tcl_Interp *interp, Tcl_Obj *part1Ptr, Tcl_Obj *part2Ptr, int flags); MODULE_SCOPE int TclParseBackslash(const char *src, int numBytes, int *readPtr, char *dst); -MODULE_SCOPE int TclParseHex(const char *src, int numBytes, - Tcl_UniChar *resultPtr); MODULE_SCOPE int TclParseNumber(Tcl_Interp *interp, Tcl_Obj *objPtr, const char *expected, const char *bytes, int numBytes, const char **endPtrPtr, int flags); diff --git a/generic/tclParse.c b/generic/tclParse.c index 7bead99..cfd6337 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -176,19 +176,21 @@ static int ParseTokens(const char *src, int numBytes, int mask, int flags, Tcl_Parse *parsePtr); static int ParseWhiteSpace(const char *src, int numBytes, int *incompletePtr, char *typePtr); +static int ParseHex(const char *src, int numBytes, + int *resultPtr); /* *---------------------------------------------------------------------- * * TclParseInit -- * - * Initialize the fields of a Tcl_Parse struct. + * Initialize the fields of a Tcl_Parse struct. * * Results: - * None. + * None. * * Side effects: - * The Tcl_Parse struct pointed to by parsePtr gets initialized. + * The Tcl_Parse struct pointed to by parsePtr gets initialized. * *---------------------------------------------------------------------- */ @@ -251,7 +253,7 @@ Tcl_ParseCommand( * command terminator. If zero, then close * bracket has no special meaning. */ register Tcl_Parse *parsePtr) - /* Structure to fill in with information about + /* Structure to fill in with information about * the parsed command; any previous * information in the structure is ignored. */ { @@ -496,9 +498,10 @@ Tcl_ParseCommand( * tokens representing the expanded list. */ - CONST char *listStart; + const char *listStart; int growthNeeded = wordIndex + 2*elemCount - parsePtr->numTokens; + parsePtr->numWords += elemCount - 1; if (growthNeeded > 0) { TclGrowParseTokenArray(parsePtr, growthNeeded); @@ -713,7 +716,7 @@ ParseWhiteSpace( if (p[1] != '\n') { break; } - p+=2; + p += 2; if (--numBytes == 0) { *incompletePtr = 1; break; @@ -761,7 +764,7 @@ TclParseAllWhiteSpace( /* *---------------------------------------------------------------------- * - * TclParseHex -- + * ParseHex -- * * Scans a hexadecimal number as a Tcl_UniChar value (e.g., for parsing * \x and \u escape sequences). At most numBytes bytes are scanned. @@ -781,24 +784,24 @@ TclParseAllWhiteSpace( */ int -TclParseHex( +ParseHex( const char *src, /* First character to parse. */ int numBytes, /* Max number of byes to scan */ - Tcl_UniChar *resultPtr) /* Points to storage provided by caller where - * the Tcl_UniChar resulting from the + int *resultPtr) /* Points to storage provided by caller where + * the character resulting from the * conversion is to be written. */ { - Tcl_UniChar result = 0; + int result = 0; register const char *p = src; while (numBytes--) { unsigned char digit = UCHAR(*p); - if (!isxdigit(digit)) { + if (!isxdigit(digit) || (result > 0x10FFF)) { break; } - ++p; + p++; result <<= 4; if (digit >= 'a') { @@ -823,14 +826,14 @@ TclParseHex( * sequence as defined by Tcl's parsing rules. * * Results: - * Records at readPtr the number of bytes making up the backslash - * sequence. Records at dst the UTF-8 encoded equivalent of that - * backslash sequence. Returns the number of bytes written to dst, at - * most TCL_UTF_MAX. Either readPtr or dst may be NULL, if the results - * are not needed, but the return value is the same either way. + * Records at readPtr the number of bytes making up the backslash + * sequence. Records at dst the UTF-8 encoded equivalent of that + * backslash sequence. Returns the number of bytes written to dst, at + * most TCL_UTF_MAX. Either readPtr or dst may be NULL, if the results + * are not needed, but the return value is the same either way. * * Side effects: - * None. + * None. * *---------------------------------------------------------------------- */ @@ -848,7 +851,8 @@ TclParseBackslash( * written there. */ { register const char *p = src+1; - Tcl_UniChar result; + Tcl_UniChar unichar = 0; + int result; int count; char buf[TCL_UTF_MAX]; @@ -876,7 +880,7 @@ TclParseBackslash( count = 2; switch (*p) { /* - * Note: in the conversions below, use absolute values (e.g., 0xa) + * Note: in the conversions below, use absolute values (e.g., 0xA) * rather than symbolic values (e.g. \n) that get converted by the * compiler. It's possible that compilers on some platforms will do * the symbolic conversions differently, which could result in @@ -890,22 +894,22 @@ TclParseBackslash( result = 0x8; break; case 'f': - result = 0xc; + result = 0xC; break; case 'n': - result = 0xa; + result = 0xA; break; case 'r': - result = 0xd; + result = 0xD; break; case 't': result = 0x9; break; case 'v': - result = 0xb; + result = 0xB; break; case 'x': - count += TclParseHex(p+1, numBytes-2, &result); + count += ParseHex(p+1, numBytes-2, &result); if (count == 2) { /* * No hexadigits -> This is just "x". @@ -920,7 +924,7 @@ TclParseBackslash( } break; case 'u': - count += TclParseHex(p+1, (numBytes > 5) ? 4 : numBytes-2, &result); + count += ParseHex(p+1, (numBytes > 5) ? 4 : numBytes-2, &result); if (count == 2) { /* * No hexadigits -> This is just "u". @@ -928,6 +932,20 @@ TclParseBackslash( result = 'u'; } break; +#if TCL_UTF_MAX > 3 + case 'U': + count += ParseHex(p+1, (numBytes > 9) ? 8 : numBytes-2, &result); + if (count == 2) { + /* + * No hexadigits -> This is just "U". + */ + result = 'U'; + } else if ((result | 0x7FF) == 0xDFFF) { + /* Upper or lower surrogate, not allowed in this syntax. */ + result = 0xFFFD; + } + break; +#endif case '\n': count--; do { @@ -946,21 +964,21 @@ TclParseBackslash( */ if (isdigit(UCHAR(*p)) && (UCHAR(*p) < '8')) { /* INTL: digit */ - result = (unsigned char)(*p - '0'); + result = UCHAR(*p - '0'); p++; if ((numBytes == 2) || !isdigit(UCHAR(*p)) /* INTL: digit */ || (UCHAR(*p) >= '8')) { break; } count = 3; - result = (unsigned char)((result << 3) + (*p - '0')); + result = UCHAR((result << 3) + (*p - '0')); p++; if ((numBytes == 3) || !isdigit(UCHAR(*p)) /* INTL: digit */ || (UCHAR(*p) >= '8')) { break; } count = 4; - result = (unsigned char)((result << 3) + (*p - '0')); + result = UCHAR((result << 3) + (*p - '0')); break; } @@ -972,14 +990,15 @@ TclParseBackslash( */ if (Tcl_UtfCharComplete(p, numBytes - 1)) { - count = Tcl_UtfToUniChar(p, &result) + 1; /* +1 for '\' */ + count = Tcl_UtfToUniChar(p, &unichar) + 1; /* +1 for '\' */ } else { char utfBytes[TCL_UTF_MAX]; memcpy(utfBytes, p, (size_t) (numBytes - 1)); utfBytes[numBytes - 1] = '\0'; - count = Tcl_UtfToUniChar(utfBytes, &result) + 1; + count = Tcl_UtfToUniChar(utfBytes, &unichar) + 1; } + result = unichar; break; } @@ -987,7 +1006,7 @@ TclParseBackslash( if (readPtr != NULL) { *readPtr = count; } - return Tcl_UniCharToUtf((int) result, dst); + return Tcl_UniCharToUtf(result, dst); } /* @@ -999,11 +1018,11 @@ TclParseBackslash( * defined by Tcl's parsing rules. * * Results: - * Records in parsePtr information about the parse. Returns the number of - * bytes consumed. + * Records in parsePtr information about the parse. Returns the number of + * bytes consumed. * * Side effects: - * None. + * None. * *---------------------------------------------------------------------- */ @@ -1156,7 +1175,7 @@ ParseTokens( } /* - * This is a variable reference. Call Tcl_ParseVarName to do all + * This is a variable reference. Call Tcl_ParseVarName to do all * the dirty work of parsing the name. */ @@ -1180,7 +1199,7 @@ ParseTokens( } /* - * Command substitution. Call Tcl_ParseCommand recursively (and + * Command substitution. Call Tcl_ParseCommand recursively (and * repeatedly) to parse the nested command(s), then throw away the * parse information. */ @@ -1191,7 +1210,7 @@ ParseTokens( TclStackAlloc(parsePtr->interp, sizeof(Tcl_Parse)); while (1) { const char *curEnd; - + if (Tcl_ParseCommand(parsePtr->interp, src, numBytes, 1, nestedPtr) != TCL_OK) { parsePtr->errorType = nestedPtr->errorType; @@ -1337,7 +1356,7 @@ Tcl_FreeParse( * call to Tcl_ParseCommand. */ { if (parsePtr->tokenPtr != parsePtr->staticTokens) { - ckfree((char *) parsePtr->tokenPtr); + ckfree((char *)parsePtr->tokenPtr); parsePtr->tokenPtr = parsePtr->staticTokens; } } @@ -1661,7 +1680,7 @@ Tcl_ParseBraces( * the string consists of all bytes up to the * first null character. */ register Tcl_Parse *parsePtr, - /* Structure to fill in with information about + /* Structure to fill in with information about * the string. */ int append, /* Non-zero means append tokens to existing * information in parsePtr; zero means ignore @@ -1862,7 +1881,7 @@ Tcl_ParseQuotedString( * the string consists of all bytes up to the * first null character. */ register Tcl_Parse *parsePtr, - /* Structure to fill in with information about + /* Structure to fill in with information about * the string. */ int append, /* Non-zero means append tokens to existing * information in parsePtr; zero means ignore @@ -2167,13 +2186,13 @@ Tcl_SubstObj( * non-TCL_OK completion code arises. * * Results: - * The return value is a standard Tcl completion code. The result in - * interp is the substituted value, or an error message if TCL_ERROR is - * returned. If tokensLeftPtr is not NULL, then it points to an int where - * the number of tokens remaining to be processed is written. + * The return value is a standard Tcl completion code. The result in + * interp is the substituted value, or an error message if TCL_ERROR is + * returned. If tokensLeftPtr is not NULL, then it points to an int where + * the number of tokens remaining to be processed is written. * * Side effects: - * Can be anything, depending on the types of substitution done. + * Can be anything, depending on the types of substitution done. * *---------------------------------------------------------------------- */ @@ -2212,8 +2231,8 @@ TclSubstTokens( int code = TCL_OK; #define NUM_STATIC_POS 20 int isLiteral, maxNumCL, numCL, i, adjust; - int* clPosition = NULL; - Interp* iPtr = (Interp*) interp; + int *clPosition = NULL; + Interp *iPtr = (Interp *) interp; int inFile = iPtr->evalFlags & TCL_EVAL_FILE; /* @@ -2230,24 +2249,24 @@ TclSubstTokens( * For the handling of continuation lines in literals we first check if * this is actually a literal. For if not we can forego the additional * processing. Otherwise we pre-allocate a small table to store the - * locations of all continuation lines we find in this literal, if - * any. The table is extended if needed. + * locations of all continuation lines we find in this literal, if any. + * The table is extended if needed. */ - numCL = 0; - maxNumCL = 0; + numCL = 0; + maxNumCL = 0; isLiteral = 1; for (i=0 ; i < count; i++) { - if ((tokenPtr[i].type != TCL_TOKEN_TEXT) && - (tokenPtr[i].type != TCL_TOKEN_BS)) { + if ((tokenPtr[i].type != TCL_TOKEN_TEXT) + && (tokenPtr[i].type != TCL_TOKEN_BS)) { isLiteral = 0; break; } } if (isLiteral) { - maxNumCL = NUM_STATIC_POS; - clPosition = (int*) ckalloc (maxNumCL*sizeof(int)); + maxNumCL = NUM_STATIC_POS; + clPosition = (int *)ckalloc(maxNumCL * sizeof(int)); } adjust = 0; @@ -2268,6 +2287,7 @@ TclSubstTokens( appendByteLength = TclParseBackslash(tokenPtr->start, tokenPtr->size, NULL, utfCharBytes); append = utfCharBytes; + /* * If the backslash sequence we found is in a literal, and * represented a continuation line, we compute and store its @@ -2287,6 +2307,7 @@ TclSubstTokens( (tokenPtr->start[1] == '\n')) { if (isLiteral) { int clPos; + if (result == 0) { clPos = 0; } else { @@ -2295,13 +2316,13 @@ TclSubstTokens( if (numCL >= maxNumCL) { maxNumCL *= 2; - clPosition = (int*) ckrealloc ((char*)clPosition, + clPosition = (int *)ckrealloc ((char*)clPosition, maxNumCL*sizeof(int)); } clPosition[numCL] = clPos; - numCL ++; + numCL++; } - adjust ++; + adjust++; } break; @@ -2316,8 +2337,9 @@ TclSubstTokens( */ int theline; - TclAdvanceContinuations (&line, &clNextOuter, - tokenPtr->start - outerScript); + + TclAdvanceContinuations(&line, &clNextOuter, + tokenPtr->start - outerScript); theline = line + adjust; /* TIP #280: Transfer line information to nested command */ code = TclEvalEx(interp, tokenPtr->start+1, tokenPtr->size-2, @@ -2326,7 +2348,8 @@ TclSubstTokens( * Restore flag reset by nested eval for future bracketed * commands and their cmdframe setup */ - if (inFile) { + + if (inFile) { iPtr->evalFlags |= TCL_EVAL_FILE; } } @@ -2429,6 +2452,7 @@ TclSubstTokens( if (code != TCL_ERROR) { /* Keep error message in result! */ if (result != NULL) { Tcl_SetObjResult(interp, result); + /* * If the code found continuation lines (which implies that this * word is a literal), then we store the accumulated table of @@ -2447,7 +2471,7 @@ TclSubstTokens( */ if (maxNumCL) { - ckfree ((char*) clPosition); + ckfree((char*) clPosition); } } else { Tcl_ResetResult(interp); diff --git a/tests/encoding.test b/tests/encoding.test index 8722a93..c259327 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -31,6 +31,7 @@ proc runtests {} { # Some tests require the testencoding command testConstraint testencoding [llength [info commands testencoding]] testConstraint exec [llength [info commands exec]] +testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] # TclInitEncodingSubsystem is tested by the rest of this file # TclFinalizeEncodingSubsystem is not currently tested @@ -267,16 +268,16 @@ test encoding-11.6 {LoadEncodingFile: invalid file} {testencoding} { # OpenEncodingFile is fully tested by the rest of the tests in this file. test encoding-12.1 {LoadTableEncoding: normal encoding} { - set x [encoding convertto iso8859-3 \u120] - append x [encoding convertto iso8859-3 \ud5] - append x [encoding convertfrom iso8859-3 \xd5] + set x [encoding convertto iso8859-3 \u0120] + append x [encoding convertto iso8859-3 \xD5] + append x [encoding convertfrom iso8859-3 \xD5] } "\xd5?\u120" test encoding-12.2 {LoadTableEncoding: single-byte encoding} { set x [encoding convertto iso8859-3 ab\u0120g] - append x [encoding convertfrom iso8859-3 ab\xd5g] + append x [encoding convertfrom iso8859-3 ab\xD5g] } "ab\xd5gab\u120g" test encoding-12.3 {LoadTableEncoding: multi-byte encoding} { - set x [encoding convertto shiftjis ab\u4e4eg] + set x [encoding convertto shiftjis ab\u4E4Eg] append x [encoding convertfrom shiftjis ab\x8c\xc1g] } "ab\x8c\xc1gab\u4e4eg" test encoding-12.4 {LoadTableEncoding: double-byte encoding} { @@ -288,6 +289,9 @@ test encoding-12.5 {LoadTableEncoding: symbol encoding} { append x [encoding convertto symbol \u67] append x [encoding convertfrom symbol \x67] } "\x67\x67\u3b3" +test encoding-12.6 {LoadTableEncoding: overflow in char value} fullutf { + encoding convertto iso8859-3 \U01000 +} "?" test encoding-13.1 {LoadEscapeTable} { viewable [set x [encoding convertto iso2022 ab\u4e4e\u68d9g]] -- cgit v0.12 From 5db75c45c9ce6e1fa62becdb0da954f712c391b8 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 24 Apr 2020 15:17:57 +0000 Subject: Missing '0' --- tests/encoding.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/encoding.test b/tests/encoding.test index c259327..ec1d111 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -290,7 +290,7 @@ test encoding-12.5 {LoadTableEncoding: symbol encoding} { append x [encoding convertfrom symbol \x67] } "\x67\x67\u3b3" test encoding-12.6 {LoadTableEncoding: overflow in char value} fullutf { - encoding convertto iso8859-3 \U01000 + encoding convertto iso8859-3 \U010000 } "?" test encoding-13.1 {LoadEscapeTable} { -- cgit v0.12 From 252880a07caecf28ba061cfb72f90a9c4cd43ef6 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 24 Apr 2020 15:38:23 +0000 Subject: Make (unsupported) 'U' regex escaping work again. --- generic/regc_lex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/regc_lex.c b/generic/regc_lex.c index f49c162..f8fa772 100644 --- a/generic/regc_lex.c +++ b/generic/regc_lex.c @@ -852,7 +852,7 @@ lexescape( i = 0xFFFD; } #endif - RETV(PLAIN, c); + RETV(PLAIN, (uchr)i); break; case CHR('v'): RETV(PLAIN, CHR('\v')); -- cgit v0.12 From cfdcea56833526b4caba822ff1ea8771dd2833b8 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 24 Apr 2020 16:31:36 +0000 Subject: Revise tests to reflect fixed bug in RE parsing of \uHHHH escapes. --- tests/reg.test | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/reg.test b/tests/reg.test index 6cd2eb3..7e66f6d 100644 --- a/tests/reg.test +++ b/tests/reg.test @@ -622,7 +622,8 @@ expectMatch 13.13 P "a\\nb" "a\nb" "a\nb" expectMatch 13.14 P "a\\rb" "a\rb" "a\rb" expectMatch 13.15 P "a\\tb" "a\tb" "a\tb" expectMatch 13.16 P "a\\u0008x" "a\bx" "a\bx" -expectError 13.17 - {a\u008x} EESCAPE +expectMatch 13.17 P {a\u008x} "a\bx" "a\bx" +expectError 13.17.1 - {a\ux} EESCAPE expectMatch 13.18 P "a\\u00088x" "a\b8x" "a\b8x" expectMatch 13.19 P "a\\U00000008x" "a\bx" "a\bx" expectError 13.20 - {a\U0000008x} EESCAPE -- cgit v0.12 From 59343a872e8587e5da4eb52956d453af67535f5e Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 24 Apr 2020 17:18:23 +0000 Subject: Revert the parts of [76213b3f72] that converted callers of Tcl_UtfToUniChar into callers of Tcl_UtfNext. With this reversion, any future divergence between those two will not harm these callers. Retain the tests, and retain the new implementation of Tcl_UtfNext itself and its new macro form. --- generic/tclCompExpr.c | 5 +++-- generic/tclUtf.c | 11 +++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c index 42321af..27d7503 100644 --- a/generic/tclCompExpr.c +++ b/generic/tclCompExpr.c @@ -1801,6 +1801,7 @@ ParseLexeme( { const char *end; int scanned; + Tcl_UniChar ch; Tcl_Obj *literal = NULL; unsigned char byte; @@ -1978,12 +1979,12 @@ ParseLexeme( if (!TclIsBareword(*start) || *start == '_') { if (Tcl_UtfCharComplete(start, numBytes)) { - scanned = TclUtfNext(start) - start; + scanned = Tcl_UtfToUniChar(start, &ch); } else { char utfBytes[TCL_UTF_MAX]; memcpy(utfBytes, start, (size_t) numBytes); utfBytes[numBytes] = '\0'; - scanned = TclUtfNext(utfBytes) - utfBytes; + scanned = Tcl_UtfToUniChar(utfBytes, &ch); } *lexemePtr = INVALID; Tcl_DecrRefCount(literal); diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 550d528..9579eb3 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -507,6 +507,7 @@ Tcl_NumUtfChars( int length) /* The length of the string in bytes, or -1 * for strlen(string). */ { + Tcl_UniChar ch; register int i = 0; /* @@ -518,19 +519,19 @@ Tcl_NumUtfChars( if (length < 0) { while ((*src != '\0') && (i < INT_MAX)) { - src = TclUtfNext(src); + src += TclUtfToUniChar(src, &ch); i++; } } else { register const char *endPtr = src + length - TCL_UTF_MAX; while (src < endPtr) { - src = TclUtfNext(src); + src += TclUtfToUniChar(src, &ch); i++; } endPtr += TCL_UTF_MAX; while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { - src = TclUtfNext(src); + src += TclUtfToUniChar(src, &ch); i++; } if (src < endPtr) { @@ -858,9 +859,11 @@ Tcl_UtfAtIndex( register CONST char *src, /* The UTF-8 string. */ register int index) /* The position of the desired character. */ { + Tcl_UniChar ch; + while (index > 0) { index--; - src = TclUtfNext(src); + src += TclUtfToUniChar(src, &ch); } return src; } -- cgit v0.12 From fbfa513c23b05ae5deeaa0ff81ce8045967890c0 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 24 Apr 2020 17:37:36 +0000 Subject: Two more tests developed during work on [27944a3661]. --- tests/utf.test | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/utf.test b/tests/utf.test index d749552..57b8a80 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -565,6 +565,12 @@ test utf-6.123 {Tcl_UtfNext, read limits} testutfnext { test utf-6.124 {Tcl_UtfNext, read limits} testutfnext { testutfnext \xA0\xA0\xA0\xA0\xA0 4 } 1 +test utf-6.125 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { + testutfnext \xA0\xA0\xA0\xA0 +} 1 +test utf-6.126 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { + testutfnext \x80\x80\x80\x80 +} 1 test utf-7.1 {Tcl_UtfPrev} testutfprev { testutfprev {} -- cgit v0.12 From 7d2cbe2aef563bac2e041eca4e8918de2bb66ccf Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 24 Apr 2020 20:39:05 +0000 Subject: Fix [cc4d805771]: reg-13.17.error fails in 8.5 tip --- generic/regc_lex.c | 2 +- tests/reg.test | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/generic/regc_lex.c b/generic/regc_lex.c index f8fa772..039495a 100644 --- a/generic/regc_lex.c +++ b/generic/regc_lex.c @@ -832,7 +832,7 @@ lexescape( RETV(PLAIN, CHR('\t')); break; case CHR('u'): - c = (uchr) lexdigits(v, 16, 1, 4); + c = (uchr) lexdigits(v, 16, 4, 4); if (ISERR()) { FAILW(REG_EESCAPE); } diff --git a/tests/reg.test b/tests/reg.test index 7e66f6d..6cd2eb3 100644 --- a/tests/reg.test +++ b/tests/reg.test @@ -622,8 +622,7 @@ expectMatch 13.13 P "a\\nb" "a\nb" "a\nb" expectMatch 13.14 P "a\\rb" "a\rb" "a\rb" expectMatch 13.15 P "a\\tb" "a\tb" "a\tb" expectMatch 13.16 P "a\\u0008x" "a\bx" "a\bx" -expectMatch 13.17 P {a\u008x} "a\bx" "a\bx" -expectError 13.17.1 - {a\ux} EESCAPE +expectError 13.17 - {a\u008x} EESCAPE expectMatch 13.18 P "a\\u00088x" "a\b8x" "a\b8x" expectMatch 13.19 P "a\\U00000008x" "a\bx" "a\bx" expectError 13.20 - {a\U0000008x} EESCAPE -- cgit v0.12 From ba884cc1dd1d227fad00c29309a5add78a73b2ba Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 24 Apr 2020 21:03:47 +0000 Subject: Backout [ 649f53b9f4]: This was not meant for 8.6 --- generic/regc_lex.c | 2 +- tests/reg.test | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/generic/regc_lex.c b/generic/regc_lex.c index dc7a012..2208c0e 100644 --- a/generic/regc_lex.c +++ b/generic/regc_lex.c @@ -832,7 +832,7 @@ lexescape( RETV(PLAIN, CHR('\t')); break; case CHR('u'): - c = (uchr) lexdigits(v, 16, 4, 4); + c = (uchr) lexdigits(v, 16, 1, 4); if (ISERR()) { FAILW(REG_EESCAPE); } diff --git a/tests/reg.test b/tests/reg.test index 12cea7e..a95d1e2 100644 --- a/tests/reg.test +++ b/tests/reg.test @@ -625,7 +625,8 @@ expectMatch 13.13 P "a\\nb" "a\nb" "a\nb" expectMatch 13.14 P "a\\rb" "a\rb" "a\rb" expectMatch 13.15 P "a\\tb" "a\tb" "a\tb" expectMatch 13.16 P "a\\u0008x" "a\bx" "a\bx" -expectError 13.17 - {a\u008x} EESCAPE +expectMatch 13.17 P {a\u008x} "a\bx" "a\bx" +expectError 13.17.1 - {a\ux} EESCAPE expectMatch 13.18 P "a\\u00088x" "a\b8x" "a\b8x" expectMatch 13.19 P "a\\U00000008x" "a\bx" "a\bx" expectMatch 13.20 P {a\U0000008x} "a\bx" "a\bx" -- cgit v0.12 From 3ac32056bc3f77e2f2d2af1c7f32ccef2dbf2fda Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 24 Apr 2020 22:40:03 +0000 Subject: Quickfix to Tcl_NumUtfChars(). Barely used in Tcl core. Still needs a better look. Mark two new tests as knownBug. Needs a further look as well. --- generic/tclUtf.c | 16 ++++++++++++++-- tests/utf.test | 4 ++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 1f526f1..ef3fd9e 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -599,12 +599,24 @@ Tcl_NumUtfChars( register const char *endPtr = src + length - TCL_UTF_MAX; while (src < endPtr) { - src += TclUtfToUniChar(src, &ch); + if (((unsigned)(unsigned char)*src - 0xF0) < 5) { + /* treat F0 - F4 as single character */ + ch = 0; + src++; + } else { + src += TclUtfToUniChar(src, &ch); + } i++; } endPtr += TCL_UTF_MAX; while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { - src += TclUtfToUniChar(src, &ch); + if (((unsigned)(unsigned char)*src - 0xF0) < 5) { + /* treat F0 - F4 as single character */ + ch = 0; + src++; + } else { + src += TclUtfToUniChar(src, &ch); + } i++; } if (src < endPtr) { diff --git a/tests/utf.test b/tests/utf.test index acdd50e..1180bf2 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -470,10 +470,10 @@ test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testu test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext ucs2} { testutfnext -bytestring \x80\x80\x80 } 1 -test utf-6.125 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { +test utf-6.125 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext knownBug} { testutfnext \xA0\xA0\xA0\xA0 } 1 -test utf-6.126 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { +test utf-6.126 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext knownBug} { testutfnext \x80\x80\x80\x80 } 1 -- cgit v0.12 From e598b0d32c0f6d2ba5356cbb58d8dcc56c1b2772 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 24 Apr 2020 22:42:30 +0000 Subject: Found problem with utf-6.125/6.126: argument -bytestring was missing in test --- tests/utf.test | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/utf.test b/tests/utf.test index 1180bf2..c2bc896 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -470,11 +470,11 @@ test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testu test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext ucs2} { testutfnext -bytestring \x80\x80\x80 } 1 -test utf-6.125 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext knownBug} { - testutfnext \xA0\xA0\xA0\xA0 +test utf-6.125 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { + testutfnext -bytestring \xA0\xA0\xA0\xA0 } 1 -test utf-6.126 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext knownBug} { - testutfnext \x80\x80\x80\x80 +test utf-6.126 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { + testutfnext -bytestring \x80\x80\x80\x80 } 1 test utf-7.1 {Tcl_UtfPrev} testutfprev { -- cgit v0.12 From 4804a3fdec7c1461645097c4aff7561ff9b2d210 Mon Sep 17 00:00:00 2001 From: dgp Date: Sat, 25 Apr 2020 16:26:12 +0000 Subject: Cherrypick [d2143c14c1]: Eliminate the -bytestring option of [testutfnext]. No caller needs anything else. --- generic/tclTest.c | 12 ++-- tests/utf.test | 210 +++++++++++++++++++++++++++--------------------------- 2 files changed, 109 insertions(+), 113 deletions(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index 856e9ea..1676bae 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -6721,15 +6721,11 @@ TestUtfNextCmd( const char *p = tobetested; (void)dummy; - if (objc != 3 || strcmp(Tcl_GetString(objv[1]), "-bytestring")) { - if (objc != 2) { - Tcl_WrongNumArgs(interp, 1, objv, "?-bytestring? bytes"); - return TCL_ERROR; - } - bytes = Tcl_GetStringFromObj(objv[1], &numBytes); - } else { - bytes = (char *) Tcl_GetByteArrayFromObj(objv[2], &numBytes); + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, "bytes"); + return TCL_ERROR; } + bytes = (char *) Tcl_GetByteArrayFromObj(objv[1], &numBytes); if (numBytes > (int)sizeof(buffer)-2) { Tcl_AppendResult(interp, "\"testutfnext\" can only handle 30 bytes", NULL); diff --git a/tests/utf.test b/tests/utf.test index c374209..63ae9ee 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -162,319 +162,319 @@ test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} { test utf-6.1 {Tcl_UtfNext} testutfnext { # This takes the pointer one past the terminating NUL. # This is really an invalid call. - testutfnext -bytestring {} + testutfnext {} } 1 test utf-6.2 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring A + testutfnext A } 1 test utf-6.3 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring AA + testutfnext AA } 1 test utf-6.4 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring A\xA0 + testutfnext A\xA0 } 1 test utf-6.5 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring A\xD0 + testutfnext A\xD0 } 1 test utf-6.6 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring A\xE8 + testutfnext A\xE8 } 1 test utf-6.7 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring A\xF2 + testutfnext A\xF2 } 1 test utf-6.8 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring A\xF8 + testutfnext A\xF8 } 1 test utf-6.9 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0 + testutfnext \xA0 } 1 test utf-6.10 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0G + testutfnext \xA0G } 1 test utf-6.11 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0\xA0 + testutfnext \xA0\xA0 } 1 test utf-6.12 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0\xD0 + testutfnext \xA0\xD0 } 1 test utf-6.13 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0\xE8 + testutfnext \xA0\xE8 } 1 test utf-6.14 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0\xF2 + testutfnext \xA0\xF2 } 1 test utf-6.15 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0\xF8 + testutfnext \xA0\xF8 } 1 test utf-6.16 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0 + testutfnext \xD0 } 1 test utf-6.17 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0G + testutfnext \xD0G } 1 test utf-6.18 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0 + testutfnext \xD0\xA0 } 2 test utf-6.19 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xD0 + testutfnext \xD0\xD0 } 1 test utf-6.20 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xE8 + testutfnext \xD0\xE8 } 1 test utf-6.21 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xF2 + testutfnext \xD0\xF2 } 1 test utf-6.22 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xF8 + testutfnext \xD0\xF8 } 1 test utf-6.23 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8 + testutfnext \xE8 } 1 test utf-6.24 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8G + testutfnext \xE8G } 1 test utf-6.25 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0 + testutfnext \xE8\xA0 } 1 test utf-6.26 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xD0 + testutfnext \xE8\xD0 } 1 test utf-6.27 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xE8 + testutfnext \xE8\xE8 } 1 test utf-6.28 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xF2 + testutfnext \xE8\xF2 } 1 test utf-6.29 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xF8 + testutfnext \xE8\xF8 } 1 test utf-6.30 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2 + testutfnext \xF2 } 1 test utf-6.31 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2G + testutfnext \xF2G } 1 test utf-6.32 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0 + testutfnext \xF2\xA0 } 1 test utf-6.33 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xD0 + testutfnext \xF2\xD0 } 1 test utf-6.34 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xE8 + testutfnext \xF2\xE8 } 1 test utf-6.35 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xF2 + testutfnext \xF2\xF2 } 1 test utf-6.36 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xF8 + testutfnext \xF2\xF8 } 1 test utf-6.37 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8 + testutfnext \xF8 } 1 test utf-6.38 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8G + testutfnext \xF8G } 1 test utf-6.39 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8\xA0 + testutfnext \xF8\xA0 } 1 test utf-6.40 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8\xD0 + testutfnext \xF8\xD0 } 1 test utf-6.41 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8\xE8 + testutfnext \xF8\xE8 } 1 test utf-6.42 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8\xF2 + testutfnext \xF8\xF2 } 1 test utf-6.43 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8\xF8 + testutfnext \xF8\xF8 } 1 test utf-6.44 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0G + testutfnext \xD0\xA0G } 2 test utf-6.45 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0\xA0 + testutfnext \xD0\xA0\xA0 } 2 test utf-6.46 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0\xD0 + testutfnext \xD0\xA0\xD0 } 2 test utf-6.47 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0\xE8 + testutfnext \xD0\xA0\xE8 } 2 test utf-6.48 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0\xF2 + testutfnext \xD0\xA0\xF2 } 2 test utf-6.49 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0\xF8 + testutfnext \xD0\xA0\xF8 } 2 test utf-6.50 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0G + testutfnext \xE8\xA0G } 1 test utf-6.51 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0 + testutfnext \xE8\xA0\xA0 } 3 test utf-6.52 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xD0 + testutfnext \xE8\xA0\xD0 } 1 test utf-6.53 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xE8 + testutfnext \xE8\xA0\xE8 } 1 test utf-6.54 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xF2 + testutfnext \xE8\xA0\xF2 } 1 test utf-6.55 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xF8 + testutfnext \xE8\xA0\xF8 } 1 test utf-6.56 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0G + testutfnext \xF2\xA0G } 1 test utf-6.57 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xA0 + testutfnext \xF2\xA0\xA0 } 1 test utf-6.58 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xD0 + testutfnext \xF2\xA0\xD0 } 1 test utf-6.59 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xE8 + testutfnext \xF2\xA0\xE8 } 1 test utf-6.60 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xF2 + testutfnext \xF2\xA0\xF2 } 1 test utf-6.61 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xF8 + testutfnext \xF2\xA0\xF8 } 1 test utf-6.62 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0G + testutfnext \xE8\xA0\xA0G } 3 test utf-6.63 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0\xA0 + testutfnext \xE8\xA0\xA0\xA0 } 3 test utf-6.64 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0\xD0 + testutfnext \xE8\xA0\xA0\xD0 } 3 test utf-6.65 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0\xE8 + testutfnext \xE8\xA0\xA0\xE8 } 3 test utf-6.66 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0\xF2 + testutfnext \xE8\xA0\xA0\xF2 } 3 test utf-6.67 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0\xF8 + testutfnext \xE8\xA0\xA0\xF8 } 3 test utf-6.68 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xA0G + testutfnext \xF2\xA0\xA0G } 1 test utf-6.69.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext -bytestring \xF2\xA0\xA0\xA0 + testutfnext \xF2\xA0\xA0\xA0 } 1 test utf-6.69.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext -bytestring \xF2\xA0\xA0\xA0 + testutfnext \xF2\xA0\xA0\xA0 } 4 test utf-6.70 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xA0\xD0 + testutfnext \xF2\xA0\xA0\xD0 } 1 test utf-6.71 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xA0\xE8 + testutfnext \xF2\xA0\xA0\xE8 } 1 test utf-6.72 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xA0\xF2 + testutfnext \xF2\xA0\xA0\xF2 } 1 test utf-6.73 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xA0\xF8 + testutfnext \xF2\xA0\xA0\xF8 } 1 test utf-6.74.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext -bytestring \xF2\xA0\xA0\xA0G + testutfnext \xF2\xA0\xA0\xA0G } 1 test utf-6.74.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext -bytestring \xF2\xA0\xA0\xA0G + testutfnext \xF2\xA0\xA0\xA0G } 4 test utf-6.75.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xA0 + testutfnext \xF2\xA0\xA0\xA0\xA0 } 1 test utf-6.75.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xA0 + testutfnext \xF2\xA0\xA0\xA0\xA0 } 4 test utf-6.76.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xD0 + testutfnext \xF2\xA0\xA0\xA0\xD0 } 1 test utf-6.76.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xD0 + testutfnext \xF2\xA0\xA0\xA0\xD0 } 4 test utf-6.77.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xE8 + testutfnext \xF2\xA0\xA0\xA0\xE8 } 1 test utf-6.77.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xE8 + testutfnext \xF2\xA0\xA0\xA0\xE8 } 4 test utf-6.78.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xF2 + testutfnext \xF2\xA0\xA0\xA0\xF2 } 1 test utf-6.78.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xF2 + testutfnext \xF2\xA0\xA0\xA0\xF2 } 4 test utf-6.79.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext -bytestring \xF2\xA0\xA0\xA0G\xF8 + testutfnext \xF2\xA0\xA0\xA0G\xF8 } 1 test utf-6.79.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext -bytestring \xF2\xA0\xA0\xA0G\xF8 + testutfnext \xF2\xA0\xA0\xA0G\xF8 } 4 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xC0\x80 + testutfnext \xC0\x80 } 2 test utf-6.81 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xC0\x81 + testutfnext \xC0\x81 } 1 test utf-6.82 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xC1\x80 + testutfnext \xC1\x80 } 1 test utf-6.83 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xC2\x80 + testutfnext \xC2\x80 } 2 test utf-6.84 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xE0\x80\x80 + testutfnext \xE0\x80\x80 } 1 test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xE0\xA0\x80 + testutfnext \xE0\xA0\x80 } 3 test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xF0\x80\x80\x80 + testutfnext \xF0\x80\x80\x80 } 1 test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext ucs2} { - testutfnext -bytestring \xF0\x90\x80\x80 + testutfnext \xF0\x90\x80\x80 } 1 test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} { - testutfnext -bytestring \xF0\x90\x80\x80 + testutfnext \xF0\x90\x80\x80 } 4 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} testutfnext { - testutfnext -bytestring \xA0\xA0 + testutfnext \xA0\xA0 } 1 test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} testutfnext { - testutfnext -bytestring \x80\x80 + testutfnext \x80\x80 } 1 test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs2} { - testutfnext -bytestring \xF4\x8F\xBF\xBF + testutfnext \xF4\x8F\xBF\xBF } 1 test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} { - testutfnext -bytestring \xF4\x8F\xBF\xBF + testutfnext \xF4\x8F\xBF\xBF } 4 test utf-6.91.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs2} { - testutfnext -bytestring \xF4\x90\x80\x80 + testutfnext \xF4\x90\x80\x80 } 1 test utf-6.91.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} { - testutfnext -bytestring \xF4\x90\x80\x80 + testutfnext \xF4\x90\x80\x80 } 1 test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { - testutfnext -bytestring \xA0\xA0\xA0 + testutfnext \xA0\xA0\xA0 } 1 test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { - testutfnext -bytestring \x80\x80\x80 + testutfnext \x80\x80\x80 } 1 test utf-6.125 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { - testutfnext -bytestring \xA0\xA0\xA0\xA0 + testutfnext \xA0\xA0\xA0\xA0 } 1 test utf-6.126 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { - testutfnext -bytestring \x80\x80\x80\x80 + testutfnext \x80\x80\x80\x80 } 1 test utf-7.1 {Tcl_UtfPrev} testutfprev { -- cgit v0.12 From 58876a37b8505fb2dd9d6f39acab6aa4bf54fb05 Mon Sep 17 00:00:00 2001 From: dgp Date: Sat, 25 Apr 2020 17:03:18 +0000 Subject: dup test name --- tests/utf.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/utf.test b/tests/utf.test index c739bb4..8814801 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -882,10 +882,10 @@ test utf-7.44 {Tcl_UtfPrev -- no lead byte at start} testutfprev { test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} testutfprev { testutfprev \xA0\xA0\xA0 } 2 -test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} {testutfprev ucs2} { +test utf-7.46.0 {Tcl_UtfPrev -- no lead byte at start} {testutfprev ucs2} { testutfprev \xA0\xA0\xA0\xA0 } 1 -test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} {testutfprev fullutf} { +test utf-7.46.1 {Tcl_UtfPrev -- no lead byte at start} {testutfprev fullutf} { testutfprev \xA0\xA0\xA0\xA0 } 3 test utf-7.47 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} testutfprev { -- cgit v0.12 From 4d159d9803745ea37abc4e06085682b1870a8fea Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 25 Apr 2020 22:16:37 +0000 Subject: encoding-12.6 only works for "ucs2" for now. Don't use (deprecated) INLINE and CONST --- generic/tclUtf.c | 8 ++++---- tests/encoding.test | 5 +++-- win/tclWinTime.c | 4 ++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 91a4b89..665607f 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -157,7 +157,7 @@ static const unsigned char bounds[28] = { #endif }; -INLINE static int +static int Invalid( unsigned char *src) /* Points to lead byte of a UTF-8 byte sequence */ { @@ -775,7 +775,7 @@ Tcl_UtfPrev( const char *start) /* Pointer to the beginning of the string */ { int trailBytesSeen = 0; /* How many trail bytes have been verified? */ - CONST char *fallback = src - 1; + const char *fallback = src - 1; /* If we cannot find a lead byte that might * start a prefix of a valid UTF byte sequence, * we will fallback to a one-byte back step */ @@ -831,13 +831,13 @@ Tcl_UtfPrev( /* Reject */ return fallback; } - return (CONST char *)look; + return (const char *)look; } /* We saw a trail byte. */ trailBytesSeen++; - if ((CONST char *)look == start) { + if ((const char *)look == start) { /* * Do not read before the start of the string * diff --git a/tests/encoding.test b/tests/encoding.test index a8ce162..a969efc 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -36,7 +36,8 @@ proc runtests {} { testConstraint testencoding [llength [info commands testencoding]] testConstraint testbytestring [llength [info commands testbytestring]] testConstraint teststringbytes [llength [info commands teststringbytes]] -testConstraint fullutf [expr {[format %c 0x010000] ne "\ufffd"}] +testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] +testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] testConstraint exec [llength [info commands exec]] testConstraint testgetdefenc [llength [info commands testgetdefenc]] @@ -305,7 +306,7 @@ test encoding-12.5 {LoadTableEncoding: symbol encoding} { append x [encoding convertto symbol \u67] append x [encoding convertfrom symbol \x67] } "\x67\x67\u3b3" -test encoding-12.6 {LoadTableEncoding: overflow in char value} fullutf { +test encoding-12.6 {LoadTableEncoding: overflow in char value} ucs2 { encoding convertto iso8859-3 \U010000 } "?" diff --git a/win/tclWinTime.c b/win/tclWinTime.c index a434d86..976dd61 100644 --- a/win/tclWinTime.c +++ b/win/tclWinTime.c @@ -1358,7 +1358,7 @@ TclpGmtime( #if defined(_WIN64) || defined(_USE_64BIT_TIME_T) || (defined(_MSC_VER) && _MSC_VER < 1400) return gmtime(timePtr); #else - return _gmtime32((CONST __time32_t *)timePtr); + return _gmtime32((const __time32_t *)timePtr); #endif } @@ -1393,7 +1393,7 @@ TclpLocaltime( #if defined(_WIN64) || defined(_USE_64BIT_TIME_T) || (defined(_MSC_VER) && _MSC_VER < 1400) return localtime(timePtr); #else - return _localtime32((CONST __time32_t *)timePtr); + return _localtime32((const __time32_t *)timePtr); #endif } -- cgit v0.12 From ff3cf2ae397658af620c85dafae887a07774fab7 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 25 Apr 2020 22:49:16 +0000 Subject: Make reg-13.33/reg-13.34 work again. Still have to investigate what's the problem. --- generic/regc_lex.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/generic/regc_lex.c b/generic/regc_lex.c index 6ef0a83..a303ec6 100644 --- a/generic/regc_lex.c +++ b/generic/regc_lex.c @@ -843,18 +843,12 @@ lexescape( if (ISERR()) { FAILW(REG_EESCAPE); } -#if CHRBITS > 16 - if ((unsigned)i > 0x10FFFF) { - i = 0xFFFD; - } -#else - if ((unsigned)i & ~0xFFFF) { + if (i > 0xFFFF) { /* TODO: output a Surrogate pair */ i = 0xFFFD; } -#endif - RETV(PLAIN, (uchr)i); + RETV(PLAIN, (uchr) i); break; case CHR('v'): RETV(PLAIN, CHR('\v')); -- cgit v0.12 From 70ca5816c6db48d4d21cb0ff980196e317f46290 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 25 Apr 2020 23:19:14 +0000 Subject: Undo last change in regc_lex.c: It doesn't do the expected thing when TCL_UTF_MAX>3. More work needed --- generic/regc_lex.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/generic/regc_lex.c b/generic/regc_lex.c index 2208c0e..1b00b71 100644 --- a/generic/regc_lex.c +++ b/generic/regc_lex.c @@ -843,18 +843,12 @@ lexescape( if (ISERR()) { FAILW(REG_EESCAPE); } -#if CHRBITS > 16 - if ((unsigned)i > 0x10FFFF) { - i = 0xFFFD; - } -#else - if ((unsigned)i & ~0xFFFF) { + if (i > 0xFFFF) { /* TODO: output a Surrogate pair */ i = 0xFFFD; } -#endif - RETV(PLAIN, (uchr)i); + RETV(PLAIN, (uchr) i); break; case CHR('v'): RETV(PLAIN, CHR('\v')); -- cgit v0.12 From 3579ec86f682dba9ad4ddea1f75fda6068425852 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 26 Apr 2020 12:56:47 +0000 Subject: Add 6 "ucs2" markers. The first 4 of them are not bugs: They show that Tcl_UtfComplete() cannot be used to protect Tcl_UtfNext() calls. So, those test-cases, even though they work with ucs2, are simply wrong. The other 2 (utf-6.93 and utf-6.126) look like a bug to me. Not high prio (since TCL_UTF_MAX=4 builds are unsupported), but worth a ticket. --- tests/utf.test | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/utf.test b/tests/utf.test index 8814801..547d573 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -560,19 +560,19 @@ test utf-6.115.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} { test utf-6.115.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { testutfnext \xF2\xA0\xA0\xA0\xA0 4 } 4 -test utf-6.116 {Tcl_UtfNext, read limits} testutfnext { +test utf-6.116 {Tcl_UtfNext, read limits} {testutfnext ucs2} { testutfnext \xA0G 0 } 0 -test utf-6.117 {Tcl_UtfNext, read limits} testutfnext { +test utf-6.117 {Tcl_UtfNext, read limits} {testutfnext ucs2} { testutfnext \xA0G 1 } 1 -test utf-6.118 {Tcl_UtfNext, read limits} testutfnext { +test utf-6.118 {Tcl_UtfNext, read limits} {testutfnext ucs2} { testutfnext \xA0\xA0 1 } 1 -test utf-6.119 {Tcl_UtfNext, read limits} testutfnext { +test utf-6.119 {Tcl_UtfNext, read limits} {testutfnext ucs2} { testutfnext \xA0\xA0G 2 } 1 -test utf-6.120 {Tcl_UtfNext, read limits} testutfnext { +test utf-6.120 {Tcl_UtfNext, read limits} {testutfnext ucs2} { testutfnext \xA0\xA0\xA0 2 } 1 test utf-6.121 {Tcl_UtfNext, read limits} testutfnext { @@ -590,7 +590,7 @@ test utf-6.124 {Tcl_UtfNext, read limits} testutfnext { test utf-6.125 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { testutfnext \xA0\xA0\xA0\xA0 } 1 -test utf-6.126 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { +test utf-6.126 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext ucs2} { testutfnext \x80\x80\x80\x80 } 1 -- cgit v0.12 From d28ffce7cd1cfb32c243f8384664d3a332ecbcb4 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 26 Apr 2020 15:11:41 +0000 Subject: Cherry-pick Tcl_UniCharAtIndex() implementation from [6596c4af31], but adapted to the needs of TIPs 389/542. --- doc/Utf.3 | 2 +- generic/tclUtf.c | 26 ++++---------------------- 2 files changed, 5 insertions(+), 23 deletions(-) diff --git a/doc/Utf.3 b/doc/Utf.3 index ce8ad74..c8c6132 100644 --- a/doc/Utf.3 +++ b/doc/Utf.3 @@ -282,7 +282,7 @@ byte \fIsrc[0]\fR nor the byte \fIstart[-1]\fR nor the byte \fIsrc[-\fBTCL_UTF_MAX\fI-1]\fR. .PP \fBTcl_UniCharAtIndex\fR corresponds to a C string array dereference or the -Pascal Ord() function. It returns the Tcl_UniChar represented at the +Pascal Ord() function. It returns the Unicode character represented at the specified character (not byte) \fIindex\fR in the UTF-8 string \fIsrc\fR. The source string must contain at least \fIindex\fR characters. Behavior is undefined if a negative \fIindex\fR is given. diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 45a7f1e..1138372 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -1081,7 +1081,7 @@ Tcl_UtfPrev( * * Tcl_UniCharAtIndex -- * - * Returns the Tcl_UniChar represented at the specified character + * Returns the Unicode character represented at the specified character * (not byte) position in the UTF-8 string. * * Results: @@ -1098,28 +1098,10 @@ Tcl_UniCharAtIndex( const char *src, /* The UTF-8 string to dereference. */ int index) /* The position of the desired character. */ { - Tcl_UniChar ch = 0; - int fullchar = 0; -#if TCL_UTF_MAX <= 3 - int len = 0; -#endif + int ch = 0; - while (index-- >= 0) { -#if TCL_UTF_MAX <= 3 - src += (len = TclUtfToUniChar(src, &ch)); -#else - src += TclUtfToUniChar(src, &ch); -#endif - } - fullchar = ch; -#if TCL_UTF_MAX <= 3 - if ((ch >= 0xD800) && (len < 3)) { - /* If last Tcl_UniChar was a high surrogate, combine with low surrogate */ - (void)TclUtfToUniChar(src, &ch); - fullchar = (((fullchar & 0x3FF) << 10) | (ch & 0x3FF)) + 0x10000; - } -#endif - return fullchar; + TclUtfToUCS4(Tcl_UtfAtIndex(src, index), &ch); + return ch; } /* -- cgit v0.12 From f6fd0c69f15d9f33576bc3a40fbfaf053de46d45 Mon Sep 17 00:00:00 2001 From: dgp Date: Sun, 26 Apr 2020 20:18:20 +0000 Subject: Bring back a set of tests from 8.6. Invented new constraints to constrain to the right conditions, not implied ones. --- tests/utf.test | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/utf.test b/tests/utf.test index 57b8a80..0d93a12 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -13,9 +13,15 @@ if {[lsearch [namespace children] ::tcltest] == -1} { namespace import -force ::tcltest::* } +namespace path ::tcl::mathop + testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] +testConstraint Uesc [eq \U0041 A] +testConstraint pairsTo4bytes [expr {[llength [info commands teststringbytes]] + && [string length [teststringbytes \uD83D\uDCA9]] == 4}] + testConstraint testbytestring [llength [info commands testbytestring]] testConstraint testfindfirst [llength [info commands testfindfirst]] testConstraint testfindlast [llength [info commands testfindlast]] @@ -44,6 +50,30 @@ test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} testbytestring { test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring { expr {[format %c -1] eq [testbytestring "\xEF\xBF\xBD"]} } 1 +test utf-1.7.0 {Tcl_UniCharToUtf: 4 byte sequences} {fullutf Uesc testbytestring} { + expr {"\U014E4E" eq [testbytestring "\xF0\x94\xB9\x8E"]} +} 1 +test utf-1.7.1 {Tcl_UniCharToUtf: 4 byte sequences} {ucs2 Uesc testbytestring} { + expr {"\U014E4E" eq [testbytestring "\xF0\x94\xB9\x8E"]} +} 0 +test utf-1.8 {Tcl_UniCharToUtf: 3 byte sequence, high surrogate} testbytestring { + expr {"\uD842" eq [testbytestring "\xED\xA1\x82"]} +} 1 +test utf-1.9 {Tcl_UniCharToUtf: 3 byte sequence, low surrogate} testbytestring { + expr {"\uDC42" eq [testbytestring "\xED\xB1\x82"]} +} 1 +test utf-1.10 {Tcl_UniCharToUtf: 3 byte sequence, high surrogate} testbytestring { + expr {[format %c 0xD842] eq [testbytestring "\xED\xA1\x82"]} +} 1 +test utf-1.11 {Tcl_UniCharToUtf: 3 byte sequence, low surrogate} testbytestring { + expr {[format %c 0xDC42] eq [testbytestring "\xED\xB1\x82"]} +} 1 +test utf-1.12 {Tcl_UniCharToUtf: 4 byte sequence, high/low surrogate} {pairsTo4bytes testbytestring} { + expr {"\uD842\uDC42" eq [testbytestring "\xF0\xA0\xA1\x82"]} +} 1 +test utf-1.13 {Tcl_UniCharToUtf: Invalid surrogate} {Uesc testbytestring} { + expr {"\UD842" eq [testbytestring "\xEF\xBF\xBD"]} +} 1 test utf-2.1 {Tcl_UtfToUniChar: low ascii} { string length "abc" -- cgit v0.12 From 371c07da346f6b67f915100aa1bb558cf02fe41d Mon Sep 17 00:00:00 2001 From: dgp Date: Sun, 26 Apr 2020 20:58:28 +0000 Subject: Continuing test reconciliation. --- tests/utf.test | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/utf.test b/tests/utf.test index 0d93a12..f5e5bcc 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -17,6 +17,7 @@ namespace path ::tcl::mathop testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] +testConstraint tip389 [expr {[string length [format %c 0x10000]] eq 2}] testConstraint Uesc [eq \U0041 A] testConstraint pairsTo4bytes [expr {[llength [info commands teststringbytes]] @@ -102,12 +103,18 @@ test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytest test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring fullutf} { string length [testbytestring "\xF0\x90\x80\x80"] } 1 +test utf-2.8.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring tip389} { + string length [testbytestring "\xF0\x90\x80\x80"] +} 2 test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs2} { string length [testbytestring "\xF4\x8F\xBF\xBF"] } 4 test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring fullutf} { string length [testbytestring "\xF4\x8F\xBF\xBF"] } 1 +test utf-2.9.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring tip389} { + string length [testbytestring "\xF4\x8F\xBF\xBF"] +} 2 test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring { string length [testbytestring "\xF0\x8F\xBF\xBF"] } 4 -- cgit v0.12 From e6de958cf55d8685930192267e22e8a842c7575b Mon Sep 17 00:00:00 2001 From: dgp Date: Sun, 26 Apr 2020 21:11:40 +0000 Subject: Refine the constraint. The fact that Tcl stores extended characters internally does not imply that [string length] counts UCS4 characters instead of UTF-16 code units. --- tests/utf.test | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/utf.test b/tests/utf.test index f5e5bcc..a9e5353 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -18,6 +18,8 @@ namespace path ::tcl::mathop testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] testConstraint tip389 [expr {[string length [format %c 0x10000]] eq 2}] +testConstraint ucs4 [expr {[testConstraint fullutf] + && [string length [format %c 0x10000]] == 1}] testConstraint Uesc [eq \U0041 A] testConstraint pairsTo4bytes [expr {[llength [info commands teststringbytes]] @@ -100,7 +102,7 @@ test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestrin test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs2} { string length [testbytestring "\xF0\x90\x80\x80"] } 4 -test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring fullutf} { +test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs4} { string length [testbytestring "\xF0\x90\x80\x80"] } 1 test utf-2.8.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring tip389} { @@ -109,7 +111,7 @@ test utf-2.8.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytest test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs2} { string length [testbytestring "\xF4\x8F\xBF\xBF"] } 4 -test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring fullutf} { +test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs4} { string length [testbytestring "\xF4\x8F\xBF\xBF"] } 1 test utf-2.9.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring tip389} { -- cgit v0.12 From d9333f5aab27d44e0bb0a9038fb07a7fbf7a953d Mon Sep 17 00:00:00 2001 From: dgp Date: Sun, 26 Apr 2020 21:28:10 +0000 Subject: test reconciliation --- tests/utf.test | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/utf.test b/tests/utf.test index a9e5353..9f3234b 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -168,7 +168,10 @@ test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfc test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs2} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4 } 4 -test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring fullutf} { +test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs4} { + testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4 +} 1 +test utf-4.12.2 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring tip389} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4 } 1 -- cgit v0.12 From d92241af58e44e02c9dab704c9b1e3925a6f75ab Mon Sep 17 00:00:00 2001 From: dgp Date: Sun, 26 Apr 2020 22:11:07 +0000 Subject: More test reconciliation. --- tests/utf.test | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/utf.test b/tests/utf.test index 9f3234b..b74d8f5 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -33,6 +33,8 @@ testConstraint teststringobj [llength [info commands teststringobj]] testConstraint testutfnext [llength [info commands testutfnext]] testConstraint testutfprev [llength [info commands testutfprev]] +testConstraint tip413 [eq {} [string trim \x00]] + catch {unset x} test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} testbytestring { @@ -173,7 +175,7 @@ test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars test } 1 test utf-4.12.2 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring tip389} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4 -} 1 +} 2 test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} { testfindfirst [testbytestring "abcbc"] 98 @@ -1221,6 +1223,14 @@ test utf-24.4 {unicode space char in regc_locale.c} { # this returns 1 with Unicode 7 compliance list [regexp {^[[:space:]]+$} \u1680\u180E\u202F] [regexp {^\s+$} \u1680\u180E\u202F] } {1 1} +test utf-24.5 {TclUniCharIsSpace} tip413 { + # this returns 1 with Unicode 7/TIP 413 compliance + string is space \x85\u1680\u180E\u200B\u202F\u2060 +} 1 +test utf-24.6 {unicode space char in regc_locale.c} tip413 { + # this returns 1 with Unicode 7/TIP 413 compliance + list [regexp {^[[:space:]]+$} \x85\u1680\u180E\u200B\u202F\u2060] [regexp {^\s+$} \x85\u1680\u180E\u200B\u202F\u2060] +} {1 1} test utf-25.1 {Tcl_UniCharNcasecmp} -constraints teststringobj \ -setup { -- cgit v0.12 From 48357577d7bdca0d59bb71d6b6b3511adc5cec90 Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 27 Apr 2020 01:29:18 +0000 Subject: Possible fix for [string to*] writing out a high surrogate at end of string. --- generic/tclUtf.c | 8 ++++---- tests/utf.test | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 665607f..5c0d054 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -1026,7 +1026,7 @@ Tcl_UtfToUpper( * char to dst if its size is <= the original char. */ - if (len < UtfCount(upChar)) { + if (len < UtfCount(upChar) || ((upChar & 0xF800) == 0xD800)) { memmove(dst, src, len); dst += len; } else { @@ -1079,7 +1079,7 @@ Tcl_UtfToLower( * char to dst if its size is <= the original char. */ - if (len < UtfCount(lowChar)) { + if (len < UtfCount(lowChar) || ((lowChar & 0xF800) == 0xD800)) { memmove(dst, src, len); dst += len; } else { @@ -1129,7 +1129,7 @@ Tcl_UtfToTitle( len = TclUtfToUniChar(src, &ch); titleChar = Tcl_UniCharToTitle(ch); - if (len < UtfCount(titleChar)) { + if (len < UtfCount(titleChar) || ((titleChar & 0xF800) == 0xD800)) { memmove(dst, src, len); dst += len; } else { @@ -1145,7 +1145,7 @@ Tcl_UtfToTitle( lowChar = Tcl_UniCharToLower(lowChar); } - if (len < UtfCount(lowChar)) { + if (len < UtfCount(lowChar) || ((lowChar & 0xF800) == 0xD800)) { memmove(dst, src, len); dst += len; } else { diff --git a/tests/utf.test b/tests/utf.test index cf0d1bf..10185d3 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -1127,7 +1127,7 @@ test utf-12.4 {Tcl_UtfToLower} { test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} { string tolower \u10D0\u1C90 } \u10D0\u10D0 -test utf-12.6 {Tcl_UtfToUpper low/high surrogate)} ucs2 { +test utf-12.6 {Tcl_UtfToUpper low/high surrogate)} { string tolower \uDC24\uD824 } \uDC24\uD824 @@ -1149,7 +1149,7 @@ test utf-13.5 {Tcl_UtfToTitle Georgian (new in Unicode 11)} { test utf-13.6 {Tcl_UtfToTitle Georgian (new in Unicode 11)} { string totitle \u1C90\u10D0 } \u1C90\u10D0 -test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} ucs2 { +test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} { string totitle \uDC24\uD824 } \uDC24\uD824 -- cgit v0.12 From 54713d6e0a657c0aa590e6714a85b06a37fd4d60 Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 27 Apr 2020 02:43:51 +0000 Subject: More tests showing more bugs. --- tests/utf.test | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/utf.test b/tests/utf.test index 10185d3..970d4fd 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -1111,6 +1111,9 @@ test utf-11.4 {Tcl_UtfToUpper} { test utf-11.5 {Tcl_UtfToUpper Georgian (new in Unicode 11)} { string toupper \u10D0\u1C90 } \u1C90\u1C90 +test utf-11.6 {Tcl_UtfToUpper beyond U+FFFF} {Uesc fullutf} { + string toupper \U10428 +} \U10400 test utf-12.1 {Tcl_UtfToLower} { string tolower {} @@ -1127,9 +1130,12 @@ test utf-12.4 {Tcl_UtfToLower} { test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} { string tolower \u10D0\u1C90 } \u10D0\u10D0 -test utf-12.6 {Tcl_UtfToUpper low/high surrogate)} { +test utf-12.6 {Tcl_UtfToLower low/high surrogate)} { string tolower \uDC24\uD824 } \uDC24\uD824 +test utf-12.7 {Tcl_UtfToLower beyond U+FFFF} {Uesc fullutf} { + string tolower \U10400 +} \U10428 test utf-13.1 {Tcl_UtfToTitle} { string totitle {} @@ -1152,6 +1158,9 @@ test utf-13.6 {Tcl_UtfToTitle Georgian (new in Unicode 11)} { test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} { string totitle \uDC24\uD824 } \uDC24\uD824 +test utf-13.8 {Tcl_UtfToTitle beyond U+FFFF} {Uesc fullutf} { + string totitle \U10428 +} \U10400 test utf-14.1 {Tcl_UtfNcasecmp} { string compare -nocase a b -- cgit v0.12 From 4d391ba3a737560c418ce20e2600d6746388bd1b Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 27 Apr 2020 03:03:25 +0000 Subject: Still more tests --- tests/utf.test | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/utf.test b/tests/utf.test index 970d4fd..6fed971 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -1114,6 +1114,9 @@ test utf-11.5 {Tcl_UtfToUpper Georgian (new in Unicode 11)} { test utf-11.6 {Tcl_UtfToUpper beyond U+FFFF} {Uesc fullutf} { string toupper \U10428 } \U10400 +test utf-11.7 {Tcl_UtfToUpper beyond U+FFFF} {pairsTo4bytes} { + string toupper \uD801\uDC28 +} \uD801\uDC00 test utf-12.1 {Tcl_UtfToLower} { string tolower {} @@ -1136,6 +1139,9 @@ test utf-12.6 {Tcl_UtfToLower low/high surrogate)} { test utf-12.7 {Tcl_UtfToLower beyond U+FFFF} {Uesc fullutf} { string tolower \U10400 } \U10428 +test utf-12.8 {Tcl_UtfToLower beyond U+FFFF} {pairsTo4bytes} { + string tolower \uD801\uDC00 +} \uD801\uDC28 test utf-13.1 {Tcl_UtfToTitle} { string totitle {} @@ -1161,6 +1167,9 @@ test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} { test utf-13.8 {Tcl_UtfToTitle beyond U+FFFF} {Uesc fullutf} { string totitle \U10428 } \U10400 +test utf-13.9 {Tcl_UtfToTitle beyond U+FFFF} {pairsTo4bytes} { + string totitle \uD801\uDC28 +} \uD801\uDC00 test utf-14.1 {Tcl_UtfNcasecmp} { string compare -nocase a b -- cgit v0.12 From 60d1d8c7eb1ac57639a5666836625c845fe38f2d Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 27 Apr 2020 12:26:39 +0000 Subject: Use lossless internal routines to cover extended characters. --- generic/tclUtf.c | 64 ++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 20 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 5c0d054..0e9561d 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -87,6 +87,9 @@ static const unsigned char totalBytes[256] = { static int UtfCount(int ch); static int Invalid(unsigned char *src); +static int UCS4ToUpper(int ch); +static int UCS4ToLower(int ch); +static int UCS4ToTitle(int ch); /* *--------------------------------------------------------------------------- @@ -1007,7 +1010,7 @@ int Tcl_UtfToUpper( char *str) /* String to convert in place. */ { - Tcl_UniChar ch = 0, upChar; + int ch, upChar; char *src, *dst; int len; @@ -1017,8 +1020,8 @@ Tcl_UtfToUpper( src = dst = str; while (*src) { - len = TclUtfToUniChar(src, &ch); - upChar = Tcl_UniCharToUpper(ch); + len = TclUtfToUCS4(src, &ch); + upChar = UCS4ToUpper(ch); /* * To keep badly formed Utf strings from getting inflated by the @@ -1060,7 +1063,7 @@ int Tcl_UtfToLower( char *str) /* String to convert in place. */ { - Tcl_UniChar ch = 0, lowChar; + int ch, lowChar; char *src, *dst; int len; @@ -1070,8 +1073,8 @@ Tcl_UtfToLower( src = dst = str; while (*src) { - len = TclUtfToUniChar(src, &ch); - lowChar = Tcl_UniCharToLower(ch); + len = TclUtfToUCS4(src, &ch); + lowChar = UCS4ToLower(ch); /* * To keep badly formed Utf strings from getting inflated by the @@ -1114,7 +1117,7 @@ int Tcl_UtfToTitle( char *str) /* String to convert in place. */ { - Tcl_UniChar ch = 0, titleChar, lowChar; + int ch, titleChar, lowChar; char *src, *dst; int len; @@ -1126,8 +1129,8 @@ Tcl_UtfToTitle( src = dst = str; if (*src) { - len = TclUtfToUniChar(src, &ch); - titleChar = Tcl_UniCharToTitle(ch); + len = TclUtfToUCS4(src, &ch); + titleChar = UCS4ToTitle(ch); if (len < UtfCount(titleChar) || ((titleChar & 0xF800) == 0xD800)) { memmove(dst, src, len); @@ -1138,11 +1141,11 @@ Tcl_UtfToTitle( src += len; } while (*src) { - len = TclUtfToUniChar(src, &ch); + len = TclUtfToUCS4(src, &ch); lowChar = ch; /* Special exception for Georgian Asomtavruli chars, no titlecase. */ if ((unsigned)(lowChar - 0x1C90) >= 0x30) { - lowChar = Tcl_UniCharToLower(lowChar); + lowChar = UCS4ToLower(lowChar); } if (len < UtfCount(lowChar) || ((lowChar & 0xF800) == 0xD800)) { @@ -1382,8 +1385,8 @@ TclUtfCasecmp( *---------------------------------------------------------------------- */ -Tcl_UniChar -Tcl_UniCharToUpper( +static int +UCS4ToUpper( int ch) /* Unicode character to convert. */ { int info = GetUniCharInfo(ch); @@ -1391,7 +1394,14 @@ Tcl_UniCharToUpper( if (GetCaseType(info) & 0x04) { ch -= GetDelta(info); } - return (Tcl_UniChar) ch; + return ch; +} + +Tcl_UniChar +Tcl_UniCharToUpper( + int ch) /* Unicode character to convert. */ +{ + return (Tcl_UniChar) UCS4ToUpper(ch); } /* @@ -1410,8 +1420,8 @@ Tcl_UniCharToUpper( *---------------------------------------------------------------------- */ -Tcl_UniChar -Tcl_UniCharToLower( +static int +UCS4ToLower( int ch) /* Unicode character to convert. */ { int info = GetUniCharInfo(ch); @@ -1420,7 +1430,14 @@ Tcl_UniCharToLower( if ((mode & 0x02) && (mode != 0x7)) { ch += GetDelta(info); } - return (Tcl_UniChar) ch; + return ch; +} + +Tcl_UniChar +Tcl_UniCharToLower( + int ch) /* Unicode character to convert. */ +{ + return (Tcl_UniChar) UCS4ToLower(ch); } /* @@ -1439,8 +1456,8 @@ Tcl_UniCharToLower( *---------------------------------------------------------------------- */ -Tcl_UniChar -Tcl_UniCharToTitle( +static int +UCS4ToTitle( int ch) /* Unicode character to convert. */ { int info = GetUniCharInfo(ch); @@ -1457,7 +1474,14 @@ Tcl_UniCharToTitle( } else if (mode == 0x4) { ch -= GetDelta(info); } - return (Tcl_UniChar) ch; + return ch; +} + +Tcl_UniChar +Tcl_UniCharToTitle( + int ch) /* Unicode character to convert. */ +{ + return (Tcl_UniChar) UCS4ToTitle(ch); } /* -- cgit v0.12 From bada59f796d2327a3f24d6887eb1169aff05c5ba Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 27 Apr 2020 12:32:24 +0000 Subject: bring back new tests --- tests/utf.test | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/utf.test b/tests/utf.test index b74d8f5..f604c25 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -1040,6 +1040,16 @@ test utf-11.3 {Tcl_UtfToUpper} { test utf-11.4 {Tcl_UtfToUpper} { string toupper \u01E3gh } \u01E2GH +test utf-11.5 {Tcl_UtfToUpper Georgian (new in Unicode 11)} { + string toupper \u10D0\u1C90 +} \u1C90\u1C90 +test utf-11.6 {Tcl_UtfToUpper beyond U+FFFF} {Uesc fullutf} { + string toupper \U10428 +} \U10400 +test utf-11.7 {Tcl_UtfToUpper beyond U+FFFF} {pairsTo4bytes} { + string toupper \uD801\uDC28 +} \uD801\uDC00 + test utf-12.1 {Tcl_UtfToLower} { string tolower {} @@ -1059,6 +1069,12 @@ test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} { test utf-12.6 {Tcl_UtfToUpper low/high surrogate)} { string tolower \uDC24\uD824 } \uDC24\uD824 +test utf-12.7 {Tcl_UtfToLower beyond U+FFFF} {Uesc fullutf} { + string tolower \U10400 +} \U10428 +test utf-12.8 {Tcl_UtfToLower beyond U+FFFF} {pairsTo4bytes} { + string tolower \uD801\uDC00 +} \uD801\uDC28 test utf-13.1 {Tcl_UtfToTitle} { string totitle {} @@ -1081,6 +1097,12 @@ test utf-13.6 {Tcl_UtfToTitle Georgian (new in Unicode 11)} { test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} { string totitle \uDC24\uD824 } \uDC24\uD824 +test utf-13.8 {Tcl_UtfToTitle beyond U+FFFF} {Uesc fullutf} { + string totitle \U10428 +} \U10400 +test utf-13.9 {Tcl_UtfToTitle beyond U+FFFF} {pairsTo4bytes} { + string totitle \uD801\uDC28 +} \uD801\uDC00 test utf-14.1 {Tcl_UtfNcasecmp} { string compare -nocase a b -- cgit v0.12 From 4ce7f0cd8682a44257e7189572ad13566fd7dbf4 Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 27 Apr 2020 12:41:36 +0000 Subject: Pull back another test from 8.7. --- tests/utf.test | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/utf.test b/tests/utf.test index f604c25..7c4831b 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -1049,7 +1049,9 @@ test utf-11.6 {Tcl_UtfToUpper beyond U+FFFF} {Uesc fullutf} { test utf-11.7 {Tcl_UtfToUpper beyond U+FFFF} {pairsTo4bytes} { string toupper \uD801\uDC28 } \uD801\uDC00 - +test utf-11.8 {Tcl_UtfToUpper low/high surrogate)} { + string toupper \uDC24\uD824 +} \uDC24\uD824 test utf-12.1 {Tcl_UtfToLower} { string tolower {} -- cgit v0.12 From b1d47f7bfb156ff7692c3f50c29633642d30c0bc Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 27 Apr 2020 13:18:16 +0000 Subject: silence compiler warning --- generic/tclTest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index d00c852..27e145c 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -7171,7 +7171,7 @@ TestUtfNextCmd( return TCL_OK; } - first = Tcl_UtfNext(buffer + 1); + first = result = Tcl_UtfNext(buffer + 1); while ((buffer[0] = *p++) != '\0') { /* Run Tcl_UtfNext with many more possible bytes at src[-1], all should give the same result */ result = Tcl_UtfNext(buffer + 1); -- cgit v0.12