From e7186db8a96017cbfe8baf62cb3a23ce279c1bb0 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 23 Apr 2020 19:07:08 +0000 Subject: Fix regression in Tcl_NumUtfChars, caused by this commit: [6596c4af31e29b5d]. Expectations of failing tests were adapted later, that's why this was missed. Lesson: Tcl_UtfNext() is _not_ just an optimized replacement for Tcl_UtfToUniChar(). Sorry, but this change is just too dangerous! Tcl_UniCharAtIndex() and Tcl_UtfAtIndex() most likely have the same regression when fed with invalid byte-sequences, therefore reverted those too. HOLD ON! These regressions are equally the result of [5c322bbd51]. It takes both changes to cause the failing tests. We need to argue about which change was the wrong one. --- generic/tclUtf.c | 95 ++++++++++++++++---------------------------------------- tests/utf.test | 2 +- 2 files changed, 28 insertions(+), 69 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 0e11e0e..e095555 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -579,7 +579,7 @@ Tcl_NumUtfChars( int length) /* The length of the string in bytes, or -1 * for strlen(string). 
*/ { - const char *next; + Tcl_UniChar ch = 0; register int i = 0; /* @@ -590,36 +590,22 @@ Tcl_NumUtfChars( */ if (length < 0) { - while ((*src != '\0') && (i < INT_MAX)) { - next = TclUtfNext(src); -#if TCL_UTF_MAX > 4 + while (*src != '\0') { + src += TclUtfToUniChar(src, &ch); i++; -#else - i += 1 + ((next - src) > 3); -#endif - src = next; } + if (i < 0) i = INT_MAX; /* Bug [2738427] */ } else { register const char *endPtr = src + length - TCL_UTF_MAX; while (src < endPtr) { - next = TclUtfNext(src); -#if TCL_UTF_MAX > 4 + src += TclUtfToUniChar(src, &ch); i++; -#else - i += 1 + ((next - src) > 3); -#endif - src = next; } endPtr += TCL_UTF_MAX; while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { - next = TclUtfNext(src); -#if TCL_UTF_MAX > 4 + src += TclUtfToUniChar(src, &ch); i++; -#else - i += 1 + ((next - src) > 3); -#endif - src = next; } if (src < endPtr) { i += endPtr - src; @@ -762,43 +748,15 @@ Tcl_UtfNext( * * Tcl_UtfPrev -- * - * The aim of this routine is to provide a way to move backward - * through a UTF-8 string. The caller is expected to pass non-NULL - * pointer arguments start and src. start points to the beginning - * of a string, and src >= start points to a location within (or just - * past the end) of the string. This routine always returns a - * pointer within the string (>= start). When (src == start), it - * returns start. When (src > start), it returns a pointer (< src) - * and (>= src - TCL_UTF_MAX). Subject to these constraints, the - * routine returns a pointer to the earliest byte in the string that - * starts a character when characters are read starting at start and - * that character might include the byte src[-1]. The routine will - * examine only those bytes in the range that might be returned. 
- * It will not examine the byte *src, and because of that cannot - * determine for certain in all circumstances whether the character - * that begins with the returned pointer will or will not include - * the byte src[-1]. In the scenario, where src points to the end of - * a buffer being filled, the returned pointer points to either the - * final complete character in the string or to the earliest byte - * that might start an incomplete character waiting for more bytes to - * complete. - * - * Because this routine always returns a value < src until the point - * it is forced to return start, it is useful as a backward iterator - * through a string that will always make progress and always be - * prevented from running past the beginning of the string. - * - * In a string where all characters are complete and properly formed, - * and the value of src points to the first byte of a character, - * repeated Tcl_UtfPrev calls will step to the starting bytes of - * characters, one character at a time. Within those limitations, - * Tcl_UtfPrev and Tcl_UtfNext are inverses. If either condition cannot - * be met, Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and - * the caller will have to take greater care. + * Given a pointer to some current location in a UTF-8 string, move + * backwards one character. This works correctly when the pointer is in + * the middle of a UTF-8 character. * * Results: - * A pointer to the start of a character in the string as described - * above. + * The return value is a pointer to the previous character in the UTF-8 + * string. If the current location was already at the beginning of the + * string, the return value will also be a pointer to the beginning of + * the string. * * Side effects: * None. 
@@ -927,7 +885,9 @@ Tcl_UniCharAtIndex( { Tcl_UniChar ch = 0; - TclUtfToUniChar(Tcl_UtfAtIndex(src, index), &ch); + while (index-- >= 0) { + src += TclUtfToUniChar(src, &ch); + } return ch; } @@ -953,20 +913,19 @@ Tcl_UtfAtIndex( register const char *src, /* The UTF-8 string. */ register int index) /* The position of the desired character. */ { - while (index-- > 0) { - const char *next = TclUtfNext(src); - -#if TCL_UTF_MAX <= 4 - /* - * 4-byte sequences generate two UCS-2 code units in the - * UTF-16 representation, so in the current indexing scheme - * we need to account for an extra index (total of two). - */ - index -= ((next - src) > 3); -#endif + Tcl_UniChar ch = 0; + int len = 0; - src = next; + while (index-- > 0) { + len = TclUtfToUniChar(src, &ch); + src += len; } +#if TCL_UTF_MAX == 4 + if ((ch >= 0xD800) && (len < 3)) { + /* Index points at character following high Surrogate */ + src += TclUtfToUniChar(src, &ch); + } +#endif return src; } diff --git a/tests/utf.test b/tests/utf.test index cb650f4..3f20ace 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -147,7 +147,7 @@ test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfc } 3 test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs2} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4 -} 4 +} 2 test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring tip389} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4 } 2 -- cgit v0.12 From 7d743964557590063e80992e255c4d5eeaeb0349 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 23 Apr 2020 19:14:29 +0000 Subject: Revert change in ParseLexeme() too --- generic/tclCompExpr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c index ed4e958..9c7ab8d 100644 --- a/generic/tclCompExpr.c +++ b/generic/tclCompExpr.c @@ -1885,6 +1885,7 @@ ParseLexeme( { const char *end; int scanned; + 
Tcl_UniChar ch = 0; Tcl_Obj *literal = NULL; unsigned char byte; @@ -2063,13 +2064,13 @@ ParseLexeme( if (!TclIsBareword(*start) || *start == '_') { if (Tcl_UtfCharComplete(start, numBytes)) { - scanned = TclUtfNext(start) - start; + scanned = TclUtfToUniChar(start, &ch); } else { char utfBytes[TCL_UTF_MAX]; memcpy(utfBytes, start, (size_t) numBytes); utfBytes[numBytes] = '\0'; - scanned = TclUtfNext(utfBytes) - utfBytes; + scanned = TclUtfToUniChar(utfBytes, &ch); } *lexemePtr = INVALID; Tcl_DecrRefCount(literal); -- cgit v0.12 From 4ca994daf1016eb4a36b7f9a7a16fc1f7df5b5a3 Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 23 Apr 2020 20:22:59 +0000 Subject: Demonstrate that the failing tests on the 8.6 branch tip can equally well be solved by backing out the recent changes associated with [27944a3661]. --- generic/tclUtf.c | 11 ++--------- tests/utf.test | 10 +++++----- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 0e11e0e..422c501 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -68,8 +68,8 @@ static const unsigned char totalBytes[256] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, #else /* Tcl_UtfCharComplete() might point to 2nd byte of valid 4-byte sequence */ - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, #endif 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, @@ -733,13 +733,6 @@ Tcl_UtfNext( int left = totalBytes[UCHAR(*src)]; const char *next = src + 1; - if (((*src) & 0xC0) == 0x80) { - if ((((*++src) & 0xC0) == 0x80) && (((*++src) & 0xC0) == 0x80)) { - ++src; - } - return src; - } - while (--left) { if ((*next & 0xC0) != 0x80) { /* diff --git 
a/tests/utf.test b/tests/utf.test index cb650f4..84f3f38 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -193,7 +193,7 @@ test utf-6.10 {Tcl_UtfNext} testutfnext { } 1 test utf-6.11 {Tcl_UtfNext} testutfnext { testutfnext -bytestring \xA0\xA0 -} 2 +} 1 test utf-6.12 {Tcl_UtfNext} testutfnext { testutfnext -bytestring \xA0\xD0 } 1 @@ -448,10 +448,10 @@ test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} { } 4 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} testutfnext { testutfnext -bytestring \xA0\xA0 -} 2 +} 1 test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} testutfnext { testutfnext -bytestring \x80\x80 -} 2 +} 1 test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs2} { testutfnext -bytestring \xF4\x8F\xBF\xBF } 1 @@ -466,10 +466,10 @@ test utf-6.91.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} } 1 test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { testutfnext -bytestring \xA0\xA0\xA0 -} 3 +} 1 test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { testutfnext -bytestring \x80\x80\x80 -} 3 +} 1 test utf-7.1 {Tcl_UtfPrev} testutfprev { testutfprev {} -- cgit v0.12 From 4804a3fdec7c1461645097c4aff7561ff9b2d210 Mon Sep 17 00:00:00 2001 From: dgp Date: Sat, 25 Apr 2020 16:26:12 +0000 Subject: Cherrypick [d2143c14c1]: Eliminate the -bytestring option of [testutfnext]. No caller needs anything else. 
--- generic/tclTest.c | 12 ++-- tests/utf.test | 210 +++++++++++++++++++++++++++--------------------------- 2 files changed, 109 insertions(+), 113 deletions(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index 856e9ea..1676bae 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -6721,15 +6721,11 @@ TestUtfNextCmd( const char *p = tobetested; (void)dummy; - if (objc != 3 || strcmp(Tcl_GetString(objv[1]), "-bytestring")) { - if (objc != 2) { - Tcl_WrongNumArgs(interp, 1, objv, "?-bytestring? bytes"); - return TCL_ERROR; - } - bytes = Tcl_GetStringFromObj(objv[1], &numBytes); - } else { - bytes = (char *) Tcl_GetByteArrayFromObj(objv[2], &numBytes); + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, "bytes"); + return TCL_ERROR; } + bytes = (char *) Tcl_GetByteArrayFromObj(objv[1], &numBytes); if (numBytes > (int)sizeof(buffer)-2) { Tcl_AppendResult(interp, "\"testutfnext\" can only handle 30 bytes", NULL); diff --git a/tests/utf.test b/tests/utf.test index c374209..63ae9ee 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -162,319 +162,319 @@ test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} { test utf-6.1 {Tcl_UtfNext} testutfnext { # This takes the pointer one past the terminating NUL. # This is really an invalid call. 
- testutfnext -bytestring {} + testutfnext {} } 1 test utf-6.2 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring A + testutfnext A } 1 test utf-6.3 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring AA + testutfnext AA } 1 test utf-6.4 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring A\xA0 + testutfnext A\xA0 } 1 test utf-6.5 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring A\xD0 + testutfnext A\xD0 } 1 test utf-6.6 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring A\xE8 + testutfnext A\xE8 } 1 test utf-6.7 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring A\xF2 + testutfnext A\xF2 } 1 test utf-6.8 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring A\xF8 + testutfnext A\xF8 } 1 test utf-6.9 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0 + testutfnext \xA0 } 1 test utf-6.10 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0G + testutfnext \xA0G } 1 test utf-6.11 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0\xA0 + testutfnext \xA0\xA0 } 1 test utf-6.12 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0\xD0 + testutfnext \xA0\xD0 } 1 test utf-6.13 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0\xE8 + testutfnext \xA0\xE8 } 1 test utf-6.14 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0\xF2 + testutfnext \xA0\xF2 } 1 test utf-6.15 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xA0\xF8 + testutfnext \xA0\xF8 } 1 test utf-6.16 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0 + testutfnext \xD0 } 1 test utf-6.17 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0G + testutfnext \xD0G } 1 test utf-6.18 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0 + testutfnext \xD0\xA0 } 2 test utf-6.19 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xD0 + testutfnext \xD0\xD0 } 1 test utf-6.20 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xE8 + testutfnext \xD0\xE8 } 1 test utf-6.21 {Tcl_UtfNext} testutfnext { - testutfnext 
-bytestring \xD0\xF2 + testutfnext \xD0\xF2 } 1 test utf-6.22 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xF8 + testutfnext \xD0\xF8 } 1 test utf-6.23 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8 + testutfnext \xE8 } 1 test utf-6.24 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8G + testutfnext \xE8G } 1 test utf-6.25 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0 + testutfnext \xE8\xA0 } 1 test utf-6.26 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xD0 + testutfnext \xE8\xD0 } 1 test utf-6.27 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xE8 + testutfnext \xE8\xE8 } 1 test utf-6.28 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xF2 + testutfnext \xE8\xF2 } 1 test utf-6.29 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xF8 + testutfnext \xE8\xF8 } 1 test utf-6.30 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2 + testutfnext \xF2 } 1 test utf-6.31 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2G + testutfnext \xF2G } 1 test utf-6.32 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0 + testutfnext \xF2\xA0 } 1 test utf-6.33 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xD0 + testutfnext \xF2\xD0 } 1 test utf-6.34 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xE8 + testutfnext \xF2\xE8 } 1 test utf-6.35 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xF2 + testutfnext \xF2\xF2 } 1 test utf-6.36 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xF8 + testutfnext \xF2\xF8 } 1 test utf-6.37 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8 + testutfnext \xF8 } 1 test utf-6.38 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8G + testutfnext \xF8G } 1 test utf-6.39 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8\xA0 + testutfnext \xF8\xA0 } 1 test utf-6.40 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8\xD0 + testutfnext \xF8\xD0 } 1 test utf-6.41 
{Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8\xE8 + testutfnext \xF8\xE8 } 1 test utf-6.42 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8\xF2 + testutfnext \xF8\xF2 } 1 test utf-6.43 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF8\xF8 + testutfnext \xF8\xF8 } 1 test utf-6.44 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0G + testutfnext \xD0\xA0G } 2 test utf-6.45 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0\xA0 + testutfnext \xD0\xA0\xA0 } 2 test utf-6.46 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0\xD0 + testutfnext \xD0\xA0\xD0 } 2 test utf-6.47 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0\xE8 + testutfnext \xD0\xA0\xE8 } 2 test utf-6.48 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0\xF2 + testutfnext \xD0\xA0\xF2 } 2 test utf-6.49 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xD0\xA0\xF8 + testutfnext \xD0\xA0\xF8 } 2 test utf-6.50 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0G + testutfnext \xE8\xA0G } 1 test utf-6.51 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0 + testutfnext \xE8\xA0\xA0 } 3 test utf-6.52 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xD0 + testutfnext \xE8\xA0\xD0 } 1 test utf-6.53 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xE8 + testutfnext \xE8\xA0\xE8 } 1 test utf-6.54 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xF2 + testutfnext \xE8\xA0\xF2 } 1 test utf-6.55 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xF8 + testutfnext \xE8\xA0\xF8 } 1 test utf-6.56 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0G + testutfnext \xF2\xA0G } 1 test utf-6.57 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xA0 + testutfnext \xF2\xA0\xA0 } 1 test utf-6.58 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xD0 + testutfnext \xF2\xA0\xD0 } 1 test utf-6.59 {Tcl_UtfNext} testutfnext 
{ - testutfnext -bytestring \xF2\xA0\xE8 + testutfnext \xF2\xA0\xE8 } 1 test utf-6.60 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xF2 + testutfnext \xF2\xA0\xF2 } 1 test utf-6.61 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xF8 + testutfnext \xF2\xA0\xF8 } 1 test utf-6.62 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0G + testutfnext \xE8\xA0\xA0G } 3 test utf-6.63 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0\xA0 + testutfnext \xE8\xA0\xA0\xA0 } 3 test utf-6.64 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0\xD0 + testutfnext \xE8\xA0\xA0\xD0 } 3 test utf-6.65 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0\xE8 + testutfnext \xE8\xA0\xA0\xE8 } 3 test utf-6.66 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0\xF2 + testutfnext \xE8\xA0\xA0\xF2 } 3 test utf-6.67 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xE8\xA0\xA0\xF8 + testutfnext \xE8\xA0\xA0\xF8 } 3 test utf-6.68 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xA0G + testutfnext \xF2\xA0\xA0G } 1 test utf-6.69.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext -bytestring \xF2\xA0\xA0\xA0 + testutfnext \xF2\xA0\xA0\xA0 } 1 test utf-6.69.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext -bytestring \xF2\xA0\xA0\xA0 + testutfnext \xF2\xA0\xA0\xA0 } 4 test utf-6.70 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xA0\xD0 + testutfnext \xF2\xA0\xA0\xD0 } 1 test utf-6.71 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xA0\xE8 + testutfnext \xF2\xA0\xA0\xE8 } 1 test utf-6.72 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xA0\xF2 + testutfnext \xF2\xA0\xA0\xF2 } 1 test utf-6.73 {Tcl_UtfNext} testutfnext { - testutfnext -bytestring \xF2\xA0\xA0\xF8 + testutfnext \xF2\xA0\xA0\xF8 } 1 test utf-6.74.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext -bytestring \xF2\xA0\xA0\xA0G + testutfnext \xF2\xA0\xA0\xA0G } 1 
test utf-6.74.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext -bytestring \xF2\xA0\xA0\xA0G + testutfnext \xF2\xA0\xA0\xA0G } 4 test utf-6.75.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xA0 + testutfnext \xF2\xA0\xA0\xA0\xA0 } 1 test utf-6.75.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xA0 + testutfnext \xF2\xA0\xA0\xA0\xA0 } 4 test utf-6.76.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xD0 + testutfnext \xF2\xA0\xA0\xA0\xD0 } 1 test utf-6.76.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xD0 + testutfnext \xF2\xA0\xA0\xA0\xD0 } 4 test utf-6.77.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xE8 + testutfnext \xF2\xA0\xA0\xA0\xE8 } 1 test utf-6.77.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xE8 + testutfnext \xF2\xA0\xA0\xA0\xE8 } 4 test utf-6.78.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xF2 + testutfnext \xF2\xA0\xA0\xA0\xF2 } 1 test utf-6.78.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext -bytestring \xF2\xA0\xA0\xA0\xF2 + testutfnext \xF2\xA0\xA0\xA0\xF2 } 4 test utf-6.79.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext -bytestring \xF2\xA0\xA0\xA0G\xF8 + testutfnext \xF2\xA0\xA0\xA0G\xF8 } 1 test utf-6.79.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext -bytestring \xF2\xA0\xA0\xA0G\xF8 + testutfnext \xF2\xA0\xA0\xA0G\xF8 } 4 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xC0\x80 + testutfnext \xC0\x80 } 2 test utf-6.81 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xC0\x81 + testutfnext \xC0\x81 } 1 test utf-6.82 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xC1\x80 + testutfnext \xC1\x80 } 1 test utf-6.83 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xC2\x80 + 
testutfnext \xC2\x80 } 2 test utf-6.84 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xE0\x80\x80 + testutfnext \xE0\x80\x80 } 1 test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xE0\xA0\x80 + testutfnext \xE0\xA0\x80 } 3 test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext -bytestring \xF0\x80\x80\x80 + testutfnext \xF0\x80\x80\x80 } 1 test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext ucs2} { - testutfnext -bytestring \xF0\x90\x80\x80 + testutfnext \xF0\x90\x80\x80 } 1 test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} { - testutfnext -bytestring \xF0\x90\x80\x80 + testutfnext \xF0\x90\x80\x80 } 4 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} testutfnext { - testutfnext -bytestring \xA0\xA0 + testutfnext \xA0\xA0 } 1 test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} testutfnext { - testutfnext -bytestring \x80\x80 + testutfnext \x80\x80 } 1 test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs2} { - testutfnext -bytestring \xF4\x8F\xBF\xBF + testutfnext \xF4\x8F\xBF\xBF } 1 test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} { - testutfnext -bytestring \xF4\x8F\xBF\xBF + testutfnext \xF4\x8F\xBF\xBF } 4 test utf-6.91.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs2} { - testutfnext -bytestring \xF4\x90\x80\x80 + testutfnext \xF4\x90\x80\x80 } 1 test utf-6.91.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} { - testutfnext -bytestring \xF4\x90\x80\x80 + testutfnext \xF4\x90\x80\x80 } 1 test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { - testutfnext -bytestring \xA0\xA0\xA0 + testutfnext \xA0\xA0\xA0 } 1 test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { - testutfnext -bytestring \x80\x80\x80 + testutfnext \x80\x80\x80 } 1 
test utf-6.125 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { - testutfnext -bytestring \xA0\xA0\xA0\xA0 + testutfnext \xA0\xA0\xA0\xA0 } 1 test utf-6.126 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { - testutfnext -bytestring \x80\x80\x80\x80 + testutfnext \x80\x80\x80\x80 } 1 test utf-7.1 {Tcl_UtfPrev} testutfprev { -- cgit v0.12 From 58876a37b8505fb2dd9d6f39acab6aa4bf54fb05 Mon Sep 17 00:00:00 2001 From: dgp Date: Sat, 25 Apr 2020 17:03:18 +0000 Subject: dup test name --- tests/utf.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/utf.test b/tests/utf.test index c739bb4..8814801 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -882,10 +882,10 @@ test utf-7.44 {Tcl_UtfPrev -- no lead byte at start} testutfprev { test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} testutfprev { testutfprev \xA0\xA0\xA0 } 2 -test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} {testutfprev ucs2} { +test utf-7.46.0 {Tcl_UtfPrev -- no lead byte at start} {testutfprev ucs2} { testutfprev \xA0\xA0\xA0\xA0 } 1 -test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} {testutfprev fullutf} { +test utf-7.46.1 {Tcl_UtfPrev -- no lead byte at start} {testutfprev fullutf} { testutfprev \xA0\xA0\xA0\xA0 } 3 test utf-7.47 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} testutfprev { -- cgit v0.12 From 4d159d9803745ea37abc4e06085682b1870a8fea Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 25 Apr 2020 22:16:37 +0000 Subject: encoding-12.6 only works for "ucs2" for now. 
Don't use (deprecated) INLINE and CONST --- generic/tclUtf.c | 8 ++++---- tests/encoding.test | 5 +++-- win/tclWinTime.c | 4 ++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 91a4b89..665607f 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -157,7 +157,7 @@ static const unsigned char bounds[28] = { #endif }; -INLINE static int +static int Invalid( unsigned char *src) /* Points to lead byte of a UTF-8 byte sequence */ { @@ -775,7 +775,7 @@ Tcl_UtfPrev( const char *start) /* Pointer to the beginning of the string */ { int trailBytesSeen = 0; /* How many trail bytes have been verified? */ - CONST char *fallback = src - 1; + const char *fallback = src - 1; /* If we cannot find a lead byte that might * start a prefix of a valid UTF byte sequence, * we will fallback to a one-byte back step */ @@ -831,13 +831,13 @@ Tcl_UtfPrev( /* Reject */ return fallback; } - return (CONST char *)look; + return (const char *)look; } /* We saw a trail byte. 
*/ trailBytesSeen++; - if ((CONST char *)look == start) { + if ((const char *)look == start) { /* * Do not read before the start of the string * diff --git a/tests/encoding.test b/tests/encoding.test index a8ce162..a969efc 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -36,7 +36,8 @@ proc runtests {} { testConstraint testencoding [llength [info commands testencoding]] testConstraint testbytestring [llength [info commands testbytestring]] testConstraint teststringbytes [llength [info commands teststringbytes]] -testConstraint fullutf [expr {[format %c 0x010000] ne "\ufffd"}] +testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] +testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] testConstraint exec [llength [info commands exec]] testConstraint testgetdefenc [llength [info commands testgetdefenc]] @@ -305,7 +306,7 @@ test encoding-12.5 {LoadTableEncoding: symbol encoding} { append x [encoding convertto symbol \u67] append x [encoding convertfrom symbol \x67] } "\x67\x67\u3b3" -test encoding-12.6 {LoadTableEncoding: overflow in char value} fullutf { +test encoding-12.6 {LoadTableEncoding: overflow in char value} ucs2 { encoding convertto iso8859-3 \U010000 } "?" diff --git a/win/tclWinTime.c b/win/tclWinTime.c index a434d86..976dd61 100644 --- a/win/tclWinTime.c +++ b/win/tclWinTime.c @@ -1358,7 +1358,7 @@ TclpGmtime( #if defined(_WIN64) || defined(_USE_64BIT_TIME_T) || (defined(_MSC_VER) && _MSC_VER < 1400) return gmtime(timePtr); #else - return _gmtime32((CONST __time32_t *)timePtr); + return _gmtime32((const __time32_t *)timePtr); #endif } @@ -1393,7 +1393,7 @@ TclpLocaltime( #if defined(_WIN64) || defined(_USE_64BIT_TIME_T) || (defined(_MSC_VER) && _MSC_VER < 1400) return localtime(timePtr); #else - return _localtime32((CONST __time32_t *)timePtr); + return _localtime32((const __time32_t *)timePtr); #endif } -- cgit v0.12