diff options
| -rw-r--r-- | generic/tclTest.c | 49 | ||||
| -rw-r--r-- | generic/tclUtf.c | 53 | ||||
| -rw-r--r-- | tests/utf.test | 353 |
3 files changed, 279 insertions, 176 deletions
diff --git a/generic/tclTest.c b/generic/tclTest.c index 592998b..539d188 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -6708,42 +6708,49 @@ SimpleListVolumes(void) static int TestUtfNextCmd( - ClientData clientData, + ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]) { - int numBytes, offset = 0; + int numBytes; char *bytes; - const char *result; - Tcl_Obj *copy; + const char *result, *first; + char buffer[32]; + static const char tobetested[] = "\xFF\xFE\xF4\xF2\xF0\xEF\xE8\xE3\xE2\xE1\xE0\xC2\xC1\xC0\x82"; + const char *p = tobetested; + (void)dummy; - if (objc < 2 || objc > 3) { - Tcl_WrongNumArgs(interp, 1, objv, "bytes ?offset?"); + if (objc != 3 || strcmp(Tcl_GetString(objv[1]), "-bytestring")) { + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, "?-bytestring? bytes"); + return TCL_ERROR; + } + bytes = Tcl_GetStringFromObj(objv[1], &numBytes); + } else { + bytes = (char *) Tcl_GetByteArrayFromObj(objv[2], &numBytes); + } + + if (numBytes > (int)sizeof(buffer)-2) { + Tcl_AppendResult(interp, "\"testutfnext\" can only handle 30 bytes", NULL); return TCL_ERROR; } - bytes = (char *) Tcl_GetByteArrayFromObj(objv[1], &numBytes); + memcpy(buffer + 1, bytes, numBytes); + buffer[0] = buffer[numBytes + 1] = '\x00'; - if (objc == 3) { - if (TCL_OK != TclGetIntForIndex(interp, objv[2], numBytes, &offset)) { + first = Tcl_UtfNext(buffer + 1); + while ((buffer[0] = *p++) != '\0') { + /* Run Tcl_UtfNext with many more possible bytes at src[-1], all should give the same result */ + result = Tcl_UtfNext(buffer + 1); + if (first != result) { + Tcl_AppendResult(interp, "Tcl_UtfNext is not supposed to read src[-1]", NULL); return TCL_ERROR; } - if (offset < 0) { - offset = 0; - } - if (offset > numBytes) { - offset = numBytes; - } } - copy = Tcl_DuplicateObj(objv[1]); - bytes = (char *) Tcl_SetByteArrayLength(copy, numBytes+1); - bytes[numBytes] = '\0'; - result = Tcl_UtfNext(bytes + offset); - Tcl_SetObjResult(interp, Tcl_NewIntObj(result 
- bytes)); + Tcl_SetObjResult(interp, Tcl_NewIntObj(result - buffer - 1)); - Tcl_DecrRefCount(copy); return TCL_OK; } /* diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 1ba474e..a5e4fd4 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -93,7 +93,7 @@ static const unsigned char complete[256] = { */ static int UtfCount(int ch); -static int Overlong(unsigned char *src); +static int Invalid(unsigned char *src); /* *--------------------------------------------------------------------------- @@ -132,51 +132,52 @@ UtfCount( /* *--------------------------------------------------------------------------- * - * Overlong -- + * Invalid -- * * Utility routine to report whether /src/ points to the start of an - * overlong byte sequence that should be rejected. Caller guarantees - * that src[0] and src[1] are readable, and + * invalid byte sequence that should be rejected. This might be because + * it is an overlong encoding, or because it encodes something out of + * the proper range. Caller guarantees that src[0] and src[1] are + * readable, and * * (src[0] >= 0xC0) && (src[0] != 0xC1) * (src[1] >= 0x80) && (src[1] < 0xC0) - * (src[0] < ((TCL_UTF_MAX > 3) ? 0xF8 : 0xF0)) + * (src[0] < ((TCL_UTF_MAX > 3) ? 0xF5 : 0xF0)) * * Results: * A boolean. 
*--------------------------------------------------------------------------- */ -static CONST unsigned char overlong[3] = { - 0x80, /* \xD0 -- all sequences valid */ - 0xA0, /* \xE0\x80 through \xE0\x9F are invalid prefixes */ -#if TCL_UTF_MAX >= 3 - 0x90 /* \xF0\x80 through \xF0\x8F are invalid prefixes */ +static const unsigned char bounds[28] = { + 0x80, 0x80, /* \xC0 accepts \x80 only */ + 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, + 0x80, 0xBF, /* (\xC4 - \xDC) -- all sequences valid */ + 0xA0, 0xBF, /* \xE0\x80 through \xE0\x9F are invalid prefixes */ + 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, /* (\xE4 - \xEC) -- all valid */ +#if TCL_UTF_MAX > 3 + 0x90, 0xBF, /* \xF0\x80 through \xF0\x8F are invalid prefixes */ + 0x80, 0x8F /* \xF4\x90 and higher are invalid prefixes */ #else - 0xC0 /* Not used, but reject all again for safety. */ + 0xC0, 0xBF, /* Not used, but reject all again for safety. */ + 0xC0, 0xBF /* Not used, but reject all again for safety. */ #endif }; INLINE static int -Overlong( +Invalid( unsigned char *src) /* Points to lead byte of a UTF-8 byte sequence */ { unsigned char byte = *src; + int index; - if (byte % 0x10) { - /* Only lead bytes 0xC0, 0xE0, 0xF0 need examination */ + if (byte % 0x04) { + /* Only lead bytes 0xC0, 0xE0, 0xF0, 0xF4 need examination */ return 0; } - if (byte == 0xC0) { - if (src[1] == 0x80) { - /* Valid sequence: \xC0\x80 for \u0000 */ - return 0; - } - /* Reject overlong: \xC0\x81 - \xC0\xBF */ - return 1; - } - if (src[1] < overlong[(byte >> 4) - 0x0D]) { - /* Reject overlong */ + index = (byte - 0xC0) >> 1; + if (src[1] < bounds[index] || src[1] > bounds[index+1]) { + /* Out of bounds - report invalid. */ return 1; } return 0; @@ -772,7 +773,7 @@ Tcl_UtfNext( } next++; } - if (Overlong((unsigned char *)src)) { + if (Invalid((unsigned char *)src)) { return src + 1; } return next; @@ -885,7 +886,7 @@ Tcl_UtfPrev( * Use that capability to screen out overlong sequences. 
*/ - if (Overlong(look)) { + if (Invalid(look)) { /* Reject */ return fallback; } diff --git a/tests/utf.test b/tests/utf.test index 0ba2b85..cad91ec 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -16,14 +16,20 @@ if {[lsearch [namespace children] ::tcltest] == -1} { ::tcltest::loadTestedCommands catch [list package require -exact Tcltest [info patchlevel]] +testConstraint smallutf [expr {[format %c 0x010000] eq "\uFFFD"}] +testConstraint tip389 [expr {[string length \U010000] == 2}] +testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] + testConstraint testbytestring [llength [info commands testbytestring]] +testConstraint testfindfirst [llength [info commands testfindfirst]] +testConstraint testfindlast [llength [info commands testfindlast]] +testConstraint testnumutfchars [llength [info commands testnumutfchars]] +testConstraint teststringobj [llength [info commands teststringobj]] +testConstraint testutfnext [llength [info commands testutfnext]] +testConstraint testutfprev [llength [info commands testutfprev]] catch {unset x} -# Some tests require support for 4-byte UTF-8 sequences -testConstraint fullutf [expr {[format %c 0x010000] != "\uFFFD"}] -testConstraint tip389 [expr {[string length \U010000] == 2}] - test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} testbytestring { expr {"\x01" eq [testbytestring "\x01"]} } 1 @@ -94,7 +100,7 @@ test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring { string length [testbytestring "\xF0\x8F\xBF\xBF"] } {4} -test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} {testbytestring knownBug} {# Doesn't work with any TCL_UTF_MAX value +test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} {testbytestring} { string length [testbytestring "\xF4\x90\x80\x80"] } {4} test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestring 
{ @@ -115,7 +121,7 @@ test utf-4.2 {Tcl_NumUtfChars: length 1} {testnumutfchars testbytestring} { testnumutfchars [testbytestring "\xC2\xA2"] } {1} test utf-4.3 {Tcl_NumUtfChars: long string} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"] + testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E"] } {7} test utf-4.4 {Tcl_NumUtfChars: #u0000} {testnumutfchars testbytestring} { testnumutfchars [testbytestring "\xC0\x80"] @@ -124,13 +130,13 @@ test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars { testnumutfchars "" 0 } {0} test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "\xC2\xA2"] 2 + testnumutfchars [testbytestring "\xC2\xA2"] 1 } {1} test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"] 10 + testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E"] 10 } {7} test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "\xC0\x80"] 2 + testnumutfchars [testbytestring "\xC0\x80"] 1 } {1} # Bug [2738427]: Tcl_NumUtfChars(...) 
no overflow check test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} {testnumutfchars testbytestring} { @@ -140,10 +146,10 @@ test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars testnumutfchars [testbytestring "\x00"] 2 } {2} test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring \xf0\x9f\x92\xa9] 3 + testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 3 } {3} test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring tip389} { - testnumutfchars [testbytestring \xf0\x9f\x92\xa9] 4 + testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4 } {2} test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} { @@ -153,12 +159,10 @@ test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} { testfindlast [testbytestring "abcbc"] 98 } {bc} -testConstraint testutfnext [llength [info commands testutfnext]] - test utf-6.1 {Tcl_UtfNext} testutfnext { # This takes the pointer one past the terminating NUL. # This is really an invalid call. 
- testutfnext {} + testutfnext -bytestring {} } 1 test utf-6.2 {Tcl_UtfNext} testutfnext { testutfnext A @@ -167,271 +171,281 @@ test utf-6.3 {Tcl_UtfNext} testutfnext { testutfnext AA } 1 test utf-6.4 {Tcl_UtfNext} testutfnext { - testutfnext A\xA0 + testutfnext -bytestring A\xA0 } 1 test utf-6.5 {Tcl_UtfNext} testutfnext { - testutfnext A\xD0 + testutfnext -bytestring A\xD0 } 1 test utf-6.6 {Tcl_UtfNext} testutfnext { - testutfnext A\xE8 + testutfnext -bytestring A\xE8 } 1 test utf-6.7 {Tcl_UtfNext} testutfnext { - testutfnext A\xF4 + testutfnext -bytestring A\xF2 } 1 test utf-6.8 {Tcl_UtfNext} testutfnext { - testutfnext A\xF8 + testutfnext -bytestring A\xF8 } 1 test utf-6.9 {Tcl_UtfNext} testutfnext { - testutfnext \xA0 + testutfnext -bytestring \xA0 } 1 test utf-6.10 {Tcl_UtfNext} testutfnext { - testutfnext \xA0G + testutfnext -bytestring \xA0G } 1 test utf-6.11 {Tcl_UtfNext} testutfnext { - testutfnext \xA0\xA0 + testutfnext -bytestring \xA0\xA0 } 1 test utf-6.12 {Tcl_UtfNext} testutfnext { - testutfnext \xA0\xD0 + testutfnext -bytestring \xA0\xD0 } 1 test utf-6.13 {Tcl_UtfNext} testutfnext { - testutfnext \xA0\xE8 + testutfnext -bytestring \xA0\xE8 } 1 test utf-6.14 {Tcl_UtfNext} testutfnext { - testutfnext \xA0\xF4 + testutfnext -bytestring \xA0\xF2 } 1 test utf-6.15 {Tcl_UtfNext} testutfnext { - testutfnext \xA0\xF8 + testutfnext -bytestring \xA0\xF8 } 1 test utf-6.16 {Tcl_UtfNext} testutfnext { - testutfnext \xD0 + testutfnext -bytestring \xD0 } 1 test utf-6.17 {Tcl_UtfNext} testutfnext { - testutfnext \xD0A + testutfnext -bytestring \xD0G } 1 test utf-6.18 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0 + testutfnext -bytestring \xD0\xA0 } 2 test utf-6.19 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xD0 + testutfnext -bytestring \xD0\xD0 } 1 test utf-6.20 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xE8 + testutfnext -bytestring \xD0\xE8 } 1 test utf-6.21 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xF4 + testutfnext -bytestring \xD0\xF2 } 1 
test utf-6.22 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xF8 + testutfnext -bytestring \xD0\xF8 } 1 test utf-6.23 {Tcl_UtfNext} testutfnext { - testutfnext \xE8 + testutfnext -bytestring \xE8 } 1 test utf-6.24 {Tcl_UtfNext} testutfnext { - testutfnext \xE8A + testutfnext -bytestring \xE8G } 1 test utf-6.25 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0 + testutfnext -bytestring \xE8\xA0 } 1 test utf-6.26 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xD0 + testutfnext -bytestring \xE8\xD0 } 1 test utf-6.27 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xE8 + testutfnext -bytestring \xE8\xE8 } 1 test utf-6.28 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xF4 + testutfnext -bytestring \xE8\xF2 } 1 test utf-6.29 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xF8 + testutfnext -bytestring \xE8\xF8 } 1 test utf-6.30 {Tcl_UtfNext} testutfnext { - testutfnext \xF4 + testutfnext -bytestring \xF2 } 1 test utf-6.31 {Tcl_UtfNext} testutfnext { - testutfnext \xF4A + testutfnext -bytestring \xF2G } 1 test utf-6.32 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0 + testutfnext -bytestring \xF2\xA0 } 1 test utf-6.33 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xD0 + testutfnext -bytestring \xF2\xD0 } 1 test utf-6.34 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xE8 + testutfnext -bytestring \xF2\xE8 } 1 test utf-6.35 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xF4 + testutfnext -bytestring \xF2\xF2 } 1 test utf-6.36 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xF8 + testutfnext -bytestring \xF2\xF8 } 1 test utf-6.37 {Tcl_UtfNext} testutfnext { - testutfnext \xF8 + testutfnext -bytestring \xF8 } 1 test utf-6.38 {Tcl_UtfNext} testutfnext { - testutfnext \xF8A + testutfnext -bytestring \xF8G } 1 test utf-6.39 {Tcl_UtfNext} testutfnext { - testutfnext \xF8\xA0 + testutfnext -bytestring \xF8\xA0 } 1 test utf-6.40 {Tcl_UtfNext} testutfnext { - testutfnext \xF8\xD0 + testutfnext -bytestring \xF8\xD0 } 1 test utf-6.41 {Tcl_UtfNext} testutfnext { - testutfnext \xF8\xE8 
+ testutfnext -bytestring \xF8\xE8 } 1 test utf-6.42 {Tcl_UtfNext} testutfnext { - testutfnext \xF8\xF4 + testutfnext -bytestring \xF8\xF2 } 1 test utf-6.43 {Tcl_UtfNext} testutfnext { - testutfnext \xF8\xF8 + testutfnext -bytestring \xF8\xF8 } 1 test utf-6.44 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0G + testutfnext -bytestring \xD0\xA0G } 2 test utf-6.45 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0\xA0 + testutfnext -bytestring \xD0\xA0\xA0 } 2 test utf-6.46 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0\xD0 + testutfnext -bytestring \xD0\xA0\xD0 } 2 test utf-6.47 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0\xE8 + testutfnext -bytestring \xD0\xA0\xE8 } 2 test utf-6.48 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0\xF4 + testutfnext -bytestring \xD0\xA0\xF2 } 2 test utf-6.49 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0\xF8 + testutfnext -bytestring \xD0\xA0\xF8 } 2 test utf-6.50 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0G + testutfnext -bytestring \xE8\xA0G } 1 test utf-6.51 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0 + testutfnext -bytestring \xE8\xA0\xA0 } 3 test utf-6.52 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xD0 + testutfnext -bytestring \xE8\xA0\xD0 } 1 test utf-6.53 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xE8 + testutfnext -bytestring \xE8\xA0\xE8 } 1 test utf-6.54 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xF4 + testutfnext -bytestring \xE8\xA0\xF2 } 1 test utf-6.55 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xF8 + testutfnext -bytestring \xE8\xA0\xF8 } 1 test utf-6.56 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0G + testutfnext -bytestring \xF2\xA0G } 1 test utf-6.57 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0 + testutfnext -bytestring \xF2\xA0\xA0 } 1 test utf-6.58 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xD0 + testutfnext -bytestring \xF2\xA0\xD0 } 1 test utf-6.59 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xE8 + testutfnext 
-bytestring \xF2\xA0\xE8 } 1 test utf-6.60 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xF4 + testutfnext -bytestring \xF2\xA0\xF2 } 1 test utf-6.61 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xF8 + testutfnext -bytestring \xF2\xA0\xF8 } 1 test utf-6.62 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0G + testutfnext -bytestring \xE8\xA0\xA0G } 3 test utf-6.63 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0\xA0 + testutfnext -bytestring \xE8\xA0\xA0\xA0 } 3 test utf-6.64 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0\xD0 + testutfnext -bytestring \xE8\xA0\xA0\xD0 } 3 test utf-6.65 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0\xE8 + testutfnext -bytestring \xE8\xA0\xA0\xE8 } 3 test utf-6.66 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0\xF4 + testutfnext -bytestring \xE8\xA0\xA0\xF2 } 3 test utf-6.67 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0\xF8 + testutfnext -bytestring \xE8\xA0\xA0\xF8 } 3 test utf-6.68 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0G + testutfnext -bytestring \xF2\xA0\xA0G } 1 test utf-6.69 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xA0 + testutfnext -bytestring \xF2\xA0\xA0\xA0 } 4 test utf-6.70 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xD0 + testutfnext -bytestring \xF2\xA0\xA0\xD0 } 1 test utf-6.71 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xE8 + testutfnext -bytestring \xF2\xA0\xA0\xE8 } 1 test utf-6.71 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xF4 + testutfnext -bytestring \xF2\xA0\xA0\xF2 } 1 test utf-6.73 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xF8 + testutfnext -bytestring \xF2\xA0\xA0\xF8 } 1 test utf-6.74 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xA0G + testutfnext -bytestring \xF2\xA0\xA0\xA0G } 4 test utf-6.75 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xA0\xA0 + testutfnext -bytestring \xF2\xA0\xA0\xA0\xA0 } 4 test utf-6.76 {Tcl_UtfNext} testutfnext { - testutfnext 
\xF4\xA0\xA0\xA0\xD0 + testutfnext -bytestring \xF2\xA0\xA0\xA0\xD0 } 4 test utf-6.77 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xA0\xE8 + testutfnext -bytestring \xF2\xA0\xA0\xA0\xE8 } 4 test utf-6.78 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xA0\xF4 + testutfnext -bytestring \xF2\xA0\xA0\xA0\xF2 } 4 test utf-6.79 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xA0G\xF8 + testutfnext -bytestring \xF2\xA0\xA0\xA0G\xF8 } 4 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xC0\x80 + testutfnext -bytestring \xC0\x80 } 2 test utf-6.81 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xC0\x81 + testutfnext -bytestring \xC0\x81 } 1 test utf-6.82 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xC1\x80 + testutfnext -bytestring \xC1\x80 } 1 test utf-6.83 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xC2\x80 + testutfnext -bytestring \xC2\x80 } 2 test utf-6.84 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xE0\x80\x80 + testutfnext -bytestring \xE0\x80\x80 } 1 test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xE0\xA0\x80 + testutfnext -bytestring \xE0\xA0\x80 } 3 test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xF0\x80\x80\x80 + testutfnext -bytestring \xF0\x80\x80\x80 } 1 -test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext} { - testutfnext \xF0\x90\x80\x80 +test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext smallutf} { + testutfnext -bytestring \xF0\x90\x80\x80 +} 1 +test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} { + testutfnext -bytestring \xF0\x90\x80\x80 } 4 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} { - testutfnext \xA0\xA0 + testutfnext -bytestring \xA0\xA0 } 1 -test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} { - testutfnext \xE8\xA0\xA0 1 -} 2 test 
utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} { - testutfnext \x80\x80 + testutfnext -bytestring \x80\x80 +} 1 +test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext smallutf} { + testutfnext -bytestring \xF4\x8F\xBF\xBF +} 1 +test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} { + testutfnext -bytestring \xF4\x8F\xBF\xBF +} 4 +test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext { + testutfnext -bytestring \xF4\x90\x80\x80 +} 1 +test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { + testutfnext -bytestring \xA0\xA0\xA0 +} 1 +test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { + testutfnext -bytestring \x80\x80\x80 } 1 -test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} { - testutfnext \xF0\x80\x80 1 -} 2 - -testConstraint testutfprev [llength [info commands testutfprev]] test utf-7.1 {Tcl_UtfPrev} testutfprev { testutfprev {} @@ -452,13 +466,13 @@ test utf-7.4.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xF8\xA0\xA0 2 } 1 test utf-7.5 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4 + testutfprev A\xF2 } 1 test utf-7.5.1 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0\xA0\xA0 2 + testutfprev A\xF2\xA0\xA0\xA0 2 } 1 test utf-7.5.2 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xF8\xA0\xA0 2 + testutfprev A\xF2\xF8\xA0\xA0 2 } 1 test utf-7.6 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8 @@ -496,14 +510,23 @@ test utf-7.9.1 {Tcl_UtfPrev} testutfprev { test utf-7.9.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xF8\xA0 3 } 2 -test utf-7.10 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0 +test utf-7.10.0 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF2\xA0 +} 1 +test utf-7.10.1 {Tcl_UtfPrev} {testutfprev fullutf} { + testutfprev A\xF2\xA0 } 1 -test utf-7.10.1 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0\xA0\xA0 3 +test 
utf-7.10.1.0 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF2\xA0\xA0\xA0 3 } 1 -test utf-7.10.2 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0\xF8\xA0 3 +test utf-7.10.1.1 {Tcl_UtfPrev} {testutfprev fullutf} { + testutfprev A\xF2\xA0\xA0\xA0 3 +} 1 +test utf-7.10.2.0 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF2\xA0\xF8\xA0 3 +} 1 +test utf-7.10.2.1 {Tcl_UtfPrev} {testutfprev fullutf} { + testutfprev A\xF2\xA0\xF8\xA0 3 } 1 test utf-7.11 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8\xA0 @@ -544,14 +567,23 @@ test utf-7.14.1 {Tcl_UtfPrev} testutfprev { test utf-7.14.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xA0\xF8 4 } 3 -test utf-7.15 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0\xA0 +test utf-7.15.0 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF2\xA0\xA0 +} 1 +test utf-7.15.1 {Tcl_UtfPrev} {testutfprev fullutf} { + testutfprev A\xF2\xA0\xA0 } 1 -test utf-7.15.1 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0\xA0\xA0 4 +test utf-7.15.1.0 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF2\xA0\xA0\xA0 4 } 1 -test utf-7.15.2 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0\xA0\xF8 4 +test utf-7.15.1.1 {Tcl_UtfPrev} {testutfprev fullutf} { + testutfprev A\xF2\xA0\xA0\xA0 4 +} 1 +test utf-7.15.2.0 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF2\xA0\xA0\xF8 4 +} 1 +test utf-7.15.2.1 {Tcl_UtfPrev} {testutfprev fullutf} { + testutfprev A\xF2\xA0\xA0\xF8 4 } 1 test utf-7.16 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8\xA0\xA0 @@ -583,8 +615,11 @@ test utf-7.18.2 {Tcl_UtfPrev} testutfprev { test utf-7.19 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xA0\xA0 } 4 -test utf-7.20 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0\xA0\xA0 +test utf-7.20.0 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF2\xA0\xA0\xA0 +} 1 +test utf-7.20.1 {Tcl_UtfPrev} {testutfprev fullutf} { + testutfprev A\xF2\xA0\xA0\xA0 } 1 test utf-7.21 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8\xA0\xA0\xA0 @@ 
-646,13 +681,22 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence} testutfprev { test utf-7.38 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\xA0\x80 2 } 1 -test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev} { +test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { + testutfprev A\xF0\x90\x80\x80 +} 4 +test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { testutfprev A\xF0\x90\x80\x80 } 1 -test utf-7.40 {Tcl_UtfPrev -- overlong sequence} {testutfprev} { +test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { + testutfprev A\xF0\x90\x80\x80 4 +} 3 +test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { testutfprev A\xF0\x90\x80\x80 4 } 1 -test utf-7.41 {Tcl_UtfPrev -- overlong sequence} {testutfprev} { +test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { + testutfprev A\xF0\x90\x80\x80 3 +} 2 +test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { testutfprev A\xF0\x90\x80\x80 3 } 1 test utf-7.42 {Tcl_UtfPrev -- overlong sequence} testutfprev { @@ -679,6 +723,39 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {te test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} { testutfprev \xE8\xA0\x00 2 } 0 +test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} { + testutfprev A\xF4\x8F\xBF\xBF +} 4 +test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { + testutfprev A\xF4\x8F\xBF\xBF +} 1 +test utf-7.48.1.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} { + testutfprev A\xF4\x8F\xBF\xBF 4 +} 3 +test utf-7.48.1.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { + testutfprev A\xF4\x8F\xBF\xBF 4 +} 1 +test utf-7.48.2.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} { + testutfprev A\xF4\x8F\xBF\xBF 3 +} 2 +test utf-7.48.2.1 {Tcl_UtfPrev, 
validity check [493dccc2de]} {testutfprev fullutf} { + testutfprev A\xF4\x8F\xBF\xBF 3 +} 1 +test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x8F\xBF\xBF 2 +} 1 +test utf-7.49 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x90\x80\x80 +} 4 +test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x90\x80\x80 4 +} 3 +test utf-7.49.2 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x90\x80\x80 3 +} 2 +test utf-7.49.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x90\x80\x80 2 +} 1 test utf-8.1 {Tcl_UniCharAtIndex: index = 0} { string index abcd 0 @@ -692,6 +769,18 @@ test utf-8.3 {Tcl_UniCharAtIndex: index > 0} { test utf-8.4 {Tcl_UniCharAtIndex: index > 0} { string index \u4E4E\u25A\xFF\u543 2 } "\uFF" +test utf-8.5 {Tcl_UniCharAtIndex: high surrogate} smallutf { + string index \uD842 0 +} "\uD842" +test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} { + string index \uDC42 0 +} "\uDC42" +test utf-8.7 {Tcl_UniCharAtIndex: Emoji} smallutf { + string index \uD83D\uDE00 0 +} "\uD83D" +test utf-8.8 {Tcl_UniCharAtIndex: Emoji} smallutf { + string index \uD83D\uDE00 1 +} "\uDE00" test utf-9.1 {Tcl_UtfAtIndex: index = 0} { string range abcd 0 2 @@ -699,6 +788,12 @@ test utf-9.1 {Tcl_UtfAtIndex: index = 0} { test utf-9.2 {Tcl_UtfAtIndex: index > 0} { string range \u4E4E\u25A\xFF\u543klmnop 1 5 } "\u25A\xFF\u543kl" +test utf-9.3 {Tcl_UtfAtIndex: index = 0, Emoji} smallutf { + string range \uD83D\uDE00G 0 0 +} "\uD83D" +test utf-9.4 {Tcl_UtfAtIndex: index > 0, Emoji} smallutf { + string range \uD83D\uDE00G 1 1 +} "\uDE00" test utf-10.1 {Tcl_UtfBackslash: dst == NULL} { |
