diff options
| -rw-r--r-- | generic/tclTest.c | 49 | ||||
| -rw-r--r-- | generic/tclUtf.c | 53 | ||||
| -rw-r--r-- | tests/utf.test | 316 |
3 files changed, 207 insertions, 211 deletions
diff --git a/generic/tclTest.c b/generic/tclTest.c index 592998b..539d188 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -6708,42 +6708,49 @@ SimpleListVolumes(void) static int TestUtfNextCmd( - ClientData clientData, + ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]) { - int numBytes, offset = 0; + int numBytes; char *bytes; - const char *result; - Tcl_Obj *copy; + const char *result, *first; + char buffer[32]; + static const char tobetested[] = "\xFF\xFE\xF4\xF2\xF0\xEF\xE8\xE3\xE2\xE1\xE0\xC2\xC1\xC0\x82"; + const char *p = tobetested; + (void)dummy; - if (objc < 2 || objc > 3) { - Tcl_WrongNumArgs(interp, 1, objv, "bytes ?offset?"); + if (objc != 3 || strcmp(Tcl_GetString(objv[1]), "-bytestring")) { + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, "?-bytestring? bytes"); + return TCL_ERROR; + } + bytes = Tcl_GetStringFromObj(objv[1], &numBytes); + } else { + bytes = (char *) Tcl_GetByteArrayFromObj(objv[2], &numBytes); + } + + if (numBytes > (int)sizeof(buffer)-2) { + Tcl_AppendResult(interp, "\"testutfnext\" can only handle 30 bytes", NULL); return TCL_ERROR; } - bytes = (char *) Tcl_GetByteArrayFromObj(objv[1], &numBytes); + memcpy(buffer + 1, bytes, numBytes); + buffer[0] = buffer[numBytes + 1] = '\x00'; - if (objc == 3) { - if (TCL_OK != TclGetIntForIndex(interp, objv[2], numBytes, &offset)) { + first = Tcl_UtfNext(buffer + 1); + while ((buffer[0] = *p++) != '\0') { + /* Run Tcl_UtfNext with many more possible bytes at src[-1], all should give the same result */ + result = Tcl_UtfNext(buffer + 1); + if (first != result) { + Tcl_AppendResult(interp, "Tcl_UtfNext is not supposed to read src[-1]", NULL); return TCL_ERROR; } - if (offset < 0) { - offset = 0; - } - if (offset > numBytes) { - offset = numBytes; - } } - copy = Tcl_DuplicateObj(objv[1]); - bytes = (char *) Tcl_SetByteArrayLength(copy, numBytes+1); - bytes[numBytes] = '\0'; - result = Tcl_UtfNext(bytes + offset); - Tcl_SetObjResult(interp, Tcl_NewIntObj(result 
- bytes)); + Tcl_SetObjResult(interp, Tcl_NewIntObj(result - buffer - 1)); - Tcl_DecrRefCount(copy); return TCL_OK; } /* diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 35a98a1..2385574 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -86,7 +86,7 @@ static const unsigned char totalBytes[256] = { */ static int UtfCount(int ch); -static int Overlong(unsigned char *src); +static int Invalid(unsigned char *src); /* *--------------------------------------------------------------------------- @@ -125,51 +125,52 @@ UtfCount( /* *--------------------------------------------------------------------------- * - * Overlong -- + * Invalid -- * * Utility routine to report whether /src/ points to the start of an - * overlong byte sequence that should be rejected. Caller guarantees - * that src[0] and src[1] are readable, and + * invalid byte sequence that should be rejected. This might be because + * it is an overlong encoding, or because it encodes something out of + * the proper range. Caller guarantees that src[0] and src[1] are + * readable, and * * (src[0] >= 0xC0) && (src[0] != 0xC1) * (src[1] >= 0x80) && (src[1] < 0xC0) - * (src[0] < ((TCL_UTF_MAX > 3) ? 0xF8 : 0xF0)) + * (src[0] < ((TCL_UTF_MAX > 3) ? 0xF5 : 0xF0)) * * Results: * A boolean. 
*--------------------------------------------------------------------------- */ -static CONST unsigned char overlong[3] = { - 0x80, /* \xD0 -- all sequences valid */ - 0xA0, /* \xE0\x80 through \xE0\x9F are invalid prefixes */ -#if TCL_UTF_MAX >= 3 - 0x90 /* \xF0\x80 through \xF0\x8F are invalid prefixes */ +static const unsigned char bounds[28] = { + 0x80, 0x80, /* \xC0 accepts \x80 only */ + 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, + 0x80, 0xBF, /* (\xC4 - \xDC) -- all sequences valid */ + 0xA0, 0xBF, /* \xE0\x80 through \xE0\x9F are invalid prefixes */ + 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, /* (\xE4 - \xEC) -- all valid */ +#if TCL_UTF_MAX > 3 + 0x90, 0xBF, /* \xF0\x80 through \xF0\x8F are invalid prefixes */ + 0x80, 0x8F /* \xF4\x90 and higher are invalid prefixes */ #else - 0xC0 /* Not used, but reject all again for safety. */ + 0xC0, 0xBF, /* Not used, but reject all again for safety. */ + 0xC0, 0xBF /* Not used, but reject all again for safety. */ #endif }; INLINE static int -Overlong( +Invalid( unsigned char *src) /* Points to lead byte of a UTF-8 byte sequence */ { unsigned char byte = *src; + int index; - if (byte % 0x10) { - /* Only lead bytes 0xC0, 0xE0, 0xF0 need examination */ + if (byte % 0x04) { + /* Only lead bytes 0xC0, 0xE0, 0xF0, 0xF4 need examination */ return 0; } - if (byte == 0xC0) { - if (src[1] == 0x80) { - /* Valid sequence: \xC0\x80 for \u0000 */ - return 0; - } - /* Reject overlong: \xC0\x81 - \xC0\xBF */ - return 1; - } - if (src[1] < overlong[(byte >> 4) - 0x0D]) { - /* Reject overlong */ + index = (byte - 0xC0) >> 1; + if (src[1] < bounds[index] || src[1] > bounds[index+1]) { + /* Out of bounds - report invalid. */ return 1; } return 0; @@ -751,7 +752,7 @@ Tcl_UtfNext( } next++; } - if (Overlong((unsigned char *)src)) { + if (Invalid((unsigned char *)src)) { return src + 1; } return next; @@ -864,7 +865,7 @@ Tcl_UtfPrev( * Use that capability to screen out overlong sequences. 
*/ - if (Overlong(look)) { + if (Invalid(look)) { /* Reject */ return fallback; } diff --git a/tests/utf.test b/tests/utf.test index 35772ae..181b9ff 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -16,7 +16,7 @@ if {[lsearch [namespace children] ::tcltest] == -1} { ::tcltest::loadTestedCommands catch [list package require -exact Tcltest [info patchlevel]] -testConstraint smallutf [expr {[format %c 0x010000] == "\uFFFD"}] +testConstraint ucs2 [expr {[format %c 0x010000] == "\uFFFD"}] testConstraint fullutf [expr {[format %c 0x010000] != "\uFFFD"}] testConstraint tip389 [expr {[string length \U010000] == 2}] @@ -145,7 +145,7 @@ test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 3 } {3} -test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring smallutf} { +test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs2} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4 } {4} test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring tip389} { @@ -162,326 +162,314 @@ test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} { test utf-6.1 {Tcl_UtfNext} testutfnext { # This takes the pointer one past the terminating NUL. # This is really an invalid call. 
- testutfnext {} + testutfnext -bytestring {} } 1 test utf-6.2 {Tcl_UtfNext} testutfnext { - testutfnext A + testutfnext -bytestring A } 1 test utf-6.3 {Tcl_UtfNext} testutfnext { - testutfnext AA + testutfnext -bytestring AA } 1 test utf-6.4 {Tcl_UtfNext} testutfnext { - testutfnext A\xA0 + testutfnext -bytestring A\xA0 } 1 test utf-6.5 {Tcl_UtfNext} testutfnext { - testutfnext A\xD0 + testutfnext -bytestring A\xD0 } 1 test utf-6.6 {Tcl_UtfNext} testutfnext { - testutfnext A\xE8 + testutfnext -bytestring A\xE8 } 1 test utf-6.7 {Tcl_UtfNext} testutfnext { - testutfnext A\xF2 + testutfnext -bytestring A\xF2 } 1 test utf-6.8 {Tcl_UtfNext} testutfnext { - testutfnext A\xF8 + testutfnext -bytestring A\xF8 } 1 test utf-6.9 {Tcl_UtfNext} testutfnext { - testutfnext \xA0 + testutfnext -bytestring \xA0 } 1 test utf-6.10 {Tcl_UtfNext} testutfnext { - testutfnext \xA0G + testutfnext -bytestring \xA0G } 1 test utf-6.11 {Tcl_UtfNext} testutfnext { - testutfnext \xA0\xA0 + testutfnext -bytestring \xA0\xA0 } 2 test utf-6.12 {Tcl_UtfNext} testutfnext { - testutfnext \xA0\xD0 + testutfnext -bytestring \xA0\xD0 } 1 test utf-6.13 {Tcl_UtfNext} testutfnext { - testutfnext \xA0\xE8 + testutfnext -bytestring \xA0\xE8 } 1 test utf-6.14 {Tcl_UtfNext} testutfnext { - testutfnext \xA0\xF2 + testutfnext -bytestring \xA0\xF2 } 1 test utf-6.15 {Tcl_UtfNext} testutfnext { - testutfnext \xA0\xF8 + testutfnext -bytestring \xA0\xF8 } 1 test utf-6.16 {Tcl_UtfNext} testutfnext { - testutfnext \xD0 + testutfnext -bytestring \xD0 } 1 test utf-6.17 {Tcl_UtfNext} testutfnext { - testutfnext \xD0G + testutfnext -bytestring \xD0G } 1 test utf-6.18 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0 + testutfnext -bytestring \xD0\xA0 } 2 test utf-6.19 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xD0 + testutfnext -bytestring \xD0\xD0 } 1 test utf-6.20 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xE8 + testutfnext -bytestring \xD0\xE8 } 1 test utf-6.21 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xF2 
+ testutfnext -bytestring \xD0\xF2 } 1 test utf-6.22 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xF8 + testutfnext -bytestring \xD0\xF8 } 1 test utf-6.23 {Tcl_UtfNext} testutfnext { - testutfnext \xE8 + testutfnext -bytestring \xE8 } 1 test utf-6.24 {Tcl_UtfNext} testutfnext { - testutfnext \xE8G + testutfnext -bytestring \xE8G } 1 test utf-6.25 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0 + testutfnext -bytestring \xE8\xA0 } 1 test utf-6.26 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xD0 + testutfnext -bytestring \xE8\xD0 } 1 test utf-6.27 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xE8 + testutfnext -bytestring \xE8\xE8 } 1 test utf-6.28 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xF2 + testutfnext -bytestring \xE8\xF2 } 1 test utf-6.29 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xF8 + testutfnext -bytestring \xE8\xF8 } 1 test utf-6.30 {Tcl_UtfNext} testutfnext { - testutfnext \xF2 + testutfnext -bytestring \xF2 } 1 test utf-6.31 {Tcl_UtfNext} testutfnext { - testutfnext \xF2G + testutfnext -bytestring \xF2G } 1 test utf-6.32 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0 + testutfnext -bytestring \xF2\xA0 } 1 test utf-6.33 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xD0 + testutfnext -bytestring \xF2\xD0 } 1 test utf-6.34 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xE8 + testutfnext -bytestring \xF2\xE8 } 1 test utf-6.35 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xF2 + testutfnext -bytestring \xF2\xF2 } 1 test utf-6.36 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xF8 + testutfnext -bytestring \xF2\xF8 } 1 test utf-6.37 {Tcl_UtfNext} testutfnext { - testutfnext \xF8 + testutfnext -bytestring \xF8 } 1 test utf-6.38 {Tcl_UtfNext} testutfnext { - testutfnext \xF8G + testutfnext -bytestring \xF8G } 1 test utf-6.39 {Tcl_UtfNext} testutfnext { - testutfnext \xF8\xA0 + testutfnext -bytestring \xF8\xA0 } 1 test utf-6.40 {Tcl_UtfNext} testutfnext { - testutfnext \xF8\xD0 + testutfnext -bytestring \xF8\xD0 } 1 test utf-6.41 
{Tcl_UtfNext} testutfnext { - testutfnext \xF8\xE8 + testutfnext -bytestring \xF8\xE8 } 1 test utf-6.42 {Tcl_UtfNext} testutfnext { - testutfnext \xF8\xF2 + testutfnext -bytestring \xF8\xF2 } 1 test utf-6.43 {Tcl_UtfNext} testutfnext { - testutfnext \xF8\xF8 + testutfnext -bytestring \xF8\xF8 } 1 test utf-6.44 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0G + testutfnext -bytestring \xD0\xA0G } 2 test utf-6.45 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0\xA0 + testutfnext -bytestring \xD0\xA0\xA0 } 2 test utf-6.46 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0\xD0 + testutfnext -bytestring \xD0\xA0\xD0 } 2 test utf-6.47 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0\xE8 + testutfnext -bytestring \xD0\xA0\xE8 } 2 test utf-6.48 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0\xF2 + testutfnext -bytestring \xD0\xA0\xF2 } 2 test utf-6.49 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0\xF8 + testutfnext -bytestring \xD0\xA0\xF8 } 2 test utf-6.50 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0G + testutfnext -bytestring \xE8\xA0G } 1 test utf-6.51 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0 + testutfnext -bytestring \xE8\xA0\xA0 } 3 test utf-6.52 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xD0 + testutfnext -bytestring \xE8\xA0\xD0 } 1 test utf-6.53 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xE8 + testutfnext -bytestring \xE8\xA0\xE8 } 1 test utf-6.54 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xF2 + testutfnext -bytestring \xE8\xA0\xF2 } 1 test utf-6.55 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xF8 + testutfnext -bytestring \xE8\xA0\xF8 } 1 test utf-6.56 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0G + testutfnext -bytestring \xF2\xA0G } 1 test utf-6.57 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0 + testutfnext -bytestring \xF2\xA0\xA0 } 1 test utf-6.58 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xD0 + testutfnext -bytestring \xF2\xA0\xD0 } 1 test utf-6.59 {Tcl_UtfNext} testutfnext 
{ - testutfnext \xF2\xA0\xE8 + testutfnext -bytestring \xF2\xA0\xE8 } 1 test utf-6.60 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xF2 + testutfnext -bytestring \xF2\xA0\xF2 } 1 test utf-6.61 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xF8 + testutfnext -bytestring \xF2\xA0\xF8 } 1 test utf-6.62 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0G + testutfnext -bytestring \xE8\xA0\xA0G } 3 test utf-6.63 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0\xA0 + testutfnext -bytestring \xE8\xA0\xA0\xA0 } 3 test utf-6.64 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0\xD0 + testutfnext -bytestring \xE8\xA0\xA0\xD0 } 3 test utf-6.65 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0\xE8 + testutfnext -bytestring \xE8\xA0\xA0\xE8 } 3 test utf-6.66 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0\xF2 + testutfnext -bytestring \xE8\xA0\xA0\xF2 } 3 test utf-6.67 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0\xF8 + testutfnext -bytestring \xE8\xA0\xA0\xF8 } 3 test utf-6.68 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0G + testutfnext -bytestring \xF2\xA0\xA0G } 1 -test utf-6.69.0 {Tcl_UtfNext} {testutfnext smallutf} { - testutfnext \xF2\xA0\xA0\xA0 +test utf-6.69.0 {Tcl_UtfNext} {testutfnext ucs2} { + testutfnext -bytestring \xF2\xA0\xA0\xA0 } 1 test utf-6.69.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF2\xA0\xA0\xA0 + testutfnext -bytestring \xF2\xA0\xA0\xA0 } 4 test utf-6.70 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0\xD0 + testutfnext -bytestring \xF2\xA0\xA0\xD0 } 1 test utf-6.71 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0\xE8 + testutfnext -bytestring \xF2\xA0\xA0\xE8 } 1 test utf-6.71 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0\xF2 + testutfnext -bytestring \xF2\xA0\xA0\xF2 } 1 test utf-6.73 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0\xF8 + testutfnext -bytestring \xF2\xA0\xA0\xF8 } 1 -test utf-6.74.0 {Tcl_UtfNext} {testutfnext smallutf} { - testutfnext 
\xF2\xA0\xA0\xA0G +test utf-6.74.0 {Tcl_UtfNext} {testutfnext ucs2} { + testutfnext -bytestring \xF2\xA0\xA0\xA0G } 1 test utf-6.74.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF2\xA0\xA0\xA0G + testutfnext -bytestring \xF2\xA0\xA0\xA0G } 4 -test utf-6.75.0 {Tcl_UtfNext} {testutfnext smallutf} { - testutfnext \xF2\xA0\xA0\xA0\xA0 +test utf-6.75.0 {Tcl_UtfNext} {testutfnext ucs2} { + testutfnext -bytestring \xF2\xA0\xA0\xA0\xA0 } 1 test utf-6.75.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF2\xA0\xA0\xA0\xA0 + testutfnext -bytestring \xF2\xA0\xA0\xA0\xA0 } 4 -test utf-6.76.0 {Tcl_UtfNext} {testutfnext smallutf} { - testutfnext \xF2\xA0\xA0\xA0\xD0 +test utf-6.76.0 {Tcl_UtfNext} {testutfnext ucs2} { + testutfnext -bytestring \xF2\xA0\xA0\xA0\xD0 } 1 test utf-6.76.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF2\xA0\xA0\xA0\xD0 + testutfnext -bytestring \xF2\xA0\xA0\xA0\xD0 } 4 -test utf-6.77.0 {Tcl_UtfNext} {testutfnext smallutf} { - testutfnext \xF2\xA0\xA0\xA0\xE8 +test utf-6.77.0 {Tcl_UtfNext} {testutfnext ucs2} { + testutfnext -bytestring \xF2\xA0\xA0\xA0\xE8 } 1 test utf-6.77.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF2\xA0\xA0\xA0\xE8 + testutfnext -bytestring \xF2\xA0\xA0\xA0\xE8 } 4 -test utf-6.78.0 {Tcl_UtfNext} {testutfnext smallutf} { - testutfnext \xF2\xA0\xA0\xA0\xF2 +test utf-6.78.0 {Tcl_UtfNext} {testutfnext ucs2} { + testutfnext -bytestring \xF2\xA0\xA0\xA0\xF2 } 1 test utf-6.78.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF2\xA0\xA0\xA0\xF2 + testutfnext -bytestring \xF2\xA0\xA0\xA0\xF2 } 4 -test utf-6.79.0 {Tcl_UtfNext} {testutfnext smallutf} { - testutfnext \xF2\xA0\xA0\xA0G\xF8 +test utf-6.79.0 {Tcl_UtfNext} {testutfnext ucs2} { + testutfnext -bytestring \xF2\xA0\xA0\xA0G\xF8 } 1 test utf-6.79.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF2\xA0\xA0\xA0G\xF8 + testutfnext -bytestring \xF2\xA0\xA0\xA0G\xF8 } 4 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext { - 
testutfnext \xC0\x80 + testutfnext -bytestring \xC0\x80 } 2 test utf-6.81 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xC0\x81 + testutfnext -bytestring \xC0\x81 } 1 test utf-6.82 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xC1\x80 + testutfnext -bytestring \xC1\x80 } 1 test utf-6.83 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xC2\x80 + testutfnext -bytestring \xC2\x80 } 2 test utf-6.84 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xE0\x80\x80 + testutfnext -bytestring \xE0\x80\x80 } 1 test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xE0\xA0\x80 + testutfnext -bytestring \xE0\xA0\x80 } 3 test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext { - testutfnext \xF0\x80\x80\x80 + testutfnext -bytestring \xF0\x80\x80\x80 } 1 -test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext smallutf} { - testutfnext \xF0\x90\x80\x80 +test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext ucs2} { + testutfnext -bytestring \xF0\x90\x80\x80 } 1 test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} { - testutfnext \xF0\x90\x80\x80 + testutfnext -bytestring \xF0\x90\x80\x80 } 4 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} { - testutfnext \xA0\xA0 + testutfnext -bytestring \xA0\xA0 } 2 -test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} { - testutfnext \xE8\xA0\xA0 1 -} 3 test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} { - testutfnext \x80\x80 + testutfnext -bytestring \x80\x80 } 2 -test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} { - testutfnext \xF0\x80\x80 1 -} 3 -test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext smallutf} { - testutfnext \xF4\x8F\xBF\xBF +test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs2} { + testutfnext 
-bytestring \xF4\x8F\xBF\xBF } 1 test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} { - testutfnext \xF4\x8F\xBF\xBF + testutfnext -bytestring \xF4\x8F\xBF\xBF } 4 -test utf-6.91.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext smallutf} { - testutfnext \xF4\x90\x80\x80 +test utf-6.91.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs2} { + testutfnext -bytestring \xF4\x90\x80\x80 } 1 test utf-6.91.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} { - testutfnext \xF4\x90\x80\x80 -} 4 + testutfnext -bytestring \xF4\x90\x80\x80 +} 1 test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { - testutfnext \xA0\xA0\xA0 + testutfnext -bytestring \xA0\xA0\xA0 } 3 -test utf-6.92.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { - testutfnext \xF2\xA0\xA0\xA0 1 -} 4 test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { - testutfnext \x80\x80\x80 + testutfnext -bytestring \x80\x80\x80 } 3 -test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { - testutfnext \xF0\x80\x80\x80 1 -} 4 test utf-7.1 {Tcl_UtfPrev} testutfprev { testutfprev {} @@ -546,19 +534,19 @@ test utf-7.9.1 {Tcl_UtfPrev} testutfprev { test utf-7.9.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xF8\xA0 3 } 2 -test utf-7.10.0 {Tcl_UtfPrev} {testutfprev smallutf} { +test utf-7.10.0 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xF2\xA0 } 2 test utf-7.10.1 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF2\xA0 } 1 -test utf-7.10.1.0 {Tcl_UtfPrev} {testutfprev smallutf} { +test utf-7.10.1.0 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xF2\xA0\xA0\xA0 3 } 2 test utf-7.10.1.1 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF2\xA0\xA0\xA0 3 } 1 -test utf-7.10.2.0 {Tcl_UtfPrev} {testutfprev smallutf} { +test utf-7.10.2.0 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xF2\xA0\xF8\xA0 3 } 2 test 
utf-7.10.2.1 {Tcl_UtfPrev} {testutfprev fullutf} { @@ -603,19 +591,19 @@ test utf-7.14.1 {Tcl_UtfPrev} testutfprev { test utf-7.14.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xA0\xF8 4 } 3 -test utf-7.15.0 {Tcl_UtfPrev} {testutfprev smallutf} { +test utf-7.15.0 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xF2\xA0\xA0 } 3 test utf-7.15.1 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF2\xA0\xA0 } 1 -test utf-7.15.1.0 {Tcl_UtfPrev} {testutfprev smallutf} { +test utf-7.15.1.0 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xF2\xA0\xA0\xA0 4 } 3 test utf-7.15.1.1 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF2\xA0\xA0\xA0 4 } 1 -test utf-7.15.2.0 {Tcl_UtfPrev} {testutfprev smallutf} { +test utf-7.15.2.0 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xF2\xA0\xA0\xF8 4 } 3 test utf-7.15.2.1 {Tcl_UtfPrev} {testutfprev fullutf} { @@ -639,13 +627,13 @@ test utf-7.17.1 {Tcl_UtfPrev} testutfprev { test utf-7.17.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xD0\xA0\xA0\xF8 4 } 3 -test utf-7.18 {Tcl_UtfPrev} {testutfprev smallutf} { +test utf-7.18 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xA0\xA0\xA0 } 1 -test utf-7.18.1 {Tcl_UtfPrev} {testutfprev smallutf} { +test utf-7.18.1 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xA0\xA0\xA0\xA0 4 } 1 -test utf-7.18.2 {Tcl_UtfPrev} {testutfprev smallutf} { +test utf-7.18.2 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xA0\xA0\xA0\xF8 4 } 1 test utf-7.18.3 {Tcl_UtfPrev} {testutfprev fullutf} { @@ -657,31 +645,31 @@ test utf-7.18.4 {Tcl_UtfPrev} {testutfprev fullutf} { test utf-7.18.5 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xA0\xA0\xA0\xF8 4 } 3 -test utf-7.19 {Tcl_UtfPrev} {testutfprev smallutf} { +test utf-7.19 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xF8\xA0\xA0\xA0 } 2 test utf-7.19.1 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF8\xA0\xA0\xA0 } 4 -test utf-7.20 {Tcl_UtfPrev} {testutfprev smallutf} { +test utf-7.20 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev 
A\xF4\xA0\xA0\xA0 } 2 test utf-7.20.1 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF4\xA0\xA0\xA0 -} 1 -test utf-7.21 {Tcl_UtfPrev} {testutfprev smallutf} { +} 4 +test utf-7.21 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xE8\xA0\xA0\xA0 } 2 test utf-7.21.1 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xE8\xA0\xA0\xA0 } 4 -test utf-7.22 {Tcl_UtfPrev} {testutfprev smallutf} { +test utf-7.22 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xD0\xA0\xA0\xA0 } 2 test utf-7.22.1 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xD0\xA0\xA0\xA0 } 4 -test utf-7.23 {Tcl_UtfPrev} {testutfprev smallutf} { +test utf-7.23 {Tcl_UtfPrev} {testutfprev ucs2} { testutfprev A\xA0\xA0\xA0\xA0 } 2 test utf-7.23.1 {Tcl_UtfPrev} {testutfprev fullutf} { @@ -708,7 +696,7 @@ test utf-7.28 {Tcl_UtfPrev -- overlong sequence} testutfprev { test utf-7.28.1 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\x80\x80 2 } 1 -test utf-7.29 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { +test utf-7.29 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs2} { testutfprev A\xF0\x80\x80\x80 } 2 test utf-7.29.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { @@ -741,19 +729,19 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence} testutfprev { test utf-7.38 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\xA0\x80 2 } 1 -test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { +test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs2} { testutfprev A\xF0\x90\x80\x80 } 2 test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { testutfprev A\xF0\x90\x80\x80 } 1 -test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { +test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs2} { testutfprev A\xF0\x90\x80\x80 4 } 3 test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { testutfprev A\xF0\x90\x80\x80 4 } 1 -test utf-7.41.0 {Tcl_UtfPrev -- 
overlong sequence} {testutfprev smallutf} { +test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs2} { testutfprev A\xF0\x90\x80\x80 3 } 2 test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { @@ -771,7 +759,7 @@ test utf-7.44 {Tcl_UtfPrev -- no lead byte at start} testutfprev { test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} testutfprev { testutfprev \xA0\xA0\xA0 } 2 -test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} {testutfprev smallutf} { +test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} {testutfprev ucs2} { testutfprev \xA0\xA0\xA0\xA0 } 1 test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} {testutfprev fullutf} { @@ -786,19 +774,19 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {te test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} { testutfprev \xE8\xA0\x00 2 } 0 -test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} { +test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs2} { testutfprev A\xF4\x8F\xBF\xBF } 2 test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { testutfprev A\xF4\x8F\xBF\xBF } 1 -test utf-7.48.1.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} { +test utf-7.48.1.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs2} { testutfprev A\xF4\x8F\xBF\xBF 4 } 3 test utf-7.48.1.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { testutfprev A\xF4\x8F\xBF\xBF 4 } 1 -test utf-7.48.2.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} { +test utf-7.48.2.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs2} { testutfprev A\xF4\x8F\xBF\xBF 3 } 2 test utf-7.48.2.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { @@ -807,24 +795,24 @@ test utf-7.48.2.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullut test utf-7.48.3 {Tcl_UtfPrev, validity check 
[493dccc2de]} testutfprev { testutfprev A\xF4\x8F\xBF\xBF 2 } 1 -test utf-7.49.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} { +test utf-7.49.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs2} { testutfprev A\xF4\x90\x80\x80 } 2 test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { testutfprev A\xF4\x90\x80\x80 -} 1 -test utf-7.49.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} { +} 4 +test utf-7.49.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs2} { testutfprev A\xF4\x90\x80\x80 4 } 3 test utf-7.49.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { testutfprev A\xF4\x90\x80\x80 4 -} 1 -test utf-7.49.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} { +} 3 +test utf-7.49.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs2} { testutfprev A\xF4\x90\x80\x80 3 } 2 test utf-7.49.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { testutfprev A\xF4\x90\x80\x80 3 -} 1 +} 2 test utf-7.49.6 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { testutfprev A\xF4\x90\x80\x80 2 } 1 @@ -841,16 +829,16 @@ test utf-8.3 {Tcl_UniCharAtIndex: index > 0} { test utf-8.4 {Tcl_UniCharAtIndex: index > 0} { string index \u4E4E\u25A\xFF\u543 2 } "\uFF" -test utf-8.5 {Tcl_UniCharAtIndex: high surrogate} smallutf { +test utf-8.5 {Tcl_UniCharAtIndex: high surrogate} ucs2 { string index \uD842 0 } "\uD842" test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} { string index \uDC42 0 } "\uDC42" -test utf-8.7 {Tcl_UniCharAtIndex: Emoji} smallutf { +test utf-8.7 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index \uD83D\uDE00 0 } "\uD83D" -test utf-8.8 {Tcl_UniCharAtIndex: Emoji} { +test utf-8.8 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index \uD83D\uDE00 1 } "\uDE00" @@ -860,10 +848,10 @@ test utf-9.1 {Tcl_UtfAtIndex: index = 0} { test utf-9.2 {Tcl_UtfAtIndex: index > 0} { string range \u4E4E\u25A\xFF\u543klmnop 1 5 } "\u25A\xFF\u543kl" -test 
utf-9.3 {Tcl_UtfAtIndex: index = 0, Emoji} smallutf { +test utf-9.3 {Tcl_UtfAtIndex: index = 0, Emoji} ucs2 { string range \uD83D\uDE00G 0 0 } "\uD83D" -test utf-9.4 {Tcl_UtfAtIndex: index > 0, Emoji} smallutf { +test utf-9.4 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { string range \uD83D\uDE00G 1 1 } "\uDE00" @@ -988,7 +976,7 @@ test utf-12.4 {Tcl_UtfToLower} { test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} { string tolower \u10D0\u1C90 } \u10D0\u10D0 -test utf-12.6 {Tcl_UtfToUpper low/high surrogate)} smallutf { +test utf-12.6 {Tcl_UtfToUpper low/high surrogate)} ucs2 { string tolower \uDC24\uD824 } \uDC24\uD824 @@ -1010,7 +998,7 @@ test utf-13.5 {Tcl_UtfToTitle Georgian (new in Unicode 11)} { test utf-13.6 {Tcl_UtfToTitle Georgian (new in Unicode 11)} { string totitle \u1C90\u10D0 } \u1C90\u10D0 -test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} smallutf { +test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} ucs2 { string totitle \uDC24\uD824 } \uDC24\uD824 |
