summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- generic/tclTest.c 49
-rw-r--r-- generic/tclUtf.c 53
-rw-r--r-- tests/utf.test 353
3 files changed, 279 insertions, 176 deletions
diff --git a/generic/tclTest.c b/generic/tclTest.c
index 592998b..539d188 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -6708,42 +6708,49 @@ SimpleListVolumes(void)
static int
TestUtfNextCmd(
- ClientData clientData,
+ ClientData dummy,
Tcl_Interp *interp,
int objc,
Tcl_Obj *const objv[])
{
- int numBytes, offset = 0;
+ int numBytes;
char *bytes;
- const char *result;
- Tcl_Obj *copy;
+ const char *result, *first;
+ char buffer[32];
+ static const char tobetested[] = "\xFF\xFE\xF4\xF2\xF0\xEF\xE8\xE3\xE2\xE1\xE0\xC2\xC1\xC0\x82";
+ const char *p = tobetested;
+ (void)dummy;
- if (objc < 2 || objc > 3) {
- Tcl_WrongNumArgs(interp, 1, objv, "bytes ?offset?");
+ if (objc != 3 || strcmp(Tcl_GetString(objv[1]), "-bytestring")) {
+ if (objc != 2) {
+ Tcl_WrongNumArgs(interp, 1, objv, "?-bytestring? bytes");
+ return TCL_ERROR;
+ }
+ bytes = Tcl_GetStringFromObj(objv[1], &numBytes);
+ } else {
+ bytes = (char *) Tcl_GetByteArrayFromObj(objv[2], &numBytes);
+ }
+
+ if (numBytes > (int)sizeof(buffer)-2) {
+ Tcl_AppendResult(interp, "\"testutfnext\" can only handle 30 bytes", NULL);
return TCL_ERROR;
}
- bytes = (char *) Tcl_GetByteArrayFromObj(objv[1], &numBytes);
+ memcpy(buffer + 1, bytes, numBytes);
+ buffer[0] = buffer[numBytes + 1] = '\x00';
- if (objc == 3) {
- if (TCL_OK != TclGetIntForIndex(interp, objv[2], numBytes, &offset)) {
+ first = Tcl_UtfNext(buffer + 1);
+ while ((buffer[0] = *p++) != '\0') {
+ /* Run Tcl_UtfNext with many more possible bytes at src[-1], all should give the same result */
+ result = Tcl_UtfNext(buffer + 1);
+ if (first != result) {
+ Tcl_AppendResult(interp, "Tcl_UtfNext is not supposed to read src[-1]", NULL);
return TCL_ERROR;
}
- if (offset < 0) {
- offset = 0;
- }
- if (offset > numBytes) {
- offset = numBytes;
- }
}
- copy = Tcl_DuplicateObj(objv[1]);
- bytes = (char *) Tcl_SetByteArrayLength(copy, numBytes+1);
- bytes[numBytes] = '\0';
- result = Tcl_UtfNext(bytes + offset);
- Tcl_SetObjResult(interp, Tcl_NewIntObj(result - bytes));
+ Tcl_SetObjResult(interp, Tcl_NewIntObj(result - buffer - 1));
- Tcl_DecrRefCount(copy);
return TCL_OK;
}
/*
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 1ba474e..a5e4fd4 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -93,7 +93,7 @@ static const unsigned char complete[256] = {
*/
static int UtfCount(int ch);
-static int Overlong(unsigned char *src);
+static int Invalid(unsigned char *src);
/*
*---------------------------------------------------------------------------
@@ -132,51 +132,52 @@ UtfCount(
/*
*---------------------------------------------------------------------------
*
- * Overlong --
+ * Invalid --
*
* Utility routine to report whether /src/ points to the start of an
- * overlong byte sequence that should be rejected. Caller guarantees
- * that src[0] and src[1] are readable, and
+ * invalid byte sequence that should be rejected. This might be because
+ * it is an overlong encoding, or because it encodes something out of
+ * the proper range. Caller guarantees that src[0] and src[1] are
+ * readable, and
*
* (src[0] >= 0xC0) && (src[0] != 0xC1)
* (src[1] >= 0x80) && (src[1] < 0xC0)
- * (src[0] < ((TCL_UTF_MAX > 3) ? 0xF8 : 0xF0))
+ * (src[0] < ((TCL_UTF_MAX > 3) ? 0xF5 : 0xF0))
*
* Results:
* A boolean.
*---------------------------------------------------------------------------
*/
-static CONST unsigned char overlong[3] = {
- 0x80, /* \xD0 -- all sequences valid */
- 0xA0, /* \xE0\x80 through \xE0\x9F are invalid prefixes */
-#if TCL_UTF_MAX >= 3
- 0x90 /* \xF0\x80 through \xF0\x8F are invalid prefixes */
+static const unsigned char bounds[28] = {
+ 0x80, 0x80, /* \xC0 accepts \x80 only */
+ 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF,
+ 0x80, 0xBF, /* (\xC4 - \xDC) -- all sequences valid */
+ 0xA0, 0xBF, /* \xE0\x80 through \xE0\x9F are invalid prefixes */
+ 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, /* (\xE4 - \xEC) -- all valid */
+#if TCL_UTF_MAX > 3
+ 0x90, 0xBF, /* \xF0\x80 through \xF0\x8F are invalid prefixes */
+ 0x80, 0x8F /* \xF4\x90 and higher are invalid prefixes */
#else
- 0xC0 /* Not used, but reject all again for safety. */
+ 0xC0, 0xBF, /* Not used, but reject all again for safety. */
+ 0xC0, 0xBF /* Not used, but reject all again for safety. */
#endif
};
INLINE static int
-Overlong(
+Invalid(
unsigned char *src) /* Points to lead byte of a UTF-8 byte sequence */
{
unsigned char byte = *src;
+ int index;
- if (byte % 0x10) {
- /* Only lead bytes 0xC0, 0xE0, 0xF0 need examination */
+ if (byte % 0x04) {
+ /* Only lead bytes 0xC0, 0xE0, 0xF0, 0xF4 need examination */
return 0;
}
- if (byte == 0xC0) {
- if (src[1] == 0x80) {
- /* Valid sequence: \xC0\x80 for \u0000 */
- return 0;
- }
- /* Reject overlong: \xC0\x81 - \xC0\xBF */
- return 1;
- }
- if (src[1] < overlong[(byte >> 4) - 0x0D]) {
- /* Reject overlong */
+ index = (byte - 0xC0) >> 1;
+ if (src[1] < bounds[index] || src[1] > bounds[index+1]) {
+ /* Out of bounds - report invalid. */
return 1;
}
return 0;
@@ -772,7 +773,7 @@ Tcl_UtfNext(
}
next++;
}
- if (Overlong((unsigned char *)src)) {
+ if (Invalid((unsigned char *)src)) {
return src + 1;
}
return next;
@@ -885,7 +886,7 @@ Tcl_UtfPrev(
* Use that capability to screen out overlong sequences.
*/
- if (Overlong(look)) {
+ if (Invalid(look)) {
/* Reject */
return fallback;
}
diff --git a/tests/utf.test b/tests/utf.test
index 0ba2b85..cad91ec 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -16,14 +16,20 @@ if {[lsearch [namespace children] ::tcltest] == -1} {
::tcltest::loadTestedCommands
catch [list package require -exact Tcltest [info patchlevel]]
+testConstraint smallutf [expr {[format %c 0x010000] eq "\uFFFD"}]
+testConstraint tip389 [expr {[string length \U010000] == 2}]
+testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}]
+
testConstraint testbytestring [llength [info commands testbytestring]]
+testConstraint testfindfirst [llength [info commands testfindfirst]]
+testConstraint testfindlast [llength [info commands testfindlast]]
+testConstraint testnumutfchars [llength [info commands testnumutfchars]]
+testConstraint teststringobj [llength [info commands teststringobj]]
+testConstraint testutfnext [llength [info commands testutfnext]]
+testConstraint testutfprev [llength [info commands testutfprev]]
catch {unset x}
-# Some tests require support for 4-byte UTF-8 sequences
-testConstraint fullutf [expr {[format %c 0x010000] != "\uFFFD"}]
-testConstraint tip389 [expr {[string length \U010000] == 2}]
-
test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} testbytestring {
expr {"\x01" eq [testbytestring "\x01"]}
} 1
@@ -94,7 +100,7 @@ test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints
test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring {
string length [testbytestring "\xF0\x8F\xBF\xBF"]
} {4}
-test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} {testbytestring knownBug} {# Doesn't work with any TCL_UTF_MAX value
+test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} {testbytestring} {
string length [testbytestring "\xF4\x90\x80\x80"]
} {4}
test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestring {
@@ -115,7 +121,7 @@ test utf-4.2 {Tcl_NumUtfChars: length 1} {testnumutfchars testbytestring} {
testnumutfchars [testbytestring "\xC2\xA2"]
} {1}
test utf-4.3 {Tcl_NumUtfChars: long string} {testnumutfchars testbytestring} {
- testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"]
+ testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E"]
} {7}
test utf-4.4 {Tcl_NumUtfChars: #u0000} {testnumutfchars testbytestring} {
testnumutfchars [testbytestring "\xC0\x80"]
@@ -124,13 +130,13 @@ test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars {
testnumutfchars "" 0
} {0}
test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {testnumutfchars testbytestring} {
- testnumutfchars [testbytestring "\xC2\xA2"] 2
+ testnumutfchars [testbytestring "\xC2\xA2"] 1
} {1}
test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {testnumutfchars testbytestring} {
- testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"] 10
+ testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E"] 10
} {7}
test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {testnumutfchars testbytestring} {
- testnumutfchars [testbytestring "\xC0\x80"] 2
+ testnumutfchars [testbytestring "\xC0\x80"] 1
} {1}
# Bug [2738427]: Tcl_NumUtfChars(...) no overflow check
test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} {testnumutfchars testbytestring} {
@@ -140,10 +146,10 @@ test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars
testnumutfchars [testbytestring "\x00"] 2
} {2}
test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} {
- testnumutfchars [testbytestring \xf0\x9f\x92\xa9] 3
+ testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 3
} {3}
test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring tip389} {
- testnumutfchars [testbytestring \xf0\x9f\x92\xa9] 4
+ testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4
} {2}
test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} {
@@ -153,12 +159,10 @@ test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} {
testfindlast [testbytestring "abcbc"] 98
} {bc}
-testConstraint testutfnext [llength [info commands testutfnext]]
-
test utf-6.1 {Tcl_UtfNext} testutfnext {
# This takes the pointer one past the terminating NUL.
# This is really an invalid call.
- testutfnext {}
+ testutfnext -bytestring {}
} 1
test utf-6.2 {Tcl_UtfNext} testutfnext {
testutfnext A
@@ -167,271 +171,281 @@ test utf-6.3 {Tcl_UtfNext} testutfnext {
testutfnext AA
} 1
test utf-6.4 {Tcl_UtfNext} testutfnext {
- testutfnext A\xA0
+ testutfnext -bytestring A\xA0
} 1
test utf-6.5 {Tcl_UtfNext} testutfnext {
- testutfnext A\xD0
+ testutfnext -bytestring A\xD0
} 1
test utf-6.6 {Tcl_UtfNext} testutfnext {
- testutfnext A\xE8
+ testutfnext -bytestring A\xE8
} 1
test utf-6.7 {Tcl_UtfNext} testutfnext {
- testutfnext A\xF4
+ testutfnext -bytestring A\xF2
} 1
test utf-6.8 {Tcl_UtfNext} testutfnext {
- testutfnext A\xF8
+ testutfnext -bytestring A\xF8
} 1
test utf-6.9 {Tcl_UtfNext} testutfnext {
- testutfnext \xA0
+ testutfnext -bytestring \xA0
} 1
test utf-6.10 {Tcl_UtfNext} testutfnext {
- testutfnext \xA0G
+ testutfnext -bytestring \xA0G
} 1
test utf-6.11 {Tcl_UtfNext} testutfnext {
- testutfnext \xA0\xA0
+ testutfnext -bytestring \xA0\xA0
} 1
test utf-6.12 {Tcl_UtfNext} testutfnext {
- testutfnext \xA0\xD0
+ testutfnext -bytestring \xA0\xD0
} 1
test utf-6.13 {Tcl_UtfNext} testutfnext {
- testutfnext \xA0\xE8
+ testutfnext -bytestring \xA0\xE8
} 1
test utf-6.14 {Tcl_UtfNext} testutfnext {
- testutfnext \xA0\xF4
+ testutfnext -bytestring \xA0\xF2
} 1
test utf-6.15 {Tcl_UtfNext} testutfnext {
- testutfnext \xA0\xF8
+ testutfnext -bytestring \xA0\xF8
} 1
test utf-6.16 {Tcl_UtfNext} testutfnext {
- testutfnext \xD0
+ testutfnext -bytestring \xD0
} 1
test utf-6.17 {Tcl_UtfNext} testutfnext {
- testutfnext \xD0A
+ testutfnext -bytestring \xD0G
} 1
test utf-6.18 {Tcl_UtfNext} testutfnext {
- testutfnext \xD0\xA0
+ testutfnext -bytestring \xD0\xA0
} 2
test utf-6.19 {Tcl_UtfNext} testutfnext {
- testutfnext \xD0\xD0
+ testutfnext -bytestring \xD0\xD0
} 1
test utf-6.20 {Tcl_UtfNext} testutfnext {
- testutfnext \xD0\xE8
+ testutfnext -bytestring \xD0\xE8
} 1
test utf-6.21 {Tcl_UtfNext} testutfnext {
- testutfnext \xD0\xF4
+ testutfnext -bytestring \xD0\xF2
} 1
test utf-6.22 {Tcl_UtfNext} testutfnext {
- testutfnext \xD0\xF8
+ testutfnext -bytestring \xD0\xF8
} 1
test utf-6.23 {Tcl_UtfNext} testutfnext {
- testutfnext \xE8
+ testutfnext -bytestring \xE8
} 1
test utf-6.24 {Tcl_UtfNext} testutfnext {
- testutfnext \xE8A
+ testutfnext -bytestring \xE8G
} 1
test utf-6.25 {Tcl_UtfNext} testutfnext {
- testutfnext \xE8\xA0
+ testutfnext -bytestring \xE8\xA0
} 1
test utf-6.26 {Tcl_UtfNext} testutfnext {
- testutfnext \xE8\xD0
+ testutfnext -bytestring \xE8\xD0
} 1
test utf-6.27 {Tcl_UtfNext} testutfnext {
- testutfnext \xE8\xE8
+ testutfnext -bytestring \xE8\xE8
} 1
test utf-6.28 {Tcl_UtfNext} testutfnext {
- testutfnext \xE8\xF4
+ testutfnext -bytestring \xE8\xF2
} 1
test utf-6.29 {Tcl_UtfNext} testutfnext {
- testutfnext \xE8\xF8
+ testutfnext -bytestring \xE8\xF8
} 1
test utf-6.30 {Tcl_UtfNext} testutfnext {
- testutfnext \xF4
+ testutfnext -bytestring \xF2
} 1
test utf-6.31 {Tcl_UtfNext} testutfnext {
- testutfnext \xF4A
+ testutfnext -bytestring \xF2G
} 1
test utf-6.32 {Tcl_UtfNext} testutfnext {
- testutfnext \xF4\xA0
+ testutfnext -bytestring \xF2\xA0
} 1
test utf-6.33 {Tcl_UtfNext} testutfnext {
- testutfnext \xF4\xD0
+ testutfnext -bytestring \xF2\xD0
} 1
test utf-6.34 {Tcl_UtfNext} testutfnext {
- testutfnext \xF4\xE8
+ testutfnext -bytestring \xF2\xE8
} 1
test utf-6.35 {Tcl_UtfNext} testutfnext {
- testutfnext \xF4\xF4
+ testutfnext -bytestring \xF2\xF2
} 1
test utf-6.36 {Tcl_UtfNext} testutfnext {
- testutfnext \xF4\xF8
+ testutfnext -bytestring \xF2\xF8
} 1
test utf-6.37 {Tcl_UtfNext} testutfnext {
- testutfnext \xF8
+ testutfnext -bytestring \xF8
} 1
test utf-6.38 {Tcl_UtfNext} testutfnext {
- testutfnext \xF8A
+ testutfnext -bytestring \xF8G
} 1
test utf-6.39 {Tcl_UtfNext} testutfnext {
- testutfnext \xF8\xA0
+ testutfnext -bytestring \xF8\xA0
} 1
test utf-6.40 {Tcl_UtfNext} testutfnext {
- testutfnext \xF8\xD0
+ testutfnext -bytestring \xF8\xD0
} 1
test utf-6.41 {Tcl_UtfNext} testutfnext {
- testutfnext \xF8\xE8
+ testutfnext -bytestring \xF8\xE8
} 1
test utf-6.42 {Tcl_UtfNext} testutfnext {
- testutfnext \xF8\xF4
+ testutfnext -bytestring \xF8\xF2
} 1
test utf-6.43 {Tcl_UtfNext} testutfnext {
- testutfnext \xF8\xF8
+ testutfnext -bytestring \xF8\xF8
} 1
test utf-6.44 {Tcl_UtfNext} testutfnext {
- testutfnext \xD0\xA0G
+ testutfnext -bytestring \xD0\xA0G
} 2
test utf-6.45 {Tcl_UtfNext} testutfnext {
- testutfnext \xD0\xA0\xA0
+ testutfnext -bytestring \xD0\xA0\xA0
} 2
test utf-6.46 {Tcl_UtfNext} testutfnext {
- testutfnext \xD0\xA0\xD0
+ testutfnext -bytestring \xD0\xA0\xD0
} 2
test utf-6.47 {Tcl_UtfNext} testutfnext {
- testutfnext \xD0\xA0\xE8
+ testutfnext -bytestring \xD0\xA0\xE8
} 2
test utf-6.48 {Tcl_UtfNext} testutfnext {
- testutfnext \xD0\xA0\xF4
+ testutfnext -bytestring \xD0\xA0\xF2
} 2
test utf-6.49 {Tcl_UtfNext} testutfnext {
- testutfnext \xD0\xA0\xF8
+ testutfnext -bytestring \xD0\xA0\xF8
} 2
test utf-6.50 {Tcl_UtfNext} testutfnext {
- testutfnext \xE8\xA0G
+ testutfnext -bytestring \xE8\xA0G
} 1
test utf-6.51 {Tcl_UtfNext} testutfnext {
- testutfnext \xE8\xA0\xA0
+ testutfnext -bytestring \xE8\xA0\xA0
} 3
test utf-6.52 {Tcl_UtfNext} testutfnext {
- testutfnext \xE8\xA0\xD0
+ testutfnext -bytestring \xE8\xA0\xD0
} 1
test utf-6.53 {Tcl_UtfNext} testutfnext {
- testutfnext \xE8\xA0\xE8
+ testutfnext -bytestring \xE8\xA0\xE8
} 1
test utf-6.54 {Tcl_UtfNext} testutfnext {
- testutfnext \xE8\xA0\xF4
+ testutfnext -bytestring \xE8\xA0\xF2
} 1
test utf-6.55 {Tcl_UtfNext} testutfnext {
- testutfnext \xE8\xA0\xF8
+ testutfnext -bytestring \xE8\xA0\xF8
} 1
test utf-6.56 {Tcl_UtfNext} testutfnext {
- testutfnext \xF4\xA0G
+ testutfnext -bytestring \xF2\xA0G
} 1
test utf-6.57 {Tcl_UtfNext} testutfnext {
- testutfnext \xF4\xA0\xA0
+ testutfnext -bytestring \xF2\xA0\xA0
} 1
test utf-6.58 {Tcl_UtfNext} testutfnext {
- testutfnext \xF4\xA0\xD0
+ testutfnext -bytestring \xF2\xA0\xD0
} 1
test utf-6.59 {Tcl_UtfNext} testutfnext {
- testutfnext \xF4\xA0\xE8
+ testutfnext -bytestring \xF2\xA0\xE8
} 1
test utf-6.60 {Tcl_UtfNext} testutfnext {
- testutfnext \xF4\xA0\xF4
+ testutfnext -bytestring \xF2\xA0\xF2
} 1
test utf-6.61 {Tcl_UtfNext} testutfnext {
- testutfnext \xF4\xA0\xF8
+ testutfnext -bytestring \xF2\xA0\xF8
} 1
test utf-6.62 {Tcl_UtfNext} testutfnext {
- testutfnext \xE8\xA0\xA0G
+ testutfnext -bytestring \xE8\xA0\xA0G
} 3
test utf-6.63 {Tcl_UtfNext} testutfnext {
- testutfnext \xE8\xA0\xA0\xA0
+ testutfnext -bytestring \xE8\xA0\xA0\xA0
} 3
test utf-6.64 {Tcl_UtfNext} testutfnext {
- testutfnext \xE8\xA0\xA0\xD0
+ testutfnext -bytestring \xE8\xA0\xA0\xD0
} 3
test utf-6.65 {Tcl_UtfNext} testutfnext {
- testutfnext \xE8\xA0\xA0\xE8
+ testutfnext -bytestring \xE8\xA0\xA0\xE8
} 3
test utf-6.66 {Tcl_UtfNext} testutfnext {
- testutfnext \xE8\xA0\xA0\xF4
+ testutfnext -bytestring \xE8\xA0\xA0\xF2
} 3
test utf-6.67 {Tcl_UtfNext} testutfnext {
- testutfnext \xE8\xA0\xA0\xF8
+ testutfnext -bytestring \xE8\xA0\xA0\xF8
} 3
test utf-6.68 {Tcl_UtfNext} testutfnext {
- testutfnext \xF4\xA0\xA0G
+ testutfnext -bytestring \xF2\xA0\xA0G
} 1
test utf-6.69 {Tcl_UtfNext} testutfnext {
- testutfnext \xF4\xA0\xA0\xA0
+ testutfnext -bytestring \xF2\xA0\xA0\xA0
} 4
test utf-6.70 {Tcl_UtfNext} testutfnext {
- testutfnext \xF4\xA0\xA0\xD0
+ testutfnext -bytestring \xF2\xA0\xA0\xD0
} 1
test utf-6.71 {Tcl_UtfNext} testutfnext {
- testutfnext \xF4\xA0\xA0\xE8
+ testutfnext -bytestring \xF2\xA0\xA0\xE8
} 1
test utf-6.71 {Tcl_UtfNext} testutfnext {
- testutfnext \xF4\xA0\xA0\xF4
+ testutfnext -bytestring \xF2\xA0\xA0\xF2
} 1
test utf-6.73 {Tcl_UtfNext} testutfnext {
- testutfnext \xF4\xA0\xA0\xF8
+ testutfnext -bytestring \xF2\xA0\xA0\xF8
} 1
test utf-6.74 {Tcl_UtfNext} testutfnext {
- testutfnext \xF4\xA0\xA0\xA0G
+ testutfnext -bytestring \xF2\xA0\xA0\xA0G
} 4
test utf-6.75 {Tcl_UtfNext} testutfnext {
- testutfnext \xF4\xA0\xA0\xA0\xA0
+ testutfnext -bytestring \xF2\xA0\xA0\xA0\xA0
} 4
test utf-6.76 {Tcl_UtfNext} testutfnext {
- testutfnext \xF4\xA0\xA0\xA0\xD0
+ testutfnext -bytestring \xF2\xA0\xA0\xA0\xD0
} 4
test utf-6.77 {Tcl_UtfNext} testutfnext {
- testutfnext \xF4\xA0\xA0\xA0\xE8
+ testutfnext -bytestring \xF2\xA0\xA0\xA0\xE8
} 4
test utf-6.78 {Tcl_UtfNext} testutfnext {
- testutfnext \xF4\xA0\xA0\xA0\xF4
+ testutfnext -bytestring \xF2\xA0\xA0\xA0\xF2
} 4
test utf-6.79 {Tcl_UtfNext} testutfnext {
- testutfnext \xF4\xA0\xA0\xA0G\xF8
+ testutfnext -bytestring \xF2\xA0\xA0\xA0G\xF8
} 4
test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext {
- testutfnext \xC0\x80
+ testutfnext -bytestring \xC0\x80
} 2
test utf-6.81 {Tcl_UtfNext - overlong sequences} testutfnext {
- testutfnext \xC0\x81
+ testutfnext -bytestring \xC0\x81
} 1
test utf-6.82 {Tcl_UtfNext - overlong sequences} testutfnext {
- testutfnext \xC1\x80
+ testutfnext -bytestring \xC1\x80
} 1
test utf-6.83 {Tcl_UtfNext - overlong sequences} testutfnext {
- testutfnext \xC2\x80
+ testutfnext -bytestring \xC2\x80
} 2
test utf-6.84 {Tcl_UtfNext - overlong sequences} testutfnext {
- testutfnext \xE0\x80\x80
+ testutfnext -bytestring \xE0\x80\x80
} 1
test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext {
- testutfnext \xE0\xA0\x80
+ testutfnext -bytestring \xE0\xA0\x80
} 3
test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext {
- testutfnext \xF0\x80\x80\x80
+ testutfnext -bytestring \xF0\x80\x80\x80
} 1
-test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext} {
- testutfnext \xF0\x90\x80\x80
+test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext smallutf} {
+ testutfnext -bytestring \xF0\x90\x80\x80
+} 1
+test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} {
+ testutfnext -bytestring \xF0\x90\x80\x80
} 4
test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
- testutfnext \xA0\xA0
+ testutfnext -bytestring \xA0\xA0
} 1
-test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
- testutfnext \xE8\xA0\xA0 1
-} 2
test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
- testutfnext \x80\x80
+ testutfnext -bytestring \x80\x80
+} 1
+test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext smallutf} {
+ testutfnext -bytestring \xF4\x8F\xBF\xBF
+} 1
+test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} {
+ testutfnext -bytestring \xF4\x8F\xBF\xBF
+} 4
+test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext {
+ testutfnext -bytestring \xF4\x90\x80\x80
+} 1
+test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext {
+ testutfnext -bytestring \xA0\xA0\xA0
+} 1
+test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext {
+ testutfnext -bytestring \x80\x80\x80
} 1
-test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
- testutfnext \xF0\x80\x80 1
-} 2
-
-testConstraint testutfprev [llength [info commands testutfprev]]
test utf-7.1 {Tcl_UtfPrev} testutfprev {
testutfprev {}
@@ -452,13 +466,13 @@ test utf-7.4.2 {Tcl_UtfPrev} testutfprev {
testutfprev A\xF8\xF8\xA0\xA0 2
} 1
test utf-7.5 {Tcl_UtfPrev} testutfprev {
- testutfprev A\xF4
+ testutfprev A\xF2
} 1
test utf-7.5.1 {Tcl_UtfPrev} testutfprev {
- testutfprev A\xF4\xA0\xA0\xA0 2
+ testutfprev A\xF2\xA0\xA0\xA0 2
} 1
test utf-7.5.2 {Tcl_UtfPrev} testutfprev {
- testutfprev A\xF4\xF8\xA0\xA0 2
+ testutfprev A\xF2\xF8\xA0\xA0 2
} 1
test utf-7.6 {Tcl_UtfPrev} testutfprev {
testutfprev A\xE8
@@ -496,14 +510,23 @@ test utf-7.9.1 {Tcl_UtfPrev} testutfprev {
test utf-7.9.2 {Tcl_UtfPrev} testutfprev {
testutfprev A\xF8\xA0\xF8\xA0 3
} 2
-test utf-7.10 {Tcl_UtfPrev} testutfprev {
- testutfprev A\xF4\xA0
+test utf-7.10.0 {Tcl_UtfPrev} {testutfprev smallutf} {
+ testutfprev A\xF2\xA0
+} 1
+test utf-7.10.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+ testutfprev A\xF2\xA0
} 1
-test utf-7.10.1 {Tcl_UtfPrev} testutfprev {
- testutfprev A\xF4\xA0\xA0\xA0 3
+test utf-7.10.1.0 {Tcl_UtfPrev} {testutfprev smallutf} {
+ testutfprev A\xF2\xA0\xA0\xA0 3
} 1
-test utf-7.10.2 {Tcl_UtfPrev} testutfprev {
- testutfprev A\xF4\xA0\xF8\xA0 3
+test utf-7.10.1.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+ testutfprev A\xF2\xA0\xA0\xA0 3
+} 1
+test utf-7.10.2.0 {Tcl_UtfPrev} {testutfprev smallutf} {
+ testutfprev A\xF2\xA0\xF8\xA0 3
+} 1
+test utf-7.10.2.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+ testutfprev A\xF2\xA0\xF8\xA0 3
} 1
test utf-7.11 {Tcl_UtfPrev} testutfprev {
testutfprev A\xE8\xA0
@@ -544,14 +567,23 @@ test utf-7.14.1 {Tcl_UtfPrev} testutfprev {
test utf-7.14.2 {Tcl_UtfPrev} testutfprev {
testutfprev A\xF8\xA0\xA0\xF8 4
} 3
-test utf-7.15 {Tcl_UtfPrev} testutfprev {
- testutfprev A\xF4\xA0\xA0
+test utf-7.15.0 {Tcl_UtfPrev} {testutfprev smallutf} {
+ testutfprev A\xF2\xA0\xA0
+} 1
+test utf-7.15.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+ testutfprev A\xF2\xA0\xA0
} 1
-test utf-7.15.1 {Tcl_UtfPrev} testutfprev {
- testutfprev A\xF4\xA0\xA0\xA0 4
+test utf-7.15.1.0 {Tcl_UtfPrev} {testutfprev smallutf} {
+ testutfprev A\xF2\xA0\xA0\xA0 4
} 1
-test utf-7.15.2 {Tcl_UtfPrev} testutfprev {
- testutfprev A\xF4\xA0\xA0\xF8 4
+test utf-7.15.1.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+ testutfprev A\xF2\xA0\xA0\xA0 4
+} 1
+test utf-7.15.2.0 {Tcl_UtfPrev} {testutfprev smallutf} {
+ testutfprev A\xF2\xA0\xA0\xF8 4
+} 1
+test utf-7.15.2.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+ testutfprev A\xF2\xA0\xA0\xF8 4
} 1
test utf-7.16 {Tcl_UtfPrev} testutfprev {
testutfprev A\xE8\xA0\xA0
@@ -583,8 +615,11 @@ test utf-7.18.2 {Tcl_UtfPrev} testutfprev {
test utf-7.19 {Tcl_UtfPrev} testutfprev {
testutfprev A\xF8\xA0\xA0\xA0
} 4
-test utf-7.20 {Tcl_UtfPrev} testutfprev {
- testutfprev A\xF4\xA0\xA0\xA0
+test utf-7.20.0 {Tcl_UtfPrev} {testutfprev smallutf} {
+ testutfprev A\xF2\xA0\xA0\xA0
+} 1
+test utf-7.20.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+ testutfprev A\xF2\xA0\xA0\xA0
} 1
test utf-7.21 {Tcl_UtfPrev} testutfprev {
testutfprev A\xE8\xA0\xA0\xA0
@@ -646,13 +681,22 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence} testutfprev {
test utf-7.38 {Tcl_UtfPrev -- overlong sequence} testutfprev {
testutfprev A\xE0\xA0\x80 2
} 1
-test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev} {
+test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} {
+ testutfprev A\xF0\x90\x80\x80
+} 4
+test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} {
testutfprev A\xF0\x90\x80\x80
} 1
-test utf-7.40 {Tcl_UtfPrev -- overlong sequence} {testutfprev} {
+test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} {
+ testutfprev A\xF0\x90\x80\x80 4
+} 3
+test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} {
testutfprev A\xF0\x90\x80\x80 4
} 1
-test utf-7.41 {Tcl_UtfPrev -- overlong sequence} {testutfprev} {
+test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} {
+ testutfprev A\xF0\x90\x80\x80 3
+} 2
+test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} {
testutfprev A\xF0\x90\x80\x80 3
} 1
test utf-7.42 {Tcl_UtfPrev -- overlong sequence} testutfprev {
@@ -679,6 +723,39 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {te
test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} {
testutfprev \xE8\xA0\x00 2
} 0
+test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} {
+ testutfprev A\xF4\x8F\xBF\xBF
+} 4
+test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} {
+ testutfprev A\xF4\x8F\xBF\xBF
+} 1
+test utf-7.48.1.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} {
+ testutfprev A\xF4\x8F\xBF\xBF 4
+} 3
+test utf-7.48.1.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} {
+ testutfprev A\xF4\x8F\xBF\xBF 4
+} 1
+test utf-7.48.2.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} {
+ testutfprev A\xF4\x8F\xBF\xBF 3
+} 2
+test utf-7.48.2.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} {
+ testutfprev A\xF4\x8F\xBF\xBF 3
+} 1
+test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+ testutfprev A\xF4\x8F\xBF\xBF 2
+} 1
+test utf-7.49 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+ testutfprev A\xF4\x90\x80\x80
+} 4
+test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+ testutfprev A\xF4\x90\x80\x80 4
+} 3
+test utf-7.49.2 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+ testutfprev A\xF4\x90\x80\x80 3
+} 2
+test utf-7.49.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+ testutfprev A\xF4\x90\x80\x80 2
+} 1
test utf-8.1 {Tcl_UniCharAtIndex: index = 0} {
string index abcd 0
@@ -692,6 +769,18 @@ test utf-8.3 {Tcl_UniCharAtIndex: index > 0} {
test utf-8.4 {Tcl_UniCharAtIndex: index > 0} {
string index \u4E4E\u25A\xFF\u543 2
} "\uFF"
+test utf-8.5 {Tcl_UniCharAtIndex: high surrogate} smallutf {
+ string index \uD842 0
+} "\uD842"
+test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} {
+ string index \uDC42 0
+} "\uDC42"
+test utf-8.7 {Tcl_UniCharAtIndex: Emoji} smallutf {
+ string index \uD83D\uDE00 0
+} "\uD83D"
+test utf-8.8 {Tcl_UniCharAtIndex: Emoji} smallutf {
+ string index \uD83D\uDE00 1
+} "\uDE00"
test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
string range abcd 0 2
@@ -699,6 +788,12 @@ test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
test utf-9.2 {Tcl_UtfAtIndex: index > 0} {
string range \u4E4E\u25A\xFF\u543klmnop 1 5
} "\u25A\xFF\u543kl"
+test utf-9.3 {Tcl_UtfAtIndex: index = 0, Emoji} smallutf {
+ string range \uD83D\uDE00G 0 0
+} "\uD83D"
+test utf-9.4 {Tcl_UtfAtIndex: index > 0, Emoji} smallutf {
+ string range \uD83D\uDE00G 1 1
+} "\uDE00"
test utf-10.1 {Tcl_UtfBackslash: dst == NULL} {