summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author jan.nijtmans <nijtmans@users.sourceforge.net> 2020-04-23 11:31:47 (GMT)
committer jan.nijtmans <nijtmans@users.sourceforge.net> 2020-04-23 11:31:47 (GMT)
commit 3afe55c9785becfec6f6a303bd13792c81c76ff1 (patch)
tree 28ea78ad8fa6646b84ff0d6b1c07cbc1cddd71f5
parent 05fe66d4ee8a8ae3847782fba4717855d1961ceb (diff)
parent 11a3e7e9eef7c94cb2cfbac414f01bcc63f205e7 (diff)
downloadtcl-3afe55c9785becfec6f6a303bd13792c81c76ff1.zip
tcl-3afe55c9785becfec6f6a303bd13792c81c76ff1.tar.gz
tcl-3afe55c9785becfec6f6a303bd13792c81c76ff1.tar.bz2
Fix [27944a3661]: Taming test utf-6.88
Fix [c11e0c5ce4]: Regression in Tcl_UtfCharComplete
Fix [1b1f5f0b53]: Tcl_UtfNext incompatibility in Tcl 8.6.10
-rw-r--r--generic/tclUtf.c56
-rw-r--r--tests/utf.test293
2 files changed, 201 insertions, 148 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index e7048ee..0e11e0e 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -64,17 +64,6 @@ static const unsigned char totalBytes[256] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1
-};
-
-static const unsigned char complete[256] = {
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
#if TCL_UTF_MAX > 4
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@@ -84,7 +73,11 @@ static const unsigned char complete[256] = {
#endif
2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+#if TCL_UTF_MAX > 3
4,4,4,4,4,
+#else
+ 1,1,1,1,1,
+#endif
1,1,1,1,1,1,1,1,1,1,1
};
@@ -559,7 +552,7 @@ Tcl_UtfCharComplete(
* a complete UTF-8 character. */
int length) /* Length of above string in bytes. */
{
- return length >= complete[(unsigned char)*src];
+ return length >= totalBytes[(unsigned char)*src];
}
/*
@@ -607,7 +600,7 @@ Tcl_NumUtfChars(
src = next;
}
} else {
- register const char *endPtr = src + length - /*TCL_UTF_MAX*/ 4;
+ register const char *endPtr = src + length - TCL_UTF_MAX;
while (src < endPtr) {
next = TclUtfNext(src);
@@ -618,7 +611,7 @@ Tcl_NumUtfChars(
#endif
src = next;
}
- endPtr += /*TCL_UTF_MAX*/ 4;
+ endPtr += TCL_UTF_MAX;
while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
next = TclUtfNext(src);
#if TCL_UTF_MAX > 4
@@ -717,9 +710,11 @@ Tcl_UtfFindLast(
*
* Tcl_UtfNext --
*
- * Given a pointer to some current location in a UTF-8 string, move
- * forward one character. The caller must ensure that they are not asking
- * for the next character after the last character in the string.
+ * Given a pointer to some location in a UTF-8 string, Tcl_UtfNext
+ * returns a pointer to the next UTF-8 character in the string.
+ * The caller must not ask for the next character after the last
+ * character in the string if the string is not terminated by a null
+ * character.
*
* Results:
* The return value is the pointer to the next character in the UTF-8
@@ -735,13 +730,18 @@ const char *
Tcl_UtfNext(
const char *src) /* The current location in the string. */
{
- int byte = *((unsigned char *) src);
- int left = totalBytes[byte];
+ int left = totalBytes[UCHAR(*src)];
const char *next = src + 1;
+ if (((*src) & 0xC0) == 0x80) {
+ if ((((*++src) & 0xC0) == 0x80) && (((*++src) & 0xC0) == 0x80)) {
+ ++src;
+ }
+ return src;
+ }
+
while (--left) {
- byte = *((unsigned char *) next);
- if ((byte & 0xC0) != 0x80) {
+ if ((*next & 0xC0) != 0x80) {
/*
* src points to non-trail byte; We ran out of trail bytes
* before the needs of the lead byte were satisfied.
@@ -778,7 +778,7 @@ Tcl_UtfNext(
* determine for certain in all circumstances whether the character
* that begins with the returned pointer will or will not include
* the byte src[-1]. In the scenario, where src points to the end of
- * a buffer being filled, the returned pointer point to either the
+ * a buffer being filled, the returned pointer points to either the
* final complete character in the string or to the earliest byte
* that might start an incomplete character waiting for more bytes to
* complete.
@@ -888,15 +888,19 @@ Tcl_UtfPrev(
/* Continue the search backwards... */
look--;
- } while (trailBytesSeen < /* was TCL_UTF_MAX */ 4);
+ } while (trailBytesSeen < TCL_UTF_MAX);
/*
- * We've seen 4 (was TCL_UTF_MAX) trail bytes, so we know there will not be a
+ * We've seen TCL_UTF_MAX trail bytes, so we know there will not be a
* properly formed byte sequence to find, and we can stop looking,
- * accepting the fallback.
+ * accepting the fallback (for TCL_UTF_MAX > 3) or just going back as
+ * far as we can (for TCL_UTF_MAX <= 3).
*/
-
+#if TCL_UTF_MAX > 3
return fallback;
+#else
+ return src - TCL_UTF_MAX;
+#endif
}
/*
diff --git a/tests/utf.test b/tests/utf.test
index 570de0d..3af70c4 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -16,9 +16,9 @@ if {[lsearch [namespace children] ::tcltest] == -1} {
::tcltest::loadTestedCommands
catch [list package require -exact Tcltest [info patchlevel]]
-testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}]
-testConstraint tip389 [expr {[string length \U010000] eq 2}]
-testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}]
+testConstraint ucs2 [expr {[format %c 0x010000] == "\uFFFD"}]
+testConstraint fullutf [expr {[format %c 0x010000] != "\uFFFD"}]
+testConstraint tip389 [expr {[string length \U010000] == 2}]
testConstraint testbytestring [llength [info commands testbytestring]]
testConstraint testfindfirst [llength [info commands testfindfirst]]
@@ -48,9 +48,9 @@ test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} testbytestring {
test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring {
expr {[format %c -1] eq [testbytestring "\xEF\xBF\xBD"]}
} 1
-test utf-1.7 {Tcl_UniCharToUtf: 4 byte sequences} -constraints {fullutf testbytestring} -body {
+test utf-1.7 {Tcl_UniCharToUtf: 4 byte sequences} {fullutf testbytestring} {
expr {"\U014E4E" eq [testbytestring "\xF0\x94\xB9\x8E"]}
-} -result 1
+} 1
test utf-1.8 {Tcl_UniCharToUtf: 3 byte sequence, high surrogate} testbytestring {
expr {"\uD842" eq [testbytestring "\xED\xA1\x82"]}
} 1
@@ -72,88 +72,92 @@ test utf-1.13 {Tcl_UniCharToUtf: Invalid surrogate} testbytestring {
test utf-2.1 {Tcl_UtfToUniChar: low ascii} {
string length "abc"
-} {3}
+} 3
test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} testbytestring {
string length [testbytestring "\x82\x83\x84"]
-} {3}
+} 3
test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} testbytestring {
string length [testbytestring "\xC2"]
-} {1}
+} 1
test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} testbytestring {
string length [testbytestring "\xC2\xA2"]
-} {1}
+} 1
test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} testbytestring {
string length [testbytestring "\xE2"]
-} {1}
+} 1
test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} testbytestring {
string length [testbytestring "\xE2\xA2"]
-} {2}
+} 2
test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring {
string length [testbytestring "\xE4\xB9\x8E"]
-} {1}
-test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {tip389 testbytestring} -body {
+} 1
+test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {tip389 testbytestring} {
string length [testbytestring "\xF0\x90\x80\x80"]
-} -result {2}
-test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {tip389 testbytestring} -body {
+} 2
+test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {tip389 testbytestring} {
string length [testbytestring "\xF4\x8F\xBF\xBF"]
-} -result {2}
+} 2
test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring {
string length [testbytestring "\xF0\x8F\xBF\xBF"]
-} {4}
-test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} {testbytestring} {
+} 4
+test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} testbytestring {
+ # Would decode to U+110000 but that is outside the Unicode range.
string length [testbytestring "\xF4\x90\x80\x80"]
-} {4}
+} 4
test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestring {
string length [testbytestring "\xF8\xA2\xA2\xA2\xA2"]
-} {5}
+} 5
test utf-3.1 {Tcl_UtfCharComplete} {
} {}
test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars {
testnumutfchars ""
-} {0}
+} 0
test utf-4.2 {Tcl_NumUtfChars: length 1} {testnumutfchars testbytestring} {
testnumutfchars [testbytestring "\xC2\xA2"]
-} {1}
+} 1
test utf-4.3 {Tcl_NumUtfChars: long string} {testnumutfchars testbytestring} {
testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E"]
-} {7}
+} 7
test utf-4.4 {Tcl_NumUtfChars: #u0000} {testnumutfchars testbytestring} {
testnumutfchars [testbytestring "\xC0\x80"]
-} {1}
+} 1
test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars {
testnumutfchars "" 0
-} {0}
+} 0
test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {testnumutfchars testbytestring} {
testnumutfchars [testbytestring "\xC2\xA2"] 1
-} {1}
+} 1
test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {testnumutfchars testbytestring} {
testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E"] 10
-} {7}
+} 7
test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {testnumutfchars testbytestring} {
testnumutfchars [testbytestring "\xC0\x80"] 1
-} {1}
+} 1
# Bug [2738427]: Tcl_NumUtfChars(...) no overflow check
test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} {testnumutfchars testbytestring} {
testnumutfchars [testbytestring "\xE2\x82\xAC"] 2
-} {2}
+} 2
test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars testbytestring} {
testnumutfchars [testbytestring "\x00"] 2
-} {2}
+} 2
test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} {
testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 3
-} {3}
-test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring tip389} {
+} 3
+test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs2} {
+ testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4
+} 4
+test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring tip389} {
testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4
-} {2}
+} 2
test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} {
testfindfirst [testbytestring "abcbc"] 98
-} {bcbc}
+} bcbc
test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} {
testfindlast [testbytestring "abcbc"] 98
-} {bc}
+} bc
test utf-6.1 {Tcl_UtfNext} testutfnext {
# This takes the pointer one past the terminating NUL.
@@ -161,10 +165,10 @@ test utf-6.1 {Tcl_UtfNext} testutfnext {
testutfnext -bytestring {}
} 1
test utf-6.2 {Tcl_UtfNext} testutfnext {
- testutfnext A
+ testutfnext -bytestring A
} 1
test utf-6.3 {Tcl_UtfNext} testutfnext {
- testutfnext AA
+ testutfnext -bytestring AA
} 1
test utf-6.4 {Tcl_UtfNext} testutfnext {
testutfnext -bytestring A\xA0
@@ -189,7 +193,7 @@ test utf-6.10 {Tcl_UtfNext} testutfnext {
} 1
test utf-6.11 {Tcl_UtfNext} testutfnext {
testutfnext -bytestring \xA0\xA0
-} 1
+} 2
test utf-6.12 {Tcl_UtfNext} testutfnext {
testutfnext -bytestring \xA0\xD0
} 1
@@ -363,7 +367,7 @@ test utf-6.68 {Tcl_UtfNext} testutfnext {
} 1
test utf-6.69.0 {Tcl_UtfNext} {testutfnext ucs2} {
testutfnext -bytestring \xF2\xA0\xA0\xA0
-} 4
+} 1
test utf-6.69.1 {Tcl_UtfNext} {testutfnext fullutf} {
testutfnext -bytestring \xF2\xA0\xA0\xA0
} 4
@@ -381,37 +385,37 @@ test utf-6.73 {Tcl_UtfNext} testutfnext {
} 1
test utf-6.74.0 {Tcl_UtfNext} {testutfnext ucs2} {
testutfnext -bytestring \xF2\xA0\xA0\xA0G
-} 4
+} 1
test utf-6.74.1 {Tcl_UtfNext} {testutfnext fullutf} {
testutfnext -bytestring \xF2\xA0\xA0\xA0G
} 4
test utf-6.75.0 {Tcl_UtfNext} {testutfnext ucs2} {
testutfnext -bytestring \xF2\xA0\xA0\xA0\xA0
-} 4
+} 1
test utf-6.75.1 {Tcl_UtfNext} {testutfnext fullutf} {
testutfnext -bytestring \xF2\xA0\xA0\xA0\xA0
} 4
test utf-6.76.0 {Tcl_UtfNext} {testutfnext ucs2} {
testutfnext -bytestring \xF2\xA0\xA0\xA0\xD0
-} 4
+} 1
test utf-6.76.1 {Tcl_UtfNext} {testutfnext fullutf} {
testutfnext -bytestring \xF2\xA0\xA0\xA0\xD0
} 4
test utf-6.77.0 {Tcl_UtfNext} {testutfnext ucs2} {
testutfnext -bytestring \xF2\xA0\xA0\xA0\xE8
-} 4
+} 1
test utf-6.77.1 {Tcl_UtfNext} {testutfnext fullutf} {
testutfnext -bytestring \xF2\xA0\xA0\xA0\xE8
} 4
test utf-6.78.0 {Tcl_UtfNext} {testutfnext ucs2} {
testutfnext -bytestring \xF2\xA0\xA0\xA0\xF2
-} 4
+} 1
test utf-6.78.1 {Tcl_UtfNext} {testutfnext fullutf} {
testutfnext -bytestring \xF2\xA0\xA0\xA0\xF2
} 4
test utf-6.79.0 {Tcl_UtfNext} {testutfnext ucs2} {
testutfnext -bytestring \xF2\xA0\xA0\xA0G\xF8
-} 4
+} 1
test utf-6.79.1 {Tcl_UtfNext} {testutfnext fullutf} {
testutfnext -bytestring \xF2\xA0\xA0\xA0G\xF8
} 4
@@ -442,27 +446,30 @@ test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext ucs2} {
test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} {
testutfnext -bytestring \xF0\x90\x80\x80
} 4
-test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
+test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} testutfnext {
testutfnext -bytestring \xA0\xA0
-} 1
-test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
+} 2
+test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} testutfnext {
testutfnext -bytestring \x80\x80
-} 1
+} 2
test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs2} {
testutfnext -bytestring \xF4\x8F\xBF\xBF
} 1
test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} {
testutfnext -bytestring \xF4\x8F\xBF\xBF
} 4
-test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext {
+test utf-6.91.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs2} {
testutfnext -bytestring \xF4\x90\x80\x80
} 1
-test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext {
- testutfnext -bytestring \xA0\xA0\xA0
+test utf-6.91.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} {
+ testutfnext -bytestring \xF4\x90\x80\x80
} 1
+test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext {
+ testutfnext -bytestring \xA0\xA0\xA0
+} 3
test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext {
testutfnext -bytestring \x80\x80\x80
-} 1
+} 3
test utf-7.1 {Tcl_UtfPrev} testutfprev {
testutfprev {}
@@ -529,19 +536,19 @@ test utf-7.9.2 {Tcl_UtfPrev} testutfprev {
} 2
test utf-7.10.0 {Tcl_UtfPrev} {testutfprev ucs2} {
testutfprev A\xF2\xA0
-} 1
+} 2
test utf-7.10.1 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xF2\xA0
} 1
test utf-7.10.1.0 {Tcl_UtfPrev} {testutfprev ucs2} {
testutfprev A\xF2\xA0\xA0\xA0 3
-} 1
+} 2
test utf-7.10.1.1 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xF2\xA0\xA0\xA0 3
} 1
test utf-7.10.2.0 {Tcl_UtfPrev} {testutfprev ucs2} {
testutfprev A\xF2\xA0\xF8\xA0 3
-} 1
+} 2
test utf-7.10.2.1 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xF2\xA0\xF8\xA0 3
} 1
@@ -586,19 +593,19 @@ test utf-7.14.2 {Tcl_UtfPrev} testutfprev {
} 3
test utf-7.15.0 {Tcl_UtfPrev} {testutfprev ucs2} {
testutfprev A\xF2\xA0\xA0
-} 1
+} 3
test utf-7.15.1 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xF2\xA0\xA0
} 1
test utf-7.15.1.0 {Tcl_UtfPrev} {testutfprev ucs2} {
testutfprev A\xF2\xA0\xA0\xA0 4
-} 1
+} 3
test utf-7.15.1.1 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xF2\xA0\xA0\xA0 4
} 1
test utf-7.15.2.0 {Tcl_UtfPrev} {testutfprev ucs2} {
testutfprev A\xF2\xA0\xA0\xF8 4
-} 1
+} 3
test utf-7.15.2.1 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xF2\xA0\xA0\xF8 4
} 1
@@ -620,31 +627,52 @@ test utf-7.17.1 {Tcl_UtfPrev} testutfprev {
test utf-7.17.2 {Tcl_UtfPrev} testutfprev {
testutfprev A\xD0\xA0\xA0\xF8 4
} 3
-test utf-7.18 {Tcl_UtfPrev} testutfprev {
+test utf-7.18 {Tcl_UtfPrev} {testutfprev ucs2} {
+ testutfprev A\xA0\xA0\xA0
+} 1
+test utf-7.18.1 {Tcl_UtfPrev} {testutfprev ucs2} {
+ testutfprev A\xA0\xA0\xA0\xA0 4
+} 1
+test utf-7.18.2 {Tcl_UtfPrev} {testutfprev ucs2} {
+ testutfprev A\xA0\xA0\xA0\xF8 4
+} 1
+test utf-7.18.3 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xA0\xA0\xA0
} 3
-test utf-7.18.1 {Tcl_UtfPrev} testutfprev {
+test utf-7.18.4 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xA0\xA0\xA0\xA0 4
} 3
-test utf-7.18.2 {Tcl_UtfPrev} testutfprev {
+test utf-7.18.5 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xA0\xA0\xA0\xF8 4
} 3
-test utf-7.19 {Tcl_UtfPrev} testutfprev {
+test utf-7.19 {Tcl_UtfPrev} {testutfprev ucs2} {
+ testutfprev A\xF8\xA0\xA0\xA0
+} 2
+test utf-7.19.1 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xF8\xA0\xA0\xA0
} 4
-test utf-7.20.0 {Tcl_UtfPrev} {testutfprev ucs2} {
- testutfprev A\xF2\xA0\xA0\xA0
-} 1
+test utf-7.20 {Tcl_UtfPrev} {testutfprev ucs2} {
+ testutfprev A\xF4\xA0\xA0\xA0
+} 2
test utf-7.20.1 {Tcl_UtfPrev} {testutfprev fullutf} {
- testutfprev A\xF2\xA0\xA0\xA0
-} 1
-test utf-7.21 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xF4\xA0\xA0\xA0
+} 4
+test utf-7.21 {Tcl_UtfPrev} {testutfprev ucs2} {
+ testutfprev A\xE8\xA0\xA0\xA0
+} 2
+test utf-7.21.1 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xE8\xA0\xA0\xA0
} 4
-test utf-7.22 {Tcl_UtfPrev} testutfprev {
+test utf-7.22 {Tcl_UtfPrev} {testutfprev ucs2} {
+ testutfprev A\xD0\xA0\xA0\xA0
+} 2
+test utf-7.22.1 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xD0\xA0\xA0\xA0
} 4
-test utf-7.23 {Tcl_UtfPrev} testutfprev {
+test utf-7.23 {Tcl_UtfPrev} {testutfprev ucs2} {
+ testutfprev A\xA0\xA0\xA0\xA0
+} 2
+test utf-7.23.1 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xA0\xA0\xA0\xA0
} 4
test utf-7.24 {Tcl_UtfPrev -- overlong sequence} testutfprev {
@@ -668,7 +696,10 @@ test utf-7.28 {Tcl_UtfPrev -- overlong sequence} testutfprev {
test utf-7.28.1 {Tcl_UtfPrev -- overlong sequence} testutfprev {
testutfprev A\xE0\x80\x80 2
} 1
-test utf-7.29 {Tcl_UtfPrev -- overlong sequence} testutfprev {
+test utf-7.29 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs2} {
+ testutfprev A\xF0\x80\x80\x80
+} 2
+test utf-7.29.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} {
testutfprev A\xF0\x80\x80\x80
} 4
test utf-7.30 {Tcl_UtfPrev -- overlong sequence} testutfprev {
@@ -700,7 +731,7 @@ test utf-7.38 {Tcl_UtfPrev -- overlong sequence} testutfprev {
} 1
test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs2} {
testutfprev A\xF0\x90\x80\x80
-} 4
+} 2
test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} {
testutfprev A\xF0\x90\x80\x80
} 1
@@ -728,21 +759,24 @@ test utf-7.44 {Tcl_UtfPrev -- no lead byte at start} testutfprev {
test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} testutfprev {
testutfprev \xA0\xA0\xA0
} 2
-test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} testutfprev {
+test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} {testutfprev ucs2} {
+ testutfprev \xA0\xA0\xA0\xA0
+} 1
+test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} {testutfprev fullutf} {
testutfprev \xA0\xA0\xA0\xA0
} 3
-test utf-7.47 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev} {
+test utf-7.47 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} testutfprev {
testutfprev \xE8\xA0
} 0
-test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev} {
+test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} testutfprev {
testutfprev \xE8\xA0\xA0 2
} 0
-test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} {
+test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} testutfprev {
testutfprev \xE8\xA0\x00 2
} 0
test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs2} {
testutfprev A\xF4\x8F\xBF\xBF
-} 4
+} 2
test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} {
testutfprev A\xF4\x8F\xBF\xBF
} 1
@@ -761,28 +795,37 @@ test utf-7.48.2.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullut
test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
testutfprev A\xF4\x8F\xBF\xBF 2
} 1
-test utf-7.49 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+test utf-7.49.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs2} {
+ testutfprev A\xF4\x90\x80\x80
+} 2
+test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} {
testutfprev A\xF4\x90\x80\x80
} 4
-test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+test utf-7.49.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs2} {
+ testutfprev A\xF4\x90\x80\x80 4
+} 3
+test utf-7.49.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} {
testutfprev A\xF4\x90\x80\x80 4
} 3
-test utf-7.49.2 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+test utf-7.49.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs2} {
+ testutfprev A\xF4\x90\x80\x80 3
+} 2
+test utf-7.49.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} {
testutfprev A\xF4\x90\x80\x80 3
} 2
-test utf-7.49.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+test utf-7.49.6 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
testutfprev A\xF4\x90\x80\x80 2
} 1
test utf-8.1 {Tcl_UniCharAtIndex: index = 0} {
string index abcd 0
-} {a}
+} a
test utf-8.2 {Tcl_UniCharAtIndex: index = 0} {
string index \u4E4E\u25A 0
} "\u4E4E"
test utf-8.3 {Tcl_UniCharAtIndex: index > 0} {
string index abcd 2
-} {c}
+} c
test utf-8.4 {Tcl_UniCharAtIndex: index > 0} {
string index \u4E4E\u25A\xFF\u543 2
} "\uFF"
@@ -801,7 +844,7 @@ test utf-8.8 {Tcl_UniCharAtIndex: Emoji} ucs2 {
test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
string range abcd 0 2
-} {abc}
+} abc
test utf-9.2 {Tcl_UtfAtIndex: index > 0} {
string range \u4E4E\u25A\xFF\u543klmnop 1 5
} "\u25A\xFF\u543kl"
@@ -909,11 +952,11 @@ test utf-11.2 {Tcl_UtfToUpper} {
string toupper abc
} ABC
test utf-11.3 {Tcl_UtfToUpper} {
- string toupper \u00E3AB
-} \u00C3AB
+ string toupper \xE3gh
+} \xC3GH
test utf-11.4 {Tcl_UtfToUpper} {
- string toupper \u01E3AB
-} \u01E2AB
+ string toupper \u01E3gh
+} \u01E2GH
test utf-11.5 {Tcl_UtfToUpper Georgian (new in Unicode 11)} {
string toupper \u10D0\u1C90
} \u1C90\u1C90
@@ -925,14 +968,17 @@ test utf-12.2 {Tcl_UtfToLower} {
string tolower ABC
} abc
test utf-12.3 {Tcl_UtfToLower} {
- string tolower \u00C3AB
-} \u00E3ab
+ string tolower \xC3GH
+} \xE3gh
test utf-12.4 {Tcl_UtfToLower} {
- string tolower \u01E2AB
-} \u01E3ab
+ string tolower \u01E2GH
+} \u01E3gh
test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} {
string tolower \u10D0\u1C90
} \u10D0\u10D0
+test utf-12.6 {Tcl_UtfToUpper low/high surrogate)} ucs2 {
+ string tolower \uDC24\uD824
+} \uDC24\uD824
test utf-13.1 {Tcl_UtfToTitle} {
string totitle {}
@@ -941,8 +987,8 @@ test utf-13.2 {Tcl_UtfToTitle} {
string totitle abc
} Abc
test utf-13.3 {Tcl_UtfToTitle} {
- string totitle \u00E3AB
-} \u00C3ab
+ string totitle \xE3GH
+} \xC3gh
test utf-13.4 {Tcl_UtfToTitle} {
string totitle \u01F3AB
} \u01F2ab
@@ -952,6 +998,9 @@ test utf-13.5 {Tcl_UtfToTitle Georgian (new in Unicode 11)} {
test utf-13.6 {Tcl_UtfToTitle Georgian (new in Unicode 11)} {
string totitle \u1C90\u10D0
} \u1C90\u10D0
+test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} ucs2 {
+ string totitle \uDC24\uD824
+} \uDC24\uD824
test utf-14.1 {Tcl_UtfNcasecmp} {
string compare -nocase a b
@@ -970,7 +1019,7 @@ test utf-15.1 {Tcl_UniCharToUpper, negative delta} {
string toupper aA
} AA
test utf-15.2 {Tcl_UniCharToUpper, positive delta} {
- string toupper \u0178\u00FF
+ string toupper \u0178\xFF
} \u0178\u0178
test utf-15.3 {Tcl_UniCharToUpper, no delta} {
string toupper !
@@ -980,8 +1029,8 @@ test utf-16.1 {Tcl_UniCharToLower, negative delta} {
string tolower aA
} aa
test utf-16.2 {Tcl_UniCharToLower, positive delta} {
- string tolower \u0178\u00FF\uA78D\u01C5
-} \u00FF\u00FF\u0265\u01C6
+ string tolower \u0178\xFF\uA78D\u01C5
+} \xFF\xFF\u0265\u01C6
test utf-17.1 {Tcl_UniCharToLower, no delta} {
string tolower !
@@ -995,9 +1044,9 @@ test utf-18.2 {Tcl_UniCharToTitle, subtract one for title} {
} \u01C5
test utf-18.3 {Tcl_UniCharToTitle, subtract delta for title (positive)} {
string totitle \u017F
-} \u0053
+} \x53
test utf-18.4 {Tcl_UniCharToTitle, subtract delta for title (negative)} {
- string totitle \u00FF
+ string totitle \xFF
} \u0178
test utf-18.5 {Tcl_UniCharToTitle, no delta} {
string totitle !
@@ -1027,39 +1076,39 @@ test utf-21.3 {unicode print char in regc_locale.c} {
test utf-21.4 {TclUniCharIsGraph} {
# [Bug 3464428]
string is graph \u0120
-} {1}
+} 1
test utf-21.5 {unicode graph char in regc_locale.c} {
# [Bug 3464428]
regexp {^[[:graph:]]+$} \u0120
-} {1}
+} 1
test utf-21.6 {TclUniCharIsGraph} {
# [Bug 3464428]
- string is graph \u00A0
-} {0}
+ string is graph \xA0
+} 0
test utf-21.7 {unicode graph char in regc_locale.c} {
# [Bug 3464428]
- regexp {[[:graph:]]} \u0020\u00A0\u2028\u2029
-} {0}
+ regexp {[[:graph:]]} \x20\xA0\u2028\u2029
+} 0
test utf-21.8 {TclUniCharIsPrint} {
# [Bug 3464428]
- string is print \u0009
-} {0}
+ string is print \x09
+} 0
test utf-21.9 {unicode print char in regc_locale.c} {
# [Bug 3464428]
- regexp {[[:print:]]} \u0009
-} {0}
+ regexp {[[:print:]]} \x09
+} 0
test utf-21.10 {unicode print char in regc_locale.c} {
# [Bug 3464428]
- regexp {[[:print:]]} \u0009
-} {0}
+ regexp {[[:print:]]} \x09
+} 0
test utf-21.11 {TclUniCharIsControl} {
# [Bug 3464428]
- string is control \u0000\u001F\u00AD\u0605\u061C\u180E\u2066\uFEFF
-} {1}
+ string is control \x00\x1F\xAD\u0605\u061C\u180E\u2066\uFEFF
+} 1
test utf-21.12 {unicode control char in regc_locale.c} {
# [Bug 3464428], [Bug a876646efe]
- regexp {^[[:cntrl:]]*$} \u0000\u001F\u00AD\u0605\u061C\u180E\u2066\uFEFF
-} {1}
+ regexp {^[[:cntrl:]]*$} \x00\x1F\xAD\u0605\u061C\u180E\u2066\uFEFF
+} 1
test utf-22.1 {TclUniCharIsWordChar} {
string wordend "xyz123_bar fg" 0
@@ -1071,16 +1120,16 @@ test utf-22.2 {TclUniCharIsWordChar} {
test utf-23.1 {TclUniCharIsAlpha} {
# this returns 1 with Unicode 7 compliance
string is alpha \u021F\u0220\u037F\u052F
-} {1}
+} 1
test utf-23.2 {unicode alpha char in regc_locale.c} {
# this returns 1 with Unicode 7 compliance
regexp {^[[:alpha:]]+$} \u021F\u0220\u037F\u052F
-} {1}
+} 1
test utf-24.1 {TclUniCharIsDigit} {
# this returns 1 with Unicode 7 compliance
string is digit \u1040\uABF0
-} {1}
+} 1
test utf-24.2 {unicode digit char in regc_locale.c} {
# this returns 1 with Unicode 7 compliance
list [regexp {^[[:digit:]]+$} \u1040\uABF0] [regexp {^\d+$} \u1040\uABF0]
@@ -1088,11 +1137,11 @@ test utf-24.2 {unicode digit char in regc_locale.c} {
test utf-24.3 {TclUniCharIsSpace} {
# this returns 1 with Unicode 7/TIP 413 compliance
- string is space \u0085\u1680\u180E\u200B\u202F\u2060
-} {1}
+ string is space \x85\u1680\u180E\u200B\u202F\u2060
+} 1
test utf-24.4 {unicode space char in regc_locale.c} {
# this returns 1 with Unicode 7/TIP 413 compliance
- list [regexp {^[[:space:]]+$} \u0085\u1680\u180E\u200B\u202F\u2060] [regexp {^\s+$} \u0085\u1680\u180E\u200B\u202F\u2060]
+ list [regexp {^[[:space:]]+$} \x85\u1680\u180E\u200B\u202F\u2060] [regexp {^\s+$} \x85\u1680\u180E\u200B\u202F\u2060]
} {1 1}
test utf-25.1 {Tcl_UniCharNcasecmp} -constraints teststringobj \