summary | refs | log | tree | commit | diff | stats
path: root/tests/utf.test
diff options
context:
space:
mode:
author: jan.nijtmans <nijtmans@users.sourceforge.net> 2020-05-05 07:29:46 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2020-05-05 07:29:46 (GMT)
commit: 9d533c3d4842c4792ffb95a166933602ccba7895 (patch)
tree: 0938379c5afbfc944ba1e3464b7f1e9a11e87307 /tests/utf.test
parent: 35051648affc3b6e48d6abe8f629810f28a80112 (diff)
download: tcl-9d533c3d4842c4792ffb95a166933602ccba7895.zip
tcl-9d533c3d4842c4792ffb95a166933602ccba7895.tar.gz
tcl-9d533c3d4842c4792ffb95a166933602ccba7895.tar.bz2
Properly protect "Invalid" function against lead bytes 0x80-0xBF. This fixes "knownBug" testcase utf-6.93.1.
Rename tip389 selector to utf16, since that's what it actually is, in contrast to ucs2 and ucs4.
Diffstat (limited to 'tests/utf.test')
-rw-r--r-- tests/utf.test 59
1 files changed, 34 insertions, 25 deletions
diff --git a/tests/utf.test b/tests/utf.test
index 988bede..c281d11 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -17,7 +17,7 @@ namespace path ::tcl::mathop
testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}]
testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}]
-testConstraint tip389 [expr {[string length [format %c 0x10000]] == 2}]
+testConstraint utf16 [expr {[string length [format %c 0x10000]] == 2}]
testConstraint ucs4 [expr {[testConstraint fullutf]
&& [string length [format %c 0x10000]] == 1}]
@@ -108,7 +108,7 @@ test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytest
test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs4} {
string length [testbytestring \xF0\x90\x80\x80]
} 1
-test utf-2.8.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring tip389} {
+test utf-2.8.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring utf16} {
string length [testbytestring \xF0\x90\x80\x80]
} 2
test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs2} {
@@ -117,7 +117,7 @@ test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytest
test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {Uesc ucs4} {
string length \U10FFFF
} 1
-test utf-2.9.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} tip389 {
+test utf-2.9.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf16 {
string length \uDBFF\uDFFF
} 2
test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring {
@@ -174,7 +174,7 @@ test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars test
test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs4} {
testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end
} 1
-test utf-4.12.2 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring tip389} {
+test utf-4.12.2 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring utf16} {
testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end
} 2
@@ -490,16 +490,25 @@ test utf-6.91.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs2} {
test utf-6.91.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} {
testutfnext \xF4\x90\x80\x80
} 1
-test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext {
+test utf-6.92.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext ucs2} {
+ testutfnext \xA0\xA0\xA0
+} 1
+test utf-6.92.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext utf16} {
+ testutfnext \xA0\xA0\xA0
+} 3
+test utf-6.92.2 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext ucs4} {
testutfnext \xA0\xA0\xA0
} 1
test utf-6.93.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext ucs2} {
testutfnext \x80\x80\x80
} 1
-test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext fullutf knownBug} {
+test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext utf16} {
testutfnext \x80\x80\x80
} 3
-test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext {
+test utf-6.93.2 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext ucs4} {
+ testutfnext \x80\x80\x80
+} 1
+test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext ucs2} {
testutfnext \xA0\xA0\xA0\xA0
} 1
test utf-6.95 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext ucs2} {
@@ -610,16 +619,16 @@ test utf-6.121 {Tcl_UtfNext, read limits} {testutfnext ucs2} {
test utf-6.122 {Tcl_UtfNext, read limits} {testutfnext ucs2} {
testutfnext \xA0\xA0\xA0 2
} 1
-test utf-6.123 {Tcl_UtfNext, read limits} testutfnext {
+test utf-6.123 {Tcl_UtfNext, read limits} {testutfnext ucs2} {
testutfnext \xA0\xA0\xA0G 3
} 1
-test utf-6.124 {Tcl_UtfNext, read limits} testutfnext {
+test utf-6.124 {Tcl_UtfNext, read limits} {testutfnext ucs2} {
testutfnext \xA0\xA0\xA0\xA0 3
} 1
-test utf-6.125 {Tcl_UtfNext, read limits} testutfnext {
+test utf-6.125 {Tcl_UtfNext, read limits} {testutfnext ucs2} {
testutfnext \xA0\xA0\xA0\xA0G 4
} 1
-test utf-6.126 {Tcl_UtfNext, read limits} testutfnext {
+test utf-6.126 {Tcl_UtfNext, read limits} {testutfnext ucs2} {
testutfnext \xA0\xA0\xA0\xA0\xA0 4
} 1
@@ -987,7 +996,7 @@ test utf-8.5.0 {Tcl_UniCharAtIndex: high surrogate} ucs2 {
test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} ucs4 {
string index \uD842 0
} \uD842
-test utf-8.5.2 {Tcl_UniCharAtIndex: high surrogate} tip389 {
+test utf-8.5.2 {Tcl_UniCharAtIndex: high surrogate} utf16 {
string index \uD842 0
} \uD842
test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} {
@@ -999,7 +1008,7 @@ test utf-8.7.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
test utf-8.7.1 {Tcl_UniCharAtIndex: Emoji} ucs4 {
string index \uD83D\uDE00G 0
} \U1F600
-test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} tip389 {
+test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
string index \uD83D\uDE00G 0
} \U1F600
test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
@@ -1008,7 +1017,7 @@ test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
test utf-8.8.1 {Tcl_UniCharAtIndex: Emoji} ucs4 {
string index \uD83D\uDE00G 1
} G
-test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} tip389 {
+test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
string index \uD83D\uDE00G 1
} {}
test utf-8.9.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
@@ -1017,7 +1026,7 @@ test utf-8.9.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
test utf-8.9.1 {Tcl_UniCharAtIndex: Emoji} ucs4 {
string index \uD83D\uDE00G 2
} {}
-test utf-8.9.2 {Tcl_UniCharAtIndex: Emoji} tip389 {
+test utf-8.9.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
string index \uD83D\uDE00G 2
} G
test utf-8.10.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} {
@@ -1026,7 +1035,7 @@ test utf-8.10.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} {
test utf-8.10.1 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs4} {
string index \U1F600G 0
} \U1F600
-test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389} {
+test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} {Uesc utf16} {
string index \U1F600G 0
} \U1F600
test utf-8.11.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} {
@@ -1035,7 +1044,7 @@ test utf-8.11.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} {
test utf-8.11.1 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs4} {
string index \U1F600G 1
} G
-test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389} {
+test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} {Uesc utf16} {
string index \U1F600G 1
} {}
test utf-8.12.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} {
@@ -1044,7 +1053,7 @@ test utf-8.12.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} {
test utf-8.12.1 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs4} {
string index \U1F600G 2
} {}
-test utf-8.12.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389} {
+test utf-8.12.2 {Tcl_UniCharAtIndex: Emoji} {Uesc utf16} {
string index \U1F600G 2
} G
@@ -1060,7 +1069,7 @@ test utf-9.3.0 {Tcl_UtfAtIndex: index = 0, Emoji} ucs2 {
test utf-9.3.1 {Tcl_UtfAtIndex: index = 0, Emoji} ucs4 {
string range \uD83D\uDE00G 0 0
} \U1F600
-test utf-9.3.2 {Tcl_UtfAtIndex: index = 0, Emoji} tip389 {
+test utf-9.3.2 {Tcl_UtfAtIndex: index = 0, Emoji} utf16 {
string range \uD83D\uDE00G 0 0
} \U1F600
test utf-9.4.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 {
@@ -1069,7 +1078,7 @@ test utf-9.4.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 {
test utf-9.4.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 {
string range \uD83D\uDE00G 1 1
} G
-test utf-9.4.2 {Tcl_UtfAtIndex: index > 0, Emoji} tip389 {
+test utf-9.4.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 {
string range \uD83D\uDE00G 1 1
} {}
test utf-9.5.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 {
@@ -1078,7 +1087,7 @@ test utf-9.5.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 {
test utf-9.5.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 {
string range \uD83D\uDE00G 2 2
} {}
-test utf-9.5.2 {Tcl_UtfAtIndex: index > 0, Emoji} tip389 {
+test utf-9.5.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 {
string range \uD83D\uDE00G 2 2
} G
test utf-9.6.0 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc ucs2} {
@@ -1087,7 +1096,7 @@ test utf-9.6.0 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc ucs2} {
test utf-9.6.1 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc ucs4} {
string range \U1f600G 0 0
} \U1F600
-test utf-9.6.2 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc tip389} {
+test utf-9.6.2 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc utf16} {
string range \U1f600G 0 0
} \U1F600
test utf-9.7.0 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs2} {
@@ -1096,7 +1105,7 @@ test utf-9.7.0 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs2} {
test utf-9.7.1 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs4} {
string range \U1f600G 1 1
} G
-test utf-9.7.2 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc tip389} {
+test utf-9.7.2 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc utf16} {
string range \U1f600G 1 1
} {}
test utf-9.8.0 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs2} {
@@ -1105,7 +1114,7 @@ test utf-9.8.0 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs2} {
test utf-9.8.1 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs4} {
string range \U1f600G 2 2
} {}
-test utf-9.8.2 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc tip389} {
+test utf-9.8.2 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc utf16} {
string range \U1f600G 2 2
} G
@@ -1333,7 +1342,7 @@ test utf-19.1 {TclUniCharLen} -body {
unset -nocomplain foo
} -result {1 4}
-test utf-20.1 {TclUniCharNcmp} {ucs4} {
+test utf-20.1 {TclUniCharNcmp} ucs4 {
string compare [string range [format %c 0xFFFF] 0 0] [string range [format %c 0x10000] 0 0]
} -1
test utf-20.2 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} {