diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-05-05 07:29:46 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-05-05 07:29:46 (GMT) |
commit | 9d533c3d4842c4792ffb95a166933602ccba7895 (patch) | |
tree | 0938379c5afbfc944ba1e3464b7f1e9a11e87307 /tests/utf.test | |
parent | 35051648affc3b6e48d6abe8f629810f28a80112 (diff) | |
download | tcl-9d533c3d4842c4792ffb95a166933602ccba7895.zip tcl-9d533c3d4842c4792ffb95a166933602ccba7895.tar.gz tcl-9d533c3d4842c4792ffb95a166933602ccba7895.tar.bz2 |
Properly protect the "Invalid" function against lead bytes in the range 0x80-0xBF. This fixes the "knownBug" test case utf-6.93.1.
Rename the tip389 selector (test constraint) to utf16, since that is what it actually is, in contrast to ucs2 and ucs4.
Diffstat (limited to 'tests/utf.test')
-rw-r--r-- | tests/utf.test | 59 |
1 file changed, 34 insertions, 25 deletions
diff --git a/tests/utf.test b/tests/utf.test index 988bede..c281d11 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -17,7 +17,7 @@ namespace path ::tcl::mathop testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] -testConstraint tip389 [expr {[string length [format %c 0x10000]] == 2}] +testConstraint utf16 [expr {[string length [format %c 0x10000]] == 2}] testConstraint ucs4 [expr {[testConstraint fullutf] && [string length [format %c 0x10000]] == 1}] @@ -108,7 +108,7 @@ test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytest test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs4} { string length [testbytestring \xF0\x90\x80\x80] } 1 -test utf-2.8.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring tip389} { +test utf-2.8.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring utf16} { string length [testbytestring \xF0\x90\x80\x80] } 2 test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs2} { @@ -117,7 +117,7 @@ test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytest test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {Uesc ucs4} { string length \U10FFFF } 1 -test utf-2.9.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} tip389 { +test utf-2.9.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf16 { string length \uDBFF\uDFFF } 2 test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring { @@ -174,7 +174,7 @@ test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars test test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs4} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end } 1 -test utf-4.12.2 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring tip389} { +test utf-4.12.2 
{Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring utf16} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end } 2 @@ -490,16 +490,25 @@ test utf-6.91.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs2} { test utf-6.91.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} { testutfnext \xF4\x90\x80\x80 } 1 -test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { +test utf-6.92.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext ucs2} { + testutfnext \xA0\xA0\xA0 +} 1 +test utf-6.92.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext utf16} { + testutfnext \xA0\xA0\xA0 +} 3 +test utf-6.92.2 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext ucs4} { testutfnext \xA0\xA0\xA0 } 1 test utf-6.93.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext ucs2} { testutfnext \x80\x80\x80 } 1 -test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext fullutf knownBug} { +test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext utf16} { testutfnext \x80\x80\x80 } 3 -test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { +test utf-6.93.2 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext ucs4} { + testutfnext \x80\x80\x80 +} 1 +test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext ucs2} { testutfnext \xA0\xA0\xA0\xA0 } 1 test utf-6.95 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext ucs2} { @@ -610,16 +619,16 @@ test utf-6.121 {Tcl_UtfNext, read limits} {testutfnext ucs2} { test utf-6.122 {Tcl_UtfNext, read limits} {testutfnext ucs2} { testutfnext \xA0\xA0\xA0 2 } 1 -test utf-6.123 {Tcl_UtfNext, read limits} testutfnext { +test utf-6.123 {Tcl_UtfNext, read limits} {testutfnext ucs2} { testutfnext \xA0\xA0\xA0G 3 } 
1 -test utf-6.124 {Tcl_UtfNext, read limits} testutfnext { +test utf-6.124 {Tcl_UtfNext, read limits} {testutfnext ucs2} { testutfnext \xA0\xA0\xA0\xA0 3 } 1 -test utf-6.125 {Tcl_UtfNext, read limits} testutfnext { +test utf-6.125 {Tcl_UtfNext, read limits} {testutfnext ucs2} { testutfnext \xA0\xA0\xA0\xA0G 4 } 1 -test utf-6.126 {Tcl_UtfNext, read limits} testutfnext { +test utf-6.126 {Tcl_UtfNext, read limits} {testutfnext ucs2} { testutfnext \xA0\xA0\xA0\xA0\xA0 4 } 1 @@ -987,7 +996,7 @@ test utf-8.5.0 {Tcl_UniCharAtIndex: high surrogate} ucs2 { test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} ucs4 { string index \uD842 0 } \uD842 -test utf-8.5.2 {Tcl_UniCharAtIndex: high surrogate} tip389 { +test utf-8.5.2 {Tcl_UniCharAtIndex: high surrogate} utf16 { string index \uD842 0 } \uD842 test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} { @@ -999,7 +1008,7 @@ test utf-8.7.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { test utf-8.7.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \uD83D\uDE00G 0 } \U1F600 -test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} tip389 { +test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} utf16 { string index \uD83D\uDE00G 0 } \U1F600 test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { @@ -1008,7 +1017,7 @@ test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { test utf-8.8.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \uD83D\uDE00G 1 } G -test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} tip389 { +test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} utf16 { string index \uD83D\uDE00G 1 } {} test utf-8.9.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { @@ -1017,7 +1026,7 @@ test utf-8.9.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { test utf-8.9.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \uD83D\uDE00G 2 } {} -test utf-8.9.2 {Tcl_UniCharAtIndex: Emoji} tip389 { +test utf-8.9.2 {Tcl_UniCharAtIndex: Emoji} utf16 { string index \uD83D\uDE00G 2 } G test utf-8.10.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { @@ -1026,7 +1035,7 @@ test utf-8.10.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { test utf-8.10.1 
{Tcl_UniCharAtIndex: Emoji} {Uesc ucs4} { string index \U1F600G 0 } \U1F600 -test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389} { +test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} {Uesc utf16} { string index \U1F600G 0 } \U1F600 test utf-8.11.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { @@ -1035,7 +1044,7 @@ test utf-8.11.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { test utf-8.11.1 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs4} { string index \U1F600G 1 } G -test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389} { +test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} {Uesc utf16} { string index \U1F600G 1 } {} test utf-8.12.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { @@ -1044,7 +1053,7 @@ test utf-8.12.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { test utf-8.12.1 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs4} { string index \U1F600G 2 } {} -test utf-8.12.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389} { +test utf-8.12.2 {Tcl_UniCharAtIndex: Emoji} {Uesc utf16} { string index \U1F600G 2 } G @@ -1060,7 +1069,7 @@ test utf-9.3.0 {Tcl_UtfAtIndex: index = 0, Emoji} ucs2 { test utf-9.3.1 {Tcl_UtfAtIndex: index = 0, Emoji} ucs4 { string range \uD83D\uDE00G 0 0 } \U1F600 -test utf-9.3.2 {Tcl_UtfAtIndex: index = 0, Emoji} tip389 { +test utf-9.3.2 {Tcl_UtfAtIndex: index = 0, Emoji} utf16 { string range \uD83D\uDE00G 0 0 } \U1F600 test utf-9.4.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { @@ -1069,7 +1078,7 @@ test utf-9.4.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { test utf-9.4.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { string range \uD83D\uDE00G 1 1 } G -test utf-9.4.2 {Tcl_UtfAtIndex: index > 0, Emoji} tip389 { +test utf-9.4.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { string range \uD83D\uDE00G 1 1 } {} test utf-9.5.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { @@ -1078,7 +1087,7 @@ test utf-9.5.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { test utf-9.5.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { string range \uD83D\uDE00G 2 2 } {} -test utf-9.5.2 {Tcl_UtfAtIndex: index > 0, Emoji} tip389 { +test 
utf-9.5.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { string range \uD83D\uDE00G 2 2 } G test utf-9.6.0 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc ucs2} { @@ -1087,7 +1096,7 @@ test utf-9.6.0 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc ucs2} { test utf-9.6.1 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc ucs4} { string range \U1f600G 0 0 } \U1F600 -test utf-9.6.2 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc tip389} { +test utf-9.6.2 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc utf16} { string range \U1f600G 0 0 } \U1F600 test utf-9.7.0 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs2} { @@ -1096,7 +1105,7 @@ test utf-9.7.0 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs2} { test utf-9.7.1 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs4} { string range \U1f600G 1 1 } G -test utf-9.7.2 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc tip389} { +test utf-9.7.2 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc utf16} { string range \U1f600G 1 1 } {} test utf-9.8.0 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs2} { @@ -1105,7 +1114,7 @@ test utf-9.8.0 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs2} { test utf-9.8.1 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs4} { string range \U1f600G 2 2 } {} -test utf-9.8.2 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc tip389} { +test utf-9.8.2 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc utf16} { string range \U1f600G 2 2 } G @@ -1333,7 +1342,7 @@ test utf-19.1 {TclUniCharLen} -body { unset -nocomplain foo } -result {1 4} -test utf-20.1 {TclUniCharNcmp} {ucs4} { +test utf-20.1 {TclUniCharNcmp} ucs4 { string compare [string range [format %c 0xFFFF] 0 0] [string range [format %c 0x10000] 0 0] } -1 test utf-20.2 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} { |