From b48319b304980c06ca5dd093770f8234eb8dbec5 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans"
Date: Thu, 23 Apr 2020 12:34:38 +0000
Subject: Testcase cleanup

---
Review notes (everything before the first "diff --git" line is skipped when
the patch is applied):
  * Moves the tip389 constraint ahead of the other testConstraint lines and
    compares the string length with `eq` instead of `==`.
  * Rewrites utf-1.7, utf-2.8 and utf-2.9 from the option form
    (-constraints / -body / -result) to the positional form their
    neighbours already use.
  * Drops the braces around single-word expected results, e.g. {3} becomes 3
    and {bc} becomes bc.
  * Renumbers the second test that was named utf-6.71 to utf-6.72.
  * NOTE(review): this copy had lost its line breaks.  Records, blank context
    lines and body indentation were restored using the hunk line counts
    (they add up to the 50/49 in the diffstat); the 4-space body indentation
    is assumed, so confirm with `git apply --check` before applying.

 tests/utf.test | 99 +++++++++++++++++++++++++++++----------------------------
 1 file changed, 50 insertions(+), 49 deletions(-)

diff --git a/tests/utf.test b/tests/utf.test
index a64ce7a..79f866a 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -16,8 +16,9 @@ if {[lsearch [namespace children] ::tcltest] == -1} {
 ::tcltest::loadTestedCommands
 catch [list package require -exact Tcltest [info patchlevel]]
 
+testConstraint tip389 [expr {[string length \U010000] eq 2}]
+
 testConstraint testbytestring [llength [info commands testbytestring]]
-testConstraint tip389 [expr {[string length \U010000] == 2}]
 testConstraint testfindfirst [llength [info commands testfindfirst]]
 testConstraint testfindlast [llength [info commands testfindlast]]
 testConstraint testnumutfchars [llength [info commands testnumutfchars]]
@@ -45,9 +46,9 @@ test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} testbytestring {
 test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring {
     expr {[format %c -1] eq [testbytestring "\xEF\xBF\xBD"]}
 } 1
-test utf-1.7 {Tcl_UniCharToUtf: 4 byte sequences} -constraints testbytestring -body {
+test utf-1.7 {Tcl_UniCharToUtf: 4 byte sequences} testbytestring {
     expr {"\U014E4E" eq [testbytestring "\xF0\x94\xB9\x8E"]}
-} -result 1
+} 1
 test utf-1.8 {Tcl_UniCharToUtf: 3 byte sequence, high surrogate} testbytestring {
     expr {"\uD842" eq [testbytestring "\xED\xA1\x82"]}
 } 1
@@ -69,89 +70,89 @@ test utf-1.13 {Tcl_UniCharToUtf: Invalid surrogate} testbytestring {
 
 test utf-2.1 {Tcl_UtfToUniChar: low ascii} {
     string length "abc"
-} {3}
+} 3
 test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} testbytestring {
     string length [testbytestring "\x82\x83\x84"]
-} {3}
+} 3
 test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} testbytestring {
     string length [testbytestring "\xC2"]
-} {1}
+} 1
 test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} testbytestring {
     string length [testbytestring "\xC2\xA2"]
-} {1}
+} 1
 test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} testbytestring {
     string length [testbytestring "\xE2"]
-} {1}
+} 1
 test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} testbytestring {
     string length [testbytestring "\xE2\xA2"]
-} {2}
+} 2
 test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring {
     string length [testbytestring "\xE4\xB9\x8E"]
-} {1}
-test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {tip389 testbytestring} -body {
+} 1
+test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {tip389 testbytestring} {
     string length [testbytestring "\xF0\x90\x80\x80"]
-} -result {2}
-test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {tip389 testbytestring} -body {
+} 2
+test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {tip389 testbytestring} {
     string length [testbytestring "\xF4\x8F\xBF\xBF"]
-} -result {2}
+} 2
 test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring {
     string length [testbytestring "\xF0\x8F\xBF\xBF"]
-} {4}
+} 4
 test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} testbytestring {
     # Would decode to U+110000 but that is outside the Unicode range.
     string length [testbytestring "\xF4\x90\x80\x80"]
-} {4}
+} 4
 test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestring {
     string length [testbytestring "\xF8\xA2\xA2\xA2\xA2"]
-} {5}
+} 5
 
 test utf-3.1 {Tcl_UtfCharComplete} {
 } {}
 
 test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars {
     testnumutfchars ""
-} {0}
+} 0
 test utf-4.2 {Tcl_NumUtfChars: length 1} {testnumutfchars testbytestring} {
     testnumutfchars [testbytestring "\xC2\xA2"]
-} {1}
+} 1
 test utf-4.3 {Tcl_NumUtfChars: long string} {testnumutfchars testbytestring} {
     testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E"]
-} {7}
+} 7
 test utf-4.4 {Tcl_NumUtfChars: #u0000} {testnumutfchars testbytestring} {
     testnumutfchars [testbytestring "\xC0\x80"]
-} {1}
+} 1
 test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars {
     testnumutfchars "" 0
-} {0}
+} 0
 test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {testnumutfchars testbytestring} {
     testnumutfchars [testbytestring "\xC2\xA2"] end
-} {1}
+} 1
 test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {testnumutfchars testbytestring} {
     testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"] end
-} {7}
+} 7
 test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {testnumutfchars testbytestring} {
     testnumutfchars [testbytestring "\xC0\x80"] end
-} {1}
+} 1
 # Bug [2738427]: Tcl_NumUtfChars(...) no overflow check
 test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} {testnumutfchars testbytestring} {
     testnumutfchars [testbytestring "\xE2\x82\xAC"] end-1
-} {2}
+} 2
 test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars testbytestring} {
     testnumutfchars [testbytestring "\x00"] end+1
-} {2}
+} 2
 test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} {
     testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end-1
-} {3}
+} 3
 test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring tip389} {
     testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end
-} {2}
+} 2
 
 test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} {
     testfindfirst [testbytestring "abcbc"] 98
-} {bcbc}
+} bcbc
 test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} {
     testfindlast [testbytestring "abcbc"] 98
-} {bc}
+} bc
 
 test utf-6.1 {Tcl_UtfNext} testutfnext {
     # This takes the pointer one past the terminating NUL.
@@ -368,7 +369,7 @@ test utf-6.70 {Tcl_UtfNext} testutfnext {
 test utf-6.71 {Tcl_UtfNext} testutfnext {
     testutfnext -bytestring \xF2\xA0\xA0\xE8
 } 1
-test utf-6.71 {Tcl_UtfNext} testutfnext {
+test utf-6.72 {Tcl_UtfNext} testutfnext {
     testutfnext -bytestring \xF2\xA0\xA0\xF2
 } 1
 test utf-6.73 {Tcl_UtfNext} testutfnext {
@@ -708,13 +709,13 @@ test utf-7.49.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
 
 test utf-8.1 {Tcl_UniCharAtIndex: index = 0} {
     string index abcd 0
-} {a}
+} a
 test utf-8.2 {Tcl_UniCharAtIndex: index = 0} {
     string index \u4E4E\u25A 0
 } "\u4E4E"
 test utf-8.3 {Tcl_UniCharAtIndex: index > 0} {
     string index abcd 2
-} {c}
+} c
 test utf-8.4 {Tcl_UniCharAtIndex: index > 0} {
     string index \u4E4E\u25A\xFF\u543 2
 } "\uFF"
@@ -733,7 +734,7 @@ test utf-8.8 {Tcl_UniCharAtIndex: Emoji} {
 
 test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
     string range abcd 0 2
-} {abc}
+} abc
 test utf-9.2 {Tcl_UtfAtIndex: index > 0} {
     string range \u4E4E\u25A\xFF\u543klmnop 1 5
 } "\u25A\xFF\u543kl"
@@ -954,7 +955,7 @@ test utf-20.1 {TclUniCharNcmp} {
 test utf-21.1 {TclUniCharIsAlnum} {
     # this returns 1 with Unicode 7 compliance
     string is alnum \u1040\u021F\u0220
-} {1}
+} 1
 test utf-21.2 {unicode alnum char in regc_locale.c} {
     # this returns 1 with Unicode 7 compliance
     list [regexp {^[[:alnum:]]+$} \u1040\u021F\u0220] [regexp {^\w+$} \u1040\u021F\u0220_\u203F\u2040\u2054\uFE33\uFE34\uFE4D\uFE4E\uFE4F\uFF3F]
@@ -966,39 +967,39 @@ test utf-21.3 {unicode print char in regc_locale.c} {
 test utf-21.4 {TclUniCharIsGraph} {
     # [Bug 3464428]
     string is graph \u0120
-} {1}
+} 1
 test utf-21.5 {unicode graph char in regc_locale.c} {
     # [Bug 3464428]
     regexp {^[[:graph:]]+$} \u0120
-} {1}
+} 1
 test utf-21.6 {TclUniCharIsGraph} {
     # [Bug 3464428]
     string is graph \xA0
-} {0}
+} 0
 test utf-21.7 {unicode graph char in regc_locale.c} {
     # [Bug 3464428]
     regexp {[[:graph:]]} \x20\xA0\u2028\u2029
-} {0}
+} 0
 test utf-21.8 {TclUniCharIsPrint} {
     # [Bug 3464428]
     string is print \x09
-} {0}
+} 0
 test utf-21.9 {unicode print char in regc_locale.c} {
     # [Bug 3464428]
     regexp {[[:print:]]} \x09
-} {0}
+} 0
 test utf-21.10 {unicode print char in regc_locale.c} {
     # [Bug 3464428]
     regexp {[[:print:]]} \x09
-} {0}
+} 0
 test utf-21.11 {TclUniCharIsControl} {
     # [Bug 3464428]
     string is control \x00\x1F\xAD\u0605\u061C\u180E\u2066\uFEFF
-} {1}
+} 1
 test utf-21.12 {unicode control char in regc_locale.c} {
     # [Bug 3464428], [Bug a876646efe]
     regexp {^[[:cntrl:]]*$} \x00\x1F\xAD\u0605\u061C\u180E\u2066\uFEFF
-} {1}
+} 1
 
 test utf-22.1 {TclUniCharIsWordChar} {
     string wordend "xyz123_bar fg" 0
@@ -1010,16 +1011,16 @@ test utf-22.2 {TclUniCharIsWordChar} {
 test utf-23.1 {TclUniCharIsAlpha} {
     # this returns 1 with Unicode 7 compliance
     string is alpha \u021F\u0220\u037F\u052F
-} {1}
+} 1
 test utf-23.2 {unicode alpha char in regc_locale.c} {
     # this returns 1 with Unicode 7 compliance
     regexp {^[[:alpha:]]+$} \u021F\u0220\u037F\u052F
-} {1}
+} 1
 
 test utf-24.1 {TclUniCharIsDigit} {
     # this returns 1 with Unicode 7 compliance
     string is digit \u1040\uABF0
-} {1}
+} 1
 test utf-24.2 {unicode digit char in regc_locale.c} {
     # this returns 1 with Unicode 7 compliance
     list [regexp {^[[:digit:]]+$} \u1040\uABF0] [regexp {^\d+$} \u1040\uABF0]
@@ -1028,7 +1029,7 @@ test utf-24.2 {unicode digit char in regc_locale.c} {
 test utf-24.3 {TclUniCharIsSpace} {
     # this returns 1 with Unicode 7/TIP 413 compliance
     string is space \x85\u1680\u180E\u200B\u202F\u2060
-} {1}
+} 1
 test utf-24.4 {unicode space char in regc_locale.c} {
     # this returns 1 with Unicode 7/TIP 413 compliance
     list [regexp {^[[:space:]]+$} \x85\u1680\u180E\u200B\u202F\u2060] [regexp {^\s+$} \x85\u1680\u180E\u200B\u202F\u2060]
-- 
cgit v0.12