diff options
| author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-04-21 07:03:46 (GMT) |
|---|---|---|
| committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-04-21 07:03:46 (GMT) |
| commit | 108ee434235c4aed8698d7624c7b7c8bce4dae55 (patch) | |
| tree | 00492a3ffda48290fa65066fa0431b1b2090454a | |
| parent | 4f2621d9d59b2df9183fc4a90bb530dbccd18fc2 (diff) | |
| download | tcl-108ee434235c4aed8698d7624c7b7c8bce4dae55.zip tcl-108ee434235c4aed8698d7624c7b7c8bce4dae55.tar.gz tcl-108ee434235c4aed8698d7624c7b7c8bce4dae55.tar.bz2 | |
Add more test-cases for TCL_UTF_MAX>3
| -rw-r--r-- | generic/tclUtf.c | 2 | ||||
| -rw-r--r-- | tests/utf.test | 341 |
2 files changed, 210 insertions, 133 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 842744d..35a98a1 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -74,7 +74,7 @@ static const unsigned char totalBytes[256] = { 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, #if TCL_UTF_MAX > 3 - 4,4,4,4,4, + 4,4,4,4,4, #else 1,1,1,1,1, #endif diff --git a/tests/utf.test b/tests/utf.test index 3301dde..35772ae 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -16,15 +16,20 @@ if {[lsearch [namespace children] ::tcltest] == -1} { ::tcltest::loadTestedCommands catch [list package require -exact Tcltest [info patchlevel]] -testConstraint testbytestring [llength [info commands testbytestring]] - -catch {unset x} - -# Some tests require support for 4-byte UTF-8 sequences testConstraint smallutf [expr {[format %c 0x010000] == "\uFFFD"}] testConstraint fullutf [expr {[format %c 0x010000] != "\uFFFD"}] testConstraint tip389 [expr {[string length \U010000] == 2}] +testConstraint testbytestring [llength [info commands testbytestring]] +testConstraint testfindfirst [llength [info commands testfindfirst]] +testConstraint testfindlast [llength [info commands testfindlast]] +testConstraint testnumutfchars [llength [info commands testnumutfchars]] +testConstraint teststringobj [llength [info commands teststringobj]] +testConstraint testutfnext [llength [info commands testutfnext]] +testConstraint testutfprev [llength [info commands testutfprev]] + +catch {unset x} + test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} testbytestring { expr {"\x01" eq [testbytestring "\x01"]} } 1 @@ -96,6 +101,7 @@ test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} t string length [testbytestring "\xF0\x8F\xBF\xBF"] } {4} test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} {testbytestring knownBug} {# Doesn't work with any TCL_UTF_MAX value + # Would decode to U+110000 but that is outside the Unicode range. string length [testbytestring "\xF4\x90\x80\x80"] } {4} test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestring { @@ -105,10 +111,6 @@ test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestr test utf-3.1 {Tcl_UtfCharComplete} { } {} -testConstraint testnumutfchars [llength [info commands testnumutfchars]] -testConstraint testfindfirst [llength [info commands testfindfirst]] -testConstraint testfindlast [llength [info commands testfindlast]] - test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars { testnumutfchars "" } {0} @@ -116,7 +118,7 @@ test utf-4.2 {Tcl_NumUtfChars: length 1} {testnumutfchars testbytestring} { testnumutfchars [testbytestring "\xC2\xA2"] } {1} test utf-4.3 {Tcl_NumUtfChars: long string} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"] + testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E"] } {7} test utf-4.4 {Tcl_NumUtfChars: #u0000} {testnumutfchars testbytestring} { testnumutfchars [testbytestring "\xC0\x80"] @@ -125,13 +127,13 @@ test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars { testnumutfchars "" 0 } {0} test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "\xC2\xA2"] 2 + testnumutfchars [testbytestring "\xC2\xA2"] 1 } {1} test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"] 10 + testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E"] 10 } {7} test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "\xC0\x80"] 2 + testnumutfchars [testbytestring "\xC0\x80"] 1 } {1} # Bug [2738427]: Tcl_NumUtfChars(...) no overflow check test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} {testnumutfchars testbytestring} { @@ -141,10 +143,13 @@ test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars testnumutfchars [testbytestring "\x00"] 2 } {2} test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring \xf0\x9f\x92\xa9] 3 + testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 3 } {3} -test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring tip389} { - testnumutfchars [testbytestring \xf0\x9f\x92\xa9] 4 +test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring smallutf} { + testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4 +} {4} +test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring tip389} { + testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4 } {2} test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} { @@ -154,8 +159,6 @@ test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} { testfindlast [testbytestring "abcbc"] 98 } {bc} -testConstraint testutfnext [llength [info commands testutfnext]] - test utf-6.1 {Tcl_UtfNext} testutfnext { # This takes the pointer one past the terminating NUL. # This is really an invalid call. @@ -177,7 +180,7 @@ test utf-6.6 {Tcl_UtfNext} testutfnext { testutfnext A\xE8 } 1 test utf-6.7 {Tcl_UtfNext} testutfnext { - testutfnext A\xF4 + testutfnext A\xF2 } 1 test utf-6.8 {Tcl_UtfNext} testutfnext { testutfnext A\xF8 @@ -198,7 +201,7 @@ test utf-6.13 {Tcl_UtfNext} testutfnext { testutfnext \xA0\xE8 } 1 test utf-6.14 {Tcl_UtfNext} testutfnext { - testutfnext \xA0\xF4 + testutfnext \xA0\xF2 } 1 test utf-6.15 {Tcl_UtfNext} testutfnext { testutfnext \xA0\xF8 @@ -207,7 +210,7 @@ test utf-6.16 {Tcl_UtfNext} testutfnext { testutfnext \xD0 } 1 test utf-6.17 {Tcl_UtfNext} testutfnext { - testutfnext \xD0A + testutfnext \xD0G } 1 test utf-6.18 {Tcl_UtfNext} testutfnext { testutfnext \xD0\xA0 @@ -219,7 +222,7 @@ test utf-6.20 {Tcl_UtfNext} testutfnext { testutfnext \xD0\xE8 } 1 test utf-6.21 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xF4 + testutfnext \xD0\xF2 } 1 test utf-6.22 {Tcl_UtfNext} testutfnext { testutfnext \xD0\xF8 @@ -228,7 +231,7 @@ test utf-6.23 {Tcl_UtfNext} testutfnext { testutfnext \xE8 } 1 test utf-6.24 {Tcl_UtfNext} testutfnext { - testutfnext \xE8A + testutfnext \xE8G } 1 test utf-6.25 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xA0 @@ -240,37 +243,37 @@ test utf-6.27 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xE8 } 1 test utf-6.28 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xF4 + testutfnext \xE8\xF2 } 1 test utf-6.29 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xF8 } 1 test utf-6.30 {Tcl_UtfNext} testutfnext { - testutfnext \xF4 + testutfnext \xF2 } 1 test utf-6.31 {Tcl_UtfNext} testutfnext { - testutfnext \xF4A + testutfnext \xF2G } 1 test utf-6.32 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0 + testutfnext \xF2\xA0 } 1 test utf-6.33 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xD0 + testutfnext \xF2\xD0 } 1 test utf-6.34 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xE8 + testutfnext \xF2\xE8 } 1 test utf-6.35 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xF4 + testutfnext \xF2\xF2 } 1 test utf-6.36 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xF8 + testutfnext \xF2\xF8 } 1 test utf-6.37 {Tcl_UtfNext} testutfnext { testutfnext \xF8 } 1 test utf-6.38 {Tcl_UtfNext} testutfnext { - testutfnext \xF8A + testutfnext \xF8G } 1 test utf-6.39 {Tcl_UtfNext} testutfnext { testutfnext \xF8\xA0 @@ -282,7 +285,7 @@ test utf-6.41 {Tcl_UtfNext} testutfnext { testutfnext \xF8\xE8 } 1 test utf-6.42 {Tcl_UtfNext} testutfnext { - testutfnext \xF8\xF4 + testutfnext \xF8\xF2 } 1 test utf-6.43 {Tcl_UtfNext} testutfnext { testutfnext \xF8\xF8 @@ -300,7 +303,7 @@ test utf-6.47 {Tcl_UtfNext} testutfnext { testutfnext \xD0\xA0\xE8 } 2 test utf-6.48 {Tcl_UtfNext} testutfnext { - testutfnext \xD0\xA0\xF4 + testutfnext \xD0\xA0\xF2 } 2 test utf-6.49 {Tcl_UtfNext} testutfnext { testutfnext \xD0\xA0\xF8 @@ -318,28 +321,28 @@ test utf-6.53 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xA0\xE8 } 1 test utf-6.54 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xF4 + testutfnext \xE8\xA0\xF2 } 1 test utf-6.55 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xA0\xF8 } 1 test utf-6.56 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0G + testutfnext \xF2\xA0G } 1 test utf-6.57 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0 + testutfnext \xF2\xA0\xA0 } 1 test utf-6.58 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xD0 + testutfnext \xF2\xA0\xD0 } 1 test utf-6.59 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xE8 + testutfnext \xF2\xA0\xE8 } 1 test utf-6.60 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xF4 + testutfnext \xF2\xA0\xF2 } 1 test utf-6.61 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xF8 + testutfnext \xF2\xA0\xF8 } 1 test utf-6.62 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xA0\xA0G @@ -354,67 +357,67 @@ test utf-6.65 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xA0\xA0\xE8 } 3 test utf-6.66 {Tcl_UtfNext} testutfnext { - testutfnext \xE8\xA0\xA0\xF4 + testutfnext \xE8\xA0\xA0\xF2 } 3 test utf-6.67 {Tcl_UtfNext} testutfnext { testutfnext \xE8\xA0\xA0\xF8 } 3 test utf-6.68 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0G + testutfnext \xF2\xA0\xA0G } 1 -test utf-6.69 {Tcl_UtfNext} {testutfnext smallutf} { - testutfnext \xF4\xA0\xA0\xA0 +test utf-6.69.0 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF2\xA0\xA0\xA0 } 1 test utf-6.69.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF4\xA0\xA0\xA0 + testutfnext \xF2\xA0\xA0\xA0 } 4 test utf-6.70 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xD0 + testutfnext \xF2\xA0\xA0\xD0 } 1 test utf-6.71 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xE8 + testutfnext \xF2\xA0\xA0\xE8 } 1 test utf-6.71 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xF4 + testutfnext \xF2\xA0\xA0\xF2 } 1 test utf-6.73 {Tcl_UtfNext} testutfnext { - testutfnext \xF4\xA0\xA0\xF8 + testutfnext \xF2\xA0\xA0\xF8 } 1 -test utf-6.74 {Tcl_UtfNext} {testutfnext smallutf} { - testutfnext \xF4\xA0\xA0\xA0G +test utf-6.74.0 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF2\xA0\xA0\xA0G } 1 test utf-6.74.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF4\xA0\xA0\xA0G + testutfnext \xF2\xA0\xA0\xA0G } 4 -test utf-6.75 {Tcl_UtfNext} {testutfnext smallutf} { - testutfnext \xF4\xA0\xA0\xA0\xA0 +test utf-6.75.0 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF2\xA0\xA0\xA0\xA0 } 1 test utf-6.75.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF4\xA0\xA0\xA0\xA0 + testutfnext \xF2\xA0\xA0\xA0\xA0 } 4 -test utf-6.76 {Tcl_UtfNext} {testutfnext smallutf} { - testutfnext \xF4\xA0\xA0\xA0\xD0 +test utf-6.76.0 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF2\xA0\xA0\xA0\xD0 } 1 test utf-6.76.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF4\xA0\xA0\xA0\xD0 + testutfnext \xF2\xA0\xA0\xA0\xD0 } 4 -test utf-6.77 {Tcl_UtfNext} {testutfnext smallutf} { - testutfnext \xF4\xA0\xA0\xA0\xE8 +test utf-6.77.0 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF2\xA0\xA0\xA0\xE8 } 1 test utf-6.77.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF4\xA0\xA0\xA0\xE8 + testutfnext \xF2\xA0\xA0\xA0\xE8 } 4 -test utf-6.78 {Tcl_UtfNext} {testutfnext smallutf} { - testutfnext \xF4\xA0\xA0\xA0\xF4 +test utf-6.78.0 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF2\xA0\xA0\xA0\xF2 } 1 test utf-6.78.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF4\xA0\xA0\xA0\xF4 + testutfnext \xF2\xA0\xA0\xA0\xF2 } 4 -test utf-6.79 {Tcl_UtfNext} {testutfnext smallutf} { - testutfnext \xF4\xA0\xA0\xA0G\xF8 +test utf-6.79.0 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF2\xA0\xA0\xA0G\xF8 } 1 test utf-6.79.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF4\xA0\xA0\xA0G\xF8 + testutfnext \xF2\xA0\xA0\xA0G\xF8 } 4 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext { testutfnext \xC0\x80 @@ -437,10 +440,10 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext { test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext { testutfnext \xF0\x80\x80\x80 } 1 -test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext smallutf} { +test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext smallutf} { testutfnext \xF0\x90\x80\x80 } 1 -test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} { +test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} { testutfnext \xF0\x90\x80\x80 } 4 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} { @@ -455,8 +458,18 @@ test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {te test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} { testutfnext \xF0\x80\x80 1 } 3 - -testConstraint testutfprev [llength [info commands testutfprev]] +test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext smallutf} { + testutfnext \xF4\x8F\xBF\xBF +} 1 +test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} { + testutfnext \xF4\x8F\xBF\xBF +} 4 +test utf-6.91.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext smallutf} { + testutfnext \xF4\x90\x80\x80 +} 1 +test utf-6.91.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} { + testutfnext \xF4\x90\x80\x80 +} 4 test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { testutfnext \xA0\xA0\xA0 } 3 @@ -489,13 +502,13 @@ test utf-7.4.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xF8\xA0\xA0 2 } 1 test utf-7.5 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4 + testutfprev A\xF2 } 1 test utf-7.5.1 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xA0\xA0\xA0 2 + testutfprev A\xF2\xA0\xA0\xA0 2 } 1 test utf-7.5.2 {Tcl_UtfPrev} testutfprev { - testutfprev A\xF4\xF8\xA0\xA0 2 + testutfprev A\xF2\xF8\xA0\xA0 2 } 1 test utf-7.6 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8 @@ -533,23 +546,23 @@ test utf-7.9.1 {Tcl_UtfPrev} testutfprev { test utf-7.9.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xF8\xA0 3 } 2 -test utf-7.10 {Tcl_UtfPrev} {testutfprev smallutf} { - testutfprev A\xF4\xA0 +test utf-7.10.0 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF2\xA0 } 2 -test utf-7.10.1 {Tcl_UtfPrev} {testutfprev smallutf} { - testutfprev A\xF4\xA0\xA0\xA0 3 -} 2 -test utf-7.10.2 {Tcl_UtfPrev} {testutfprev smallutf} { - testutfprev A\xF4\xA0\xF8\xA0 3 -} 2 -test utf-7.10 {Tcl_UtfPrev} {testutfprev fullutf} { - testutfprev A\xF4\xA0 -} 1 test utf-7.10.1 {Tcl_UtfPrev} {testutfprev fullutf} { - testutfprev A\xF4\xA0\xA0\xA0 3 + testutfprev A\xF2\xA0 } 1 -test utf-7.10.2 {Tcl_UtfPrev} {testutfprev fullutf} { - testutfprev A\xF4\xA0\xF8\xA0 3 +test utf-7.10.1.0 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF2\xA0\xA0\xA0 3 +} 2 +test utf-7.10.1.1 {Tcl_UtfPrev} {testutfprev fullutf} { + testutfprev A\xF2\xA0\xA0\xA0 3 +} 1 +test utf-7.10.2.0 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF2\xA0\xF8\xA0 3 +} 2 +test utf-7.10.2.1 {Tcl_UtfPrev} {testutfprev fullutf} { + testutfprev A\xF2\xA0\xF8\xA0 3 } 1 test utf-7.11 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8\xA0 @@ -590,23 +603,23 @@ test utf-7.14.1 {Tcl_UtfPrev} testutfprev { test utf-7.14.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xA0\xF8 4 } 3 -test utf-7.15 {Tcl_UtfPrev} {testutfprev smallutf} { - testutfprev A\xF4\xA0\xA0 -} 3 -test utf-7.15.1 {Tcl_UtfPrev} {testutfprev smallutf} { - testutfprev A\xF4\xA0\xA0\xA0 4 +test utf-7.15.0 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF2\xA0\xA0 } 3 -test utf-7.15.2 {Tcl_UtfPrev} {testutfprev smallutf} { - testutfprev A\xF4\xA0\xA0\xF8 4 -} 3 -test utf-7.15.3 {Tcl_UtfPrev} {testutfprev fullutf} { - testutfprev A\xF4\xA0\xA0 +test utf-7.15.1 {Tcl_UtfPrev} {testutfprev fullutf} { + testutfprev A\xF2\xA0\xA0 } 1 -test utf-7.15.4 {Tcl_UtfPrev} {testutfprev fullutf} { - testutfprev A\xF4\xA0\xA0\xA0 4 +test utf-7.15.1.0 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF2\xA0\xA0\xA0 4 +} 3 +test utf-7.15.1.1 {Tcl_UtfPrev} {testutfprev fullutf} { + testutfprev A\xF2\xA0\xA0\xA0 4 } 1 -test utf-7.15.5 {Tcl_UtfPrev} {testutfprev fullutf} { - testutfprev A\xF4\xA0\xA0\xF8 4 +test utf-7.15.2.0 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF2\xA0\xA0\xF8 4 +} 3 +test utf-7.15.2.1 {Tcl_UtfPrev} {testutfprev fullutf} { + testutfprev A\xF2\xA0\xA0\xF8 4 } 1 test utf-7.16 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8\xA0\xA0 @@ -728,19 +741,19 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence} testutfprev { test utf-7.38 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\xA0\x80 2 } 1 -test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { +test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { testutfprev A\xF0\x90\x80\x80 } 2 test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { testutfprev A\xF0\x90\x80\x80 } 1 -test utf-7.40 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { +test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { testutfprev A\xF0\x90\x80\x80 4 } 3 test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { testutfprev A\xF0\x90\x80\x80 4 } 1 -test utf-7.41 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { +test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { testutfprev A\xF0\x90\x80\x80 3 } 2 test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { @@ -773,6 +786,48 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {te test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} { testutfprev \xE8\xA0\x00 2 } 0 +test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} { + testutfprev A\xF4\x8F\xBF\xBF +} 2 +test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { + testutfprev A\xF4\x8F\xBF\xBF +} 1 +test utf-7.48.1.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} { + testutfprev A\xF4\x8F\xBF\xBF 4 +} 3 +test utf-7.48.1.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { + testutfprev A\xF4\x8F\xBF\xBF 4 +} 1 +test utf-7.48.2.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} { + testutfprev A\xF4\x8F\xBF\xBF 3 +} 2 +test utf-7.48.2.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { + testutfprev A\xF4\x8F\xBF\xBF 3 +} 1 +test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x8F\xBF\xBF 2 +} 1 +test utf-7.49.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} { + testutfprev A\xF4\x90\x80\x80 +} 2 +test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { + testutfprev A\xF4\x90\x80\x80 +} 1 +test utf-7.49.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} { + testutfprev A\xF4\x90\x80\x80 4 +} 3 +test utf-7.49.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { + testutfprev A\xF4\x90\x80\x80 4 +} 1 +test utf-7.49.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} { + testutfprev A\xF4\x90\x80\x80 3 +} 2 +test utf-7.49.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { + testutfprev A\xF4\x90\x80\x80 3 +} 1 +test utf-7.49.6 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x90\x80\x80 2 +} 1 test utf-8.1 {Tcl_UniCharAtIndex: index = 0} { string index abcd 0 @@ -786,6 +841,18 @@ test utf-8.3 {Tcl_UniCharAtIndex: index > 0} { test utf-8.4 {Tcl_UniCharAtIndex: index > 0} { string index \u4E4E\u25A\xFF\u543 2 } "\uFF" +test utf-8.5 {Tcl_UniCharAtIndex: high surrogate} smallutf { + string index \uD842 0 +} "\uD842" +test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} { + string index \uDC42 0 +} "\uDC42" +test utf-8.7 {Tcl_UniCharAtIndex: Emoji} smallutf { + string index \uD83D\uDE00 0 +} "\uD83D" +test utf-8.8 {Tcl_UniCharAtIndex: Emoji} { + string index \uD83D\uDE00 1 +} "\uDE00" test utf-9.1 {Tcl_UtfAtIndex: index = 0} { string range abcd 0 2 @@ -793,6 +860,12 @@ test utf-9.1 {Tcl_UtfAtIndex: index = 0} { test utf-9.2 {Tcl_UtfAtIndex: index > 0} { string range \u4E4E\u25A\xFF\u543klmnop 1 5 } "\u25A\xFF\u543kl" +test utf-9.3 {Tcl_UtfAtIndex: index = 0, Emoji} smallutf { + string range \uD83D\uDE00G 0 0 +} "\uD83D" +test utf-9.4 {Tcl_UtfAtIndex: index > 0, Emoji} smallutf { + string range \uD83D\uDE00G 1 1 +} "\uDE00" test utf-10.1 {Tcl_UtfBackslash: dst == NULL} { @@ -891,11 +964,11 @@ test utf-11.2 {Tcl_UtfToUpper} { string toupper abc } ABC test utf-11.3 {Tcl_UtfToUpper} { - string toupper \u00E3AB -} \u00C3AB + string toupper \xE3gh +} \xC3GH test utf-11.4 {Tcl_UtfToUpper} { - string toupper \u01E3AB -} \u01E2AB + string toupper \u01E3gh +} \u01E2GH test utf-11.5 {Tcl_UtfToUpper Georgian (new in Unicode 11)} { string toupper \u10D0\u1C90 } \u1C90\u1C90 @@ -907,14 +980,17 @@ test utf-12.2 {Tcl_UtfToLower} { string tolower ABC } abc test utf-12.3 {Tcl_UtfToLower} { - string tolower \u00C3AB -} \u00E3ab + string tolower \xC3GH +} \xE3gh test utf-12.4 {Tcl_UtfToLower} { - string tolower \u01E2AB -} \u01E3ab + string tolower \u01E2GH +} \u01E3gh test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} { string tolower \u10D0\u1C90 } \u10D0\u10D0 +test utf-12.6 {Tcl_UtfToUpper low/high surrogate)} smallutf { + string tolower \uDC24\uD824 +} \uDC24\uD824 test utf-13.1 {Tcl_UtfToTitle} { string totitle {} @@ -923,8 +999,8 @@ test utf-13.2 {Tcl_UtfToTitle} { string totitle abc } Abc test utf-13.3 {Tcl_UtfToTitle} { - string totitle \u00E3AB -} \u00C3ab + string totitle \xE3GH +} \xC3gh test utf-13.4 {Tcl_UtfToTitle} { string totitle \u01F3AB } \u01F2ab @@ -934,6 +1010,9 @@ test utf-13.5 {Tcl_UtfToTitle Georgian (new in Unicode 11)} { test utf-13.6 {Tcl_UtfToTitle Georgian (new in Unicode 11)} { string totitle \u1C90\u10D0 } \u1C90\u10D0 +test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} smallutf { + string totitle \uDC24\uD824 +} \uDC24\uD824 test utf-14.1 {Tcl_UtfNcasecmp} { string compare -nocase a b @@ -952,7 +1031,7 @@ test utf-15.1 {Tcl_UniCharToUpper, negative delta} { string toupper aA } AA test utf-15.2 {Tcl_UniCharToUpper, positive delta} { - string toupper \u0178\u00FF + string toupper \u0178\xFF } \u0178\u0178 test utf-15.3 {Tcl_UniCharToUpper, no delta} { string toupper ! @@ -962,8 +1041,8 @@ test utf-16.1 {Tcl_UniCharToLower, negative delta} { string tolower aA } aa test utf-16.2 {Tcl_UniCharToLower, positive delta} { - string tolower \u0178\u00FF\uA78D\u01C5 -} \u00FF\u00FF\u0265\u01C6 + string tolower \u0178\xFF\uA78D\u01C5 +} \xFF\xFF\u0265\u01C6 test utf-17.1 {Tcl_UniCharToLower, no delta} { string tolower ! @@ -977,9 +1056,9 @@ test utf-18.2 {Tcl_UniCharToTitle, subtract one for title} { } \u01C5 test utf-18.3 {Tcl_UniCharToTitle, subtract delta for title (positive)} { string totitle \u017F -} \u0053 +} \x53 test utf-18.4 {Tcl_UniCharToTitle, subtract delta for title (negative)} { - string totitle \u00FF + string totitle \xFF } \u0178 test utf-18.5 {Tcl_UniCharToTitle, no delta} { string totitle ! @@ -1016,31 +1095,31 @@ test utf-21.5 {unicode graph char in regc_locale.c} { } {1} test utf-21.6 {TclUniCharIsGraph} { # [Bug 3464428] - string is graph \u00A0 + string is graph \xA0 } {0} test utf-21.7 {unicode graph char in regc_locale.c} { # [Bug 3464428] - regexp {[[:graph:]]} \u0020\u00A0\u2028\u2029 + regexp {[[:graph:]]} \x20\xA0\u2028\u2029 } {0} test utf-21.8 {TclUniCharIsPrint} { # [Bug 3464428] - string is print \u0009 + string is print \x09 } {0} test utf-21.9 {unicode print char in regc_locale.c} { # [Bug 3464428] - regexp {[[:print:]]} \u0009 + regexp {[[:print:]]} \x09 } {0} test utf-21.10 {unicode print char in regc_locale.c} { # [Bug 3464428] - regexp {[[:print:]]} \u0009 + regexp {[[:print:]]} \x09 } {0} test utf-21.11 {TclUniCharIsControl} { # [Bug 3464428] - string is control \u0000\u001F\u00AD\u0605\u061C\u180E\u2066\uFEFF + string is control \x00\x1F\xAD\u0605\u061C\u180E\u2066\uFEFF } {1} test utf-21.12 {unicode control char in regc_locale.c} { # [Bug 3464428], [Bug a876646efe] - regexp {^[[:cntrl:]]*$} \u0000\u001F\u00AD\u0605\u061C\u180E\u2066\uFEFF + regexp {^[[:cntrl:]]*$} \x00\x1F\xAD\u0605\u061C\u180E\u2066\uFEFF } {1} test utf-22.1 {TclUniCharIsWordChar} { @@ -1070,15 +1149,13 @@ test utf-24.2 {unicode digit char in regc_locale.c} { test utf-24.3 {TclUniCharIsSpace} { # this returns 1 with Unicode 7/TIP 413 compliance - string is space \u0085\u1680\u180E\u200B\u202F\u2060 + string is space \x85\u1680\u180E\u200B\u202F\u2060 } {1} test utf-24.4 {unicode space char in regc_locale.c} { # this returns 1 with Unicode 7/TIP 413 compliance - list [regexp {^[[:space:]]+$} \u0085\u1680\u180E\u200B\u202F\u2060] [regexp {^\s+$} \u0085\u1680\u180E\u200B\u202F\u2060] + list [regexp {^[[:space:]]+$} \x85\u1680\u180E\u200B\u202F\u2060] [regexp {^\s+$} \x85\u1680\u180E\u200B\u202F\u2060] } {1 1} -testConstraint teststringobj [llength [info commands teststringobj]] - test utf-25.1 {Tcl_UniCharNcasecmp} -constraints teststringobj \ -setup { testobj freeallvars |
