diff options
author | dkf <donal.k.fellows@manchester.ac.uk> | 2018-11-06 10:04:02 (GMT) |
---|---|---|
committer | dkf <donal.k.fellows@manchester.ac.uk> | 2018-11-06 10:04:02 (GMT) |
commit | 179e8f4df7568f6aca6e53525abfe0505fd2a578 (patch) | |
tree | c9328ec8e0afd8af8a96b9f3da12b152cec7c281 /tests/utf.test | |
parent | c5a85dbfdc7dce9328b7f5fffb0bae519f68cf9f (diff) | |
parent | 2798a075ee62ea5ab4aa80279d614a8634ba378a (diff) | |
download | tcl-179e8f4df7568f6aca6e53525abfe0505fd2a578.zip tcl-179e8f4df7568f6aca6e53525abfe0505fd2a578.tar.gz tcl-179e8f4df7568f6aca6e53525abfe0505fd2a578.tar.bz2 |
merge core-8-branch
Diffstat (limited to 'tests/utf.test')
-rw-r--r-- | tests/utf.test | 63 |
1 files changed, 50 insertions, 13 deletions
diff --git a/tests/utf.test b/tests/utf.test index 95775a8..e820359 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -21,7 +21,7 @@ testConstraint testbytestring [llength [info commands testbytestring]] catch {unset x} # Some tests require support for 4-byte UTF-8 sequences -testConstraint fullutf [expr {[format %c 0x010000] != "\ufffd"}] +testConstraint tip389 [expr {[string length \U010000] == 2}] test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} testbytestring { expr {"\x01" eq [testbytestring "\x01"]} @@ -41,9 +41,21 @@ test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} testbytestring { test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring { expr {[format %c -1] eq [testbytestring "\xef\xbf\xbd"]} } 1 -test utf-1.7 {Tcl_UniCharToUtf: 4 byte sequences} -constraints {fullutf testbytestring} -body { +test utf-1.7 {Tcl_UniCharToUtf: 4 byte sequences} -constraints testbytestring -body { expr {"\U014e4e" eq [testbytestring "\xf0\x94\xb9\x8e"]} } -result 1 +test utf-1.8 {Tcl_UniCharToUtf: 3 byte sequence, upper surrogate} testbytestring { + expr {"\ud842" eq [testbytestring "\xed\xa1\x82"]} +} 1 +test utf-1.9 {Tcl_UniCharToUtf: 3 byte sequence, lower surrogate} testbytestring { + expr {"\udc42" eq [testbytestring "\xed\xb1\x82"]} +} 1 +test utf-1.10 {Tcl_UniCharToUtf: 3 byte sequence, upper surrogate} testbytestring { + expr {[format %c 0xd842] eq [testbytestring "\xed\xa1\x82"]} +} 1 +test utf-1.11 {Tcl_UniCharToUtf: 3 byte sequence, lower surrogate} testbytestring { + expr {[format %c 0xdc42] eq [testbytestring "\xed\xb1\x82"]} +} 1 test utf-2.1 {Tcl_UtfToUniChar: low ascii} { string length "abc" @@ -66,10 +78,10 @@ test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} testbytestrin test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring { string length [testbytestring "\xE4\xb9\x8e"] } {1} -test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {fullutf testbytestring} -body { +test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {tip389 testbytestring} -body { string length [testbytestring "\xF0\x90\x80\x80"] } -result {2} -test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {fullutf testbytestring} -body { +test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {tip389 testbytestring} -body { string length [testbytestring "\xF4\x8F\xBF\xBF"] } -result {2} test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring { @@ -146,6 +158,12 @@ test utf-8.3 {Tcl_UniCharAtIndex: index > 0} { test utf-8.4 {Tcl_UniCharAtIndex: index > 0} { string index \u4e4e\u25a\xff\u543 2 } "\uff" +test utf-8.5 {Tcl_UniCharAtIndex: upper surrogate} { + string index \ud842 0 +} "\ud842" +test utf-8.5 {Tcl_UniCharAtIndex: lower surrogate} { + string index \udc42 0 +} "\udc42" test utf-9.1 {Tcl_UtfAtIndex: index = 0} { string range abcd 0 2 @@ -228,15 +246,13 @@ bsCheck \U4e21 20001 bsCheck \U004e21 20001 bsCheck \U00004e21 20001 bsCheck \U0000004e21 78 -if {[testConstraint fullutf]} { - bsCheck \U00110000 69632 - bsCheck \U01100000 69632 - bsCheck \U11000000 69632 - bsCheck \U0010FFFF 1114111 - bsCheck \U010FFFF0 1114111 - bsCheck \U10FFFF00 1114111 - bsCheck \UFFFFFFFF 1048575 -} +bsCheck \U00110000 69632 +bsCheck \U01100000 69632 +bsCheck \U11000000 69632 +bsCheck \U0010FFFF 1114111 +bsCheck \U010FFFF0 1114111 +bsCheck \U10FFFF00 1114111 +bsCheck \UFFFFFFFF 1048575 test utf-11.1 {Tcl_UtfToUpper} { string toupper {} @@ -250,6 +266,12 @@ test utf-11.3 {Tcl_UtfToUpper} { test utf-11.4 {Tcl_UtfToUpper} { string toupper \u01e3ab } \u01e2AB +test utf-11.5 {Tcl_UtfToUpper Georgian (new in Unicode 11)} { + string toupper \u10d0\u1c90 +} \u1c90\u1c90 +test utf-11.6 {Tcl_UtfToUpper low/high surrogate)} { + string toupper \udc24\ud824 +} \udc24\ud824 test utf-12.1 {Tcl_UtfToLower} { string tolower {} @@ -263,6 +285,12 @@ test utf-12.3 {Tcl_UtfToLower} { test utf-12.4 {Tcl_UtfToLower} { string tolower \u01e2AB } \u01e3ab +test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} { + string tolower \u10d0\u1c90 +} \u10d0\u10d0 +test utf-12.6 {Tcl_UtfToUpper low/high surrogate)} { + string tolower \udc24\ud824 +} \udc24\ud824 test utf-13.1 {Tcl_UtfToTitle} { string totitle {} @@ -276,6 +304,15 @@ test utf-13.3 {Tcl_UtfToTitle} { test utf-13.4 {Tcl_UtfToTitle} { string totitle \u01f3ab } \u01f2ab +test utf-13.5 {Tcl_UtfToTitle Georgian (new in Unicode 11)} { + string totitle \u10d0\u1c90 +} \u10d0\u1c90 +test utf-13.6 {Tcl_UtfToTitle Georgian (new in Unicode 11)} { + string totitle \u1c90\u10d0 +} \u1c90\u10d0 +test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} { + string totitle \udc24\ud824 +} \udc24\ud824 test utf-14.1 {Tcl_UtfNcasecmp} { string compare -nocase a b |