summaryrefslogtreecommitdiffstats
path: root/tests/utf.test
diff options
context:
space:
mode:
authordkf <donal.k.fellows@manchester.ac.uk>2018-11-06 10:04:02 (GMT)
committerdkf <donal.k.fellows@manchester.ac.uk>2018-11-06 10:04:02 (GMT)
commit179e8f4df7568f6aca6e53525abfe0505fd2a578 (patch)
treec9328ec8e0afd8af8a96b9f3da12b152cec7c281 /tests/utf.test
parentc5a85dbfdc7dce9328b7f5fffb0bae519f68cf9f (diff)
parent2798a075ee62ea5ab4aa80279d614a8634ba378a (diff)
downloadtcl-179e8f4df7568f6aca6e53525abfe0505fd2a578.zip
tcl-179e8f4df7568f6aca6e53525abfe0505fd2a578.tar.gz
tcl-179e8f4df7568f6aca6e53525abfe0505fd2a578.tar.bz2
merge core-8-branch
Diffstat (limited to 'tests/utf.test')
-rw-r--r--tests/utf.test63
1 files changed, 50 insertions, 13 deletions
diff --git a/tests/utf.test b/tests/utf.test
index 95775a8..e820359 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -21,7 +21,7 @@ testConstraint testbytestring [llength [info commands testbytestring]]
catch {unset x}
# Some tests require support for 4-byte UTF-8 sequences
-testConstraint fullutf [expr {[format %c 0x010000] != "\ufffd"}]
+testConstraint tip389 [expr {[string length \U010000] == 2}]
test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} testbytestring {
expr {"\x01" eq [testbytestring "\x01"]}
@@ -41,9 +41,21 @@ test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} testbytestring {
test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring {
expr {[format %c -1] eq [testbytestring "\xef\xbf\xbd"]}
} 1
-test utf-1.7 {Tcl_UniCharToUtf: 4 byte sequences} -constraints {fullutf testbytestring} -body {
+test utf-1.7 {Tcl_UniCharToUtf: 4 byte sequences} -constraints testbytestring -body {
expr {"\U014e4e" eq [testbytestring "\xf0\x94\xb9\x8e"]}
} -result 1
+test utf-1.8 {Tcl_UniCharToUtf: 3 byte sequence, upper surrogate} testbytestring {
+ expr {"\ud842" eq [testbytestring "\xed\xa1\x82"]}
+} 1
+test utf-1.9 {Tcl_UniCharToUtf: 3 byte sequence, lower surrogate} testbytestring {
+ expr {"\udc42" eq [testbytestring "\xed\xb1\x82"]}
+} 1
+test utf-1.10 {Tcl_UniCharToUtf: 3 byte sequence, upper surrogate} testbytestring {
+ expr {[format %c 0xd842] eq [testbytestring "\xed\xa1\x82"]}
+} 1
+test utf-1.11 {Tcl_UniCharToUtf: 3 byte sequence, lower surrogate} testbytestring {
+ expr {[format %c 0xdc42] eq [testbytestring "\xed\xb1\x82"]}
+} 1
test utf-2.1 {Tcl_UtfToUniChar: low ascii} {
string length "abc"
@@ -66,10 +78,10 @@ test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} testbytestrin
test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring {
string length [testbytestring "\xE4\xb9\x8e"]
} {1}
-test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {fullutf testbytestring} -body {
+test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {tip389 testbytestring} -body {
string length [testbytestring "\xF0\x90\x80\x80"]
} -result {2}
-test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {fullutf testbytestring} -body {
+test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {tip389 testbytestring} -body {
string length [testbytestring "\xF4\x8F\xBF\xBF"]
} -result {2}
test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring {
@@ -146,6 +158,12 @@ test utf-8.3 {Tcl_UniCharAtIndex: index > 0} {
test utf-8.4 {Tcl_UniCharAtIndex: index > 0} {
string index \u4e4e\u25a\xff\u543 2
} "\uff"
+test utf-8.5 {Tcl_UniCharAtIndex: upper surrogate} {
+ string index \ud842 0
+} "\ud842"
+test utf-8.6 {Tcl_UniCharAtIndex: lower surrogate} {
+ string index \udc42 0
+} "\udc42"
test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
string range abcd 0 2
@@ -228,15 +246,13 @@ bsCheck \U4e21 20001
bsCheck \U004e21 20001
bsCheck \U00004e21 20001
bsCheck \U0000004e21 78
-if {[testConstraint fullutf]} {
- bsCheck \U00110000 69632
- bsCheck \U01100000 69632
- bsCheck \U11000000 69632
- bsCheck \U0010FFFF 1114111
- bsCheck \U010FFFF0 1114111
- bsCheck \U10FFFF00 1114111
- bsCheck \UFFFFFFFF 1048575
-}
+bsCheck \U00110000 69632
+bsCheck \U01100000 69632
+bsCheck \U11000000 69632
+bsCheck \U0010FFFF 1114111
+bsCheck \U010FFFF0 1114111
+bsCheck \U10FFFF00 1114111
+bsCheck \UFFFFFFFF 1048575
test utf-11.1 {Tcl_UtfToUpper} {
string toupper {}
@@ -250,6 +266,12 @@ test utf-11.3 {Tcl_UtfToUpper} {
test utf-11.4 {Tcl_UtfToUpper} {
string toupper \u01e3ab
} \u01e2AB
+test utf-11.5 {Tcl_UtfToUpper Georgian (new in Unicode 11)} {
+ string toupper \u10d0\u1c90
+} \u1c90\u1c90
+test utf-11.6 {Tcl_UtfToUpper low/high surrogate} {
+ string toupper \udc24\ud824
+} \udc24\ud824
test utf-12.1 {Tcl_UtfToLower} {
string tolower {}
@@ -263,6 +285,12 @@ test utf-12.3 {Tcl_UtfToLower} {
test utf-12.4 {Tcl_UtfToLower} {
string tolower \u01e2AB
} \u01e3ab
+test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} {
+ string tolower \u10d0\u1c90
+} \u10d0\u10d0
+test utf-12.6 {Tcl_UtfToLower low/high surrogate} {
+ string tolower \udc24\ud824
+} \udc24\ud824
test utf-13.1 {Tcl_UtfToTitle} {
string totitle {}
@@ -276,6 +304,15 @@ test utf-13.3 {Tcl_UtfToTitle} {
test utf-13.4 {Tcl_UtfToTitle} {
string totitle \u01f3ab
} \u01f2ab
+test utf-13.5 {Tcl_UtfToTitle Georgian (new in Unicode 11)} {
+ string totitle \u10d0\u1c90
+} \u10d0\u1c90
+test utf-13.6 {Tcl_UtfToTitle Georgian (new in Unicode 11)} {
+ string totitle \u1c90\u10d0
+} \u1c90\u10d0
+test utf-13.7 {Tcl_UtfToTitle low/high surrogate} {
+ string totitle \udc24\ud824
+} \udc24\ud824
test utf-14.1 {Tcl_UtfNcasecmp} {
string compare -nocase a b