From 371c07da346f6b67f915100aa1bb558cf02fe41d Mon Sep 17 00:00:00 2001 From: dgp Date: Sun, 26 Apr 2020 20:58:28 +0000 Subject: Continuing test reconciliation. --- tests/utf.test | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/utf.test b/tests/utf.test index 0d93a12..f5e5bcc 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -17,6 +17,7 @@ namespace path ::tcl::mathop testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] +testConstraint tip389 [expr {[string length [format %c 0x10000]] eq 2}] testConstraint Uesc [eq \U0041 A] testConstraint pairsTo4bytes [expr {[llength [info commands teststringbytes]] @@ -102,12 +103,18 @@ test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytest test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring fullutf} { string length [testbytestring "\xF0\x90\x80\x80"] } 1 +test utf-2.8.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring tip389} { + string length [testbytestring "\xF0\x90\x80\x80"] +} 2 test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs2} { string length [testbytestring "\xF4\x8F\xBF\xBF"] } 4 test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring fullutf} { string length [testbytestring "\xF4\x8F\xBF\xBF"] } 1 +test utf-2.9.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring tip389} { + string length [testbytestring "\xF4\x8F\xBF\xBF"] +} 2 test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring { string length [testbytestring "\xF0\x8F\xBF\xBF"] } 4 -- cgit v0.12 From e6de958cf55d8685930192267e22e8a842c7575b Mon Sep 17 00:00:00 2001 From: dgp Date: Sun, 26 Apr 2020 21:11:40 +0000 Subject: Refine the constraint. The fact that Tcl stores extended characters internally does not imply that [string length] counts UCS4 characters instead of UTF-16 code units. --- tests/utf.test | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/utf.test b/tests/utf.test index f5e5bcc..a9e5353 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -18,6 +18,8 @@ namespace path ::tcl::mathop testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] testConstraint tip389 [expr {[string length [format %c 0x10000]] eq 2}] +testConstraint ucs4 [expr {[testConstraint fullutf] + && [string length [format %c 0x10000]] == 1}] testConstraint Uesc [eq \U0041 A] testConstraint pairsTo4bytes [expr {[llength [info commands teststringbytes]] @@ -100,7 +102,7 @@ test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestrin test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs2} { string length [testbytestring "\xF0\x90\x80\x80"] } 4 -test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring fullutf} { +test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs4} { string length [testbytestring "\xF0\x90\x80\x80"] } 1 test utf-2.8.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring tip389} { @@ -109,7 +111,7 @@ test utf-2.8.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytest test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs2} { string length [testbytestring "\xF4\x8F\xBF\xBF"] } 4 -test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring fullutf} { +test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs4} { string length [testbytestring "\xF4\x8F\xBF\xBF"] } 1 test utf-2.9.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring tip389} { -- cgit v0.12