From cf9fc355f851761768c2eba9e991682558c20f48 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 12 Apr 2021 10:04:46 +0000 Subject: Backport utf testcase tweaks from 8.7. No change for ucs-2 --- tests/utf.test | 150 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 75 insertions(+), 75 deletions(-) diff --git a/tests/utf.test b/tests/utf.test index 76b6847..5cd9cf5 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -57,10 +57,10 @@ test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} testbytestring { test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring { expr {[format %c -1] eq [testbytestring \xEF\xBF\xBD]} } 1 -test utf-1.7.0 {Tcl_UniCharToUtf: 4 byte sequences} {fullutf Uesc testbytestring} { +test utf-1.7.0 {Tcl_UniCharToUtf: 4 byte sequences} {fullutf testbytestring} { expr {"\U014E4E" eq [testbytestring \xF0\x94\xB9\x8E]} } 1 -test utf-1.7.1 {Tcl_UniCharToUtf: 4 byte sequences} {ucs2 Uesc testbytestring} { +test utf-1.7.1 {Tcl_UniCharToUtf: 4 byte sequences} {Uesc ucs2 testbytestring} { expr {"\U014E4E" eq [testbytestring \xF0\x94\xB9\x8E]} } 0 test utf-1.8 {Tcl_UniCharToUtf: 3 byte sequence, high surrogate} testbytestring { @@ -81,7 +81,7 @@ test utf-1.12 {Tcl_UniCharToUtf: 4 byte sequence, high/low surrogate} {pairsTo4b test utf-1.13.0 {Tcl_UniCharToUtf: Invalid surrogate} {Uesc ucs2} { expr {"\UD842" eq "\uD842"} } 1 -test utf-1.13.1 {Tcl_UniCharToUtf: Invalid surrogate} {Uesc testbytestring fullutf} { +test utf-1.13.1 {Tcl_UniCharToUtf: Invalid surrogate} {fullutf testbytestring} { expr {"\UD842" eq [testbytestring \xEF\xBF\xBD]} } 1 @@ -106,22 +106,22 @@ test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} testbytestrin test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring { string length [testbytestring \xE4\xB9\x8E] } 1 -test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs2} { +test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {ucs2 testbytestring} { string length [testbytestring \xF0\x90\x80\x80] } 4 -test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {Uesc utf16} { +test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf16 { string length \U010000 } 2 -test utf-2.8.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {Uesc ucs4} { +test utf-2.8.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} ucs4 { string length \U010000 } 1 -test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs2} { +test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {ucs2 testbytestring} { string length [testbytestring \xF4\x8F\xBF\xBF] } 4 -test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {Uesc utf16} { +test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf16 { string length \U10FFFF } 2 -test utf-2.9.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {Uesc ucs4} { +test utf-2.9.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} ucs4 { string length \U10FFFF } 1 test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring { @@ -204,7 +204,7 @@ test utf-6.3 {Tcl_UtfNext} testutfnext { testutfnext AA } 1 test utf-6.4 {Tcl_UtfNext} {testutfnext testbytestring} { - testutfnext A[testbytestring \xA0] + testutfnext [testbytestring A\xA0] } 1 test utf-6.5 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext A[testbytestring \xD0] @@ -390,7 +390,7 @@ test utf-6.62 {Tcl_UtfNext} testutfnext { testutfnext \u8820G } 3 test utf-6.63 {Tcl_UtfNext} {testutfnext testbytestring} { - testutfnext \u8820[testbytestring \xA0] + testutfnext [testbytestring \xE8\xA0\xA0\xA0] } 3 test utf-6.64 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext \u8820[testbytestring \xD0] @@ -682,7 +682,7 @@ test utf-7.6 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xE8] } 1 test utf-7.6.1 {Tcl_UtfPrev} {testutfprev testbytestring} { - testutfprev A\u8820[testbytestring \xA0] 2 + testutfprev A[testbytestring \xE8\xA0\xA0\xA0] 2 } 1 test utf-7.6.2 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xE8\xF8\xA0\xA0] 2 @@ -697,13 +697,13 @@ test utf-7.7.2 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xD0\xF8\xA0\xA0] 2 } 1 test utf-7.8 {Tcl_UtfPrev} {testutfprev testbytestring} { - testutfprev A[testbytestring \xA0] + testutfprev [testbytestring A\xA0] } 1 test utf-7.8.1 {Tcl_UtfPrev} {testutfprev testbytestring} { - testutfprev A[testbytestring \xA0\xA0\xA0\xA0] 2 + testutfprev [testbytestring A\xA0\xA0\xA0\xA0] 2 } 1 test utf-7.8.2 {Tcl_UtfPrev} {testutfprev testbytestring} { - testutfprev A[testbytestring \xA0\xF8\xA0\xA0] 2 + testutfprev [testbytestring A\xA0\xF8\xA0\xA0] 2 } 1 test utf-7.9 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xF8\xA0] @@ -736,7 +736,7 @@ test utf-7.11 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xE8\xA0] } 1 test utf-7.11.1 {Tcl_UtfPrev} {testutfprev testbytestring} { - testutfprev A\u8820[testbytestring \xA0] 3 + testutfprev A[testbytestring \xE8\xA0\xA0\xA0] 3 } 1 test utf-7.11.2 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xE8\xA0\xF8\xA0] 3 @@ -754,13 +754,13 @@ test utf-7.12.2 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xD0\xA0\xF8\xA0] 3 } 1 test utf-7.13 {Tcl_UtfPrev} {testutfprev testbytestring} { - testutfprev A[testbytestring \xA0\xA0] + testutfprev [testbytestring A\xA0\xA0] } 2 test utf-7.13.1 {Tcl_UtfPrev} {testutfprev testbytestring} { - testutfprev A[testbytestring \xA0\xA0\xA0\xA0] 3 + testutfprev [testbytestring A\xA0\xA0\xA0\xA0] 3 } 2 test utf-7.13.2 {Tcl_UtfPrev} {testutfprev testbytestring} { - testutfprev A[testbytestring \xA0\xA0\xF8\xA0] 3 + testutfprev [testbytestring A\xA0\xA0\xF8\xA0] 3 } 2 test utf-7.14 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xF8\xA0\xA0] @@ -793,7 +793,7 @@ test utf-7.16 {Tcl_UtfPrev} testutfprev { testutfprev A\u8820 } 1 test utf-7.16.1 {Tcl_UtfPrev} {testutfprev testbytestring} { - testutfprev A\u8820[testbytestring \xA0] 4 + testutfprev A[testbytestring \xE8\xA0\xA0\xA0] 4 } 1 test utf-7.16.2 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A\u8820[testbytestring \xF8] 4 @@ -808,31 +808,31 @@ test utf-7.17.2 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xD0\xA0\xA0\xF8] 4 } 3 test utf-7.18.0 {Tcl_UtfPrev} {testutfprev testbytestring} { - testutfprev A[testbytestring \xA0\xA0\xA0] + testutfprev [testbytestring A\xA0\xA0\xA0] } 3 test utf-7.18.1 {Tcl_UtfPrev} {testutfprev testbytestring} { - testutfprev A[testbytestring \xA0\xA0\xA0\xA0] 4 + testutfprev [testbytestring A\xA0\xA0\xA0\xA0] 4 } 3 test utf-7.18.2 {Tcl_UtfPrev} {testutfprev testbytestring} { - testutfprev A[testbytestring \xA0\xA0\xA0\xF8] 4 + testutfprev [testbytestring A\xA0\xA0\xA0\xF8] 4 } 3 test utf-7.19 {Tcl_UtfPrev} {testutfprev testbytestring} { - testutfprev A[testbytestring \xF8\xA0\xA0\xA0] + testutfprev [testbytestring A\xF8\xA0\xA0\xA0] } 4 test utf-7.20.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { - testutfprev A[testbytestring \xF2\xA0\xA0\xA0] + testutfprev [testbytestring A\xF2\xA0\xA0\xA0] } 4 test utf-7.20.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { - testutfprev A[testbytestring \xF2\xA0\xA0\xA0] + testutfprev [testbytestring A\xF2\xA0\xA0\xA0] } 1 test utf-7.21 {Tcl_UtfPrev} {testutfprev testbytestring} { - testutfprev A\u8820[testbytestring \xA0] + testutfprev A[testbytestring \xE8\xA0\xA0\xA0] } 4 test utf-7.22 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xD0\xA0\xA0\xA0] } 4 test utf-7.23 {Tcl_UtfPrev} {testutfprev testbytestring} { - testutfprev A[testbytestring \xA0\xA0\xA0\xA0] + testutfprev [testbytestring A\xA0\xA0\xA0\xA0] } 4 test utf-7.24 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { testutfprev A[testbytestring \xC0\x81] @@ -989,54 +989,54 @@ test utf-8.7.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index \uD83D\uDE00G 0 } \uD83D test utf-8.7.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { - string index \uD83D\uDE00G 0 + string index \U1F600G 0 } \U1F600 test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} utf16 { - string index \uD83D\uDE00G 0 + string index \U1F600G 0 } \U1F600 test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index \uD83D\uDE00G 1 } \uDE00 test utf-8.8.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { - string index \uD83D\uDE00G 1 + string index \U1F600G 1 } G test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} utf16 { - string index \uD83D\uDE00G 1 + string index \U1F600G 1 } {} test utf-8.9.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index \uD83D\uDE00G 2 } G test utf-8.9.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { - string index \uD83D\uDE00G 2 + string index \U1F600G 2 } {} test utf-8.9.2 {Tcl_UniCharAtIndex: Emoji} utf16 { - string index \uD83D\uDE00G 2 + string index \U1F600G 2 } G test utf-8.10.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { string index \U1F600G 0 } \uFFFD -test utf-8.10.1 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs4} { +test utf-8.10.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \U1F600G 0 } \U1F600 -test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} {Uesc utf16} { +test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} utf16 { string index \U1F600G 0 } \U1F600 test utf-8.11.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { string index \U1F600G 1 } G -test utf-8.11.1 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs4} { +test utf-8.11.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \U1F600G 1 } G -test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} {Uesc utf16} { +test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} utf16 { string index \U1F600G 1 } {} test utf-8.12.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { string index \U1F600G 2 } {} -test utf-8.12.1 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs4} { +test utf-8.12.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \U1F600G 2 } {} -test utf-8.12.2 {Tcl_UniCharAtIndex: Emoji} {Uesc utf16} { +test utf-8.12.2 {Tcl_UniCharAtIndex: Emoji} utf16 { string index \U1F600G 2 } G @@ -1050,55 +1050,55 @@ test utf-9.3.0 {Tcl_UtfAtIndex: index = 0, Emoji} ucs2 { string range \uD83D\uDE00G 0 0 } \uD83D test utf-9.3.1 {Tcl_UtfAtIndex: index = 0, Emoji} ucs4 { - string range \uD83D\uDE00G 0 0 + string range \U1F600G 0 0 } \U1F600 test utf-9.3.2 {Tcl_UtfAtIndex: index = 0, Emoji} utf16 { - string range \uD83D\uDE00G 0 0 + string range \U1F600G 0 0 } \U1F600 test utf-9.4.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { string range \uD83D\uDE00G 1 1 } \uDE00 test utf-9.4.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { - string range \uD83D\uDE00G 1 1 + string range \U1F600G 1 1 } G test utf-9.4.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { - string range \uD83D\uDE00G 1 1 + string range \U1F600G 1 1 } {} test utf-9.5.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { string range \uD83D\uDE00G 2 2 } G test utf-9.5.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { - string range \uD83D\uDE00G 2 2 + string range \U1F600G 2 2 } {} test utf-9.5.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { - string range \uD83D\uDE00G 2 2 + string range \U1F600G 2 2 } G test utf-9.6.0 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc ucs2} { - string range \U1f600G 0 0 + string range \U1F600G 0 0 } \uFFFD -test utf-9.6.1 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc ucs4} { - string range \U1f600G 0 0 +test utf-9.6.1 {Tcl_UtfAtIndex: index = 0, Emoji} ucs4 { + string range \U1F600G 0 0 } \U1F600 -test utf-9.6.2 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc utf16} { - string range \U1f600G 0 0 +test utf-9.6.2 {Tcl_UtfAtIndex: index = 0, Emoji} utf16 { + string range \U1F600G 0 0 } \U1F600 test utf-9.7.0 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs2} { - string range \U1f600G 1 1 + string range \U1F600G 1 1 } G -test utf-9.7.1 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs4} { - string range \U1f600G 1 1 +test utf-9.7.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { + string range \U1F600G 1 1 } G -test utf-9.7.2 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc utf16} { - string range \U1f600G 1 1 +test utf-9.7.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { + string range \U1F600G 1 1 } {} test utf-9.8.0 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs2} { - string range \U1f600G 2 2 + string range \U1F600G 2 2 } {} -test utf-9.8.1 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs4} { - string range \U1f600G 2 2 +test utf-9.8.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { + string range \U1F600G 2 2 } {} -test utf-9.8.2 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc utf16} { - string range \U1f600G 2 2 +test utf-9.8.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { + string range \U1F600G 2 2 } G test utf-10.1 {Tcl_UtfBackslash: dst == NULL} { @@ -1117,10 +1117,10 @@ test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} testbytestring { test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} testbytestring { expr {"\u4E216" eq "[testbytestring \xE4\xB8\xA1]6"} } 1 -test utf-10.6 {Tcl_UtfBackslash: stops after 5 hex chars} {Uesc fullutf testbytestring} { +test utf-10.6 {Tcl_UtfBackslash: stops after 5 hex chars} {fullutf testbytestring} { expr {"\U1E2165" eq "[testbytestring \xF0\x9E\x88\x96]5"} } 1 -test utf-10.7 {Tcl_UtfBackslash: stops after 6 hex chars} {Uesc fullutf testbytestring} { +test utf-10.7 {Tcl_UtfBackslash: stops after 6 hex chars} {fullutf testbytestring} { expr {"\U10E2165" eq "[testbytestring \xF4\x8E\x88\x96]5"} } 1 @@ -1183,13 +1183,13 @@ bsCheck \U4E21 20001 Uesc bsCheck \U004E21 20001 Uesc bsCheck \U00004E21 20001 Uesc bsCheck \U0000004E21 78 Uesc -bsCheck \U00110000 69632 {Uesc fullutf} -bsCheck \U01100000 69632 {Uesc fullutf} -bsCheck \U11000000 69632 {Uesc fullutf} -bsCheck \U0010FFFF 1114111 {Uesc fullutf} -bsCheck \U010FFFF0 1114111 {Uesc fullutf} -bsCheck \U10FFFF00 1114111 {Uesc fullutf} -bsCheck \UFFFFFFFF 1048575 {Uesc fullutf} +bsCheck \U00110000 69632 fullutf +bsCheck \U01100000 69632 fullutf +bsCheck \U11000000 69632 fullutf +bsCheck \U0010FFFF 1114111 fullutf +bsCheck \U010FFFF0 1114111 fullutf +bsCheck \U10FFFF00 1114111 fullutf +bsCheck \UFFFFFFFF 1048575 fullutf test utf-11.1 {Tcl_UtfToUpper} { string toupper {} @@ -1206,7 +1206,7 @@ test utf-11.4 {Tcl_UtfToUpper} { test utf-11.5 {Tcl_UtfToUpper Georgian (new in Unicode 11)} { string toupper \u10D0\u1C90 } \u1C90\u1C90 -test utf-11.6 {Tcl_UtfToUpper beyond U+FFFF} {Uesc fullutf} { +test utf-11.6 {Tcl_UtfToUpper beyond U+FFFF} fullutf { string toupper \U10428 } \U10400 test utf-11.7 {Tcl_UtfToUpper beyond U+FFFF} fullutf { @@ -1234,7 +1234,7 @@ test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} { test utf-12.6 {Tcl_UtfToLower low/high surrogate)} { string tolower \uDC24\uD824 } \uDC24\uD824 -test utf-12.7 {Tcl_UtfToLower beyond U+FFFF} {Uesc fullutf} { +test utf-12.7 {Tcl_UtfToLower beyond U+FFFF} fullutf { string tolower \U10400 } \U10428 test utf-12.8 {Tcl_UtfToLower beyond U+FFFF} fullutf { @@ -1262,7 +1262,7 @@ test utf-13.6 {Tcl_UtfToTitle Georgian (new in Unicode 11)} { test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} { string totitle \uDC24\uD824 } \uDC24\uD824 -test utf-13.8 {Tcl_UtfToTitle beyond U+FFFF} {Uesc fullutf} { +test utf-13.8 {Tcl_UtfToTitle beyond U+FFFF} fullutf { string totitle \U10428\U10400 } \U10400\U10428 test utf-13.9 {Tcl_UtfToTitle beyond U+FFFF} fullutf { @@ -1456,9 +1456,9 @@ UniCharCaseCmpTest > b a UniCharCaseCmpTest > B a UniCharCaseCmpTest > aBcB abca UniCharCaseCmpTest < \uFFFF [format %c 0x10000] ucs4 -UniCharCaseCmpTest < \uFFFF \U10000 {Uesc ucs4} +UniCharCaseCmpTest < \uFFFF \U10000 ucs4 UniCharCaseCmpTest > [format %c 0x10000] \uFFFF ucs4 -UniCharCaseCmpTest > \U10000 \uFFFF {Uesc ucs4} +UniCharCaseCmpTest > \U10000 \uFFFF ucs4 test utf-26.1 {Tcl_UniCharDString} -setup { -- cgit v0.12