summary | refs | log | tree | commit | diff | stats
path: root/tests/encoding.test
diff options
context:
space:
mode:
author jan.nijtmans <nijtmans@users.sourceforge.net> 2022-03-29 22:17:33 (GMT)
committer jan.nijtmans <nijtmans@users.sourceforge.net> 2022-03-29 22:17:33 (GMT)
commit a2a0ca2a1734d496f9500dc2ae02772a9eac548d (patch)
tree 0a13f841c77376f33919ac3b86328af05bc8bf03 /tests/encoding.test
parent b19862400b0fde8a9ee9233384130eb36164597a (diff)
parent 60e9b38dfbeb8b382fd60528363fe726331ac4db (diff)
download tcl-a2a0ca2a1734d496f9500dc2ae02772a9eac548d.zip
tcl-a2a0ca2a1734d496f9500dc2ae02772a9eac548d.tar.gz
tcl-a2a0ca2a1734d496f9500dc2ae02772a9eac548d.tar.bz2
Merge 9.0. Fix CONTINUATION macro, and testcases
Diffstat (limited to 'tests/encoding.test')
-rw-r--r-- tests/encoding.test 59
1 files changed, 31 insertions, 28 deletions
diff --git a/tests/encoding.test b/tests/encoding.test
index fffcdd5..dfe844f 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -40,7 +40,10 @@ testConstraint testbytestring [llength [info commands testbytestring]]
testConstraint teststringbytes [llength [info commands teststringbytes]]
testConstraint exec [llength [info commands exec]]
testConstraint testgetencpath [llength [info commands testgetencpath]]
-
+testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}]
+testConstraint ucs4 [expr {[testConstraint fullutf]
+ && [string length [format %c 0x10000]] == 1}]
+
# TclInitEncodingSubsystem is tested by the rest of this file
# TclFinalizeEncodingSubsystem is not currently tested
@@ -336,74 +339,74 @@ test encoding-15.3 {UtfToUtfProc null character input} teststringbytes {
} c080
test encoding-15.4 {UtfToUtfProc emoji character input} -body {
set x \xED\xA0\xBD\xED\xB8\x82
- set y [encoding convertfrom utf-8 \xED\xA0\xBD\xED\xB8\x82]
- list [string length $x] $y
-} -result "6 😂"
-test encoding-15.5 {UtfToUtfProc emoji character input} {
+ set y [encoding convertfrom -nocomplain utf-8 \xED\xA0\xBD\xED\xB8\x82]
+ list [string length $y] $y
+} -result "2 \uD83D\uDE02"
+test encoding-15.5 {UtfToUtfProc emoji character input} ucs4 {
set x \xF0\x9F\x98\x82
set y [encoding convertfrom utf-8 \xF0\x9F\x98\x82]
- list [string length $x] $y
-} "4 😂"
-test encoding-15.6 {UtfToUtfProc emoji character output} {
+ list [string length $y] $y
+} "1 😂"
+test encoding-15.6 {UtfToUtfProc emoji character output} ucs4 {
set x \uDE02\uD83D\uDE02\uD83D
set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uDE02\uD83D]
binary scan $y H* z
list [string length $y] $z
-} {10 edb882f09f9882eda0bd}
-test encoding-15.7 {UtfToUtfProc emoji character output} {
+} {12 efbfbdefbfbdefbfbdefbfbd}
+test encoding-15.7 {UtfToUtfProc emoji character output} ucs4 {
set x \uDE02\uD83D\uD83D
set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uD83D]
binary scan $y H* z
list [string length $x] [string length $y] $z
-} {3 9 edb882eda0bdeda0bd}
-test encoding-15.8 {UtfToUtfProc emoji character output} {
+} {3 9 efbfbdefbfbdefbfbd}
+test encoding-15.8 {UtfToUtfProc emoji character output} ucs4 {
set x \uDE02\uD83Dé
set y [encoding convertto -nocomplain utf-8 \uDE02\uD83Dé]
binary scan $y H* z
list [string length $x] [string length $y] $z
-} {3 8 edb882eda0bdc3a9}
-test encoding-15.9 {UtfToUtfProc emoji character output} {
+} {3 8 efbfbdefbfbdc3a9}
+test encoding-15.9 {UtfToUtfProc emoji character output} ucs4 {
set x \uDE02\uD83DX
set y [encoding convertto -nocomplain utf-8 \uDE02\uD83DX]
binary scan $y H* z
list [string length $x] [string length $y] $z
-} {3 7 edb882eda0bd58}
-test encoding-15.10 {UtfToUtfProc high surrogate character output} {
+} {3 7 efbfbdefbfbd58}
+test encoding-15.10 {UtfToUtfProc high surrogate character output} ucs4 {
set x \uDE02é
set y [encoding convertto -nocomplain utf-8 \uDE02é]
binary scan $y H* z
list [string length $x] [string length $y] $z
-} {2 5 edb882c3a9}
-test encoding-15.11 {UtfToUtfProc low surrogate character output} {
+} {2 5 efbfbdc3a9}
+test encoding-15.11 {UtfToUtfProc low surrogate character output} ucs4 {
set x \uDA02é
set y [encoding convertto -nocomplain utf-8 \uDA02é]
binary scan $y H* z
list [string length $x] [string length $y] $z
-} {2 5 eda882c3a9}
-test encoding-15.12 {UtfToUtfProc high surrogate character output} {
+} {2 5 efbfbdc3a9}
+test encoding-15.12 {UtfToUtfProc high surrogate character output} ucs4 {
set x \uDE02Y
set y [encoding convertto -nocomplain utf-8 \uDE02Y]
binary scan $y H* z
list [string length $x] [string length $y] $z
-} {2 4 edb88259}
-test encoding-15.13 {UtfToUtfProc low surrogate character output} {
+} {2 4 efbfbd59}
+test encoding-15.13 {UtfToUtfProc low surrogate character output} ucs4 {
set x \uDA02Y
set y [encoding convertto -nocomplain utf-8 \uDA02Y]
binary scan $y H* z
list [string length $x] [string length $y] $z
-} {2 4 eda88259}
-test encoding-15.14 {UtfToUtfProc high surrogate character output} {
+} {2 4 efbfbd59}
+test encoding-15.14 {UtfToUtfProc high surrogate character output} ucs4 {
set x \uDE02
set y [encoding convertto -nocomplain utf-8 \uDE02]
binary scan $y H* z
list [string length $x] [string length $y] $z
-} {1 3 edb882}
-test encoding-15.15 {UtfToUtfProc low surrogate character output} {
+} {1 3 efbfbd}
+test encoding-15.15 {UtfToUtfProc low surrogate character output} ucs4 {
set x \uDA02
set y [encoding convertto -nocomplain utf-8 \uDA02]
binary scan $y H* z
list [string length $x] [string length $y] $z
-} {1 3 eda882}
+} {1 3 efbfbd}
test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} {
set x \xF0\xA0\xA1\xC2
set y [encoding convertfrom -nocomplain utf-8 \xF0\xA0\xA1\xC2]
@@ -646,7 +649,7 @@ test encoding-24.14 {Parse valid or invalid utf-8} {
} 1
test encoding-24.15 {Parse valid or invalid utf-8} -body {
encoding convertfrom utf-8 "Z\xE0\x80"
-} -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\xE0'}
+} -result Z\xE0\x80
test encoding-24.16 {Parse valid or invalid utf-8} -constraints testbytestring -body {
encoding convertto utf-8 [testbytestring "Z\u4343\x80"]
} -returnCodes 1 -result {expected byte sequence but character 1 was '䍃€' (U+004343)}