summaryrefslogtreecommitdiffstats
path: root/tests/encoding.test
diff options
context:
space:
mode:
Diffstat (limited to 'tests/encoding.test')
-rw-r--r--tests/encoding.test147
1 files changed, 120 insertions, 27 deletions
diff --git a/tests/encoding.test b/tests/encoding.test
index c8daed6..6f11968 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -22,6 +22,8 @@ catch {
package require -exact tcl::test [info patchlevel]
}
+package require tcltests
+
proc toutf {args} {
variable x
lappend x "toutf $args"
@@ -287,11 +289,17 @@ test encoding-11.8 {encoding: extended Unicode UTF-16} {
test encoding-11.9 {encoding: extended Unicode UTF-16} {
viewable [encoding convertto utf-16be 😹]
} {Ø=Þ9 (\u00D8=\u00DE9)}
+test encoding-11.10 {encoding: extended Unicode UTF-32} {
+ viewable [encoding convertto utf-32le 😹]
+} "9\xF6\x01\x00 (9\\u00F6\\u0001\\u0000)"
+test encoding-11.11 {encoding: extended Unicode UTF-32} {
+ viewable [encoding convertto utf-32be 😹]
+} "\x00\x01\xF69 (\\u0000\\u0001\\u00F69)"
# OpenEncodingFile is fully tested by the rest of the tests in this file.
test encoding-12.1 {LoadTableEncoding: normal encoding} {
set x [encoding convertto iso8859-3 Ġ]
- append x [encoding convertto iso8859-3 Õ]
+ append x [encoding convertto -nocomplain iso8859-3 Õ]
append x [encoding convertfrom iso8859-3 Õ]
} "Õ?Ġ"
test encoding-12.2 {LoadTableEncoding: single-byte encoding} {
@@ -340,67 +348,67 @@ test encoding-15.5 {UtfToUtfProc emoji character input} {
} "4 😂"
test encoding-15.6 {UtfToUtfProc emoji character output} {
set x \uDE02\uD83D\uDE02\uD83D
- set y [encoding convertto utf-8 \uDE02\uD83D\uDE02\uD83D]
+ set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uDE02\uD83D]
binary scan $y H* z
list [string length $y] $z
} {10 edb882f09f9882eda0bd}
test encoding-15.7 {UtfToUtfProc emoji character output} {
set x \uDE02\uD83D\uD83D
- set y [encoding convertto utf-8 \uDE02\uD83D\uD83D]
+ set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uD83D]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {3 9 edb882eda0bdeda0bd}
test encoding-15.8 {UtfToUtfProc emoji character output} {
set x \uDE02\uD83Dé
- set y [encoding convertto utf-8 \uDE02\uD83Dé]
+ set y [encoding convertto -nocomplain utf-8 \uDE02\uD83Dé]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {3 8 edb882eda0bdc3a9}
test encoding-15.9 {UtfToUtfProc emoji character output} {
set x \uDE02\uD83DX
- set y [encoding convertto utf-8 \uDE02\uD83DX]
+ set y [encoding convertto -nocomplain utf-8 \uDE02\uD83DX]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {3 7 edb882eda0bd58}
test encoding-15.10 {UtfToUtfProc high surrogate character output} {
set x \uDE02é
- set y [encoding convertto utf-8 \uDE02é]
+ set y [encoding convertto -nocomplain utf-8 \uDE02é]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {2 5 edb882c3a9}
test encoding-15.11 {UtfToUtfProc low surrogate character output} {
set x \uDA02é
- set y [encoding convertto utf-8 \uDA02é]
+ set y [encoding convertto -nocomplain utf-8 \uDA02é]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {2 5 eda882c3a9}
test encoding-15.12 {UtfToUtfProc high surrogate character output} {
set x \uDE02Y
- set y [encoding convertto utf-8 \uDE02Y]
+ set y [encoding convertto -nocomplain utf-8 \uDE02Y]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {2 4 edb88259}
test encoding-15.13 {UtfToUtfProc low surrogate character output} {
set x \uDA02Y
- set y [encoding convertto utf-8 \uDA02Y]
+ set y [encoding convertto -nocomplain utf-8 \uDA02Y]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {2 4 eda88259}
test encoding-15.14 {UtfToUtfProc high surrogate character output} {
set x \uDE02
- set y [encoding convertto utf-8 \uDE02]
+ set y [encoding convertto -nocomplain utf-8 \uDE02]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {1 3 edb882}
test encoding-15.15 {UtfToUtfProc low surrogate character output} {
set x \uDA02
- set y [encoding convertto utf-8 \uDA02]
+ set y [encoding convertto -nocomplain utf-8 \uDA02]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {1 3 eda882}
test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} {
set x \xF0\xA0\xA1\xC2
- set y [encoding convertfrom utf-8 \xF0\xA0\xA1\xC2]
+ set y [encoding convertfrom -nocomplain utf-8 \xF0\xA0\xA1\xC2]
list [string length $x] $y
} "4 \xF0\xA0\xA1\xC2"
test encoding-15.17 {UtfToUtfProc emoji character output} {
@@ -409,6 +417,41 @@ test encoding-15.17 {UtfToUtfProc emoji character output} {
binary scan $y H* z
list [string length $y] $z
} {4 f09f9882}
+test encoding-15.18 {UtfToUtfProc CESU-8 6-byte sequence} {
+ set y [encoding convertto cesu-8 \U10000]
+ binary scan $y H* z
+ list [string length $y] $z
+} {6 eda080edb080}
+test encoding-15.19 {UtfToUtfProc CESU-8 upper surrogate} {
+ set y [encoding convertto cesu-8 \uD800]
+ binary scan $y H* z
+ list [string length $y] $z
+} {3 eda080}
+test encoding-15.20 {UtfToUtfProc CESU-8 lower surrogate} {
+ set y [encoding convertto cesu-8 \uDC00]
+ binary scan $y H* z
+ list [string length $y] $z
+} {3 edb080}
+test encoding-15.21 {UtfToUtfProc CESU-8 noncharacter} {
+ set y [encoding convertto cesu-8 \uFFFF]
+ binary scan $y H* z
+ list [string length $y] $z
+} {3 efbfbf}
+test encoding-15.22 {UtfToUtfProc CESU-8 bug [048dd20b4171c8da]} {
+ set y [encoding convertto cesu-8 \x80]
+ binary scan $y H* z
+ list [string length $y] $z
+} {2 c280}
+test encoding-15.23 {UtfToUtfProc CESU-8 bug [048dd20b4171c8da]} {
+ set y [encoding convertto cesu-8 \u100]
+ binary scan $y H* z
+ list [string length $y] $z
+} {2 c480}
+test encoding-15.24 {UtfToUtfProc CESU-8 bug [048dd20b4171c8da]} {
+ set y [encoding convertto cesu-8 \u3FF]
+ binary scan $y H* z
+ list [string length $y] $z
+} {2 cfbf}
test encoding-16.1 {Utf16ToUtfProc} -body {
set val [encoding convertfrom utf-16 NN]
@@ -426,23 +469,37 @@ test encoding-16.4 {Ucs2ToUtfProc} -body {
set val [encoding convertfrom ucs-2 NN]
list $val [format %x [scan $val %c]]
} -result "乎 4e4e"
-test encoding-16.4 {Ucs2ToUtfProc} -body {
+test encoding-16.5 {Ucs2ToUtfProc} -body {
set val [encoding convertfrom ucs-2 "\xD8\xD8\xDC\xDC"]
list $val [format %x [scan $val %c]]
} -result "\U460DC 460dc"
+test encoding-16.6 {Utf32ToUtfProc} -body {
+ set val [encoding convertfrom utf-32le NN\0\0]
+ list $val [format %x [scan $val %c]]
+} -result "乎 4e4e"
+test encoding-16.7 {Utf32ToUtfProc} -body {
+ set val [encoding convertfrom utf-32be \0\0NN]
+ list $val [format %x [scan $val %c]]
+} -result "乎 4e4e"
test encoding-17.1 {UtfToUtf16Proc} -body {
encoding convertto utf-16 "\U460DC"
} -result "\xD8\xD8\xDC\xDC"
-test encoding-17.2 {UtfToUtf16Proc} -body {
- encoding convertto utf-16 "\uDCDC"
-} -result "\xDC\xDC"
-test encoding-17.3 {UtfToUtf16Proc} -body {
- encoding convertto utf-16 "\uD8D8"
-} -result "\xD8\xD8"
-test encoding-17.4 {UtfToUcs2Proc} -body {
+test encoding-17.2 {UtfToUcs2Proc} -body {
encoding convertfrom utf-16 [encoding convertto ucs-2 "\U460DC"]
} -result "\uFFFD"
+test encoding-17.3 {UtfToUtf16Proc} -body {
+ encoding convertto -nocomplain utf-16be "\uDCDC"
+} -result "\xFF\xFD"
+test encoding-17.4 {UtfToUtf16Proc} -body {
+ encoding convertto -nocomplain utf-16le "\uD8D8"
+} -result "\xFD\xFF"
+test encoding-17.5 {UtfToUtf16Proc} -body {
+ encoding convertto utf-32le "\U460DC"
+} -result "\xDC\x60\x04\x00"
+test encoding-17.6 {UtfToUtf16Proc} -body {
+ encoding convertto utf-32be "\U460DC"
+} -result "\x00\x04\x60\xDC"
test encoding-18.1 {TableToUtfProc} {
} {}
@@ -560,26 +617,62 @@ test encoding-24.4 {Parse valid or invalid utf-8} {
string length [encoding convertfrom utf-8 "\xC0\x80"]
} 1
test encoding-24.5 {Parse valid or invalid utf-8} {
- string length [encoding convertfrom utf-8 "\xC0\x81"]
+ string length [encoding convertfrom -nocomplain utf-8 "\xC0\x81"]
} 2
test encoding-24.6 {Parse valid or invalid utf-8} {
- string length [encoding convertfrom utf-8 "\xC1\xBF"]
+ string length [encoding convertfrom -nocomplain utf-8 "\xC1\xBF"]
} 2
test encoding-24.7 {Parse valid or invalid utf-8} {
string length [encoding convertfrom utf-8 "\xC2\x80"]
} 1
test encoding-24.8 {Parse valid or invalid utf-8} {
- string length [encoding convertfrom utf-8 "\xE0\x80\x80"]
+ string length [encoding convertfrom -nocomplain utf-8 "\xE0\x80\x80"]
} 3
test encoding-24.9 {Parse valid or invalid utf-8} {
- string length [encoding convertfrom utf-8 "\xE0\x9F\xBF"]
+ string length [encoding convertfrom -nocomplain utf-8 "\xE0\x9F\xBF"]
} 3
test encoding-24.10 {Parse valid or invalid utf-8} {
string length [encoding convertfrom utf-8 "\xE0\xA0\x80"]
} 1
test encoding-24.11 {Parse valid or invalid utf-8} {
- string length [encoding convertfrom utf-8 "\xEF\xBF\xBF"]
+ string length [encoding convertfrom -nocomplain utf-8 "\xEF\xBF\xBF"]
+} 1
+test encoding-24.12 {Parse valid or invalid utf-8} -constraints deprecated -body {
+ encoding convertfrom utf-8 "\xC0\x81"
+} -result \xC0\x81
+test encoding-24.13 {Parse valid or invalid utf-8} -constraints deprecated -body {
+ encoding convertfrom utf-8 "\xC1\xBF"
+} -result \xC1\xBF
+test encoding-24.14 {Parse valid or invalid utf-8} {
+ string length [encoding convertfrom utf-8 "\xC2\x80"]
+} 1
+test encoding-24.15 {Parse valid or invalid utf-8} -constraints deprecated -body {
+ encoding convertfrom utf-8 "Z\xE0\x80"
+} -result Z\xE0\x80
+test encoding-24.16 {Parse valid or invalid utf-8} -constraints testbytestring -body {
+ encoding convertto utf-8 [testbytestring "Z\u4343\x80"]
+} -returnCodes 1 -result {expected byte sequence but character 1 was '䍃€' (U+004343)}
+test encoding-24.17 {Parse valid or invalid utf-8} -constraints testbytestring -body {
+ encoding convertto utf-8 [testbytestring "Z\xE0\x80"]
+} -result "Z\xC3\xA0\xE2\x82\xAC"
+test encoding-24.18 {Parse valid or invalid utf-8} -constraints testbytestring -body {
+ encoding convertto utf-8 [testbytestring "Z\xE0\x80xxxxxx"]
+} -result "Z\xC3\xA0\xE2\x82\xACxxxxxx"
+test encoding-24.19 {Parse valid or invalid utf-8} -constraints deprecated -body {
+ encoding convertto utf-8 "ZX\uD800"
+} -result ZX\xED\xA0\x80
+test encoding-24.20 {Parse with -nocomplain but without providing encoding} {
+ string length [encoding convertfrom -nocomplain "\x20"]
+} 1
+test encoding-24.21 {Parse with -nocomplain but without providing encoding} {
+ string length [encoding convertto -nocomplain "\x20"]
} 1
+test encoding-24.22 {Syntax error, two encodings} -body {
+ encoding convertfrom iso8859-1 utf-8 "ZX\uD800"
+} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain? ?-failindex var? ?encoding? data"}
+test encoding-24.23 {Syntax error, two encodings} -body {
+ encoding convertto iso8859-1 utf-8 "ZX\uD800"
+} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?-failindex var? ?encoding? data"}
file delete [file join [temporaryDirectory] iso2022.txt]
@@ -735,14 +828,14 @@ test encoding-28.0 {all encodings load} -body {
set string hello
foreach name [encoding names] {
incr count
- encoding convertto $name $string
+ encoding convertto -nocomplain $name $string
# discard the cached internal representation of Tcl_Encoding
# Unfortunately, without this, encoding 2-1 fails.
llength $name
}
return $count
-} -result [expr {[info exists ::tcl_precision] ? 86 : 85}]
+} -result [expr {[info exists ::tcl_precision] ? 92 : 91}]
runtests