summaryrefslogtreecommitdiffstats
path: root/tests/encoding.test
diff options
context:
space:
mode:
Diffstat (limited to 'tests/encoding.test')
-rw-r--r--tests/encoding.test548
1 files changed, 438 insertions, 110 deletions
diff --git a/tests/encoding.test b/tests/encoding.test
index 26efb19..efe5a91 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -2,8 +2,8 @@
# Sourcing this file into Tcl runs the tests and generates output for errors.
# No output means no errors were found.
#
-# Copyright (c) 1997 Sun Microsystems, Inc.
-# Copyright (c) 1998-1999 Scriptics Corporation.
+# Copyright © 1997 Sun Microsystems, Inc.
+# Copyright © 1998-1999 Scriptics Corporation.
#
# See the file "license.terms" for information on usage and redistribution of
# this file, and for a DISCLAIMER OF ALL WARRANTIES.
@@ -13,14 +13,17 @@ if {"::tcltest" ni [namespace children]} {
namespace import -force ::tcltest::*
}
+
namespace eval ::tcl::test::encoding {
variable x
catch {
::tcltest::loadTestedCommands
- package require -exact Tcltest [info patchlevel]
+ package require -exact tcl::test [info patchlevel]
}
+source [file join [file dirname [info script]] tcltests.tcl]
+
proc toutf {args} {
variable x
lappend x "toutf $args"
@@ -37,10 +40,8 @@ proc runtests {} {
testConstraint testencoding [llength [info commands testencoding]]
testConstraint testbytestring [llength [info commands testbytestring]]
testConstraint teststringbytes [llength [info commands teststringbytes]]
-testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}]
-testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}]
testConstraint exec [llength [info commands exec]]
-testConstraint testgetdefenc [llength [info commands testgetdefenc]]
+testConstraint testgetencpath [llength [info commands testgetencpath]]
# TclInitEncodingSubsystem is tested by the rest of this file
# TclFinalizeEncodingSubsystem is not currently tested
@@ -65,12 +66,12 @@ test encoding-1.2 {Tcl_GetEncoding: existing encoding} {testencoding} {
return $x
} {{fromutf }}
test encoding-1.3 {Tcl_GetEncoding: load encoding} {
- list [encoding convertto jis0208 \u4E4E] \
+ list [encoding convertto jis0208 乎] \
[encoding convertfrom jis0208 8C]
-} "8C \u4E4E"
+} "8C 乎"
test encoding-2.1 {Tcl_FreeEncoding: refcount == 0} {
- encoding convertto jis0208 \u4E4E
+ encoding convertto jis0208 乎
} {8C}
test encoding-2.2 {Tcl_FreeEncoding: refcount != 0} -setup {
set system [encoding system]
@@ -78,10 +79,10 @@ test encoding-2.2 {Tcl_FreeEncoding: refcount != 0} -setup {
} -constraints {testencoding} -body {
encoding system shiftjis ;# incr ref count
encoding dirs [list [pwd]]
- set x [encoding convertto shiftjis \u4E4E] ;# old one found
+ set x [encoding convertto shiftjis 乎] ;# old one found
encoding system iso8859-1
llength shiftjis ;# Shimmer away any cache of Tcl_Encoding
- lappend x [catch {encoding convertto shiftjis \u4E4E} msg] $msg
+ lappend x [catch {encoding convertto shiftjis 乎} msg] $msg
} -cleanup {
encoding system iso8859-1
encoding dirs $path
@@ -138,7 +139,7 @@ test encoding-5.1 {Tcl_SetSystemEncoding} -setup {
set old [encoding system]
} -body {
encoding system jis0208
- encoding convertto \u4E4E
+ encoding convertto 乎
} -cleanup {
encoding system iso8859-1
encoding system $old
@@ -170,7 +171,7 @@ test encoding-6.2 {Tcl_CreateEncoding: replace encoding} {testencoding} {
test encoding-7.1 {Tcl_ExternalToUtfDString: small buffer} {
encoding convertfrom jis0208 8c8c8c8c
-} "\u543E\u543E\u543E\u543E"
+} 吾吾吾吾
test encoding-7.2 {Tcl_UtfToExternalDString: big buffer} {
set a 8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C
append a $a
@@ -179,7 +180,7 @@ test encoding-7.2 {Tcl_UtfToExternalDString: big buffer} {
append a $a
set x [encoding convertfrom jis0208 $a]
list [string length $x] [string index $x 0]
-} "512 \u4E4E"
+} "512 乎"
test encoding-8.1 {Tcl_ExternalToUtf} {
set f [open [file join [temporaryDirectory] dummy] w]
@@ -192,13 +193,13 @@ test encoding-8.1 {Tcl_ExternalToUtf} {
close $f
file delete [file join [temporaryDirectory] dummy]
return $x
-} "ab\u4E4Eg"
+} ab乎g
test encoding-9.1 {Tcl_UtfToExternalDString: small buffer} {
- encoding convertto jis0208 "\u543E\u543E\u543E\u543E"
+ encoding convertto jis0208 "吾吾吾吾"
} {8c8c8c8c}
test encoding-9.2 {Tcl_UtfToExternalDString: big buffer} {
- set a \u4E4E\u4E4E\u4E4E\u4E4E\u4E4E\u4E4E\u4E4E\u4E4E
+ set a 乎乎乎乎乎乎乎乎
append a $a
append a $a
append a $a
@@ -212,7 +213,7 @@ test encoding-9.2 {Tcl_UtfToExternalDString: big buffer} {
test encoding-10.1 {Tcl_UtfToExternal} {
set f [open [file join [temporaryDirectory] dummy] w]
fconfigure $f -translation binary -encoding shiftjis
- puts -nonewline $f "ab\u4E4Eg"
+ puts -nonewline $f ab乎g
close $f
set f [open [file join [temporaryDirectory] dummy] r]
fconfigure $f -translation binary -encoding iso8859-1
@@ -240,25 +241,25 @@ test encoding-11.1 {LoadEncodingFile: unknown encoding} {testencoding} {
encoding system iso8859-1
encoding dirs {}
llength jis0208 ;# Shimmer any cached Tcl_Encoding in shared literal
- set x [list [catch {encoding convertto jis0208 \u4E4E} msg] $msg]
+ set x [list [catch {encoding convertto jis0208 乎} msg] $msg]
encoding dirs $path
encoding system $system
- lappend x [encoding convertto jis0208 \u4E4E]
+ lappend x [encoding convertto jis0208 乎]
} {1 {unknown encoding "jis0208"} 8C}
test encoding-11.2 {LoadEncodingFile: single-byte} {
encoding convertfrom jis0201 \xA1
-} \uFF61
+} 。
test encoding-11.3 {LoadEncodingFile: double-byte} {
encoding convertfrom jis0208 8C
-} \u4E4E
+} 乎
test encoding-11.4 {LoadEncodingFile: multi-byte} {
encoding convertfrom shiftjis \x8C\xC1
-} \u4E4E
+} 乎
test encoding-11.5 {LoadEncodingFile: escape file} {
- viewable [encoding convertto iso2022 \u4E4E]
+ viewable [encoding convertto iso2022 乎]
} [viewable "\x1B\$B8C\x1B(B"]
test encoding-11.5.1 {LoadEncodingFile: escape file} {
- viewable [encoding convertto iso2022-jp \u4E4E]
+ viewable [encoding convertto iso2022-jp 乎]
} [viewable "\x1B\$B8C\x1B(B"]
test encoding-11.6 {LoadEncodingFile: invalid file} -constraints {testencoding} -setup {
set system [encoding system]
@@ -273,7 +274,7 @@ test encoding-11.6 {LoadEncodingFile: invalid file} -constraints {testencoding}
fconfigure $f -translation binary
puts $f "abcdefghijklmnop"
close $f
- encoding convertto splat \u4E4E
+ encoding convertto splat 乎
} -returnCodes error -cleanup {
file delete [file join [temporaryDirectory] tmp encoding splat.enc]
removeDirectory [file join tmp encoding]
@@ -282,45 +283,49 @@ test encoding-11.6 {LoadEncodingFile: invalid file} -constraints {testencoding}
encoding dirs $path
encoding system $system
} -result {invalid encoding file "splat"}
-
+test encoding-11.8 {encoding: extended Unicode UTF-16} {
+ viewable [encoding convertto utf-16le 😹]
+} {=Ø9Þ (=\u00D89\u00DE)}
+test encoding-11.9 {encoding: extended Unicode UTF-16} {
+ viewable [encoding convertto utf-16be 😹]
+} {Ø=Þ9 (\u00D8=\u00DE9)}
+test encoding-11.10 {encoding: extended Unicode UTF-32} {
+ viewable [encoding convertto utf-32le 😹]
+} "9\xF6\x01\x00 (9\\u00F6\\u0001\\u0000)"
+test encoding-11.11 {encoding: extended Unicode UTF-32} {
+ viewable [encoding convertto utf-32be 😹]
+} "\x00\x01\xF69 (\\u0000\\u0001\\u00F69)"
# OpenEncodingFile is fully tested by the rest of the tests in this file.
test encoding-12.1 {LoadTableEncoding: normal encoding} {
- set x [encoding convertto iso8859-3 \u0120]
- append x [encoding convertto iso8859-3 \xD5]
- append x [encoding convertfrom iso8859-3 \xD5]
-} \xD5?\u0120
+ set x [encoding convertto iso8859-3 Ġ]
+ append x [encoding convertto -nocomplain iso8859-3 Õ]
+ append x [encoding convertfrom iso8859-3 Õ]
+} "Õ?Ġ"
test encoding-12.2 {LoadTableEncoding: single-byte encoding} {
- set x [encoding convertto iso8859-3 ab\u0120g]
- append x [encoding convertfrom iso8859-3 ab\xD5g]
-} ab\xD5gab\u0120g
+ set x [encoding convertto iso8859-3 abĠg]
+ append x [encoding convertfrom iso8859-3 abÕg]
+} "abÕgabĠg"
test encoding-12.3 {LoadTableEncoding: multi-byte encoding} {
- set x [encoding convertto shiftjis ab\u4E4Eg]
+ set x [encoding convertto shiftjis ab乎g]
append x [encoding convertfrom shiftjis ab\x8C\xC1g]
-} ab\x8C\xC1gab\u4E4Eg
+} "ab\x8C\xC1gab乎g"
test encoding-12.4 {LoadTableEncoding: double-byte encoding} {
- set x [encoding convertto jis0208 \u4E4E\u03B1]
+ set x [encoding convertto jis0208 乎α]
append x [encoding convertfrom jis0208 8C&A]
-} 8C&A\u4E4E\u03B1
+} "8C&A乎α"
test encoding-12.5 {LoadTableEncoding: symbol encoding} {
- set x [encoding convertto symbol \u03B3]
+ set x [encoding convertto symbol γ]
append x [encoding convertto symbol g]
append x [encoding convertfrom symbol g]
-} gg\u03B3
-test encoding-12.6 {LoadTableEncoding: overflow in char value} ucs2 {
- encoding convertto iso8859-3 \U010000
-} ?
+} "ggγ"
test encoding-13.1 {LoadEscapeTable} {
- viewable [set x [encoding convertto iso2022 ab\u4E4E\u68D9g]]
+ viewable [set x [encoding convertto iso2022 ab乎棙g]]
} [viewable "ab\x1B\$B8C\x1B\$\(DD%\x1B(Bg"]
-test encoding-14.1 {BinaryProc} {
- encoding convertto identity \x12\x34\x56\xff\x69
-} "\x12\x34\x56\xc3\xbf\x69"
-
test encoding-15.1 {UtfToUtfProc} {
- encoding convertto utf-8 \xA3
+ encoding convertto utf-8 £
} "\xC2\xA3"
test encoding-15.2 {UtfToUtfProc null character output} testbytestring {
binary scan [testbytestring [encoding convertto utf-8 \x00]] H* z
@@ -335,113 +340,315 @@ test encoding-15.4 {UtfToUtfProc emoji character input} -body {
set x \xED\xA0\xBD\xED\xB8\x82
set y [encoding convertfrom utf-8 \xED\xA0\xBD\xED\xB8\x82]
list [string length $x] $y
-} -result "6 \uD83D\uDE02"
+} -result "6 😂"
test encoding-15.5 {UtfToUtfProc emoji character input} {
set x \xF0\x9F\x98\x82
set y [encoding convertfrom utf-8 \xF0\x9F\x98\x82]
list [string length $x] $y
-} "4 \uD83D\uDE02"
+} "4 😂"
test encoding-15.6 {UtfToUtfProc emoji character output} {
set x \uDE02\uD83D\uDE02\uD83D
- set y [encoding convertto utf-8 \uDE02\uD83D\uDE02\uD83D]
+ set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uDE02\uD83D]
binary scan $y H* z
list [string length $y] $z
} {10 edb882f09f9882eda0bd}
test encoding-15.7 {UtfToUtfProc emoji character output} {
set x \uDE02\uD83D\uD83D
- set y [encoding convertto utf-8 \uDE02\uD83D\uD83D]
+ set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uD83D]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {3 9 edb882eda0bdeda0bd}
test encoding-15.8 {UtfToUtfProc emoji character output} {
- set x \uDE02\uD83D\xE9
- set y [encoding convertto utf-8 \uDE02\uD83D\xE9]
+ set x \uDE02\uD83Dé
+ set y [encoding convertto -nocomplain utf-8 \uDE02\uD83Dé]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {3 8 edb882eda0bdc3a9}
test encoding-15.9 {UtfToUtfProc emoji character output} {
set x \uDE02\uD83DX
- set y [encoding convertto utf-8 \uDE02\uD83DX]
+ set y [encoding convertto -nocomplain utf-8 \uDE02\uD83DX]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {3 7 edb882eda0bd58}
test encoding-15.10 {UtfToUtfProc high surrogate character output} {
- set x \uDE02\xE9
- set y [encoding convertto utf-8 \uDE02\xE9]
+ set x \uDE02é
+ set y [encoding convertto -nocomplain utf-8 \uDE02é]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {2 5 edb882c3a9}
test encoding-15.11 {UtfToUtfProc low surrogate character output} {
- set x \uDA02\xE9
- set y [encoding convertto utf-8 \uDA02\xE9]
+ set x \uDA02é
+ set y [encoding convertto -nocomplain utf-8 \uDA02é]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {2 5 eda882c3a9}
test encoding-15.12 {UtfToUtfProc high surrogate character output} {
set x \uDE02Y
- set y [encoding convertto utf-8 \uDE02Y]
+ set y [encoding convertto -nocomplain utf-8 \uDE02Y]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {2 4 edb88259}
test encoding-15.13 {UtfToUtfProc low surrogate character output} {
set x \uDA02Y
- set y [encoding convertto utf-8 \uDA02Y]
+ set y [encoding convertto -nocomplain utf-8 \uDA02Y]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {2 4 eda88259}
test encoding-15.14 {UtfToUtfProc high surrogate character output} {
set x \uDE02
- set y [encoding convertto utf-8 \uDE02]
+ set y [encoding convertto -nocomplain utf-8 \uDE02]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {1 3 edb882}
test encoding-15.15 {UtfToUtfProc low surrogate character output} {
set x \uDA02
- set y [encoding convertto utf-8 \uDA02]
+ set y [encoding convertto -nocomplain utf-8 \uDA02]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {1 3 eda882}
test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} {
set x \xF0\xA0\xA1\xC2
- set y [encoding convertfrom utf-8 \xF0\xA0\xA1\xC2]
+ set y [encoding convertfrom -nocomplain utf-8 \xF0\xA0\xA1\xC2]
list [string length $x] $y
} "4 \xF0\xA0\xA1\xC2"
+test encoding-15.17 {UtfToUtfProc emoji character output} {
+ set x 😂
+ set y [encoding convertto utf-8 😂]
+ binary scan $y H* z
+ list [string length $y] $z
+} {4 f09f9882}
+test encoding-15.18 {UtfToUtfProc CESU-8 6-byte sequence} {
+ set y [encoding convertto cesu-8 \U10000]
+ binary scan $y H* z
+ list [string length $y] $z
+} {6 eda080edb080}
+test encoding-15.19 {UtfToUtfProc CESU-8 upper surrogate} {
+ set y [encoding convertto cesu-8 \uD800]
+ binary scan $y H* z
+ list [string length $y] $z
+} {3 eda080}
+test encoding-15.20 {UtfToUtfProc CESU-8 lower surrogate} {
+ set y [encoding convertto cesu-8 \uDC00]
+ binary scan $y H* z
+ list [string length $y] $z
+} {3 edb080}
+test encoding-15.21 {UtfToUtfProc CESU-8 noncharacter} {
+ set y [encoding convertto cesu-8 \uFFFF]
+ binary scan $y H* z
+ list [string length $y] $z
+} {3 efbfbf}
+test encoding-15.22 {UtfToUtfProc CESU-8 bug [048dd20b4171c8da]} {
+ set y [encoding convertto cesu-8 \x80]
+ binary scan $y H* z
+ list [string length $y] $z
+} {2 c280}
+test encoding-15.23 {UtfToUtfProc CESU-8 bug [048dd20b4171c8da]} {
+ set y [encoding convertto cesu-8 \u100]
+ binary scan $y H* z
+ list [string length $y] $z
+} {2 c480}
+test encoding-15.24 {UtfToUtfProc CESU-8 bug [048dd20b4171c8da]} {
+ set y [encoding convertto cesu-8 \u3FF]
+ binary scan $y H* z
+ list [string length $y] $z
+} {2 cfbf}
+test encoding-15.25 {UtfToUtfProc CESU-8} {
+ encoding convertfrom cesu-8 \x00
+} \x00
+test encoding-15.26 {UtfToUtfProc CESU-8} {
+ encoding convertfrom cesu-8 \xC0\x80
+} \x00
+test encoding-15.27 {UtfToUtfProc -strict CESU-8} {
+ encoding convertfrom -strict cesu-8 \x00
+} \x00
+test encoding-15.28 {UtfToUtfProc -strict CESU-8} -body {
+ encoding convertfrom -strict cesu-8 \xC0\x80
+} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'}
+test encoding-15.29 {UtfToUtfProc CESU-8} {
+ encoding convertto cesu-8 \x00
+} \x00
+test encoding-15.30 {UtfToUtfProc -strict CESU-8} {
+ encoding convertto -strict cesu-8 \x00
+} \x00
+test encoding-15.31 {UtfToUtfProc -strict CESU-8 (bytes F0-F4 are invalid)} -body {
+ encoding convertfrom -strict cesu-8 \xF1\x86\x83\x9C
+} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xF1'}
-test encoding-16.1 {UnicodeToUtfProc} -body {
- set val [encoding convertfrom unicode NN]
- list $val [format %X [scan $val %c]]
-} -result "\u4E4E 4E4E"
-test encoding-16.2 {UnicodeToUtfProc} -constraints fullutf -body {
- set val [encoding convertfrom unicode "\xD8\xD8\xDC\xDC"]
+test encoding-16.1 {Utf16ToUtfProc} -body {
+ set val [encoding convertfrom utf-16 NN]
+ list $val [format %x [scan $val %c]]
+} -result "乎 4e4e"
+test encoding-16.2 {Utf16ToUtfProc} -body {
+ set val [encoding convertfrom utf-16 "\xD8\xD8\xDC\xDC"]
+ list $val [format %x [scan $val %c]]
+} -result "\U460DC 460dc"
+test encoding-16.3 {Utf16ToUtfProc} -body {
+ set val [encoding convertfrom utf-16 "\xDC\xDC"]
+ list $val [format %x [scan $val %c]]
+} -result "\uDCDC dcdc"
+test encoding-16.4 {Ucs2ToUtfProc} -body {
+ set val [encoding convertfrom ucs-2 NN]
+ list $val [format %x [scan $val %c]]
+} -result "乎 4e4e"
+test encoding-16.5 {Ucs2ToUtfProc} -body {
+ set val [encoding convertfrom ucs-2 "\xD8\xD8\xDC\xDC"]
list $val [format %x [scan $val %c]]
} -result "\U460DC 460dc"
-test encoding-16.3 {UnicodeToUtfProc} -body {
- set val [encoding convertfrom unicode "\xDC\xDC"]
- list $val [format %X [scan $val %c]]
-} -result "\uDCDC DCDC"
-test encoding-16.4 {UnicodeToUtfProc, bug [d19fe0a5b]} -body {
- encoding convertfrom unicode "\x41\x41\x41"
+test encoding-16.6 {Utf32ToUtfProc} -body {
+ set val [encoding convertfrom utf-32le NN\0\0]
+ list $val [format %x [scan $val %c]]
+} -result "乎 4e4e"
+test encoding-16.7 {Utf32ToUtfProc} -body {
+ set val [encoding convertfrom utf-32be \0\0NN]
+ list $val [format %x [scan $val %c]]
+} -result "乎 4e4e"
+test encoding-16.8 {Utf32ToUtfProc} -body {
+ set val [encoding convertfrom -nocomplain utf-32 \x41\x00\x00\x41]
+ list $val [format %x [scan $val %c]]
+} -result "\uFFFD fffd"
+test encoding-16.9 {Utf32ToUtfProc} -body {
+ encoding convertfrom utf-32le \x00\xD8\x00\x00
+} -result \uD800
+test encoding-16.10 {Utf32ToUtfProc} -body {
+ encoding convertfrom utf-32le \x00\xDC\x00\x00
+} -result \uDC00
+test encoding-16.11 {Utf32ToUtfProc} -body {
+ encoding convertfrom utf-32le \x00\xD8\x00\x00\x00\xDC\x00\x00
+} -result \uD800\uDC00
+test encoding-16.12 {Utf32ToUtfProc} -body {
+ encoding convertfrom utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00
+} -result \uDC00\uD800
+test encoding-16.13 {Utf16ToUtfProc} -body {
+ encoding convertfrom utf-16le \x00\xD8
+} -result \uD800
+test encoding-16.14 {Utf16ToUtfProc} -body {
+ encoding convertfrom utf-16le \x00\xDC
+} -result \uDC00
+test encoding-16.15 {Utf16ToUtfProc} -body {
+ encoding convertfrom utf-16le \x00\xD8\x00\xDC
+} -result \uD800\uDC00
+test encoding-16.16 {Utf16ToUtfProc} -body {
+ encoding convertfrom utf-16le \x00\xDC\x00\xD8
+} -result \uDC00\uD800
+test encoding-16.17 {Utf32ToUtfProc} -body {
+ list [encoding convertfrom -strict -failindex idx utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00] [set idx]
+} -result {A 4}
+
+test encoding-16.18 {
+ Utf16ToUtfProc, Tcl_UniCharToUtf, surrogate pairs in utf-16
+} -body {
+ apply [list {} {
+ for {set i 0xD800} {$i < 0xDBFF} {incr i} {
+ for {set j 0xDC00} {$j < 0xDFFF} {incr j} {
+ set string [binary format S2 [list $i $j]]
+ set status [catch {
+ set decoded [encoding convertfrom utf-16be $string]
+ set encoded [encoding convertto utf-16be $decoded]
+ }]
+ if {$status || ( $encoded ne $string )} {
+ return [list [format %x $i] [format %x $j]]
+ }
+ }
+ }
+ return done
+ } [namespace current]]
+} -result done
+test encoding-16.19 {Utf16ToUtfProc, bug [d19fe0a5b]} -body {
+ encoding convertfrom utf-16 "\x41\x41\x41"
} -result \u4141\uFFFD
-test encoding-16.5 {UnicodeToUtfProc, bug [d19fe0a5b]} -constraints ucs2 -body {
- encoding convertfrom unicode "\xD8\xD8"
+test encoding-16.20 {Utf16ToUtfProc, bug [d19fe0a5b]} -constraints deprecated -body {
+ encoding convertfrom utf-16 "\xD8\xD8"
} -result \uD8D8
+test encoding-16.21 {Utf16ToUtfProc, bug [d19fe0a5b]} -body {
+ encoding convertfrom utf-32 "\x00\x00\x00\x00\x41\x41"
+} -result \x00\uFFFD
+test encoding-16.22 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body {
+ encoding convertfrom -strict utf-16le \x00\xD8
+} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'}
+test encoding-16.23 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body {
+ encoding convertfrom -strict utf-16le \x00\xDC
+} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'}
+test encoding-16.24 {Utf32ToUtfProc} -body {
+ encoding convertfrom utf-32 "\xFF\xFF\xFF\xFF"
+} -result \uFFFD
+test encoding-16.25 {Utf32ToUtfProc} -body {
+ encoding convertfrom utf-32 "\x01\x00\x00\x01"
+} -result \uFFFD
-test encoding-17.1 {UtfToUnicodeProc} -constraints fullutf -body {
- encoding convertto unicode "\U460DC"
+test encoding-17.1 {UtfToUtf16Proc} -body {
+ encoding convertto utf-16 "\U460DC"
} -result "\xD8\xD8\xDC\xDC"
-test encoding-17.2 {UtfToUnicodeProc} -body {
- encoding convertto unicode "\uDCDC"
+test encoding-17.2 {UtfToUcs2Proc, invalid testcase, see [5607d6482c]} -constraints deprecated -body {
+ encoding convertfrom utf-16 [encoding convertto ucs-2 "\U460DC"]
+} -result "\uFFFD"
+test encoding-17.3 {UtfToUtf16Proc} -body {
+ encoding convertto -nocomplain utf-16be "\uDCDC"
} -result "\xDC\xDC"
-test encoding-17.3 {UtfToUnicodeProc} -body {
- encoding convertto unicode "\uD8D8"
+test encoding-17.4 {UtfToUtf16Proc} -body {
+ encoding convertto -nocomplain utf-16le "\uD8D8"
} -result "\xD8\xD8"
+test encoding-17.5 {UtfToUtf16Proc} -body {
+ encoding convertto utf-32le "\U460DC"
+} -result "\xDC\x60\x04\x00"
+test encoding-17.6 {UtfToUtf16Proc} -body {
+ encoding convertto utf-32be "\U460DC"
+} -result "\x00\x04\x60\xDC"
+test encoding-17.7 {UtfToUtf16Proc} -body {
+ encoding convertto -strict utf-16be "\uDCDC"
+} -returnCodes error -result {unexpected character at index 0: 'U+00DCDC'}
+test encoding-17.8 {UtfToUtf16Proc} -body {
+ encoding convertto -strict utf-16le "\uD8D8"
+} -returnCodes error -result {unexpected character at index 0: 'U+00D8D8'}
+test encoding-17.9 {Utf32ToUtfProc} -body {
+ encoding convertfrom -strict utf-32 "\xFF\xFF\xFF\xFF"
+} -returnCodes error -result {unexpected byte sequence starting at index 0: '\xFF'}
+test encoding-17.10 {Utf32ToUtfProc} -body {
+ encoding convertfrom -nocomplain utf-32 "\xFF\xFF\xFF\xFF"
+} -result \uFFFD
+test encoding-17.11 {Utf32ToUtfProc} -body {
+ encoding convertfrom -strict utf-32le "\x00\xD8\x00\x00"
+} -returnCodes error -result {unexpected byte sequence starting at index 0: '\x00'}
+test encoding-17.12 {Utf32ToUtfProc} -body {
+ encoding convertfrom -strict utf-32le "\x00\xDC\x00\x00"
+} -returnCodes error -result {unexpected byte sequence starting at index 0: '\x00'}
-test encoding-18.1 {TableToUtfProc on invalid input} -body {
+test encoding-18.1 {TableToUtfProc on invalid input} -constraints deprecated -body {
list [catch {encoding convertto jis0208 \\} res] $res
} -result {0 !)}
+test encoding-18.2 {TableToUtfProc on invalid input with -strict} -body {
+ list [catch {encoding convertto -strict jis0208 \\} res] $res
+} -result {1 {unexpected character at index 0: 'U+00005C'}}
+test encoding-18.3 {TableToUtfProc on invalid input with -strict -failindex} -body {
+ list [catch {encoding convertto -strict -failindex pos jis0208 \\} res] $res $pos
+} -result {0 {} 0}
+test encoding-18.4 {TableToUtfProc on invalid input with -failindex -strict} -body {
+ list [catch {encoding convertto -failindex pos -strict jis0208 \\} res] $res $pos
+} -result {0 {} 0}
+test encoding-18.5 {TableToUtfProc on invalid input with -failindex} -body {
+ list [catch {encoding convertto -failindex pos jis0208 \\} res] $res $pos
+} -result {0 {} 0}
+test encoding-18.6 {TableToUtfProc on invalid input with -nocomplain} -body {
+ list [catch {encoding convertto -nocomplain jis0208 \\} res] $res
+} -result {0 !)}
-test encoding-19.1 {TableFromUtfProc} {
-} {}
+test encoding-19.1 {TableFromUtfProc} -body {
+ encoding convertfrom ascii AÁ
+} -result AÁ
+test encoding-19.2 {TableFromUtfProc} -body {
+ encoding convertfrom -nocomplain ascii AÁ
+} -result AÁ
+test encoding-19.3 {TableFromUtfProc} -body {
+ encoding convertfrom -strict ascii AÁ
+} -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\xC1'}
+test encoding-19.4 {TableFromUtfProc} -body {
+ list [encoding convertfrom -failindex idx ascii AÁ] [set idx]
+} -result {A 1}
+test encoding-19.5 {TableFromUtfProc} -body {
+ list [encoding convertfrom -failindex idx -strict ascii AÁ] [set idx]
+} -result {A 1}
+test encoding-19.6 {TableFromUtfProc} -body {
+ list [encoding convertfrom -failindex idx -strict ascii AÁB] [set idx]
+} -result {A 1}
test encoding-20.1 {TableFreefProc} {
} {}
@@ -459,11 +666,11 @@ casino_japanese@___.com \x1B\$B!K\$^\$G\$4=;=jJQ99:Q\$NO\"Mm\$r\$\$\$?\$@\$1\$J\
\x1B\$B\$7\$g\$&\$+!)\x1B(B"
set iso2022uniData [encoding convertfrom iso2022-jp $iso2022encData]
-set iso2022uniData2 "\u79c1\u3069\u3082\u3067\u306f\u3001\u30c1\u30c3\u30d7\u3054\u8cfc\u5165\u6642\u306b\u3054\u767b\u9332\u3044\u305f\u3060\u3044\u305f\u3054\u4f4f\u6240\u3092\u30ad\u30e3\u30c3\u30b7\u30e5\u30a2\u30a6\u30c8\u306e\u969b\u306e
-\u5c0f\u5207\u624b\u9001\u4ed8\u5148\u3068\u3057\u3066\u4f7f\u7528\u3057\u3066\u304a\u308a\u307e\u3059\u3002\u6050\u308c\u5165\u308a\u307e\u3059\u304c\u3001\u6b63\u3057\u3044\u4f4f\u6240\u3092\u3054\u767b\u9332\u3057\u306a\u304a
-\u304a\u9858\u3044\u3044\u305f\u3057\u307e\u3059\u3002\u307e\u305f\u3001\u5927\u5909\u6050\u7e2e\u3067\u3059\u304c\u3001\u4f4f\u6240\u5909\u66f4\u306e\u3042\u3068\u3001\u65e5\u672c\u8a9e\u30b5\u30fc\u30d3\u30b9\u90e8\uff08
-\u0063\u0061\u0073\u0069\u006e\u006f\u005f\u006a\u0061\u0070\u0061\u006e\u0065\u0073\u0065\u0040\u005f\u005f\u005f\u002e\u0063\u006f\u006d\u0020\uff09\u307e\u3067\u3054\u4f4f\u6240\u5909\u66f4\u6e08\u306e\u9023\u7d61\u3092\u3044\u305f\u3060\u3051\u306a\u3044\u3067
-\u3057\u3087\u3046\u304b\uff1f"
+set iso2022uniData2 "私どもでは、チップご購入時にご登録いただいたご住所をキャッシュアウトの際の
+小切手送付先として使用しております。恐れ入りますが、正しい住所をご登録しなお
+お願いいたします。また、大変恐縮ですが、住所変更のあと、日本語サービス部(
+casino_japanese@___.com )までご住所変更済の連絡をいただけないで
+しょうか?"
cd [temporaryDirectory]
set fid [open iso2022.txt w]
@@ -528,7 +735,7 @@ test encoding-24.2 {EscapeFreeProc on open channels} {exec} {
viewable [runInSubprocess {
encoding system cp1252; # Bug #2891556 crash revelator
fconfigure stdout -encoding iso2022-jp
- puts ab\u4E4E\u68D9g
+ puts ab乎棙g
set env(TCL_FINALIZE_ON_EXIT) 1
exit
}]
@@ -538,7 +745,7 @@ test encoding-24.3 {EscapeFreeProc on open channels} {stdio} {
# closure, we go boom
set file [makeFile {
encoding system iso2022-jp
- set a \u4E4E\u4E5E\u4E5F; # 3 Japanese Kanji letters
+ set a "乎乞也"; # 3 Japanese Kanji letters
puts $a
} iso2022.tcl]
set f [open "|[list [interpreter] $file]"]
@@ -547,32 +754,134 @@ test encoding-24.3 {EscapeFreeProc on open channels} {stdio} {
close $f
removeFile iso2022.tcl
list $count [viewable $line]
-} [list 3 "\u4E4E\u4E5E\u4E5F (\\u4E4E\\u4E5E\\u4E5F)"]
+} [list 3 "乎乞也 (\\u4E4E\\u4E5E\\u4E5F)"]
test encoding-24.4 {Parse valid or invalid utf-8} {
string length [encoding convertfrom utf-8 "\xC0\x80"]
} 1
test encoding-24.5 {Parse valid or invalid utf-8} {
- string length [encoding convertfrom utf-8 "\xC0\x81"]
+ string length [encoding convertfrom -nocomplain utf-8 "\xC0\x81"]
} 2
test encoding-24.6 {Parse valid or invalid utf-8} {
- string length [encoding convertfrom utf-8 "\xC1\xBF"]
+ string length [encoding convertfrom -nocomplain utf-8 "\xC1\xBF"]
} 2
test encoding-24.7 {Parse valid or invalid utf-8} {
string length [encoding convertfrom utf-8 "\xC2\x80"]
} 1
test encoding-24.8 {Parse valid or invalid utf-8} {
- string length [encoding convertfrom utf-8 "\xE0\x80\x80"]
+ string length [encoding convertfrom -nocomplain utf-8 "\xE0\x80\x80"]
} 3
test encoding-24.9 {Parse valid or invalid utf-8} {
- string length [encoding convertfrom utf-8 "\xE0\x9F\xBF"]
+ string length [encoding convertfrom -nocomplain utf-8 "\xE0\x9F\xBF"]
} 3
test encoding-24.10 {Parse valid or invalid utf-8} {
string length [encoding convertfrom utf-8 "\xE0\xA0\x80"]
} 1
test encoding-24.11 {Parse valid or invalid utf-8} {
- string length [encoding convertfrom utf-8 "\xEF\xBF\xBF"]
+ string length [encoding convertfrom -nocomplain utf-8 "\xEF\xBF\xBF"]
+} 1
+test encoding-24.12 {Parse valid or invalid utf-8} -constraints deprecated -body {
+ encoding convertfrom utf-8 "\xC0\x81"
+} -result \xC0\x81
+test encoding-24.13 {Parse valid or invalid utf-8} -constraints deprecated -body {
+ encoding convertfrom utf-8 "\xC1\xBF"
+} -result \xC1\xBF
+test encoding-24.14 {Parse valid or invalid utf-8} {
+ string length [encoding convertfrom utf-8 "\xC2\x80"]
+} 1
+test encoding-24.15 {Parse valid or invalid utf-8} -constraints deprecated -body {
+ encoding convertfrom utf-8 "Z\xE0\x80"
+} -result Z\xE0\u20AC
+test encoding-24.16 {Parse valid or invalid utf-8} -constraints testbytestring -body {
+ encoding convertto utf-8 [testbytestring "Z\u4343\x80"]
+} -returnCodes 1 -result {expected byte sequence but character 1 was '䍃€' (U+004343)}
+test encoding-24.17 {Parse valid or invalid utf-8} -constraints testbytestring -body {
+ encoding convertto utf-8 [testbytestring "Z\xE0\x80"]
+} -result "Z\xC3\xA0\xE2\x82\xAC"
+test encoding-24.18 {Parse valid or invalid utf-8} -constraints testbytestring -body {
+ encoding convertto utf-8 [testbytestring "Z\xE0\x80xxxxxx"]
+} -result "Z\xC3\xA0\xE2\x82\xACxxxxxx"
+test encoding-24.19 {Parse valid or invalid utf-8} -constraints deprecated -body {
+ encoding convertto utf-8 "ZX\uD800"
+} -result ZX\xED\xA0\x80
+test encoding-24.20 {Parse with -nocomplain but without providing encoding} {
+ string length [encoding convertfrom -nocomplain "\x20"]
+} 1
+test encoding-24.21 {Parse with -nocomplain but without providing encoding} {
+ string length [encoding convertto -nocomplain "\x20"]
} 1
+test encoding-24.22 {Syntax error, two encodings} -body {
+ encoding convertfrom iso8859-1 utf-8 "ZX\uD800"
+} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-strict? ?-failindex var? ?encoding? data" or "::tcl::encoding::convertfrom -nocomplain ?encoding? data"}
+test encoding-24.23 {Syntax error, two encodings} -body {
+ encoding convertto iso8859-1 utf-8 "ZX\uD800"
+} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-strict? ?-failindex var? ?encoding? data" or "::tcl::encoding::convertto -nocomplain ?encoding? data"}
+test encoding-24.24 {Parse invalid utf-8 with -strict} -body {
+ encoding convertfrom -strict utf-8 "\xC0\x80\x00\x00"
+} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'}
+test encoding-24.25 {Parse invalid utf-8 with -strict} -body {
+ encoding convertfrom -strict utf-8 "\x40\x80\x00\x00"
+} -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\x80'}
+test encoding-24.26 {Parse valid utf-8 with -strict} -body {
+ encoding convertfrom -strict utf-8 "\xF1\x80\x80\x80"
+} -result \U40000
+test encoding-24.27 {Parse invalid utf-8 with -strict} -body {
+ encoding convertfrom -strict utf-8 "\xF0\x80\x80\x80"
+} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xF0'}
+test encoding-24.28 {Parse invalid utf-8 with -strict} -body {
+ encoding convertfrom -strict utf-8 "\xFF\x00\x00"
+} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xFF'}
+test encoding-24.29 {Parse invalid utf-8} -body {
+ encoding convertfrom utf-8 \xEF\xBF\xBF
+} -result \uFFFF
+test encoding-24.30 {Parse noncharacter with -strict} -body {
+ encoding convertfrom -strict utf-8 \xEF\xBF\xBF
+} -result \uFFFF
+test encoding-24.31 {Parse invalid utf-8 with -nocomplain} -body {
+ encoding convertfrom -nocomplain utf-8 \xEF\xBF\xBF
+} -result \uFFFF
+test encoding-24.32 {Try to generate invalid utf-8} -body {
+ encoding convertto utf-8 \uFFFF
+} -result \xEF\xBF\xBF
+test encoding-24.33 {Try to generate noncharacter with -strict} -body {
+ encoding convertto -strict utf-8 \uFFFF
+} -result \xEF\xBF\xBF
+test encoding-24.34 {Try to generate invalid utf-8 with -nocomplain} -body {
+ encoding convertto -nocomplain utf-8 \uFFFF
+} -result \xEF\xBF\xBF
+test encoding-24.35 {Parse invalid utf-8} -constraints deprecated -body {
+ encoding convertfrom utf-8 \xED\xA0\x80
+} -result \uD800
+test encoding-24.36 {Parse invalid utf-8 with -strict} -body {
+ encoding convertfrom -strict utf-8 \xED\xA0\x80
+} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'}
+test encoding-24.37 {Parse invalid utf-8 with -nocomplain} -body {
+ encoding convertfrom -nocomplain utf-8 \xED\xA0\x80
+} -result \uD800
+test encoding-24.38 {Try to generate invalid utf-8} -constraints deprecated -body {
+ encoding convertto utf-8 \uD800
+} -result \xED\xA0\x80
+test encoding-24.39 {Try to generate invalid utf-8 with -strict} -body {
+ encoding convertto -strict utf-8 \uD800
+} -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'}
+test encoding-24.40 {Try to generate invalid utf-8 with -nocomplain} -body {
+ encoding convertto -nocomplain utf-8 \uD800
+} -result \xED\xA0\x80
+test encoding-24.41 {Parse invalid utf-8 with -strict} -body {
+ encoding convertfrom -strict utf-8 \xED\xA0\x80\xED\xB0\x80
+} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'}
+test encoding-24.42 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body {
+ encoding convertfrom -nocomplain utf-8 \xF0\x80\x80\x80
+} -result \xF0\u20AC\u20AC\u20AC
+test encoding-24.43 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body {
+ encoding convertfrom -nocomplain utf-8 \x80
+} -result \u20AC
+test encoding-24.44 {Try to generate invalid ucs-2 with -strict} -body {
+ encoding convertto -strict ucs-2 \uD800
+} -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'}
+test encoding-24.45 {Try to generate invalid ucs-2 with -strict} -body {
+ encoding convertto -strict ucs-2 \U10000
+} -returnCodes 1 -result {unexpected character at index 0: 'U+010000'}
file delete [file join [temporaryDirectory] iso2022.txt]
@@ -696,15 +1005,15 @@ foreach from {cp932 shiftjis euc-jp iso2022-jp} {
}
}
-test encoding-26.0 {Tcl_GetDefaultEncodingDir} -constraints {
- testgetdefenc
+test encoding-26.0 {Tcl_GetEncodingSearchPath} -constraints {
+ testgetencpath
} -setup {
- set origDir [testgetdefenc]
- testsetdefenc slappy
+ set origPath [testgetencpath]
+ testsetencpath slappy
} -body {
- testgetdefenc
+ testgetencpath
} -cleanup {
- testsetdefenc $origDir
+ testsetencpath $origPath
} -result slappy
file delete {*}[glob -directory [temporaryDirectory] *.chars *.tcltestout]
@@ -727,15 +1036,17 @@ test encoding-27.2 {encoding dirs basic behavior} -returnCodes error -body {
test encoding-28.0 {all encodings load} -body {
set string hello
foreach name [encoding names] {
- incr count
- encoding convertto $name $string
+ if {$name ne "unicode"} {
+ incr count
+ }
+ encoding convertto -nocomplain $name $string
# discard the cached internal representation of Tcl_Encoding
# Unfortunately, without this, encoding 2-1 fails.
llength $name
}
return $count
-} -result 83
+} -result 91
runtests
@@ -769,13 +1080,30 @@ test encoding-bug-183a1adcc0-4 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExtern
test encoding-bug-183a1adcc0-5 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints {
testencoding
+} -constraints {
+ knownBug
} -body {
+ # The knownBug constraint is because test depends on TCL_UTF_MAX and
+ # also UtfToUtf16 assumes space required in destination buffer is
+ # sizeof(Tcl_UniChar) which is incorrect when TCL_UTF_MAX==4
# Note - buffers are initialized to \xff
list [catch {testencoding Tcl_UtfToExternal unicode A {start end} {} 4} result] $result
} -result [list 0 [list ok {} [expr {$::tcl_platform(byteOrder) eq "littleEndian" ? "\x41\x00" : "\x00\x41"}]\x00\x00]]
}
+test encoding-29.0 {get encoding nul terminator lengths} -constraints {
+ testencoding
+} -body {
+ list \
+ [testencoding nullength ascii] \
+ [testencoding nullength utf-16] \
+ [testencoding nullength utf-32] \
+ [testencoding nullength gb12345] \
+ [testencoding nullength ksc5601]
+} -result {1 2 4 2 2}
+
+
# cleanup
namespace delete ::tcl::test::encoding
::tcltest::cleanupTests