summaryrefslogtreecommitdiffstats
path: root/tests/encoding.test
diff options
context:
space:
mode:
Diffstat (limited to 'tests/encoding.test')
-rw-r--r--tests/encoding.test406
1 files changed, 347 insertions, 59 deletions
diff --git a/tests/encoding.test b/tests/encoding.test
index 21c53b0..aa50360 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -7,34 +7,36 @@
#
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
-#
-# RCS: @(#) $Id: encoding.test,v 1.8 2000/04/10 17:18:58 ericm Exp $
-if {[lsearch [namespace children] ::tcltest] == -1} {
- package require tcltest
- namespace import -force ::tcltest::*
-}
+package require tcltest 2
+
+namespace eval ::tcl::test::encoding {
+ variable x
+
+namespace import -force ::tcltest::*
proc toutf {args} {
- global x
+ variable x
lappend x "toutf $args"
}
proc fromutf {args} {
- global x
+ variable x
lappend x "fromutf $args"
}
-# Some tests require the testencoding command
+proc runtests {} {
-set ::tcltest::testConstraints(testencoding) \
- [expr {[info commands testencoding] != {}}]
+ variable x
+# Some tests require the testencoding command
+testConstraint testencoding [llength [info commands testencoding]]
+testConstraint exec [llength [info commands exec]]
# TclInitEncodingSubsystem is tested by the rest of this file
# TclFinalizeEncodingSubsystem is not currently tested
test encoding-1.1 {Tcl_GetEncoding: system encoding} {testencoding} {
- testencoding create foo toutf fromutf
+ testencoding create foo [namespace origin toutf] [namespace origin fromutf]
set old [encoding system]
encoding system foo
set x {}
@@ -44,7 +46,7 @@ test encoding-1.1 {Tcl_GetEncoding: system encoding} {testencoding} {
set x
} {{fromutf }}
test encoding-1.2 {Tcl_GetEncoding: existing encoding} {testencoding} {
- testencoding create foo toutf fromutf
+ testencoding create foo [namespace origin toutf] [namespace origin fromutf]
set x {}
encoding convertto foo abcd
testencoding delete foo
@@ -60,14 +62,15 @@ test encoding-2.1 {Tcl_FreeEncoding: refcount == 0} {
} {8C}
test encoding-2.2 {Tcl_FreeEncoding: refcount != 0} {testencoding} {
set system [encoding system]
- set path [testencoding path]
+ set path [encoding dirs]
encoding system shiftjis ;# incr ref count
- testencoding path [list [pwd]]
+ encoding dirs [list [pwd]]
set x [encoding convertto shiftjis \u4e4e] ;# old one found
encoding system identity
+ llength shiftjis
lappend x [catch {encoding convertto shiftjis \u4e4e} msg] $msg
encoding system identity
- testencoding path $path
+ encoding dirs $path
encoding system $system
set x
} "\u008c\u00c1 1 {unknown encoding \"shiftjis\"}"
@@ -88,26 +91,29 @@ test encoding-3.2 {Tcl_GetEncodingName, non-null} {
} {jis0208}
test encoding-4.1 {Tcl_GetEncodingNames} {testencoding} {
- file mkdir tmp/encoding
- close [open tmp/encoding/junk.enc w]
- close [open tmp/encoding/junk2.enc w]
- cd tmp
- set path [testencoding path]
- testencoding path {}
+ cd [makeDirectory tmp]
+ makeDirectory [file join tmp encoding]
+ makeFile {} [file join tmp encoding junk.enc]
+ makeFile {} [file join tmp encoding junk2.enc]
+ set path [encoding dirs]
+ encoding dirs {}
catch {unset encodings}
catch {unset x}
foreach encoding [encoding names] {
set encodings($encoding) 1
}
- testencoding path [list [pwd]]
+ encoding dirs [list [file join [pwd] encoding]]
foreach encoding [encoding names] {
if {![info exists encodings($encoding)]} {
lappend x $encoding
}
}
- testencoding path $path
- cd ..
- file delete -force tmp
+ encoding dirs $path
+ cd [workingDirectory]
+ removeFile [file join tmp encoding junk2.enc]
+ removeFile [file join tmp encoding junk.enc]
+ removeDirectory [file join tmp encoding]
+ removeDirectory tmp
lsort $x
} {junk junk2}
@@ -126,7 +132,8 @@ test encoding-5.2 {Tcl_SetSystemEncoding: test ref count} {
} {0}
test encoding-6.1 {Tcl_CreateEncoding: new} {testencoding} {
- testencoding create foo {toutf 1} {fromutf 2}
+ testencoding create foo [namespace code {toutf 1}] \
+ [namespace code {fromutf 2}]
set x {}
encoding convertfrom foo abcd
encoding convertto foo abcd
@@ -134,7 +141,8 @@ test encoding-6.1 {Tcl_CreateEncoding: new} {testencoding} {
set x
} {{toutf 1} {fromutf 2}}
test encoding-6.2 {Tcl_CreateEncoding: replace encoding} {testencoding} {
- testencoding create foo {toutf a} {fromutf b}
+ testencoding create foo [namespace code {toutf a}] \
+ [namespace code {fromutf b}]
set x {}
encoding convertfrom foo abcd
encoding convertto foo abcd
@@ -156,15 +164,15 @@ test encoding-7.2 {Tcl_UtfToExternalDString: big buffer} {
} "512 \u4e4e"
test encoding-8.1 {Tcl_ExternalToUtf} {
- set f [open dummy w]
+ set f [open [file join [temporaryDirectory] dummy] w]
fconfigure $f -translation binary -encoding iso8859-1
puts -nonewline $f "ab\x8c\xc1g"
close $f
- set f [open dummy r]
+ set f [open [file join [temporaryDirectory] dummy] r]
fconfigure $f -translation binary -encoding shiftjis
set x [read $f]
close $f
- file delete dummy
+ file delete [file join [temporaryDirectory] dummy]
set x
} "ab\u4e4eg"
@@ -184,25 +192,38 @@ test encoding-9.2 {Tcl_UtfToExternalDString: big buffer} {
} "1024 8C"
test encoding-10.1 {Tcl_UtfToExternal} {
- set f [open dummy w]
+ set f [open [file join [temporaryDirectory] dummy] w]
fconfigure $f -translation binary -encoding shiftjis
puts -nonewline $f "ab\u4e4eg"
close $f
- set f [open dummy r]
+ set f [open [file join [temporaryDirectory] dummy] r]
fconfigure $f -translation binary -encoding iso8859-1
set x [read $f]
close $f
- file delete dummy
+ file delete [file join [temporaryDirectory] dummy]
set x
} "ab\x8c\xc1g"
+proc viewable {str} {
+ set res ""
+ foreach c [split $str {}] {
+ if {[string is print $c] && [string is ascii $c]} {
+ append res $c
+ } else {
+ append res "\\u[format %4.4x [scan $c %c]]"
+ }
+ }
+ return "$str ($res)"
+}
+
test encoding-11.1 {LoadEncodingFile: unknown encoding} {testencoding} {
set system [encoding system]
- set path [testencoding path]
+ set path [encoding dirs]
encoding system iso8859-1
- testencoding path {}
+ encoding dirs {}
+ llength jis0208
set x [list [catch {encoding convertto jis0208 \u4e4e} msg] $msg]
- testencoding path $path
+ encoding dirs $path
encoding system $system
lappend x [encoding convertto jis0208 \u4e4e]
} {1 {unknown encoding "jis0208"} 8C}
@@ -216,22 +237,29 @@ test encoding-11.4 {LoadEncodingFile: multi-byte} {
encoding convertfrom shiftjis \x8c\xc1
} "\u4e4e"
test encoding-11.5 {LoadEncodingFile: escape file} {
- encoding convertto iso2022 \u4e4e
-} "\x1b(B\x1b$@8C"
+ viewable [encoding convertto iso2022 \u4e4e]
+} [viewable "\x1b\$B8C\x1b(B"]
+test encoding-11.5.1 {LoadEncodingFile: escape file} {
+ viewable [encoding convertto iso2022-jp \u4e4e]
+} [viewable "\x1b\$B8C\x1b(B"]
test encoding-11.6 {LoadEncodingFile: invalid file} {testencoding} {
set system [encoding system]
- set path [testencoding path]
+ set path [encoding dirs]
encoding system identity
- testencoding path tmp
- file mkdir tmp/encoding
- set f [open tmp/encoding/splat.enc w]
+ cd [temporaryDirectory]
+ encoding dirs [file join tmp encoding]
+ makeDirectory tmp
+ makeDirectory [file join tmp encoding]
+ set f [open [file join tmp encoding splat.enc] w]
fconfigure $f -translation binary
puts $f "abcdefghijklmnop"
close $f
set x [list [catch {encoding convertto splat \u4e4e} msg] $msg]
- file delete -force tmp
- catch {file delete encoding}
- testencoding path $path
+ file delete [file join [temporaryDirectory] tmp encoding splat.enc]
+ removeDirectory [file join tmp encoding]
+ removeDirectory tmp
+ cd [workingDirectory]
+ encoding dirs $path
encoding system $system
set x
} {1 {invalid encoding file "splat"}}
@@ -262,8 +290,8 @@ test encoding-12.5 {LoadTableEncoding: symbol encoding} {
} "\x67\x67\u3b3"
test encoding-13.1 {LoadEscapeTable} {
- set x [encoding convertto iso2022 ab\u4e4e\u68d9g]
-} "\x1b(Bab\x1b$@8C\x1b$\(DD%\x1b(Bg"
+ viewable [set x [encoding convertto iso2022 ab\u4e4e\u68d9g]]
+} [viewable "ab\x1b\$B8C\x1b\$\(DD%\x1b(Bg"]
test encoding-14.1 {BinaryProc} {
encoding convertto identity \x12\x34\x56\xff\x69
@@ -273,9 +301,25 @@ test encoding-15.1 {UtfToUtfProc} {
encoding convertto utf-8 \xa3
} "\xc2\xa3"
+test encoding-15.2 {UtfToUtfProc null character output} {
+ set x \u0000
+ set y [encoding convertto utf-8 \u0000]
+ set y [encoding convertfrom identity $y]
+ binary scan $y H* z
+ list [string bytelength $x] [string bytelength $y] $z
+} {2 1 00}
+
+test encoding-15.3 {UtfToUtfProc null character input} {
+ set x [encoding convertfrom identity \x00]
+ set y [encoding convertfrom utf-8 $x]
+ binary scan [encoding convertto identity $y] H* z
+ list [string bytelength $x] [string bytelength $y] $z
+} {1 2 c080}
+
test encoding-16.1 {UnicodeToUtfProc} {
- encoding convertfrom unicode NN
-} "\u4e4e"
+ set val [encoding convertfrom unicode NN]
+ list $val [format %x [scan $val %c]]
+} "\u4e4e 4e4e"
test encoding-17.1 {UtfToUnicodeProc} {
} {}
@@ -295,23 +339,267 @@ test encoding-21.1 {EscapeToUtfProc} {
test encoding-22.1 {EscapeFromUtfProc} {
} {}
-# EscapeFreeProc, GetTableEncoding, unilen
-# are fully tested by the rest of this file
-
-# cleanup
-::tcltest::cleanupTests
-return
-
-
-
+set iso2022encData "\u001b\$B;d\$I\$b\$G\$O!\"%A%C%W\$49XF~;~\$K\$4EPO?\$\$\$?\$@\$\$\$?\$4=;=j\$r%-%c%C%7%e%\"%&%H\$N:]\$N\u001b(B
+\u001b\$B>.@Z<jAwIU@h\$H\$7\$F;HMQ\$7\$F\$*\$j\$^\$9!#62\$lF~\$j\$^\$9\$,!\"@5\$7\$\$=;=j\$r\$4EPO?\$7\$J\$*\u001b(B
+\u001b\$B\$*4j\$\$\$\$\$?\$7\$^\$9!#\$^\$?!\"BgJQ62=L\$G\$9\$,!\"=;=jJQ99\$N\$\"\$H!\"F|K\\8l%5!<%S%9It!J\u001b(B
+casino_japanese@___.com \u001b\$B!K\$^\$G\$4=;=jJQ99:Q\$NO\"Mm\$r\$\$\$?\$@\$1\$J\$\$\$G\u001b(B
+\u001b\$B\$7\$g\$&\$+!)\u001b(B"
+
+set iso2022uniData [encoding convertfrom iso2022-jp $iso2022encData]
+set iso2022uniData2 "\u79c1\u3069\u3082\u3067\u306f\u3001\u30c1\u30c3\u30d7\u3054\u8cfc\u5165\u6642\u306b\u3054\u767b\u9332\u3044\u305f\u3060\u3044\u305f\u3054\u4f4f\u6240\u3092\u30ad\u30e3\u30c3\u30b7\u30e5\u30a2\u30a6\u30c8\u306e\u969b\u306e
+\u5c0f\u5207\u624b\u9001\u4ed8\u5148\u3068\u3057\u3066\u4f7f\u7528\u3057\u3066\u304a\u308a\u307e\u3059\u3002\u6050\u308c\u5165\u308a\u307e\u3059\u304c\u3001\u6b63\u3057\u3044\u4f4f\u6240\u3092\u3054\u767b\u9332\u3057\u306a\u304a
+\u304a\u9858\u3044\u3044\u305f\u3057\u307e\u3059\u3002\u307e\u305f\u3001\u5927\u5909\u6050\u7e2e\u3067\u3059\u304c\u3001\u4f4f\u6240\u5909\u66f4\u306e\u3042\u3068\u3001\u65e5\u672c\u8a9e\u30b5\u30fc\u30d3\u30b9\u90e8\uff08
+\u0063\u0061\u0073\u0069\u006e\u006f\u005f\u006a\u0061\u0070\u0061\u006e\u0065\u0073\u0065\u0040\u005f\u005f\u005f\u002e\u0063\u006f\u006d\u0020\uff09\u307e\u3067\u3054\u4f4f\u6240\u5909\u66f4\u6e08\u306e\u9023\u7d61\u3092\u3044\u305f\u3060\u3051\u306a\u3044\u3067
+\u3057\u3087\u3046\u304b\uff1f"
+
+cd [temporaryDirectory]
+set fid [open iso2022.txt w]
+fconfigure $fid -encoding binary
+puts -nonewline $fid $iso2022encData
+close $fid
+
+test encoding-23.1 {iso2022-jp escape encoding test} {
+ string equal $iso2022uniData $iso2022uniData2
+} 1
+test encoding-23.2 {iso2022-jp escape encoding test} {
+ # This checks that 'gets' isn't resetting the encoding inappropriately.
+ # [Bug #523988]
+ set fid [open iso2022.txt r]
+ fconfigure $fid -encoding iso2022-jp
+ set out ""
+ set count 0
+ while {[set num [gets $fid line]] >= 0} {
+ if {$count} {
+ incr count 1 ; # account for newline
+ append out \n
+ }
+ append out $line
+ incr count $num
+ }
+ close $fid
+ if {[string compare $iso2022uniData $out]} {
+ return -code error "iso2022-jp read in doesn't match original"
+ }
+ list $count $out
+} [list [string length $iso2022uniData] $iso2022uniData]
+test encoding-23.3 {iso2022-jp escape encoding test} {
+ # read $fis <size> reads size in chars, not raw bytes.
+ set fid [open iso2022.txt r]
+ fconfigure $fid -encoding iso2022-jp
+ set data [read $fid 50]
+ close $fid
+ set data
+} [string range $iso2022uniData 0 49] ; # 0 .. 49 inclusive == 50
+cd [workingDirectory]
+
+test encoding-24.1 {EscapeFreeProc on open channels} -constraints {
+ exec
+} -setup {
+ # Bug #524674 input
+ set file [makeFile {
+ set f [open [file join [file dirname [info script]] iso2022.txt]]
+ fconfigure $f -encoding iso2022-jp
+ gets $f
+ } iso2022.tcl]
+} -body {
+ exec [interpreter] $file
+} -cleanup {
+ removeFile iso2022.tcl
+} -result {}
+
+test encoding-24.2 {EscapeFreeProc on open channels} -constraints {
+ exec
+} -setup {
+ # Bug #524674 output
+ set file [makeFile {
+ encoding system cp1252; # Bug #2891556 crash revelator
+ fconfigure stdout -encoding iso2022-jp
+ puts ab\u4e4e\u68d9g
+ exit
+ } iso2022.tcl]
+} -body {
+ viewable [exec [interpreter] $file]
+} -cleanup {
+ removeFile iso2022.tcl
+} -result "ab\x1b\$B8C\x1b\$(DD%\x1b(Bg (ab\\u001b\$B8C\\u001b\$(DD%\\u001b(Bg)"
+
+test encoding-24.3 {EscapeFreeProc on open channels} {stdio} {
+ # Bug #219314 - if we don't free escape encodings correctly on
+ # channel closure, we go boom
+ set file [makeFile {
+ encoding system iso2022-jp
+ set a "\u4e4e\u4e5e\u4e5f"; # 3 Japanese Kanji letters
+ puts $a
+ } iso2022.tcl]
+ set f [open "|[list [interpreter] $file]"]
+ fconfigure $f -encoding iso2022-jp
+ set count [gets $f line]
+ close $f
+ removeFile iso2022.tcl
+ list $count [viewable $line]
+} [list 3 "\u4e4e\u4e5e\u4e5f (\\u4e4e\\u4e5e\\u4e5f)"]
+file delete [file join [temporaryDirectory] iso2022.txt]
+#
+# Begin jajp encoding round-trip conformity tests
+#
+proc foreach-jisx0208 {varName command} {
+ upvar 1 $varName code
+ foreach range {
+ {2121 217E}
+ {2221 222E}
+ {223A 2241}
+ {224A 2250}
+ {225C 226A}
+ {2272 2279}
+ {227E 227E}
+ {2330 2339}
+ {2421 2473}
+ {2521 2576}
+ {2821 2821}
+ {282C 282C}
+ {2837 2837}
+
+ {30 21 4E 7E}
+ {4F21 4F53}
+
+ {50 21 73 7E}
+ {7421 7426}
+ } {
+ if {[llength $range] == 2} {
+ # for adhoc range. simple {first last}. inclusive.
+ set first [scan [lindex $range 0] %x]
+ set last [scan [lindex $range 1] %x]
+ for {set i $first} {$i <= $last} {incr i} {
+ set code $i
+ uplevel 1 $command
+ }
+ } elseif {[llength $range] == 4} {
+ # for uniform range.
+ set h0 [scan [lindex $range 0] %x]
+ set l0 [scan [lindex $range 1] %x]
+ set hend [scan [lindex $range 2] %x]
+ set lend [scan [lindex $range 3] %x]
+ for {set hi $h0} {$hi <= $hend} {incr hi} {
+ for {set lo $l0} {$lo <= $lend} {incr lo} {
+ set code [expr {$hi << 8 | ($lo & 0xff)}]
+ uplevel 1 $command
+ }
+ }
+ } else {
+ error "really?"
+ }
+ }
+}
+proc gen-jisx0208-euc-jp {code} {
+ binary format cc \
+ [expr {($code >> 8) | 0x80}] [expr {($code & 0xff) | 0x80}]
+}
+proc gen-jisx0208-iso2022-jp {code} {
+ binary format a3cca3 \
+ "\x1b\$B" [expr {$code >> 8}] [expr {$code & 0xff}] "\x1b(B"
+}
+proc gen-jisx0208-cp932 {code} {
+ set c1 [expr {($code >> 8) | 0x80}]
+ set c2 [expr {($code & 0xff)| 0x80}]
+ if {$c1 % 2} {
+ set c1 [expr {($c1 >> 1) + ($c1 < 0xdf ? 0x31 : 0x71)}]
+ incr c2 [expr {- (0x60 + ($c2 < 0xe0))}]
+ } else {
+ set c1 [expr {($c1 >> 1) + ($c1 < 0xdf ? 0x30 : 0x70)}]
+ incr c2 -2
+ }
+ binary format cc $c1 $c2
+}
+proc channel-diff {fa fb} {
+ set diff {}
+ while {[gets $fa la] >= 0 && [gets $fb lb] >= 0} {
+ if {[string compare $la $lb] == 0} continue
+ # lappend diff $la $lb
+
+ # For more readable (easy to analyze) output.
+ set code [lindex $la 0]
+ binary scan [lindex $la 1] H* expected
+ binary scan [lindex $lb 1] H* got
+ lappend diff [list $code $expected $got]
+ }
+ set diff
+}
+# Create char tables.
+cd [temporaryDirectory]
+foreach enc {cp932 euc-jp iso2022-jp} {
+ set f [open $enc.chars w]
+ fconfigure $f -encoding binary
+ foreach-jisx0208 code {
+ puts $f [format "%04X %s" $code [gen-jisx0208-$enc $code]]
+ }
+ close $f
+}
+# shiftjis == cp932 for jisx0208.
+file copy -force cp932.chars shiftjis.chars
+
+set NUM 0
+foreach from {cp932 shiftjis euc-jp iso2022-jp} {
+ foreach to {cp932 shiftjis euc-jp iso2022-jp} {
+ test encoding-25.[incr NUM] "jisx0208 $from => $to" {
+ cd [temporaryDirectory]
+ set f [open $from.chars]
+ fconfigure $f -encoding $from
+ set out [open $from.$to.tcltestout w]
+ fconfigure $out -encoding $to
+ puts -nonewline $out [read $f]
+ close $out
+ close $f
+
+ # then compare $to.chars <=> $from.to.tcltestout as binary.
+ set fa [open $to.chars]
+ fconfigure $fa -encoding binary
+ set fb [open $from.$to.tcltestout]
+ fconfigure $fb -encoding binary
+ set diff [channel-diff $fa $fb]
+ close $fa
+ close $fb
+
+ # Difference should be empty.
+ set diff
+ } {}
+ }
+}
+testConstraint testgetdefenc [llength [info commands testgetdefenc]]
+test encoding-26.0 {Tcl_GetDefaultEncodingDir} -constraints {
+ testgetdefenc
+} -setup {
+ set origDir [testgetdefenc]
+ testsetdefenc slappy
+} -body {
+ testgetdefenc
+} -cleanup {
+ testsetdefenc $origDir
+} -result slappy
+file delete {*}[glob -directory [temporaryDirectory] *.chars *.tcltestout]
+# ===> Cut here <===
+# EscapeFreeProc, GetTableEncoding, unilen
+# are fully tested by the rest of this file
+test encoding-27.1 {encoding dirs basic behavior} -returnCodes error -body {
+ encoding dirs ? ?
+} -result {wrong # args: should be "encoding dirs ?dirList?"}
+test encoding-27.2 {encoding dirs basic behavior} -returnCodes error -body {
+ encoding dirs "\{not a list"
+} -result "expected directory list but got \"\{not a list\""
+}
+runtests
+}
+# cleanup
+namespace delete ::tcl::test::encoding
+::tcltest::cleanupTests
+return