summaryrefslogtreecommitdiffstats
path: root/tests/encoding.test
diff options
context:
space:
mode:
Diffstat (limited to 'tests/encoding.test')
-rw-r--r--tests/encoding.test338
1 files changed, 221 insertions, 117 deletions
diff --git a/tests/encoding.test b/tests/encoding.test
index 67317c2..552c97f 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -1,12 +1,12 @@
# This file contains a collection of tests for tclEncoding.c
-# Sourcing this file into Tcl runs the tests and generates output for
-# errors. No output means no errors were found.
+# Sourcing this file into Tcl runs the tests and generates output for errors.
+# No output means no errors were found.
#
# Copyright (c) 1997 Sun Microsystems, Inc.
# Copyright (c) 1998-1999 by Scriptics Corporation.
#
-# See the file "license.terms" for information on usage and redistribution
-# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
+# See the file "license.terms" for information on usage and redistribution of
+# this file, and for a DISCLAIMER OF ALL WARRANTIES.
package require tcltest 2
@@ -15,6 +15,11 @@ namespace eval ::tcl::test::encoding {
namespace import -force ::tcltest::*
+catch {
+ ::tcltest::loadTestedCommands
+ package require -exact Tcltest [info patchlevel]
+}
+
proc toutf {args} {
variable x
lappend x "toutf $args"
@@ -25,33 +30,38 @@ proc fromutf {args} {
}
proc runtests {} {
-
variable x
# Some tests require the testencoding command
testConstraint testencoding [llength [info commands testencoding]]
-testConstraint exec [llength [info commands exec]]
+testConstraint testbytestring [llength [info commands testbytestring]]
+testConstraint teststringbytes [llength [info commands teststringbytes]]
+testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}]
testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}]
-
+testConstraint exec [llength [info commands exec]]
+testConstraint testgetdefenc [llength [info commands testgetdefenc]]
+
# TclInitEncodingSubsystem is tested by the rest of this file
# TclFinalizeEncodingSubsystem is not currently tested
-test encoding-1.1 {Tcl_GetEncoding: system encoding} {testencoding} {
- testencoding create foo [namespace origin toutf] [namespace origin fromutf]
+test encoding-1.1 {Tcl_GetEncoding: system encoding} -setup {
set old [encoding system]
+} -constraints {testencoding} -body {
+ testencoding create foo [namespace origin toutf] [namespace origin fromutf]
encoding system foo
set x {}
encoding convertto abcd
+ return $x
+} -cleanup {
encoding system $old
testencoding delete foo
- set x
-} {{fromutf }}
+} -result {{fromutf }}
test encoding-1.2 {Tcl_GetEncoding: existing encoding} {testencoding} {
testencoding create foo [namespace origin toutf] [namespace origin fromutf]
set x {}
encoding convertto foo abcd
testencoding delete foo
- set x
+ return $x
} {{fromutf }}
test encoding-1.3 {Tcl_GetEncoding: load encoding} {
list [encoding convertto jis0208 \u4e4e] \
@@ -61,71 +71,77 @@ test encoding-1.3 {Tcl_GetEncoding: load encoding} {
test encoding-2.1 {Tcl_FreeEncoding: refcount == 0} {
encoding convertto jis0208 \u4e4e
} {8C}
-test encoding-2.2 {Tcl_FreeEncoding: refcount != 0} {testencoding} {
+test encoding-2.2 {Tcl_FreeEncoding: refcount != 0} -setup {
set system [encoding system]
set path [encoding dirs]
+} -constraints {testencoding} -body {
encoding system shiftjis ;# incr ref count
encoding dirs [list [pwd]]
- set x [encoding convertto shiftjis \u4e4e] ;# old one found
+ set x [encoding convertto shiftjis \u4e4e] ;# old one found
encoding system iso8859-1
- llength shiftjis
+ llength shiftjis ;# Shimmer away any cache of Tcl_Encoding
lappend x [catch {encoding convertto shiftjis \u4e4e} msg] $msg
+} -cleanup {
encoding system iso8859-1
encoding dirs $path
encoding system $system
- set x
-} "\u008c\u00c1 1 {unknown encoding \"shiftjis\"}"
+} -result "\u008c\u00c1 1 {unknown encoding \"shiftjis\"}"
-test encoding-3.1 {Tcl_GetEncodingName, NULL} {
+test encoding-3.1 {Tcl_GetEncodingName, NULL} -setup {
set old [encoding system]
+} -body {
encoding system shiftjis
- set x [encoding system]
+ encoding system
+} -cleanup {
encoding system $old
- set x
-} {shiftjis}
-test encoding-3.2 {Tcl_GetEncodingName, non-null} {
+} -result {shiftjis}
+test encoding-3.2 {Tcl_GetEncodingName, non-null} -setup {
set old [fconfigure stdout -encoding]
+} -body {
fconfigure stdout -encoding jis0208
- set x [fconfigure stdout -encoding]
+ fconfigure stdout -encoding
+} -cleanup {
fconfigure stdout -encoding $old
- set x
-} {jis0208}
+} -result {jis0208}
-test encoding-4.1 {Tcl_GetEncodingNames} {testencoding} {
+test encoding-4.1 {Tcl_GetEncodingNames} -constraints {testencoding} -setup {
cd [makeDirectory tmp]
makeDirectory [file join tmp encoding]
- makeFile {} [file join tmp encoding junk.enc]
- makeFile {} [file join tmp encoding junk2.enc]
set path [encoding dirs]
encoding dirs {}
catch {unset encodings}
catch {unset x}
+} -body {
foreach encoding [encoding names] {
set encodings($encoding) 1
}
+ makeFile {} [file join tmp encoding junk.enc]
+ makeFile {} [file join tmp encoding junk2.enc]
encoding dirs [list [file join [pwd] encoding]]
foreach encoding [encoding names] {
if {![info exists encodings($encoding)]} {
lappend x $encoding
}
}
+ lsort $x
+} -cleanup {
encoding dirs $path
cd [workingDirectory]
removeFile [file join tmp encoding junk2.enc]
removeFile [file join tmp encoding junk.enc]
removeDirectory [file join tmp encoding]
removeDirectory tmp
- lsort $x
-} {junk junk2}
+} -result {junk junk2}
-test encoding-5.1 {Tcl_SetSystemEncoding} {
+test encoding-5.1 {Tcl_SetSystemEncoding} -setup {
set old [encoding system]
+} -body {
encoding system jis0208
- set x [encoding convertto \u4e4e]
+ encoding convertto \u4e4e
+} -cleanup {
encoding system iso8859-1
encoding system $old
- set x
-} {8C}
+} -result {8C}
test encoding-5.2 {Tcl_SetSystemEncoding: test ref count} {
set old [encoding system]
encoding system $old
@@ -139,7 +155,7 @@ test encoding-6.1 {Tcl_CreateEncoding: new} {testencoding} {
encoding convertfrom foo abcd
encoding convertto foo abcd
testencoding delete foo
- set x
+ return $x
} {{toutf 1} {fromutf 2}}
test encoding-6.2 {Tcl_CreateEncoding: replace encoding} {testencoding} {
testencoding create foo [namespace code {toutf a}] \
@@ -148,7 +164,7 @@ test encoding-6.2 {Tcl_CreateEncoding: replace encoding} {testencoding} {
encoding convertfrom foo abcd
encoding convertto foo abcd
testencoding delete foo
- set x
+ return $x
} {{toutf a} {fromutf b}}
test encoding-7.1 {Tcl_ExternalToUtfDString: small buffer} {
@@ -170,11 +186,11 @@ test encoding-8.1 {Tcl_ExternalToUtf} {
puts -nonewline $f "ab\x8c\xc1g"
close $f
set f [open [file join [temporaryDirectory] dummy] r]
- fconfigure $f -translation binary -encoding shiftjis
+ fconfigure $f -translation binary -encoding shiftjis
set x [read $f]
close $f
file delete [file join [temporaryDirectory] dummy]
- set x
+ return $x
} "ab\u4e4eg"
test encoding-9.1 {Tcl_UtfToExternalDString: small buffer} {
@@ -202,7 +218,7 @@ test encoding-10.1 {Tcl_UtfToExternal} {
set x [read $f]
close $f
file delete [file join [temporaryDirectory] dummy]
- set x
+ return $x
} "ab\x8c\xc1g"
proc viewable {str} {
@@ -222,7 +238,7 @@ test encoding-11.1 {LoadEncodingFile: unknown encoding} {testencoding} {
set path [encoding dirs]
encoding system iso8859-1
encoding dirs {}
- llength jis0208
+ llength jis0208 ;# Shimmer any cached Tcl_Encoding in shared literal
set x [list [catch {encoding convertto jis0208 \u4e4e} msg] $msg]
encoding dirs $path
encoding system $system
@@ -243,27 +259,28 @@ test encoding-11.5 {LoadEncodingFile: escape file} {
test encoding-11.5.1 {LoadEncodingFile: escape file} {
viewable [encoding convertto iso2022-jp \u4e4e]
} [viewable "\x1b\$B8C\x1b(B"]
-test encoding-11.6 {LoadEncodingFile: invalid file} {testencoding} {
+test encoding-11.6 {LoadEncodingFile: invalid file} -constraints {testencoding} -setup {
set system [encoding system]
set path [encoding dirs]
encoding system iso8859-1
+} -body {
cd [temporaryDirectory]
encoding dirs [file join tmp encoding]
makeDirectory tmp
makeDirectory [file join tmp encoding]
set f [open [file join tmp encoding splat.enc] w]
- fconfigure $f -translation binary
+ fconfigure $f -translation binary
puts $f "abcdefghijklmnop"
close $f
- set x [list [catch {encoding convertto splat \u4e4e} msg] $msg]
+ encoding convertto splat \u4e4e
+} -returnCodes error -cleanup {
file delete [file join [temporaryDirectory] tmp encoding splat.enc]
removeDirectory [file join tmp encoding]
removeDirectory tmp
cd [workingDirectory]
encoding dirs $path
encoding system $system
- set x
-} {1 {invalid encoding file "splat"}}
+} -result {invalid encoding file "splat"}
# OpenEncodingFile is fully tested by the rest of the tests in this file.
@@ -273,7 +290,7 @@ test encoding-12.1 {LoadTableEncoding: normal encoding} {
append x [encoding convertfrom iso8859-3 \xD5]
} "\xd5?\u120"
test encoding-12.2 {LoadTableEncoding: single-byte encoding} {
- set x [encoding convertto iso8859-3 ab\u0120g]
+ set x [encoding convertto iso8859-3 ab\u0120g]
append x [encoding convertfrom iso8859-3 ab\xD5g]
} "ab\xd5gab\u120g"
test encoding-12.3 {LoadTableEncoding: multi-byte encoding} {
@@ -289,7 +306,7 @@ test encoding-12.5 {LoadTableEncoding: symbol encoding} {
append x [encoding convertto symbol \u67]
append x [encoding convertfrom symbol \x67]
} "\x67\x67\u3b3"
-test encoding-12.6 {LoadTableEncoding: overflow in char value} fullutf {
+test encoding-12.6 {LoadTableEncoding: overflow in char value} ucs2 {
encoding convertto iso8859-3 \U010000
} "?"
@@ -304,21 +321,85 @@ test encoding-14.1 {BinaryProc} {
test encoding-15.1 {UtfToUtfProc} {
encoding convertto utf-8 \xa3
} "\xc2\xa3"
-
-test encoding-15.2 {UtfToUtfProc null character output} {
- set x \u0000
- set y [encoding convertto utf-8 \u0000]
- set y [bytestring $y]
+test encoding-15.2 {UtfToUtfProc null character output} testbytestring {
+ binary scan [testbytestring [encoding convertto utf-8 \u0000]] H* z
+ set z
+} 00
+test encoding-15.3 {UtfToUtfProc null character input} teststringbytes {
+ set y [encoding convertfrom utf-8 [encoding convertto utf-8 \u0000]]
+ binary scan [teststringbytes $y] H* z
+ set z
+} c080
+test encoding-15.4 {UtfToUtfProc emoji character input} -body {
+ set x \xED\xA0\xBD\xED\xB8\x82
+ set y [encoding convertfrom utf-8 \xED\xA0\xBD\xED\xB8\x82]
+ list [string length $x] $y
+} -result "6 \uD83D\uDE02"
+test encoding-15.5 {UtfToUtfProc emoji character input} {
+ set x \xF0\x9F\x98\x82
+ set y [encoding convertfrom utf-8 \xF0\x9F\x98\x82]
+ list [string length $x] $y
+} "4 \uD83D\uDE02"
+test encoding-15.6 {UtfToUtfProc emoji character output} {
+ set x \uDE02\uD83D\uDE02\uD83D
+ set y [encoding convertto utf-8 \uDE02\uD83D\uDE02\uD83D]
+ binary scan $y H* z
+ list [string length $y] $z
+} {10 edb882f09f9882eda0bd}
+test encoding-15.7 {UtfToUtfProc emoji character output} {
+ set x \uDE02\uD83D\uD83D
+ set y [encoding convertto utf-8 \uDE02\uD83D\uD83D]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {3 9 edb882eda0bdeda0bd}
+test encoding-15.8 {UtfToUtfProc emoji character output} {
+ set x \uDE02\uD83D\xE9
+ set y [encoding convertto utf-8 \uDE02\uD83D\xE9]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {3 8 edb882eda0bdc3a9}
+test encoding-15.9 {UtfToUtfProc emoji character output} {
+ set x \uDE02\uD83DX
+ set y [encoding convertto utf-8 \uDE02\uD83DX]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {3 7 edb882eda0bd58}
+test encoding-15.10 {UtfToUtfProc high surrogate character output} {
+ set x \uDE02\xE9
+ set y [encoding convertto utf-8 \uDE02\xE9]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {2 5 edb882c3a9}
+test encoding-15.11 {UtfToUtfProc low surrogate character output} {
+ set x \uDA02\xE9
+ set y [encoding convertto utf-8 \uDA02\xE9]
binary scan $y H* z
- list [string bytelength $x] [string bytelength $y] $z
-} {2 1 00}
-
-test encoding-15.3 {UtfToUtfProc null character input} {
- set x [bytestring \x00]
- set y [encoding convertfrom utf-8 $x]
- binary scan [encoding convertto identity $y] H* z
- list [string bytelength $x] [string bytelength $y] $z
-} {1 2 c080}
+ list [string length $x] [string length $y] $z
+} {2 5 eda882c3a9}
+test encoding-15.12 {UtfToUtfProc high surrogate character output} {
+ set x \uDE02Y
+ set y [encoding convertto utf-8 \uDE02Y]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {2 4 edb88259}
+test encoding-15.13 {UtfToUtfProc low surrogate character output} {
+ set x \uDA02Y
+ set y [encoding convertto utf-8 \uDA02Y]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {2 4 eda88259}
+test encoding-15.14 {UtfToUtfProc high surrogate character output} {
+ set x \uDE02
+ set y [encoding convertto utf-8 \uDE02]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {1 3 edb882}
+test encoding-15.15 {UtfToUtfProc low surrogate character output} {
+ set x \uDA02
+ set y [encoding convertto utf-8 \uDA02]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {1 3 eda882}
test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} {
set x \xF0\xA0\xA1\xC2
set y [encoding convertfrom utf-8 \xF0\xA0\xA1\xC2]
@@ -329,9 +410,24 @@ test encoding-16.1 {UnicodeToUtfProc} {
set val [encoding convertfrom unicode NN]
list $val [format %x [scan $val %c]]
} "\u4e4e 4e4e"
-
-test encoding-17.1 {UtfToUnicodeProc} {
-} {}
+test encoding-16.2 {UnicodeToUtfProc} -constraints fullutf -body {
+ set val [encoding convertfrom unicode "\xD8\xD8\xDC\xDC"]
+ list $val [format %x [scan $val %c]]
+} -result "\U460DC 460dc"
+test encoding-16.3 {UnicodeToUtfProc} -body {
+ set val [encoding convertfrom unicode "\xDC\xDC"]
+ list $val [format %x [scan $val %c]]
+} -result "\uDCDC dcdc"
+
+test encoding-17.1 {UtfToUnicodeProc} -constraints fullutf -body {
+ encoding convertto unicode "\U460DC"
+} -result "\xD8\xD8\xDC\xDC"
+test encoding-17.2 {UtfToUnicodeProc} -body {
+ encoding convertto unicode "\uDCDC"
+} -result "\xDC\xDC"
+test encoding-17.3 {UtfToUnicodeProc} -body {
+ encoding convertto unicode "\uD8D8"
+} -result "\xD8\xD8"
test encoding-18.1 {TableToUtfProc} {
} {}
@@ -397,44 +493,41 @@ test encoding-23.3 {iso2022-jp escape encoding test} {
fconfigure $fid -encoding iso2022-jp
set data [read $fid 50]
close $fid
- set data
+ return $data
} [string range $iso2022uniData 0 49] ; # 0 .. 49 inclusive == 50
cd [workingDirectory]
-test encoding-24.1 {EscapeFreeProc on open channels} -constraints {
- exec
-} -setup {
- # Bug #524674 input
- set file [makeFile {
+# Code to make the next few tests more intelligible; the code being tested
+# should be in the body of the test!
+proc runInSubprocess {contents {filename iso2022.tcl}} {
+ set theFile [makeFile $contents $filename]
+ try {
+ exec [interpreter] $theFile
+ } finally {
+ removeFile $theFile
+ }
+}
+
+test encoding-24.1 {EscapeFreeProc on open channels} exec {
+ runInSubprocess {
set f [open [file join [file dirname [info script]] iso2022.txt]]
fconfigure $f -encoding iso2022-jp
gets $f
- } iso2022.tcl]
-} -body {
- exec [interpreter] $file
-} -cleanup {
- removeFile iso2022.tcl
-} -result {}
-
-test encoding-24.2 {EscapeFreeProc on open channels} -constraints {
- exec
-} -setup {
+ }
+} {}
+test encoding-24.2 {EscapeFreeProc on open channels} {exec} {
# Bug #524674 output
- set file [makeFile {
+ viewable [runInSubprocess {
encoding system cp1252; # Bug #2891556 crash revelator
fconfigure stdout -encoding iso2022-jp
puts ab\u4e4e\u68d9g
+ set env(TCL_FINALIZE_ON_EXIT) 1
exit
- } iso2022.tcl]
-} -body {
- viewable [exec [interpreter] $file]
-} -cleanup {
- removeFile iso2022.tcl
-} -result "ab\x1b\$B8C\x1b\$(DD%\x1b(Bg (ab\\u001b\$B8C\\u001b\$(DD%\\u001b(Bg)"
-
+ }]
+} "ab\x1b\$B8C\x1b\$(DD%\x1b(Bg (ab\\u001b\$B8C\\u001b\$(DD%\\u001b(Bg)"
test encoding-24.3 {EscapeFreeProc on open channels} {stdio} {
- # Bug #219314 - if we don't free escape encodings correctly on
- # channel closure, we go boom
+ # Bug #219314 - if we don't free escape encodings correctly on channel
+ # closure, we go boom
set file [makeFile {
encoding system iso2022-jp
set a "\u4e4e\u4e5e\u4e5f"; # 3 Japanese Kanji letters
@@ -503,18 +596,14 @@ proc foreach-jisx0208 {varName command} {
} {
if {[llength $range] == 2} {
# for adhoc range. simple {first last}. inclusive.
- set first [scan [lindex $range 0] %x]
- set last [scan [lindex $range 1] %x]
+ scan $range %x%x first last
for {set i $first} {$i <= $last} {incr i} {
set code $i
uplevel 1 $command
}
} elseif {[llength $range] == 4} {
# for uniform range.
- set h0 [scan [lindex $range 0] %x]
- set l0 [scan [lindex $range 1] %x]
- set hend [scan [lindex $range 2] %x]
- set lend [scan [lindex $range 3] %x]
+ scan $range %x%x%x%x h0 l0 hend lend
for {set hi $h0} {$hi <= $hend} {incr hi} {
for {set lo $l0} {$lo <= $lend} {incr lo} {
set code [expr {$hi << 8 | ($lo & 0xff)}]
@@ -558,7 +647,7 @@ proc channel-diff {fa fb} {
binary scan [lindex $lb 1] H* got
lappend diff [list $code $expected $got]
}
- set diff
+ return $diff
}
# Create char tables.
@@ -577,8 +666,9 @@ file copy -force cp932.chars shiftjis.chars
set NUM 0
foreach from {cp932 shiftjis euc-jp iso2022-jp} {
foreach to {cp932 shiftjis euc-jp iso2022-jp} {
- test encoding-25.[incr NUM] "jisx0208 $from => $to" {
+ test encoding-25.[incr NUM] "jisx0208 $from => $to" -setup {
cd [temporaryDirectory]
+ } -body {
set f [open $from.chars]
fconfigure $f -encoding $from
set out [open $from.$to.tcltestout w]
@@ -586,40 +676,35 @@ foreach from {cp932 shiftjis euc-jp iso2022-jp} {
puts -nonewline $out [read $f]
close $out
close $f
-
# then compare $to.chars <=> $from.to.tcltestout as binary.
- set fa [open $to.chars]
- fconfigure $fa -encoding binary
- set fb [open $from.$to.tcltestout]
- fconfigure $fb -encoding binary
- set diff [channel-diff $fa $fb]
+ set fa [open $to.chars rb]
+ set fb [open $from.$to.tcltestout rb]
+ channel-diff $fa $fb
+ # Difference should be empty.
+ } -cleanup {
close $fa
close $fb
-
- # Difference should be empty.
- set diff
- } {}
+ } -result {}
}
}
-testConstraint testgetdefenc [llength [info commands testgetdefenc]]
-
test encoding-26.0 {Tcl_GetDefaultEncodingDir} -constraints {
- testgetdefenc
+ testgetdefenc
} -setup {
- set origDir [testgetdefenc]
- testsetdefenc slappy
+ set origDir [testgetdefenc]
+ testsetdefenc slappy
} -body {
- testgetdefenc
+ testgetdefenc
} -cleanup {
- testsetdefenc $origDir
+ testsetdefenc $origDir
} -result slappy
file delete {*}[glob -directory [temporaryDirectory] *.chars *.tcltestout]
# ===> Cut here <===
-# EscapeFreeProc, GetTableEncoding, unilen
-# are fully tested by the rest of this file
+# EscapeFreeProc, GetTableEncoding, unilen are fully tested by the rest of
+# this file.
+
test encoding-27.1 {encoding dirs basic behavior} -returnCodes error -body {
encoding dirs ? ?
@@ -629,6 +714,21 @@ test encoding-27.2 {encoding dirs basic behavior} -returnCodes error -body {
} -result "expected directory list but got \"\{not a list\""
}
+
+
+test encoding-28.0 {all encodings load} -body {
+ set string hello
+ foreach name [encoding names] {
+ incr count
+ encoding convertto $name $string
+
+ # discard the cached internal representation of Tcl_Encoding
+ # Unfortunately, without this, encoding 2-1 fails.
+ llength $name
+ }
+ return $count
+} -result 81
+
runtests
}
@@ -637,3 +737,7 @@ runtests
namespace delete ::tcl::test::encoding
::tcltest::cleanupTests
return
+
+# Local Variables:
+# mode: tcl
+# End: