# This file contains a collection of tests for tclEncoding.c # Sourcing this file into Tcl runs the tests and generates output for errors. # No output means no errors were found. # # Copyright © 1997 Sun Microsystems, Inc. # Copyright © 1998-1999 Scriptics Corporation. # # See the file "license.terms" for information on usage and redistribution of # this file, and for a DISCLAIMER OF ALL WARRANTIES. if {"::tcltest" ni [namespace children]} { package require tcltest 2.5 namespace import -force ::tcltest::* } namespace eval ::tcl::test::encoding { variable x catch { ::tcltest::loadTestedCommands package require -exact tcl::test [info patchlevel] } proc toutf {args} { variable x lappend x "toutf $args" } proc fromutf {args} { variable x lappend x "fromutf $args" } proc runtests {} { variable x # Some tests require the testencoding command testConstraint testencoding [llength [info commands testencoding]] testConstraint testbytestring [llength [info commands testbytestring]] testConstraint teststringbytes [llength [info commands teststringbytes]] testConstraint exec [llength [info commands exec]] testConstraint testgetencpath [llength [info commands testgetencpath]] testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] testConstraint utf32 [expr {[testConstraint fullutf] && [string length [format %c 0x10000]] == 1}] # TclInitEncodingSubsystem is tested by the rest of this file # TclFinalizeEncodingSubsystem is not currently tested test encoding-1.1 {Tcl_GetEncoding: system encoding} -setup { set old [encoding system] } -constraints {testencoding} -body { testencoding create foo [namespace origin toutf] [namespace origin fromutf] encoding system foo set x {} encoding convertto abcd return $x } -cleanup { encoding system $old testencoding delete foo } -result {{fromutf }} test encoding-1.2 {Tcl_GetEncoding: existing encoding} {testencoding} { testencoding create foo [namespace origin toutf] [namespace origin fromutf] set x {} encoding convertto foo abcd testencoding delete foo return $x } {{fromutf }} test encoding-1.3 {Tcl_GetEncoding: load encoding} { list [encoding convertto jis0208 乎] \ [encoding convertfrom jis0208 8C] } "8C 乎" test encoding-2.1 {Tcl_FreeEncoding: refcount == 0} { encoding convertto jis0208 乎 } {8C} test encoding-2.2 {Tcl_FreeEncoding: refcount != 0} -setup { set system [encoding system] set path [encoding dirs] } -constraints {testencoding} -body { encoding system shiftjis ;# incr ref count encoding dirs [list [pwd]] set x [encoding convertto shiftjis 乎] ;# old one found encoding system iso8859-1 llength shiftjis ;# Shimmer away any cache of Tcl_Encoding lappend x [catch {encoding convertto shiftjis 乎} msg] $msg } -cleanup { encoding system iso8859-1 encoding dirs $path encoding system $system } -result "\x8C\xC1 1 {unknown encoding \"shiftjis\"}" test encoding-3.1 {Tcl_GetEncodingName, NULL} -setup { set old [encoding system] } -body { encoding system shiftjis encoding system } -cleanup { encoding system $old } -result {shiftjis} test encoding-3.2 {Tcl_GetEncodingName, non-null} -setup { set old [fconfigure stdout -encoding] } -body { fconfigure stdout -encoding jis0208 fconfigure stdout -encoding } -cleanup { fconfigure stdout -encoding $old } -result {jis0208} test encoding-4.1 {Tcl_GetEncodingNames} -constraints {testencoding} -setup { cd [makeDirectory tmp] makeDirectory [file join tmp encoding] set path [encoding dirs] encoding dirs {} catch {unset encodings} catch {unset x} } -body { foreach encoding [encoding names] { set encodings($encoding) 1 } makeFile {} [file join tmp encoding junk.enc] makeFile {} [file join tmp encoding junk2.enc] encoding dirs [list [file join [pwd] encoding]] foreach encoding [encoding names] { if {![info exists encodings($encoding)]} { lappend x $encoding } } lsort $x } -cleanup { encoding dirs $path cd [workingDirectory] removeFile [file join tmp encoding junk2.enc] removeFile [file join tmp encoding junk.enc] removeDirectory [file join tmp encoding] removeDirectory tmp } -result {junk junk2} test encoding-5.1 {Tcl_SetSystemEncoding} -setup { set old [encoding system] } -body { encoding system jis0208 encoding convertto 乎 } -cleanup { encoding system iso8859-1 encoding system $old } -result {8C} test encoding-5.2 {Tcl_SetSystemEncoding: test ref count} { set old [encoding system] encoding system $old string compare $old [encoding system] } {0} test encoding-6.1 {Tcl_CreateEncoding: new} {testencoding} { testencoding create foo [namespace code {toutf 1}] \ [namespace code {fromutf 2}] set x {} encoding convertfrom foo abcd encoding convertto foo abcd testencoding delete foo return $x } {{toutf 1} {fromutf 2}} test encoding-6.2 {Tcl_CreateEncoding: replace encoding} {testencoding} { testencoding create foo [namespace code {toutf a}] \ [namespace code {fromutf b}] set x {} encoding convertfrom foo abcd encoding convertto foo abcd testencoding delete foo return $x } {{toutf a} {fromutf b}} test encoding-7.1 {Tcl_ExternalToUtfDString: small buffer} { encoding convertfrom jis0208 8c8c8c8c } 吾吾吾吾 test encoding-7.2 {Tcl_UtfToExternalDString: big buffer} { set a 8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C append a $a append a $a append a $a append a $a set x [encoding convertfrom jis0208 $a] list [string length $x] [string index $x 0] } "512 乎" test encoding-8.1 {Tcl_ExternalToUtf} { set f [open [file join [temporaryDirectory] dummy] w] fconfigure $f -translation binary -encoding iso8859-1 puts -nonewline $f "ab\x8C\xC1g" close $f set f [open [file join [temporaryDirectory] dummy] r] fconfigure $f -translation binary -encoding shiftjis set x [read $f] close $f file delete [file join [temporaryDirectory] dummy] return $x } ab乎g test encoding-9.1 {Tcl_UtfToExternalDString: small buffer} { encoding convertto jis0208 "吾吾吾吾" } {8c8c8c8c} test encoding-9.2 {Tcl_UtfToExternalDString: big buffer} { set a 乎乎乎乎乎乎乎乎 append a $a append a $a append a $a append a $a append a $a append a $a set x [encoding convertto jis0208 $a] list [string length $x] [string range $x 0 1] } "1024 8C" test encoding-10.1 {Tcl_UtfToExternal} { set f [open [file join [temporaryDirectory] dummy] w] fconfigure $f -translation binary -encoding shiftjis puts -nonewline $f ab乎g close $f set f [open [file join [temporaryDirectory] dummy] r] fconfigure $f -translation binary -encoding iso8859-1 set x [read $f] close $f file delete [file join [temporaryDirectory] dummy] return $x } "ab\x8C\xC1g" proc viewable {str} { set res "" foreach c [split $str {}] { if {[string is print $c] && [string is ascii $c]} { append res $c } else { append res "\\u[format %4.4X [scan $c %c]]" } } return "$str ($res)" } test encoding-11.1 {LoadEncodingFile: unknown encoding} {testencoding} { set system [encoding system] set path [encoding dirs] encoding system iso8859-1 encoding dirs {} llength jis0208 ;# Shimmer any cached Tcl_Encoding in shared literal set x [list [catch {encoding convertto jis0208 乎} msg] $msg] encoding dirs $path encoding system $system lappend x [encoding convertto jis0208 乎] } {1 {unknown encoding "jis0208"} 8C} test encoding-11.2 {LoadEncodingFile: single-byte} { encoding convertfrom jis0201 \xA1 } 。 test encoding-11.3 {LoadEncodingFile: double-byte} { encoding convertfrom jis0208 8C } 乎 test encoding-11.4 {LoadEncodingFile: multi-byte} { encoding convertfrom shiftjis \x8C\xC1 } 乎 test encoding-11.5 {LoadEncodingFile: escape file} { viewable [encoding convertto iso2022 乎] } [viewable "\x1B\$B8C\x1B(B"] test encoding-11.5.1 {LoadEncodingFile: escape file} { viewable [encoding convertto iso2022-jp 乎] } [viewable "\x1B\$B8C\x1B(B"] test encoding-11.6 {LoadEncodingFile: invalid file} -constraints {testencoding} -setup { set system [encoding system] set path [encoding dirs] encoding system iso8859-1 } -body { cd [temporaryDirectory] encoding dirs [file join tmp encoding] makeDirectory tmp makeDirectory [file join tmp encoding] set f [open [file join tmp encoding splat.enc] w] fconfigure $f -translation binary puts $f "abcdefghijklmnop" close $f encoding convertto splat 乎 } -returnCodes error -cleanup { file delete [file join [temporaryDirectory] tmp encoding splat.enc] removeDirectory [file join tmp encoding] removeDirectory tmp cd [workingDirectory] encoding dirs $path encoding system $system } -result {invalid encoding file "splat"} test encoding-11.8 {encoding: extended Unicode UTF-16} { viewable [encoding convertto utf-16le 😹] } {=Ø9Þ (=\u00D89\u00DE)} test encoding-11.9 {encoding: extended Unicode UTF-16} { viewable [encoding convertto utf-16be 😹] } {Ø=Þ9 (\u00D8=\u00DE9)} test encoding-11.10 {encoding: extended Unicode UTF-32} { viewable [encoding convertto utf-32le 😹] } "9\xF6\x01\x00 (9\\u00F6\\u0001\\u0000)" test encoding-11.11 {encoding: extended Unicode UTF-32} { viewable [encoding convertto utf-32be 😹] } "\x00\x01\xF69 (\\u0000\\u0001\\u00F69)" # OpenEncodingFile is fully tested by the rest of the tests in this file. test encoding-12.1 {LoadTableEncoding: normal encoding} { set x [encoding convertto iso8859-3 Ġ] append x [encoding convertto -nocomplain iso8859-3 Õ] append x [encoding convertfrom iso8859-3 Õ] } "Õ?Ġ" test encoding-12.2 {LoadTableEncoding: single-byte encoding} { set x [encoding convertto iso8859-3 abĠg] append x [encoding convertfrom iso8859-3 abÕg] } "abÕgabĠg" test encoding-12.3 {LoadTableEncoding: multi-byte encoding} { set x [encoding convertto shiftjis ab乎g] append x [encoding convertfrom shiftjis ab\x8C\xC1g] } "ab\x8C\xC1gab乎g" test encoding-12.4 {LoadTableEncoding: double-byte encoding} { set x [encoding convertto jis0208 乎α] append x [encoding convertfrom jis0208 8C&A] } "8C&A乎α" test encoding-12.5 {LoadTableEncoding: symbol encoding} { set x [encoding convertto symbol γ] append x [encoding convertto symbol g] append x [encoding convertfrom symbol g] } "ggγ" test encoding-13.1 {LoadEscapeTable} { viewable [set x [encoding convertto iso2022 ab乎棙g]] } [viewable "ab\x1B\$B8C\x1B\$\(DD%\x1B(Bg"] test encoding-15.1 {UtfToUtfProc} { encoding convertto utf-8 £ } "\xC2\xA3" test encoding-15.2 {UtfToUtfProc null character output} testbytestring { binary scan [testbytestring [encoding convertto utf-8 \x00]] H* z set z } 00 test encoding-15.3 {UtfToUtfProc null character input} teststringbytes { set y [encoding convertfrom utf-8 [encoding convertto utf-8 \x00]] binary scan [teststringbytes $y] H* z set z } c080 test encoding-15.4 {UtfToUtfProc emoji character input} -body { set x \xED\xA0\xBD\xED\xB8\x82 set y [encoding convertfrom -nocomplain utf-8 \xED\xA0\xBD\xED\xB8\x82] list [string length $x] $y } -result "6 \uD83D\uDE02" test encoding-15.5 {UtfToUtfProc emoji character input} { set x \xF0\x9F\x98\x82 set y [encoding convertfrom utf-8 \xF0\x9F\x98\x82] list [string length $x] $y } "4 😂" test encoding-15.6 {UtfToUtfProc emoji character output} utf32 { set x \uDE02\uD83D\uDE02\uD83D set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uDE02\uD83D] binary scan $y H* z list [string length $y] $z } {12 edb882eda0bdedb882eda0bd} test encoding-15.7 {UtfToUtfProc emoji character output} { set x \uDE02\uD83D\uD83D set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uD83D] binary scan $y H* z list [string length $x] [string length $y] $z } {3 9 edb882eda0bdeda0bd} test encoding-15.8 {UtfToUtfProc emoji character output} { set x \uDE02\uD83Dé set y [encoding convertto -nocomplain utf-8 \uDE02\uD83Dé] binary scan $y H* z list [string length $x] [string length $y] $z } {3 8 edb882eda0bdc3a9} test encoding-15.9 {UtfToUtfProc emoji character output} { set x \uDE02\uD83DX set y [encoding convertto -nocomplain utf-8 \uDE02\uD83DX] binary scan $y H* z list [string length $x] [string length $y] $z } {3 7 edb882eda0bd58} test encoding-15.10 {UtfToUtfProc high surrogate character output} { set x \uDE02é set y [encoding convertto -nocomplain utf-8 \uDE02é] binary scan $y H* z list [string length $x] [string length $y] $z } {2 5 edb882c3a9} test encoding-15.11 {UtfToUtfProc low surrogate character output} { set x \uDA02é set y [encoding convertto -nocomplain utf-8 \uDA02é] binary scan $y H* z list [string length $x] [string length $y] $z } {2 5 eda882c3a9} test encoding-15.12 {UtfToUtfProc high surrogate character output} { set x \uDE02Y set y [encoding convertto -nocomplain utf-8 \uDE02Y] binary scan $y H* z list [string length $x] [string length $y] $z } {2 4 edb88259} test encoding-15.13 {UtfToUtfProc low surrogate character output} { set x \uDA02Y set y [encoding convertto -nocomplain utf-8 \uDA02Y] binary scan $y H* z list [string length $x] [string length $y] $z } {2 4 eda88259} test encoding-15.14 {UtfToUtfProc high surrogate character output} { set x \uDE02 set y [encoding convertto -nocomplain utf-8 \uDE02] binary scan $y H* z list [string length $x] [string length $y] $z } {1 3 edb882} test encoding-15.15 {UtfToUtfProc low surrogate character output} { set x \uDA02 set y [encoding convertto -nocomplain utf-8 \uDA02] binary scan $y H* z list [string length $x] [string length $y] $z } {1 3 eda882} test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} { set x \xF0\xA0\xA1\xC2 set y [encoding convertfrom -nocomplain utf-8 \xF0\xA0\xA1\xC2] list [string length $x] $y } "4 \xF0\xA0\xA1\xC2" test encoding-15.17 {UtfToUtfProc emoji character output} { set x 😂 set y [encoding convertto utf-8 😂] binary scan $y H* z list [string length $y] $z } {4 f09f9882} test encoding-15.18 {UtfToUtfProc CESU-8 6-byte sequence} { set y [encoding convertto cesu-8 \U10000] binary scan $y H* z list [string length $y] $z } {6 eda080edb080} test encoding-15.19 {UtfToUtfProc CESU-8 upper surrogate} { set y [encoding convertto cesu-8 \uD800] binary scan $y H* z list [string length $y] $z } {3 eda080} test encoding-15.20 {UtfToUtfProc CESU-8 lower surrogate} { set y [encoding convertto cesu-8 \uDC00] binary scan $y H* z list [string length $y] $z } {3 edb080} test encoding-15.21 {UtfToUtfProc CESU-8 noncharacter} { set y [encoding convertto cesu-8 \uFFFF] binary scan $y H* z list [string length $y] $z } {3 efbfbf} test encoding-15.22 {UtfToUtfProc CESU-8 bug [048dd20b4171c8da]} { set y [encoding convertto cesu-8 \x80] binary scan $y H* z list [string length $y] $z } {2 c280} test encoding-15.23 {UtfToUtfProc CESU-8 bug [048dd20b4171c8da]} { set y [encoding convertto cesu-8 \u100] binary scan $y H* z list [string length $y] $z } {2 c480} test encoding-15.24 {UtfToUtfProc CESU-8 bug [048dd20b4171c8da]} { set y [encoding convertto cesu-8 \u3FF] binary scan $y H* z list [string length $y] $z } {2 cfbf} test encoding-15.25 {UtfToUtfProc CESU-8} { encoding convertfrom cesu-8 \x00 } \x00 test encoding-15.26 {UtfToUtfProc CESU-8} { encoding convertfrom cesu-8 \xC0\x80 } \x00 test encoding-15.27 {UtfToUtfProc -strict CESU-8} { encoding convertfrom -strict cesu-8 \x00 } \x00 test encoding-15.28 {UtfToUtfProc -strict CESU-8} -body { encoding convertfrom -strict cesu-8 \xC0\x80 } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} test encoding-15.29 {UtfToUtfProc CESU-8} { encoding convertto cesu-8 \x00 } \x00 test encoding-15.30 {UtfToUtfProc -strict CESU-8} { encoding convertto -strict cesu-8 \x00 } \x00 test encoding-15.31 {UtfToUtfProc -strict CESU-8 (bytes F0-F4 are invalid)} -body { encoding convertfrom -strict cesu-8 \xF1\x86\x83\x9C } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xF1'} test encoding-16.1 {Utf16ToUtfProc} -body { set val [encoding convertfrom utf-16 NN] list $val [format %x [scan $val %c]] } -result "乎 4e4e" test encoding-16.2 {Utf16ToUtfProc} -body { set val [encoding convertfrom utf-16 "\xD8\xD8\xDC\xDC"] list $val [format %x [scan $val %c]] } -result "\U460DC 460dc" test encoding-16.3 {Utf16ToUtfProc} -body { set val [encoding convertfrom utf-16 "\xDC\xDC"] list $val [format %x [scan $val %c]] } -result "\uDCDC dcdc" test encoding-16.4 {Ucs2ToUtfProc} -body { set val [encoding convertfrom ucs-2 NN] list $val [format %x [scan $val %c]] } -result "乎 4e4e" test encoding-16.5 {Ucs2ToUtfProc} -body { set val [encoding convertfrom ucs-2 "\xD8\xD8\xDC\xDC"] list $val [format %x [scan $val %c]] } -result "\U460DC 460dc" test encoding-16.6 {Utf32ToUtfProc} -body { set val [encoding convertfrom utf-32le NN\0\0] list $val [format %x [scan $val %c]] } -result "乎 4e4e" test encoding-16.7 {Utf32ToUtfProc} -body { set val [encoding convertfrom utf-32be \0\0NN] list $val [format %x [scan $val %c]] } -result "乎 4e4e" test encoding-16.8 {Utf32ToUtfProc} -body { set val [encoding convertfrom -nocomplain utf-32 \x41\x00\x00\x41] list $val [format %x [scan $val %c]] } -result "\uFFFD fffd" test encoding-16.9 {Utf32ToUtfProc} -constraints utf32 -body { encoding convertfrom utf-32le \x00\xD8\x00\x00 } -result \uD800 test encoding-16.10 {Utf32ToUtfProc} -body { encoding convertfrom utf-32le \x00\xDC\x00\x00 } -result \uDC00 test encoding-16.11 {Utf32ToUtfProc} -body { encoding convertfrom utf-32le \x00\xD8\x00\x00\x00\xDC\x00\x00 } -result \uD800\uDC00 test encoding-16.12 {Utf32ToUtfProc} -constraints utf32 -body { encoding convertfrom utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00 } -result \uDC00\uD800 test encoding-16.13 {Utf16ToUtfProc} -body { encoding convertfrom utf-16le \x00\xD8 } -result \uD800 test encoding-16.14 {Utf16ToUtfProc} -body { encoding convertfrom utf-16le \x00\xDC } -result \uDC00 test encoding-16.15 {Utf16ToUtfProc} -constraints knownBug -body { encoding convertfrom utf-16le \x00\xD8\x00\xDC } -result \uD800\uDC00 test encoding-16.16 {Utf16ToUtfProc} -body { encoding convertfrom utf-16le \x00\xDC\x00\xD8 } -result \uDC00\uD800 test encoding-16.17 {Utf32ToUtfProc} -body { list [encoding convertfrom -strict -failindex idx utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00] [set idx] } -result {A 4} test encoding-16.9 { Utf16ToUtfProc, Tcl_UniCharToUtf, surrogate pairs in utf-16 } -body { apply [list {} { for {set i 0xD800} {$i < 0xDBFF} {incr i} { for {set j 0xDC00} {$j < 0xDFFF} {incr j} { set string [binary format S2 [list $i $j]] set status [catch { set decoded [encoding convertfrom utf-16be $string] set encoded [encoding convertto utf-16be $decoded] }] if {$status || ( $encoded ne $string )} { return [list [format %x $i] [format %x $j]] } } } return done } [namespace current]] } -result done test encoding-17.1 {UtfToUtf16Proc} -body { encoding convertto utf-16 "\U460DC" } -result "\xD8\xD8\xDC\xDC" test encoding-17.2 {UtfToUcs2Proc} -body { encoding convertfrom utf-16 [encoding convertto ucs-2 "\U460DC"] } -result "\uFFFD" test encoding-17.3 {UtfToUtf16Proc} -body { encoding convertto -nocomplain utf-16be "\uDCDC" } -result "\xDC\xDC" test encoding-17.4 {UtfToUtf16Proc} -body { encoding convertto -nocomplain utf-16le "\uD8D8" } -result "\xD8\xD8" test encoding-17.5 {UtfToUtf16Proc} -body { encoding convertto utf-32le "\U460DC" } -result "\xDC\x60\x04\x00" test encoding-17.6 {UtfToUtf16Proc} -body { encoding convertto utf-32be "\U460DC" } -result "\x00\x04\x60\xDC" test encoding-17.7 {UtfToUtf16Proc} -body { encoding convertto -strict utf-16be "\uDCDC" } -returnCodes error -result {unexpected character at index 0: 'U+00DCDC'} test encoding-17.8 {UtfToUtf16Proc} -body { encoding convertto -strict utf-16le "\uD8D8" } -returnCodes error -result {unexpected character at index 0: 'U+00D8D8'} test encoding-17.9 {Utf32ToUtfProc} -body { encoding convertfrom -strict utf-32 "\xFF\xFF\xFF\xFF" } -returnCodes error -result {unexpected byte sequence starting at index 0: '\xFF'} test encoding-17.10 {Utf32ToUtfProc} -body { encoding convertfrom -nocomplain utf-32 "\xFF\xFF\xFF\xFF" } -result \uFFFD test encoding-18.1 {TableToUtfProc on invalid input} -body { list [catch {encoding convertto jis0208 \\} res] $res } -result {1 {unexpected character at index 0: 'U+00005C'}} test encoding-18.2 {TableToUtfProc on invalid input with -strict} -body { list [catch {encoding convertto -strict jis0208 \\} res] $res } -result {1 {unexpected character at index 0: 'U+00005C'}} test encoding-18.3 {TableToUtfProc on invalid input with -strict -failindex} -body { list [catch {encoding convertto -strict -failindex pos jis0208 \\} res] $res $pos } -result {0 {} 0} test encoding-18.4 {TableToUtfProc on invalid input with -failindex -strict} -body { list [catch {encoding convertto -failindex pos -strict jis0208 \\} res] $res $pos } -result {0 {} 0} test encoding-18.5 {TableToUtfProc on invalid input with -failindex} -body { list [catch {encoding convertto -failindex pos jis0208 \\} res] $res $pos } -result {0 {} 0} test encoding-18.6 {TableToUtfProc on invalid input with -nocomplain} -body { list [catch {encoding convertto -nocomplain jis0208 \\} res] $res } -result {0 !)} test encoding-19.1 {TableFromUtfProc} -body { encoding convertfrom ascii AÁ } -result AÁ test encoding-19.2 {TableFromUtfProc} -body { encoding convertfrom -nocomplain ascii AÁ } -result AÁ test encoding-19.3 {TableFromUtfProc} -body { encoding convertfrom -strict ascii AÁ } -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\xC1'} test encoding-19.4 {TableFromUtfProc} -body { list [encoding convertfrom -failindex idx ascii AÁ] [set idx] } -result {A 1} test encoding-19.5 {TableFromUtfProc} -body { list [encoding convertfrom -failindex idx -strict ascii AÁ] [set idx] } -result {A 1} test encoding-19.6 {TableFromUtfProc} -body { list [encoding convertfrom -failindex idx -strict ascii AÁB] [set idx] } -result {A 1} test encoding-20.1 {TableFreefProc} { } {} test encoding-21.1 {EscapeToUtfProc} { } {} test encoding-22.1 {EscapeFromUtfProc} { } {} set iso2022encData "\x1B\$B;d\$I\$b\$G\$O!\"%A%C%W\$49XF~;~\$K\$4EPO?\$\$\$?\$@\$\$\$?\$4=;=j\$r%-%c%C%7%e%\"%&%H\$N:]\$N\x1B(B \x1B\$B>.@Z= 0} { if {$count} { incr count 1 ; # account for newline append out \n } append out $line incr count $num } close $fid if {[string compare $iso2022uniData $out]} { return -code error "iso2022-jp read in doesn't match original" } list $count $out } [list [string length $iso2022uniData] $iso2022uniData] test encoding-23.3 {iso2022-jp escape encoding test} { # read $fis reads size in chars, not raw bytes. set fid [open iso2022.txt r] fconfigure $fid -encoding iso2022-jp set data [read $fid 50] close $fid return $data } [string range $iso2022uniData 0 49] ; # 0 .. 49 inclusive == 50 cd [workingDirectory] # Code to make the next few tests more intelligible; the code being tested # should be in the body of the test! proc runInSubprocess {contents {filename iso2022.tcl}} { set theFile [makeFile $contents $filename] try { exec [interpreter] $theFile } finally { removeFile $theFile } } test encoding-24.1 {EscapeFreeProc on open channels} exec { runInSubprocess { set f [open [file join [file dirname [info script]] iso2022.txt]] fconfigure $f -encoding iso2022-jp gets $f } } {} test encoding-24.2 {EscapeFreeProc on open channels} {exec} { # Bug #524674 output viewable [runInSubprocess { encoding system cp1252; # Bug #2891556 crash revelator fconfigure stdout -encoding iso2022-jp puts ab乎棙g set env(TCL_FINALIZE_ON_EXIT) 1 exit }] } "ab\x1B\$B8C\x1B\$(DD%\x1B(Bg (ab\\u001B\$B8C\\u001B\$(DD%\\u001B(Bg)" test encoding-24.3 {EscapeFreeProc on open channels} {stdio} { # Bug #219314 - if we don't free escape encodings correctly on channel # closure, we go boom set file [makeFile { encoding system iso2022-jp set a "乎乞也"; # 3 Japanese Kanji letters puts $a } iso2022.tcl] set f [open "|[list [interpreter] $file]"] fconfigure $f -encoding iso2022-jp set count [gets $f line] close $f removeFile iso2022.tcl list $count [viewable $line] } [list 3 "乎乞也 (\\u4E4E\\u4E5E\\u4E5F)"] test encoding-24.4 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC0\x80"] } 1 test encoding-24.5 {Parse valid or invalid utf-8} { string length [encoding convertfrom -nocomplain utf-8 "\xC0\x81"] } 2 test encoding-24.6 {Parse valid or invalid utf-8} { string length [encoding convertfrom -nocomplain utf-8 "\xC1\xBF"] } 2 test encoding-24.7 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC2\x80"] } 1 test encoding-24.8 {Parse valid or invalid utf-8} { string length [encoding convertfrom -nocomplain utf-8 "\xE0\x80\x80"] } 3 test encoding-24.9 {Parse valid or invalid utf-8} { string length [encoding convertfrom -nocomplain utf-8 "\xE0\x9F\xBF"] } 3 test encoding-24.10 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xE0\xA0\x80"] } 1 test encoding-24.11 {Parse valid or invalid utf-8} { string length [encoding convertfrom -nocomplain utf-8 "\xEF\xBF\xBF"] } 1 test encoding-24.12 {Parse valid or invalid utf-8} -body { encoding convertfrom -strict utf-8 "\xC0\x81" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} test encoding-24.13 {Parse valid or invalid utf-8} -body { encoding convertfrom -strict utf-8 "\xC1\xBF" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC1'} test encoding-24.14 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC2\x80"] } 1 test encoding-24.15 {Parse valid or invalid utf-8} -body { encoding convertfrom utf-8 "Z\xE0\x80" } -result Z\xE0\x80 test encoding-24.16 {Parse valid or invalid utf-8} -constraints testbytestring -body { encoding convertto utf-8 [testbytestring "Z\u4343\x80"] } -returnCodes 1 -result {expected byte sequence but character 1 was '䍃€' (U+004343)} test encoding-24.17 {Parse valid or invalid utf-8} -constraints testbytestring -body { encoding convertto utf-8 [testbytestring "Z\xE0\x80"] } -result "Z\xC3\xA0\xE2\x82\xAC" test encoding-24.18 {Parse valid or invalid utf-8} -constraints testbytestring -body { encoding convertto utf-8 [testbytestring "Z\xE0\x80xxxxxx"] } -result "Z\xC3\xA0\xE2\x82\xACxxxxxx" test encoding-24.19 {Parse valid or invalid utf-8} -body { encoding convertto utf-8 "ZX\uD800" } -returnCodes 1 -match glob -result "unexpected character at index 2: 'U+00D800'" test encoding-24.20 {Parse with -nocomplain but without providing encoding} { string length [encoding convertfrom -nocomplain "\x20"] } 1 test encoding-24.21 {Parse with -nocomplain but without providing encoding} { string length [encoding convertto -nocomplain "\x20"] } 1 test encoding-24.22 {Syntax error, two encodings} -body { encoding convertfrom iso8859-1 utf-8 "ZX\uD800" } -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-strict? ?-failindex var? ?encoding? data" or "::tcl::encoding::convertfrom -nocomplain ?encoding? data"} test encoding-24.23 {Syntax error, two encodings} -body { encoding convertto iso8859-1 utf-8 "ZX\uD800" } -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-strict? ?-failindex var? ?encoding? data" or "::tcl::encoding::convertto -nocomplain ?encoding? data"} test encoding-24.24 {Parse invalid utf-8 with -strict} -body { encoding convertfrom -strict utf-8 "\xC0\x80\x00\x00" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} test encoding-24.25 {Parse invalid utf-8 with -strict} -body { encoding convertfrom -strict utf-8 "\x40\x80\x00\x00" } -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\x80'} test encoding-24.26 {Parse valid utf-8 with -strict} -body { encoding convertfrom -strict utf-8 "\xF1\x80\x80\x80" } -result \U40000 test encoding-24.27 {Parse invalid utf-8 with -strict} -body { encoding convertfrom -strict utf-8 "\xF0\x80\x80\x80" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xF0'} test encoding-24.28 {Parse invalid utf-8 with -strict} -body { encoding convertfrom -strict utf-8 "\xFF\x00\x00" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xFF'} test encoding-24.29 {Parse invalid utf-8} -body { encoding convertfrom utf-8 \xEF\xBF\xBF } -result \uFFFF test encoding-24.30 {Parse noncharacter with -strict} -body { encoding convertfrom -strict utf-8 \xEF\xBF\xBF } -result \uFFFF test encoding-24.31 {Parse invalid utf-8 with -nocomplain} -body { encoding convertfrom -nocomplain utf-8 \xEF\xBF\xBF } -result \uFFFF test encoding-24.32 {Try to generate invalid utf-8} -body { encoding convertto utf-8 \uFFFF } -result \xEF\xBF\xBF test encoding-24.33 {Try to generate noncharacter with -strict} -body { encoding convertto -strict utf-8 \uFFFF } -result \xEF\xBF\xBF test encoding-24.34 {Try to generate invalid utf-8 with -nocomplain} -body { encoding convertto -nocomplain utf-8 \uFFFF } -result \xEF\xBF\xBF test encoding-24.35 {Parse invalid utf-8} -constraints utf32 -body { encoding convertfrom utf-8 \xED\xA0\x80 } -result \uD800 test encoding-24.36 {Parse invalid utf-8 with -strict} -body { encoding convertfrom -strict utf-8 \xED\xA0\x80 } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'} test encoding-24.37 {Parse invalid utf-8 with -nocomplain} -body { encoding convertfrom -nocomplain utf-8 \xED\xA0\x80 } -result \uD800 test encoding-24.38 {Try to generate invalid utf-8} -body { encoding convertto utf-8 \uD800 } -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'} test encoding-24.39 {Try to generate invalid utf-8 with -strict} -body { encoding convertto -strict utf-8 \uD800 } -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'} test encoding-24.40 {Try to generate invalid utf-8 with -nocomplain} -body { encoding convertto -nocomplain utf-8 \uD800 } -result \xED\xA0\x80 test encoding-24.41 {Parse invalid utf-8 with -strict} -body { encoding convertfrom -strict utf-8 \xED\xA0\x80\xED\xB0\x80 } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'} file delete [file join [temporaryDirectory] iso2022.txt] # # Begin jajp encoding round-trip conformity tests # proc foreach-jisx0208 {varName command} { upvar 1 $varName code foreach range { {2121 217E} {2221 222E} {223A 2241} {224A 2250} {225C 226A} {2272 2279} {227E 227E} {2330 2339} {2421 2473} {2521 2576} {2821 2821} {282C 282C} {2837 2837} {30 21 4E 7E} {4F21 4F53} {50 21 73 7E} {7421 7426} } { if {[llength $range] == 2} { # for adhoc range. simple {first last}. inclusive. scan $range %x%x first last for {set i $first} {$i <= $last} {incr i} { set code $i uplevel 1 $command } } elseif {[llength $range] == 4} { # for uniform range. scan $range %x%x%x%x h0 l0 hend lend for {set hi $h0} {$hi <= $hend} {incr hi} { for {set lo $l0} {$lo <= $lend} {incr lo} { set code [expr {$hi << 8 | ($lo & 0xff)}] uplevel 1 $command } } } else { error "really?" } } } proc gen-jisx0208-euc-jp {code} { binary format cc \ [expr {($code >> 8) | 0x80}] [expr {($code & 0xFF) | 0x80}] } proc gen-jisx0208-iso2022-jp {code} { binary format a3cca3 \ "\x1B\$B" [expr {$code >> 8}] [expr {$code & 0xFF}] "\x1B(B" } proc gen-jisx0208-cp932 {code} { set c1 [expr {($code >> 8) | 0x80}] set c2 [expr {($code & 0xff)| 0x80}] if {$c1 % 2} { set c1 [expr {($c1 >> 1) + ($c1 < 0xDF ? 0x31 : 0x71)}] incr c2 [expr {- (0x60 + ($c2 < 0xE0))}] } else { set c1 [expr {($c1 >> 1) + ($c1 < 0xDF ? 0x30 : 0x70)}] incr c2 -2 } binary format cc $c1 $c2 } proc channel-diff {fa fb} { set diff {} while {[gets $fa la] >= 0 && [gets $fb lb] >= 0} { if {[string compare $la $lb] == 0} continue # lappend diff $la $lb # For more readable (easy to analyze) output. set code [lindex $la 0] binary scan [lindex $la 1] H* expected binary scan [lindex $lb 1] H* got lappend diff [list $code $expected $got] } return $diff } # Create char tables. cd [temporaryDirectory] foreach enc {cp932 euc-jp iso2022-jp} { set f [open $enc.chars w] fconfigure $f -encoding binary foreach-jisx0208 code { puts $f [format "%04X %s" $code [gen-jisx0208-$enc $code]] } close $f } # shiftjis == cp932 for jisx0208. file copy -force cp932.chars shiftjis.chars set NUM 0 foreach from {cp932 shiftjis euc-jp iso2022-jp} { foreach to {cp932 shiftjis euc-jp iso2022-jp} { test encoding-25.[incr NUM] "jisx0208 $from => $to" -setup { cd [temporaryDirectory] } -body { set f [open $from.chars] fconfigure $f -encoding $from set out [open $from.$to.tcltestout w] fconfigure $out -encoding $to puts -nonewline $out [read $f] close $out close $f # then compare $to.chars <=> $from.to.tcltestout as binary. set fa [open $to.chars rb] set fb [open $from.$to.tcltestout rb] channel-diff $fa $fb # Difference should be empty. } -cleanup { close $fa close $fb } -result {} } } test encoding-26.0 {Tcl_GetEncodingSearchPath} -constraints { testgetencpath } -setup { set origPath [testgetencpath] testsetencpath slappy } -body { testgetencpath } -cleanup { testsetencpath $origPath } -result slappy file delete {*}[glob -directory [temporaryDirectory] *.chars *.tcltestout] # ===> Cut here <=== # EscapeFreeProc, GetTableEncoding, unilen are fully tested by the rest of # this file. test encoding-27.1 {encoding dirs basic behavior} -returnCodes error -body { encoding dirs ? ? } -result {wrong # args: should be "encoding dirs ?dirList?"} test encoding-27.2 {encoding dirs basic behavior} -returnCodes error -body { encoding dirs "\{not a list" } -result "expected directory list but got \"\{not a list\"" } test encoding-28.0 {all encodings load} -body { set string hello foreach name [encoding names] { if {$name ne "unicode"} { incr count } encoding convertto -nocomplain $name $string # discard the cached internal representation of Tcl_Encoding # Unfortunately, without this, encoding 2-1 fails. llength $name } return $count } -result 91 runtests } test encoding-29.0 {get encoding nul terminator lengths} -constraints { testencoding } -body { list \ [testencoding nullength ascii] \ [testencoding nullength utf-16] \ [testencoding nullength utf-32] \ [testencoding nullength gb12345] \ [testencoding nullength ksc5601] } -result {1 2 4 2 2} # cleanup namespace delete ::tcl::test::encoding ::tcltest::cleanupTests return # Local Variables: # mode: tcl # End: