diff options
Diffstat (limited to 'tests/encoding.test')
| -rw-r--r-- | tests/encoding.test | 977 |
1 files changed, 210 insertions, 767 deletions
diff --git a/tests/encoding.test b/tests/encoding.test index 70aa99e..aa50360 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -1,28 +1,19 @@ # This file contains a collection of tests for tclEncoding.c -# Sourcing this file into Tcl runs the tests and generates output for errors. -# No output means no errors were found. +# Sourcing this file into Tcl runs the tests and generates output for +# errors. No output means no errors were found. # -# Copyright © 1997 Sun Microsystems, Inc. -# Copyright © 1998-1999 Scriptics Corporation. +# Copyright (c) 1997 Sun Microsystems, Inc. +# Copyright (c) 1998-1999 by Scriptics Corporation. # -# See the file "license.terms" for information on usage and redistribution of -# this file, and for a DISCLAIMER OF ALL WARRANTIES. - -if {"::tcltest" ni [namespace children]} { - package require tcltest 2.5 - namespace import -force ::tcltest::* -} +# See the file "license.terms" for information on usage and redistribution +# of this file, and for a DISCLAIMER OF ALL WARRANTIES. +package require tcltest 2 namespace eval ::tcl::test::encoding { variable x -catch { - ::tcltest::loadTestedCommands - package require -exact tcl::test [info patchlevel] -} - -source [file join [file dirname [info script]] tcltests.tcl] +namespace import -force ::tcltest::* proc toutf {args} { variable x @@ -34,123 +25,106 @@ proc fromutf {args} { } proc runtests {} { + variable x # Some tests require the testencoding command testConstraint testencoding [llength [info commands testencoding]] -testConstraint testbytestring [llength [info commands testbytestring]] -testConstraint teststringbytes [llength [info commands teststringbytes]] testConstraint exec [llength [info commands exec]] - + # TclInitEncodingSubsystem is tested by the rest of this file # TclFinalizeEncodingSubsystem is not currently tested -test encoding-1.1 {Tcl_GetEncoding: system encoding} -setup { - set old [encoding system] -} -constraints {testencoding} -body { +test encoding-1.1 {Tcl_GetEncoding: system encoding} {testencoding} { testencoding create foo [namespace origin toutf] [namespace origin fromutf] + set old [encoding system] encoding system foo set x {} encoding convertto abcd - return $x -} -cleanup { encoding system $old testencoding delete foo -} -result {{fromutf }} + set x +} {{fromutf }} test encoding-1.2 {Tcl_GetEncoding: existing encoding} {testencoding} { testencoding create foo [namespace origin toutf] [namespace origin fromutf] set x {} encoding convertto foo abcd testencoding delete foo - return $x + set x } {{fromutf }} test encoding-1.3 {Tcl_GetEncoding: load encoding} { - list [encoding convertto jis0208 乎] \ + list [encoding convertto jis0208 \u4e4e] \ [encoding convertfrom jis0208 8C] -} "8C 乎" +} "8C \u4e4e" test encoding-2.1 {Tcl_FreeEncoding: refcount == 0} { - encoding convertto jis0208 乎 + encoding convertto jis0208 \u4e4e } {8C} -test encoding-2.2 {Tcl_FreeEncoding: refcount != 0} -setup { +test encoding-2.2 {Tcl_FreeEncoding: refcount != 0} {testencoding} { set system [encoding system] set path [encoding dirs] -} -constraints {testencoding} -body { encoding system shiftjis ;# incr ref count encoding dirs [list [pwd]] - set x [encoding convertto shiftjis 乎] ;# old one found - encoding system iso8859-1 - llength shiftjis ;# Shimmer away any cache of Tcl_Encoding - lappend x [catch {encoding convertto shiftjis 乎} msg] $msg -} -cleanup { - encoding system iso8859-1 + set x [encoding convertto shiftjis \u4e4e] ;# old one found + encoding system identity + llength shiftjis + lappend x [catch {encoding convertto shiftjis \u4e4e} msg] $msg + encoding system identity encoding dirs $path encoding system $system -} -result "\x8C\xC1 1 {unknown encoding \"shiftjis\"}" + set x +} "\u008c\u00c1 1 {unknown encoding \"shiftjis\"}" -test encoding-3.1 {Tcl_GetEncodingName, NULL} -setup { +test encoding-3.1 {Tcl_GetEncodingName, NULL} { set old [encoding system] -} -body { encoding system shiftjis - encoding system -} -cleanup { + set x [encoding system] encoding system $old -} -result {shiftjis} -test encoding-3.2 {Tcl_GetEncodingName, non-null} -setup { + set x +} {shiftjis} +test encoding-3.2 {Tcl_GetEncodingName, non-null} { set old [fconfigure stdout -encoding] -} -body { fconfigure stdout -encoding jis0208 - fconfigure stdout -encoding -} -cleanup { + set x [fconfigure stdout -encoding] fconfigure stdout -encoding $old -} -result {jis0208} -test encoding-3.3 {fconfigure -profile} -setup { - set old [fconfigure stdout -profile] -} -body { - fconfigure stdout -profile replace - fconfigure stdout -profile -} -cleanup { - fconfigure stdout -profile $old -} -result replace + set x +} {jis0208} -test encoding-4.1 {Tcl_GetEncodingNames} -constraints {testencoding} -setup { +test encoding-4.1 {Tcl_GetEncodingNames} {testencoding} { cd [makeDirectory tmp] makeDirectory [file join tmp encoding] + makeFile {} [file join tmp encoding junk.enc] + makeFile {} [file join tmp encoding junk2.enc] set path [encoding dirs] encoding dirs {} catch {unset encodings} catch {unset x} -} -body { foreach encoding [encoding names] { set encodings($encoding) 1 } - makeFile {} [file join tmp encoding junk.enc] - makeFile {} [file join tmp encoding junk2.enc] encoding dirs [list [file join [pwd] encoding]] foreach encoding [encoding names] { if {![info exists encodings($encoding)]} { lappend x $encoding } } - lsort $x -} -cleanup { encoding dirs $path cd [workingDirectory] removeFile [file join tmp encoding junk2.enc] removeFile [file join tmp encoding junk.enc] removeDirectory [file join tmp encoding] removeDirectory tmp -} -result {junk junk2} + lsort $x +} {junk junk2} -test encoding-5.1 {Tcl_SetSystemEncoding} -setup { +test encoding-5.1 {Tcl_SetSystemEncoding} { set old [encoding system] -} -body { encoding system jis0208 - encoding convertto 乎 -} -cleanup { - encoding system iso8859-1 + set x [encoding convertto \u4e4e] + encoding system identity encoding system $old -} -result {8C} + set x +} {8C} test encoding-5.2 {Tcl_SetSystemEncoding: test ref count} { set old [encoding system] encoding system $old @@ -164,7 +138,7 @@ test encoding-6.1 {Tcl_CreateEncoding: new} {testencoding} { encoding convertfrom foo abcd encoding convertto foo abcd testencoding delete foo - return $x + set x } {{toutf 1} {fromutf 2}} test encoding-6.2 {Tcl_CreateEncoding: replace encoding} {testencoding} { testencoding create foo [namespace code {toutf a}] \ @@ -173,12 +147,12 @@ test encoding-6.2 {Tcl_CreateEncoding: replace encoding} {testencoding} { encoding convertfrom foo abcd encoding convertto foo abcd testencoding delete foo - return $x + set x } {{toutf a} {fromutf b}} test encoding-7.1 {Tcl_ExternalToUtfDString: small buffer} { encoding convertfrom jis0208 8c8c8c8c -} 吾吾吾吾 +} "\u543e\u543e\u543e\u543e" test encoding-7.2 {Tcl_UtfToExternalDString: big buffer} { set a 8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C append a $a @@ -187,26 +161,26 @@ test encoding-7.2 {Tcl_UtfToExternalDString: big buffer} { append a $a set x [encoding convertfrom jis0208 $a] list [string length $x] [string index $x 0] -} "512 乎" +} "512 \u4e4e" test encoding-8.1 {Tcl_ExternalToUtf} { set f [open [file join [temporaryDirectory] dummy] w] fconfigure $f -translation binary -encoding iso8859-1 - puts -nonewline $f "ab\x8C\xC1g" + puts -nonewline $f "ab\x8c\xc1g" close $f set f [open [file join [temporaryDirectory] dummy] r] - fconfigure $f -translation binary -encoding shiftjis + fconfigure $f -translation binary -encoding shiftjis set x [read $f] close $f file delete [file join [temporaryDirectory] dummy] - return $x -} ab乎g + set x +} "ab\u4e4eg" test encoding-9.1 {Tcl_UtfToExternalDString: small buffer} { - encoding convertto jis0208 "吾吾吾吾" + encoding convertto jis0208 "\u543e\u543e\u543e\u543e" } {8c8c8c8c} test encoding-9.2 {Tcl_UtfToExternalDString: big buffer} { - set a 乎乎乎乎乎乎乎乎 + set a \u4e4e\u4e4e\u4e4e\u4e4e\u4e4e\u4e4e\u4e4e\u4e4e append a $a append a $a append a $a @@ -220,442 +194,141 @@ test encoding-9.2 {Tcl_UtfToExternalDString: big buffer} { test encoding-10.1 {Tcl_UtfToExternal} { set f [open [file join [temporaryDirectory] dummy] w] fconfigure $f -translation binary -encoding shiftjis - puts -nonewline $f ab乎g + puts -nonewline $f "ab\u4e4eg" close $f set f [open [file join [temporaryDirectory] dummy] r] fconfigure $f -translation binary -encoding iso8859-1 set x [read $f] close $f file delete [file join [temporaryDirectory] dummy] - return $x -} "ab\x8C\xC1g" + set x +} "ab\x8c\xc1g" + +proc viewable {str} { + set res "" + foreach c [split $str {}] { + if {[string is print $c] && [string is ascii $c]} { + append res $c + } else { + append res "\\u[format %4.4x [scan $c %c]]" + } + } + return "$str ($res)" +} test encoding-11.1 {LoadEncodingFile: unknown encoding} {testencoding} { set system [encoding system] set path [encoding dirs] encoding system iso8859-1 encoding dirs {} - llength jis0208 ;# Shimmer any cached Tcl_Encoding in shared literal - set x [list [catch {encoding convertto jis0208 乎} msg] $msg] + llength jis0208 + set x [list [catch {encoding convertto jis0208 \u4e4e} msg] $msg] encoding dirs $path encoding system $system - lappend x [encoding convertto jis0208 乎] + lappend x [encoding convertto jis0208 \u4e4e] } {1 {unknown encoding "jis0208"} 8C} test encoding-11.2 {LoadEncodingFile: single-byte} { - encoding convertfrom jis0201 \xA1 -} 。 + encoding convertfrom jis0201 \xa1 +} "\uff61" test encoding-11.3 {LoadEncodingFile: double-byte} { encoding convertfrom jis0208 8C -} 乎 +} "\u4e4e" test encoding-11.4 {LoadEncodingFile: multi-byte} { - encoding convertfrom shiftjis \x8C\xC1 -} 乎 + encoding convertfrom shiftjis \x8c\xc1 +} "\u4e4e" test encoding-11.5 {LoadEncodingFile: escape file} { - encoding convertto iso2022 乎 -} \x1B\$B8C\x1B(B + viewable [encoding convertto iso2022 \u4e4e] +} [viewable "\x1b\$B8C\x1b(B"] test encoding-11.5.1 {LoadEncodingFile: escape file} { - encoding convertto iso2022-jp 乎 -} \x1B\$B8C\x1B(B -test encoding-11.6 {LoadEncodingFile: invalid file} -constraints {testencoding} -setup { + viewable [encoding convertto iso2022-jp \u4e4e] +} [viewable "\x1b\$B8C\x1b(B"] +test encoding-11.6 {LoadEncodingFile: invalid file} {testencoding} { set system [encoding system] set path [encoding dirs] - encoding system iso8859-1 -} -body { + encoding system identity cd [temporaryDirectory] encoding dirs [file join tmp encoding] makeDirectory tmp makeDirectory [file join tmp encoding] set f [open [file join tmp encoding splat.enc] w] - fconfigure $f -translation binary + fconfigure $f -translation binary puts $f "abcdefghijklmnop" close $f - encoding convertto splat 乎 -} -returnCodes error -cleanup { + set x [list [catch {encoding convertto splat \u4e4e} msg] $msg] file delete [file join [temporaryDirectory] tmp encoding splat.enc] removeDirectory [file join tmp encoding] removeDirectory tmp cd [workingDirectory] encoding dirs $path encoding system $system -} -result {invalid encoding file "splat"} -test encoding-11.8 {encoding: extended Unicode UTF-16} { - encoding convertto utf-16le 😹 -} =Ø9Þ -test encoding-11.9 {encoding: extended Unicode UTF-16} { - encoding convertto utf-16be 😹 -} Ø=Þ9 -test encoding-11.10 {encoding: extended Unicode UTF-32} { - encoding convertto utf-32le 😹 -} 9\xF6\x01\x00 -test encoding-11.11 {encoding: extended Unicode UTF-32} { - encoding convertto utf-32be 😹 -} \x00\x01\xF69 + set x +} {1 {invalid encoding file "splat"}} + # OpenEncodingFile is fully tested by the rest of the tests in this file. test encoding-12.1 {LoadTableEncoding: normal encoding} { - set x [encoding convertto iso8859-3 Ġ] - append x [encoding convertto -profile tcl8 iso8859-3 Õ] - append x [encoding convertfrom iso8859-3 Õ] -} "Õ?Ġ" + set x [encoding convertto iso8859-3 \u120] + append x [encoding convertto iso8859-3 \ud5] + append x [encoding convertfrom iso8859-3 \xd5] +} "\xd5?\u120" test encoding-12.2 {LoadTableEncoding: single-byte encoding} { - set x [encoding convertto iso8859-3 abĠg] - append x [encoding convertfrom iso8859-3 abÕg] -} "abÕgabĠg" + set x [encoding convertto iso8859-3 ab\u0120g] + append x [encoding convertfrom iso8859-3 ab\xd5g] +} "ab\xd5gab\u120g" test encoding-12.3 {LoadTableEncoding: multi-byte encoding} { - set x [encoding convertto shiftjis ab乎g] - append x [encoding convertfrom shiftjis ab\x8C\xC1g] -} "ab\x8C\xC1gab乎g" + set x [encoding convertto shiftjis ab\u4e4eg] + append x [encoding convertfrom shiftjis ab\x8c\xc1g] +} "ab\x8c\xc1gab\u4e4eg" test encoding-12.4 {LoadTableEncoding: double-byte encoding} { - set x [encoding convertto jis0208 乎α] + set x [encoding convertto jis0208 \u4e4e\u3b1] append x [encoding convertfrom jis0208 8C&A] -} "8C&A乎α" +} "8C&A\u4e4e\u3b1" test encoding-12.5 {LoadTableEncoding: symbol encoding} { - set x [encoding convertto symbol γ] - append x [encoding convertto symbol g] - append x [encoding convertfrom symbol g] -} "ggγ" + set x [encoding convertto symbol \u3b3] + append x [encoding convertto symbol \u67] + append x [encoding convertfrom symbol \x67] +} "\x67\x67\u3b3" test encoding-13.1 {LoadEscapeTable} { - encoding convertto iso2022 ab乎棙g -} ab\x1B\$B8C\x1B\$\(DD%\x1B(Bg + viewable [set x [encoding convertto iso2022 ab\u4e4e\u68d9g]] +} [viewable "ab\x1b\$B8C\x1b\$\(DD%\x1b(Bg"] + +test encoding-14.1 {BinaryProc} { + encoding convertto identity \x12\x34\x56\xff\x69 +} "\x12\x34\x56\xc3\xbf\x69" test encoding-15.1 {UtfToUtfProc} { - encoding convertto utf-8 £ -} "\xC2\xA3" -test encoding-15.2 {UtfToUtfProc null character output} testbytestring { - binary scan [testbytestring [encoding convertto utf-8 \x00]] H* z - set z -} 00 -test encoding-15.3 {UtfToUtfProc null character input} teststringbytes { - set y [encoding convertfrom utf-8 [encoding convertto utf-8 \x00]] - binary scan [teststringbytes $y] H* z - set z -} c080 -test encoding-15.4 {UtfToUtfProc emoji character input} -body { - set x \xED\xA0\xBD\xED\xB8\x82 - set y [encoding convertfrom -profile tcl8 utf-8 \xED\xA0\xBD\xED\xB8\x82] - list [string length $x] $y -} -result "6 😂" -test encoding-15.5 {UtfToUtfProc emoji character input} { - set x \xF0\x9F\x98\x82 - set y [encoding convertfrom utf-8 \xF0\x9F\x98\x82] - list [string length $x] $y -} "4 😂" -test encoding-15.6 {UtfToUtfProc emoji character output} { - set x \uDE02\uD83D\uDE02\uD83D - set y [encoding convertto -profile tcl8 utf-8 \uDE02\uD83D\uDE02\uD83D] - binary scan $y H* z - list [string length $y] $z -} {10 edb882f09f9882eda0bd} -test encoding-15.7 {UtfToUtfProc emoji character output} { - set x \uDE02\uD83D\uD83D - set y [encoding convertto -profile tcl8 utf-8 \uDE02\uD83D\uD83D] - binary scan $y H* z - list [string length $x] [string length $y] $z -} {3 9 edb882eda0bdeda0bd} -test encoding-15.8 {UtfToUtfProc emoji character output} { - set x \uDE02\uD83Dé - set y [encoding convertto -profile tcl8 utf-8 \uDE02\uD83Dé] - binary scan $y H* z - list [string length $x] [string length $y] $z -} {3 8 edb882eda0bdc3a9} -test encoding-15.9 {UtfToUtfProc emoji character output} { - set x \uDE02\uD83DX - set y [encoding convertto -profile tcl8 utf-8 \uDE02\uD83DX] - binary scan $y H* z - list [string length $x] [string length $y] $z -} {3 7 edb882eda0bd58} -test encoding-15.10 {UtfToUtfProc high surrogate character output} { - set x \uDE02é - set y [encoding convertto -profile tcl8 utf-8 \uDE02é] - binary scan $y H* z - list [string length $x] [string length $y] $z -} {2 5 edb882c3a9} -test encoding-15.11 {UtfToUtfProc low surrogate character output} { - set x \uDA02é - set y [encoding convertto -profile tcl8 utf-8 \uDA02é] - binary scan $y H* z - list [string length $x] [string length $y] $z -} {2 5 eda882c3a9} -test encoding-15.12 {UtfToUtfProc high surrogate character output} { - set x \uDE02Y - set y [encoding convertto -profile tcl8 utf-8 \uDE02Y] - binary scan $y H* z - list [string length $x] [string length $y] $z -} {2 4 edb88259} -test encoding-15.13 {UtfToUtfProc low surrogate character output} { - set x \uDA02Y - set y [encoding convertto -profile tcl8 utf-8 \uDA02Y] - binary scan $y H* z - list [string length $x] [string length $y] $z -} {2 4 eda88259} -test encoding-15.14 {UtfToUtfProc high surrogate character output} { - set x \uDE02 - set y [encoding convertto -profile tcl8 utf-8 \uDE02] - binary scan $y H* z - list [string length $x] [string length $y] $z -} {1 3 edb882} -test encoding-15.15 {UtfToUtfProc low surrogate character output} { - set x \uDA02 - set y [encoding convertto -profile tcl8 utf-8 \uDA02] - binary scan $y H* z - list [string length $x] [string length $y] $z -} {1 3 eda882} -test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} { - set x \xF0\xA0\xA1\xC2 - set y [encoding convertfrom -profile tcl8 utf-8 \xF0\xA0\xA1\xC2] - list [string length $x] $y -} "4 \xF0\xA0\xA1\xC2" -test encoding-15.17 {UtfToUtfProc emoji character output} { - set x 😂 - set y [encoding convertto utf-8 😂] - binary scan $y H* z - list [string length $y] $z -} {4 f09f9882} -test encoding-15.18 {UtfToUtfProc CESU-8 6-byte sequence} { - set y [encoding convertto cesu-8 \U10000] - binary scan $y H* z - list [string length $y] $z -} {6 eda080edb080} -test encoding-15.19 {UtfToUtfProc CESU-8 upper surrogate} { - set y [encoding convertto cesu-8 \uD800] - binary scan $y H* z - list [string length $y] $z -} {3 eda080} -test encoding-15.20 {UtfToUtfProc CESU-8 lower surrogate} { - set y [encoding convertto cesu-8 \uDC00] - binary scan $y H* z - list [string length $y] $z -} {3 edb080} -test encoding-15.21 {UtfToUtfProc CESU-8 noncharacter} { - set y [encoding convertto cesu-8 \uFFFF] - binary scan $y H* z - list [string length $y] $z -} {3 efbfbf} -test encoding-15.22 {UtfToUtfProc CESU-8 bug [048dd20b4171c8da]} { - set y [encoding convertto cesu-8 \x80] - binary scan $y H* z - list [string length $y] $z -} {2 c280} -test encoding-15.23 {UtfToUtfProc CESU-8 bug [048dd20b4171c8da]} { - set y [encoding convertto cesu-8 \u100] - binary scan $y H* z - list [string length $y] $z -} {2 c480} -test encoding-15.24 {UtfToUtfProc CESU-8 bug [048dd20b4171c8da]} { - set y [encoding convertto cesu-8 \u3FF] - binary scan $y H* z - list [string length $y] $z -} {2 cfbf} -test encoding-15.25 {UtfToUtfProc CESU-8} { - encoding convertfrom cesu-8 \x00 -} \x00 -test encoding-15.26 {UtfToUtfProc CESU-8} { - encoding convertfrom -profile tcl8 cesu-8 \xC0\x80 -} \x00 -test encoding-15.27 {UtfToUtfProc -profile strict CESU-8} { - encoding convertfrom -profile strict cesu-8 \x00 -} \x00 -test encoding-15.28 {UtfToUtfProc -profile strict CESU-8} -body { - encoding convertfrom -profile strict cesu-8 \xC0\x80 -} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} -test encoding-15.29 {UtfToUtfProc CESU-8} { - encoding convertto cesu-8 \x00 -} \x00 -test encoding-15.30 {UtfToUtfProc -profile strict CESU-8} { - encoding convertto -profile strict cesu-8 \x00 -} \x00 -test encoding-15.31 {UtfToUtfProc -profile strict CESU-8 (bytes F0-F4 are invalid)} -body { - encoding convertfrom -profile strict cesu-8 \xF1\x86\x83\x9C -} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xF1'} + encoding convertto utf-8 \xa3 +} "\xc2\xa3" -test encoding-16.1 {Utf16ToUtfProc} -body { - set val [encoding convertfrom utf-16 NN] - list $val [format %x [scan $val %c]] -} -result "乎 4e4e" -test encoding-16.2 {Utf16ToUtfProc} -body { - set val [encoding convertfrom utf-16 "\xD8\xD8\xDC\xDC"] - list $val [format %x [scan $val %c]] -} -result "\U460DC 460dc" -test encoding-16.3 {Utf16ToUtfProc} -body { - set val [encoding convertfrom -profile tcl8 utf-16 "\xDC\xDC"] - list $val [format %x [scan $val %c]] -} -result "\uDCDC dcdc" -test encoding-16.4 {Ucs2ToUtfProc} -body { - set val [encoding convertfrom ucs-2 NN] - list $val [format %x [scan $val %c]] -} -result "乎 4e4e" -test encoding-16.5 {Ucs2ToUtfProc} -body { - set val [encoding convertfrom ucs-2 "\xD8\xD8\xDC\xDC"] - list $val [format %x [scan $val %c]] -} -result "\U460DC 460dc" -test encoding-16.6 {Utf32ToUtfProc} -body { - set val [encoding convertfrom utf-32le NN\0\0] - list $val [format %x [scan $val %c]] -} -result "乎 4e4e" -test encoding-16.7 {Utf32ToUtfProc} -body { - set val [encoding convertfrom utf-32be \0\0NN] - list $val [format %x [scan $val %c]] -} -result "乎 4e4e" -test encoding-16.8 {Utf32ToUtfProc} -body { - set val [encoding convertfrom -profile tcl8 utf-32 \x41\x00\x00\x41] +test encoding-15.2 {UtfToUtfProc null character output} { + set x \u0000 + set y [encoding convertto utf-8 \u0000] + set y [encoding convertfrom identity $y] + binary scan $y H* z + list [string bytelength $x] [string bytelength $y] $z +} {2 1 00} + +test encoding-15.3 {UtfToUtfProc null character input} { + set x [encoding convertfrom identity \x00] + set y [encoding convertfrom utf-8 $x] + binary scan [encoding convertto identity $y] H* z + list [string bytelength $x] [string bytelength $y] $z +} {1 2 c080} + +test encoding-16.1 {UnicodeToUtfProc} { + set val [encoding convertfrom unicode NN] list $val [format %x [scan $val %c]] -} -result "\uFFFD fffd" -test encoding-16.9 {Utf32ToUtfProc} -body { - encoding convertfrom -profile tcl8 utf-32le \x00\xD8\x00\x00 -} -result \uD800 -test encoding-16.10 {Utf32ToUtfProc} -body { - encoding convertfrom -profile tcl8 utf-32le \x00\xDC\x00\x00 -} -result \uDC00 -test encoding-16.11 {Utf32ToUtfProc} -body { - encoding convertfrom -profile tcl8 utf-32le \x00\xD8\x00\x00\x00\xDC\x00\x00 -} -result \uD800\uDC00 -test encoding-16.12 {Utf32ToUtfProc} -body { - encoding convertfrom -profile tcl8 utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00 -} -result \uDC00\uD800 -test encoding-16.13 {Utf16ToUtfProc} -body { - encoding convertfrom -profile tcl8 utf-16le \x00\xD8 -} -result \uD800 -test encoding-16.14 {Utf16ToUtfProc} -body { - encoding convertfrom -profile tcl8 utf-16le \x00\xDC -} -result \uDC00 -test encoding-16.15 {Utf16ToUtfProc} -body { - encoding convertfrom utf-16le \x00\xD8\x00\xDC -} -result \uD800\uDC00 -test encoding-16.16 {Utf16ToUtfProc} -body { - encoding convertfrom -profile tcl8 utf-16le \x00\xDC\x00\xD8 -} -result \uDC00\uD800 -test encoding-16.17 {Utf32ToUtfProc} -body { - list [encoding convertfrom -profile strict -failindex idx utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00] [set idx] -} -result {A 4} +} "\u4e4e 4e4e" -test encoding-16.18 { - Utf16ToUtfProc, Tcl_UniCharToUtf, surrogate pairs in utf-16 -} -body { - apply [list {} { - for {set i 0xD800} {$i < 0xDBFF} {incr i} { - for {set j 0xDC00} {$j < 0xDFFF} {incr j} { - set string [binary format S2 [list $i $j]] - set status [catch { - set decoded [encoding convertfrom utf-16be $string] - set encoded [encoding convertto utf-16be $decoded] - }] - if {$status || ( $encoded ne $string )} { - return [list [format %x $i] [format %x $j]] - } - } - } - return done - } [namespace current]] -} -result done -test encoding-16.19.strict {Utf16ToUtfProc, bug [d19fe0a5b]} -body { - encoding convertfrom -profile strict utf-16 "\x41\x41\x41" -} -returnCodes 1 -result {unexpected byte sequence starting at index 2: '\x41'} -test encoding-16.19.tcl8 {Utf16ToUtfProc, bug [d19fe0a5b]} -body { - encoding convertfrom -profile tcl8 utf-16 "\x41\x41\x41" -} -result \u4141\uFFFD -test encoding-16.20.tcl8 {Utf16ToUtfProc, bug [d19fe0a5b]} -body { - encoding convertfrom -profile tcl8 utf-16 "\xD8\xD8" -} -result \uD8D8 -test encoding-16.20.strict {Utf16ToUtfProc, bug [d19fe0a5b]} -body { - encoding convertfrom -profile strict utf-16 "\xD8\xD8" -} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xD8'} -test encoding-16.21.tcl8 {Utf32ToUtfProc, bug [d19fe0a5b]} -body { - encoding convertfrom -profile tcl8 utf-32 "\x00\x00\x00\x00\x41\x41" -} -result \x00\uFFFD -test encoding-16.21.strict {Utf32ToUtfProc, bug [d19fe0a5b]} -body { - encoding convertfrom -profile strict utf-32 "\x00\x00\x00\x00\x41\x41" -} -returnCodes 1 -result {unexpected byte sequence starting at index 4: '\x41'} -test encoding-16.22 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body { - encoding convertfrom -profile strict utf-16le \x00\xD8 -} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'} -test encoding-16.23 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body { - encoding convertfrom -profile strict utf-16le \x00\xDC -} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'} -test encoding-16.24 {Utf32ToUtfProc} -body { - encoding convertfrom -profile tcl8 utf-32 "\xFF\xFF\xFF\xFF" -} -result \uFFFD -test encoding-16.25.strict {Utf32ToUtfProc} -body { - encoding convertfrom -profile strict utf-32 "\x01\x00\x00\x01" -} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x01'} -test encoding-16.25.tcl8 {Utf32ToUtfProc} -body { - encoding convertfrom -profile tcl8 utf-32 "\x01\x00\x00\x01" -} -result \uFFFD - -test encoding-17.1 {UtfToUtf16Proc} -body { - encoding convertto utf-16 "\U460DC" -} -result "\xD8\xD8\xDC\xDC" -test encoding-17.2 {UtfToUcs2Proc, invalid testcase, see [5607d6482c]} -constraints deprecated -body { - encoding convertfrom utf-16 [encoding convertto ucs-2 "\U460DC"] -} -result "\uFFFD" -test encoding-17.3 {UtfToUtf16Proc} -body { - encoding convertto -profile tcl8 utf-16be "\uDCDC" -} -result "\xDC\xDC" -test encoding-17.4 {UtfToUtf16Proc} -body { - encoding convertto -profile tcl8 utf-16le "\uD8D8" -} -result "\xD8\xD8" -test encoding-17.5 {UtfToUtf32Proc} -body { - encoding convertto utf-32le "\U460DC" -} -result "\xDC\x60\x04\x00" -test encoding-17.6 {UtfToUtf32Proc} -body { - encoding convertto utf-32be "\U460DC" -} -result "\x00\x04\x60\xDC" -test encoding-17.7 {UtfToUtf16Proc} -body { - encoding convertto -profile strict utf-16be "\uDCDC" -} -returnCodes error -result {unexpected character at index 0: 'U+00DCDC'} -test encoding-17.8 {UtfToUtf16Proc} -body { - encoding convertto -profile strict utf-16le "\uD8D8" -} -returnCodes error -result {unexpected character at index 0: 'U+00D8D8'} -test encoding-17.9 {Utf32ToUtfProc} -body { - encoding convertfrom -profile strict utf-32 "\xFF\xFF\xFF\xFF" -} -returnCodes error -result {unexpected byte sequence starting at index 0: '\xFF'} -test encoding-17.10 {Utf32ToUtfProc} -body { - encoding convertfrom -profile tcl8 utf-32 "\xFF\xFF\xFF\xFF" -} -result \uFFFD -test encoding-17.11 {Utf32ToUtfProc} -body { - encoding convertfrom -profile strict utf-32le "\x00\xD8\x00\x00" -} -returnCodes error -result {unexpected byte sequence starting at index 0: '\x00'} -test encoding-17.12 {Utf32ToUtfProc} -body { - encoding convertfrom -profile strict utf-32le "\x00\xDC\x00\x00" -} -returnCodes error -result {unexpected byte sequence starting at index 0: '\x00'} +test encoding-17.1 {UtfToUnicodeProc} { +} {} -test encoding-18.1 {TableToUtfProc on invalid input} -body { - list [catch {encoding convertto -profile tcl8 jis0208 \\} res] $res -} -result {0 !)} -test encoding-18.2 {TableToUtfProc on invalid input with -profile strict} -body { - list [catch {encoding convertto -profile strict jis0208 \\} res] $res -} -result {1 {unexpected character at index 0: 'U+00005C'}} -test encoding-18.3 {TableToUtfProc on invalid input with -profile strict -failindex} -body { - list [catch {encoding convertto -profile strict -failindex pos jis0208 \\} res] $res $pos -} -result {0 {} 0} -test encoding-18.4 {TableToUtfProc on invalid input with -failindex -profile strict} -body { - list [catch {encoding convertto -failindex pos -profile strict jis0208 \\} res] $res $pos -} -result {0 {} 0} -test encoding-18.5 {TableToUtfProc on invalid input with -failindex} -body { - list [catch {encoding convertto -profile tcl8 -failindex pos jis0208 \\} res] $res $pos -} -result {0 !) -1} -test encoding-18.6 {TableToUtfProc on invalid input with -profile tcl8} -body { - list [catch {encoding convertto -profile tcl8 jis0208 \\} res] $res -} -result {0 !)} +test encoding-18.1 {TableToUtfProc} { +} {} -test encoding-19.1 {TableFromUtfProc} -body { - encoding convertfrom -profile tcl8 ascii AÁ -} -result AÁ -test encoding-19.2 {TableFromUtfProc} -body { - encoding convertfrom -profile tcl8 ascii AÁ -} -result AÁ -test encoding-19.3 {TableFromUtfProc} -body { - encoding convertfrom -profile strict ascii AÁ -} -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\xC1'} -test encoding-19.4 {TableFromUtfProc} -body { - list [encoding convertfrom -profile tcl8 -failindex idx ascii AÁ] [set idx] -} -result [list A\xC1 -1] -test encoding-19.5 {TableFromUtfProc} -body { - list [encoding convertfrom -failindex idx -profile strict ascii A\xC1] [set idx] -} -result {A 1} -test encoding-19.6 {TableFromUtfProc} -body { - list [encoding convertfrom -failindex idx -profile strict ascii AÁB] [set idx] -} -result {A 1} +test encoding-19.1 {TableFromUtfProc} { +} {} test encoding-20.1 {TableFreefProc} { } {} @@ -666,18 +339,18 @@ test encoding-21.1 {EscapeToUtfProc} { test encoding-22.1 {EscapeFromUtfProc} { } {} -set iso2022encData "\x1B\$B;d\$I\$b\$G\$O!\"%A%C%W\$49XF~;~\$K\$4EPO?\$\$\$?\$@\$\$\$?\$4=;=j\$r%-%c%C%7%e%\"%&%H\$N:]\$N\x1B(B -\x1B\$B>.@Z<jAwIU@h\$H\$7\$F;HMQ\$7\$F\$*\$j\$^\$9!#62\$lF~\$j\$^\$9\$,!\"@5\$7\$\$=;=j\$r\$4EPO?\$7\$J\$*\x1B(B -\x1B\$B\$*4j\$\$\$\$\$?\$7\$^\$9!#\$^\$?!\"BgJQ62=L\$G\$9\$,!\"=;=jJQ99\$N\$\"\$H!\"F|K\\8l%5!<%S%9It!J\x1B(B -casino_japanese@___.com \x1B\$B!K\$^\$G\$4=;=jJQ99:Q\$NO\"Mm\$r\$\$\$?\$@\$1\$J\$\$\$G\x1B(B -\x1B\$B\$7\$g\$&\$+!)\x1B(B" +set iso2022encData "\u001b\$B;d\$I\$b\$G\$O!\"%A%C%W\$49XF~;~\$K\$4EPO?\$\$\$?\$@\$\$\$?\$4=;=j\$r%-%c%C%7%e%\"%&%H\$N:]\$N\u001b(B +\u001b\$B>.@Z<jAwIU@h\$H\$7\$F;HMQ\$7\$F\$*\$j\$^\$9!#62\$lF~\$j\$^\$9\$,!\"@5\$7\$\$=;=j\$r\$4EPO?\$7\$J\$*\u001b(B +\u001b\$B\$*4j\$\$\$\$\$?\$7\$^\$9!#\$^\$?!\"BgJQ62=L\$G\$9\$,!\"=;=jJQ99\$N\$\"\$H!\"F|K\\8l%5!<%S%9It!J\u001b(B +casino_japanese@___.com \u001b\$B!K\$^\$G\$4=;=jJQ99:Q\$NO\"Mm\$r\$\$\$?\$@\$1\$J\$\$\$G\u001b(B +\u001b\$B\$7\$g\$&\$+!)\u001b(B" set iso2022uniData [encoding convertfrom iso2022-jp $iso2022encData] -set iso2022uniData2 "私どもでは、チップご購入時にご登録いただいたご住所をキャッシュアウトの際の -小切手送付先として使用しております。恐れ入りますが、正しい住所をご登録しなお -お願いいたします。また、大変恐縮ですが、住所変更のあと、日本語サービス部( -casino_japanese@___.com )までご住所変更済の連絡をいただけないで -しょうか?" +set iso2022uniData2 "\u79c1\u3069\u3082\u3067\u306f\u3001\u30c1\u30c3\u30d7\u3054\u8cfc\u5165\u6642\u306b\u3054\u767b\u9332\u3044\u305f\u3060\u3044\u305f\u3054\u4f4f\u6240\u3092\u30ad\u30e3\u30c3\u30b7\u30e5\u30a2\u30a6\u30c8\u306e\u969b\u306e +\u5c0f\u5207\u624b\u9001\u4ed8\u5148\u3068\u3057\u3066\u4f7f\u7528\u3057\u3066\u304a\u308a\u307e\u3059\u3002\u6050\u308c\u5165\u308a\u307e\u3059\u304c\u3001\u6b63\u3057\u3044\u4f4f\u6240\u3092\u3054\u767b\u9332\u3057\u306a\u304a +\u304a\u9858\u3044\u3044\u305f\u3057\u307e\u3059\u3002\u307e\u305f\u3001\u5927\u5909\u6050\u7e2e\u3067\u3059\u304c\u3001\u4f4f\u6240\u5909\u66f4\u306e\u3042\u3068\u3001\u65e5\u672c\u8a9e\u30b5\u30fc\u30d3\u30b9\u90e8\uff08 +\u0063\u0061\u0073\u0069\u006e\u006f\u005f\u006a\u0061\u0070\u0061\u006e\u0065\u0073\u0065\u0040\u005f\u005f\u005f\u002e\u0063\u006f\u006d\u0020\uff09\u307e\u3067\u3054\u4f4f\u6240\u5909\u66f4\u6e08\u306e\u9023\u7d61\u3092\u3044\u305f\u3060\u3051\u306a\u3044\u3067 +\u3057\u3087\u3046\u304b\uff1f" cd [temporaryDirectory] set fid [open iso2022.txt w] @@ -715,44 +388,47 @@ test encoding-23.3 {iso2022-jp escape encoding test} { fconfigure $fid -encoding iso2022-jp set data [read $fid 50] close $fid - return $data + set data } [string range $iso2022uniData 0 49] ; # 0 .. 49 inclusive == 50 cd [workingDirectory] -# Code to make the next few tests more intelligible; the code being tested -# should be in the body of the test! -proc runInSubprocess {contents {filename iso2022.tcl}} { - set theFile [makeFile $contents $filename] - try { - exec [interpreter] $theFile - } finally { - removeFile $theFile - } -} - -test encoding-24.1 {EscapeFreeProc on open channels} exec { - runInSubprocess { +test encoding-24.1 {EscapeFreeProc on open channels} -constraints { + exec +} -setup { + # Bug #524674 input + set file [makeFile { set f [open [file join [file dirname [info script]] iso2022.txt]] fconfigure $f -encoding iso2022-jp gets $f - } -} {} -test encoding-24.2 {EscapeFreeProc on open channels} {exec} { + } iso2022.tcl] +} -body { + exec [interpreter] $file +} -cleanup { + removeFile iso2022.tcl +} -result {} + +test encoding-24.2 {EscapeFreeProc on open channels} -constraints { + exec +} -setup { # Bug #524674 output - runInSubprocess { + set file [makeFile { encoding system cp1252; # Bug #2891556 crash revelator fconfigure stdout -encoding iso2022-jp - puts ab乎棙g - set env(TCL_FINALIZE_ON_EXIT) 1 + puts ab\u4e4e\u68d9g exit - } -} "ab\x1B\$B8C\x1B\$(DD%\x1B(Bg" + } iso2022.tcl] +} -body { + viewable [exec [interpreter] $file] +} -cleanup { + removeFile iso2022.tcl +} -result "ab\x1b\$B8C\x1b\$(DD%\x1b(Bg (ab\\u001b\$B8C\\u001b\$(DD%\\u001b(Bg)" + test encoding-24.3 {EscapeFreeProc on open channels} {stdio} { - # Bug #219314 - if we don't free escape encodings correctly on channel - # closure, we go boom + # Bug #219314 - if we don't free escape encodings correctly on + # channel closure, we go boom set file [makeFile { encoding system iso2022-jp - set a "乎乞也"; # 3 Japanese Kanji letters + set a "\u4e4e\u4e5e\u4e5f"; # 3 Japanese Kanji letters puts $a } iso2022.tcl] set f [open "|[list [interpreter] $file]"] @@ -760,153 +436,8 @@ test encoding-24.3 {EscapeFreeProc on open channels} {stdio} { set count [gets $f line] close $f removeFile iso2022.tcl - list $count $line -} [list 3 乎乞也] - -test encoding-24.4.strict {Parse invalid utf-8, strict} -body { - encoding convertfrom -profile strict utf-8 "\xC0\x80" -} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} -test encoding-24.4.tcl8 {UtfToUtfProc utf-8} { - encoding convertfrom -profile tcl8 utf-8 \xC0\x80 -} \x00 -test encoding-24.5 {Parse valid or invalid utf-8} { - string length [encoding convertfrom -profile tcl8 utf-8 "\xC0\x81"] -} 2 -test encoding-24.6 {Parse valid or invalid utf-8} { - string length [encoding convertfrom -profile tcl8 utf-8 "\xC1\xBF"] -} 2 -test encoding-24.7 {Parse valid or invalid utf-8} { - string length [encoding convertfrom utf-8 "\xC2\x80"] -} 1 -test encoding-24.8 {Parse valid or invalid utf-8} { - string length [encoding convertfrom -profile tcl8 utf-8 "\xE0\x80\x80"] -} 3 -test encoding-24.9 {Parse valid or invalid utf-8} { - string length [encoding convertfrom -profile tcl8 utf-8 "\xE0\x9F\xBF"] -} 3 -test encoding-24.10 {Parse valid or invalid utf-8} { - string length [encoding convertfrom utf-8 "\xE0\xA0\x80"] -} 1 -test encoding-24.11 {Parse valid or invalid utf-8} { - string length [encoding convertfrom -profile tcl8 utf-8 "\xEF\xBF\xBF"] -} 1 -test encoding-24.12 {Parse invalid utf-8} -body { - encoding convertfrom -profile tcl8 utf-8 "\xC0\x81" -} -result \xC0\x81 -test encoding-24.12.1 {Parse invalid utf-8} -body { - encoding convertfrom -profile strict utf-8 "\xC0\x81" -} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} -test encoding-24.13 {Parse invalid utf-8} -body { - encoding convertfrom -profile tcl8 utf-8 "\xC1\xBF" -} -result \xC1\xBF -test encoding-24.13.1 {Parse invalid utf-8} -body { - encoding convertfrom -profile strict utf-8 "\xC1\xBF" -} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC1'} -test encoding-24.14 {Parse valid utf-8} { - encoding convertfrom utf-8 "\xC2\x80" -} \x80 -test encoding-24.15.strict {Parse invalid utf-8, -profile strict} -body { - encoding convertfrom -profile strict utf-8 "Z\xE0\x80" -} -returnCodes 1 -result "unexpected byte sequence starting at index 1: '\\xE0'" -test encoding-24.15.tcl8 {Parse invalid utf-8, -profile tcl8} -body { - encoding convertfrom -profile tcl8 utf-8 "Z\xE0\x80" -} -result Z\xE0\u20AC -test encoding-24.16 {Parse valid or invalid utf-8} -constraints testbytestring -body { - encoding convertto utf-8 [testbytestring "Z\u4343\x80"] -} -returnCodes 1 -result {expected byte sequence but character 1 was '䍃' (U+004343)} -test encoding-24.17 {Parse valid or invalid utf-8} -constraints testbytestring -body { - encoding convertto utf-8 [testbytestring "Z\xE0\x80"] -} -result "Z\xC3\xA0\xE2\x82\xAC" -test encoding-24.18 {Parse valid or invalid utf-8} -constraints testbytestring -body { - encoding convertto utf-8 [testbytestring "Z\xE0\x80xxxxxx"] -} -result "Z\xC3\xA0\xE2\x82\xACxxxxxx" -test encoding-24.19.1 {Parse valid or invalid utf-8} -body { - encoding convertto -profile tcl8 utf-8 "ZX\uD800" -} -result ZX\xED\xA0\x80 -test encoding-24.19.2 {Parse valid or invalid utf-8} -body { - encoding convertto -profile strict utf-8 "ZX\uD800" -} -returnCodes 1 -match glob -result "unexpected character at index 2: 'U+00D800'" -test encoding-24.20 {Parse with -profile tcl8 but without providing encoding} -body { - encoding convertfrom -profile tcl8 "\x20" -} -result {wrong # args: should be "::tcl::encoding::convertfrom ?-profile profile? ?-failindex var? encoding data" or "::tcl::encoding::convertfrom data"} -returnCodes error -test encoding-24.21 {Parse with -profile tcl8 but without providing encoding} -body { - string length [encoding convertto -profile tcl8 "\x20"] -} -result {wrong # args: should be "::tcl::encoding::convertto ?-profile profile? ?-failindex var? encoding data" or "::tcl::encoding::convertto data"} -returnCodes error -test encoding-24.22 {Syntax error, two encodings} -body { - encoding convertfrom iso8859-1 utf-8 "ZX\uD800" -} -result {bad option "iso8859-1": must be -profile or -failindex} -returnCodes error -test encoding-24.23 {Syntax error, two encodings} -body { - encoding convertto iso8859-1 utf-8 "ZX\uD800" -} -result {bad option "iso8859-1": must be -profile or -failindex} -returnCodes error -test encoding-24.24 {Parse invalid utf-8 with -profile strict} -body { - encoding convertfrom -profile strict utf-8 "\xC0\x80\x00\x00" -} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} -test encoding-24.25 {Parse invalid utf-8 with -profile strict} -body { - encoding convertfrom -profile strict utf-8 "\x40\x80\x00\x00" -} -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\x80'} -test encoding-24.26 {Parse valid utf-8 with -profile strict} -body { - encoding convertfrom -profile strict utf-8 "\xF1\x80\x80\x80" -} -result \U40000 -test encoding-24.27 {Parse invalid utf-8 with -profile strict} -body { - encoding convertfrom -profile strict utf-8 "\xF0\x80\x80\x80" -} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xF0'} -test encoding-24.28 {Parse invalid utf-8 with -profile strict} -body { - encoding convertfrom -profile strict utf-8 "\xFF\x00\x00" -} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xFF'} -test encoding-24.29 {Parse invalid utf-8} -body { - encoding convertfrom utf-8 \xEF\xBF\xBF -} -result \uFFFF -test encoding-24.30 {Parse noncharacter with -profile strict} -body { - encoding convertfrom -profile strict utf-8 \xEF\xBF\xBF -} -result \uFFFF -test encoding-24.31 {Parse invalid utf-8 with -profile tcl8} -body { - encoding convertfrom -profile tcl8 utf-8 \xEF\xBF\xBF -} -result \uFFFF -test encoding-24.32 {Try to generate invalid utf-8} -body { - encoding convertto utf-8 \uFFFF -} -result \xEF\xBF\xBF -test encoding-24.33 {Try to generate invalid utf-8} -body { - encoding convertto -profile strict utf-8 \uFFFF -} -result \xEF\xBF\xBF -test encoding-24.34 {Try to generate invalid utf-8 with -profile tcl8} -body { - encoding convertto -profile tcl8 utf-8 \uFFFF -} -result \xEF\xBF\xBF -test encoding-24.35 {Parse invalid utf-8} -body { - encoding convertfrom -profile tcl8 utf-8 \xED\xA0\x80 -} -result \uD800 -test encoding-24.36 {Parse invalid utf-8 with -profile strict} -body { - encoding convertfrom -profile strict utf-8 \xED\xA0\x80 -} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'} -test encoding-24.37 {Parse invalid utf-8 with -profile tcl8} -body { - encoding convertfrom -profile tcl8 utf-8 \xED\xA0\x80 -} -result \uD800 -test encoding-24.38.1 {Try to generate invalid utf-8} -body { - encoding convertto -profile tcl8 utf-8 \uD800 -} -result \xED\xA0\x80 -test encoding-24.38.2 {Try to generate invalid utf-8} -body { - encoding convertto -profile strict utf-8 \uD800 -} -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'} -test encoding-24.39 {Try to generate invalid utf-8 with -profile strict} -body { - encoding convertto -profile strict utf-8 \uD800 -} -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'} -test encoding-24.40 {Try to generate invalid utf-8 with -profile tcl8} -body { - encoding convertto -profile tcl8 utf-8 \uD800 -} -result \xED\xA0\x80 -test encoding-24.41 {Parse invalid utf-8 with -profile strict} -body { - encoding convertfrom -profile strict utf-8 \xED\xA0\x80\xED\xB0\x80 -} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'} -test encoding-24.42 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body { - encoding convertfrom -profile tcl8 utf-8 \xF0\x80\x80\x80 -} -result \xF0\u20AC\u20AC\u20AC -test encoding-24.43 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body { - encoding convertfrom -profile tcl8 utf-8 \x80 -} -result \u20AC -test encoding-24.44 {Try to generate invalid ucs-2 with -profile strict} -body { - encoding convertto -profile strict ucs-2 \uD800 -} -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'} -test encoding-24.45 {Try to generate invalid ucs-2 with -profile strict} -body { - encoding convertto -profile strict ucs-2 \U10000 -} -returnCodes 1 -result {unexpected character at index 0: 'U+010000'} + list $count [viewable $line] +} [list 3 "\u4e4e\u4e5e\u4e5f (\\u4e4e\\u4e5e\\u4e5f)"] file delete [file join [temporaryDirectory] iso2022.txt] @@ -938,14 +469,18 @@ proc foreach-jisx0208 {varName command} { } { if {[llength $range] == 2} { # for adhoc range. simple {first last}. inclusive. - scan $range %x%x first last + set first [scan [lindex $range 0] %x] + set last [scan [lindex $range 1] %x] for {set i $first} {$i <= $last} {incr i} { set code $i uplevel 1 $command } } elseif {[llength $range] == 4} { # for uniform range. - scan $range %x%x%x%x h0 l0 hend lend + set h0 [scan [lindex $range 0] %x] + set l0 [scan [lindex $range 1] %x] + set hend [scan [lindex $range 2] %x] + set lend [scan [lindex $range 3] %x] for {set hi $h0} {$hi <= $hend} {incr hi} { for {set lo $l0} {$lo <= $lend} {incr lo} { set code [expr {$hi << 8 | ($lo & 0xff)}] @@ -959,20 +494,20 @@ proc foreach-jisx0208 {varName command} { } proc gen-jisx0208-euc-jp {code} { binary format cc \ - [expr {($code >> 8) | 0x80}] [expr {($code & 0xFF) | 0x80}] + [expr {($code >> 8) | 0x80}] [expr {($code & 0xff) | 0x80}] } proc gen-jisx0208-iso2022-jp {code} { binary format a3cca3 \ - "\x1B\$B" [expr {$code >> 8}] [expr {$code & 0xFF}] "\x1B(B" + "\x1b\$B" [expr {$code >> 8}] [expr {$code & 0xff}] "\x1b(B" } proc gen-jisx0208-cp932 {code} { set c1 [expr {($code >> 8) | 0x80}] set c2 [expr {($code & 0xff)| 0x80}] if {$c1 % 2} { - set c1 [expr {($c1 >> 1) + ($c1 < 0xDF ? 0x31 : 0x71)}] - incr c2 [expr {- (0x60 + ($c2 < 0xE0))}] + set c1 [expr {($c1 >> 1) + ($c1 < 0xdf ? 0x31 : 0x71)}] + incr c2 [expr {- (0x60 + ($c2 < 0xe0))}] } else { - set c1 [expr {($c1 >> 1) + ($c1 < 0xDF ? 0x30 : 0x70)}] + set c1 [expr {($c1 >> 1) + ($c1 < 0xdf ? 0x30 : 0x70)}] incr c2 -2 } binary format cc $c1 $c2 @@ -989,7 +524,7 @@ proc channel-diff {fa fb} { binary scan [lindex $lb 1] H* got lappend diff [list $code $expected $got] } - return $diff + set diff } # Create char tables. @@ -1008,9 +543,8 @@ file copy -force cp932.chars shiftjis.chars set NUM 0 foreach from {cp932 shiftjis euc-jp iso2022-jp} { foreach to {cp932 shiftjis euc-jp iso2022-jp} { - test encoding-25.[incr NUM] "jisx0208 $from => $to" -setup { + test encoding-25.[incr NUM] "jisx0208 $from => $to" { cd [temporaryDirectory] - } -body { set f [open $from.chars] fconfigure $f -encoding $from set out [open $from.$to.tcltestout w] @@ -1018,33 +552,40 @@ foreach from {cp932 shiftjis euc-jp iso2022-jp} { puts -nonewline $out [read $f] close $out close $f + # then compare $to.chars <=> $from.to.tcltestout as binary. - set fa [open $to.chars rb] - set fb [open $from.$to.tcltestout rb] - channel-diff $fa $fb - # Difference should be empty. - } -cleanup { + set fa [open $to.chars] + fconfigure $fa -encoding binary + set fb [open $from.$to.tcltestout] + fconfigure $fb -encoding binary + set diff [channel-diff $fa $fb] close $fa close $fb - } -result {} + + # Difference should be empty. + set diff + } {} } } -test encoding-26.0 {Tcl_GetEncodingSearchPath} -setup { - set origPath [encoding dirs] - encoding dirs slappy +testConstraint testgetdefenc [llength [info commands testgetdefenc]] + +test encoding-26.0 {Tcl_GetDefaultEncodingDir} -constraints { + testgetdefenc +} -setup { + set origDir [testgetdefenc] + testsetdefenc slappy } -body { - encoding dirs + testgetdefenc } -cleanup { - encoding dirs $origPath + testsetdefenc $origDir } -result slappy file delete {*}[glob -directory [temporaryDirectory] *.chars *.tcltestout] # ===> Cut here <=== -# EscapeFreeProc, GetTableEncoding, unilen are fully tested by the rest of -# this file. - +# EscapeFreeProc, GetTableEncoding, unilen +# are fully tested by the rest of this file test encoding-27.1 {encoding dirs basic behavior} -returnCodes error -body { encoding dirs ? ? @@ -1054,109 +595,11 @@ test encoding-27.2 {encoding dirs basic behavior} -returnCodes error -body { } -result "expected directory list but got \"\{not a list\"" } - - -test encoding-28.0 {all encodings load} -body { - set string hello - foreach name [encoding names] { - if {$name ne "unicode"} { - incr count - } - encoding convertto -profile tcl8 $name $string - - # discard the cached internal representation of Tcl_Encoding - # Unfortunately, without this, encoding 2-1 fails. - llength $name - } - return $count -} -result 91 - runtests -test encoding-bug-183a1adcc0-1 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints { - testencoding -} -body { - # Note - buffers are initialized to \xFF - list [catch {testencoding Tcl_UtfToExternal utf-16 A {start end} {} 1} result] $result -} -result [list 0 [list nospace {} \xFF]] - -test encoding-bug-183a1adcc0-2 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints { - testencoding -} -body { - # Note - buffers are initialized to \xFF - list [catch {testencoding Tcl_UtfToExternal utf-16 A {start end} {} 0} result] $result -} -result [list 0 [list nospace {} {}]] - -test encoding-bug-183a1adcc0-3 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints { - testencoding -} -body { - # Note - buffers are initialized to \xFF - list [catch {testencoding Tcl_UtfToExternal utf-16 A {start end} {} 2} result] $result -} -result [list 0 [list nospace {} \x00\x00]] - -test encoding-bug-183a1adcc0-4 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints { - testencoding -} -body { - # Note - buffers are initialized to \xFF - list [catch {testencoding Tcl_UtfToExternal utf-16 A {start end} {} 3} result] $result -} -result [list 0 [list nospace {} \x00\x00\xFF]] - -test encoding-bug-183a1adcc0-5 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints { - testencoding -} -body { - list [catch {testencoding Tcl_UtfToExternal utf-16 A {start end} {} 4} result] $result -} -result [list 0 [list ok {} [expr {$::tcl_platform(byteOrder) eq "littleEndian" ? "\x41\x00" : "\x00\x41"}]\x00\x00]] - } -test encoding-29.0 {get encoding nul terminator lengths} -constraints { - testencoding -} -body { - list \ - [testencoding nullength ascii] \ - [testencoding nullength utf-16] \ - [testencoding nullength utf-32] \ - [testencoding nullength gb12345] \ - [testencoding nullength ksc5601] -} -result {1 2 4 2 2} - -test encoding-bug-6a3e2cb0f0-1 {Bug [6a3e2cb0f0] - invalid bytes in escape encodings} -body { - encoding convertfrom -profile tcl8 iso2022-jp x\x1B\x7Aaby -} -result x\uFFFDy -test encoding-bug-6a3e2cb0f0-2 {Bug [6a3e2cb0f0] - invalid bytes in escape encodings} -body { - encoding convertfrom -profile strict iso2022-jp x\x1B\x7Aaby -} -returnCodes error -result {unexpected byte sequence starting at index 1: '\x1B'} -test encoding-bug-6a3e2cb0f0-3 {Bug [6a3e2cb0f0] - invalid bytes in escape encodings} -body { - encoding convertfrom -profile replace iso2022-jp x\x1B\x7Aaby -} -result x\uFFFDy - -test encoding-bug-66ffafd309-1-tcl8 {Bug [66ffafd309] - truncated DBCS} -body { - encoding convertfrom -profile tcl8 gb12345 x -} -result x -test encoding-bug-66ffafd309-1-strict {Bug [66ffafd309] - truncated DBCS} -body { - encoding convertfrom -profile strict gb12345 x -} -result {unexpected byte sequence starting at index 0: '\x78'} -returnCodes error -test encoding-bug-66ffafd309-1-replace {Bug [66ffafd309] - truncated DBCS} -body { - encoding convertfrom -profile replace gb12345 x -} -result \uFFFD -test encoding-bug-66ffafd309-2-tcl8 {Bug [66ffafd309] - invalid DBCS} -body { - # Not truncated but invalid - encoding convertfrom -profile tcl8 jis0208 \x78\x79 -} -result \x78\x79 -test encoding-bug-66ffafd309-2-strict {Bug [66ffafd309] - invalid DBCS} -body { - # Not truncated but invalid - encoding convertfrom -profile strict jis0208 \x78\x79 -} -result {unexpected byte sequence starting at index 1: '\x79'} -returnCodes error -test encoding-bug-66ffafd309-2-replace {Bug [66ffafd309] - invalid DBCS} -body { - # Not truncated but invalid - encoding convertfrom -profile replace jis0208 \x78\x79 -} -result \uFFFD\uFFFD - # cleanup namespace delete ::tcl::test::encoding ::tcltest::cleanupTests return - -# Local Variables: -# mode: tcl -# End: |
