diff options
Diffstat (limited to 'tests/encoding.test')
| -rw-r--r-- | tests/encoding.test | 377 |
1 files changed, 301 insertions, 76 deletions
diff --git a/tests/encoding.test b/tests/encoding.test index 4dd2e98..35340a6 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -106,6 +106,14 @@ test encoding-3.2 {Tcl_GetEncodingName, non-null} -setup { } -cleanup { fconfigure stdout -encoding $old } -result {jis0208} +test encoding-3.3 {fconfigure -profile} -setup { + set old [fconfigure stdout -profile] +} -body { + fconfigure stdout -profile replace + fconfigure stdout -profile +} -cleanup { + fconfigure stdout -profile $old +} -result replace test encoding-4.1 {Tcl_GetEncodingNames} -constraints {testencoding} -setup { cd [makeDirectory tmp] @@ -172,7 +180,7 @@ test encoding-6.2 {Tcl_CreateEncoding: replace encoding} {testencoding} { test encoding-7.1 {Tcl_ExternalToUtfDString: small buffer} { encoding convertfrom jis0208 8c8c8c8c -} "吾吾吾吾" +} 吾吾吾吾 test encoding-7.2 {Tcl_UtfToExternalDString: big buffer} { set a 8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C append a $a @@ -194,7 +202,7 @@ test encoding-8.1 {Tcl_ExternalToUtf} { close $f file delete [file join [temporaryDirectory] dummy] return $x -} "ab乎g" +} ab乎g test encoding-9.1 {Tcl_UtfToExternalDString: small buffer} { encoding convertto jis0208 "吾吾吾吾" @@ -214,7 +222,7 @@ test encoding-9.2 {Tcl_UtfToExternalDString: big buffer} { test encoding-10.1 {Tcl_UtfToExternal} { set f [open [file join [temporaryDirectory] dummy] w] fconfigure $f -translation binary -encoding shiftjis - puts -nonewline $f "ab乎g" + puts -nonewline $f ab乎g close $f set f [open [file join [temporaryDirectory] dummy] r] fconfigure $f -translation binary -encoding iso8859-1 @@ -249,7 +257,7 @@ test encoding-11.1 {LoadEncodingFile: unknown encoding} {testencoding} { } {1 {unknown encoding "jis0208"} 8C} test encoding-11.2 {LoadEncodingFile: single-byte} { encoding convertfrom jis0201 \xA1 -} "。" +} 。 test encoding-11.3 {LoadEncodingFile: double-byte} { encoding convertfrom jis0208 8C } 乎 @@ -300,7 +308,7 @@ test encoding-11.11 {encoding: extended Unicode UTF-32} { test encoding-12.1 {LoadTableEncoding: normal encoding} { set x [encoding convertto iso8859-3 Ġ] - append x [encoding convertto -nocomplain iso8859-3 Õ] + append x [encoding convertto -profile tcl8 iso8859-3 Õ] append x [encoding convertfrom iso8859-3 Õ] } "Õ?Ġ" test encoding-12.2 {LoadTableEncoding: single-byte encoding} { @@ -339,7 +347,7 @@ test encoding-15.3 {UtfToUtfProc null character input} teststringbytes { } c080 test encoding-15.4 {UtfToUtfProc emoji character input} -body { set x \xED\xA0\xBD\xED\xB8\x82 - set y [encoding convertfrom -nocomplain utf-8 \xED\xA0\xBD\xED\xB8\x82] + set y [encoding convertfrom -profile tcl8 utf-8 \xED\xA0\xBD\xED\xB8\x82] list [string length $x] $y } -result "6 \uD83D\uDE02" test encoding-15.5 {UtfToUtfProc emoji character input} { @@ -349,67 +357,67 @@ test encoding-15.5 {UtfToUtfProc emoji character input} { } "4 😂" test encoding-15.6 {UtfToUtfProc emoji character output} utf32 { set x \uDE02\uD83D\uDE02\uD83D - set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uDE02\uD83D] + set y [encoding convertto -profile tcl8 utf-8 \uDE02\uD83D\uDE02\uD83D] binary scan $y H* z list [string length $y] $z } {12 edb882eda0bdedb882eda0bd} test encoding-15.7 {UtfToUtfProc emoji character output} { set x \uDE02\uD83D\uD83D - set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uD83D] + set y [encoding convertto -profile tcl8 utf-8 \uDE02\uD83D\uD83D] binary scan $y H* z list [string length $x] [string length $y] $z } {3 9 edb882eda0bdeda0bd} test encoding-15.8 {UtfToUtfProc emoji character output} { set x \uDE02\uD83Dé - set y [encoding convertto -nocomplain utf-8 \uDE02\uD83Dé] + set y [encoding convertto -profile tcl8 utf-8 \uDE02\uD83Dé] binary scan $y H* z list [string length $x] [string length $y] $z } {3 8 edb882eda0bdc3a9} test encoding-15.9 {UtfToUtfProc emoji character output} { set x \uDE02\uD83DX - set y [encoding convertto -nocomplain utf-8 \uDE02\uD83DX] + set y [encoding convertto -profile tcl8 utf-8 \uDE02\uD83DX] binary scan $y H* z list [string length $x] [string length $y] $z } {3 7 edb882eda0bd58} test encoding-15.10 {UtfToUtfProc high surrogate character output} { set x \uDE02é - set y [encoding convertto -nocomplain utf-8 \uDE02é] + set y [encoding convertto -profile tcl8 utf-8 \uDE02é] binary scan $y H* z list [string length $x] [string length $y] $z } {2 5 edb882c3a9} test encoding-15.11 {UtfToUtfProc low surrogate character output} { set x \uDA02é - set y [encoding convertto -nocomplain utf-8 \uDA02é] + set y [encoding convertto -profile tcl8 utf-8 \uDA02é] binary scan $y H* z list [string length $x] [string length $y] $z } {2 5 eda882c3a9} test encoding-15.12 {UtfToUtfProc high surrogate character output} { set x \uDE02Y - set y [encoding convertto -nocomplain utf-8 \uDE02Y] + set y [encoding convertto -profile tcl8 utf-8 \uDE02Y] binary scan $y H* z list [string length $x] [string length $y] $z } {2 4 edb88259} test encoding-15.13 {UtfToUtfProc low surrogate character output} { set x \uDA02Y - set y [encoding convertto -nocomplain utf-8 \uDA02Y] + set y [encoding convertto -profile tcl8 utf-8 \uDA02Y] binary scan $y H* z list [string length $x] [string length $y] $z } {2 4 eda88259} test encoding-15.14 {UtfToUtfProc high surrogate character output} { set x \uDE02 - set y [encoding convertto -nocomplain utf-8 \uDE02] + set y [encoding convertto -profile tcl8 utf-8 \uDE02] binary scan $y H* z list [string length $x] [string length $y] $z } {1 3 edb882} test encoding-15.15 {UtfToUtfProc low surrogate character output} { set x \uDA02 - set y [encoding convertto -nocomplain utf-8 \uDA02] + set y [encoding convertto -profile tcl8 utf-8 \uDA02] binary scan $y H* z list [string length $x] [string length $y] $z } {1 3 eda882} test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} { set x \xF0\xA0\xA1\xC2 - set y [encoding convertfrom -nocomplain utf-8 \xF0\xA0\xA1\xC2] + set y [encoding convertfrom -profile tcl8 utf-8 \xF0\xA0\xA1\xC2] list [string length $x] $y } "4 \xF0\xA0\xA1\xC2" test encoding-15.17 {UtfToUtfProc emoji character output} { @@ -453,6 +461,27 @@ test encoding-15.24 {UtfToUtfProc CESU-8 bug [048dd20b4171c8da]} { binary scan $y H* z list [string length $y] $z } {2 cfbf} +test encoding-15.25 {UtfToUtfProc CESU-8} { + encoding convertfrom cesu-8 \x00 +} \x00 +test encoding-15.26 {UtfToUtfProc CESU-8} { + encoding convertfrom cesu-8 \xC0\x80 +} \x00 +test encoding-15.27 {UtfToUtfProc -profile strict CESU-8} { + encoding convertfrom -profile strict cesu-8 \x00 +} \x00 +test encoding-15.28 {UtfToUtfProc -profile strict CESU-8} -body { + encoding convertfrom -profile strict cesu-8 \xC0\x80 +} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} +test encoding-15.29 {UtfToUtfProc CESU-8} { + encoding convertto cesu-8 \x00 +} \x00 +test encoding-15.30 {UtfToUtfProc -profile strict CESU-8} { + encoding convertto -profile strict cesu-8 \x00 +} \x00 +test encoding-15.31 {UtfToUtfProc -profile strict CESU-8 (bytes F0-F4 are invalid)} -body { + encoding convertfrom -profile strict cesu-8 \xF1\x86\x83\x9C +} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xF1'} test encoding-16.1 {Utf16ToUtfProc} -body { set val [encoding convertfrom utf-16 NN] @@ -482,18 +511,90 @@ test encoding-16.7 {Utf32ToUtfProc} -body { set val [encoding convertfrom utf-32be \0\0NN] list $val [format %x [scan $val %c]] } -result "乎 4e4e" +test encoding-16.8 {Utf32ToUtfProc} -body { + set val [encoding convertfrom -profile tcl8 utf-32 \x41\x00\x00\x41] + list $val [format %x [scan $val %c]] +} -result "\uFFFD fffd" +test encoding-16.9 {Utf32ToUtfProc} -constraints utf32 -body { + encoding convertfrom utf-32le \x00\xD8\x00\x00 +} -result \uD800 +test encoding-16.10 {Utf32ToUtfProc} -body { + encoding convertfrom utf-32le \x00\xDC\x00\x00 +} -result \uDC00 +test encoding-16.11 {Utf32ToUtfProc} -body { + encoding convertfrom utf-32le \x00\xD8\x00\x00\x00\xDC\x00\x00 +} -result \uD800\uDC00 +test encoding-16.12 {Utf32ToUtfProc} -constraints utf32 -body { + encoding convertfrom utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00 +} -result \uDC00\uD800 +test encoding-16.13 {Utf16ToUtfProc} -body { + encoding convertfrom utf-16le \x00\xD8 +} -result \uD800 +test encoding-16.14 {Utf16ToUtfProc} -body { + encoding convertfrom utf-16le \x00\xDC +} -result \uDC00 +test encoding-16.15 {Utf16ToUtfProc} -body { + encoding convertfrom utf-16le \x00\xD8\x00\xDC +} -result \U010000 +test encoding-16.16 {Utf16ToUtfProc} -body { + encoding convertfrom utf-16le \x00\xDC\x00\xD8 +} -result \uDC00\uD800 +test encoding-16.17 {Utf32ToUtfProc} -body { + list [encoding convertfrom -profile strict -failindex idx utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00] [set idx] +} -result {A 4} + +test encoding-16.18 { + Utf16ToUtfProc, Tcl_UniCharToUtf, surrogate pairs in utf-16 +} -body { + apply [list {} { + for {set i 0xD800} {$i < 0xDBFF} {incr i} { + for {set j 0xDC00} {$j < 0xDFFF} {incr j} { + set string [binary format S2 [list $i $j]] + set status [catch { + set decoded [encoding convertfrom utf-16be $string] + set encoded [encoding convertto utf-16be $decoded] + }] + if {$status || ( $encoded ne $string )} { + return [list [format %x $i] [format %x $j]] + } + } + } + return done + } [namespace current]] +} -result done +test encoding-16.19 {Utf16ToUtfProc, bug [d19fe0a5b]} -body { + encoding convertfrom utf-16 "\x41\x41\x41" +} -result \u4141\uFFFD +test encoding-16.20 {Utf16ToUtfProc, bug [d19fe0a5b]} -constraints deprecated -body { + encoding convertfrom utf-16 "\xD8\xD8" +} -result \uD8D8 +test encoding-16.21 {Utf16ToUtfProc, bug [d19fe0a5b]} -body { + encoding convertfrom utf-32 "\x00\x00\x00\x00\x41\x41" +} -result \x00\uFFFD +test encoding-16.22 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body { + encoding convertfrom -profile strict utf-16le \x00\xD8 +} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'} +test encoding-16.23 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body { + encoding convertfrom -profile strict utf-16le \x00\xDC +} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'} +test encoding-16.24 {Utf32ToUtfProc} -body { + encoding convertfrom utf-32 "\xFF\xFF\xFF\xFF" +} -result \uFFFD +test encoding-16.25 {Utf32ToUtfProc} -body { + encoding convertfrom utf-32 "\x01\x00\x00\x01" +} -result \uFFFD test encoding-17.1 {UtfToUtf16Proc} -body { encoding convertto utf-16 "\U460DC" } -result "\xD8\xD8\xDC\xDC" test encoding-17.2 {UtfToUcs2Proc} -body { - encoding convertfrom utf-16 [encoding convertto ucs-2 "\U460DC"] -} -result "\uFFFD" + encoding convertfrom utf-16 \xD8\xD8\xDC\xDC +} -result "\U460DC" test encoding-17.3 {UtfToUtf16Proc} -body { - encoding convertto -nocomplain utf-16be "\uDCDC" + encoding convertto -profile tcl8 utf-16be "\uDCDC" } -result "\xDC\xDC" test encoding-17.4 {UtfToUtf16Proc} -body { - encoding convertto -nocomplain utf-16le "\uD8D8" + encoding convertto -profile tcl8 utf-16le "\uD8D8" } -result "\xD8\xD8" test encoding-17.5 {UtfToUtf16Proc} -body { encoding convertto utf-32le "\U460DC" @@ -502,23 +603,61 @@ test encoding-17.6 {UtfToUtf16Proc} -body { encoding convertto utf-32be "\U460DC" } -result "\x00\x04\x60\xDC" test encoding-17.7 {UtfToUtf16Proc} -body { - encoding convertto -strict utf-16be "\uDCDC" + encoding convertto -profile strict utf-16be "\uDCDC" } -returnCodes error -result {unexpected character at index 0: 'U+00DCDC'} test encoding-17.8 {UtfToUtf16Proc} -body { - encoding convertto -strict utf-16le "\uD8D8" + encoding convertto -profile strict utf-16le "\uD8D8" } -returnCodes error -result {unexpected character at index 0: 'U+00D8D8'} test encoding-17.9 {Utf32ToUtfProc} -body { - encoding convertfrom -strict utf-32 "\xFF\xFF\xFF\xFF" + encoding convertfrom -profile strict utf-32 "\xFF\xFF\xFF\xFF" } -returnCodes error -result {unexpected byte sequence starting at index 0: '\xFF'} test encoding-17.10 {Utf32ToUtfProc} -body { - encoding convertfrom -nocomplain utf-32 "\xFF\xFF\xFF\xFF" + encoding convertfrom -profile tcl8 utf-32 "\xFF\xFF\xFF\xFF" } -result \uFFFD +test encoding-17.11 {Utf32ToUtfProc} -body { + encoding convertfrom -profile strict utf-32le "\x00\xD8\x00\x00" +} -returnCodes error -result {unexpected byte sequence starting at index 0: '\x00'} +test encoding-17.12 {Utf32ToUtfProc} -body { + encoding convertfrom -profile strict utf-32le "\x00\xDC\x00\x00" +} -returnCodes error -result {unexpected byte sequence starting at index 0: '\x00'} -test encoding-18.1 {TableToUtfProc} { -} {} +test encoding-18.1 {TableToUtfProc on invalid input} -body { + list [catch {encoding convertto jis0208 \\} res] $res +} -result {0 !)} +test encoding-18.2 {TableToUtfProc on invalid input with -profile strict} -body { + list [catch {encoding convertto -profile strict jis0208 \\} res] $res +} -result {1 {unexpected character at index 0: 'U+00005C'}} +test encoding-18.3 {TableToUtfProc on invalid input with -profile strict -failindex} -body { + list [catch {encoding convertto -profile strict -failindex pos jis0208 \\} res] $res $pos +} -result {0 {} 0} +test encoding-18.4 {TableToUtfProc on invalid input with -failindex -profile strict} -body { + list [catch {encoding convertto -failindex pos -profile strict jis0208 \\} res] $res $pos +} -result {0 {} 0} +test encoding-18.5 {TableToUtfProc on invalid input with -failindex} -body { + list [catch {encoding convertto -failindex pos jis0208 \\} res] $res $pos +} -result {0 !) -1} +test encoding-18.6 {TableToUtfProc on invalid input with -profile tcl8} -body { + list [catch {encoding convertto -profile tcl8 jis0208 \\} res] $res +} -result {0 !)} -test encoding-19.1 {TableFromUtfProc} { -} {} +test encoding-19.1 {TableFromUtfProc} -body { + encoding convertfrom ascii AÁ +} -result AÁ +test encoding-19.2 {TableFromUtfProc} -body { + encoding convertfrom -profile tcl8 ascii AÁ +} -result AÁ +test encoding-19.3 {TableFromUtfProc} -body { + encoding convertfrom -profile strict ascii AÁ +} -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\xC1'} +test encoding-19.4 {TableFromUtfProc} -body { + list [encoding convertfrom -failindex idx ascii AÁ] [set idx] +} -result [list A\xC1 -1] +test encoding-19.5 {TableFromUtfProc} -body { + list [encoding convertfrom -failindex idx -profile strict ascii A\xC1] [set idx] +} -result {A 1} +test encoding-19.6 {TableFromUtfProc} -body { + list [encoding convertfrom -failindex idx -profile strict ascii AÁB] [set idx] +} -result {A 1} test encoding-20.1 {TableFreefProc} { } {} @@ -630,38 +769,38 @@ test encoding-24.4 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC0\x80"] } 1 test encoding-24.5 {Parse valid or invalid utf-8} { - string length [encoding convertfrom -nocomplain utf-8 "\xC0\x81"] + string length [encoding convertfrom -profile tcl8 utf-8 "\xC0\x81"] } 2 test encoding-24.6 {Parse valid or invalid utf-8} { - string length [encoding convertfrom -nocomplain utf-8 "\xC1\xBF"] + string length [encoding convertfrom -profile tcl8 utf-8 "\xC1\xBF"] } 2 test encoding-24.7 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC2\x80"] } 1 test encoding-24.8 {Parse valid or invalid utf-8} { - string length [encoding convertfrom -nocomplain utf-8 "\xE0\x80\x80"] + string length [encoding convertfrom -profile tcl8 utf-8 "\xE0\x80\x80"] } 3 test encoding-24.9 {Parse valid or invalid utf-8} { - string length [encoding convertfrom -nocomplain utf-8 "\xE0\x9F\xBF"] + string length [encoding convertfrom -profile tcl8 utf-8 "\xE0\x9F\xBF"] } 3 test encoding-24.10 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xE0\xA0\x80"] } 1 test encoding-24.11 {Parse valid or invalid utf-8} { - string length [encoding convertfrom -nocomplain utf-8 "\xEF\xBF\xBF"] + string length [encoding convertfrom -profile tcl8 utf-8 "\xEF\xBF\xBF"] } 1 test encoding-24.12 {Parse valid or invalid utf-8} -body { - encoding convertfrom -strict utf-8 "\xC0\x81" + encoding convertfrom -profile strict utf-8 "\xC0\x81" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} test encoding-24.13 {Parse valid or invalid utf-8} -body { - encoding convertfrom -strict utf-8 "\xC1\xBF" + encoding convertfrom -profile strict utf-8 "\xC1\xBF" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC1'} test encoding-24.14 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC2\x80"] } 1 test encoding-24.15 {Parse valid or invalid utf-8} -body { encoding convertfrom utf-8 "Z\xE0\x80" -} -result Z\xE0\x80 +} -result Z\xE0\u20AC test encoding-24.16 {Parse valid or invalid utf-8} -constraints testbytestring -body { encoding convertto utf-8 [testbytestring "Z\u4343\x80"] } -returnCodes 1 -result {expected byte sequence but character 1 was '䍃' (U+004343)} @@ -671,72 +810,93 @@ test encoding-24.17 {Parse valid or invalid utf-8} -constraints testbytestring - test encoding-24.18 {Parse valid or invalid utf-8} -constraints testbytestring -body { encoding convertto utf-8 [testbytestring "Z\xE0\x80xxxxxx"] } -result "Z\xC3\xA0\xE2\x82\xACxxxxxx" -test encoding-24.19 {Parse valid or invalid utf-8} -body { - encoding convertto utf-8 "ZX\uD800" +test encoding-24.19.1 {Parse valid or invalid utf-8} -body { + encoding convertto -profile tcl8 utf-8 "ZX\uD800" +} -result ZX\xED\xA0\x80 +test encoding-24.19.2 {Parse valid or invalid utf-8} -body { + encoding convertto -profile strict utf-8 "ZX\uD800" } -returnCodes 1 -match glob -result "unexpected character at index 2: 'U+00D800'" -test encoding-24.20 {Parse with -nocomplain but without providing encoding} { - string length [encoding convertfrom -nocomplain "\x20"] -} 1 -test encoding-24.21 {Parse with -nocomplain but without providing encoding} { - string length [encoding convertto -nocomplain "\x20"] -} 1 +test encoding-24.20 {Parse with -profile tcl8 but without providing encoding} -body { + encoding convertfrom -profile tcl8 "\x20" +} -result {wrong # args: should be "::tcl::encoding::convertfrom ?-profile profile? ?-failindex var? encoding data" or "::tcl::encoding::convertfrom data"} -returnCodes error +test encoding-24.21 {Parse with -profile tcl8 but without providing encoding} -body { + string length [encoding convertto -profile tcl8 "\x20"] +} -result {wrong # args: should be "::tcl::encoding::convertto ?-profile profile? ?-failindex var? encoding data" or "::tcl::encoding::convertto data"} -returnCodes error test encoding-24.22 {Syntax error, two encodings} -body { encoding convertfrom iso8859-1 utf-8 "ZX\uD800" -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -result {bad option "iso8859-1": must be -profile or -failindex} -returnCodes error test encoding-24.23 {Syntax error, two encodings} -body { encoding convertto iso8859-1 utf-8 "ZX\uD800" -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} -test encoding-24.24 {Parse invalid utf-8 with -strict} -body { - encoding convertfrom -strict utf-8 "\xC0\x80\x00\x00" +} -result {bad option "iso8859-1": must be -profile or -failindex} -returnCodes error +test encoding-24.24 {Parse invalid utf-8 with -profile strict} -body { + encoding convertfrom -profile strict utf-8 "\xC0\x80\x00\x00" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} -test encoding-24.25 {Parse invalid utf-8 with -strict} -body { - encoding convertfrom -strict utf-8 "\x40\x80\x00\x00" +test encoding-24.25 {Parse invalid utf-8 with -profile strict} -body { + encoding convertfrom -profile strict utf-8 "\x40\x80\x00\x00" } -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\x80'} -test encoding-24.26 {Parse valid utf-8 with -strict} -body { - encoding convertfrom -strict utf-8 "\xF1\x80\x80\x80" +test encoding-24.26 {Parse valid utf-8 with -profile strict} -body { + encoding convertfrom -profile strict utf-8 "\xF1\x80\x80\x80" } -result \U40000 -test encoding-24.27 {Parse invalid utf-8 with -strict} -body { - encoding convertfrom -strict utf-8 "\xF0\x80\x80\x80" +test encoding-24.27 {Parse invalid utf-8 with -profile strict} -body { + encoding convertfrom -profile strict utf-8 "\xF0\x80\x80\x80" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xF0'} -test encoding-24.28 {Parse invalid utf-8 with -strict} -body { - encoding convertfrom -strict utf-8 "\xFF\x00\x00" +test encoding-24.28 {Parse invalid utf-8 with -profile strict} -body { + encoding convertfrom -profile strict utf-8 "\xFF\x00\x00" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xFF'} test encoding-24.29 {Parse invalid utf-8} -body { encoding convertfrom utf-8 \xEF\xBF\xBF } -result \uFFFF -test encoding-24.30 {Parse noncharacter with -strict} -body { - encoding convertfrom -strict utf-8 \xEF\xBF\xBF +test encoding-24.30 {Parse noncharacter with -profile strict} -body { + encoding convertfrom -profile strict utf-8 \xEF\xBF\xBF } -result \uFFFF -test encoding-24.31 {Parse invalid utf-8 with -nocomplain} -body { - encoding convertfrom -nocomplain utf-8 \xEF\xBF\xBF +test encoding-24.31 {Parse invalid utf-8 with -profile tcl8} -body { + encoding convertfrom -profile tcl8 utf-8 \xEF\xBF\xBF } -result \uFFFF test encoding-24.32 {Try to generate invalid utf-8} -body { encoding convertto utf-8 \uFFFF } -result \xEF\xBF\xBF -test encoding-24.33 {Try to generate noncharacter with -strict} -body { - encoding convertto -strict utf-8 \uFFFF +test encoding-24.33 {Try to generate noncharacter with -profile strict} -body { + encoding convertto -profile strict utf-8 \uFFFF } -result \xEF\xBF\xBF -test encoding-24.34 {Try to generate invalid utf-8 with -nocomplain} -body { - encoding convertto -nocomplain utf-8 \uFFFF +test encoding-24.34 {Try to generate invalid utf-8 with -profile tcl8} -body { + encoding convertto -profile tcl8 utf-8 \uFFFF } -result \xEF\xBF\xBF test encoding-24.35 {Parse invalid utf-8} -constraints utf32 -body { encoding convertfrom utf-8 \xED\xA0\x80 } -result \uD800 -test encoding-24.36 {Parse invalid utf-8 with -strict} -body { - encoding convertfrom -strict utf-8 \xED\xA0\x80 +test encoding-24.36 {Parse invalid utf-8 with -profile strict} -body { + encoding convertfrom -profile strict utf-8 \xED\xA0\x80 } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'} -test encoding-24.37 {Parse invalid utf-8 with -nocomplain} -body { - encoding convertfrom -nocomplain utf-8 \xED\xA0\x80 +test encoding-24.37 {Parse invalid utf-8 with -profile tcl8} -body { + encoding convertfrom -profile tcl8 utf-8 \xED\xA0\x80 } -result \uD800 -test encoding-24.38 {Try to generate invalid utf-8} -body { - encoding convertto utf-8 \uD800 +test encoding-24.38.1 {Try to generate invalid utf-8} -body { + encoding convertto -profile tcl8 utf-8 \uD800 +} -result \xED\xA0\x80 +test encoding-24.38.2 {Try to generate invalid utf-8} -body { + encoding convertto -profile strict utf-8 \uD800 } -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'} -test encoding-24.39 {Try to generate invalid utf-8 with -strict} -body { - encoding convertto -strict utf-8 \uD800 +test encoding-24.39 {Try to generate invalid utf-8 with -profile strict} -body { + encoding convertto -profile strict utf-8 \uD800 } -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'} -test encoding-24.40 {Try to generate invalid utf-8 with -nocomplain} -body { - encoding convertto -nocomplain utf-8 \uD800 +test encoding-24.40 {Try to generate invalid utf-8 with -profile tcl8} -body { + encoding convertto -profile tcl8 utf-8 \uD800 } -result \xED\xA0\x80 +test encoding-24.41 {Parse invalid utf-8 with -profile strict} -body { + encoding convertfrom -profile strict utf-8 \xED\xA0\x80\xED\xB0\x80 +} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'} +test encoding-24.42 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body { + encoding convertfrom -profile tcl8 utf-8 \xF0\x80\x80\x80 +} -result \xF0\u20AC\u20AC\u20AC +test encoding-24.43 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body { + encoding convertfrom -profile tcl8 utf-8 \x80 +} -result \u20AC +test encoding-24.44 {Try to generate invalid ucs-2 with -profile strict} -body { + encoding convertto -profile strict ucs-2 \uD800 +} -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'} +test encoding-24.45 {Try to generate invalid ucs-2 with -profile strict} -body { + encoding convertto -profile strict ucs-2 \U10000 +} -returnCodes 1 -result {unexpected character at index 0: 'U+010000'} file delete [file join [temporaryDirectory] iso2022.txt] @@ -894,7 +1054,7 @@ test encoding-28.0 {all encodings load} -body { if {$name ne "unicode"} { incr count } - encoding convertto -nocomplain $name $string + encoding convertto -profile tcl8 $name $string # discard the cached internal representation of Tcl_Encoding # Unfortunately, without this, encoding 2-1 fails. @@ -905,6 +1065,44 @@ test encoding-28.0 {all encodings load} -body { runtests +test encoding-bug-183a1adcc0-1 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints { + testencoding +} -body { + # Note - buffers are initialized to \xff + list [catch {testencoding Tcl_UtfToExternal utf-16 A {start end} {} 1} result] $result +} -result [list 0 [list nospace {} \xff]] + +test encoding-bug-183a1adcc0-2 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints { + testencoding +} -body { + # Note - buffers are initialized to \xff + list [catch {testencoding Tcl_UtfToExternal utf-16 A {start end} {} 0} result] $result +} -result [list 0 [list nospace {} {}]] + +test encoding-bug-183a1adcc0-3 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints { + testencoding +} -body { + # Note - buffers are initialized to \xff + list [catch {testencoding Tcl_UtfToExternal utf-16 A {start end} {} 2} result] $result +} -result [list 0 [list nospace {} \x00\x00]] + +test encoding-bug-183a1adcc0-4 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints { + testencoding +} -body { + # Note - buffers are initialized to \xff + list [catch {testencoding Tcl_UtfToExternal utf-16 A {start end} {} 3} result] $result +} -result [list 0 [list nospace {} \x00\x00\xff]] + +test encoding-bug-183a1adcc0-5 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints { + testencoding ucs2 knownBug +} -body { + # The knownBug constraint is because test depends on TCL_UTF_MAX and + # also UtfToUtf16 assumes space required in destination buffer is + # sizeof(Tcl_UniChar) which is incorrect when TCL_UTF_MAX==4 + # Note - buffers are initialized to \xff + list [catch {testencoding Tcl_UtfToExternal utf-16 A {start end} {} 4} result] $result +} -result [list 0 [list ok {} [expr {$::tcl_platform(byteOrder) eq "littleEndian" ? "\x41\x00" : "\x00\x41"}]\x00\x00]] + } test encoding-29.0 {get encoding nul terminator lengths} -constraints { @@ -918,6 +1116,33 @@ test encoding-29.0 {get encoding nul terminator lengths} -constraints { [testencoding nullength ksc5601] } -result {1 2 4 2 2} +test encoding-30.0 {encoding convertto large strings UINT_MAX} -constraints { + perf +} -body { + # Test to ensure not misinterpreted as -1 + list [string length [set s [string repeat A 0xFFFFFFFF]]] [string equal $s [encoding convertto ascii $s]] +} -result {4294967295 1} + +test encoding-30.1 {encoding convertto large strings > 4GB} -constraints { + perf +} -body { + list [string length [set s [string repeat A 0x100000000]]] [string equal $s [encoding convertto ascii $s]] +} -result {4294967296 1} + +test encoding-30.2 {encoding convertfrom large strings UINT_MAX} -constraints { + perf +} -body { + # Test to ensure not misinterpreted as -1 + list [string length [set s [string repeat A 0xFFFFFFFF]]] [string equal $s [encoding convertfrom ascii $s]] +} -result {4294967295 1} + +test encoding-30.3 {encoding convertfrom large strings > 4GB} -constraints { + perf +} -body { + list [string length [set s [string repeat A 0x100000000]]] [string equal $s [encoding convertfrom ascii $s]] +} -result {4294967296 1} + + # cleanup namespace delete ::tcl::test::encoding ::tcltest::cleanupTests |
