summary refs log tree commit diff stats
path: root/tests/encoding.test
diff options
context:
space:
mode:
Diffstat (limited to 'tests/encoding.test')
-rw-r--r-- tests/encoding.test 377
1 files changed, 301 insertions, 76 deletions
diff --git a/tests/encoding.test b/tests/encoding.test
index 4dd2e98..35340a6 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -106,6 +106,14 @@ test encoding-3.2 {Tcl_GetEncodingName, non-null} -setup {
} -cleanup {
fconfigure stdout -encoding $old
} -result {jis0208}
+test encoding-3.3 {fconfigure -profile} -setup {
+ set old [fconfigure stdout -profile]
+} -body {
+ fconfigure stdout -profile replace
+ fconfigure stdout -profile
+} -cleanup {
+ fconfigure stdout -profile $old
+} -result replace
test encoding-4.1 {Tcl_GetEncodingNames} -constraints {testencoding} -setup {
cd [makeDirectory tmp]
@@ -172,7 +180,7 @@ test encoding-6.2 {Tcl_CreateEncoding: replace encoding} {testencoding} {
test encoding-7.1 {Tcl_ExternalToUtfDString: small buffer} {
encoding convertfrom jis0208 8c8c8c8c
-} "吾吾吾吾"
+} 吾吾吾吾
test encoding-7.2 {Tcl_UtfToExternalDString: big buffer} {
set a 8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C
append a $a
@@ -194,7 +202,7 @@ test encoding-8.1 {Tcl_ExternalToUtf} {
close $f
file delete [file join [temporaryDirectory] dummy]
return $x
-} "ab乎g"
+} ab乎g
test encoding-9.1 {Tcl_UtfToExternalDString: small buffer} {
encoding convertto jis0208 "吾吾吾吾"
@@ -214,7 +222,7 @@ test encoding-9.2 {Tcl_UtfToExternalDString: big buffer} {
test encoding-10.1 {Tcl_UtfToExternal} {
set f [open [file join [temporaryDirectory] dummy] w]
fconfigure $f -translation binary -encoding shiftjis
- puts -nonewline $f "ab乎g"
+ puts -nonewline $f ab乎g
close $f
set f [open [file join [temporaryDirectory] dummy] r]
fconfigure $f -translation binary -encoding iso8859-1
@@ -249,7 +257,7 @@ test encoding-11.1 {LoadEncodingFile: unknown encoding} {testencoding} {
} {1 {unknown encoding "jis0208"} 8C}
test encoding-11.2 {LoadEncodingFile: single-byte} {
encoding convertfrom jis0201 \xA1
-} "。"
+} 。
test encoding-11.3 {LoadEncodingFile: double-byte} {
encoding convertfrom jis0208 8C
} 乎
@@ -300,7 +308,7 @@ test encoding-11.11 {encoding: extended Unicode UTF-32} {
test encoding-12.1 {LoadTableEncoding: normal encoding} {
set x [encoding convertto iso8859-3 Ġ]
- append x [encoding convertto -nocomplain iso8859-3 Õ]
+ append x [encoding convertto -profile tcl8 iso8859-3 Õ]
append x [encoding convertfrom iso8859-3 Õ]
} "Õ?Ġ"
test encoding-12.2 {LoadTableEncoding: single-byte encoding} {
@@ -339,7 +347,7 @@ test encoding-15.3 {UtfToUtfProc null character input} teststringbytes {
} c080
test encoding-15.4 {UtfToUtfProc emoji character input} -body {
set x \xED\xA0\xBD\xED\xB8\x82
- set y [encoding convertfrom -nocomplain utf-8 \xED\xA0\xBD\xED\xB8\x82]
+ set y [encoding convertfrom -profile tcl8 utf-8 \xED\xA0\xBD\xED\xB8\x82]
list [string length $x] $y
} -result "6 \uD83D\uDE02"
test encoding-15.5 {UtfToUtfProc emoji character input} {
@@ -349,67 +357,67 @@ test encoding-15.5 {UtfToUtfProc emoji character input} {
} "4 😂"
test encoding-15.6 {UtfToUtfProc emoji character output} utf32 {
set x \uDE02\uD83D\uDE02\uD83D
- set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uDE02\uD83D]
+ set y [encoding convertto -profile tcl8 utf-8 \uDE02\uD83D\uDE02\uD83D]
binary scan $y H* z
list [string length $y] $z
} {12 edb882eda0bdedb882eda0bd}
test encoding-15.7 {UtfToUtfProc emoji character output} {
set x \uDE02\uD83D\uD83D
- set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uD83D]
+ set y [encoding convertto -profile tcl8 utf-8 \uDE02\uD83D\uD83D]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {3 9 edb882eda0bdeda0bd}
test encoding-15.8 {UtfToUtfProc emoji character output} {
set x \uDE02\uD83Dé
- set y [encoding convertto -nocomplain utf-8 \uDE02\uD83Dé]
+ set y [encoding convertto -profile tcl8 utf-8 \uDE02\uD83Dé]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {3 8 edb882eda0bdc3a9}
test encoding-15.9 {UtfToUtfProc emoji character output} {
set x \uDE02\uD83DX
- set y [encoding convertto -nocomplain utf-8 \uDE02\uD83DX]
+ set y [encoding convertto -profile tcl8 utf-8 \uDE02\uD83DX]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {3 7 edb882eda0bd58}
test encoding-15.10 {UtfToUtfProc high surrogate character output} {
set x \uDE02é
- set y [encoding convertto -nocomplain utf-8 \uDE02é]
+ set y [encoding convertto -profile tcl8 utf-8 \uDE02é]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {2 5 edb882c3a9}
test encoding-15.11 {UtfToUtfProc low surrogate character output} {
set x \uDA02é
- set y [encoding convertto -nocomplain utf-8 \uDA02é]
+ set y [encoding convertto -profile tcl8 utf-8 \uDA02é]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {2 5 eda882c3a9}
test encoding-15.12 {UtfToUtfProc high surrogate character output} {
set x \uDE02Y
- set y [encoding convertto -nocomplain utf-8 \uDE02Y]
+ set y [encoding convertto -profile tcl8 utf-8 \uDE02Y]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {2 4 edb88259}
test encoding-15.13 {UtfToUtfProc low surrogate character output} {
set x \uDA02Y
- set y [encoding convertto -nocomplain utf-8 \uDA02Y]
+ set y [encoding convertto -profile tcl8 utf-8 \uDA02Y]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {2 4 eda88259}
test encoding-15.14 {UtfToUtfProc high surrogate character output} {
set x \uDE02
- set y [encoding convertto -nocomplain utf-8 \uDE02]
+ set y [encoding convertto -profile tcl8 utf-8 \uDE02]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {1 3 edb882}
test encoding-15.15 {UtfToUtfProc low surrogate character output} {
set x \uDA02
- set y [encoding convertto -nocomplain utf-8 \uDA02]
+ set y [encoding convertto -profile tcl8 utf-8 \uDA02]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {1 3 eda882}
test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} {
set x \xF0\xA0\xA1\xC2
- set y [encoding convertfrom -nocomplain utf-8 \xF0\xA0\xA1\xC2]
+ set y [encoding convertfrom -profile tcl8 utf-8 \xF0\xA0\xA1\xC2]
list [string length $x] $y
} "4 \xF0\xA0\xA1\xC2"
test encoding-15.17 {UtfToUtfProc emoji character output} {
@@ -453,6 +461,27 @@ test encoding-15.24 {UtfToUtfProc CESU-8 bug [048dd20b4171c8da]} {
binary scan $y H* z
list [string length $y] $z
} {2 cfbf}
+test encoding-15.25 {UtfToUtfProc CESU-8} {
+ encoding convertfrom cesu-8 \x00
+} \x00
+test encoding-15.26 {UtfToUtfProc CESU-8} {
+ encoding convertfrom cesu-8 \xC0\x80
+} \x00
+test encoding-15.27 {UtfToUtfProc -profile strict CESU-8} {
+ encoding convertfrom -profile strict cesu-8 \x00
+} \x00
+test encoding-15.28 {UtfToUtfProc -profile strict CESU-8} -body {
+ encoding convertfrom -profile strict cesu-8 \xC0\x80
+} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'}
+test encoding-15.29 {UtfToUtfProc CESU-8} {
+ encoding convertto cesu-8 \x00
+} \x00
+test encoding-15.30 {UtfToUtfProc -profile strict CESU-8} {
+ encoding convertto -profile strict cesu-8 \x00
+} \x00
+test encoding-15.31 {UtfToUtfProc -profile strict CESU-8 (bytes F0-F4 are invalid)} -body {
+ encoding convertfrom -profile strict cesu-8 \xF1\x86\x83\x9C
+} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xF1'}
test encoding-16.1 {Utf16ToUtfProc} -body {
set val [encoding convertfrom utf-16 NN]
@@ -482,18 +511,90 @@ test encoding-16.7 {Utf32ToUtfProc} -body {
set val [encoding convertfrom utf-32be \0\0NN]
list $val [format %x [scan $val %c]]
} -result "乎 4e4e"
+test encoding-16.8 {Utf32ToUtfProc} -body {
+ set val [encoding convertfrom -profile tcl8 utf-32 \x41\x00\x00\x41]
+ list $val [format %x [scan $val %c]]
+} -result "\uFFFD fffd"
+test encoding-16.9 {Utf32ToUtfProc} -constraints utf32 -body {
+ encoding convertfrom utf-32le \x00\xD8\x00\x00
+} -result \uD800
+test encoding-16.10 {Utf32ToUtfProc} -body {
+ encoding convertfrom utf-32le \x00\xDC\x00\x00
+} -result \uDC00
+test encoding-16.11 {Utf32ToUtfProc} -body {
+ encoding convertfrom utf-32le \x00\xD8\x00\x00\x00\xDC\x00\x00
+} -result \uD800\uDC00
+test encoding-16.12 {Utf32ToUtfProc} -constraints utf32 -body {
+ encoding convertfrom utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00
+} -result \uDC00\uD800
+test encoding-16.13 {Utf16ToUtfProc} -body {
+ encoding convertfrom utf-16le \x00\xD8
+} -result \uD800
+test encoding-16.14 {Utf16ToUtfProc} -body {
+ encoding convertfrom utf-16le \x00\xDC
+} -result \uDC00
+test encoding-16.15 {Utf16ToUtfProc} -body {
+ encoding convertfrom utf-16le \x00\xD8\x00\xDC
+} -result \U010000
+test encoding-16.16 {Utf16ToUtfProc} -body {
+ encoding convertfrom utf-16le \x00\xDC\x00\xD8
+} -result \uDC00\uD800
+test encoding-16.17 {Utf32ToUtfProc} -body {
+ list [encoding convertfrom -profile strict -failindex idx utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00] [set idx]
+} -result {A 4}
+
+test encoding-16.18 {
+ Utf16ToUtfProc, Tcl_UniCharToUtf, surrogate pairs in utf-16
+} -body {
+ apply [list {} {
+ for {set i 0xD800} {$i < 0xDBFF} {incr i} {
+ for {set j 0xDC00} {$j < 0xDFFF} {incr j} {
+ set string [binary format S2 [list $i $j]]
+ set status [catch {
+ set decoded [encoding convertfrom utf-16be $string]
+ set encoded [encoding convertto utf-16be $decoded]
+ }]
+ if {$status || ( $encoded ne $string )} {
+ return [list [format %x $i] [format %x $j]]
+ }
+ }
+ }
+ return done
+ } [namespace current]]
+} -result done
+test encoding-16.19 {Utf16ToUtfProc, bug [d19fe0a5b]} -body {
+ encoding convertfrom utf-16 "\x41\x41\x41"
+} -result \u4141\uFFFD
+test encoding-16.20 {Utf16ToUtfProc, bug [d19fe0a5b]} -constraints deprecated -body {
+ encoding convertfrom utf-16 "\xD8\xD8"
+} -result \uD8D8
+test encoding-16.21 {Utf16ToUtfProc, bug [d19fe0a5b]} -body {
+ encoding convertfrom utf-32 "\x00\x00\x00\x00\x41\x41"
+} -result \x00\uFFFD
+test encoding-16.22 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body {
+ encoding convertfrom -profile strict utf-16le \x00\xD8
+} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'}
+test encoding-16.23 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body {
+ encoding convertfrom -profile strict utf-16le \x00\xDC
+} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'}
+test encoding-16.24 {Utf32ToUtfProc} -body {
+ encoding convertfrom utf-32 "\xFF\xFF\xFF\xFF"
+} -result \uFFFD
+test encoding-16.25 {Utf32ToUtfProc} -body {
+ encoding convertfrom utf-32 "\x01\x00\x00\x01"
+} -result \uFFFD
test encoding-17.1 {UtfToUtf16Proc} -body {
encoding convertto utf-16 "\U460DC"
} -result "\xD8\xD8\xDC\xDC"
test encoding-17.2 {UtfToUcs2Proc} -body {
- encoding convertfrom utf-16 [encoding convertto ucs-2 "\U460DC"]
-} -result "\uFFFD"
+ encoding convertfrom utf-16 \xD8\xD8\xDC\xDC
+} -result "\U460DC"
test encoding-17.3 {UtfToUtf16Proc} -body {
- encoding convertto -nocomplain utf-16be "\uDCDC"
+ encoding convertto -profile tcl8 utf-16be "\uDCDC"
} -result "\xDC\xDC"
test encoding-17.4 {UtfToUtf16Proc} -body {
- encoding convertto -nocomplain utf-16le "\uD8D8"
+ encoding convertto -profile tcl8 utf-16le "\uD8D8"
} -result "\xD8\xD8"
test encoding-17.5 {UtfToUtf16Proc} -body {
encoding convertto utf-32le "\U460DC"
@@ -502,23 +603,61 @@ test encoding-17.6 {UtfToUtf16Proc} -body {
encoding convertto utf-32be "\U460DC"
} -result "\x00\x04\x60\xDC"
test encoding-17.7 {UtfToUtf16Proc} -body {
- encoding convertto -strict utf-16be "\uDCDC"
+ encoding convertto -profile strict utf-16be "\uDCDC"
} -returnCodes error -result {unexpected character at index 0: 'U+00DCDC'}
test encoding-17.8 {UtfToUtf16Proc} -body {
- encoding convertto -strict utf-16le "\uD8D8"
+ encoding convertto -profile strict utf-16le "\uD8D8"
} -returnCodes error -result {unexpected character at index 0: 'U+00D8D8'}
test encoding-17.9 {Utf32ToUtfProc} -body {
- encoding convertfrom -strict utf-32 "\xFF\xFF\xFF\xFF"
+ encoding convertfrom -profile strict utf-32 "\xFF\xFF\xFF\xFF"
} -returnCodes error -result {unexpected byte sequence starting at index 0: '\xFF'}
test encoding-17.10 {Utf32ToUtfProc} -body {
- encoding convertfrom -nocomplain utf-32 "\xFF\xFF\xFF\xFF"
+ encoding convertfrom -profile tcl8 utf-32 "\xFF\xFF\xFF\xFF"
} -result \uFFFD
+test encoding-17.11 {Utf32ToUtfProc} -body {
+ encoding convertfrom -profile strict utf-32le "\x00\xD8\x00\x00"
+} -returnCodes error -result {unexpected byte sequence starting at index 0: '\x00'}
+test encoding-17.12 {Utf32ToUtfProc} -body {
+ encoding convertfrom -profile strict utf-32le "\x00\xDC\x00\x00"
+} -returnCodes error -result {unexpected byte sequence starting at index 0: '\x00'}
-test encoding-18.1 {TableToUtfProc} {
-} {}
+test encoding-18.1 {TableToUtfProc on invalid input} -body {
+ list [catch {encoding convertto jis0208 \\} res] $res
+} -result {0 !)}
+test encoding-18.2 {TableToUtfProc on invalid input with -profile strict} -body {
+ list [catch {encoding convertto -profile strict jis0208 \\} res] $res
+} -result {1 {unexpected character at index 0: 'U+00005C'}}
+test encoding-18.3 {TableToUtfProc on invalid input with -profile strict -failindex} -body {
+ list [catch {encoding convertto -profile strict -failindex pos jis0208 \\} res] $res $pos
+} -result {0 {} 0}
+test encoding-18.4 {TableToUtfProc on invalid input with -failindex -profile strict} -body {
+ list [catch {encoding convertto -failindex pos -profile strict jis0208 \\} res] $res $pos
+} -result {0 {} 0}
+test encoding-18.5 {TableToUtfProc on invalid input with -failindex} -body {
+ list [catch {encoding convertto -failindex pos jis0208 \\} res] $res $pos
+} -result {0 !) -1}
+test encoding-18.6 {TableToUtfProc on invalid input with -profile tcl8} -body {
+ list [catch {encoding convertto -profile tcl8 jis0208 \\} res] $res
+} -result {0 !)}
-test encoding-19.1 {TableFromUtfProc} {
-} {}
+test encoding-19.1 {TableFromUtfProc} -body {
+ encoding convertfrom ascii AÁ
+} -result AÁ
+test encoding-19.2 {TableFromUtfProc} -body {
+ encoding convertfrom -profile tcl8 ascii AÁ
+} -result AÁ
+test encoding-19.3 {TableFromUtfProc} -body {
+ encoding convertfrom -profile strict ascii AÁ
+} -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\xC1'}
+test encoding-19.4 {TableFromUtfProc} -body {
+ list [encoding convertfrom -failindex idx ascii AÁ] [set idx]
+} -result [list A\xC1 -1]
+test encoding-19.5 {TableFromUtfProc} -body {
+ list [encoding convertfrom -failindex idx -profile strict ascii A\xC1] [set idx]
+} -result {A 1}
+test encoding-19.6 {TableFromUtfProc} -body {
+ list [encoding convertfrom -failindex idx -profile strict ascii AÁB] [set idx]
+} -result {A 1}
test encoding-20.1 {TableFreefProc} {
} {}
@@ -630,38 +769,38 @@ test encoding-24.4 {Parse valid or invalid utf-8} {
string length [encoding convertfrom utf-8 "\xC0\x80"]
} 1
test encoding-24.5 {Parse valid or invalid utf-8} {
- string length [encoding convertfrom -nocomplain utf-8 "\xC0\x81"]
+ string length [encoding convertfrom -profile tcl8 utf-8 "\xC0\x81"]
} 2
test encoding-24.6 {Parse valid or invalid utf-8} {
- string length [encoding convertfrom -nocomplain utf-8 "\xC1\xBF"]
+ string length [encoding convertfrom -profile tcl8 utf-8 "\xC1\xBF"]
} 2
test encoding-24.7 {Parse valid or invalid utf-8} {
string length [encoding convertfrom utf-8 "\xC2\x80"]
} 1
test encoding-24.8 {Parse valid or invalid utf-8} {
- string length [encoding convertfrom -nocomplain utf-8 "\xE0\x80\x80"]
+ string length [encoding convertfrom -profile tcl8 utf-8 "\xE0\x80\x80"]
} 3
test encoding-24.9 {Parse valid or invalid utf-8} {
- string length [encoding convertfrom -nocomplain utf-8 "\xE0\x9F\xBF"]
+ string length [encoding convertfrom -profile tcl8 utf-8 "\xE0\x9F\xBF"]
} 3
test encoding-24.10 {Parse valid or invalid utf-8} {
string length [encoding convertfrom utf-8 "\xE0\xA0\x80"]
} 1
test encoding-24.11 {Parse valid or invalid utf-8} {
- string length [encoding convertfrom -nocomplain utf-8 "\xEF\xBF\xBF"]
+ string length [encoding convertfrom -profile tcl8 utf-8 "\xEF\xBF\xBF"]
} 1
test encoding-24.12 {Parse valid or invalid utf-8} -body {
- encoding convertfrom -strict utf-8 "\xC0\x81"
+ encoding convertfrom -profile strict utf-8 "\xC0\x81"
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'}
test encoding-24.13 {Parse valid or invalid utf-8} -body {
- encoding convertfrom -strict utf-8 "\xC1\xBF"
+ encoding convertfrom -profile strict utf-8 "\xC1\xBF"
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC1'}
test encoding-24.14 {Parse valid or invalid utf-8} {
string length [encoding convertfrom utf-8 "\xC2\x80"]
} 1
test encoding-24.15 {Parse valid or invalid utf-8} -body {
encoding convertfrom utf-8 "Z\xE0\x80"
-} -result Z\xE0\x80
+} -result Z\xE0\u20AC
test encoding-24.16 {Parse valid or invalid utf-8} -constraints testbytestring -body {
encoding convertto utf-8 [testbytestring "Z\u4343\x80"]
} -returnCodes 1 -result {expected byte sequence but character 1 was '䍃€' (U+004343)}
@@ -671,72 +810,93 @@ test encoding-24.17 {Parse valid or invalid utf-8} -constraints testbytestring -
test encoding-24.18 {Parse valid or invalid utf-8} -constraints testbytestring -body {
encoding convertto utf-8 [testbytestring "Z\xE0\x80xxxxxx"]
} -result "Z\xC3\xA0\xE2\x82\xACxxxxxx"
-test encoding-24.19 {Parse valid or invalid utf-8} -body {
- encoding convertto utf-8 "ZX\uD800"
+test encoding-24.19.1 {Parse valid or invalid utf-8} -body {
+ encoding convertto -profile tcl8 utf-8 "ZX\uD800"
+} -result ZX\xED\xA0\x80
+test encoding-24.19.2 {Parse valid or invalid utf-8} -body {
+ encoding convertto -profile strict utf-8 "ZX\uD800"
} -returnCodes 1 -match glob -result "unexpected character at index 2: 'U+00D800'"
-test encoding-24.20 {Parse with -nocomplain but without providing encoding} {
- string length [encoding convertfrom -nocomplain "\x20"]
-} 1
-test encoding-24.21 {Parse with -nocomplain but without providing encoding} {
- string length [encoding convertto -nocomplain "\x20"]
-} 1
+test encoding-24.20 {Parse with -profile tcl8 but without providing encoding} -body {
+ encoding convertfrom -profile tcl8 "\x20"
+} -result {wrong # args: should be "::tcl::encoding::convertfrom ?-profile profile? ?-failindex var? encoding data" or "::tcl::encoding::convertfrom data"} -returnCodes error
+test encoding-24.21 {Parse with -profile tcl8 but without providing encoding} -body {
+ string length [encoding convertto -profile tcl8 "\x20"]
+} -result {wrong # args: should be "::tcl::encoding::convertto ?-profile profile? ?-failindex var? encoding data" or "::tcl::encoding::convertto data"} -returnCodes error
test encoding-24.22 {Syntax error, two encodings} -body {
encoding convertfrom iso8859-1 utf-8 "ZX\uD800"
-} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"}
+} -result {bad option "iso8859-1": must be -profile or -failindex} -returnCodes error
test encoding-24.23 {Syntax error, two encodings} -body {
encoding convertto iso8859-1 utf-8 "ZX\uD800"
-} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"}
-test encoding-24.24 {Parse invalid utf-8 with -strict} -body {
- encoding convertfrom -strict utf-8 "\xC0\x80\x00\x00"
+} -result {bad option "iso8859-1": must be -profile or -failindex} -returnCodes error
+test encoding-24.24 {Parse invalid utf-8 with -profile strict} -body {
+ encoding convertfrom -profile strict utf-8 "\xC0\x80\x00\x00"
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'}
-test encoding-24.25 {Parse invalid utf-8 with -strict} -body {
- encoding convertfrom -strict utf-8 "\x40\x80\x00\x00"
+test encoding-24.25 {Parse invalid utf-8 with -profile strict} -body {
+ encoding convertfrom -profile strict utf-8 "\x40\x80\x00\x00"
} -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\x80'}
-test encoding-24.26 {Parse valid utf-8 with -strict} -body {
- encoding convertfrom -strict utf-8 "\xF1\x80\x80\x80"
+test encoding-24.26 {Parse valid utf-8 with -profile strict} -body {
+ encoding convertfrom -profile strict utf-8 "\xF1\x80\x80\x80"
} -result \U40000
-test encoding-24.27 {Parse invalid utf-8 with -strict} -body {
- encoding convertfrom -strict utf-8 "\xF0\x80\x80\x80"
+test encoding-24.27 {Parse invalid utf-8 with -profile strict} -body {
+ encoding convertfrom -profile strict utf-8 "\xF0\x80\x80\x80"
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xF0'}
-test encoding-24.28 {Parse invalid utf-8 with -strict} -body {
- encoding convertfrom -strict utf-8 "\xFF\x00\x00"
+test encoding-24.28 {Parse invalid utf-8 with -profile strict} -body {
+ encoding convertfrom -profile strict utf-8 "\xFF\x00\x00"
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xFF'}
test encoding-24.29 {Parse invalid utf-8} -body {
encoding convertfrom utf-8 \xEF\xBF\xBF
} -result \uFFFF
-test encoding-24.30 {Parse noncharacter with -strict} -body {
- encoding convertfrom -strict utf-8 \xEF\xBF\xBF
+test encoding-24.30 {Parse noncharacter with -profile strict} -body {
+ encoding convertfrom -profile strict utf-8 \xEF\xBF\xBF
} -result \uFFFF
-test encoding-24.31 {Parse invalid utf-8 with -nocomplain} -body {
- encoding convertfrom -nocomplain utf-8 \xEF\xBF\xBF
+test encoding-24.31 {Parse invalid utf-8 with -profile tcl8} -body {
+ encoding convertfrom -profile tcl8 utf-8 \xEF\xBF\xBF
} -result \uFFFF
test encoding-24.32 {Try to generate invalid utf-8} -body {
encoding convertto utf-8 \uFFFF
} -result \xEF\xBF\xBF
-test encoding-24.33 {Try to generate noncharacter with -strict} -body {
- encoding convertto -strict utf-8 \uFFFF
+test encoding-24.33 {Try to generate noncharacter with -profile strict} -body {
+ encoding convertto -profile strict utf-8 \uFFFF
} -result \xEF\xBF\xBF
-test encoding-24.34 {Try to generate invalid utf-8 with -nocomplain} -body {
- encoding convertto -nocomplain utf-8 \uFFFF
+test encoding-24.34 {Try to generate invalid utf-8 with -profile tcl8} -body {
+ encoding convertto -profile tcl8 utf-8 \uFFFF
} -result \xEF\xBF\xBF
test encoding-24.35 {Parse invalid utf-8} -constraints utf32 -body {
encoding convertfrom utf-8 \xED\xA0\x80
} -result \uD800
-test encoding-24.36 {Parse invalid utf-8 with -strict} -body {
- encoding convertfrom -strict utf-8 \xED\xA0\x80
+test encoding-24.36 {Parse invalid utf-8 with -profile strict} -body {
+ encoding convertfrom -profile strict utf-8 \xED\xA0\x80
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'}
-test encoding-24.37 {Parse invalid utf-8 with -nocomplain} -body {
- encoding convertfrom -nocomplain utf-8 \xED\xA0\x80
+test encoding-24.37 {Parse invalid utf-8 with -profile tcl8} -body {
+ encoding convertfrom -profile tcl8 utf-8 \xED\xA0\x80
} -result \uD800
-test encoding-24.38 {Try to generate invalid utf-8} -body {
- encoding convertto utf-8 \uD800
+test encoding-24.38.1 {Try to generate invalid utf-8} -body {
+ encoding convertto -profile tcl8 utf-8 \uD800
+} -result \xED\xA0\x80
+test encoding-24.38.2 {Try to generate invalid utf-8} -body {
+ encoding convertto -profile strict utf-8 \uD800
} -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'}
-test encoding-24.39 {Try to generate invalid utf-8 with -strict} -body {
- encoding convertto -strict utf-8 \uD800
+test encoding-24.39 {Try to generate invalid utf-8 with -profile strict} -body {
+ encoding convertto -profile strict utf-8 \uD800
} -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'}
-test encoding-24.40 {Try to generate invalid utf-8 with -nocomplain} -body {
- encoding convertto -nocomplain utf-8 \uD800
+test encoding-24.40 {Try to generate invalid utf-8 with -profile tcl8} -body {
+ encoding convertto -profile tcl8 utf-8 \uD800
} -result \xED\xA0\x80
+test encoding-24.41 {Parse invalid utf-8 with -profile strict} -body {
+ encoding convertfrom -profile strict utf-8 \xED\xA0\x80\xED\xB0\x80
+} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'}
+test encoding-24.42 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body {
+ encoding convertfrom -profile tcl8 utf-8 \xF0\x80\x80\x80
+} -result \xF0\u20AC\u20AC\u20AC
+test encoding-24.43 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body {
+ encoding convertfrom -profile tcl8 utf-8 \x80
+} -result \u20AC
+test encoding-24.44 {Try to generate invalid ucs-2 with -profile strict} -body {
+ encoding convertto -profile strict ucs-2 \uD800
+} -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'}
+test encoding-24.45 {Try to generate invalid ucs-2 with -profile strict} -body {
+ encoding convertto -profile strict ucs-2 \U10000
+} -returnCodes 1 -result {unexpected character at index 0: 'U+010000'}
file delete [file join [temporaryDirectory] iso2022.txt]
@@ -894,7 +1054,7 @@ test encoding-28.0 {all encodings load} -body {
if {$name ne "unicode"} {
incr count
}
- encoding convertto -nocomplain $name $string
+ encoding convertto -profile tcl8 $name $string
# discard the cached internal representation of Tcl_Encoding
# Unfortunately, without this, encoding 2-1 fails.
@@ -905,6 +1065,44 @@ test encoding-28.0 {all encodings load} -body {
runtests
+test encoding-bug-183a1adcc0-1 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints {
+ testencoding
+} -body {
+ # Note - buffers are initialized to \xff
+ list [catch {testencoding Tcl_UtfToExternal utf-16 A {start end} {} 1} result] $result
+} -result [list 0 [list nospace {} \xff]]
+
+test encoding-bug-183a1adcc0-2 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints {
+ testencoding
+} -body {
+ # Note - buffers are initialized to \xff
+ list [catch {testencoding Tcl_UtfToExternal utf-16 A {start end} {} 0} result] $result
+} -result [list 0 [list nospace {} {}]]
+
+test encoding-bug-183a1adcc0-3 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints {
+ testencoding
+} -body {
+ # Note - buffers are initialized to \xff
+ list [catch {testencoding Tcl_UtfToExternal utf-16 A {start end} {} 2} result] $result
+} -result [list 0 [list nospace {} \x00\x00]]
+
+test encoding-bug-183a1adcc0-4 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints {
+ testencoding
+} -body {
+ # Note - buffers are initialized to \xff
+ list [catch {testencoding Tcl_UtfToExternal utf-16 A {start end} {} 3} result] $result
+} -result [list 0 [list nospace {} \x00\x00\xff]]
+
+test encoding-bug-183a1adcc0-5 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints {
+ testencoding ucs2 knownBug
+} -body {
+ # The knownBug constraint is because test depends on TCL_UTF_MAX and
+ # also UtfToUtf16 assumes space required in destination buffer is
+ # sizeof(Tcl_UniChar) which is incorrect when TCL_UTF_MAX==4
+ # Note - buffers are initialized to \xff
+ list [catch {testencoding Tcl_UtfToExternal utf-16 A {start end} {} 4} result] $result
+} -result [list 0 [list ok {} [expr {$::tcl_platform(byteOrder) eq "littleEndian" ? "\x41\x00" : "\x00\x41"}]\x00\x00]]
+
}
test encoding-29.0 {get encoding nul terminator lengths} -constraints {
@@ -918,6 +1116,33 @@ test encoding-29.0 {get encoding nul terminator lengths} -constraints {
[testencoding nullength ksc5601]
} -result {1 2 4 2 2}
+test encoding-30.0 {encoding convertto large strings UINT_MAX} -constraints {
+ perf
+} -body {
+ # Test to ensure not misinterpreted as -1
+ list [string length [set s [string repeat A 0xFFFFFFFF]]] [string equal $s [encoding convertto ascii $s]]
+} -result {4294967295 1}
+
+test encoding-30.1 {encoding convertto large strings > 4GB} -constraints {
+ perf
+} -body {
+ list [string length [set s [string repeat A 0x100000000]]] [string equal $s [encoding convertto ascii $s]]
+} -result {4294967296 1}
+
+test encoding-30.2 {encoding convertfrom large strings UINT_MAX} -constraints {
+ perf
+} -body {
+ # Test to ensure not misinterpreted as -1
+ list [string length [set s [string repeat A 0xFFFFFFFF]]] [string equal $s [encoding convertfrom ascii $s]]
+} -result {4294967295 1}
+
+test encoding-30.3 {encoding convertfrom large strings > 4GB} -constraints {
+ perf
+} -body {
+ list [string length [set s [string repeat A 0x100000000]]] [string equal $s [encoding convertfrom ascii $s]]
+} -result {4294967296 1}
+
+
# cleanup
namespace delete ::tcl::test::encoding
::tcltest::cleanupTests