From c4618712a008b8d0377029f7991ee60deba4fb0a Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Mon, 26 May 2025 05:12:10 +0000 Subject: Handle tcl8 and replace profiles for truncated escape encodings. Add tests. --- generic/tclEncoding.c | 26 +++++++++++++++++++++++++- tests/encoding.test | 21 ++++++++++++++++++++- tests/utfext.test | 42 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 87 insertions(+), 2 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index bdf06c9..3f26ab7 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1544,7 +1544,7 @@ Tcl_UtfToExternalDStringEx( * and loop. Otherwise, return the result we got. */ if ((result != TCL_CONVERT_NOSPACE) && - !(result == TCL_CONVERT_MULTIBYTE && (flags & TCL_ENCODING_END))) { + (result != TCL_CONVERT_MULTIBYTE || (flags & TCL_ENCODING_END))) { Tcl_Size nBytesProcessed = (src - srcStart); Tcl_Size i = soFar + encodingPtr->nullSize - 1; /* Loop as DStringSetLength only stores one nul byte at a time */ @@ -4067,6 +4067,30 @@ EscapeToUtfProc( numChars++; } + if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) { + /* We have a code fragment left-over at the end */ + if (dst > dstEnd) { + result = TCL_CONVERT_NOSPACE; + } else { + /* destination is not full, so we really are at the end now */ + if (PROFILE_STRICT(flags)) { + result = TCL_CONVERT_SYNTAX; + } else { + /* + * PROFILE_REPLACE or PROFILE_TCL8. The latter is treated + * like the former because Tcl8 was broken in this regard: + * it just ignored the byte and truncated, which is really + * a no-no as per Unicode recommendations. 
+ */ + result = TCL_OK; + dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); + numChars++; + /* TCL_CONVERT_MULTIBYTE means all source consumed */ + src = srcEnd; + } + } + } + *statePtr = (Tcl_EncodingState) INT2PTR(state); *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; diff --git a/tests/encoding.test b/tests/encoding.test index a754f72..b20b18d 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -1057,7 +1057,7 @@ test encoding-27.2 {encoding dirs basic behavior} -returnCodes error -body { encoding dirs "\{not a list" } -result "expected directory list but got \"\{not a list\"" -} +}; # proc runtests test encoding-28.0 {all encodings load} -body { @@ -1194,6 +1194,25 @@ test encoding-bug-201c7a3aa6-tcl8 {Crash encoding non-BMP to iso2022} -body { encoding convertto -profile tcl8 iso2022 \U1f600 } -result ? +test encoding-bug-7346adc50f-strict {OOM on convertfrom truncated iso2022 - strict} -body { + encoding convertfrom -profile strict iso2022-jp "\x1b\$B\$*;n\$" +} -result {unexpected byte sequence starting at index 7: '\x24'} -returnCodes error + +test encoding-bug-7346adc50f-failindex {OOM on convertfrom truncated iso2022 - failindex} -body { + list [encoding convertfrom -failindex failix iso2022-jp "\x1b\$B\$*;n\$"] $failix +} -cleanup { + unset -nocomplain failix +} -result [list \u304A\u8A66 7] + +test encoding-bug-7346adc50f-replace {OOM on convertfrom truncated iso2022 - replace} -body { + encoding convertfrom -profile replace iso2022-jp "\x1b\$B\$*;n\$" +} -result \u304A\u8A66\uFFFD + +test encoding-bug-7346adc50f-tcl8 {OOM on convertfrom truncated iso2022 - tcl8} -body { + encoding convertfrom -profile tcl8 iso2022-jp "\x1b\$B\$*;n\$" +} -result \u304A\u8A66\uFFFD + + # cleanup namespace delete ::tcl::test::encoding ::tcltest::cleanupTests diff --git a/tests/utfext.test b/tests/utfext.test index ca74229..8ab4bc4 100644 --- a/tests/utfext.test +++ b/tests/utfext.test @@ -320,6 +320,48 @@ namespace eval utftest { set result [list 
[testencoding Tcl_ExternalToUtf shiftjis $src {start tcl8} 0 16 srcRead dstWritten charsWritten] $srcRead $dstWritten $charsWritten] lappend result {*}[list [testencoding Tcl_ExternalToUtf shiftjis [string range $src $srcRead end] {end tcl8} 0 10 srcRead dstWritten charsWritten] $srcRead $dstWritten $charsWritten] } -result [list [list multibyte 0 \xEF\xBC\x90\xEF\xBC\x91\x00\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF] 4 6 2 [list ok 0 \xC2\x82\x00\xFF\xFF\xFF\xFF\xFF\xFF\xFF] 1 2 1] -constraints testencoding + + test Tcl_ExternalToUtf-bug-7346adc50f-strict-0 { + truncated input in escape encoding (strict) + } -body { + set src [binary decode hex 1b2442242a3b6e24] + list {*}[testencoding Tcl_ExternalToUtf iso2022-jp $src {start end strict} 0 16 srcRead dstWritten charsWritten] $srcRead $dstWritten $charsWritten + } -result [list syntax 2 [binary decode hex e3818ae8a9a600ffffffffffffffffff] 7 6 2] -constraints testencoding + + test Tcl_ExternalToUtf-bug-7346adc50f-strict-1 { + truncated input in escape encoding (strict, partial) + } -body { + set src [binary decode hex 1b2442242a3b6e24] + list {*}[testencoding Tcl_ExternalToUtf iso2022-jp $src {start strict} 0 16 srcRead dstWritten charsWritten] $srcRead $dstWritten $charsWritten + } -result [list multibyte 2 [binary decode hex e3818ae8a9a600ffffffffffffffffff] 7 6 2] -constraints testencoding + + test Tcl_ExternalToUtf-bug-7346adc50f-replace-0 { + truncated input in escape encoding (replace) + } -body { + set src [binary decode hex 1b2442242a3b6e24] + list {*}[testencoding Tcl_ExternalToUtf iso2022-jp $src {start end replace} 0 16 srcRead dstWritten charsWritten] $srcRead $dstWritten $charsWritten + } -result [list ok 2 [binary decode hex e3818ae8a9a6efbfbd00ffffffffffff] 8 9 3] -constraints testencoding + + test Tcl_ExternalToUtf-bug-7346adc50f-replace-1 { + truncated input in escape encoding (replace, partial) + } -body { + set src [binary decode hex 1b2442242a3b6e24] + list {*}[testencoding Tcl_ExternalToUtf iso2022-jp $src {start replace} 0 16 srcRead dstWritten charsWritten] $srcRead 
$dstWritten $charsWritten + } -result [list multibyte 2 [binary decode hex e3818ae8a9a600ffffffffffffffffff] 7 6 2] -constraints testencoding + + test Tcl_ExternalToUtf-bug-7346adc50f-tcl8-0 { + truncated input in escape encoding (tcl8) + } -body { + set src [binary decode hex 1b2442242a3b6e24] + list {*}[testencoding Tcl_ExternalToUtf iso2022-jp $src {start end tcl8} 0 16 srcRead dstWritten charsWritten] $srcRead $dstWritten $charsWritten + } -result [list ok 2 [binary decode hex e3818ae8a9a6efbfbd00ffffffffffff] 8 9 3] -constraints testencoding + + test Tcl_ExternalToUtf-bug-7346adc50f-tcl8-1 { + truncated input in escape encoding (tcl8, partial) + } -body { + set src [binary decode hex 1b2442242a3b6e24] + list {*}[testencoding Tcl_ExternalToUtf iso2022-jp $src {start tcl8} 0 16 srcRead dstWritten charsWritten] $srcRead $dstWritten $charsWritten + } -result [list multibyte 2 [binary decode hex e3818ae8a9a600ffffffffffffffffff] 7 6 2] -constraints testencoding } namespace delete utftest -- cgit v0.12