summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- generic/tclEncoding.c 26
-rw-r--r-- tests/encoding.test 21
-rw-r--r-- tests/utfext.test 42
3 files changed, 87 insertions, 2 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index bdf06c9..3f26ab7 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -1544,7 +1544,7 @@ Tcl_UtfToExternalDStringEx(
* and loop. Otherwise, return the result we got.
*/
if ((result != TCL_CONVERT_NOSPACE) &&
- !(result == TCL_CONVERT_MULTIBYTE && (flags & TCL_ENCODING_END))) {
+ (result != TCL_CONVERT_MULTIBYTE || (flags & TCL_ENCODING_END))) {
Tcl_Size nBytesProcessed = (src - srcStart);
Tcl_Size i = soFar + encodingPtr->nullSize - 1;
/* Loop as DStringSetLength only stores one nul byte at a time */
@@ -4067,6 +4067,30 @@ EscapeToUtfProc(
numChars++;
}
+ if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) {
+ /* We have a code fragment left-over at the end */
+ if (dst > dstEnd) {
+ result = TCL_CONVERT_NOSPACE;
+ } else {
+ /* destination is not full, so we really are at the end now */
+ if (PROFILE_STRICT(flags)) {
+ result = TCL_CONVERT_SYNTAX;
+ } else {
+ /*
+ * PROFILE_REPLACE or PROFILE_TCL8. The latter is treated
+ * similarly to the former because Tcl8 was broken in this
+ * regard: it just ignored the byte and truncated, which is
+ * a no-no as per Unicode recommendations.
+ */
+ result = TCL_OK;
+ dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
+ numChars++;
+ /* TCL_CONVERT_MULTIBYTE means all source consumed */
+ src = srcEnd;
+ }
+ }
+ }
+
*statePtr = (Tcl_EncodingState) INT2PTR(state);
*srcReadPtr = src - srcStart;
*dstWrotePtr = dst - dstStart;
diff --git a/tests/encoding.test b/tests/encoding.test
index a754f72..b20b18d 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -1057,7 +1057,7 @@ test encoding-27.2 {encoding dirs basic behavior} -returnCodes error -body {
encoding dirs "\{not a list"
} -result "expected directory list but got \"\{not a list\""
-}
+}; # proc runtests
test encoding-28.0 {all encodings load} -body {
@@ -1194,6 +1194,25 @@ test encoding-bug-201c7a3aa6-tcl8 {Crash encoding non-BMP to iso2022} -body {
encoding convertto -profile tcl8 iso2022 \U1f600
} -result ?
+test encoding-bug-7346adc50f-strict {OOM on convertfrom truncated iso2022 - strict} -body {
+ encoding convertfrom -profile strict iso2022-jp "\x1b\$B\$*;n\$"
+} -result {unexpected byte sequence starting at index 7: '\x24'} -returnCodes error
+
+test encoding-bug-7346adc50f-failindex {OOM on convertfrom truncated iso2022 - failindex} -body {
+ list [encoding convertfrom -failindex failix iso2022-jp "\x1b\$B\$*;n\$"] $failix
+} -cleanup {
+ unset -nocomplain failix
+} -result [list \u304A\u8A66 7]
+
+test encoding-bug-7346adc50f-replace {OOM on convertfrom truncated iso2022 - replace} -body {
+ encoding convertfrom -profile replace iso2022-jp "\x1b\$B\$*;n\$"
+} -result \u304A\u8A66\uFFFD
+
+test encoding-bug-7346adc50f-tcl8 {OOM on convertfrom truncated iso2022 - tcl8} -body {
+ encoding convertfrom -profile tcl8 iso2022-jp "\x1b\$B\$*;n\$"
+} -result \u304A\u8A66\uFFFD
+
+
# cleanup
namespace delete ::tcl::test::encoding
::tcltest::cleanupTests
diff --git a/tests/utfext.test b/tests/utfext.test
index ca74229..8ab4bc4 100644
--- a/tests/utfext.test
+++ b/tests/utfext.test
@@ -320,6 +320,48 @@ namespace eval utftest {
set result [list [testencoding Tcl_ExternalToUtf shiftjis $src {start tcl8} 0 16 srcRead dstWritten charsWritten] $srcRead $dstWritten $charsWritten]
lappend result {*}[list [testencoding Tcl_ExternalToUtf shiftjis [string range $src $srcRead end] {end tcl8} 0 10 srcRead dstWritten charsWritten] $srcRead $dstWritten $charsWritten]
} -result [list [list multibyte 0 \xEF\xBC\x90\xEF\xBC\x91\x00\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF] 4 6 2 [list ok 0 \xC2\x82\x00\xFF\xFF\xFF\xFF\xFF\xFF\xFF] 1 2 1] -constraints testencoding
+
+ test Tcl_ExternalToUtf-bug-7346adc50f-strict-0 {
+ truncated input in escape encoding (strict)
+ } -body {
+ set src [binary decode hex 1b2442242a3b6e24]
+ list {*}[testencoding Tcl_ExternalToUtf iso2022-jp $src {start end strict} 0 16 srcRead dstWritten charsWritten] $srcRead $dstWritten $charsWritten
+ } -result [list syntax 2 [binary decode hex e3818ae8a9a600ffffffffffffffffff] 7 6 2] -constraints testencoding
+
+ test Tcl_ExternalToUtf-bug-7346adc50f-strict-1 {
+ truncated input in escape encoding (strict, partial)
+ } -body {
+ set src [binary decode hex 1b2442242a3b6e24]
+ list {*}[testencoding Tcl_ExternalToUtf iso2022-jp $src {start strict} 0 16 srcRead dstWritten charsWritten] $srcRead $dstWritten $charsWritten
+ } -result [list multibyte 2 [binary decode hex e3818ae8a9a600ffffffffffffffffff] 7 6 2] -constraints testencoding
+
+ test Tcl_ExternalToUtf-bug-7346adc50f-replace-0 {
+ truncated input in escape encoding (replace)
+ } -body {
+ set src [binary decode hex 1b2442242a3b6e24]
+ list {*}[testencoding Tcl_ExternalToUtf iso2022-jp $src {start end replace} 0 16 srcRead dstWritten charsWritten] $srcRead $dstWritten $charsWritten
+ } -result [list ok 2 [binary decode hex e3818ae8a9a6efbfbd00ffffffffffff] 8 9 3] -constraints testencoding
+
+ test Tcl_ExternalToUtf-bug-7346adc50f-replace-1 {
+ truncated input in escape encoding (replace, partial)
+ } -body {
+ set src [binary decode hex 1b2442242a3b6e24]
+ list {*}[testencoding Tcl_ExternalToUtf iso2022-jp $src {start replace} 0 16 srcRead dstWritten charsWritten] $srcRead $dstWritten $charsWritten
+ } -result [list multibyte 2 [binary decode hex e3818ae8a9a600ffffffffffffffffff] 7 6 2] -constraints testencoding
+
+ test Tcl_ExternalToUtf-bug-7346adc50f-tcl8-0 {
+ truncated input in escape encoding (tcl8)
+ } -body {
+ set src [binary decode hex 1b2442242a3b6e24]
+ list {*}[testencoding Tcl_ExternalToUtf iso2022-jp $src {start end tcl8} 0 16 srcRead dstWritten charsWritten] $srcRead $dstWritten $charsWritten
+ } -result [list ok 2 [binary decode hex e3818ae8a9a6efbfbd00ffffffffffff] 8 9 3] -constraints testencoding
+
+ test Tcl_ExternalToUtf-bug-7346adc50f-tcl8-1 {
+ truncated input in escape encoding (tcl8, partial)
+ } -body {
+ set src [binary decode hex 1b2442242a3b6e24]
+ list {*}[testencoding Tcl_ExternalToUtf iso2022-jp $src {start tcl8} 0 16 srcRead dstWritten charsWritten] $srcRead $dstWritten $charsWritten
+ } -result [list multibyte 2 [binary decode hex e3818ae8a9a600ffffffffffffffffff] 7 6 2] -constraints testencoding
}
namespace delete utftest