From c606ae1574a7d66bcbf8666506e91840875f6d45 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Tue, 21 Feb 2023 17:50:36 +0000
Subject: Proposed fix for [d19fe0a5b]: Handling incomplete byte sequences for
 utf-16/utf-32

---
 generic/tclEncoding.c | 27 ++++++++++++++++++++++++---
 tests/encoding.test   |  6 ++++++
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index dfa7907..ecec6e9 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2457,21 +2457,27 @@ UnicodeToUtfProc(
     }
     result = TCL_OK;
 
-    /* check alignment with utf-16 (2 == sizeof(UTF-16)) */
+    /*
+     * Check alignment with utf-16 (2 == sizeof(UTF-16))
+     */
+
     if ((srcLen % 2) != 0) {
 	result = TCL_CONVERT_MULTIBYTE;
 	srcLen--;
     }
 
+#if TCL_UTF_MAX > 3
     /*
-     * If last code point is a high surrogate, we cannot handle that yet.
+     * If the last code unit is a high surrogate, leave it unconverted and
+     * report TCL_CONVERT_MULTIBYTE, unless TCL_ENCODING_END is set.
      */
 
-    if ((srcLen >= 2) &&
+    if (!(flags & TCL_ENCODING_END) && (srcLen >= 2) &&
 	    ((src[srcLen - (clientData?1:2)] & 0xFC) == 0xD8)) {
 	result = TCL_CONVERT_MULTIBYTE;
 	srcLen-= 2;
     }
+#endif
 
     srcStart = src;
     srcEnd = src + srcLen;
@@ -2504,6 +2510,21 @@ UnicodeToUtfProc(
 	src += sizeof(unsigned short);
     }
 
+    if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) {
+	/* A single left-over byte remains at the end of the input. */
+	if (dst > dstEnd) {
+	    result = TCL_CONVERT_NOSPACE;
+	} else {
+	    /* destination is not full, so we really are at the end now */
+	    if (flags & TCL_ENCODING_STOPONERROR) {
+		result = TCL_CONVERT_SYNTAX;
+	    } else {
+		dst += Tcl_UniCharToUtf(0xFFFD, dst);
+		numChars++;
+		src++;
+	    }
+	}
+    }
     *srcReadPtr = src - srcStart;
     *dstWrotePtr = dst - dstStart;
     *dstCharsPtr = numChars;
diff --git a/tests/encoding.test b/tests/encoding.test
index f558e01..f6f9abc 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -419,6 +419,12 @@ test encoding-16.3 {UnicodeToUtfProc} -body {
     set val [encoding convertfrom unicode "\xDC\xDC"]
     list $val [format %X [scan $val %c]]
 } -result "\uDCDC DCDC"
+test encoding-16.4 {UnicodeToUtfProc, bug [d19fe0a5b]} -body {
+    encoding convertfrom unicode "\x41\x41\x41"
+} -result \u4141\uFFFD
+test encoding-16.5 {UnicodeToUtfProc, bug [d19fe0a5b]} -constraints ucs2 -body {
+    encoding convertfrom unicode "\xD8\xD8"
+} -result \uD8D8
 
 test encoding-17.1 {UtfToUnicodeProc} -constraints fullutf -body {
     encoding convertto unicode "\U460DC"
-- 
cgit v0.12