From 50a13754610c04730f483b8100a8b9c0fccb1e3e Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Wed, 3 Oct 2018 19:24:13 +0000
Subject: Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString: Don't allocate
 too much memory in these functions: only the newly appended data needs to
 be scaled by the per-character size, not the existing DString length. Also
 make sure that Tcl_UtfToUniCharDString never accesses more than 'length'
 bytes from the string, not even when encountering invalid UTF-8.

---
 generic/tclUtf.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index c2963bf..b33bf5f 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -243,7 +243,7 @@ Tcl_UniCharToUtfDString(
      */
 
     oldLength = Tcl_DStringLength(dsPtr);
-    Tcl_DStringSetLength(dsPtr, (oldLength + uniLength + 1) * TCL_UTF_MAX);
+    Tcl_DStringSetLength(dsPtr, oldLength + (uniLength + 1) * TCL_UTF_MAX);
     string = Tcl_DStringValue(dsPtr) + oldLength;
 
     p = string;
@@ -432,17 +432,27 @@ Tcl_UtfToUniCharDString(
      */
 
     oldLength = Tcl_DStringLength(dsPtr);
-/* TODO: fix overreach! */
+
     Tcl_DStringSetLength(dsPtr,
-	    (int) ((oldLength + length + 1) * sizeof(Tcl_UniChar)));
+	    oldLength + (int) ((length + 1) * sizeof(Tcl_UniChar)));
     wString = (Tcl_UniChar *) (Tcl_DStringValue(dsPtr) + oldLength);
 
     w = wString;
-    end = src + length;
-    for (p = src; p < end; ) {
+    p = src;
+    end = src + length - TCL_UTF_MAX;
+    while (p < end) {
 	p += TclUtfToUniChar(p, &ch);
 	*w++ = ch;
     }
+    end += TCL_UTF_MAX;
+    while (p < end) {
+	if (Tcl_UtfCharComplete(p, end-p)) {
+	    p += TclUtfToUniChar(p, &ch);
+	} else {
+	    ch = UCHAR(*p++);
+	}
+	*w++ = ch;
+    }
     *w = '\0';
     Tcl_DStringSetLength(dsPtr,
 	    (oldLength + ((char *) w - (char *) wString)));
-- 
cgit v0.12