From 2ff0d1c5c1dfa32d96b3d627878eedb04c72b18f Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Thu, 9 Mar 2023 16:20:30 +0000
Subject: Bug-fix for Utf32ToUtfProc, in case TCL_UTF_MAX=3

---
 generic/tclEncoding.c       | 55 ++++++++++++++++++++++++++++++++++++---------
 library/tcltest/tcltest.tcl | 16 ++++++-------
 tests/io.test               |  3 ++-
 3 files changed, 54 insertions(+), 20 deletions(-)

diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 267a667..dacc263 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2712,7 +2712,7 @@ Utf32ToUtfProc(
     const char *srcStart, *srcEnd;
     const char *dstEnd, *dstStart;
     int result, numChars, charLimit = INT_MAX;
-    int ch, bytesLeft = srcLen % 4;
+    int ch = 0, bytesLeft = srcLen % 4;
 
     flags |= PTR2INT(clientData);
     if (flags & TCL_ENCODING_CHAR_LIMIT) {
@@ -2729,6 +2729,21 @@ Utf32ToUtfProc(
 	srcLen -= bytesLeft;
     }
 
+#if TCL_UTF_MAX < 4
+    /*
+     * If last code point is a high surrogate, we cannot handle that yet,
+     * unless we are at the end.
+     */
+
+    if (!(flags & TCL_ENCODING_END) && (srcLen >= 4) &&
+	    ((src[srcLen - ((flags & TCL_ENCODING_LE)?3:2)] & 0xFC) == 0xD8) &&
+	    ((src[srcLen - ((flags & TCL_ENCODING_LE)?2:3)]) == 0) &&
+	    ((src[srcLen - ((flags & TCL_ENCODING_LE)?1:4)]) == 0)) {
+	result = TCL_CONVERT_MULTIBYTE;
+	srcLen-= 4;
+    }
+#endif
+
     srcStart = src;
     srcEnd = src + srcLen;
 
@@ -2741,15 +2756,27 @@ Utf32ToUtfProc(
 	    break;
 	}
 
+#if TCL_UTF_MAX < 4
+	int prev = ch;
+#endif
 	if (flags & TCL_ENCODING_LE) {
 	    ch = (src[3] & 0xFF) << 24 | (src[2] & 0xFF) << 16 | (src[1] & 0xFF) << 8 | (src[0] & 0xFF);
 	} else {
 	    ch = (src[0] & 0xFF) << 24 | (src[1] & 0xFF) << 16 | (src[2] & 0xFF) << 8 | (src[3] & 0xFF);
 	}
+#if TCL_UTF_MAX < 4
+	if (((prev  & ~0x3FF) == 0xD800) && ((ch  & ~0x3FF) != 0xDC00)) {
+	    /* Bug [10c2c17c32]. If Hi surrogate not followed by Lo surrogate, finish 3-byte UTF-8 */
+	    dst += Tcl_UniCharToUtf(-1, dst);
+	}
+#endif
 
 	if ((unsigned)ch > 0x10FFFF || SURROGATE(ch)) {
 	    if (PROFILE_STRICT(flags)) {
 		result = TCL_CONVERT_SYNTAX;
+#if TCL_UTF_MAX < 4
+		ch = 0;
+#endif
 		break;
 	    }
 	    if (PROFILE_REPLACE(flags)) {
@@ -2770,6 +2797,12 @@ Utf32ToUtfProc(
 	src += 4;
     }
 
+#if TCL_UTF_MAX < 4
+    if ((ch  & ~0x3FF) == 0xD800) {
+	/* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */
+	dst += Tcl_UniCharToUtf(-1, dst);
+    }
+#endif
     
 
     /*
@@ -2780,16 +2813,16 @@ Utf32ToUtfProc(
 	if (dst > dstEnd) {
 	    result = TCL_CONVERT_NOSPACE;
 	} else {
-            if (PROFILE_STRICT(flags)) {
-                result = TCL_CONVERT_SYNTAX;
-            } else {
-                /* PROFILE_REPLACE or PROFILE_TCL8 */
-                result = TCL_OK;
-                dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
-                numChars++;
-                src += bytesLeft; /* Go past truncated code unit */
-            }
-        }
+	    if (PROFILE_STRICT(flags)) {
+		result = TCL_CONVERT_SYNTAX;
+	    } else {
+		/* PROFILE_REPLACE or PROFILE_TCL8 */
+		result = TCL_OK;
+		dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
+		numChars++;
+		src += bytesLeft; /* Go past truncated code unit */
+	    }
+	}
     }
 
     *srcReadPtr = src - srcStart;
diff --git a/library/tcltest/tcltest.tcl b/library/tcltest/tcltest.tcl
index 7344f9f..1ba5d9f 100644
--- a/library/tcltest/tcltest.tcl
+++ b/library/tcltest/tcltest.tcl
@@ -400,7 +400,7 @@ namespace eval tcltest {
 	    default {
 		set outputChannel [open $filename a]
 		if {[package vsatisfies [package provide Tcl] 8.7-]} {
-		    fconfigure $outputChannel -encoding utf-8
+		    fconfigure $outputChannel -profile tcl8 -encoding utf-8
 		}
 		set ChannelsWeOpened($outputChannel) 1
 
@@ -447,7 +447,7 @@ namespace eval tcltest {
 	    default {
 		set errorChannel [open $filename a]
 		if {[package vsatisfies [package provide Tcl] 8.7-]} {
-		    fconfigure $errorChannel -encoding utf-8
+		    fconfigure $errorChannel -profile tcl8 -encoding utf-8
 		}
 		set ChannelsWeOpened($errorChannel) 1
 
@@ -792,7 +792,7 @@ namespace eval tcltest {
 	if {$Option(-loadfile) eq {}} {return}
 	set tmp [open $Option(-loadfile) r]
 	if {[package vsatisfies [package provide Tcl] 8.7-]} {
-	    fconfigure $tmp -encoding utf-8
+	    fconfigure $tmp -profile tcl8 -encoding utf-8
 	}
 	loadScript [read $tmp]
 	close $tmp
@@ -1340,7 +1340,7 @@ proc tcltest::DefineConstraintInitializers {} {
 	set code 0
 	if {![catch {set f [open "|[list [interpreter]]" w]}]} {
 	    if {[package vsatisfies [package provide Tcl] 8.7-]} {
-		fconfigure $f -encoding utf-8
+		fconfigure $f -profile tcl8 -encoding utf-8
 	    }
 	    if {![catch {puts $f exit}]} {
 		if {![catch {close $f}]} {
@@ -2190,7 +2190,7 @@ proc tcltest::test {name description args} {
 	    if {[file readable $testFile]} {
 		set testFd [open $testFile r]
 		if {[package vsatisfies [package provide Tcl] 8.7-]} {
-		    fconfigure $testFd -encoding utf-8
+		    fconfigure $testFd -profile tcl8 -encoding utf-8
 		}
 		set testLine [expr {[lsearch -regexp \
 			[split [read $testFd] "\n"] \
@@ -2901,7 +2901,7 @@ proc tcltest::runAllTests { {shell ""} } {
 		incr numTestFiles
 		set pipeFd [open $cmd "r"]
 		if {[package vsatisfies [package provide Tcl] 8.7-]} {
-		    fconfigure $pipeFd -encoding utf-8
+		    fconfigure $pipeFd -profile tcl8 -encoding utf-8
 		}
 		while {[gets $pipeFd line] >= 0} {
 		    if {[regexp [join {
@@ -3101,7 +3101,7 @@ proc tcltest::makeFile {contents name {directory ""}} {
     set fd [open $fullName w]
     fconfigure $fd -translation lf
     if {[package vsatisfies [package provide Tcl] 8.7-]} {
-	fconfigure $fd -encoding utf-8
+	fconfigure $fd -profile tcl8 -encoding utf-8
     }
     if {[string index $contents end] eq "\n"} {
 	puts -nonewline $fd $contents
@@ -3252,7 +3252,7 @@ proc tcltest::viewFile {name {directory ""}} {
     set fullName [file join $directory $name]
     set f [open $fullName]
     if {[package vsatisfies [package provide Tcl] 8.7-]} {
-	fconfigure $f -encoding utf-8
+	fconfigure $f -profile tcl8 -encoding utf-8
     }
     set data [read -nonewline $f]
     close $f
diff --git a/tests/io.test b/tests/io.test
index 8dde2b2..a8ec7e5 100644
--- a/tests/io.test
+++ b/tests/io.test
@@ -9292,10 +9292,10 @@ test io-75.12 {invalid utf-8 encoding read is ignored} -setup {
     fconfigure $f -encoding utf-8 -profile tcl8 -buffering none -eofchar "" -translation lf
 } -body {
     set d [read $f]
-    close $f
     binary scan $d H* hd
     set hd
 } -cleanup {
+    close $f
     removeFile io-75.12
 } -result 4181
 test io-75.13 {invalid utf-8 encoding read is not ignored (-profile strict)} -setup {
@@ -9310,6 +9310,7 @@ test io-75.13 {invalid utf-8 encoding read is not ignored (-profile strict)} -se
 } -body {
     read $f
 } -cleanup {
+    close $f
     removeFile io-75.13
 } -match glob -returnCodes 1 -result {error reading "*": illegal byte sequence}
 
-- 
cgit v0.12