From e9ccf557eb23f66c28210e967e344a61fef2ed58 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 17 Mar 2023 16:12:23 +0000 Subject: Fix [6390566ecd]: Testcase cmdAH-4.3.13.00DC0000.tail.utf-32.tcl8.a fails sometimes --- generic/tclEncoding.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index f15b479..5a89644 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2635,9 +2635,12 @@ Utf32ToUtfProc( * unsigned short-size data. */ - if ((ch > 0) && (ch < 0x80)) { + if ((unsigned)ch - 1 < 0x7F) { *dst++ = (ch & 0xFF); } else { + if (((prev & ~0x3FF) != 0xD800) && ((ch & ~0x3FF) == 0xDC00)) { + *dst = 0; /* In case of lower surrogate, don't try to combine */ + } dst += Tcl_UniCharToUtf(ch, dst); } src += sizeof(unsigned int); @@ -2856,7 +2859,7 @@ Utf16ToUtfProc( } if (((prev & ~0x3FF) == 0xD800) && ((ch & ~0x3FF) != 0xDC00)) { if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { - result = TCL_CONVERT_UNKNOWN; + result = TCL_CONVERT_SYNTAX; src -= 2; /* Go back to beginning of high surrogate */ dst--; /* Also undo writing a single byte too much */ numChars--; @@ -2877,7 +2880,7 @@ Utf16ToUtfProc( dst += Tcl_UniCharToUtf(ch, dst); } else if (((ch & ~0x3FF) == 0xDC00) && ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { /* Lo surrogate not preceded by Hi surrogate */ - result = TCL_CONVERT_UNKNOWN; + result = TCL_CONVERT_SYNTAX; break; } else { *dst = 0; /* In case of lower surrogate, don't try to combine */ @@ -2888,7 +2891,7 @@ Utf16ToUtfProc( if ((ch & ~0x3FF) == 0xD800) { if ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) { - result = TCL_CONVERT_UNKNOWN; + result = TCL_CONVERT_SYNTAX; src -= 2; dst--; numChars--; -- cgit v0.12