From f5f5ff4257a24b2e8a8d96c820f6874c86e81304 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 6 Feb 2023 22:43:30 +0000 Subject: Proposed fix for [10c2c17c32]: UTF-32LE encoder mapping of surrogates. TODO: testcase --- generic/tclEncoding.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 288b07c..d19e237 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2582,6 +2582,10 @@ Utf32ToUtfProc( *dst++ = (ch & 0xFF); } else { dst += Tcl_UniCharToUtf(ch, dst); + if ((ch & ~0x3FF) == 0xD800) { + /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ + dst += Tcl_UniCharToUtf(-1, dst); + } } src += sizeof(unsigned int); } -- cgit v0.12