From 400a5524e5f12e96c47dc1613835765f4a9f0271 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans"
Date: Thu, 1 Aug 2019 08:02:38 +0000
Subject: Attempt to fix [https://core.tcl-lang.org/tk/tktview?name=a179564826|a179564826]: Tk 8.6: prevent issues when encountering non-BMP Unicode characters

---
 generic/tclUtf.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 4b70f96..0a275d7 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -71,7 +71,7 @@ static const unsigned char totalBytes[256] = {
 #if TCL_UTF_MAX > 3
     4,4,4,4,4,
 #else
-    1,1,1,1,1,
+    3,3,3,3,3, /* Tcl_UtfCharComplete() only checks TCL_UTF_MAX bytes */
 #endif
     1,1,1,1,1,1,1,1,1,1,1
 };
@@ -314,7 +314,7 @@ Tcl_UtfToUniChar(
      * characters representing themselves.
      */

-#if TCL_UTF_MAX == 4
+#if TCL_UTF_MAX <= 4
     /* If *chPtr contains a high surrogate (produced by a previous
      * Tcl_UtfToUniChar() call) and the next 3 bytes are UTF-8 continuation
      * bytes, then we must produce a follow-up low surrogate. We only
@@ -364,13 +364,12 @@ Tcl_UtfToUniChar(
          * represents itself.
          */
     }
-#if TCL_UTF_MAX > 3
     else if (byte < 0xF8) {
         if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) {
             /*
              * Four-byte-character lead byte followed by three trail bytes.
              */
-#if TCL_UTF_MAX == 4
+#if TCL_UTF_MAX <= 4
             Tcl_UniChar high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2)
                     | ((src[2] & 0x3F) >> 4)) - 0x40;
             if (high >= 0x400) {
@@ -394,7 +393,6 @@ Tcl_UtfToUniChar(
          * represents itself.
          */
     }
-#endif

     *chPtr = byte;
     return 1;
--
cgit v0.12