From d8ec6222e8374b744712d5901e829fd92ee43cf0 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 20 Mar 2019 22:45:51 +0000 Subject: Fix Tcl_UtfToUniCharDString() function, handling invalid byte at the end of the string: Not quite correct for bytes between 0x80-0x9F, according to TIP --- generic/tclUtf.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index f3561f9..a330d11 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -453,7 +453,7 @@ Tcl_UtfToUniChar( } /* - * A four-byte-character lead-byte not followed by two trail-bytes + * A four-byte-character lead-byte not followed by three trail-bytes * represents itself. */ } @@ -619,10 +619,10 @@ Tcl_UtfToUniCharDString( } end += 4; while (p < end) { - if (Tcl_UtfCharComplete(p, end-p)) { - p += TclUtfToUniChar(p, &ch); - } else if (((UCHAR(*p)-0x80)) < 0x20) { + if (((unsigned)(UCHAR(*p)-0x80)) < 0x20) { ch = cp1252[UCHAR(*p++)-0x80]; + } else if (Tcl_UtfCharComplete(p, end-p)) { + p += TclUtfToUniChar(p, &ch); } else { ch = UCHAR(*p++); } @@ -673,10 +673,10 @@ TclUtfToWCharDString( } end += 4; while (p < end) { - if (Tcl_UtfCharComplete(p, end-p)) { - p += TclUtfToWChar(p, &ch); - } else if (((UCHAR(*p)-0x80)) < 0x20) { + if (((unsigned)(UCHAR(*p)-0x80)) < 0x20) { ch = cp1252[UCHAR(*p++)-0x80]; + } else if (Tcl_UtfCharComplete(p, end-p)) { + p += TclUtfToWChar(p, &ch); } else { ch = UCHAR(*p++); } -- cgit v0.12