From d5479489aab71c267a1371d2ac1d0674a15a0c61 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 19 Mar 2021 12:58:41 +0000 Subject: Fully implement TCL_ENCODING_STOPONERROR flag for Utf2Utf encoder/decoder. --- generic/tclEncoding.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 688d46e..4eabbda 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2225,7 +2225,7 @@ UtfToUtfProc( result = TCL_CONVERT_NOSPACE; break; } - if ((UCHAR(*src - 1) < 0x7F) && !(flags & TCL_ENCODING_EXTERNAL)) { + if (UCHAR(*src) < 0x80 && !(UCHAR(*src) == 0 && !(flags & TCL_ENCODING_EXTERNAL))) { /* * Copy 7bit characters, but skip null-bytes when we are in input * mode, so that they get converted to 0xC080. @@ -2256,14 +2256,19 @@ UtfToUtfProc( src += 1; dst += Tcl_UniCharToUtf(ch, dst); } else { - src += TclUtfToUCS4(src, &ch); + size_t len = TclUtfToUCS4(src, &ch); + if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_STOPONERROR)) { + result = TCL_CONVERT_SYNTAX; + break; + } + src += len; if ((ch | 0x7FF) == 0xDFFF) { /* * A surrogate character is detected, handle especially. */ int low = ch; - size_t len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0; + len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0; if (((low & ~0x3FF) != 0xDC00) || (ch & 0x400)) { *dst++ = (char) (((ch >> 12) | 0xE0) & 0xEF); -- cgit v0.12