diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2021-03-19 12:58:41 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2021-03-19 12:58:41 (GMT) |
commit | d5479489aab71c267a1371d2ac1d0674a15a0c61 (patch) | |
tree | 63882be2aa68092b3c348448ab1f4bbcce1f6105 | |
parent | c39345b47aa9604e3ffb82599ee15be9cbce57f6 (diff) | |
download | tcl-d5479489aab71c267a1371d2ac1d0674a15a0c61.zip tcl-d5479489aab71c267a1371d2ac1d0674a15a0c61.tar.gz tcl-d5479489aab71c267a1371d2ac1d0674a15a0c61.tar.bz2 |
Fully implement TCL_ENCODING_STOPONERROR flag for Utf2Utf encoder/decoder.
-rw-r--r-- | generic/tclEncoding.c | 11 |
1 file changed, 8 insertions, 3 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 688d46e..4eabbda 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2225,7 +2225,7 @@ UtfToUtfProc( result = TCL_CONVERT_NOSPACE; break; } - if ((UCHAR(*src - 1) < 0x7F) && !(flags & TCL_ENCODING_EXTERNAL)) { + if (UCHAR(*src) < 0x80 && !(UCHAR(*src) == 0 && !(flags & TCL_ENCODING_EXTERNAL))) { /* * Copy 7bit characters, but skip null-bytes when we are in input * mode, so that they get converted to 0xC080. @@ -2256,14 +2256,19 @@ UtfToUtfProc( src += 1; dst += Tcl_UniCharToUtf(ch, dst); } else { - src += TclUtfToUCS4(src, &ch); + size_t len = TclUtfToUCS4(src, &ch); + if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_STOPONERROR)) { + result = TCL_CONVERT_SYNTAX; + break; + } + src += len; if ((ch | 0x7FF) == 0xDFFF) { /* * A surrogate character is detected, handle especially. */ int low = ch; - size_t len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0; + len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0; if (((low & ~0x3FF) != 0xDC00) || (ch & 0x400)) { *dst++ = (char) (((ch >> 12) | 0xE0) & 0xEF); |