summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: jan.nijtmans <nijtmans@users.sourceforge.net> 2021-03-19 12:58:41 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2021-03-19 12:58:41 (GMT)
commit: d5479489aab71c267a1371d2ac1d0674a15a0c61 (patch)
tree: 63882be2aa68092b3c348448ab1f4bbcce1f6105
parent: c39345b47aa9604e3ffb82599ee15be9cbce57f6 (diff)
download: tcl-d5479489aab71c267a1371d2ac1d0674a15a0c61.zip
tcl-d5479489aab71c267a1371d2ac1d0674a15a0c61.tar.gz
tcl-d5479489aab71c267a1371d2ac1d0674a15a0c61.tar.bz2
Fully implement TCL_ENCODING_STOPONERROR flag for Utf2Utf encoder/decoder.
-rw-r--r-- generic/tclEncoding.c 11
1 files changed, 8 insertions, 3 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 688d46e..4eabbda 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2225,7 +2225,7 @@ UtfToUtfProc(
result = TCL_CONVERT_NOSPACE;
break;
}
- if ((UCHAR(*src - 1) < 0x7F) && !(flags & TCL_ENCODING_EXTERNAL)) {
+ if (UCHAR(*src) < 0x80 && !(UCHAR(*src) == 0 && !(flags & TCL_ENCODING_EXTERNAL))) {
/*
* Copy 7bit characters, but skip null-bytes when we are in input
* mode, so that they get converted to 0xC080.
@@ -2256,14 +2256,19 @@ UtfToUtfProc(
src += 1;
dst += Tcl_UniCharToUtf(ch, dst);
} else {
- src += TclUtfToUCS4(src, &ch);
+ size_t len = TclUtfToUCS4(src, &ch);
+ if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_STOPONERROR)) {
+ result = TCL_CONVERT_SYNTAX;
+ break;
+ }
+ src += len;
if ((ch | 0x7FF) == 0xDFFF) {
/*
* A surrogate character is detected, handle especially.
*/
int low = ch;
- size_t len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0;
+ len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0;
if (((low & ~0x3FF) != 0xDC00) || (ch & 0x400)) {
*dst++ = (char) (((ch >> 12) | 0xE0) & 0xEF);