diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2023-02-22 17:34:22 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2023-02-22 17:34:22 (GMT) |
commit | c3e063b0aa8200f75aef7612fe3d63d09796ad67 (patch) | |
tree | 5ce99c50beac47b134c5a9142a9463a796974be7 /generic | |
parent | 5219c94af582e25644cda9972735c8c913efc24e (diff) | |
parent | c606ae1574a7d66bcbf8666506e91840875f6d45 (diff) | |
download | tcl-c3e063b0aa8200f75aef7612fe3d63d09796ad67.zip tcl-c3e063b0aa8200f75aef7612fe3d63d09796ad67.tar.gz tcl-c3e063b0aa8200f75aef7612fe3d63d09796ad67.tar.bz2 |
Fix [d19fe0a5b]: Handling incomplete byte sequences for utf-16/utf-32
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tclEncoding.c | 27 |
1 file changed, 24 insertions, 3 deletions
```diff
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index dfa7907..ecec6e9 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2457,21 +2457,27 @@ UnicodeToUtfProc(
     }
     result = TCL_OK;
 
-    /* check alignment with utf-16 (2 == sizeof(UTF-16)) */
+    /*
+     * Check alignment with utf-16 (2 == sizeof(UTF-16))
+     */
+
     if ((srcLen % 2) != 0) {
         result = TCL_CONVERT_MULTIBYTE;
         srcLen--;
     }
 
+#if TCL_UTF_MAX > 3
     /*
-     * If last code point is a high surrogate, we cannot handle that yet.
+     * If last code point is a high surrogate, we cannot handle that yet,
+     * unless we are at the end.
      */
 
-    if ((srcLen >= 2) &&
+    if (!(flags & TCL_ENCODING_END) && (srcLen >= 2) &&
             ((src[srcLen - (clientData?1:2)] & 0xFC) == 0xD8)) {
         result = TCL_CONVERT_MULTIBYTE;
         srcLen-= 2;
     }
+#endif
 
     srcStart = src;
     srcEnd = src + srcLen;
@@ -2504,6 +2510,21 @@ UnicodeToUtfProc(
         src += sizeof(unsigned short);
     }
 
+    if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) {
+        /* We have a single byte left-over at the end */
+        if (dst > dstEnd) {
+            result = TCL_CONVERT_NOSPACE;
+        } else {
+            /* destination is not full, so we really are at the end now */
+            if (flags & TCL_ENCODING_STOPONERROR) {
+                result = TCL_CONVERT_SYNTAX;
+            } else {
+                dst += Tcl_UniCharToUtf(0xFFFD, dst);
+                numChars++;
+                src++;
+            }
+        }
+    }
     *srcReadPtr = src - srcStart;
     *dstWrotePtr = dst - dstStart;
     *dstCharsPtr = numChars;
```