summary | refs | log | tree | commit | diff | stats
path: root/generic
diff options
context:
space:
mode:
author: jan.nijtmans <nijtmans@users.sourceforge.net> 2023-02-22 17:34:22 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2023-02-22 17:34:22 (GMT)
commit c3e063b0aa8200f75aef7612fe3d63d09796ad67 (patch)
tree 5ce99c50beac47b134c5a9142a9463a796974be7 /generic
parent 5219c94af582e25644cda9972735c8c913efc24e (diff)
parent c606ae1574a7d66bcbf8666506e91840875f6d45 (diff)
download tcl-c3e063b0aa8200f75aef7612fe3d63d09796ad67.zip
tcl-c3e063b0aa8200f75aef7612fe3d63d09796ad67.tar.gz
tcl-c3e063b0aa8200f75aef7612fe3d63d09796ad67.tar.bz2
Fix [d19fe0a5b]: Handling incomplete byte sequences for utf-16/utf-32
Diffstat (limited to 'generic')
-rw-r--r-- generic/tclEncoding.c 27
1 file changed, 24 insertions, 3 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index dfa7907..ecec6e9 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2457,21 +2457,27 @@ UnicodeToUtfProc(
}
result = TCL_OK;
- /* check alignment with utf-16 (2 == sizeof(UTF-16)) */
+ /*
+ * Check alignment with utf-16 (2 == sizeof(UTF-16))
+ */
+
if ((srcLen % 2) != 0) {
result = TCL_CONVERT_MULTIBYTE;
srcLen--;
}
+#if TCL_UTF_MAX > 3
/*
- * If last code point is a high surrogate, we cannot handle that yet.
+ * If last code point is a high surrogate, we cannot handle that yet,
+ * unless we are at the end.
*/
- if ((srcLen >= 2) &&
+ if (!(flags & TCL_ENCODING_END) && (srcLen >= 2) &&
((src[srcLen - (clientData?1:2)] & 0xFC) == 0xD8)) {
result = TCL_CONVERT_MULTIBYTE;
srcLen-= 2;
}
+#endif
srcStart = src;
srcEnd = src + srcLen;
@@ -2504,6 +2510,21 @@ UnicodeToUtfProc(
src += sizeof(unsigned short);
}
+ if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) {
+ /* We have a single byte left-over at the end */
+ if (dst > dstEnd) {
+ result = TCL_CONVERT_NOSPACE;
+ } else {
+ /* destination is not full, so we really are at the end now */
+ if (flags & TCL_ENCODING_STOPONERROR) {
+ result = TCL_CONVERT_SYNTAX;
+ } else {
+ dst += Tcl_UniCharToUtf(0xFFFD, dst);
+ numChars++;
+ src++;
+ }
+ }
+ }
*srcReadPtr = src - srcStart;
*dstWrotePtr = dst - dstStart;
*dstCharsPtr = numChars;