summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: jan.nijtmans <nijtmans@users.sourceforge.net> 2019-09-16 12:47:19 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2019-09-16 12:47:19 (GMT)
commit: 19eb51a4cbc12b6d10a2ff7c488ae0471bc30503 (patch)
tree: 306ea67ff656656129d79158d55078a174340246
parent: 93022718af12833e135ad743bc6169bcfd443ddf (diff)
downloadtcl-19eb51a4cbc12b6d10a2ff7c488ae0471bc30503.zip
tcl-19eb51a4cbc12b6d10a2ff7c488ae0471bc30503.tar.gz
tcl-19eb51a4cbc12b6d10a2ff7c488ae0471bc30503.tar.bz2
Fix Utf16ToUtfProc() (from TIP #548): if the last UTF-16 code unit in the source buffer is a high surrogate, delay its conversion until the next round, so that it can be properly merged with the following low surrogate into a single UTF-8 character.
-rw-r--r-- generic/tclEncoding.c 12
1 file changed, 9 insertions, 3 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 9896f85..0ec0649 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2485,10 +2485,16 @@ Utf16ToUtfProc(
charLimit = *dstCharsPtr;
}
result = TCL_OK;
- if ((srcLen % sizeof(unsigned short)) != 0) {
+
+ /* check alignment with utf-16 (2 == sizeof(UTF-16)) */
+ if ((srcLen % 2) != 0) {
+ result = TCL_CONVERT_MULTIBYTE;
+ srcLen--;
+ }
+ /* If last code point is a high surrogate, we cannot handle that yet */
+ if ((srcLen >= 2) && ((src[srcLen - (clientData?1:2)] & 0xFC) == 0xD8)) {
result = TCL_CONVERT_MULTIBYTE;
- srcLen /= sizeof(unsigned short);
- srcLen *= sizeof(unsigned short);
+ srcLen-= 2;
}
srcStart = src;