diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2019-09-16 12:47:19 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2019-09-16 12:47:19 (GMT) |
commit | 19eb51a4cbc12b6d10a2ff7c488ae0471bc30503 (patch) | |
tree | 306ea67ff656656129d79158d55078a174340246 | |
parent | 93022718af12833e135ad743bc6169bcfd443ddf (diff) | |
download | tcl-19eb51a4cbc12b6d10a2ff7c488ae0471bc30503.zip tcl-19eb51a4cbc12b6d10a2ff7c488ae0471bc30503.tar.gz tcl-19eb51a4cbc12b6d10a2ff7c488ae0471bc30503.tar.bz2 |
Fix Utf16ToUtfProc() (from TIP #548): If the last code point is a high surrogate, make sure that the actual conversion is delayed until the next round, ensuring proper merging of the two surrogates into a single UTF-8 character.
-rw-r--r-- | generic/tclEncoding.c | 12 |
1 files changed, 9 insertions, 3 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 9896f85..0ec0649 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2485,10 +2485,16 @@ Utf16ToUtfProc( charLimit = *dstCharsPtr; } result = TCL_OK; - if ((srcLen % sizeof(unsigned short)) != 0) { + + /* check alignment with utf-16 (2 == sizeof(UTF-16)) */ + if ((srcLen % 2) != 0) { + result = TCL_CONVERT_MULTIBYTE; + srcLen--; + } + /* If last code point is a high surrogate, we cannot handle that yet */ + if ((srcLen >= 2) && ((src[srcLen - (clientData?1:2)] & 0xFC) == 0xD8)) { result = TCL_CONVERT_MULTIBYTE; - srcLen /= sizeof(unsigned short); - srcLen *= sizeof(unsigned short); + srcLen-= 2; } srcStart = src; |