diff options
| -rw-r--r-- | generic/tclUtf.c | 19 |
1 files changed, 13 insertions, 6 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 923b1f8..ab4e142 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -148,15 +148,22 @@ Tcl_UniCharToUtf( if ((ch & 0xF800) == 0xD800) { if (ch & 0x0400) { /* Low surrogate */ - buf[3] = (char) ((ch | 0x80) & 0xBF); - buf[2] |= (char) (((ch >> 6) | 0x80) & 0x8F); - return 4; + if (((buf[0] & 0xF8) == 0xF0) && ((buf[1] & 0xC0) == 0x80) + && ((buf[2] & 0xCF) == 0)) { + /* Previous Tcl_UniChar was a High surrogate, so combine */ + buf[3] = (char) ((ch & 0x3F) | 0x80); + buf[2] |= (char) (((ch >> 6) & 0x0F) | 0x80); + return 4; + } + /* Previous Tcl_UniChar was not a High surrogate, so just output */ } else { /* High surrogate */ ch += 0x40; - buf[2] = (char) (((ch << 4) | 0x80) & 0xB0); - buf[1] = (char) (((ch >> 2) | 0x80) & 0xBF); - buf[0] = (char) (((ch >> 8) | 0xF0) & 0xF7); + /* Fill buffer with specific 3-byte (invalid) byte combination, + so following Low surrogate can recognize it and combine */ + buf[2] = (char) ((ch << 4) & 0x30); + buf[1] = (char) (((ch >> 2) & 0x3F) | 0x80); + buf[0] = (char) (((ch >> 8) & 0x07) | 0xF0); return 0; } } |
