diff options
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tclUtf.c | 21 |
1 files changed, 14 insertions, 7 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 6255a4e..0d88d36 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -154,19 +154,26 @@ Tcl_UniCharToUtf( return 2; } if (ch <= 0xFFFF) { -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX > 3 if ((ch & 0xF800) == 0xD800) { if (ch & 0x0400) { /* Low surrogate */ - buf[3] = (char) ((ch | 0x80) & 0xBF); - buf[2] |= (char) (((ch >> 6) | 0x80) & 0x8F); - return 4; + if (((buf[0] & 0xF8) == 0xF0) && ((buf[1] & 0xC0) == 0x80) + && ((buf[2] & 0xCF) == 0)) { + /* Previous Tcl_UniChar was a High surrogate, so combine */ + buf[3] = (char) ((ch & 0x3F) | 0x80); + buf[2] |= (char) (((ch >> 6) & 0x0F) | 0x80); + return 4; + } + /* Previous Tcl_UniChar was not a High surrogate, so just output */ } else { /* High surrogate */ ch += 0x40; - buf[2] = (char) (((ch << 4) | 0x80) & 0xB0); - buf[1] = (char) (((ch >> 2) | 0x80) & 0xBF); - buf[0] = (char) (((ch >> 8) | 0xF0) & 0xF7); + /* Fill buffer with specific 3-byte (invalid) byte combination, + so following Low surrogate can recognize it and combine */ + buf[2] = (char) ((ch << 4) & 0x30); + buf[1] = (char) (((ch >> 2) & 0x3F) | 0x80); + buf[0] = (char) (((ch >> 8) & 0x07) | 0xF0); return 0; } } |