diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2018-04-19 22:29:00 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2018-04-19 22:29:00 (GMT) |
commit | 1734eed89f76598661a4ce4c7d5e43ce7fe4368c (patch) | |
tree | f019c100046dd4fd62c771847d696e8f3aa12f7c /generic | |
parent | 75bd116527ce94efc1c14c6dc82c526614ed6c7f (diff) | |
download | tcl-1734eed89f76598661a4ce4c7d5e43ce7fe4368c.zip tcl-1734eed89f76598661a4ce4c7d5e43ce7fe4368c.tar.gz tcl-1734eed89f76598661a4ce4c7d5e43ce7fe4368c.tar.bz2 |
Slightly improved (more fail-safe) surrogate handling for TCL_UTF_MAX > 3. Backported from the latest TIP 389 implementation (to be used for AndroWish).
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tclUtf.c | 21 |
1 file changed, 14 insertions, 7 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 6255a4e..0d88d36 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -154,19 +154,26 @@ Tcl_UniCharToUtf(
         return 2;
     }
     if (ch <= 0xFFFF) {
-#if TCL_UTF_MAX == 4
+#if TCL_UTF_MAX > 3
         if ((ch & 0xF800) == 0xD800) {
             if (ch & 0x0400) {
                 /* Low surrogate */
-                buf[3] = (char) ((ch | 0x80) & 0xBF);
-                buf[2] |= (char) (((ch >> 6) | 0x80) & 0x8F);
-                return 4;
+                if (((buf[0] & 0xF8) == 0xF0) && ((buf[1] & 0xC0) == 0x80)
+                        && ((buf[2] & 0xCF) == 0)) {
+                    /* Previous Tcl_UniChar was a High surrogate, so combine */
+                    buf[3] = (char) ((ch & 0x3F) | 0x80);
+                    buf[2] |= (char) (((ch >> 6) & 0x0F) | 0x80);
+                    return 4;
+                }
+                /* Previous Tcl_UniChar was not a High surrogate, so just output */
             } else {
                 /* High surrogate */
                 ch += 0x40;
-                buf[2] = (char) (((ch << 4) | 0x80) & 0xB0);
-                buf[1] = (char) (((ch >> 2) | 0x80) & 0xBF);
-                buf[0] = (char) (((ch >> 8) | 0xF0) & 0xF7);
+                /* Fill buffer with specific 3-byte (invalid) byte combination,
+                   so following Low surrogate can recognize it and combine */
+                buf[2] = (char) ((ch << 4) & 0x30);
+                buf[1] = (char) (((ch >> 2) & 0x3F) | 0x80);
+                buf[0] = (char) (((ch >> 8) & 0x07) | 0xF0);
                 return 0;
             }
         }