summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: jan.nijtmans <nijtmans@users.sourceforge.net> 2019-10-15 09:19:35 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2019-10-15 09:19:35 (GMT)
commit: be42ed39b45938000bdf6dc5b4f2c7cab5624927 (patch)
tree: 4a5eb21c8bb2a8402dd4bdc554e17b28a907e53a
parent: 74b5e64e6a54ac5f6c52b2c374438d0c30365dff (diff)
download: tk-be42ed39b45938000bdf6dc5b4f2c7cab5624927.zip
tk-be42ed39b45938000bdf6dc5b4f2c7cab5624927.tar.gz
tk-be42ed39b45938000bdf6dc5b4f2c7cab5624927.tar.bz2
One more step toward fully fixing [a179564826]: Tk 8.6: prevent issues when encountering non-BMP Unicode characters.
Now that Tcl 8.6 no longer treats 4-byte UTF-8 characters as invalid, Tk no longer needs the trick of spitting out two surrogates: Tcl already handles that correctly.
-rw-r--r-- generic/tkUtil.c 14
1 files changed, 8 insertions, 6 deletions
diff --git a/generic/tkUtil.c b/generic/tkUtil.c
index 1942975..8e3e2ee 100644
--- a/generic/tkUtil.c
+++ b/generic/tkUtil.c
@@ -1244,8 +1244,8 @@ TkUtfToUniChar(
*
* TkUniCharToUtf --
*
- * Almost the same as Tcl_UniCharToUtf but producing surrogates if
- * TCL_UTF_MAX==3. So, up to 6 bytes might be produced.
+ * Almost the same as Tcl_UniCharToUtf but producing 4-byte UTF-8
+ * sequences even when TCL_UTF_MAX==3. So, up to 4 bytes might be produced.
*
* Results:
* *buf is filled with the UTF-8 string, and the return value is the
@@ -1262,10 +1262,12 @@ int TkUniCharToUtf(int ch, char *buf)
int size = Tcl_UniCharToUtf(ch, buf);
if ((((unsigned)(ch - 0x10000) <= 0xFFFFF)) && (size < 4)) {
/* Hey, this is wrong, we must be running TCL_UTF_MAX==3
- * The best thing we can do is spit out 2 surrogates */
- ch -= 0x10000;
- size = Tcl_UniCharToUtf(((ch >> 10) | 0xd800), buf);
- size += Tcl_UniCharToUtf(((ch & 0x3ff) | 0xdc00), buf+size);
+ * The best thing we can do is spit out a 4-byte UTF-8 character */
+ buf[3] = (char) ((ch | 0x80) & 0xBF);
+ buf[2] = (char) (((ch >> 6) | 0x80) & 0xBF);
+ buf[1] = (char) (((ch >> 12) | 0x80) & 0xBF);
+ buf[0] = (char) ((ch >> 18) | 0xF0);
+ size = 4;
}
return size;
}