summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: jan.nijtmans <nijtmans@users.sourceforge.net> 2019-11-26 20:33:36 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2019-11-26 20:33:36 (GMT)
commit: 504bc5bedfd67043ba779b992162efdd5adf0302 (patch)
tree: cd8cc2502faf80b61cce5eb2234bc3bf51a7e6bf
parent: b5a443a3cf449d0a29cff305026358daebfb0897 (diff)
parent: 176b57b663e02f53cd58f9476bc885b0097fc9a9 (diff)
download: tk-504bc5bedfd67043ba779b992162efdd5adf0302.zip
tk-504bc5bedfd67043ba779b992162efdd5adf0302.tar.gz
tk-504bc5bedfd67043ba779b992162efdd5adf0302.tar.bz2
Various cleanups in Unicode handling. Note that without Xft on X11 we don't have Emoji.
-rw-r--r-- generic/tkEntry.c 2
-rw-r--r-- generic/tkUtil.c 38
-rw-r--r-- generic/ttk/ttkEntry.c 2
-rw-r--r-- library/demos/unicodeout.tcl 28
-rw-r--r-- unix/tkUnixFont.c 46
-rw-r--r-- win/tkWinFont.c 4
-rw-r--r-- win/tkWinKey.c 2
-rw-r--r-- win/tkWinX.c 3
8 files changed, 61 insertions, 64 deletions
diff --git a/generic/tkEntry.c b/generic/tkEntry.c
index 0dfacd7..161e581 100644
--- a/generic/tkEntry.c
+++ b/generic/tkEntry.c
@@ -1923,7 +1923,7 @@ EntryComputeGeometry(
if (entryPtr->showChar != NULL) {
int ch;
- char buf[6];
+ char buf[4];
int size;
/*
diff --git a/generic/tkUtil.c b/generic/tkUtil.c
index 8e3e2ee..33faab8 100644
--- a/generic/tkUtil.c
+++ b/generic/tkUtil.c
@@ -1216,26 +1216,23 @@ TkSendVirtualEvent(
int
TkUtfToUniChar(
const char *src, /* The UTF-8 string. */
- int *chPtr) /* Filled with the Tcl_UniChar represented by
+ int *chPtr) /* Filled with the Unicode value represented by
* the UTF-8 string. */
{
Tcl_UniChar uniChar = 0;
int len = Tcl_UtfToUniChar(src, &uniChar);
- if ((uniChar & 0xfc00) == 0xd800) {
- Tcl_UniChar high = uniChar;
+ if ((uniChar & 0xFC00) == 0xD800) {
+ Tcl_UniChar low = uniChar;
/* This can only happen if Tcl is compiled with TCL_UTF_MAX=4,
* or when a high surrogate character is detected in UTF-8 form */
- int len2 = Tcl_UtfToUniChar(src+len, &uniChar);
- if ((uniChar & 0xfc00) == 0xdc00) {
- *chPtr = (((high & 0x3ff) << 10) | (uniChar & 0x3ff)) + 0x10000;
- len += len2;
- } else {
- *chPtr = high;
+ int len2 = Tcl_UtfToUniChar(src+len, &low);
+ if ((uniChar & 0xFC00) == 0xDC00) {
+ *chPtr = (((uniChar & 0x3FF) << 10) | (low & 0x3FF)) + 0x10000;
+ return len + len2;
}
- } else {
- *chPtr = uniChar;
}
+ *chPtr = uniChar;
return len;
}
@@ -1259,17 +1256,16 @@ TkUtfToUniChar(
int TkUniCharToUtf(int ch, char *buf)
{
- int size = Tcl_UniCharToUtf(ch, buf);
- if ((((unsigned)(ch - 0x10000) <= 0xFFFFF)) && (size < 4)) {
- /* Hey, this is wrong, we must be running TCL_UTF_MAX==3
- * The best thing we can do is spit out a 4-byte UTF-8 character */
- buf[3] = (char) ((ch | 0x80) & 0xBF);
- buf[2] = (char) (((ch >> 6) | 0x80) & 0xBF);
- buf[1] = (char) (((ch >> 12) | 0x80) & 0xBF);
- buf[0] = (char) ((ch >> 18) | 0xF0);
- size = 4;
+ if (((unsigned)(ch - 0x10000) <= 0xFFFFF)) {
+ /* Spit out a 4-byte UTF-8 character */
+ *buf++ = (char) ((ch >> 18) | 0xF0);
+ *buf++ = (char) (((ch >> 12) | 0x80) & 0xBF);
+ *buf++ = (char) (((ch >> 6) | 0x80) & 0xBF);
+ *buf = (char) ((ch | 0x80) & 0xBF);
+ return 4;
+ } else {
+ return Tcl_UniCharToUtf(ch, buf);
}
- return size;
}
diff --git a/generic/ttk/ttkEntry.c b/generic/ttk/ttkEntry.c
index 1579a32..4862e99 100644
--- a/generic/ttk/ttkEntry.c
+++ b/generic/ttk/ttkEntry.c
@@ -279,7 +279,7 @@ static char *EntryDisplayString(const char *showChar, int numChars)
char *displayString, *p;
int size;
int ch;
- char buf[6];
+ char buf[4];
TkUtfToUniChar(showChar, &ch);
size = TkUniCharToUtf(ch, buf);
diff --git a/library/demos/unicodeout.tcl b/library/demos/unicodeout.tcl
index b3c5fd0..bb4d8f8 100644
--- a/library/demos/unicodeout.tcl
+++ b/library/demos/unicodeout.tcl
@@ -109,10 +109,10 @@ if {[usePresentationFormsFor Arabic]} {
}
addSample $w "Trad. Chinese" "\u4E2D\u570B\u7684\u6F22\u5B57"
addSample $w "Simpl. Chinese" "\u6C49\u8BED"
-addSample $w French "Langue fran\u00E7aise"
+addSample $w French "Langue fran\xE7aise"
addSample $w Greek \
- "\u0395\u03BB\u03BB\u03B7\u03BD\u03B9\u03BA\u03AE " \
- "\u03B3\u03BB\u03CE\u03C3\u03C3\u03B1"
+ "\u0395\u03BB\u03BB\u03B7\u03BD\u03B9\u03BA\u03AE " \
+ "\u03B3\u03BB\u03CE\u03C3\u03C3\u03B1"
if {[usePresentationFormsFor Hebrew]} {
# Visual order (pre-layouted)
addSample $w Hebrew \
@@ -123,20 +123,22 @@ if {[usePresentationFormsFor Hebrew]} {
"\u05DB\u05EA\u05D1 \u05E2\u05D1\u05E8\u05D9\u05EA"
}
addSample $w Hindi \
- "\u0939\u093f\u0928\u094d\u0926\u0940 \u092d\u093e\u0937\u093e"
-addSample $w Icelandic "\u00CDslenska"
+ "\u0939\u093F\u0928\u094D\u0926\u0940 \u092D\u093E\u0937\u093E"
+addSample $w Icelandic "\xCDslenska"
addSample $w Japanese \
- "\u65E5\u672C\u8A9E\u306E\u3072\u3089\u304C\u306A, " \
- "\u6F22\u5B57\u3068\u30AB\u30BF\u30AB\u30CA"
+ "\u65E5\u672C\u8A9E\u306E\u3072\u3089\u304C\u306A, " \
+ "\u6F22\u5B57\u3068\u30AB\u30BF\u30AB\u30CA"
addSample $w Korean "\uB300\uD55C\uBBFC\uAD6D\uC758 \uD55C\uAE00"
addSample $w Russian \
"\u0420\u0443\u0441\u0441\u043A\u0438\u0439 \u044F\u0437\u044B\u043A"
-if {[package vsatisfies [package provide Tcl] 8.7-]} {
- addSample $w Emoji \
- "\U1F600\U1F4A9\U1F44D\U1F1F3\U1F1F1"
-} elseif {([tk windowingsystem] ne "x11") || (![catch {tk::pkgconfig get fontsystem} fs] && ($fs eq "xft"))} {
- addSample $w Emoji \
- "\uD83D\uDE00\uD83D\uDCA9\uD83D\uDC4D\uD83C\uDDF3\uD83C\uDDF1"
+if {([tk windowingsystem] ne "x11") || (![catch {tk::pkgconfig get fontsystem} fs] && ($fs eq "xft"))} {
+ if {[package vsatisfies [package provide Tcl] 8.7-]} {
+ addSample $w Emoji \
+ "\U1F600\U1F4A9\U1F44D\U1F1F3\U1F1F1"
+ } else {
+ addSample $w Emoji \
+ "\uD83D\uDE00\uD83D\uDCA9\uD83D\uDC4D\uD83C\uDDF3\uD83C\uDDF1"
+ }
}
## We're done processing, so change things back to normal running...
diff --git a/unix/tkUnixFont.c b/unix/tkUnixFont.c
index 8c0e6fe..3893a0a 100644
--- a/unix/tkUnixFont.c
+++ b/unix/tkUnixFont.c
@@ -12,8 +12,6 @@
#include "tkUnixInt.h"
#include "tkFont.h"
-#include <netinet/in.h> /* for htons() prototype */
-#include <arpa/inet.h> /* inet_ntoa() */
/*
* The preferred font encodings.
@@ -487,9 +485,13 @@ Ucs2beToUtfProc(
* output buffer. */
{
const char *srcStart, *srcEnd;
- char *dstEnd, *dstStart;
- int result, numChars;
+ const char *dstEnd, *dstStart;
+ int result, numChars, charLimit = INT_MAX;
+ unsigned short ch;
+ if (flags & TCL_ENCODING_CHAR_LIMIT) {
+ charLimit = *dstCharsPtr;
+ }
result = TCL_OK;
/* check alignment with ucs-2 (2 == sizeof(UCS-2)) */
@@ -507,21 +509,26 @@ Ucs2beToUtfProc(
srcEnd = src + srcLen;
dstStart = dst;
- dstEnd = dst + dstLen - TCL_UTF_MAX;
+ dstEnd = dst + dstLen - 4;
- for (numChars = 0; src < srcEnd; numChars++) {
+ for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
if (dst > dstEnd) {
result = TCL_CONVERT_NOSPACE;
break;
}
+ ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF);
+ src += 2 /* sizeof(UTF-16) */;
+
/*
- * Need to swap byte-order on little-endian machines (x86) for
- * UCS-2BE. We know this is an LE->BE swap.
+ * Special case for 1-byte utf chars for speed. Make sure we work with
+ * unsigned short-size data.
*/
-
- dst += Tcl_UniCharToUtf(htons(*((short *)src)), dst);
- src += 2 /* sizeof(UCS-2) */;
+ if (ch && ch < 0x80) {
+ *dst++ = (ch & 0xFF);
+ } else {
+ dst += Tcl_UniCharToUtf(ch, dst);
+ }
}
*srcReadPtr = src - srcStart;
@@ -576,17 +583,13 @@ UtfToUcs2beProc(
{
const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
int result, numChars;
- Tcl_UniChar *chPtr = (Tcl_UniChar *)statePtr;
-
- if (flags & TCL_ENCODING_START) {
- *statePtr = 0;
- }
+ int ch;
srcStart = src;
srcEnd = src + srcLen;
srcClose = srcEnd;
if (!(flags & TCL_ENCODING_END)) {
- srcClose -= TCL_UTF_MAX;
+ srcClose -= 6;
}
dstStart = dst;
@@ -606,17 +609,14 @@ UtfToUcs2beProc(
result = TCL_CONVERT_NOSPACE;
break;
}
- src += Tcl_UtfToUniChar(src, chPtr);
+ src += TkUtfToUniChar(src, &ch);
/*
* Ensure big-endianness (store big bits first).
- * XXX: This hard-codes the assumed size of Tcl_UniChar as 2. Make
- * sure to work in char* for Tcl_UtfToUniChar alignment. [Bug 1122671]
*/
-
- *dst++ = (char)(*chPtr >> 8);
- *dst++ = (char)*chPtr;
+ *dst++ = (char)(ch >> 8);
+ *dst++ = (char)ch;
}
*srcReadPtr = src - srcStart;
*dstWrotePtr = dst - dstStart;
diff --git a/win/tkWinFont.c b/win/tkWinFont.c
index 604a667..7b70a08 100644
--- a/win/tkWinFont.c
+++ b/win/tkWinFont.c
@@ -2172,9 +2172,9 @@ FontMapLoadPage(
{
FontFamily *familyPtr;
Tcl_Encoding encoding;
- char src[XMaxTransChars], buf[16];
- USHORT *startCount, *endCount;
int i, j, bitOffset, end, segCount;
+ USHORT *startCount, *endCount;
+ char buf[16], src[4];
subFontPtr->fontMap[row] = ckalloc(FONTMAP_BITSPERPAGE / 8);
memset(subFontPtr->fontMap[row], 0, FONTMAP_BITSPERPAGE / 8);
diff --git a/win/tkWinKey.c b/win/tkWinKey.c
index 8a83874..29f2ff0 100644
--- a/win/tkWinKey.c
+++ b/win/tkWinKey.c
@@ -97,8 +97,8 @@ TkpGetString(
* result. */
{
XKeyEvent *keyEv = &eventPtr->xkey;
- char buf[6];
int len;
+ char buf[4];
Tcl_DStringInit(dsPtr);
if (keyEv->send_event == -1) {
diff --git a/win/tkWinX.c b/win/tkWinX.c
index c01096e..d2af130 100644
--- a/win/tkWinX.c
+++ b/win/tkWinX.c
@@ -1478,8 +1478,7 @@ GetTranslatedKey(
if ((msg.message == WM_CHAR) && (msg.lParam & 0x20000000)) {
xkey->state = 0;
}
- xkey->trans_chars[xkey->nbytes] = (char) msg.wParam;
- xkey->nbytes++;
+ xkey->trans_chars[xkey->nbytes++] = (char) msg.wParam;
if (((unsigned short) msg.wParam) > ((unsigned short) 0xff)) {
/*