diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2019-11-26 20:33:36 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2019-11-26 20:33:36 (GMT) |
commit | 504bc5bedfd67043ba779b992162efdd5adf0302 (patch) | |
tree | cd8cc2502faf80b61cce5eb2234bc3bf51a7e6bf | |
parent | b5a443a3cf449d0a29cff305026358daebfb0897 (diff) | |
parent | 176b57b663e02f53cd58f9476bc885b0097fc9a9 (diff) | |
download | tk-504bc5bedfd67043ba779b992162efdd5adf0302.zip tk-504bc5bedfd67043ba779b992162efdd5adf0302.tar.gz tk-504bc5bedfd67043ba779b992162efdd5adf0302.tar.bz2 |
Various cleanups in Unicode handling. Note that without Xft on X11 we don't have Emoji.
-rw-r--r-- | generic/tkEntry.c | 2 | ||||
-rw-r--r-- | generic/tkUtil.c | 38 | ||||
-rw-r--r-- | generic/ttk/ttkEntry.c | 2 | ||||
-rw-r--r-- | library/demos/unicodeout.tcl | 28 | ||||
-rw-r--r-- | unix/tkUnixFont.c | 46 | ||||
-rw-r--r-- | win/tkWinFont.c | 4 | ||||
-rw-r--r-- | win/tkWinKey.c | 2 | ||||
-rw-r--r-- | win/tkWinX.c | 3 |
8 files changed, 61 insertions, 64 deletions
diff --git a/generic/tkEntry.c b/generic/tkEntry.c index 0dfacd7..161e581 100644 --- a/generic/tkEntry.c +++ b/generic/tkEntry.c @@ -1923,7 +1923,7 @@ EntryComputeGeometry( if (entryPtr->showChar != NULL) { int ch; - char buf[6]; + char buf[4]; int size; /* diff --git a/generic/tkUtil.c b/generic/tkUtil.c index 8e3e2ee..33faab8 100644 --- a/generic/tkUtil.c +++ b/generic/tkUtil.c @@ -1216,26 +1216,23 @@ TkSendVirtualEvent( int TkUtfToUniChar( const char *src, /* The UTF-8 string. */ - int *chPtr) /* Filled with the Tcl_UniChar represented by + int *chPtr) /* Filled with the Unicode value represented by * the UTF-8 string. */ { Tcl_UniChar uniChar = 0; int len = Tcl_UtfToUniChar(src, &uniChar); - if ((uniChar & 0xfc00) == 0xd800) { - Tcl_UniChar high = uniChar; + if ((uniChar & 0xFC00) == 0xD800) { + Tcl_UniChar low = uniChar; /* This can only happen if Tcl is compiled with TCL_UTF_MAX=4, * or when a high surrogate character is detected in UTF-8 form */ - int len2 = Tcl_UtfToUniChar(src+len, &uniChar); - if ((uniChar & 0xfc00) == 0xdc00) { - *chPtr = (((high & 0x3ff) << 10) | (uniChar & 0x3ff)) + 0x10000; - len += len2; - } else { - *chPtr = high; + int len2 = Tcl_UtfToUniChar(src+len, &low); + if ((uniChar & 0xFC00) == 0xDC00) { + *chPtr = (((uniChar & 0x3FF) << 10) | (low & 0x3FF)) + 0x10000; + return len + len2; } - } else { - *chPtr = uniChar; } + *chPtr = uniChar; return len; } @@ -1259,17 +1256,16 @@ TkUtfToUniChar( int TkUniCharToUtf(int ch, char *buf) { - int size = Tcl_UniCharToUtf(ch, buf); - if ((((unsigned)(ch - 0x10000) <= 0xFFFFF)) && (size < 4)) { - /* Hey, this is wrong, we must be running TCL_UTF_MAX==3 - * The best thing we can do is spit out a 4-byte UTF-8 character */ - buf[3] = (char) ((ch | 0x80) & 0xBF); - buf[2] = (char) (((ch >> 6) | 0x80) & 0xBF); - buf[1] = (char) (((ch >> 12) | 0x80) & 0xBF); - buf[0] = (char) ((ch >> 18) | 0xF0); - size = 4; + if (((unsigned)(ch - 0x10000) <= 0xFFFFF)) { + /* Spit out a 4-byte UTF-8 character */ + *buf++ = (char) ((ch >> 18) | 0xF0); + *buf++ = (char) (((ch >> 12) | 0x80) & 0xBF); + *buf++ = (char) (((ch >> 6) | 0x80) & 0xBF); + *buf = (char) ((ch | 0x80) & 0xBF); + return 4; + } else { + return Tcl_UniCharToUtf(ch, buf); } - return size; } diff --git a/generic/ttk/ttkEntry.c b/generic/ttk/ttkEntry.c index 1579a32..4862e99 100644 --- a/generic/ttk/ttkEntry.c +++ b/generic/ttk/ttkEntry.c @@ -279,7 +279,7 @@ static char *EntryDisplayString(const char *showChar, int numChars) char *displayString, *p; int size; int ch; - char buf[6]; + char buf[4]; TkUtfToUniChar(showChar, &ch); size = TkUniCharToUtf(ch, buf); diff --git a/library/demos/unicodeout.tcl b/library/demos/unicodeout.tcl index b3c5fd0..bb4d8f8 100644 --- a/library/demos/unicodeout.tcl +++ b/library/demos/unicodeout.tcl @@ -109,10 +109,10 @@ if {[usePresentationFormsFor Arabic]} { } addSample $w "Trad. Chinese" "\u4E2D\u570B\u7684\u6F22\u5B57" addSample $w "Simpl. Chinese" "\u6C49\u8BED" -addSample $w French "Langue fran\u00E7aise" +addSample $w French "Langue fran\xE7aise" addSample $w Greek \ - "\u0395\u03BB\u03BB\u03B7\u03BD\u03B9\u03BA\u03AE " \ - "\u03B3\u03BB\u03CE\u03C3\u03C3\u03B1" + "\u0395\u03BB\u03BB\u03B7\u03BD\u03B9\u03BA\u03AE " \ + "\u03B3\u03BB\u03CE\u03C3\u03C3\u03B1" if {[usePresentationFormsFor Hebrew]} { # Visual order (pre-layouted) addSample $w Hebrew \ @@ -123,20 +123,22 @@ if {[usePresentationFormsFor Hebrew]} { "\u05DB\u05EA\u05D1 \u05E2\u05D1\u05E8\u05D9\u05EA" } addSample $w Hindi \ - "\u0939\u093f\u0928\u094d\u0926\u0940 \u092d\u093e\u0937\u093e" -addSample $w Icelandic "\u00CDslenska" + "\u0939\u093F\u0928\u094D\u0926\u0940 \u092D\u093E\u0937\u093E" +addSample $w Icelandic "\xCDslenska" addSample $w Japanese \ - "\u65E5\u672C\u8A9E\u306E\u3072\u3089\u304C\u306A, " \ - "\u6F22\u5B57\u3068\u30AB\u30BF\u30AB\u30CA" + "\u65E5\u672C\u8A9E\u306E\u3072\u3089\u304C\u306A, " \ + "\u6F22\u5B57\u3068\u30AB\u30BF\u30AB\u30CA" addSample $w Korean "\uB300\uD55C\uBBFC\uAD6D\uC758 \uD55C\uAE00" addSample $w Russian \ "\u0420\u0443\u0441\u0441\u043A\u0438\u0439 \u044F\u0437\u044B\u043A" -if {[package vsatisfies [package provide Tcl] 8.7-]} { - addSample $w Emoji \ - "\U1F600\U1F4A9\U1F44D\U1F1F3\U1F1F1" -} elseif {([tk windowingsystem] ne "x11") || (![catch {tk::pkgconfig get fontsystem} fs] && ($fs eq "xft"))} { - addSample $w Emoji \ - "\uD83D\uDE00\uD83D\uDCA9\uD83D\uDC4D\uD83C\uDDF3\uD83C\uDDF1" +if {([tk windowingsystem] ne "x11") || (![catch {tk::pkgconfig get fontsystem} fs] && ($fs eq "xft"))} { + if {[package vsatisfies [package provide Tcl] 8.7-]} { + addSample $w Emoji \ + "\U1F600\U1F4A9\U1F44D\U1F1F3\U1F1F1" + } else { + addSample $w Emoji \ + "\uD83D\uDE00\uD83D\uDCA9\uD83D\uDC4D\uD83C\uDDF3\uD83C\uDDF1" + } } ## We're done processing, so change things back to normal running... diff --git a/unix/tkUnixFont.c b/unix/tkUnixFont.c index 8c0e6fe..3893a0a 100644 --- a/unix/tkUnixFont.c +++ b/unix/tkUnixFont.c @@ -12,8 +12,6 @@ #include "tkUnixInt.h" #include "tkFont.h" -#include <netinet/in.h> /* for htons() prototype */ -#include <arpa/inet.h> /* inet_ntoa() */ /* * The preferred font encodings. @@ -487,9 +485,13 @@ Ucs2beToUtfProc( * output buffer. */ { const char *srcStart, *srcEnd; - char *dstEnd, *dstStart; - int result, numChars; + const char *dstEnd, *dstStart; + int result, numChars, charLimit = INT_MAX; + unsigned short ch; + if (flags & TCL_ENCODING_CHAR_LIMIT) { + charLimit = *dstCharsPtr; + } result = TCL_OK; /* check alignment with ucs-2 (2 == sizeof(UCS-2)) */ @@ -507,21 +509,26 @@ Ucs2beToUtfProc( srcEnd = src + srcLen; dstStart = dst; - dstEnd = dst + dstLen - TCL_UTF_MAX; + dstEnd = dst + dstLen - 4; - for (numChars = 0; src < srcEnd; numChars++) { + for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } + ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF); + src += 2 /* sizeof(UTF-16) */; + /* - * Need to swap byte-order on little-endian machines (x86) for - * UCS-2BE. We know this is an LE->BE swap. + * Special case for 1-byte utf chars for speed. Make sure we work with + * unsigned short-size data. */ - - dst += Tcl_UniCharToUtf(htons(*((short *)src)), dst); - src += 2 /* sizeof(UCS-2) */; + if (ch && ch < 0x80) { + *dst++ = (ch & 0xFF); + } else { + dst += Tcl_UniCharToUtf(ch, dst); + } } *srcReadPtr = src - srcStart; @@ -576,17 +583,13 @@ UtfToUcs2beProc( { const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; int result, numChars; - Tcl_UniChar *chPtr = (Tcl_UniChar *)statePtr; - - if (flags & TCL_ENCODING_START) { - *statePtr = 0; - } + int ch; srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; if (!(flags & TCL_ENCODING_END)) { - srcClose -= TCL_UTF_MAX; + srcClose -= 6; } dstStart = dst; @@ -606,17 +609,14 @@ UtfToUcs2beProc( result = TCL_CONVERT_NOSPACE; break; } - src += Tcl_UtfToUniChar(src, chPtr); + src += TkUtfToUniChar(src, &ch); /* * Ensure big-endianness (store big bits first). - * XXX: This hard-codes the assumed size of Tcl_UniChar as 2. Make - * sure to work in char* for Tcl_UtfToUniChar alignment. [Bug 1122671] */ - - *dst++ = (char)(*chPtr >> 8); - *dst++ = (char)*chPtr; + *dst++ = (char)(ch >> 8); + *dst++ = (char)ch; } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; diff --git a/win/tkWinFont.c b/win/tkWinFont.c index 604a667..7b70a08 100644 --- a/win/tkWinFont.c +++ b/win/tkWinFont.c @@ -2172,9 +2172,9 @@ FontMapLoadPage( { FontFamily *familyPtr; Tcl_Encoding encoding; - char src[XMaxTransChars], buf[16]; - USHORT *startCount, *endCount; int i, j, bitOffset, end, segCount; + USHORT *startCount, *endCount; + char buf[16], src[4]; subFontPtr->fontMap[row] = ckalloc(FONTMAP_BITSPERPAGE / 8); memset(subFontPtr->fontMap[row], 0, FONTMAP_BITSPERPAGE / 8); diff --git a/win/tkWinKey.c b/win/tkWinKey.c index 8a83874..29f2ff0 100644 --- a/win/tkWinKey.c +++ b/win/tkWinKey.c @@ -97,8 +97,8 @@ TkpGetString( * result. */ { XKeyEvent *keyEv = &eventPtr->xkey; - char buf[6]; int len; + char buf[4]; Tcl_DStringInit(dsPtr); if (keyEv->send_event == -1) { diff --git a/win/tkWinX.c b/win/tkWinX.c index c01096e..d2af130 100644 --- a/win/tkWinX.c +++ b/win/tkWinX.c @@ -1478,8 +1478,7 @@ GetTranslatedKey( if ((msg.message == WM_CHAR) && (msg.lParam & 0x20000000)) { xkey->state = 0; } - xkey->trans_chars[xkey->nbytes] = (char) msg.wParam; - xkey->nbytes++; + xkey->trans_chars[xkey->nbytes++] = (char) msg.wParam; if (((unsigned short) msg.wParam) > ((unsigned short) 0xff)) { /* |