diff options
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tkEntry.c | 16 | ||||
-rw-r--r-- | generic/tkFont.c | 47 | ||||
-rw-r--r-- | generic/tkInt.h | 6 | ||||
-rw-r--r-- | generic/tkText.c | 10 | ||||
-rw-r--r-- | generic/tkTextDisp.c | 4 | ||||
-rw-r--r-- | generic/tkTextIndex.c | 16 | ||||
-rw-r--r-- | generic/tkUtil.c | 99 | ||||
-rw-r--r-- | generic/ttk/ttkEntry.c | 16 |
8 files changed, 41 insertions, 173 deletions
diff --git a/generic/tkEntry.c b/generic/tkEntry.c index 5faf4ef..a66cf18 100644 --- a/generic/tkEntry.c +++ b/generic/tkEntry.c @@ -1926,6 +1926,7 @@ EntryComputeGeometry( */ if (entryPtr->showChar != NULL) { + Tcl_UniChar ch; char buf[4]; int size; @@ -1935,15 +1936,8 @@ EntryComputeGeometry( * characters might end up looking like one valid UTF character in the * resulting string. */ -#if TCL_UTF_MAX == 4 - int ch; - - TkUtfToUniChar32(entryPtr->showChar, &ch); -#else - Tcl_UniChar ch; Tcl_UtfToUniChar(entryPtr->showChar, &ch); -#endif size = Tcl_UniCharToUtf(ch, buf); entryPtr->numDisplayBytes = entryPtr->numChars * size; @@ -3420,11 +3414,7 @@ ExpandPercents( * list element. */ int number, length; register const char *string; -#if TCL_UTF_MAX == 4 - int ch; -#else Tcl_UniChar ch; -#endif char numStorage[2*TCL_INTEGER_SPACE]; while (1) { @@ -3457,11 +3447,7 @@ ExpandPercents( before++; /* skip over % */ if (*before != '\0') { -#if TCL_UTF_MAX == 4 - before += TkUtfToUniChar32(before, &ch); -#else before += Tcl_UtfToUniChar(before, &ch); -#endif } else { ch = '%'; } diff --git a/generic/tkFont.c b/generic/tkFont.c index 3088959..4a45691 100644 --- a/generic/tkFont.c +++ b/generic/tkFont.c @@ -497,11 +497,7 @@ Tk_FontObjCmd( const char *s; Tk_Font tkfont; Tcl_Obj *optPtr, *charPtr, *resultPtr; -#if TCL_UTF_MAX == 4 int uniChar = 0; -#else - Tcl_UniChar uniChar = 0; -#endif const TkFontAttributes *faPtr; TkFontAttributes fa; @@ -566,32 +562,19 @@ Tk_FontObjCmd( */ if (charPtr != NULL) { -#if TCL_UTF_MAX == 4 - Tcl_UniChar *ucPtr; -#endif + const char *string = Tcl_GetString(charPtr); + int len = TkUtfToUniChar2(string, &uniChar); - if (Tcl_GetCharLength(charPtr) != 1) { + if (len != charPtr->length) { resultPtr = Tcl_NewStringObj( "expected a single character but got \"", -1); - Tcl_AppendLimitedToObj(resultPtr, Tcl_GetString(charPtr), + Tcl_AppendLimitedToObj(resultPtr, string, -1, 40, "..."); Tcl_AppendToObj(resultPtr, "\"", -1); Tcl_SetObjResult(interp, 
resultPtr); Tcl_SetErrorCode(interp, "TK", "VALUE", "FONT_SAMPLE", NULL); return TCL_ERROR; } -#if TCL_UTF_MAX == 4 - ucPtr = Tcl_GetUnicodeFromObj(charPtr, NULL); - uniChar = *ucPtr; - if (((uniChar & 0xFC00) == 0xD800) && (ucPtr[1] != 0x000)) { - if ((ucPtr[1] & 0xFC00) == 0xDC00) { - uniChar = ((uniChar & 0x3FF) << 10) + (ucPtr[1] & 0x3FF); - uniChar += 0x10000; - } - } -#else - uniChar = Tcl_GetUniChar(charPtr, 0); -#endif } /* @@ -1713,11 +1696,7 @@ Tk_PostscriptFontName( } else if (strcasecmp(family, "ZapfDingbats") == 0) { family = "ZapfDingbats"; } else { -#if TCL_UTF_MAX == 4 int ch; -#else - Tcl_UniChar ch; -#endif /* * Inline, capitalize the first letter of each word, lowercase the @@ -1735,11 +1714,7 @@ Tk_PostscriptFontName( src++; upper = 1; } -#if TCL_UTF_MAX == 4 - src += TkUtfToUniChar32(src, &ch); -#else - src += Tcl_UtfToUniChar(src, &ch); -#endif + src += TkUtfToUniChar2(src, &ch); if (upper) { ch = (Tcl_UniChar) Tcl_UniCharToUpper(ch); upper = 0; @@ -3276,11 +3251,7 @@ Tk_TextLayoutToPostscript( int i, j, len; const char *p, *glyphname; char uindex[5], c, *ps; -#if TCL_UTF_MAX == 4 int ch; -#else - Tcl_UniChar ch; -#endif Tcl_AppendToObj(psObj, "[(", -1); for (i = 0; i < layoutPtr->numChunks; i++, chunkPtr++) { @@ -3303,11 +3274,7 @@ Tk_TextLayoutToPostscript( * international postscript fonts. */ -#if TCL_UTF_MAX == 4 - p += TkUtfToUniChar32(p, &ch); -#else - p += Tcl_UtfToUniChar(p, &ch); -#endif + p += TkUtfToUniChar2(p, &ch); if ((ch == '(') || (ch == ')') || (ch == '\\') || (ch < 0x20)) { /* * Tricky point: the "03" is necessary in the sprintf below, @@ -3333,11 +3300,9 @@ Tk_TextLayoutToPostscript( * use the full glyph name. */ -#if TCL_UTF_MAX > 3 if (ch > 0xffff) { goto noMapping; } -#endif sprintf(uindex, "%04X", ch); /* endianness? 
*/ glyphname = Tcl_GetVar2(interp, "::tk::psglyphs", uindex, 0); if (glyphname) { diff --git a/generic/tkInt.h b/generic/tkInt.h index 367ef3a..6d86e08 100644 --- a/generic/tkInt.h +++ b/generic/tkInt.h @@ -1232,8 +1232,10 @@ MODULE_SCOPE Status TkParseColor (Display * display, MODULE_SCOPE void TkUnixSetXftClipRegion(TkRegion clipRegion); #endif -#if TCL_UTF_MAX == 4 -MODULE_SCOPE int TkUtfToUniChar32(const char *src, int *chPtr); +#if TCL_UTF_MAX > 4 +# define TkUtfToUniChar2 Tcl_UtfToUniChar +#else + MODULE_SCOPE int TkUtfToUniChar2(const char *src, int *chPtr); #endif /* diff --git a/generic/tkText.c b/generic/tkText.c index 834e842..dacadbe 100644 --- a/generic/tkText.c +++ b/generic/tkText.c @@ -4508,11 +4508,7 @@ TkTextGetTabs( Tcl_Obj **objv; TkTextTabArray *tabArrayPtr; TkTextTab *tabPtr; -#if TCL_UTF_MAX == 4 int ch; -#else - Tcl_UniChar ch; -#endif double prevStop, lastStop; /* * Map these strings to TkTextTabAlign values. @@ -4619,11 +4615,7 @@ TkTextGetTabs( * There may be a more efficient way of getting this. 
*/ -#if TCL_UTF_MAX == 4 - TkUtfToUniChar32(Tcl_GetString(objv[i+1]), &ch); -#else - Tcl_UtfToUniChar(Tcl_GetString(objv[i+1]), &ch); -#endif + TkUtfToUniChar2(Tcl_GetString(objv[i+1]), &ch); if (!Tcl_UniCharIsAlpha(ch)) { continue; } diff --git a/generic/tkTextDisp.c b/generic/tkTextDisp.c index 5faab36..026023e 100644 --- a/generic/tkTextDisp.c +++ b/generic/tkTextDisp.c @@ -7581,8 +7581,8 @@ TkTextCharLayoutProc( if (bytesThatFit < maxBytes) { if ((bytesThatFit == 0) && noCharsYet) { - Tcl_UniChar ch; - int chLen = Tcl_UtfToUniChar(p, &ch); + int ch; + int chLen = TkUtfToUniChar2(p, &ch); #if TK_LAYOUT_WITH_BASE_CHUNKS bytesThatFit = CharChunkMeasureChars(chunkPtr, line, diff --git a/generic/tkTextIndex.c b/generic/tkTextIndex.c index f64a6d2..b794cdb 100644 --- a/generic/tkTextIndex.c +++ b/generic/tkTextIndex.c @@ -2298,13 +2298,9 @@ StartEnd( int chSize = 1; if (segPtr->typePtr == &tkTextCharType) { -#if TCL_UTF_MAX == 4 int ch; - chSize = TkUtfToUniChar32(segPtr->body.chars + offset, &ch); -#else - Tcl_UniChar ch; - chSize = Tcl_UtfToUniChar(segPtr->body.chars + offset, &ch); -#endif + + chSize = TkUtfToUniChar2(segPtr->body.chars + offset, &ch); if (!Tcl_UniCharIsWordChar(ch)) { break; } @@ -2347,13 +2343,9 @@ StartEnd( int chSize = 1; if (segPtr->typePtr == &tkTextCharType) { -#if TCL_UTF_MAX == 4 + int ch; - TkUtfToUniChar32(segPtr->body.chars + offset, &ch); -#else - Tcl_UniChar ch; - Tcl_UtfToUniChar(segPtr->body.chars + offset, &ch); -#endif + TkUtfToUniChar2(segPtr->body.chars + offset, &ch); if (!Tcl_UniCharIsWordChar(ch)) { break; } diff --git a/generic/tkUtil.c b/generic/tkUtil.c index fb796fd..a266cb3 100644 --- a/generic/tkUtil.c +++ b/generic/tkUtil.c @@ -1193,24 +1193,15 @@ TkSendVirtualEvent( Tk_QueueWindowEvent(&event.general, TCL_QUEUE_TAIL); } -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX <= 4 /* *--------------------------------------------------------------------------- * - * TkUtfToUniChar32 -- + * TkUtfToUniChar2 -- * - * Copied from 
Tcl_UtfToUniChar but using int instead of Tcl_UniChar! - * - * Extract the Tcl_UniChar represented by the UTF-8 string. Bad UTF-8 - * sequences are converted to valid Tcl_UniChars and processing - * continues. Equivalent to Plan 9 chartorune(). - * - * The caller must ensure that the source buffer is long enough that this - * routine does not run off the end and dereference non-existent memory - * looking for trail bytes. If the source buffer is known to be '\0' - * terminated, this cannot happen. Otherwise, the caller should call - * Tcl_UtfCharComplete() before calling this routine to ensure that - * enough bytes remain in the string. + * Almost the same as Tcl_UtfToUniChar but using int instead of Tcl_UniChar. + * This function is capable of collapsing a high/low surrogate pair to a single + * Unicode code point. So, up to 6 bytes (two UTF-8 sequences) might be read. * * Results: * *chPtr is filled with the Tcl_UniChar, and the return value is the @@ -1223,75 +1214,29 @@ TkSendVirtualEvent( */ int -TkUtfToUniChar32( +TkUtfToUniChar2( const char *src, /* The UTF-8 string. */ int *chPtr) /* Filled with the Tcl_UniChar represented by * the UTF-8 string. */ { - int byte; - - /* - * Unroll 1 to 3 byte UTF-8 sequences, use loop to handle longer ones. - */ - - byte = *((unsigned char *) src); - if (byte < 0xC0) { - /* - * Handles properly formed UTF-8 characters between 0x01 and 0x7F. - * Also treats \0 and naked trail bytes 0x80 to 0xBF as valid - * characters representing themselves. - */ - - *chPtr = byte; - return 1; - } else if (byte < 0xE0) { - if ((src[1] & 0xC0) == 0x80) { - /* - * Two-byte-character lead-byte followed by a trail-byte. 
- */ - - *chPtr = ((byte & 0x1F) << 6) | (src[1] & 0x3F); - return 2; + Tcl_UniChar uniChar = 0; + + int len = Tcl_UtfToUniChar(src, &uniChar); + if ((uniChar & 0xfc00) == 0xd800) { + Tcl_UniChar high = uniChar; + /* This can only happen when Tcl is compiled with TCL_UTF_MAX=4, + * or when a high surrogate character is detected */ + int len2 = Tcl_UtfToUniChar(src+len, &uniChar); + if ((uniChar & 0xfc00) == 0xdc00) { + *chPtr = (((high & 0x3ff) << 10) | (uniChar & 0x3ff)) + 0x10000; + len += len2; + } else { + *chPtr = high; } - - /* - * A two-byte-character lead-byte not followed by trail-byte - * represents itself. - */ - } else if (byte < 0xF0) { - if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80)) { - /* - * Three-byte-character lead byte followed by two trail bytes. - */ - - *chPtr = ((byte & 0x0F) << 12) - | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F); - return 3; - } - - /* - * A three-byte-character lead-byte not followed by two trail-bytes - * represents itself. - */ - } else if (byte < 0xF8) { - if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) { - /* - * Four-byte-character lead byte followed by three trail bytes. - */ - - *chPtr = ((byte & 0x0E) << 18) | ((src[1] & 0x3F) << 12) - | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F); - return 4; - } - - /* - * A three-byte-character lead-byte not followed by two trail-bytes - * represents itself. 
- */ + } else { + *chPtr = uniChar; } - - *chPtr = byte; - return 1; + return len; } #endif /* diff --git a/generic/ttk/ttkEntry.c b/generic/ttk/ttkEntry.c index 63ebc5f..d80e1fd 100644 --- a/generic/ttk/ttkEntry.c +++ b/generic/ttk/ttkEntry.c @@ -282,16 +282,10 @@ static char *EntryDisplayString(const char *showChar, int numChars) { char *displayString, *p; int size; - char buf[4]; -#if TCL_UTF_MAX == 4 - int ch; - - TkUtfToUniChar32(showChar, &ch); -#else Tcl_UniChar ch; + char buf[4]; Tcl_UtfToUniChar(showChar, &ch); -#endif size = Tcl_UniCharToUtf(ch, buf); p = displayString = ckalloc(numChars * size + 1); @@ -412,11 +406,7 @@ ExpandPercents( int number, length; const char *string; int stringLength; -#if TCL_UTF_MAX == 4 - int ch; -#else Tcl_UniChar ch; -#endif char numStorage[2*TCL_INTEGER_SPACE]; while (*template) { @@ -440,11 +430,7 @@ ExpandPercents( */ ++template; /* skip over % */ if (*template != '\0') { -#if TCL_UTF_MAX == 4 - template += TkUtfToUniChar32(template, &ch); -#else template += Tcl_UtfToUniChar(template, &ch); -#endif } else { ch = '%'; } |