From d71a648c788bdbe6f17db19dcb90cafb174abc83 Mon Sep 17 00:00:00 2001 From: culler Date: Tue, 19 May 2020 02:31:05 +0000 Subject: Add surrogate-aware TkUtfAtIndex and TkNumUtfChars. This is enough to allow editing emoji text (without variant selectors) in Entry widgets on macOS. --- generic/tkEntry.c | 18 ++++++------ generic/tkFont.c | 20 ++++++------- generic/tkInt.h | 4 +++ generic/tkUtil.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++ generic/ttk/ttkEntry.c | 22 +++++++------- macosx/tkMacOSXFont.c | 4 +-- 6 files changed, 115 insertions(+), 31 deletions(-) diff --git a/generic/tkEntry.c b/generic/tkEntry.c index 9c53769..af796eb 100644 --- a/generic/tkEntry.c +++ b/generic/tkEntry.c @@ -2049,7 +2049,7 @@ InsertChars( char *newStr; string = entryPtr->string; - byteIndex = Tcl_UtfAtIndex(string, index) - string; + byteIndex = TkUtfAtIndex(string, index) - string; byteCount = strlen(value); if (byteCount == 0) { return TCL_OK; @@ -2073,16 +2073,18 @@ InsertChars( entryPtr->string = newStr; /* + * ??? Is this construction still needed with TkNumUtfChars ??? + * * The following construction is used because inserting improperly formed * UTF-8 sequences between other improperly formed UTF-8 sequences could * result in actually forming valid UTF-8 sequences; the number of - * characters added may not be Tcl_NumUtfChars(string, -1), because of + * characters added may not be TkNumUtfChars(string, -1), because of * context. The actual number of characters added is how many characters * are in the string now minus the number that used to be there. */ oldChars = entryPtr->numChars; - entryPtr->numChars = Tcl_NumUtfChars(newStr, -1); + entryPtr->numChars = TkNumUtfChars(newStr, -1); charsAdded = entryPtr->numChars - oldChars; entryPtr->numBytes += byteCount; @@ -2153,8 +2155,8 @@ DeleteChars( } string = entryPtr->string; - byteIndex = Tcl_UtfAtIndex(string, index) - string; - byteCount = Tcl_UtfAtIndex(string + byteIndex, count) - (string+byteIndex); + byteIndex = TkUtfAtIndex(string, index) - string; + byteCount = TkUtfAtIndex(string + byteIndex, count) - (string+byteIndex); newByteCount = entryPtr->numBytes + 1 - byteCount; newStr = ckalloc(newByteCount); @@ -2382,7 +2384,7 @@ EntrySetValue( entryPtr->string = tmp; } entryPtr->numBytes = valueLen; - entryPtr->numChars = Tcl_NumUtfChars(value, valueLen); + entryPtr->numChars = TkNumUtfChars(value, valueLen); if (entryPtr->displayString == oldSource) { entryPtr->displayString = entryPtr->string; @@ -2815,8 +2817,8 @@ EntryFetchSelection( return -1; } string = entryPtr->displayString; - selStart = Tcl_UtfAtIndex(string, entryPtr->selectFirst); - selEnd = Tcl_UtfAtIndex(selStart, + selStart = TkUtfAtIndex(string, entryPtr->selectFirst); + selEnd = TkUtfAtIndex(selStart, entryPtr->selectLast - entryPtr->selectFirst); byteCount = selEnd - selStart - offset; if (byteCount > maxBytes) { diff --git a/generic/tkFont.c b/generic/tkFont.c index bf35626..5a8e87f 100644 --- a/generic/tkFont.c +++ b/generic/tkFont.c @@ -1998,7 +1998,7 @@ Tk_ComputeTextLayout( height = fmPtr->ascent + fmPtr->descent; if (numChars < 0) { - numChars = Tcl_NumUtfChars(string, -1); + numChars = TkNumUtfChars(string, -1); } if (wrapLength == 0) { wrapLength = -1; @@ -2021,7 +2021,7 @@ Tk_ComputeTextLayout( curX = 0; - end = Tcl_UtfAtIndex(string, numChars); + end = TkUtfAtIndex(string, numChars); special = string; flags &= TK_IGNORE_TABS | TK_IGNORE_NEWLINES; @@ -2138,7 +2138,7 @@ Tk_ComputeTextLayout( bytesThisChunk = Tk_MeasureChars(tkfont, end, bytesThisChunk, -1, 0, &chunkPtr->totalWidth); chunkPtr->numBytes += bytesThisChunk; - chunkPtr->numChars += Tcl_NumUtfChars(end, bytesThisChunk); + chunkPtr->numChars += TkNumUtfChars(end, bytesThisChunk); chunkPtr->totalWidth += curX; } } @@ -2325,14 +2325,14 @@ Tk_DrawTextLayout( firstChar = 0; firstByte = chunkPtr->start; } else { - firstByte = Tcl_UtfAtIndex(chunkPtr->start, firstChar); + firstByte = TkUtfAtIndex(chunkPtr->start, firstChar); Tk_MeasureChars(layoutPtr->tkfont, chunkPtr->start, firstByte - chunkPtr->start, -1, 0, &drawX); } if (lastChar < numDisplayChars) { numDisplayChars = lastChar; } - lastByte = Tcl_UtfAtIndex(chunkPtr->start, numDisplayChars); + lastByte = TkUtfAtIndex(chunkPtr->start, numDisplayChars); Tk_DrawChars(display, drawable, gc, layoutPtr->tkfont, firstByte, lastByte - firstByte, x+chunkPtr->x+drawX, y+chunkPtr->y); } @@ -2387,14 +2387,14 @@ TkDrawAngledTextLayout( firstChar = 0; firstByte = chunkPtr->start; } else { - firstByte = Tcl_UtfAtIndex(chunkPtr->start, firstChar); + firstByte = TkUtfAtIndex(chunkPtr->start, firstChar); Tk_MeasureChars(layoutPtr->tkfont, chunkPtr->start, firstByte - chunkPtr->start, -1, 0, &drawX); } if (lastChar < numDisplayChars) { numDisplayChars = lastChar; } - lastByte = Tcl_UtfAtIndex(chunkPtr->start, numDisplayChars); + lastByte = TkUtfAtIndex(chunkPtr->start, numDisplayChars); dx = cosA * (chunkPtr->x + drawX) + sinA * (chunkPtr->y); dy = -sinA * (chunkPtr->x + drawX) + cosA * (chunkPtr->y); if (angle == 0.0) { @@ -2631,7 +2631,7 @@ Tk_PointToChar( } n = Tk_MeasureChars((Tk_Font) fontPtr, chunkPtr->start, chunkPtr->numBytes, x - chunkPtr->x, 0, &dummy); - return numChars + Tcl_NumUtfChars(chunkPtr->start, n); + return numChars + TkNumUtfChars(chunkPtr->start, n); } numChars += chunkPtr->numChars; lastPtr = chunkPtr; @@ -2736,7 +2736,7 @@ Tk_CharBbox( goto check; } } else if (index < chunkPtr->numChars) { - end = Tcl_UtfAtIndex(chunkPtr->start, index); + end = TkUtfAtIndex(chunkPtr->start, index); if (xPtr != NULL) { Tk_MeasureChars(tkfont, chunkPtr->start, end - chunkPtr->start, -1, 0, &x); @@ -3757,7 +3757,7 @@ NewChunk( *layoutPtrPtr = layoutPtr; *maxPtr = maxChunks; } - numChars = Tcl_NumUtfChars(start, numBytes); + numChars = TkNumUtfChars(start, numBytes); chunkPtr = &layoutPtr->chunks[layoutPtr->numChunks]; chunkPtr->start = start; chunkPtr->numBytes = numBytes; diff --git a/generic/tkInt.h b/generic/tkInt.h index c63748a..d426d55 100644 --- a/generic/tkInt.h +++ b/generic/tkInt.h @@ -1291,10 +1291,14 @@ MODULE_SCOPE void TkUnixSetXftClipRegion(TkRegion clipRegion); # define TkUtfToUniChar Tcl_UtfToUniChar # define TkUniCharToUtf Tcl_UniCharToUtf # define TkUtfPrev Tcl_UtfPrev +# define TkUtfAtIndex Tcl_UtfAtIndex +# define TkNumUtfChars Tcl_NumUtfChars #else MODULE_SCOPE int TkUtfToUniChar(const char *, int *); MODULE_SCOPE int TkUniCharToUtf(int, char *); MODULE_SCOPE const char *TkUtfPrev(const char *, const char *); + MODULE_SCOPE const char *TkUtfAtIndex(const char *src, int index); + MODULE_SCOPE int TkNumUtfChars(const char *src, int length); #endif /* diff --git a/generic/tkUtil.c b/generic/tkUtil.c index 49fd118..5d9d966 100644 --- a/generic/tkUtil.c +++ b/generic/tkUtil.c @@ -1295,7 +1295,85 @@ TkUtfPrev( return (first + TkUtfToUniChar(first, &ch) >= src) ? first : p ; } +/* + *--------------------------------------------------------------------------- + * + * TkUtfAtIndex -- + * + * Returns a pointer to the specified character (not byte) position in + * a CESU-8 string. That is, a pair of CESU-8 encoded surrogates counts + * as a single character. + * + * Results: + * As above. + * + * Side effects: + * None. + * + *--------------------------------------------------------------------------- + */ + +const char * +TkUtfAtIndex( + const char *src, /* The UTF-8 string. */ + int index) /* The position of the desired character. */ +{ + int len = 0; + int ch; + + while (index-- > 0) { + len = TkUtfToUniChar(src, &ch); + src += len; + } + return src; +} + +/* + *--------------------------------------------------------------------------- + * + * TkNumUtfChars -- + * + * Returns the number of characters (not bytes) in the UTF-8 string, not + * including the terminating NULL byte. This differs from Tcl_NumUtfChars + * in that a pair of CESU-8 encoded surrogates counts as one unicode + * character. + * + * Results: + * As above. + * + * Side effects: + * None. + * + *--------------------------------------------------------------------------- + */ + +int +TkNumUtfChars( + const char *src, /* The UTF-8 string to measure. */ + int length) /* The length of the string in bytes, or -1 + * for strlen(string). */ +{ + int ch = 0; + int i = 0; + + if (length < 0) { + /* string is NUL-terminated, so TclUtfToUniChar calls are safe. */ + while ((*src != '\0') && (i < INT_MAX)) { + src += TkUtfToUniChar(src, &ch); + i++; + } + } else { + /* Pointer to the end of string. Never read endPtr[0] */ + const char *endPtr = src + length; + while (src < endPtr) { + src += TkUtfToUniChar(src, &ch); + i++; + } + } + return i; +} #endif + /* * Local Variables: * mode: c diff --git a/generic/ttk/ttkEntry.c b/generic/ttk/ttkEntry.c index 96f3cf2..9a91ecc 100644 --- a/generic/ttk/ttkEntry.c +++ b/generic/ttk/ttkEntry.c @@ -339,8 +339,8 @@ EntryFetchSelection( } string = entryPtr->entry.displayString; - selStart = Tcl_UtfAtIndex(string, entryPtr->entry.selectFirst); - selEnd = Tcl_UtfAtIndex(selStart, + selStart = TkUtfAtIndex(string, entryPtr->entry.selectFirst); + selEnd = TkUtfAtIndex(selStart, entryPtr->entry.selectLast - entryPtr->entry.selectFirst); byteCount = selEnd - selStart - offset; if (byteCount > maxBytes) { @@ -458,11 +458,11 @@ ExpandPercents( break; case 'S': /* string to be inserted/deleted, if any */ if (reason == VALIDATE_INSERT) { - string = Tcl_UtfAtIndex(new, index); - stringLength = Tcl_UtfAtIndex(string, count) - string; + string = TkUtfAtIndex(new, index); + stringLength = TkUtfAtIndex(string, count) - string; } else if (reason == VALIDATE_DELETE) { - string = Tcl_UtfAtIndex(entryPtr->entry.string, index); - stringLength = Tcl_UtfAtIndex(string, count) - string; + string = TkUtfAtIndex(entryPtr->entry.string, index); + stringLength = TkUtfAtIndex(string, count) - string; } else { string = ""; stringLength = 0; @@ -707,7 +707,7 @@ static void EntryStoreValue(Entry *entryPtr, const char *value) { size_t numBytes = strlen(value); - int numChars = Tcl_NumUtfChars(value, numBytes); + int numChars = TkNumUtfChars(value, numBytes); if (entryPtr->core.flags & VALIDATING) entryPtr->core.flags |= VALIDATION_SET_VALUE; @@ -812,9 +812,9 @@ InsertChars( const char *value) /* New characters to add */ { char *string = entryPtr->entry.string; - size_t byteIndex = Tcl_UtfAtIndex(string, index) - string; + size_t byteIndex = TkUtfAtIndex(string, index) - string; size_t byteCount = strlen(value); - int charsAdded = Tcl_NumUtfChars(value, byteCount); + int charsAdded = TkNumUtfChars(value, byteCount); size_t newByteCount = entryPtr->entry.numBytes + byteCount + 1; char *new; int code; @@ -866,8 +866,8 @@ DeleteChars( return TCL_OK; } - byteIndex = Tcl_UtfAtIndex(string, index) - string; - byteCount = Tcl_UtfAtIndex(string+byteIndex, count) - (string+byteIndex); + byteIndex = TkUtfAtIndex(string, index) - string; + byteCount = TkUtfAtIndex(string+byteIndex, count) - (string+byteIndex); newByteCount = entryPtr->entry.numBytes + 1 - byteCount; new = ckalloc(newByteCount); diff --git a/macosx/tkMacOSXFont.c b/macosx/tkMacOSXFont.c index 7fc0113..70bb9e8 100644 --- a/macosx/tkMacOSXFont.c +++ b/macosx/tkMacOSXFont.c @@ -105,7 +105,7 @@ static void DrawCharsInContext(Display *display, Drawable drawable, * To avoid an extra copy, a TKNSString object wraps a Tcl_DString with an * NSString that uses the DString's buffer as its character buffer. It can be * constructed from a Tcl_DString and it has a DString property that handles - * converting from an NSString to a Tcl_DString + * converting from an NSString to a Tcl_DString. */ @implementation TKNSString @@ -1056,7 +1056,7 @@ TkpMeasureCharsInContext( [attributedString release]; [string release]; length = ceil(width - offset); - fit = (Tcl_UtfAtIndex(source, index) - source) - rangeStart; + fit = (TkUtfAtIndex(source, index) - source) - rangeStart; done: #ifdef TK_MAC_DEBUG_FONTS TkMacOSXDbgMsg("measure: source=\"%s\" range=\"%.*s\" maxLength=%d " -- cgit v0.12 From e2d53f617b2bc55da830e4b7ba566d920873e83e Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 19 May 2020 19:44:51 +0000 Subject: Use Glyph indexing in more places (underscoring and canvas text and some more) --- generic/tkCanvText.c | 14 +++++++------- generic/tkMessage.c | 4 ++-- generic/tkSelect.c | 6 +++--- generic/tkText.c | 14 +++++++------- generic/tkTextIndex.c | 10 +++++----- macosx/tkMacOSXFont.c | 8 ++++---- unix/tkUnixMenu.c | 2 +- win/tkWinMenu.c | 2 +- 8 files changed, 30 insertions(+), 30 deletions(-) diff --git a/generic/tkCanvText.c b/generic/tkCanvText.c index 1e58ce9..6a6de21 100644 --- a/generic/tkCanvText.c +++ b/generic/tkCanvText.c @@ -504,7 +504,7 @@ ConfigureText( */ textPtr->numBytes = strlen(textPtr->text); - textPtr->numChars = Tcl_NumUtfChars(textPtr->text, textPtr->numBytes); + textPtr->numChars = TkNumUtfChars(textPtr->text, textPtr->numBytes); if (textInfoPtr->selItemPtr == itemPtr) { if (textInfoPtr->selectFirst >= textPtr->numChars) { @@ -1025,7 +1025,7 @@ TextInsert( if (index > textPtr->numChars) { index = textPtr->numChars; } - byteIndex = Tcl_UtfAtIndex(text, index) - text; + byteIndex = TkUtfAtIndex(text, index) - text; byteCount = strlen(string); if (byteCount == 0) { return; @@ -1038,7 +1038,7 @@ TextInsert( ckfree(text); textPtr->text = newStr; - charsAdded = Tcl_NumUtfChars(string, byteCount); + charsAdded = TkNumUtfChars(string, byteCount); textPtr->numChars += charsAdded; textPtr->numBytes += byteCount; @@ -1108,8 +1108,8 @@ TextDeleteChars( } charsRemoved = last + 1 - first; - byteIndex = Tcl_UtfAtIndex(text, first) - text; - byteCount = Tcl_UtfAtIndex(text + byteIndex, charsRemoved) + byteIndex = TkUtfAtIndex(text, first) - text; + byteCount = TkUtfAtIndex(text + byteIndex, charsRemoved) - (text + byteIndex); newStr = ckalloc(textPtr->numBytes + 1 - byteCount); @@ -1497,8 +1497,8 @@ GetSelText( return 0; } text = textPtr->text; - selStart = Tcl_UtfAtIndex(text, textInfoPtr->selectFirst); - selEnd = Tcl_UtfAtIndex(selStart, + selStart = TkUtfAtIndex(text, textInfoPtr->selectFirst); + selEnd = TkUtfAtIndex(selStart, textInfoPtr->selectLast + 1 - textInfoPtr->selectFirst); byteCount = selEnd - selStart - offset; if (byteCount > maxBytes) { diff --git a/generic/tkMessage.c b/generic/tkMessage.c index 1a3c6de..f631ca5 100644 --- a/generic/tkMessage.c +++ b/generic/tkMessage.c @@ -491,7 +491,7 @@ ConfigureMessage( * be specified to Tk_ConfigureWidget. */ - msgPtr->numChars = Tcl_NumUtfChars(msgPtr->string, -1); + msgPtr->numChars = TkNumUtfChars(msgPtr->string, -1); if (msgPtr->highlightWidth < 0) { msgPtr->highlightWidth = 0; @@ -881,7 +881,7 @@ MessageTextVarProc( if (msgPtr->string != NULL) { ckfree(msgPtr->string); } - msgPtr->numChars = Tcl_NumUtfChars(value, -1); + msgPtr->numChars = TkNumUtfChars(value, -1); msgPtr->string = ckalloc(strlen(value) + 1); strcpy(msgPtr->string, value); ComputeMessageGeometry(msgPtr); diff --git a/generic/tkSelect.c b/generic/tkSelect.c index 5c71465..396ffe3 100644 --- a/generic/tkSelect.c +++ b/generic/tkSelect.c @@ -1397,15 +1397,15 @@ HandleTclCommand( if (cmdInfoPtr->interp != NULL) { if (length <= maxBytes) { - cmdInfoPtr->charOffset += Tcl_NumUtfChars(string, -1); + cmdInfoPtr->charOffset += TkNumUtfChars(string, -1); cmdInfoPtr->buffer[0] = '\0'; } else { - Tcl_UniChar ch = 0; + int ch; p = string; string += count; numChars = 0; while (p < string) { - p += Tcl_UtfToUniChar(p, &ch); + p += TkUtfToUniChar(p, &ch); numChars++; } cmdInfoPtr->charOffset += numChars; diff --git a/generic/tkText.c b/generic/tkText.c index b696647..a97f8cd 100644 --- a/generic/tkText.c +++ b/generic/tkText.c @@ -4056,12 +4056,12 @@ TextSearchIndexInLine( if (searchSpecPtr->exact) { index += leftToScan; } else { - index += Tcl_NumUtfChars(segPtr->body.chars, leftToScan); + index += TkNumUtfChars(segPtr->body.chars, leftToScan); } } else if (searchSpecPtr->exact) { index += segPtr->size; } else { - index += Tcl_NumUtfChars(segPtr->body.chars, -1); + index += TkNumUtfChars(segPtr->body.chars, -1); } } leftToScan -= segPtr->size; @@ -4256,7 +4256,7 @@ TextSearchFoundMatch( if (searchSpecPtr->exact) { const char *startOfLine = Tcl_GetString(theLine); - numChars = Tcl_NumUtfChars(startOfLine + matchOffset, matchLength); + numChars = TkNumUtfChars(startOfLine + matchOffset, matchLength); } else { numChars = matchLength; } @@ -4315,13 +4315,13 @@ TextSearchFoundMatch( if (searchSpecPtr->exact) { matchOffset += segPtr->size; } else { - matchOffset += Tcl_NumUtfChars(segPtr->body.chars, -1); + matchOffset += TkNumUtfChars(segPtr->body.chars, -1); } } else { if (searchSpecPtr->exact) { leftToScan -= segPtr->size; } else { - leftToScan -= Tcl_NumUtfChars(segPtr->body.chars, -1); + leftToScan -= TkNumUtfChars(segPtr->body.chars, -1); } } curIndex.byteIndex += segPtr->size; @@ -4406,13 +4406,13 @@ TextSearchFoundMatch( continue; } else if (!searchSpecPtr->searchElide && TkTextIsElided(textPtr, &curIndex, NULL)) { - numChars += Tcl_NumUtfChars(segPtr->body.chars, -1); + numChars += TkNumUtfChars(segPtr->body.chars, -1); continue; } if (searchSpecPtr->exact) { leftToScan -= segPtr->size; } else { - leftToScan -= Tcl_NumUtfChars(segPtr->body.chars, -1); + leftToScan -= TkNumUtfChars(segPtr->body.chars, -1); } } diff --git a/generic/tkTextIndex.c b/generic/tkTextIndex.c index 8f30b7d..ef6df0d 100644 --- a/generic/tkTextIndex.c +++ b/generic/tkTextIndex.c @@ -1053,7 +1053,7 @@ TkTextPrintIndex( break; } if (segPtr->typePtr == &tkTextCharType) { - charIndex += Tcl_NumUtfChars(segPtr->body.chars, segPtr->size); + charIndex += TkNumUtfChars(segPtr->body.chars, segPtr->size); } else { charIndex += segPtr->size; } @@ -1061,7 +1061,7 @@ TkTextPrintIndex( } if (segPtr->typePtr == &tkTextCharType) { - charIndex += Tcl_NumUtfChars(segPtr->body.chars, numBytes); + charIndex += TkNumUtfChars(segPtr->body.chars, numBytes); } else { charIndex += numBytes; } @@ -1858,11 +1858,11 @@ TkTextIndexCount( /* * This is a speed sensitive function, so run specially over * the string to count continuous ascii characters before - * resorting to the Tcl_NumUtfChars call. This is a long form + * resorting to the TkNumUtfChars call. This is a long form * of: * * stringPtr->numChars = - * Tcl_NumUtfChars(objPtr->bytes, objPtr->length); + * TkNumUtfChars(objPtr->bytes, objPtr->length); */ while (i && (*str < 0xC0)) { @@ -1871,7 +1871,7 @@ TkTextIndexCount( } count += byteLen - i; if (i) { - count += Tcl_NumUtfChars(segPtr->body.chars + byteOffset + count += TkNumUtfChars(segPtr->body.chars + byteOffset + (byteLen - i), i); } } else { diff --git a/macosx/tkMacOSXFont.c b/macosx/tkMacOSXFont.c index 70bb9e8..6f1ae77 100644 --- a/macosx/tkMacOSXFont.c +++ b/macosx/tkMacOSXFont.c @@ -954,8 +954,8 @@ TkpMeasureCharsInContext( attributes:fontPtr->nsAttributes]; typesetter = CTTypesetterCreateWithAttributedString( (CFAttributedStringRef)attributedString); - start = Tcl_NumUtfChars(source, rangeStart); - len = Tcl_NumUtfChars(source + rangeStart, rangeLength); + start = TkNumUtfChars(source, rangeStart); + len = TkNumUtfChars(source + rangeStart, rangeLength); if (start > 0) { range.length = start; line = CTTypesetterCreateLine(typesetter, range); @@ -1252,8 +1252,8 @@ DrawCharsInContext( -textX, -textY); } CGContextConcatCTM(context, t); - start = Tcl_NumUtfChars(source, rangeStart); - length = Tcl_NumUtfChars(source, rangeStart + rangeLength) - start; + start = TkNumUtfChars(source, rangeStart); + length = TkNumUtfChars(source, rangeStart + rangeLength) - start; line = CTTypesetterCreateLine(typesetter, CFRangeMake(start, length)); if (start > 0) { diff --git a/unix/tkUnixMenu.c b/unix/tkUnixMenu.c index 40e3f94..aa54897 100644 --- a/unix/tkUnixMenu.c +++ b/unix/tkUnixMenu.c @@ -863,7 +863,7 @@ DrawMenuUnderline( const char *label, *start, *end; label = Tcl_GetString(mePtr->labelPtr); - start = Tcl_UtfAtIndex(label, mePtr->underline); + start = TkUtfAtIndex(label, mePtr->underline); end = start + TkUtfToUniChar(start, &ch); Tk_GetPixelsFromObj(NULL, menuPtr->tkwin, diff --git a/win/tkWinMenu.c b/win/tkWinMenu.c index 1f345ff..6c55164 100644 --- a/win/tkWinMenu.c +++ b/win/tkWinMenu.c @@ -2138,7 +2138,7 @@ DrawMenuUnderline( int ch; label = Tcl_GetString(mePtr->labelPtr); - start = Tcl_UtfAtIndex(label, mePtr->underline); + start = TkUtfAtIndex(label, mePtr->underline); end = start + TkUtfToUniChar(start, &ch); Tk_UnderlineChars(menuPtr->display, d, gc, tkfont, label, x + mePtr->indicatorSpace, -- cgit v0.12 From 5f29a0196dbc94ae23df0ff6d5d9b5d1ffbd7d7f Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 19 May 2020 20:10:17 +0000 Subject: Little variation on bug-a179564826, in which Character indexing is kept, but with surrogate protection --- generic/tkInt.h | 3 +-- generic/tkUtil.c | 71 ++++---------------------------------------------------- 2 files changed, 6 insertions(+), 68 deletions(-) diff --git a/generic/tkInt.h b/generic/tkInt.h index a6304f8..c27bede 100644 --- a/generic/tkInt.h +++ b/generic/tkInt.h @@ -1287,19 +1287,18 @@ MODULE_SCOPE void TkUnixSetXftClipRegion(TkRegion clipRegion); # define c_class class #endif +#define TkNumUtfChars Tcl_NumUtfChars #if TCL_UTF_MAX > 4 # define TkUtfToUniChar Tcl_UtfToUniChar # define TkUniCharToUtf Tcl_UniCharToUtf # define TkUtfPrev Tcl_UtfPrev # define TkUtfAtIndex Tcl_UtfAtIndex -# define TkNumUtfChars Tcl_NumUtfChars # define TkUtfCharComplete Tcl_UtfCharComplete #else MODULE_SCOPE int TkUtfToUniChar(const char *, int *); MODULE_SCOPE int TkUniCharToUtf(int, char *); MODULE_SCOPE const char *TkUtfPrev(const char *, const char *); MODULE_SCOPE const char *TkUtfAtIndex(const char *src, int index); - MODULE_SCOPE int TkNumUtfChars(const char *src, int length); # define TkUtfCharComplete(src, length) (((unsigned)(UCHAR(*(src)) - 0xF0) < 5) \ ? ((length) >= 4) : (UCHAR(*(src)) == 0xED) ? ((length) >= 6) : Tcl_UtfCharComplete((src), (length))) #endif diff --git a/generic/tkUtil.c b/generic/tkUtil.c index e055b0d..172bf23 100644 --- a/generic/tkUtil.c +++ b/generic/tkUtil.c @@ -1308,8 +1308,7 @@ TkUtfPrev( * TkUtfAtIndex -- * * Returns a pointer to the specified character (not byte) position in - * a CESU-8 string. That is, a pair of CESU-8 encoded surrogates counts - * as a single character. + * a CESU-8 string. This will never point at a low surrogate. * * Results: * As above. @@ -1325,72 +1324,12 @@ TkUtfAtIndex( const char *src, /* The UTF-8 string. */ int index) /* The position of the desired character. */ { - int len = 0; int ch; - - while (index-- > 0) { - len = TkUtfToUniChar(src, &ch); - src += len; + const char *p = Tcl_UtfAtIndex(src, index); + if ((p > src) && (UCHAR(p[-1]) > 0xF0)) { + return p + TkUtfToUniChar(p - 1, &ch); } - return src; -} - -/* - *--------------------------------------------------------------------------- - * - * TkNumUtfChars -- - * - * Returns the number of characters (not bytes) in the UTF-8 string, not - * including the terminating NULL byte. This differs from Tcl_NumUtfChars - * in that a pair of CESU-8 encoded surrogates counts as one unicode - * character. - * - * Results: - * As above. - * - * Side effects: - * None. - * - *--------------------------------------------------------------------------- - */ - -int -TkNumUtfChars( - const char *src, /* The UTF-8 string to measure. */ - int length) /* The length of the string in bytes, or -1 - * for strlen(string). */ -{ - int ch; - int i = 0; - Tcl_UniChar ch2 = 0; - - if (length < 0) { - /* string is NUL-terminated, so TclUtfToUniChar calls are safe. */ - while ((*src != '\0') && (i < INT_MAX)) { - src += TkUtfToUniChar(src, &ch); - i++; - } - } else { - /* No need to call TkUtfCharComplete() up to endPtr */ - const char *endPtr = src + length - 6; - while (src < endPtr) { - src += TkUtfToUniChar(src, &ch); - i++; - } - /* Pointer to the end of string. Never read endPtr[0] */ - endPtr += 6; - while (src < endPtr) { - if (TkUtfCharComplete(src, endPtr - src)) { - src += TkUtfToUniChar(src, &ch); - } else if (Tcl_UtfCharComplete(src, endPtr - src)) { - src += Tcl_UtfToUniChar(src, &ch2); - } else { - src++; - } - i++; - } - } - return i; + return p; } #endif -- cgit v0.12 From 5e2cf435c8008214b25114de4509a7facd345dbd Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 19 May 2020 20:13:03 +0000 Subject: > ... >= --- generic/tkUtil.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tkUtil.c b/generic/tkUtil.c index 172bf23..8d2f42e 100644 --- a/generic/tkUtil.c +++ b/generic/tkUtil.c @@ -1326,7 +1326,7 @@ TkUtfAtIndex( { int ch; const char *p = Tcl_UtfAtIndex(src, index); - if ((p > src) && (UCHAR(p[-1]) > 0xF0)) { + if ((p > src) && (UCHAR(p[-1]) >= 0xF0)) { return p + TkUtfToUniChar(p - 1, &ch); } return p; -- cgit v0.12 From 75d83ece93c7c0f3a2a3c8e3a925b35353ccddeb Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 20 May 2020 10:10:40 +0000 Subject: Fix TkUtfAtIndex(), still was not 100% correct. Simplify TKNSString::DString, since Tcl_UniCharToUtf() is already capable of surrogate handling with a proper Tcl version. --- generic/tkUtil.c | 3 ++- macosx/tkMacOSXBitmap.c | 1 - macosx/tkMacOSXFont.c | 29 ++++++----------------------- macosx/tkMacOSXPrivate.h | 2 +- 4 files changed, 9 insertions(+), 26 deletions(-) diff --git a/generic/tkUtil.c b/generic/tkUtil.c index 8d2f42e..17ba443 100644 --- a/generic/tkUtil.c +++ b/generic/tkUtil.c @@ -1327,7 +1327,8 @@ TkUtfAtIndex( int ch; const char *p = Tcl_UtfAtIndex(src, index); if ((p > src) && (UCHAR(p[-1]) >= 0xF0)) { - return p + TkUtfToUniChar(p - 1, &ch); + --p; + return p + TkUtfToUniChar(p, &ch); } return p; } diff --git a/macosx/tkMacOSXBitmap.c b/macosx/tkMacOSXBitmap.c index 615192b..2b08235 100644 --- a/macosx/tkMacOSXBitmap.c +++ b/macosx/tkMacOSXBitmap.c @@ -317,7 +317,6 @@ TkpGetNativeAppBitmap( OSType iconType; if (OSTypeFromString(name, &iconType) == TCL_OK) { NSString *iconUTI = OSTYPE_TO_UTI(iconType); - printf("Found image for UTI %s\n", iconUTI.UTF8String); NSImage *iconImage = [[NSWorkspace sharedWorkspace] iconForFileType: iconUTI]; pixmap = PixmapFromImage(display, iconImage, NSSizeToCGSize(size)); diff --git a/macosx/tkMacOSXFont.c b/macosx/tkMacOSXFont.c index 6f1ae77..44e25d2 100644 --- a/macosx/tkMacOSXFont.c +++ b/macosx/tkMacOSXFont.c @@ -133,7 +133,6 @@ static void DrawCharsInContext(Display *display, Drawable drawable, _string = [[NSString alloc] initWithString:aString]; self.UTF8String = _string.UTF8String; } - printf("Initialized with string %s\n", self.UTF8String); return self; } @@ -166,32 +165,16 @@ static void DrawCharsInContext(Display *display, Drawable drawable, * The DString has not been initialized. Construct it from * our string's unicode characters. */ - - char buffer[2*TCL_UTF_MAX]; - unsigned int index, length, ch; + char *p; + int index; Tcl_DStringInit(&_ds); -#if TCL_UTF_MAX == 3 - for (index = 0; index < [_string length]; index++) { - UniChar uni = [_string characterAtIndex: index]; - - if (CFStringIsSurrogateHighCharacter(uni)) { - UniChar low = [_string characterAtIndex: ++index]; - ch = CFStringGetLongCharacterForSurrogatePair(uni, low); - } else { - ch = uni; - } - length = TkUniCharToUtf(ch, buffer); - Tcl_DStringAppend(&_ds, buffer, length); - } -#else + Tcl_DStringSetLength(&_ds, 3 * [_string length]); + p = Tcl_DStringValue(&_ds); for (index = 0; index < [_string length]; index++) { - ch = (int) [_string characterAtIndex: index]; - length = Tcl_UniCharToUtf(ch, buffer); - Tcl_DStringAppend(&_ds, buffer, length); + p += Tcl_UniCharToUtf([_string characterAtIndex: index], p); } - -#endif + Tcl_DStringSetLength(&_ds, p - Tcl_DStringValue(&_ds)); } return _ds; } diff --git a/macosx/tkMacOSXPrivate.h b/macosx/tkMacOSXPrivate.h index a0645f7..be69fcd 100644 --- a/macosx/tkMacOSXPrivate.h +++ b/macosx/tkMacOSXPrivate.h @@ -527,7 +527,7 @@ VISIBILITY_HIDDEN * byte sequence as initial data. So we add a new class which does provide * such a constructor. It also has a DString property which is a DString whose * string pointer is a byte sequence encoding the NSString with the current Tk - * encoding, namely UTF-8 if TCL_MAX >= 4 or CESU-8 if TCL_MAX = 3. + * encoding, namely UTF-8 if TCL_UTF_MAX >= 4 or CESU-8 if TCL_UTF_MAX = 3. * *--------------------------------------------------------------------------- */ -- cgit v0.12