diff options
-rw-r--r-- | generic/tkInt.h | 3 | ||||
-rw-r--r-- | generic/tkUtil.c | 71 |
2 files changed, 6 insertions, 68 deletions
diff --git a/generic/tkInt.h b/generic/tkInt.h index a6304f8..c27bede 100644 --- a/generic/tkInt.h +++ b/generic/tkInt.h @@ -1287,19 +1287,18 @@ MODULE_SCOPE void TkUnixSetXftClipRegion(TkRegion clipRegion); # define c_class class #endif +#define TkNumUtfChars Tcl_NumUtfChars #if TCL_UTF_MAX > 4 # define TkUtfToUniChar Tcl_UtfToUniChar # define TkUniCharToUtf Tcl_UniCharToUtf # define TkUtfPrev Tcl_UtfPrev # define TkUtfAtIndex Tcl_UtfAtIndex -# define TkNumUtfChars Tcl_NumUtfChars # define TkUtfCharComplete Tcl_UtfCharComplete #else MODULE_SCOPE int TkUtfToUniChar(const char *, int *); MODULE_SCOPE int TkUniCharToUtf(int, char *); MODULE_SCOPE const char *TkUtfPrev(const char *, const char *); MODULE_SCOPE const char *TkUtfAtIndex(const char *src, int index); - MODULE_SCOPE int TkNumUtfChars(const char *src, int length); # define TkUtfCharComplete(src, length) (((unsigned)(UCHAR(*(src)) - 0xF0) < 5) \ ? ((length) >= 4) : (UCHAR(*(src)) == 0xED) ? ((length) >= 6) : Tcl_UtfCharComplete((src), (length))) #endif diff --git a/generic/tkUtil.c b/generic/tkUtil.c index e055b0d..172bf23 100644 --- a/generic/tkUtil.c +++ b/generic/tkUtil.c @@ -1308,8 +1308,7 @@ TkUtfPrev( * TkUtfAtIndex -- * * Returns a pointer to the specified character (not byte) position in - * a CESU-8 string. That is, a pair of CESU-8 encoded surrogates counts - * as a single character. + * a CESU-8 string. This will never point at a low surrogate. * * Results: * As above. @@ -1325,72 +1324,12 @@ TkUtfAtIndex( const char *src, /* The UTF-8 string. */ int index) /* The position of the desired character. */ { - int len = 0; int ch; - - while (index-- > 0) { - len = TkUtfToUniChar(src, &ch); - src += len; + const char *p = Tcl_UtfAtIndex(src, index); + if ((p > src) && (UCHAR(p[-1]) > 0xF0)) { + return p + TkUtfToUniChar(p - 1, &ch); } - return src; -} - -/* - *--------------------------------------------------------------------------- - * - * TkNumUtfChars -- - * - * Returns the number of characters (not bytes) in the UTF-8 string, not - * including the terminating NULL byte. This differs from Tcl_NumUtfChars - * in that a pair of CESU-8 encoded surrogates counts as one unicode - * character. - * - * Results: - * As above. - * - * Side effects: - * None. - * - *--------------------------------------------------------------------------- - */ - -int -TkNumUtfChars( - const char *src, /* The UTF-8 string to measure. */ - int length) /* The length of the string in bytes, or -1 - * for strlen(string). */ -{ - int ch; - int i = 0; - Tcl_UniChar ch2 = 0; - - if (length < 0) { - /* string is NUL-terminated, so TclUtfToUniChar calls are safe. */ - while ((*src != '\0') && (i < INT_MAX)) { - src += TkUtfToUniChar(src, &ch); - i++; - } - } else { - /* No need to call TkUtfCharComplete() up to endPtr */ - const char *endPtr = src + length - 6; - while (src < endPtr) { - src += TkUtfToUniChar(src, &ch); - i++; - } - /* Pointer to the end of string. Never read endPtr[0] */ - endPtr += 6; - while (src < endPtr) { - if (TkUtfCharComplete(src, endPtr - src)) { - src += TkUtfToUniChar(src, &ch); - } else if (Tcl_UtfCharComplete(src, endPtr - src)) { - src += Tcl_UtfToUniChar(src, &ch2); - } else { - src++; - } - i++; - } - } - return i; + return p; } #endif |