summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- generic/tkInt.h 3
-rw-r--r-- generic/tkUtil.c 71
2 files changed, 6 insertions, 68 deletions
diff --git a/generic/tkInt.h b/generic/tkInt.h
index a6304f8..c27bede 100644
--- a/generic/tkInt.h
+++ b/generic/tkInt.h
@@ -1287,19 +1287,18 @@ MODULE_SCOPE void TkUnixSetXftClipRegion(TkRegion clipRegion);
# define c_class class
#endif
+#define TkNumUtfChars Tcl_NumUtfChars
#if TCL_UTF_MAX > 4
# define TkUtfToUniChar Tcl_UtfToUniChar
# define TkUniCharToUtf Tcl_UniCharToUtf
# define TkUtfPrev Tcl_UtfPrev
# define TkUtfAtIndex Tcl_UtfAtIndex
-# define TkNumUtfChars Tcl_NumUtfChars
# define TkUtfCharComplete Tcl_UtfCharComplete
#else
MODULE_SCOPE int TkUtfToUniChar(const char *, int *);
MODULE_SCOPE int TkUniCharToUtf(int, char *);
MODULE_SCOPE const char *TkUtfPrev(const char *, const char *);
MODULE_SCOPE const char *TkUtfAtIndex(const char *src, int index);
- MODULE_SCOPE int TkNumUtfChars(const char *src, int length);
# define TkUtfCharComplete(src, length) (((unsigned)(UCHAR(*(src)) - 0xF0) < 5) \
? ((length) >= 4) : (UCHAR(*(src)) == 0xED) ? ((length) >= 6) : Tcl_UtfCharComplete((src), (length)))
#endif
diff --git a/generic/tkUtil.c b/generic/tkUtil.c
index e055b0d..172bf23 100644
--- a/generic/tkUtil.c
+++ b/generic/tkUtil.c
@@ -1308,8 +1308,7 @@ TkUtfPrev(
* TkUtfAtIndex --
*
* Returns a pointer to the specified character (not byte) position in
- * a CESU-8 string. That is, a pair of CESU-8 encoded surrogates counts
- * as a single character.
+ * a CESU-8 string. This will never point at a low surrogate.
*
* Results:
* As above.
@@ -1325,72 +1324,12 @@ TkUtfAtIndex(
const char *src, /* The UTF-8 string. */
int index) /* The position of the desired character. */
{
- int len = 0;
int ch;
-
- while (index-- > 0) {
- len = TkUtfToUniChar(src, &ch);
- src += len;
+ const char *p = Tcl_UtfAtIndex(src, index);
+ if ((p > src) && (UCHAR(p[-1]) > 0xF0)) {
+ return p + TkUtfToUniChar(p - 1, &ch);
}
- return src;
-}
-
-/*
- *---------------------------------------------------------------------------
- *
- * TkNumUtfChars --
- *
- * Returns the number of characters (not bytes) in the UTF-8 string, not
- * including the terminating NULL byte. This differs from Tcl_NumUtfChars
- * in that a pair of CESU-8 encoded surrogates counts as one unicode
- * character.
- *
- * Results:
- * As above.
- *
- * Side effects:
- * None.
- *
- *---------------------------------------------------------------------------
- */
-
-int
-TkNumUtfChars(
- const char *src, /* The UTF-8 string to measure. */
- int length) /* The length of the string in bytes, or -1
- * for strlen(string). */
-{
- int ch;
- int i = 0;
- Tcl_UniChar ch2 = 0;
-
- if (length < 0) {
- /* string is NUL-terminated, so TclUtfToUniChar calls are safe. */
- while ((*src != '\0') && (i < INT_MAX)) {
- src += TkUtfToUniChar(src, &ch);
- i++;
- }
- } else {
- /* No need to call TkUtfCharComplete() up to endPtr */
- const char *endPtr = src + length - 6;
- while (src < endPtr) {
- src += TkUtfToUniChar(src, &ch);
- i++;
- }
- /* Pointer to the end of string. Never read endPtr[0] */
- endPtr += 6;
- while (src < endPtr) {
- if (TkUtfCharComplete(src, endPtr - src)) {
- src += TkUtfToUniChar(src, &ch);
- } else if (Tcl_UtfCharComplete(src, endPtr - src)) {
- src += Tcl_UtfToUniChar(src, &ch2);
- } else {
- src++;
- }
- i++;
- }
- }
- return i;
+ return p;
}
#endif