summary refs log tree commit diff stats
path: root/generic
diff options
context:
space:
mode:
Diffstat (limited to 'generic')
-rw-r--r-- generic/tkEntry.c | 16
-rw-r--r-- generic/tkFont.c | 47
-rw-r--r-- generic/tkInt.h | 6
-rw-r--r-- generic/tkText.c | 10
-rw-r--r-- generic/tkTextDisp.c | 4
-rw-r--r-- generic/tkTextIndex.c | 16
-rw-r--r-- generic/tkUtil.c | 99
-rw-r--r-- generic/ttk/ttkEntry.c | 16
8 files changed, 41 insertions, 173 deletions
diff --git a/generic/tkEntry.c b/generic/tkEntry.c
index 5faf4ef..a66cf18 100644
--- a/generic/tkEntry.c
+++ b/generic/tkEntry.c
@@ -1926,6 +1926,7 @@ EntryComputeGeometry(
*/
if (entryPtr->showChar != NULL) {
+ Tcl_UniChar ch;
char buf[4];
int size;
@@ -1935,15 +1936,8 @@ EntryComputeGeometry(
* characters might end up looking like one valid UTF character in the
* resulting string.
*/
-#if TCL_UTF_MAX == 4
- int ch;
-
- TkUtfToUniChar32(entryPtr->showChar, &ch);
-#else
- Tcl_UniChar ch;
Tcl_UtfToUniChar(entryPtr->showChar, &ch);
-#endif
size = Tcl_UniCharToUtf(ch, buf);
entryPtr->numDisplayBytes = entryPtr->numChars * size;
@@ -3420,11 +3414,7 @@ ExpandPercents(
* list element. */
int number, length;
register const char *string;
-#if TCL_UTF_MAX == 4
- int ch;
-#else
Tcl_UniChar ch;
-#endif
char numStorage[2*TCL_INTEGER_SPACE];
while (1) {
@@ -3457,11 +3447,7 @@ ExpandPercents(
before++; /* skip over % */
if (*before != '\0') {
-#if TCL_UTF_MAX == 4
- before += TkUtfToUniChar32(before, &ch);
-#else
before += Tcl_UtfToUniChar(before, &ch);
-#endif
} else {
ch = '%';
}
diff --git a/generic/tkFont.c b/generic/tkFont.c
index 3088959..4a45691 100644
--- a/generic/tkFont.c
+++ b/generic/tkFont.c
@@ -497,11 +497,7 @@ Tk_FontObjCmd(
const char *s;
Tk_Font tkfont;
Tcl_Obj *optPtr, *charPtr, *resultPtr;
-#if TCL_UTF_MAX == 4
int uniChar = 0;
-#else
- Tcl_UniChar uniChar = 0;
-#endif
const TkFontAttributes *faPtr;
TkFontAttributes fa;
@@ -566,32 +562,19 @@ Tk_FontObjCmd(
*/
if (charPtr != NULL) {
-#if TCL_UTF_MAX == 4
- Tcl_UniChar *ucPtr;
-#endif
+ const char *string = Tcl_GetString(charPtr);
+ int len = TkUtfToUniChar2(string, &uniChar);
- if (Tcl_GetCharLength(charPtr) != 1) {
+ if (len != charPtr->length) {
resultPtr = Tcl_NewStringObj(
"expected a single character but got \"", -1);
- Tcl_AppendLimitedToObj(resultPtr, Tcl_GetString(charPtr),
+ Tcl_AppendLimitedToObj(resultPtr, string,
-1, 40, "...");
Tcl_AppendToObj(resultPtr, "\"", -1);
Tcl_SetObjResult(interp, resultPtr);
Tcl_SetErrorCode(interp, "TK", "VALUE", "FONT_SAMPLE", NULL);
return TCL_ERROR;
}
-#if TCL_UTF_MAX == 4
- ucPtr = Tcl_GetUnicodeFromObj(charPtr, NULL);
- uniChar = *ucPtr;
- if (((uniChar & 0xFC00) == 0xD800) && (ucPtr[1] != 0x000)) {
- if ((ucPtr[1] & 0xFC00) == 0xDC00) {
- uniChar = ((uniChar & 0x3FF) << 10) + (ucPtr[1] & 0x3FF);
- uniChar += 0x10000;
- }
- }
-#else
- uniChar = Tcl_GetUniChar(charPtr, 0);
-#endif
}
/*
@@ -1713,11 +1696,7 @@ Tk_PostscriptFontName(
} else if (strcasecmp(family, "ZapfDingbats") == 0) {
family = "ZapfDingbats";
} else {
-#if TCL_UTF_MAX == 4
int ch;
-#else
- Tcl_UniChar ch;
-#endif
/*
* Inline, capitalize the first letter of each word, lowercase the
@@ -1735,11 +1714,7 @@ Tk_PostscriptFontName(
src++;
upper = 1;
}
-#if TCL_UTF_MAX == 4
- src += TkUtfToUniChar32(src, &ch);
-#else
- src += Tcl_UtfToUniChar(src, &ch);
-#endif
+ src += TkUtfToUniChar2(src, &ch);
if (upper) {
ch = (Tcl_UniChar) Tcl_UniCharToUpper(ch);
upper = 0;
@@ -3276,11 +3251,7 @@ Tk_TextLayoutToPostscript(
int i, j, len;
const char *p, *glyphname;
char uindex[5], c, *ps;
-#if TCL_UTF_MAX == 4
int ch;
-#else
- Tcl_UniChar ch;
-#endif
Tcl_AppendToObj(psObj, "[(", -1);
for (i = 0; i < layoutPtr->numChunks; i++, chunkPtr++) {
@@ -3303,11 +3274,7 @@ Tk_TextLayoutToPostscript(
* international postscript fonts.
*/
-#if TCL_UTF_MAX == 4
- p += TkUtfToUniChar32(p, &ch);
-#else
- p += Tcl_UtfToUniChar(p, &ch);
-#endif
+ p += TkUtfToUniChar2(p, &ch);
if ((ch == '(') || (ch == ')') || (ch == '\\') || (ch < 0x20)) {
/*
* Tricky point: the "03" is necessary in the sprintf below,
@@ -3333,11 +3300,9 @@ Tk_TextLayoutToPostscript(
* use the full glyph name.
*/
-#if TCL_UTF_MAX > 3
if (ch > 0xffff) {
goto noMapping;
}
-#endif
sprintf(uindex, "%04X", ch); /* endianness? */
glyphname = Tcl_GetVar2(interp, "::tk::psglyphs", uindex, 0);
if (glyphname) {
diff --git a/generic/tkInt.h b/generic/tkInt.h
index 367ef3a..6d86e08 100644
--- a/generic/tkInt.h
+++ b/generic/tkInt.h
@@ -1232,8 +1232,10 @@ MODULE_SCOPE Status TkParseColor (Display * display,
MODULE_SCOPE void TkUnixSetXftClipRegion(TkRegion clipRegion);
#endif
-#if TCL_UTF_MAX == 4
-MODULE_SCOPE int TkUtfToUniChar32(const char *src, int *chPtr);
+#if TCL_UTF_MAX > 4
+# define TkUtfToUniChar2 Tcl_UtfToUniChar
+#else
+ MODULE_SCOPE int TkUtfToUniChar2(const char *src, int *chPtr);
#endif
/*
diff --git a/generic/tkText.c b/generic/tkText.c
index 834e842..dacadbe 100644
--- a/generic/tkText.c
+++ b/generic/tkText.c
@@ -4508,11 +4508,7 @@ TkTextGetTabs(
Tcl_Obj **objv;
TkTextTabArray *tabArrayPtr;
TkTextTab *tabPtr;
-#if TCL_UTF_MAX == 4
int ch;
-#else
- Tcl_UniChar ch;
-#endif
double prevStop, lastStop;
/*
* Map these strings to TkTextTabAlign values.
@@ -4619,11 +4615,7 @@ TkTextGetTabs(
* There may be a more efficient way of getting this.
*/
-#if TCL_UTF_MAX == 4
- TkUtfToUniChar32(Tcl_GetString(objv[i+1]), &ch);
-#else
- Tcl_UtfToUniChar(Tcl_GetString(objv[i+1]), &ch);
-#endif
+ TkUtfToUniChar2(Tcl_GetString(objv[i+1]), &ch);
if (!Tcl_UniCharIsAlpha(ch)) {
continue;
}
diff --git a/generic/tkTextDisp.c b/generic/tkTextDisp.c
index 5faab36..026023e 100644
--- a/generic/tkTextDisp.c
+++ b/generic/tkTextDisp.c
@@ -7581,8 +7581,8 @@ TkTextCharLayoutProc(
if (bytesThatFit < maxBytes) {
if ((bytesThatFit == 0) && noCharsYet) {
- Tcl_UniChar ch;
- int chLen = Tcl_UtfToUniChar(p, &ch);
+ int ch;
+ int chLen = TkUtfToUniChar2(p, &ch);
#if TK_LAYOUT_WITH_BASE_CHUNKS
bytesThatFit = CharChunkMeasureChars(chunkPtr, line,
diff --git a/generic/tkTextIndex.c b/generic/tkTextIndex.c
index f64a6d2..b794cdb 100644
--- a/generic/tkTextIndex.c
+++ b/generic/tkTextIndex.c
@@ -2298,13 +2298,9 @@ StartEnd(
int chSize = 1;
if (segPtr->typePtr == &tkTextCharType) {
-#if TCL_UTF_MAX == 4
int ch;
- chSize = TkUtfToUniChar32(segPtr->body.chars + offset, &ch);
-#else
- Tcl_UniChar ch;
- chSize = Tcl_UtfToUniChar(segPtr->body.chars + offset, &ch);
-#endif
+
+ chSize = TkUtfToUniChar2(segPtr->body.chars + offset, &ch);
if (!Tcl_UniCharIsWordChar(ch)) {
break;
}
@@ -2347,13 +2343,9 @@ StartEnd(
int chSize = 1;
if (segPtr->typePtr == &tkTextCharType) {
-#if TCL_UTF_MAX == 4
+
int ch;
- TkUtfToUniChar32(segPtr->body.chars + offset, &ch);
-#else
- Tcl_UniChar ch;
- Tcl_UtfToUniChar(segPtr->body.chars + offset, &ch);
-#endif
+ TkUtfToUniChar2(segPtr->body.chars + offset, &ch);
if (!Tcl_UniCharIsWordChar(ch)) {
break;
}
diff --git a/generic/tkUtil.c b/generic/tkUtil.c
index fb796fd..a266cb3 100644
--- a/generic/tkUtil.c
+++ b/generic/tkUtil.c
@@ -1193,24 +1193,15 @@ TkSendVirtualEvent(
Tk_QueueWindowEvent(&event.general, TCL_QUEUE_TAIL);
}
-#if TCL_UTF_MAX == 4
+#if TCL_UTF_MAX <= 4
/*
*---------------------------------------------------------------------------
*
- * TkUtfToUniChar32 --
+ * TkUtfToUniChar2 --
*
- * Copied from Tcl_UtfToUniChar but using int instead of Tcl_UniChar!
- *
- * Extract the Tcl_UniChar represented by the UTF-8 string. Bad UTF-8
- * sequences are converted to valid Tcl_UniChars and processing
- * continues. Equivalent to Plan 9 chartorune().
- *
- * The caller must ensure that the source buffer is long enough that this
- * routine does not run off the end and dereference non-existent memory
- * looking for trail bytes. If the source buffer is known to be '\0'
- * terminated, this cannot happen. Otherwise, the caller should call
- * Tcl_UtfCharComplete() before calling this routine to ensure that
- * enough bytes remain in the string.
+ * Almost the same as Tcl_UtfToUniChar but using int instead of Tcl_UniChar.
+ * This function is capable of collapsing a high/low surrogate pair into a
+ * single Unicode character, so up to 6 bytes (two UTF-8 characters) may be read.
*
* Results:
* *chPtr is filled with the Tcl_UniChar, and the return value is the
@@ -1223,75 +1214,29 @@ TkSendVirtualEvent(
*/
int
-TkUtfToUniChar32(
+TkUtfToUniChar2(
const char *src, /* The UTF-8 string. */
int *chPtr) /* Filled with the Tcl_UniChar represented by
* the UTF-8 string. */
{
- int byte;
-
- /*
- * Unroll 1 to 3 byte UTF-8 sequences, use loop to handle longer ones.
- */
-
- byte = *((unsigned char *) src);
- if (byte < 0xC0) {
- /*
- * Handles properly formed UTF-8 characters between 0x01 and 0x7F.
- * Also treats \0 and naked trail bytes 0x80 to 0xBF as valid
- * characters representing themselves.
- */
-
- *chPtr = byte;
- return 1;
- } else if (byte < 0xE0) {
- if ((src[1] & 0xC0) == 0x80) {
- /*
- * Two-byte-character lead-byte followed by a trail-byte.
- */
-
- *chPtr = ((byte & 0x1F) << 6) | (src[1] & 0x3F);
- return 2;
+ Tcl_UniChar uniChar = 0;
+
+ int len = Tcl_UtfToUniChar(src, &uniChar);
+ if ((uniChar & 0xfc00) == 0xd800) {
+ Tcl_UniChar high = uniChar;
+ /* High surrogate seen (e.g. Tcl built with TCL_UTF_MAX=4): decode the
+ * next character and, if it is a low surrogate, merge the pair. */
+ int len2 = Tcl_UtfToUniChar(src+len, &uniChar);
+ if ((uniChar & 0xfc00) == 0xdc00) {
+ *chPtr = (((high & 0x3ff) << 10) | (uniChar & 0x3ff)) + 0x10000;
+ len += len2;
+ } else {
+ *chPtr = high;
}
-
- /*
- * A two-byte-character lead-byte not followed by trail-byte
- * represents itself.
- */
- } else if (byte < 0xF0) {
- if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80)) {
- /*
- * Three-byte-character lead byte followed by two trail bytes.
- */
-
- *chPtr = ((byte & 0x0F) << 12)
- | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F);
- return 3;
- }
-
- /*
- * A three-byte-character lead-byte not followed by two trail-bytes
- * represents itself.
- */
- } else if (byte < 0xF8) {
- if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) {
- /*
- * Four-byte-character lead byte followed by three trail bytes.
- */
-
- *chPtr = ((byte & 0x0E) << 18) | ((src[1] & 0x3F) << 12)
- | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F);
- return 4;
- }
-
- /*
- * A three-byte-character lead-byte not followed by two trail-bytes
- * represents itself.
- */
+ } else {
+ *chPtr = uniChar;
}
-
- *chPtr = byte;
- return 1;
+ return len;
}
#endif
/*
diff --git a/generic/ttk/ttkEntry.c b/generic/ttk/ttkEntry.c
index 63ebc5f..d80e1fd 100644
--- a/generic/ttk/ttkEntry.c
+++ b/generic/ttk/ttkEntry.c
@@ -282,16 +282,10 @@ static char *EntryDisplayString(const char *showChar, int numChars)
{
char *displayString, *p;
int size;
- char buf[4];
-#if TCL_UTF_MAX == 4
- int ch;
-
- TkUtfToUniChar32(showChar, &ch);
-#else
Tcl_UniChar ch;
+ char buf[4];
Tcl_UtfToUniChar(showChar, &ch);
-#endif
size = Tcl_UniCharToUtf(ch, buf);
p = displayString = ckalloc(numChars * size + 1);
@@ -412,11 +406,7 @@ ExpandPercents(
int number, length;
const char *string;
int stringLength;
-#if TCL_UTF_MAX == 4
- int ch;
-#else
Tcl_UniChar ch;
-#endif
char numStorage[2*TCL_INTEGER_SPACE];
while (*template) {
@@ -440,11 +430,7 @@ ExpandPercents(
*/
++template; /* skip over % */
if (*template != '\0') {
-#if TCL_UTF_MAX == 4
- template += TkUtfToUniChar32(template, &ch);
-#else
template += Tcl_UtfToUniChar(template, &ch);
-#endif
} else {
ch = '%';
}