summaryrefslogtreecommitdiffstats
path: root/generic
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2016-09-19 12:05:28 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2016-09-19 12:05:28 (GMT)
commit06f3173fc0a3402010291caa151baa8fef3e5a9d (patch)
treecdabe9c60a5c8e415cbb4f7f05dd9b2b59372edf /generic
parentb5deacd1f7c7553a45b73fffe25053df18895a8c (diff)
parentbdb580aa2563588e6e1f5090bb9b89369e2f2d4f (diff)
downloadtk-06f3173fc0a3402010291caa151baa8fef3e5a9d.zip
tk-06f3173fc0a3402010291caa151baa8fef3e5a9d.tar.gz
tk-06f3173fc0a3402010291caa151baa8fef3e5a9d.tar.bz2
Enhanced proposed fix [6c0d7aec67]: unicode text input Windows 8. More places where Unicode surrogate handling could be improved.
Diffstat (limited to 'generic')
-rw-r--r--generic/tkEntry.c8
-rw-r--r--generic/tkFont.c22
-rw-r--r--generic/tkInt.h10
-rw-r--r--generic/tkText.c8
-rw-r--r--generic/tkTextDisp.c4
-rw-r--r--generic/tkTextIndex.c8
-rw-r--r--generic/tkUtil.c80
-rw-r--r--generic/ttk/ttkEntry.c14
8 files changed, 124 insertions, 30 deletions
diff --git a/generic/tkEntry.c b/generic/tkEntry.c
index c0ce47b..ff3f134 100644
--- a/generic/tkEntry.c
+++ b/generic/tkEntry.c
@@ -1924,8 +1924,8 @@ EntryComputeGeometry(
*/
if (entryPtr->showChar != NULL) {
- Tcl_UniChar ch;
- char buf[TCL_UTF_MAX];
+ int ch;
+ char buf[6];
int size;
/*
@@ -1935,8 +1935,8 @@ EntryComputeGeometry(
* resulting string.
*/
- Tcl_UtfToUniChar(entryPtr->showChar, &ch);
- size = Tcl_UniCharToUtf(ch, buf);
+ TkUtfToUniChar(entryPtr->showChar, &ch);
+ size = TkUniCharToUtf(ch, buf);
entryPtr->numDisplayBytes = entryPtr->numChars * size;
p = ckalloc(entryPtr->numDisplayBytes + 1);
diff --git a/generic/tkFont.c b/generic/tkFont.c
index 102fc6e..ca7b34e 100644
--- a/generic/tkFont.c
+++ b/generic/tkFont.c
@@ -497,7 +497,7 @@ Tk_FontObjCmd(
const char *s;
Tk_Font tkfont;
Tcl_Obj *optPtr, *charPtr, *resultPtr;
- Tcl_UniChar uniChar = 0;
+ int uniChar = 0;
const TkFontAttributes *faPtr;
TkFontAttributes fa;
@@ -562,17 +562,19 @@ Tk_FontObjCmd(
*/
if (charPtr != NULL) {
- if (Tcl_GetCharLength(charPtr) != 1) {
+ const char *string = Tcl_GetString(charPtr);
+ int len = TkUtfToUniChar(string, &uniChar);
+
+ if (len != charPtr->length) {
resultPtr = Tcl_NewStringObj(
"expected a single character but got \"", -1);
- Tcl_AppendLimitedToObj(resultPtr, Tcl_GetString(charPtr),
+ Tcl_AppendLimitedToObj(resultPtr, string,
-1, 40, "...");
Tcl_AppendToObj(resultPtr, "\"", -1);
Tcl_SetObjResult(interp, resultPtr);
Tcl_SetErrorCode(interp, "TK", "VALUE", "FONT_SAMPLE", NULL);
return TCL_ERROR;
}
- uniChar = Tcl_GetUniChar(charPtr, 0);
}
/*
@@ -1694,7 +1696,7 @@ Tk_PostscriptFontName(
} else if (strcasecmp(family, "ZapfDingbats") == 0) {
family = "ZapfDingbats";
} else {
- Tcl_UniChar ch;
+ int ch;
/*
* Inline, capitalize the first letter of each word, lowercase the
@@ -1712,7 +1714,7 @@ Tk_PostscriptFontName(
src++;
upper = 1;
}
- src += Tcl_UtfToUniChar(src, &ch);
+ src += TkUtfToUniChar(src, &ch);
if (upper) {
ch = Tcl_UniCharToUpper(ch);
upper = 0;
@@ -3249,7 +3251,7 @@ Tk_TextLayoutToPostscript(
int i, j, len;
const char *p, *glyphname;
char uindex[5], c, *ps;
- Tcl_UniChar ch;
+ int ch;
Tcl_AppendToObj(psObj, "[(", -1);
for (i = 0; i < layoutPtr->numChunks; i++, chunkPtr++) {
@@ -3272,7 +3274,7 @@ Tk_TextLayoutToPostscript(
* international postscript fonts.
*/
- p += Tcl_UtfToUniChar(p, &ch);
+ p += TkUtfToUniChar(p, &ch);
if ((ch == '(') || (ch == ')') || (ch == '\\') || (ch < 0x20)) {
/*
* Tricky point: the "03" is necessary in the sprintf below,
@@ -3298,6 +3300,9 @@ Tk_TextLayoutToPostscript(
* use the full glyph name.
*/
+ if (ch > 0xffff) {
+ goto noMapping;
+ }
sprintf(uindex, "%04X", ch); /* endianness? */
glyphname = Tcl_GetVar2(interp, "::tk::psglyphs", uindex, 0);
if (glyphname) {
@@ -3318,6 +3323,7 @@ Tk_TextLayoutToPostscript(
* No known mapping for the character into the space of
* PostScript glyphs. Ignore it. :-(
*/
+noMapping: ;
#ifdef TK_DEBUG_POSTSCRIPT_OUTPUT
fprintf(stderr, "Warning: no mapping to PostScript "
diff --git a/generic/tkInt.h b/generic/tkInt.h
index dd5dcad..1615a81 100644
--- a/generic/tkInt.h
+++ b/generic/tkInt.h
@@ -1196,7 +1196,7 @@ MODULE_SCOPE void TkUnderlineCharsInContext(Display *display,
const char *string, int numBytes, int x, int y,
int firstByte, int lastByte);
MODULE_SCOPE void TkpGetFontAttrsForChar(Tk_Window tkwin, Tk_Font tkfont,
- Tcl_UniChar c, struct TkFontAttributes *faPtr);
+ int c, struct TkFontAttributes *faPtr);
MODULE_SCOPE Tcl_Obj * TkNewWindowObj(Tk_Window tkwin);
MODULE_SCOPE void TkpShowBusyWindow(TkBusy busy);
MODULE_SCOPE void TkpHideBusyWindow(TkBusy busy);
@@ -1232,6 +1232,14 @@ MODULE_SCOPE Status TkParseColor (Display * display,
MODULE_SCOPE void TkUnixSetXftClipRegion(TkRegion clipRegion);
#endif
+#if TCL_UTF_MAX > 4
+# define TkUtfToUniChar Tcl_UtfToUniChar
+# define TkUniCharToUtf Tcl_UniCharToUtf
+#else
+ MODULE_SCOPE int TkUtfToUniChar(const char *, int *);
+ MODULE_SCOPE int TkUniCharToUtf(int, char *);
+#endif
+
/*
* Unsupported commands.
*/
diff --git a/generic/tkText.c b/generic/tkText.c
index 0e41ac8..412a7f2 100644
--- a/generic/tkText.c
+++ b/generic/tkText.c
@@ -4459,7 +4459,7 @@ TkTextGetTabs(
Tcl_Obj **objv;
TkTextTabArray *tabArrayPtr;
TkTextTab *tabPtr;
- Tcl_UniChar ch;
+ int ch;
double prevStop, lastStop;
/*
* Map these strings to TkTextTabAlign values.
@@ -4566,7 +4566,7 @@ TkTextGetTabs(
* There may be a more efficient way of getting this.
*/
- Tcl_UtfToUniChar(Tcl_GetString(objv[i+1]), &ch);
+ TkUtfToUniChar(Tcl_GetString(objv[i+1]), &ch);
if (!Tcl_UniCharIsAlpha(ch)) {
continue;
}
@@ -5880,7 +5880,7 @@ SearchCore(
CLANG_ASSERT(pattern);
do {
- Tcl_UniChar ch;
+ int ch;
const char *p;
int lastFullLine = lastOffset;
@@ -6110,7 +6110,7 @@ SearchCore(
}
} else {
firstOffset = p - startOfLine +
- Tcl_UtfToUniChar(startOfLine+matchOffset,&ch);
+ TkUtfToUniChar(startOfLine+matchOffset,&ch);
}
}
} while (searchSpecPtr->all);
diff --git a/generic/tkTextDisp.c b/generic/tkTextDisp.c
index a135084..f28ec6a 100644
--- a/generic/tkTextDisp.c
+++ b/generic/tkTextDisp.c
@@ -7581,8 +7581,8 @@ TkTextCharLayoutProc(
if (bytesThatFit < maxBytes) {
if ((bytesThatFit == 0) && noCharsYet) {
- Tcl_UniChar ch;
- int chLen = Tcl_UtfToUniChar(p, &ch);
+ int ch;
+ int chLen = TkUtfToUniChar(p, &ch);
#if TK_LAYOUT_WITH_BASE_CHUNKS
bytesThatFit = CharChunkMeasureChars(chunkPtr, line,
diff --git a/generic/tkTextIndex.c b/generic/tkTextIndex.c
index 8820191..d227bd8 100644
--- a/generic/tkTextIndex.c
+++ b/generic/tkTextIndex.c
@@ -2298,9 +2298,9 @@ StartEnd(
int chSize = 1;
if (segPtr->typePtr == &tkTextCharType) {
- Tcl_UniChar ch;
+ int ch;
- chSize = Tcl_UtfToUniChar(segPtr->body.chars + offset, &ch);
+ chSize = TkUtfToUniChar(segPtr->body.chars + offset, &ch);
if (!Tcl_UniCharIsWordChar(ch)) {
break;
}
@@ -2343,9 +2343,9 @@ StartEnd(
int chSize = 1;
if (segPtr->typePtr == &tkTextCharType) {
- Tcl_UniChar ch;
- Tcl_UtfToUniChar(segPtr->body.chars + offset, &ch);
+ int ch;
+ TkUtfToUniChar(segPtr->body.chars + offset, &ch);
if (!Tcl_UniCharIsWordChar(ch)) {
break;
}
diff --git a/generic/tkUtil.c b/generic/tkUtil.c
index 6563165..7ff0b9e 100644
--- a/generic/tkUtil.c
+++ b/generic/tkUtil.c
@@ -1192,6 +1192,86 @@ TkSendVirtualEvent(
Tk_QueueWindowEvent(&event.general, TCL_QUEUE_TAIL);
}
+
+#if TCL_UTF_MAX <= 4
+/*
+ *---------------------------------------------------------------------------
+ *
+ * TkUtfToUniChar --
+ *
+ * Almost the same as Tcl_UtfToUniChar but using int instead of Tcl_UniChar.
+ * This function is capable of collapsing a upper/lower pair to a single
+ * unicode character. So, up to 6 bytes (two UTF-8 characters) might be read.
+ *
+ * Results:
+ * *chPtr is filled with the Tcl_UniChar, and the return value is the
+ * number of bytes from the UTF-8 string that were consumed.
+ *
+ * Side effects:
+ * None.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+int
+TkUtfToUniChar(
+ const char *src, /* The UTF-8 string. */
+ int *chPtr) /* Filled with the Tcl_UniChar represented by
+ * the UTF-8 string. */
+{
+ Tcl_UniChar uniChar = 0;
+
+ int len = Tcl_UtfToUniChar(src, &uniChar);
+ if ((uniChar & 0xfc00) == 0xd800) {
+ Tcl_UniChar high = uniChar;
+ /* This can only happen when Tcl is compiled with TCL_UTF_MAX=4,
+ * or when a high surrogate character is detected */
+ int len2 = Tcl_UtfToUniChar(src+len, &uniChar);
+ if ((uniChar & 0xfc00) == 0xdc00) {
+ *chPtr = ((high & 0x3ff) << 10) | (uniChar & 0x3ff) | 0x10000;
+ len += len2;
+ } else {
+ *chPtr = high;
+ }
+ } else {
+ *chPtr = uniChar;
+ }
+ return len;
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * TkUniCharToUtf --
+ *
+ * Almost the same as Tcl_UniCharToUtf but producing surrogates if
+ * TCL_UTF_MAX==3.
+ *
+ * Results:
+ * *buf is filled with the UTF-8 string, and the return value is the
+ * number of bytes produced.
+ *
+ * Side effects:
+ * None.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+int TkUniCharToUtf(int ch, char *buf)
+{
+ int size = Tcl_UniCharToUtf(ch, buf);
+ if ((ch > 0xffff) && (size < 4)) {
+ /* Hey, this is wrong, we must be running TCL_UTF_MAX==3
+ * The best thing we can do is spit out 2 surrogates */
+ ch -= 0x10000;
+ size = Tcl_UniCharToUtf(((ch >> 10) | 0xd800), buf);
+ size += Tcl_UniCharToUtf(((ch & 0x3ff) | 0xdc00), buf+size);
+ }
+ return size;
+}
+
+
+#endif
/*
* Local Variables:
* mode: c
diff --git a/generic/ttk/ttkEntry.c b/generic/ttk/ttkEntry.c
index 533637d..a25574a 100644
--- a/generic/ttk/ttkEntry.c
+++ b/generic/ttk/ttkEntry.c
@@ -282,11 +282,11 @@ static char *EntryDisplayString(const char *showChar, int numChars)
{
char *displayString, *p;
int size;
- Tcl_UniChar ch;
- char buf[TCL_UTF_MAX];
+ int ch;
+ char buf[6];
- Tcl_UtfToUniChar(showChar, &ch);
- size = Tcl_UniCharToUtf(ch, buf);
+ TkUtfToUniChar(showChar, &ch);
+ size = TkUniCharToUtf(ch, buf);
p = displayString = ckalloc(numChars * size + 1);
while (numChars--) {
@@ -406,7 +406,7 @@ ExpandPercents(
int number, length;
const char *string;
int stringLength;
- Tcl_UniChar ch;
+ int ch;
char numStorage[2*TCL_INTEGER_SPACE];
while (*template) {
@@ -430,7 +430,7 @@ ExpandPercents(
*/
++template; /* skip over % */
if (*template != '\0') {
- template += Tcl_UtfToUniChar(template, &ch);
+ template += TkUtfToUniChar(template, &ch);
} else {
ch = '%';
}
@@ -480,7 +480,7 @@ ExpandPercents(
string = Tk_PathName(entryPtr->core.tkwin);
break;
default:
- length = Tcl_UniCharToUtf(ch, numStorage);
+ length = TkUniCharToUtf(ch, numStorage);
numStorage[length] = '\0';
string = numStorage;
break;