Patch from Christian Werner, for evaluation

author: jan.nijtmans <nijtmans@users.sourceforge.net> 2016-09-16 07:49:21 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2016-09-16 07:49:21 (GMT)
commit: 41d3c8f67e62fe307c46c91e5a42b71e39c59334 (patch)
tree: 0c7c6348e421d05be040ec77ef8a7200c85c391d /generic
parent: 9759a4f9b6f61757dc505a923c07951e560e47a3 (diff)
download: tk-41d3c8f67e62fe307c46c91e5a42b71e39c59334.zip
tk-41d3c8f67e62fe307c46c91e5a42b71e39c59334.tar.gz
tk-41d3c8f67e62fe307c46c91e5a42b71e39c59334.tar.bz2
7 files changed, 195 insertions, 4 deletions
diff --git a/generic/tkEntry.c b/generic/tkEntry.c
index a66cf18..5faf4ef 100644
--- a/generic/tkEntry.c
+++ b/generic/tkEntry.c
@@ -1926,7 +1926,6 @@ EntryComputeGeometry(
      */
 
     if (entryPtr->showChar != NULL) {
-	Tcl_UniChar ch;
 	char buf[4];
 	int size;
 
@@ -1936,8 +1935,15 @@ EntryComputeGeometry(
 	 * characters might end up looking like one valid UTF character in the
 	 * resulting string.
 	 */
+#if TCL_UTF_MAX == 4
+	int ch;
+
+	TkUtfToUniChar32(entryPtr->showChar, &ch);
+#else
+	Tcl_UniChar ch;
 
 	Tcl_UtfToUniChar(entryPtr->showChar, &ch);
+#endif
 	size = Tcl_UniCharToUtf(ch, buf);
 
 	entryPtr->numDisplayBytes = entryPtr->numChars * size;
@@ -3414,7 +3420,11 @@ ExpandPercents(
 				 * list element. */
     int number, length;
     register const char *string;
+#if TCL_UTF_MAX == 4
+    int ch;
+#else
     Tcl_UniChar ch;
+#endif
     char numStorage[2*TCL_INTEGER_SPACE];
 
     while (1) {
@@ -3447,7 +3457,11 @@ ExpandPercents(
 
 	before++; /* skip over % */
 	if (*before != '\0') {
+#if TCL_UTF_MAX == 4
+	    before += TkUtfToUniChar32(before, &ch);
+#else
 	    before += Tcl_UtfToUniChar(before, &ch);
+#endif
 	} else {
 	    ch = '%';
 	}
diff --git a/generic/tkFont.c b/generic/tkFont.c
index 1ffac16..3088959 100644
--- a/generic/tkFont.c
+++ b/generic/tkFont.c
@@ -497,7 +497,11 @@ Tk_FontObjCmd(
 	const char *s;
 	Tk_Font tkfont;
 	Tcl_Obj *optPtr, *charPtr, *resultPtr;
+#if TCL_UTF_MAX == 4
 	int uniChar = 0;
+#else
+	Tcl_UniChar uniChar = 0;
+#endif
 	const TkFontAttributes *faPtr;
 	TkFontAttributes fa;
 
@@ -562,6 +566,10 @@ Tk_FontObjCmd(
 	 */
 
 	if (charPtr != NULL) {
+#if TCL_UTF_MAX == 4
+	    Tcl_UniChar *ucPtr;
+#endif
+
 	    if (Tcl_GetCharLength(charPtr) != 1) {
 		resultPtr = Tcl_NewStringObj(
 			"expected a single character but got \"", -1);
@@ -572,7 +580,18 @@ Tk_FontObjCmd(
 		Tcl_SetErrorCode(interp, "TK", "VALUE", "FONT_SAMPLE", NULL);
 		return TCL_ERROR;
 	    }
+#if TCL_UTF_MAX == 4
+	    ucPtr = Tcl_GetUnicodeFromObj(charPtr, NULL);
+	    uniChar = *ucPtr;
+	    if (((uniChar & 0xFC00) == 0xD800) && (ucPtr[1] != 0x000)) {
+		if ((ucPtr[1] & 0xFC00) == 0xDC00) {
+		    uniChar = ((uniChar & 0x3FF) << 10) + (ucPtr[1] & 0x3FF);
+		    uniChar += 0x10000;
+		}
+	    }
+#else
 	    uniChar = Tcl_GetUniChar(charPtr, 0);
+#endif
 	}
 
 	/*
@@ -1694,7 +1713,11 @@ Tk_PostscriptFontName(
     } else if (strcasecmp(family, "ZapfDingbats") == 0) {
 	family = "ZapfDingbats";
     } else {
+#if TCL_UTF_MAX == 4
+	int ch;
+#else
 	Tcl_UniChar ch;
+#endif
 
 	/*
 	 * Inline, capitalize the first letter of each word, lowercase the
@@ -1712,7 +1735,11 @@ Tk_PostscriptFontName(
 		src++;
 		upper = 1;
 	    }
+#if TCL_UTF_MAX == 4
+	    src += TkUtfToUniChar32(src, &ch);
+#else
 	    src += Tcl_UtfToUniChar(src, &ch);
+#endif
 	    if (upper) {
 		ch = (Tcl_UniChar) Tcl_UniCharToUpper(ch);
 		upper = 0;
@@ -3249,7 +3276,11 @@ Tk_TextLayoutToPostscript(
     int i, j, len;
     const char *p, *glyphname;
     char uindex[5], c, *ps;
+#if TCL_UTF_MAX == 4
+    int ch;
+#else
     Tcl_UniChar ch;
+#endif
 
     Tcl_AppendToObj(psObj, "[(", -1);
     for (i = 0; i < layoutPtr->numChunks; i++, chunkPtr++) {
@@ -3272,7 +3303,11 @@ Tk_TextLayoutToPostscript(
 	     * international postscript fonts.
 	     */
 
+#if TCL_UTF_MAX == 4
+	    p += TkUtfToUniChar32(p, &ch);
+#else
 	    p += Tcl_UtfToUniChar(p, &ch);
+#endif
 	    if ((ch == '(') || (ch == ')') || (ch == '\\') || (ch < 0x20)) {
 		/*
 		 * Tricky point: the "03" is necessary in the sprintf below,
@@ -3298,6 +3333,11 @@ Tk_TextLayoutToPostscript(
 	     * use the full glyph name.
 	     */
 
+#if TCL_UTF_MAX > 3
+	    if (ch > 0xffff) {
+		goto noMapping;
+	    }
+#endif
 	    sprintf(uindex, "%04X", ch);		/* endianness? */
 	    glyphname = Tcl_GetVar2(interp, "::tk::psglyphs", uindex, 0);
 	    if (glyphname) {
@@ -3318,6 +3358,7 @@ Tk_TextLayoutToPostscript(
 		 * No known mapping for the character into the space of
 		 * PostScript glyphs. Ignore it. :-(
 		 */
+noMapping:	;
 
 #ifdef TK_DEBUG_POSTSCRIPT_OUTPUT
 		fprintf(stderr, "Warning: no mapping to PostScript "
diff --git a/generic/tkInt.h b/generic/tkInt.h
index 0b502e4..367ef3a 100644
--- a/generic/tkInt.h
+++ b/generic/tkInt.h
@@ -1232,6 +1232,10 @@ MODULE_SCOPE Status TkParseColor (Display * display,
 MODULE_SCOPE void	TkUnixSetXftClipRegion(TkRegion clipRegion);
 #endif
 
+#if TCL_UTF_MAX == 4
+MODULE_SCOPE int	TkUtfToUniChar32(const char *src, int *chPtr);
+#endif
+
 /*
  * Unsupported commands.
  */
diff --git a/generic/tkText.c b/generic/tkText.c
index 1227e7b..834e842 100644
--- a/generic/tkText.c
+++ b/generic/tkText.c
@@ -4508,7 +4508,11 @@ TkTextGetTabs(
     Tcl_Obj **objv;
     TkTextTabArray *tabArrayPtr;
     TkTextTab *tabPtr;
+#if TCL_UTF_MAX == 4
+    int ch;
+#else
     Tcl_UniChar ch;
+#endif
     double prevStop, lastStop;
     /*
      * Map these strings to TkTextTabAlign values.
@@ -4615,7 +4619,11 @@ TkTextGetTabs(
 	 * There may be a more efficient way of getting this.
 	 */
 
+#if TCL_UTF_MAX == 4
+	TkUtfToUniChar32(Tcl_GetString(objv[i+1]), &ch);
+#else
 	Tcl_UtfToUniChar(Tcl_GetString(objv[i+1]), &ch);
+#endif
 	if (!Tcl_UniCharIsAlpha(ch)) {
 	    continue;
 	}
diff --git a/generic/tkTextIndex.c b/generic/tkTextIndex.c
index 92ca03b..f64a6d2 100644
--- a/generic/tkTextIndex.c
+++ b/generic/tkTextIndex.c
@@ -2298,9 +2298,13 @@ StartEnd(
 	    int chSize = 1;
 
 	    if (segPtr->typePtr == &tkTextCharType) {
+#if TCL_UTF_MAX == 4
+		int ch;
+		chSize = TkUtfToUniChar32(segPtr->body.chars + offset, &ch);
+#else
 		Tcl_UniChar ch;
-
 		chSize = Tcl_UtfToUniChar(segPtr->body.chars + offset, &ch);
+#endif
 		if (!Tcl_UniCharIsWordChar(ch)) {
 		    break;
 		}
@@ -2343,9 +2347,13 @@ StartEnd(
 	    int chSize = 1;
 
 	    if (segPtr->typePtr == &tkTextCharType) {
+#if TCL_UTF_MAX == 4
+		int ch;
+		TkUtfToUniChar32(segPtr->body.chars + offset, &ch);
+#else
 		Tcl_UniChar ch;
-
 		Tcl_UtfToUniChar(segPtr->body.chars + offset, &ch);
+#endif
 		if (!Tcl_UniCharIsWordChar(ch)) {
 		    break;
 		}
diff --git a/generic/tkUtil.c b/generic/tkUtil.c
index d4c4d2d..fb796fd 100644
--- a/generic/tkUtil.c
+++ b/generic/tkUtil.c
@@ -1192,6 +1192,108 @@ TkSendVirtualEvent(
 
     Tk_QueueWindowEvent(&event.general, TCL_QUEUE_TAIL);
 }
+
+#if TCL_UTF_MAX == 4
+/*
+ *---------------------------------------------------------------------------
+ *
+ * TkUtfToUniChar32 --
+ *
+ *	Copied from Tcl_UtfToUniChar but using int instead of Tcl_UniChar!
+ *
+ *	Extract the Tcl_UniChar represented by the UTF-8 string. Bad UTF-8
+ *	sequences are converted to valid Tcl_UniChars and processing
+ *	continues. Equivalent to Plan 9 chartorune().
+ *
+ *	The caller must ensure that the source buffer is long enough that this
+ *	routine does not run off the end and dereference non-existent memory
+ *	looking for trail bytes. If the source buffer is known to be '\0'
+ *	terminated, this cannot happen. Otherwise, the caller should call
+ *	Tcl_UtfCharComplete() before calling this routine to ensure that
+ *	enough bytes remain in the string.
+ *
+ * Results:
+ *	*chPtr is filled with the Tcl_UniChar, and the return value is the
+ *	number of bytes from the UTF-8 string that were consumed.
+ *
+ * Side effects:
+ *	None.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+int
+TkUtfToUniChar32(
+    const char *src,	/* The UTF-8 string. */
+    int *chPtr)		/* Filled with the Tcl_UniChar represented by
+			 * the UTF-8 string. */
+{
+    int byte;
+
+    /*
+     * The 1 to 4 byte UTF-8 sequences are all unrolled; no loop is needed.
+     */
+
+    byte = *((unsigned char *) src);
+    if (byte < 0xC0) {
+	/*
+	 * Handles properly formed UTF-8 characters between 0x01 and 0x7F.
+	 * Also treats \0 and naked trail bytes 0x80 to 0xBF as valid
+	 * characters representing themselves.
+	 */
+
+	*chPtr = byte;
+	return 1;
+    } else if (byte < 0xE0) {
+	if ((src[1] & 0xC0) == 0x80) {
+	    /*
+	     * Two-byte-character lead-byte followed by a trail-byte.
+	     */
+
+	    *chPtr = ((byte & 0x1F) << 6) | (src[1] & 0x3F);
+	    return 2;
+	}
+
+	/*
+	 * A two-byte-character lead-byte not followed by trail-byte
+	 * represents itself.
+	 */
+    } else if (byte < 0xF0) {
+	if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80)) {
+	    /*
+	     * Three-byte-character lead byte followed by two trail bytes.
+	     */
+
+	    *chPtr = ((byte & 0x0F) << 12)
+		    | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F);
+	    return 3;
+	}
+
+	/*
+	 * A three-byte-character lead-byte not followed by two trail-bytes
+	 * represents itself.
+	 */
+    } else if (byte < 0xF8) {
+	if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) {
+	    /*
+	     * Four-byte-character lead byte followed by three trail bytes.
+	     * The lead byte is 11110xxx: its payload is the low 3 bits (0x07).
+	     */
+	    *chPtr = ((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
+		    | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F);
+	    return 4;
+	}
+
+	/*
+	 * A four-byte-character lead-byte not followed by three trail-bytes
+	 * represents itself.
+	 */
+    }
+    /* Malformed sequence, or a byte >= 0xF8: the byte represents itself. */
+    *chPtr = byte;
+    return 1;
+}
+#endif
 /*
  * Local Variables:
  * mode: c
diff --git a/generic/ttk/ttkEntry.c b/generic/ttk/ttkEntry.c
index d80e1fd..63ebc5f 100644
--- a/generic/ttk/ttkEntry.c
+++ b/generic/ttk/ttkEntry.c
@@ -282,10 +282,16 @@ static char *EntryDisplayString(const char *showChar, int numChars)
 {
     char *displayString, *p;
     int size;
-    Tcl_UniChar ch;
     char buf[4];
+#if TCL_UTF_MAX == 4
+    int ch;
+
+    TkUtfToUniChar32(showChar, &ch);
+#else
+    Tcl_UniChar ch;
 
     Tcl_UtfToUniChar(showChar, &ch);
+#endif
     size = Tcl_UniCharToUtf(ch, buf);
     p = displayString = ckalloc(numChars * size + 1);
 
@@ -406,7 +412,11 @@ ExpandPercents(
     int number, length;
     const char *string;
     int stringLength;
+#if TCL_UTF_MAX == 4
+    int ch;
+#else
     Tcl_UniChar ch;
+#endif
     char numStorage[2*TCL_INTEGER_SPACE];
 
     while (*template) {
@@ -430,7 +440,11 @@ ExpandPercents(
 	 */
 	++template; /* skip over % */
 	if (*template != '\0') {
+#if TCL_UTF_MAX == 4
+	    template += TkUtfToUniChar32(template, &ch);
+#else
 	    template += Tcl_UtfToUniChar(template, &ch);
+#endif
 	} else {
 	    ch = '%';
 	}
author	jan.nijtmans <nijtmans@users.sourceforge.net>	2016-09-16 07:49:21 (GMT)
committer	jan.nijtmans <nijtmans@users.sourceforge.net>	2016-09-16 07:49:21 (GMT)
commit	41d3c8f67e62fe307c46c91e5a42b71e39c59334 (patch)
tree	0c7c6348e421d05be040ec77ef8a7200c85c391d /generic
parent	9759a4f9b6f61757dc505a923c07951e560e47a3 (diff)
download	tk-41d3c8f67e62fe307c46c91e5a42b71e39c59334.zip tk-41d3c8f67e62fe307c46c91e5a42b71e39c59334.tar.gz tk-41d3c8f67e62fe307c46c91e5a42b71e39c59334.tar.bz2