2 files changed, 88 insertions, 0 deletions
diff --git a/generic/tkInt.h b/generic/tkInt.h
index 0b502e4..1615a81 100644
--- a/generic/tkInt.h
+++ b/generic/tkInt.h
@@ -1232,6 +1232,14 @@ MODULE_SCOPE Status TkParseColor (Display * display,
 MODULE_SCOPE void	TkUnixSetXftClipRegion(TkRegion clipRegion);
 #endif
 
+#if TCL_UTF_MAX > 4
+#   define TkUtfToUniChar Tcl_UtfToUniChar
+#   define TkUniCharToUtf Tcl_UniCharToUtf
+#else
+    MODULE_SCOPE int TkUtfToUniChar(const char *, int *);
+    MODULE_SCOPE int TkUniCharToUtf(int, char *);
+#endif
+
 /*
  * Unsupported commands.
  */
diff --git a/generic/tkUtil.c b/generic/tkUtil.c
index d4c4d2d..d89282f 100644
--- a/generic/tkUtil.c
+++ b/generic/tkUtil.c
@@ -1192,6 +1192,86 @@ TkSendVirtualEvent(
 
     Tk_QueueWindowEvent(&event.general, TCL_QUEUE_TAIL);
 }
+
+#if TCL_UTF_MAX <= 4
+/*
+ *---------------------------------------------------------------------------
+ *
+ * TkUtfToUniChar --
+ *
+ *	Almost the same as Tcl_UtfToUniChar but using int instead of Tcl_UniChar.
+ *	This function is capable of collapsing a upper/lower surrogate pair to a
+ *	single unicode character. So, up to 6 bytes might be consumed.
+ *
+ * Results:
+ *	*chPtr is filled with the Tcl_UniChar, and the return value is the
+ *	number of bytes from the UTF-8 string that were consumed.
+ *
+ * Side effects:
+ *	None.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+int
+TkUtfToUniChar(
+    const char *src,	/* The UTF-8 string. */
+    int *chPtr)		/* Filled with the Tcl_UniChar represented by
+			 * the UTF-8 string. */
+{
+    Tcl_UniChar uniChar = 0;
+
+    int len = Tcl_UtfToUniChar(src, &uniChar);
+    if ((uniChar & 0xfc00) == 0xd800) {
+	Tcl_UniChar high = uniChar;
+	/* This can only happen if Tcl is compiled with TCL_UTF_MAX=4,
+	 * or when a high surrogate character is detected in UTF-8 form */
+	int len2 = Tcl_UtfToUniChar(src+len, &uniChar);
+	if ((uniChar & 0xfc00) == 0xdc00) {
+	    *chPtr = (((high & 0x3ff) << 10) | (uniChar & 0x3ff)) + 0x10000;
+	    len += len2;
+	} else {
+	    *chPtr = high;
+	}
+    } else {
+	*chPtr = uniChar;
+    }
+    return len;
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * TkUniCharToUtf --
+ *
+ *	Almost the same as Tcl_UniCharToUtf but producing surrogates if
+ *	TCL_UTF_MAX==3. So, up to 6 bytes might be produced.
+ *
+ * Results:
+ *	*buf is filled with the UTF-8 string, and the return value is the
+ *	number of bytes produced.
+ *
+ * Side effects:
+ *	None.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+int TkUniCharToUtf(int ch, char *buf)
+{
+    int size = Tcl_UniCharToUtf(ch, buf);
+    if ((ch > 0xffff) && (ch <= 0x10ffff) && (size < 4)) {
+	/* Hey, this is wrong, we must be running TCL_UTF_MAX==3
+	 * The best thing we can do is spit out 2 surrogates */
+	ch -= 0x10000;
+	size = Tcl_UniCharToUtf(((ch >> 10) | 0xd800), buf);
+	size += Tcl_UniCharToUtf(((ch & 0x3ff) | 0xdc00), buf+size);
+    }
+    return size;
+}
+
+
+#endif
 /*
  * Local Variables:
  * mode: c