1 files changed, 279 insertions, 4 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 2548b73..b6dcb8f 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -234,6 +234,9 @@ static int		TableToUtfProc(ClientData clientData, const char *src,
 			    char *dst, int dstLen, int *srcReadPtr,
 			    int *dstWrotePtr, int *dstCharsPtr);
 static size_t		unilen(const char *src);
+#if TCL_UTF_MAX > 4
+static size_t		unilen4(const char *src);
+#endif
 static int		UnicodeToUtfProc(ClientData clientData,
 			    const char *src, int srcLen, int flags,
 			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
@@ -269,6 +272,18 @@ static int		Iso88591ToUtfProc(ClientData clientData,
 			    Tcl_EncodingState *statePtr, char *dst,
 			    int dstLen, int *srcReadPtr, int *dstWrotePtr,
 			    int *dstCharsPtr);
+#if TCL_UTF_MAX > 4
+static int		Utf16ToUtfProc(ClientData clientData,
+			    const char *src, int srcLen, int flags,
+			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
+			    int *srcReadPtr, int *dstWrotePtr,
+			    int *dstCharsPtr);
+static int		UtfToUtf16Proc(ClientData clientData,
+			    const char *src, int srcLen, int flags,
+			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
+			    int *srcReadPtr, int *dstWrotePtr,
+			    int *dstCharsPtr);
+#endif
 
 /*
  * A Tcl_ObjType for holding a cached Tcl_Encoding in the twoPtrValue.ptr1 field
@@ -578,13 +593,31 @@ TclInitEncodingSubsystem(void)
     type.clientData	= NULL;
     Tcl_CreateEncoding(&type);
 
+#if TCL_UTF_MAX > 4
+    type.encodingName   = "utf-32";
+#else
     type.encodingName   = "unicode";
+#endif
     type.toUtfProc	= UnicodeToUtfProc;
     type.fromUtfProc    = UtfToUnicodeProc;
     type.freeProc	= NULL;
+#if TCL_UTF_MAX > 4
+    type.nullSize	= 4;
+#else
+    type.nullSize	= 2;
+#endif
+    type.clientData	= NULL;
+    Tcl_CreateEncoding(&type);
+
+#if TCL_UTF_MAX > 4
+    type.encodingName   = "unicode";
+    type.toUtfProc	= Utf16ToUtfProc;
+    type.fromUtfProc    = UtfToUtf16Proc;
+    type.freeProc	= NULL;
     type.nullSize	= 2;
     type.clientData	= NULL;
     Tcl_CreateEncoding(&type);
+#endif
 
     /*
      * Need the iso8859-1 encoding in order to process binary data, so force
@@ -1071,6 +1104,10 @@ Tcl_CreateEncoding(
     encodingPtr->clientData	= typePtr->clientData;
     if (typePtr->nullSize == 1) {
 	encodingPtr->lengthProc = (LengthProc *) strlen;
+#if TCL_UTF_MAX > 4
+    } else if (typePtr->nullSize == 4) {
+	encodingPtr->lengthProc = (LengthProc *) unilen4;
+#endif
     } else {
 	encodingPtr->lengthProc = (LengthProc *) unilen;
     }
@@ -1441,10 +1478,10 @@ Tcl_UtfToExternal(
 /*
  *---------------------------------------------------------------------------
  *
- * Tcl_FindExecutable --
+ * Tcl_InitSubsystems/Tcl_FindExecutable --
  *
- *	This function computes the absolute path name of the current
- *	application, given its argv[0] value.
+ *	This function initializes everything needed for the Tcl library
+ *	to be able to operate.
  *
  * Results:
  *	None.
@@ -1455,6 +1492,23 @@ Tcl_UtfToExternal(
  *
  *---------------------------------------------------------------------------
  */
+MODULE_SCOPE const TclStubs tclStubs;
+
+static const struct {
+    const TclStubs *stubs;
+    const char version[12];
+} stubInfo = {
+    &tclStubs, TCL_PATCH_LEVEL
+};
+
+const char *
+Tcl_InitSubsystems(TCL_NORETURN1 Tcl_PanicProc *panicProc)
+{
+    Tcl_SetPanicProc(panicProc);
+    TclInitSubsystems();
+    return stubInfo.version;
+}
+
 #undef Tcl_FindExecutable
 void
 Tcl_FindExecutable(
@@ -2987,6 +3041,212 @@ Iso88591FromUtfProc(
     return result;
 }
 
+#if TCL_UTF_MAX > 4
+/*
+ *-------------------------------------------------------------------------
+ *
+ * Utf16ToUtfProc --
+ *
+ *	Convert from UTF-16 to UTF-8.
+ *
+ * Results:
+ *	Returns TCL_OK if conversion was successful.
+ *
+ * Side effects:
+ *	None.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+static int
+Utf16ToUtfProc(
+    ClientData clientData,	/* Not used. */
+    const char *src,		/* Source string in Unicode. */
+    int srcLen,			/* Source string length in bytes. */
+    int flags,			/* Conversion control flags. */
+    Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
+				 * information used during a piecewise
+				 * conversion. Contents of statePtr are
+				 * initialized and/or reset by conversion
+				 * routine under control of flags argument. */
+    char *dst,			/* Output buffer in which converted string is
+				 * stored. */
+    int dstLen,			/* The maximum length of output buffer in
+				 * bytes. */
+    int *srcReadPtr,		/* Filled with the number of bytes from the
+				 * source string that were converted. This may
+				 * be less than the original source length if
+				 * there was a problem converting some source
+				 * characters. */
+    int *dstWrotePtr,		/* Filled with the number of bytes that were
+				 * stored in the output buffer as a result of
+				 * the conversion. */
+    int *dstCharsPtr)		/* Filled with the number of characters that
+				 * correspond to the bytes stored in the
+				 * output buffer. */
+{
+    const char *srcStart, *srcEnd;
+    const char *dstEnd, *dstStart;
+    int result, numChars, charLimit = INT_MAX;
+    Tcl_UniChar ch;
+
+    if (flags & TCL_ENCODING_CHAR_LIMIT) {
+	charLimit = *dstCharsPtr;
+    }
+    result = TCL_OK;
+    if ((srcLen % sizeof(unsigned short)) != 0) {
+	result = TCL_CONVERT_MULTIBYTE;
+	srcLen /= sizeof(unsigned short);
+	srcLen *= sizeof(unsigned short);
+    }
+
+    srcStart = src;
+    srcEnd = src + srcLen;
+
+    dstStart = dst;
+    dstEnd = dst + dstLen - TCL_UTF_MAX;
+
+    for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
+	if (dst > dstEnd) {
+	    result = TCL_CONVERT_NOSPACE;
+	    break;
+	}
+	ch = *(unsigned short *)src;
+	if (ch && ch < 0x80) {
+	    *dst++ = (ch & 0xFF);
+	} else if ((ch >= 0xD800) && (ch <= 0xDBFF)) {
+	    Tcl_UniChar ch2 = *(unsigned short *)(src + sizeof(unsigned short));
+
+	    if ((ch2 >= 0xDC00) && (ch2 <= 0xDFFF)) {
+		ch = ((ch & 0x3FF) << 10) + 0x10000 + (ch2 & 0x3FF);
+		src += sizeof(unsigned short);
+	    }
+	    dst += Tcl_UniCharToUtf(ch, dst);
+	} else {
+	    dst += Tcl_UniCharToUtf(ch, dst);
+	}
+	src += sizeof(unsigned short);
+    }
+
+    *srcReadPtr = src - srcStart;
+    *dstWrotePtr = dst - dstStart;
+    *dstCharsPtr = numChars;
+    return result;
+}
+#endif
+
+#if TCL_UTF_MAX > 4
+/*
+ *-------------------------------------------------------------------------
+ *
+ * UtfToUtf16Proc --
+ *
+ *	Convert from UTF-8 to UTF-16.
+ *
+ * Results:
+ *	Returns TCL_OK if conversion was successful.
+ *
+ * Side effects:
+ *	None.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+static int
+UtfToUtf16Proc(
+    ClientData clientData,	/* TableEncodingData that specifies
+				 * encoding. */
+    const char *src,		/* Source string in UTF-8. */
+    int srcLen,			/* Source string length in bytes. */
+    int flags,			/* Conversion control flags. */
+    Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
+				 * information used during a piecewise
+				 * conversion. Contents of statePtr are
+				 * initialized and/or reset by conversion
+				 * routine under control of flags argument. */
+    char *dst,			/* Output buffer in which converted string is
+				 * stored. */
+    int dstLen,			/* The maximum length of output buffer in
+				 * bytes. */
+    int *srcReadPtr,		/* Filled with the number of bytes from the
+				 * source string that were converted. This may
+				 * be less than the original source length if
+				 * there was a problem converting some source
+				 * characters. */
+    int *dstWrotePtr,		/* Filled with the number of bytes that were
+				 * stored in the output buffer as a result of
+				 * the conversion. */
+    int *dstCharsPtr)		/* Filled with the number of characters that
+				 * correspond to the bytes stored in the
+				 * output buffer. */
+{
+    const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
+    int result, numChars;
+    Tcl_UniChar ch;
+
+    srcStart = src;
+    srcEnd = src + srcLen;
+    srcClose = srcEnd;
+    if ((flags & TCL_ENCODING_END) == 0) {
+	srcClose -= TCL_UTF_MAX;
+    }
+
+    dstStart = dst;
+    dstEnd   = dst + dstLen - 2 * sizeof(unsigned short);
+
+    result = TCL_OK;
+    for (numChars = 0; src < srcEnd; numChars++) {
+	if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
+	    /*
+	     * If there is more string to follow, this will ensure that the
+	     * last UTF-8 character in the source buffer hasn't been cut off.
+	     */
+
+	    result = TCL_CONVERT_MULTIBYTE;
+	    break;
+	}
+	if (dst > dstEnd) {
+	    result = TCL_CONVERT_NOSPACE;
+	    break;
+        }
+	src += TclUtfToUniChar(src, &ch);
+
+	if (ch > 0x10FFFF) {
+	    ch = 0xFFFD;
+	}
+
+	/*
+	 * Need to handle this in a way that won't cause misalignment by
+	 * casting dst to a Tcl_UniChar. [Bug 1122671]
+	 */
+
+	if (ch > 0xFFFF) {
+	    int high = (((ch - 0x10000) >> 10) & 0x3FF) | 0xD800;
+
+	    ch = ((ch - 0x10000) & 0x3FF) | 0xDC00;
+#ifdef WORDS_BIGENDIAN
+	    *dst++ = ((high >> 8) & 0xFF);
+    	    *dst++ = (high & 0xFF);
+#else
+    	    *dst++ = (high & 0xFF);
+	    *dst++ = ((high >> 8) & 0xFF);
+#endif
+	}
+#ifdef WORDS_BIGENDIAN
+	*dst++ = ((ch >> 8) & 0xFF);
+	*dst++ = (ch & 0xFF);
+#else
+    	*dst++ = (ch & 0xFF);
+	*dst++ = ((ch >> 8) & 0xFF);
+#endif
+    }
+    *srcReadPtr = src - srcStart;
+    *dstWrotePtr = dst - dstStart;
+    *dstCharsPtr = numChars;
+    return result;
+}
+#endif
+
 /*
  *---------------------------------------------------------------------------
  *
@@ -3545,7 +3805,7 @@ GetTableEncoding(
 /*
  *---------------------------------------------------------------------------
  *
- * unilen --
+ * unilen, unilen4 --
  *
  *	A helper function for the Tcl_ExternalToUtf functions. This function
  *	is similar to strlen for double-byte characters: it returns the number
@@ -3572,6 +3832,21 @@ unilen(
     }
     return (char *) p - src;
 }
+
+#if TCL_UTF_MAX > 4
+static size_t
+unilen4(
+    const char *src)
+{
+    unsigned int *p;
+
+    p = (unsigned int *) src;
+    while (*p != 0x00000000) {
+	p++;
+    }
+    return (char *) p - src;
+}
+#endif
 
 /*
  *-------------------------------------------------------------------------