1 files changed, 1252 insertions, 402 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 92217f3..fc2835d 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -3,13 +3,14 @@
  *
  *	Contains the implementation of the encoding conversion package.
  *
- * Copyright (c) 1996-1998 Sun Microsystems, Inc.
+ * Copyright © 1996-1998 Sun Microsystems, Inc.
  *
  * See the file "license.terms" for information on usage and redistribution of
  * this file, and for a DISCLAIMER OF ALL WARRANTIES.
  */
 
 #include "tclInt.h"
+#include "tclIO.h"
 
 typedef size_t (LengthProc)(const char *src);
 
@@ -33,20 +34,22 @@ typedef struct {
     Tcl_EncodingFreeProc *freeProc;
 				/* If non-NULL, function to call when this
 				 * encoding is deleted. */
-    int nullSize;		/* Number of 0x00 bytes that signify
+    void *clientData;	/* Arbitrary value associated with encoding
+				 * type. Passed to conversion functions. */
+    Tcl_Size nullSize;	/* Number of 0x00 bytes that signify
 				 * end-of-string in this encoding. This number
 				 * is used to determine the source string
 				 * length when the srcLen argument is
-				 * negative. This number can be 1 or 2. */
-    ClientData clientData;	/* Arbitrary value associated with encoding
-				 * type. Passed to conversion functions. */
+				 * negative. This number can be 1, 2, or 4. */
     LengthProc *lengthProc;	/* Function to compute length of
 				 * null-terminated strings in this encoding.
 				 * If nullSize is 1, this is strlen; if
 				 * nullSize is 2, this is a function that
 				 * returns the number of bytes in a 0x0000
-				 * terminated string. */
-    int refCount;		/* Number of uses of this structure. */
+				 * terminated string; if nullSize is 4, this
+				 * is a function that returns the number of
+				 * bytes in a 0x00000000 terminated string. */
+    size_t refCount;		/* Number of uses of this structure. */
     Tcl_HashEntry *hPtr;	/* Hash table entry that owns this encoding. */
 } Encoding;
 
@@ -185,6 +188,29 @@ static Tcl_Encoding systemEncoding = NULL;
 Tcl_Encoding tclIdentityEncoding = NULL;
 
 /*
+ * Names of encoding profiles and corresponding integer values.
+ * Keep alphabetical order for error messages.
+ */
+static struct TclEncodingProfiles {
+    const char *name;
+    int value;
+} encodingProfiles[] = {
+    {"replace", TCL_ENCODING_PROFILE_REPLACE},
+    {"strict", TCL_ENCODING_PROFILE_STRICT},
+    {"tcl8", TCL_ENCODING_PROFILE_TCL8},
+};
+#define PROFILE_STRICT(flags_)                                         \
+    (CHANNEL_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_STRICT)
+
+#define PROFILE_REPLACE(flags_)                                         \
+    (CHANNEL_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_REPLACE)
+
+#define UNICODE_REPLACE_CHAR ((Tcl_UniChar)0xFFFD)
+#define SURROGATE(c_)      (((c_) & ~0x7FF) == 0xD800)
+#define HIGH_SURROGATE(c_) (((c_) & ~0x3FF) == 0xD800)
+#define LOW_SURROGATE(c_)  (((c_) & ~0x3FF) == 0xDC00)
+
+/*
  * The following variable is used in the sparse matrix code for a
  * TableEncoding to represent a page in the table that has no entries.
  */
@@ -214,51 +240,19 @@ static Tcl_Encoding		LoadEscapeEncoding(const char *name,
 static Tcl_Channel		OpenEncodingFileChannel(Tcl_Interp *interp,
 				    const char *name);
 static Tcl_EncodingFreeProc	TableFreeProc;
-static int		TableFromUtfProc(ClientData clientData,
-			    const char *src, int srcLen, int flags,
-			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
-			    int *srcReadPtr, int *dstWrotePtr,
-			    int *dstCharsPtr);
-static int		TableToUtfProc(ClientData clientData, const char *src,
-			    int srcLen, int flags, Tcl_EncodingState *statePtr,
-			    char *dst, int dstLen, int *srcReadPtr,
-			    int *dstWrotePtr, int *dstCharsPtr);
+static Tcl_EncodingConvertProc	TableFromUtfProc;
+static Tcl_EncodingConvertProc	TableToUtfProc;
 static size_t		unilen(const char *src);
-static int		UnicodeToUtfProc(ClientData clientData,
-			    const char *src, int srcLen, int flags,
-			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
-			    int *srcReadPtr, int *dstWrotePtr,
-			    int *dstCharsPtr);
-static int		UtfToUnicodeProc(ClientData clientData,
-			    const char *src, int srcLen, int flags,
-			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
-			    int *srcReadPtr, int *dstWrotePtr,
-			    int *dstCharsPtr);
-static int		UtfToUtfProc(ClientData clientData,
-			    const char *src, int srcLen, int flags,
-			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
-			    int *srcReadPtr, int *dstWrotePtr,
-			    int *dstCharsPtr, int pureNullMode);
-static int		UtfIntToUtfExtProc(ClientData clientData,
-			    const char *src, int srcLen, int flags,
-			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
-			    int *srcReadPtr, int *dstWrotePtr,
-			    int *dstCharsPtr);
-static int		UtfExtToUtfIntProc(ClientData clientData,
-			    const char *src, int srcLen, int flags,
-			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
-			    int *srcReadPtr, int *dstWrotePtr,
-			    int *dstCharsPtr);
-static int		Iso88591FromUtfProc(ClientData clientData,
-			    const char *src, int srcLen, int flags,
-			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
-			    int *srcReadPtr, int *dstWrotePtr,
-			    int *dstCharsPtr);
-static int		Iso88591ToUtfProc(ClientData clientData,
-			    const char *src, int srcLen, int flags,
-			    Tcl_EncodingState *statePtr, char *dst,
-			    int dstLen, int *srcReadPtr, int *dstWrotePtr,
-			    int *dstCharsPtr);
+static size_t		unilen4(const char *src);
+static Tcl_EncodingConvertProc	Utf32ToUtfProc;
+static Tcl_EncodingConvertProc	UtfToUtf32Proc;
+static Tcl_EncodingConvertProc	Utf16ToUtfProc;
+static Tcl_EncodingConvertProc	UtfToUtf16Proc;
+static Tcl_EncodingConvertProc	UtfToUcs2Proc;
+static Tcl_EncodingConvertProc	UtfToUtfProc;
+static Tcl_EncodingConvertProc	Iso88591FromUtfProc;
+static Tcl_EncodingConvertProc	Iso88591ToUtfProc;
+
 
 /*
  * A Tcl_ObjType for holding a cached Tcl_Encoding in the twoPtrValue.ptr1 field
@@ -267,8 +261,28 @@ static int		Iso88591ToUtfProc(ClientData clientData,
  */
 
 static const Tcl_ObjType encodingType = {
-    "encoding", FreeEncodingInternalRep, DupEncodingInternalRep, NULL, NULL
+    "encoding",
+    FreeEncodingInternalRep,
+    DupEncodingInternalRep,
+    NULL,
+    NULL
 };
+
+#define EncodingSetInternalRep(objPtr, encoding)				\
+    do {								\
+	Tcl_ObjInternalRep ir;						\
+	ir.twoPtrValue.ptr1 = (encoding);				\
+	ir.twoPtrValue.ptr2 = NULL;					\
+	Tcl_StoreInternalRep((objPtr), &encodingType, &ir);			\
+    } while (0)
+
+#define EncodingGetInternalRep(objPtr, encoding)				\
+    do {								\
+	const Tcl_ObjInternalRep *irPtr;					\
+	irPtr = TclFetchInternalRep ((objPtr), &encodingType);		\
+	(encoding) = irPtr ? (Tcl_Encoding)irPtr->twoPtrValue.ptr1 : NULL;		\
+    } while (0)
+
 
 /*
  *----------------------------------------------------------------------
@@ -295,17 +309,16 @@ Tcl_GetEncodingFromObj(
     Tcl_Obj *objPtr,
     Tcl_Encoding *encodingPtr)
 {
+    Tcl_Encoding encoding;
     const char *name = TclGetString(objPtr);
 
-    if (objPtr->typePtr != &encodingType) {
-	Tcl_Encoding encoding = Tcl_GetEncoding(interp, name);
-
+    EncodingGetInternalRep(objPtr, encoding);
+    if (encoding == NULL) {
+	encoding = Tcl_GetEncoding(interp, name);
 	if (encoding == NULL) {
 	    return TCL_ERROR;
 	}
-	TclFreeIntRep(objPtr);
-	objPtr->internalRep.twoPtrValue.ptr1 = encoding;
-	objPtr->typePtr = &encodingType;
+	EncodingSetInternalRep(objPtr, encoding);
     }
     *encodingPtr = Tcl_GetEncoding(NULL, name);
     return TCL_OK;
@@ -325,8 +338,10 @@ static void
 FreeEncodingInternalRep(
     Tcl_Obj *objPtr)
 {
-    Tcl_FreeEncoding((Tcl_Encoding)objPtr->internalRep.twoPtrValue.ptr1);
-    objPtr->typePtr = NULL;
+    Tcl_Encoding encoding;
+
+    EncodingGetInternalRep(objPtr, encoding);
+    Tcl_FreeEncoding(encoding);
 }
 
 /*
@@ -344,7 +359,8 @@ DupEncodingInternalRep(
     Tcl_Obj *srcPtr,
     Tcl_Obj *dupPtr)
 {
-    dupPtr->internalRep.twoPtrValue.ptr1 = Tcl_GetEncoding(NULL, srcPtr->bytes);
+    Tcl_Encoding encoding = Tcl_GetEncoding(NULL, TclGetString(srcPtr));
+    EncodingSetInternalRep(dupPtr, encoding);
 }
 
 /*
@@ -382,9 +398,9 @@ int
 Tcl_SetEncodingSearchPath(
     Tcl_Obj *searchPath)
 {
-    int dummy;
+    Tcl_Size dummy;
 
-    if (TCL_ERROR == TclListObjLength(NULL, searchPath, &dummy)) {
+    if (TCL_ERROR == TclListObjLengthM(NULL, searchPath, &dummy)) {
 	return TCL_ERROR;
     }
     TclSetProcessGlobalValue(&encodingSearchPath, searchPath, NULL);
@@ -429,9 +445,9 @@ void
 TclSetLibraryPath(
     Tcl_Obj *path)
 {
-    int dummy;
+    Tcl_Size dummy;
 
-    if (TCL_ERROR == TclListObjLength(NULL, path, &dummy)) {
+    if (TCL_ERROR == TclListObjLengthM(NULL, path, &dummy)) {
 	return;
     }
     TclSetProcessGlobalValue(&libraryPath, path, NULL);
@@ -465,22 +481,22 @@ TclSetLibraryPath(
 static void
 FillEncodingFileMap(void)
 {
-    int i, numDirs = 0;
+    Tcl_Size i, numDirs = 0;
     Tcl_Obj *map, *searchPath;
 
     searchPath = Tcl_GetEncodingSearchPath();
     Tcl_IncrRefCount(searchPath);
-    TclListObjLength(NULL, searchPath, &numDirs);
+    TclListObjLengthM(NULL, searchPath, &numDirs);
     map = Tcl_NewDictObj();
     Tcl_IncrRefCount(map);
 
-    for (i = numDirs-1; i >= 0; i--) {
+    for (i = numDirs-1; i != TCL_INDEX_NONE; i--) {
 	/*
 	 * Iterate backwards through the search path so as we overwrite
 	 * entries found, we favor files earlier on the search path.
 	 */
 
-	int j, numFiles;
+	Tcl_Size j, numFiles;
 	Tcl_Obj *directory, *matchFileList;
 	Tcl_Obj **filev;
 	Tcl_GlobTypeData readableFiles = {
@@ -494,7 +510,7 @@ FillEncodingFileMap(void)
 	Tcl_FSMatchInDirectory(NULL, matchFileList, directory, "*.enc",
 		&readableFiles);
 
-	TclListObjGetElements(NULL, matchFileList, &numFiles, &filev);
+	TclListObjGetElementsM(NULL, matchFileList, &numFiles, &filev);
 	for (j=0; j<numFiles; j++) {
 	    Tcl_Obj *encodingName, *fileObj;
 
@@ -529,6 +545,17 @@ FillEncodingFileMap(void)
  *---------------------------------------------------------------------------
  */
 
+/*
+ * NOTE: THESE BIT DEFINITIONS SHOULD NOT OVERLAP WITH INTERNAL USE BITS
+ * DEFINED IN tcl.h (TCL_ENCODING_* et al). Be cognizant of this
+ * when adding bits. TODO - should really be defined in a single file.
+ *
+ * To prevent conflicting bits, only define bits within 0xff00 mask here.
+ */
+#define TCL_ENCODING_LE	0x100   /* Used to distinguish LE/BE variants */
+#define ENCODING_UTF	0x200	/* For UTF-8 encoding, allow 4-byte output sequences */
+#define ENCODING_INPUT	0x400   /* For UTF-8/CESU-8 encoding, means external -> internal */
+
 void
 TclInitEncodingSubsystem(void)
 {
@@ -540,12 +567,16 @@ TclInitEncodingSubsystem(void)
         char c;
         short s;
     } isLe;
+    int leFlags;
 
     if (encodingsInitialized) {
 	return;
     }
 
+    /* Note: This DEPENDS on TCL_ENCODING_LE being defined in least sig byte */
     isLe.s = 1;
+    leFlags = isLe.c ? TCL_ENCODING_LE : 0;
+
     Tcl_MutexLock(&encodingMutex);
     Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS);
     Tcl_MutexUnlock(&encodingMutex);
@@ -556,7 +587,7 @@ TclInitEncodingSubsystem(void)
      * properly formed stream.
      */
 
-    type.encodingName	= "identity";
+    type.encodingName	= NULL;
     type.toUtfProc	= BinaryProc;
     type.fromUtfProc	= BinaryProc;
     type.freeProc	= NULL;
@@ -565,21 +596,63 @@ TclInitEncodingSubsystem(void)
     tclIdentityEncoding = Tcl_CreateEncoding(&type);
 
     type.encodingName	= "utf-8";
-    type.toUtfProc	= UtfExtToUtfIntProc;
-    type.fromUtfProc	= UtfIntToUtfExtProc;
+    type.toUtfProc	= UtfToUtfProc;
+    type.fromUtfProc	= UtfToUtfProc;
     type.freeProc	= NULL;
     type.nullSize	= 1;
-    type.clientData	= NULL;
+    type.clientData	= INT2PTR(ENCODING_UTF);
+    Tcl_CreateEncoding(&type);
+    type.clientData	= INT2PTR(0);
+    type.encodingName	= "cesu-8";
     Tcl_CreateEncoding(&type);
 
-    type.encodingName   = "unicode";
-    type.toUtfProc	= UnicodeToUtfProc;
-    type.fromUtfProc    = UtfToUnicodeProc;
+    type.toUtfProc	= Utf16ToUtfProc;
+    type.fromUtfProc    = UtfToUcs2Proc;
     type.freeProc	= NULL;
     type.nullSize	= 2;
-    type.clientData	= INT2PTR(isLe.c);
+    type.encodingName   = "ucs-2le";
+    type.clientData	= INT2PTR(TCL_ENCODING_LE);
+    Tcl_CreateEncoding(&type);
+    type.encodingName   = "ucs-2be";
+    type.clientData	= INT2PTR(0);
+    Tcl_CreateEncoding(&type);
+    type.encodingName   = "ucs-2";
+    type.clientData	= INT2PTR(leFlags);
     Tcl_CreateEncoding(&type);
 
+    type.toUtfProc	= Utf32ToUtfProc;
+    type.fromUtfProc    = UtfToUtf32Proc;
+    type.freeProc	= NULL;
+    type.nullSize	= 4;
+    type.encodingName   = "utf-32le";
+    type.clientData	= INT2PTR(TCL_ENCODING_LE);
+    Tcl_CreateEncoding(&type);
+    type.encodingName   = "utf-32be";
+    type.clientData	= INT2PTR(0);
+    Tcl_CreateEncoding(&type);
+    type.encodingName   = "utf-32";
+    type.clientData	= INT2PTR(leFlags);
+    Tcl_CreateEncoding(&type);
+
+    type.toUtfProc	= Utf16ToUtfProc;
+    type.fromUtfProc    = UtfToUtf16Proc;
+    type.freeProc	= NULL;
+    type.nullSize	= 2;
+    type.encodingName   = "utf-16le";
+    type.clientData	= INT2PTR(TCL_ENCODING_LE|ENCODING_UTF);
+    Tcl_CreateEncoding(&type);
+    type.encodingName   = "utf-16be";
+    type.clientData	= INT2PTR(ENCODING_UTF);
+    Tcl_CreateEncoding(&type);
+    type.encodingName   = "utf-16";
+    type.clientData	= INT2PTR(leFlags|ENCODING_UTF);
+    Tcl_CreateEncoding(&type);
+
+#ifndef TCL_NO_DEPRECATED
+    type.encodingName   = "unicode";
+    Tcl_CreateEncoding(&type);
+#endif
+
     /*
      * Need the iso8859-1 encoding in order to process binary data, so force
      * it to always be embedded. Note that this encoding *must* be a proper
@@ -686,13 +759,14 @@ TclFinalizeEncodingSubsystem(void)
  *-------------------------------------------------------------------------
  */
 
+#if !defined(TCL_NO_DEPRECATED) && TCL_MAJOR_VERSION < 9
 const char *
 Tcl_GetDefaultEncodingDir(void)
 {
     int numDirs;
     Tcl_Obj *first, *searchPath = Tcl_GetEncodingSearchPath();
 
-    TclListObjLength(NULL, searchPath, &numDirs);
+    TclListObjLengthM(NULL, searchPath, &numDirs);
     if (numDirs == 0) {
 	return NULL;
     }
@@ -723,12 +797,13 @@ Tcl_SetDefaultEncodingDir(
     const char *path)
 {
     Tcl_Obj *searchPath = Tcl_GetEncodingSearchPath();
-    Tcl_Obj *directory = Tcl_NewStringObj(path, -1);
+    Tcl_Obj *directory = Tcl_NewStringObj(path, TCL_INDEX_NONE);
 
     searchPath = Tcl_DuplicateObj(searchPath);
     Tcl_ListObjReplace(NULL, searchPath, 0, 0, 1, &directory);
     Tcl_SetEncodingSearchPath(searchPath);
 }
+#endif
 
 /*
  *-------------------------------------------------------------------------
@@ -834,9 +909,6 @@ FreeEncoding(
     if (encodingPtr == NULL) {
 	return;
     }
-    if (encodingPtr->refCount<=0) {
-	Tcl_Panic("FreeEncoding: refcount problem !!!");
-    }
     if (encodingPtr->refCount-- <= 1) {
 	if (encodingPtr->freeProc != NULL) {
 	    encodingPtr->freeProc(encodingPtr->clientData);
@@ -856,7 +928,7 @@ FreeEncoding(
  *
  * Tcl_GetEncodingName --
  *
- *	Given an encoding, return the name that was used to constuct the
+ *	Given an encoding, return the name that was used to construct the
  *	encoding.
  *
  * Results:
@@ -921,7 +993,7 @@ Tcl_GetEncodingNames(
 	Encoding *encodingPtr = (Encoding *)Tcl_GetHashValue(hPtr);
 
 	Tcl_CreateHashEntry(&table,
-		Tcl_NewStringObj(encodingPtr->name, -1), &dummy);
+		Tcl_NewStringObj(encodingPtr->name, TCL_INDEX_NONE), &dummy);
     }
     Tcl_MutexUnlock(&encodingMutex);
 
@@ -951,6 +1023,33 @@ Tcl_GetEncodingNames(
 }
 
 /*
+ *-------------------------------------------------------------------------
+ *
+ * Tcl_GetEncodingNulLength --
+ *
+ *	Given an encoding, return the number of nul bytes used for the
+ *      string termination.
+ *
+ * Results:
+ *	The number of nul bytes used for the string termination.
+ *
+ * Side effects:
+ *	None.
+ *
+ *---------------------------------------------------------------------------
+ */
+Tcl_Size
+Tcl_GetEncodingNulLength(
+    Tcl_Encoding encoding)
+{
+    if (encoding == NULL) {
+	encoding = systemEncoding;
+    }
+
+    return ((Encoding *) encoding)->nullSize;
+}
+
+/*
  *------------------------------------------------------------------------
  *
  * Tcl_SetSystemEncoding --
@@ -1035,9 +1134,26 @@ Tcl_CreateEncoding(
     const Tcl_EncodingType *typePtr)
 				/* The encoding type. */
 {
+    Encoding *encodingPtr = (Encoding *)ckalloc(sizeof(Encoding));
+    encodingPtr->name		= NULL;
+    encodingPtr->toUtfProc	= typePtr->toUtfProc;
+    encodingPtr->fromUtfProc	= typePtr->fromUtfProc;
+    encodingPtr->freeProc	= typePtr->freeProc;
+    encodingPtr->nullSize	= typePtr->nullSize;
+    encodingPtr->clientData	= typePtr->clientData;
+    if (typePtr->nullSize == 2) {
+	encodingPtr->lengthProc = (LengthProc *) unilen;
+    } else if (typePtr->nullSize == 4) {
+	encodingPtr->lengthProc = (LengthProc *) unilen4;
+    } else {
+	encodingPtr->lengthProc = (LengthProc *) strlen;
+    }
+    encodingPtr->refCount	= 1;
+    encodingPtr->hPtr		= NULL;
+
+  if (typePtr->encodingName) {
     Tcl_HashEntry *hPtr;
     int isNew;
-    Encoding *encodingPtr;
     char *name;
 
     Tcl_MutexLock(&encodingMutex);
@@ -1048,30 +1164,17 @@ Tcl_CreateEncoding(
 	 * reference goes away.
 	 */
 
-	encodingPtr = (Encoding *)Tcl_GetHashValue(hPtr);
-	encodingPtr->hPtr = NULL;
+	Encoding *replaceMe = (Encoding *)Tcl_GetHashValue(hPtr);
+	replaceMe->hPtr = NULL;
     }
 
     name = (char *)ckalloc(strlen(typePtr->encodingName) + 1);
-
-    encodingPtr = (Encoding *)ckalloc(sizeof(Encoding));
     encodingPtr->name		= strcpy(name, typePtr->encodingName);
-    encodingPtr->toUtfProc	= typePtr->toUtfProc;
-    encodingPtr->fromUtfProc	= typePtr->fromUtfProc;
-    encodingPtr->freeProc	= typePtr->freeProc;
-    encodingPtr->nullSize	= typePtr->nullSize;
-    encodingPtr->clientData	= typePtr->clientData;
-    if (typePtr->nullSize == 1) {
-	encodingPtr->lengthProc = (LengthProc *) strlen;
-    } else {
-	encodingPtr->lengthProc = (LengthProc *) unilen;
-    }
-    encodingPtr->refCount	= 1;
     encodingPtr->hPtr		= hPtr;
     Tcl_SetHashValue(hPtr, encodingPtr);
 
     Tcl_MutexUnlock(&encodingMutex);
-
+  }
     return (Tcl_Encoding) encodingPtr;
 }
 
@@ -1101,24 +1204,102 @@ Tcl_ExternalToUtfDString(
     Tcl_Encoding encoding,	/* The encoding for the source string, or NULL
 				 * for the default system encoding. */
     const char *src,		/* Source string in specified encoding. */
-    int srcLen,			/* Source string length in bytes, or < 0 for
+    Tcl_Size srcLen,		/* Source string length in bytes, or < 0 for
 				 * encoding-specific string length. */
     Tcl_DString *dstPtr)	/* Uninitialized or free DString in which the
 				 * converted string is stored. */
 {
+    Tcl_ExternalToUtfDStringEx(
+	NULL, encoding, src, srcLen, TCL_ENCODING_PROFILE_TCL8, dstPtr, NULL);
+    return Tcl_DStringValue(dstPtr);
+}
+
+
+/*
+ *-------------------------------------------------------------------------
+ *
+ * Tcl_ExternalToUtfDStringEx --
+ *
+ *	Convert a source buffer from the specified encoding into UTF-8.
+ *	The parameter flags controls the behavior, if any of the bytes in
+ *	the source buffer are invalid or cannot be represented in utf-8.
+ *	Possible flags values:
+ *	target encoding. It should be composed by OR-ing the following:
+ *	- *At most one* of TCL_ENCODING_PROFILE{DEFAULT,TCL8,STRICT}
+ *	- TCL_ENCODING_STOPONERROR: Backward compatibility. Sets the profile
+ *	  to TCL_ENCODING_PROFILE_STRICT overriding any specified profile flags
+ *      Any other flag bits will cause an error to be returned (for future
+ *      compatibility)
+ *
+ * Results:
+ *      The return value is one of
+ *        TCL_OK: success. Converted string in *dstPtr
+ *        TCL_ERROR: error in passed parameters. Error message in interp
+ *        TCL_CONVERT_MULTIBYTE: source ends in truncated multibyte sequence
+ *        TCL_CONVERT_SYNTAX: source is not conformant to encoding definition
+ *        TCL_CONVERT_UNKNOWN: source contained a character that could not
+ *            be represented in target encoding.
+ *
+ * Side effects:
+ *
+ *      TCL_OK: The converted bytes are stored in the DString and NUL
+ *          terminated in an encoding-specific manner.
+ *      TCL_ERROR: an error, message is stored in the interp if not NULL.
+ *      TCL_CONVERT_*: if errorLocPtr is NULL, an error message is stored
+ *          in the interpreter (if not NULL). If errorLocPtr is not NULL,
+ *          no error message is stored as it is expected the caller is
+ *          interested in whatever is decoded so far and not treating this
+ *          as an error condition.
+ *
+ *      In addition, *dstPtr is always initialized and must be cleared
+ *      by the caller irrespective of the return code.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+int
+Tcl_ExternalToUtfDStringEx(
+    Tcl_Interp *interp,         /* For error messages. May be NULL. */
+    Tcl_Encoding encoding,	/* The encoding for the source string, or NULL
+				 * for the default system encoding. */
+    const char *src,		/* Source string in specified encoding. */
+    Tcl_Size srcLen,			/* Source string length in bytes, or < 0 for
+				 * encoding-specific string length. */
+    int flags,			/* Conversion control flags. */
+    Tcl_DString *dstPtr,	/* Uninitialized or free DString in which the
+				 * converted string is stored. */
+    Tcl_Size *errorLocPtr)      /* Where to store the error location
+                                   (or TCL_INDEX_NONE if no error). May
+				   be NULL. */
+{
     char *dst;
     Tcl_EncodingState state;
     const Encoding *encodingPtr;
-    int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars;
+    int result, srcRead, dstWrote, dstChars;
+    Tcl_Size dstLen, soFar;
+    const char *srcStart = src;
 
+    /* DO FIRST - Must always be initialized before returning */
     Tcl_DStringInit(dstPtr);
+
+    if (flags & (TCL_ENCODING_START|TCL_ENCODING_END)) {
+	/* TODO - what other flags are illegal? - See TIP 656 */
+	Tcl_SetObjResult(
+	    interp,
+	    Tcl_NewStringObj(
+		"Parameter error: TCL_ENCODING_{START,STOP} bits set in flags.",
+		TCL_INDEX_NONE));
+	Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALFLAGS", NULL);
+	return TCL_ERROR;
+    }
+
     dst = Tcl_DStringValue(dstPtr);
     dstLen = dstPtr->spaceAvl - 1;
 
     if (encoding == NULL) {
 	encoding = systemEncoding;
     }
-    encodingPtr = (Encoding *) encoding;
+    encodingPtr = (Encoding *)encoding;
 
     if (src == NULL) {
 	srcLen = 0;
@@ -1126,20 +1307,48 @@ Tcl_ExternalToUtfDString(
 	srcLen = encodingPtr->lengthProc(src);
     }
 
-    flags = TCL_ENCODING_START | TCL_ENCODING_END;
+    flags |= TCL_ENCODING_START | TCL_ENCODING_END;
+    if (encodingPtr->toUtfProc == UtfToUtfProc) {
+	flags |= ENCODING_INPUT;
+    }
 
     while (1) {
-	result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen,
-		flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars);
+	result = encodingPtr->toUtfProc(encodingPtr->clientData, src,
+					srcLen, flags, &state, dst, dstLen,
+					&srcRead, &dstWrote, &dstChars);
 	soFar = dst + dstWrote - Tcl_DStringValue(dstPtr);
 
+	src += srcRead;
 	if (result != TCL_CONVERT_NOSPACE) {
+	    Tcl_Size nBytesProcessed = (src - srcStart);
+
 	    Tcl_DStringSetLength(dstPtr, soFar);
-	    return Tcl_DStringValue(dstPtr);
+	    if (errorLocPtr) {
+		/*
+		 * Do not write error message into interpreter if caller
+		 * wants to know error location.
+		 */
+		*errorLocPtr = result == TCL_OK ? TCL_INDEX_NONE : nBytesProcessed;
+	    } else {
+		/* Caller wants error message on failure */
+		if (result != TCL_OK && interp != NULL) {
+		    char buf[TCL_INTEGER_SPACE];
+		    sprintf(buf, "%u", nBytesProcessed);
+		    Tcl_SetObjResult(
+			interp,
+			Tcl_ObjPrintf("unexpected byte sequence starting at index %"
+				      "u: '\\x%02X'",
+				      nBytesProcessed,
+				      UCHAR(srcStart[nBytesProcessed])));
+		    Tcl_SetErrorCode(
+			interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", buf, NULL);
+		}
+	    }
+	    return result;
 	}
 
+	/* Expand space and continue */
 	flags &= ~TCL_ENCODING_START;
-	src += srcRead;
 	srcLen -= srcRead;
 	if (Tcl_DStringLength(dstPtr) == 0) {
 	    Tcl_DStringSetLength(dstPtr, dstLen);
@@ -1170,11 +1379,11 @@ Tcl_ExternalToUtfDString(
 
 int
 Tcl_ExternalToUtf(
-    Tcl_Interp *interp,		/* Interp for error return, if not NULL. */
+    TCL_UNUSED(Tcl_Interp *),	/* TODO: Re-examine this. */
     Tcl_Encoding encoding,	/* The encoding for the source string, or NULL
 				 * for the default system encoding. */
     const char *src,		/* Source string in specified encoding. */
-    int srcLen,			/* Source string length in bytes, or < 0 for
+    Tcl_Size srcLen,		/* Source string length in bytes, or < 0 for
 				 * encoding-specific string length. */
     int flags,			/* Conversion control flags. */
     Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
@@ -1184,7 +1393,7 @@ Tcl_ExternalToUtf(
 				 * routine under control of flags argument. */
     char *dst,			/* Output buffer in which converted string is
 				 * stored. */
-    int dstLen,			/* The maximum length of output buffer in
+    Tcl_Size dstLen,		/* The maximum length of output buffer in
 				 * bytes. */
     int *srcReadPtr,		/* Filled with the number of bytes from the
 				 * source string that were converted. This may
@@ -1233,21 +1442,24 @@ Tcl_ExternalToUtf(
     }
 
     if (!noTerminate) {
-        if (dstLen < 1) {
-            return TCL_CONVERT_NOSPACE;
-        }
+	if (dstLen < 1) {
+	    return TCL_CONVERT_NOSPACE;
+	}
 	/*
 	 * If there are any null characters in the middle of the buffer,
-	 * they will converted to the UTF-8 null character (\xC080). To get
+	 * they will converted to the UTF-8 null character (\xC0\x80). To get
 	 * the actual \0 at the end of the destination buffer, we need to
 	 * append it manually.  First make room for it...
 	 */
 
 	dstLen--;
     } else {
-        if (dstLen < 0) {
-            return TCL_CONVERT_NOSPACE;
-        }
+	if (dstLen < 0) {
+	    return TCL_CONVERT_NOSPACE;
+	}
+    }
+    if (encodingPtr->toUtfProc == UtfToUtfProc) {
+	flags |= ENCODING_INPUT;
     }
     do {
 	Tcl_EncodingState savedState = *statePtr;
@@ -1258,7 +1470,7 @@ Tcl_ExternalToUtf(
 	if (*dstCharsPtr <= maxChars) {
 	    break;
 	}
-	dstLen = Tcl_UtfAtIndex(dst, maxChars) - dst + (TCL_UTF_MAX - 1);
+	dstLen = TclUtfAtIndex(dst, maxChars) - dst + (TCL_UTF_MAX - 1);
 	*statePtr = savedState;
     } while (1);
     if (!noTerminate) {
@@ -1295,17 +1507,92 @@ Tcl_UtfToExternalDString(
     Tcl_Encoding encoding,	/* The encoding for the converted string, or
 				 * NULL for the default system encoding. */
     const char *src,		/* Source string in UTF-8. */
-    int srcLen,			/* Source string length in bytes, or < 0 for
+    Tcl_Size srcLen,		/* Source string length in bytes, or < 0 for
 				 * strlen(). */
     Tcl_DString *dstPtr)	/* Uninitialized or free DString in which the
 				 * converted string is stored. */
 {
+    Tcl_UtfToExternalDStringEx(
+	NULL, encoding, src, srcLen, TCL_ENCODING_PROFILE_TCL8, dstPtr, NULL);
+    return Tcl_DStringValue(dstPtr);
+}
+
+
+/*
+ *-------------------------------------------------------------------------
+ *
+ * Tcl_UtfToExternalDStringEx --
+ *
+ *	Convert a source buffer from UTF-8 to the specified encoding.
+ *	The parameter flags controls the behavior, if any of the bytes in
+ *	the source buffer are invalid or cannot be represented in the
+ *	target encoding. It should be composed by OR-ing the following:
+ *	- *At most one* of TCL_ENCODING_PROFILE{DEFAULT,TCL8,STRICT}
+ *	- TCL_ENCODING_STOPONERROR: Backward compatibility. Sets the profile
+ *	  to TCL_ENCODING_PROFILE_STRICT overriding any specified profile flags
+ *
+ * Results:
+ *      The return value is one of
+ *        TCL_OK: success. Converted string in *dstPtr
+ *        TCL_ERROR: error in passed parameters. Error message in interp
+ *        TCL_CONVERT_MULTIBYTE: source ends in truncated multibyte sequence
+ *        TCL_CONVERT_SYNTAX: source is not conformant to encoding definition
+ *        TCL_CONVERT_UNKNOWN: source contained a character that could not
+ *            be represented in target encoding.
+ *
+ * Side effects:
+ *
+ *      TCL_OK: The converted bytes are stored in the DString and NUL
+ *          terminated in an encoding-specific manner
+ *      TCL_ERROR: an error, message is stored in the interp if not NULL.
+ *      TCL_CONVERT_*: if errorLocPtr is NULL, an error message is stored
+ *          in the interpreter (if not NULL). If errorLocPtr is not NULL,
+ *          no error message is stored as it is expected the caller is
+ *          interested in whatever is decoded so far and not treating this
+ *          as an error condition.
+ *
+ *      In addition, *dstPtr is always initialized and must be cleared
+ *      by the caller irrespective of the return code.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+int
+Tcl_UtfToExternalDStringEx(
+    Tcl_Interp *interp,         /* For error messages. May be NULL. */
+    Tcl_Encoding encoding,	/* The encoding for the converted string, or
+				 * NULL for the default system encoding. */
+    const char *src,		/* Source string in UTF-8. */
+    Tcl_Size srcLen,		/* Source string length in bytes, or < 0 for
+				 * strlen(). */
+    int flags,			/* Conversion control flags. */
+    Tcl_DString *dstPtr,	/* Uninitialized or free DString in which the
+				 * converted string is stored. */
+    Tcl_Size *errorLocPtr)      /* Where to store the error location
+                                   (or TCL_INDEX_NONE if no error). May
+				   be NULL. */
+{
     char *dst;
     Tcl_EncodingState state;
     const Encoding *encodingPtr;
-    int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars;
+    int result, srcRead, dstWrote, dstChars;
+    const char *srcStart = src;
+    Tcl_Size dstLen, soFar;
 
+    /* DO FIRST - must always be initialized on return */
     Tcl_DStringInit(dstPtr);
+
+    if (flags & (TCL_ENCODING_START|TCL_ENCODING_END)) {
+	/* TODO - what other flags are illegal? - See TIP 656 */
+	Tcl_SetObjResult(
+	    interp,
+	    Tcl_NewStringObj(
+		"Parameter error: TCL_ENCODING_{START,STOP} bits set in flags.",
+		TCL_INDEX_NONE));
+	Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALFLAGS", NULL);
+	return TCL_ERROR;
+    }
+
     dst = Tcl_DStringValue(dstPtr);
     dstLen = dstPtr->spaceAvl - 1;
 
@@ -1319,23 +1606,49 @@ Tcl_UtfToExternalDString(
     } else if (srcLen < 0) {
 	srcLen = strlen(src);
     }
-    flags = TCL_ENCODING_START | TCL_ENCODING_END;
+
+    flags |= TCL_ENCODING_START | TCL_ENCODING_END;
     while (1) {
 	result = encodingPtr->fromUtfProc(encodingPtr->clientData, src,
-		srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote,
-		&dstChars);
+					  srcLen, flags, &state, dst, dstLen,
+					  &srcRead, &dstWrote, &dstChars);
 	soFar = dst + dstWrote - Tcl_DStringValue(dstPtr);
 
+	src += srcRead;
 	if (result != TCL_CONVERT_NOSPACE) {
-	    if (encodingPtr->nullSize == 2) {
-		Tcl_DStringSetLength(dstPtr, soFar + 1);
+	    Tcl_Size nBytesProcessed = (src - srcStart);
+	    int i = soFar + encodingPtr->nullSize - 1;
+	    while (i >= soFar) {
+		Tcl_DStringSetLength(dstPtr, i--);
 	    }
-	    Tcl_DStringSetLength(dstPtr, soFar);
-	    return Tcl_DStringValue(dstPtr);
+	    if (errorLocPtr) {
+		/*
+		 * Do not write error message into interpreter if caller
+		 * wants to know error location.
+		 */
+		*errorLocPtr = result == TCL_OK ? TCL_INDEX_NONE : nBytesProcessed;
+	    } else {
+		/* Caller wants error message on failure */
+		if (result != TCL_OK && interp != NULL) {
+		    int pos = Tcl_NumUtfChars(srcStart, nBytesProcessed);
+		    int ucs4;
+		    char buf[TCL_INTEGER_SPACE];
+		    TclUtfToUCS4(&srcStart[nBytesProcessed], &ucs4);
+		    sprintf(buf, "%u", nBytesProcessed);
+		    Tcl_SetObjResult(
+			interp,
+			Tcl_ObjPrintf(
+			    "unexpected character at index %d: 'U+%06X'",
+			    pos,
+			    ucs4));
+		    Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE",
+				     buf, NULL);
+		}
+	    }
+	    return result;
 	}
 
 	flags &= ~TCL_ENCODING_START;
-	src += srcRead;
 	srcLen -= srcRead;
 	if (Tcl_DStringLength(dstPtr) == 0) {
 	    Tcl_DStringSetLength(dstPtr, dstLen);
@@ -1366,11 +1679,11 @@ Tcl_UtfToExternalDString(
 
 int
 Tcl_UtfToExternal(
-    Tcl_Interp *interp,		/* Interp for error return, if not NULL. */
+    TCL_UNUSED(Tcl_Interp *),	/* TODO: Re-examine this. */
     Tcl_Encoding encoding,	/* The encoding for the converted string, or
 				 * NULL for the default system encoding. */
     const char *src,		/* Source string in UTF-8. */
-    int srcLen,			/* Source string length in bytes, or < 0 for
+    Tcl_Size srcLen,		/* Source string length in bytes, or < 0 for
 				 * strlen(). */
     int flags,			/* Conversion control flags. */
     Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
@@ -1380,7 +1693,7 @@ Tcl_UtfToExternal(
 				 * routine under control of flags argument. */
     char *dst,			/* Output buffer in which converted string
 				 * is stored. */
-    int dstLen,			/* The maximum length of output buffer in
+    Tcl_Size dstLen,		/* The maximum length of output buffer in
 				 * bytes. */
     int *srcReadPtr,		/* Filled with the number of bytes from the
 				 * source string that were converted. This may
@@ -1423,20 +1736,17 @@ Tcl_UtfToExternal(
     }
 
     if (dstLen < encodingPtr->nullSize) {
-        return TCL_CONVERT_NOSPACE;
+	return TCL_CONVERT_NOSPACE;
     }
     dstLen -= encodingPtr->nullSize;
     result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, srcLen,
-	    flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
-	    dstCharsPtr);
+	    flags, statePtr, dst, dstLen, srcReadPtr,
+	    dstWrotePtr, dstCharsPtr);
     /*
      * Buffer is terminated irrespective of result. Not sure this is
      * reasonable but keep for historical/compatibility reasons.
      */
-    if (encodingPtr->nullSize == 2) {
-	dst[*dstWrotePtr + 1] = '\0';
-    }
-    dst[*dstWrotePtr] = '\0';
+    memset(&dst[*dstWrotePtr], '\0', encodingPtr->nullSize);
 
     return result;
 }
@@ -1459,14 +1769,15 @@ Tcl_UtfToExternal(
  *---------------------------------------------------------------------------
  */
 #undef Tcl_FindExecutable
-void
+const char *
 Tcl_FindExecutable(
     const char *argv0)		/* The value of the application's argv[0]
 				 * (native). */
 {
-    TclInitSubsystems();
+    const char *version = Tcl_InitSubsystems();
     TclpSetInitialEncodings();
     TclpFindExecutable(argv0);
+    return version;
 }
 
 /*
@@ -1494,17 +1805,17 @@ OpenEncodingFileChannel(
     const char *name)		/* The name of the encoding file on disk and
 				 * also the name for new encoding. */
 {
-    Tcl_Obj *nameObj = Tcl_NewStringObj(name, -1);
+    Tcl_Obj *nameObj = Tcl_NewStringObj(name, TCL_INDEX_NONE);
     Tcl_Obj *fileNameObj = Tcl_DuplicateObj(nameObj);
     Tcl_Obj *searchPath = Tcl_DuplicateObj(Tcl_GetEncodingSearchPath());
     Tcl_Obj *map = TclGetProcessGlobalValue(&encodingFileMap);
     Tcl_Obj **dir, *path, *directory = NULL;
     Tcl_Channel chan = NULL;
-    int i, numDirs;
+    Tcl_Size i, numDirs;
 
-    TclListObjGetElements(NULL, searchPath, &numDirs, &dir);
+    TclListObjGetElementsM(NULL, searchPath, &numDirs, &dir);
     Tcl_IncrRefCount(nameObj);
-    Tcl_AppendToObj(fileNameObj, ".enc", -1);
+    Tcl_AppendToObj(fileNameObj, ".enc", TCL_INDEX_NONE);
     Tcl_IncrRefCount(fileNameObj);
     Tcl_DictObjGet(NULL, map, nameObj, &directory);
 
@@ -1723,7 +2034,7 @@ LoadTableEncoding(
     };
 
     Tcl_DStringInit(&lineString);
-    if (Tcl_Gets(chan, &lineString) == -1) {
+    if (Tcl_Gets(chan, &lineString) == TCL_IO_FAILURE) {
 	return NULL;
     }
     line = Tcl_DStringValue(&lineString);
@@ -1765,7 +2076,7 @@ LoadTableEncoding(
     for (i = 0; i < numPages; i++) {
 	int ch;
 	const char *p;
-	int expected = 3 + 16 * (16 * 4 + 1);
+	Tcl_Size expected = 3 + 16 * (16 * 4 + 1);
 
 	if (Tcl_ReadChars(chan, objPtr, expected, 0) != expected) {
 	    return NULL;
@@ -1849,8 +2160,8 @@ LoadTableEncoding(
 	 */
 
 	if (dataPtr->fromUnicode[0] != NULL) {
-	    if (dataPtr->fromUnicode[0]['\\'] == '\0') {
-		dataPtr->fromUnicode[0]['\\'] = '\\';
+	    if (dataPtr->fromUnicode[0][(int)'\\'] == '\0') {
+		dataPtr->fromUnicode[0][(int)'\\'] = '\\';
 	    }
 	}
     }
@@ -2001,7 +2312,7 @@ LoadEscapeEncoding(
     Tcl_DStringInit(&escapeData);
 
     while (1) {
-	int argc;
+	Tcl_Size argc;
 	const char **argv;
 	char *line;
 	Tcl_DString lineString;
@@ -2053,7 +2364,7 @@ LoadEscapeEncoding(
 	Tcl_DStringFree(&lineString);
     }
 
-    size = TclOffset(EscapeEncodingData, subTables)
+    size = offsetof(EscapeEncodingData, subTables)
 	    + Tcl_DStringLength(&escapeData);
     dataPtr = (EscapeEncodingData *)ckalloc(size);
     dataPtr->initLen = strlen(init);
@@ -2111,15 +2422,11 @@ LoadEscapeEncoding(
 
 static int
 BinaryProc(
-    ClientData clientData,	/* Not used. */
+    TCL_UNUSED(void *),
     const char *src,		/* Source string (unknown encoding). */
     int srcLen,			/* Source string length in bytes. */
     int flags,			/* Conversion control flags. */
-    Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
-				 * information used during a piecewise
-				 * conversion. Contents of statePtr are
-				 * initialized and/or reset by conversion
-				 * routine under control of flags argument. */
+    TCL_UNUSED(Tcl_EncodingState *),
     char *dst,			/* Output buffer in which converted string is
 				 * stored. */
     int dstLen,			/* The maximum length of output buffer in
@@ -2140,6 +2447,7 @@ BinaryProc(
     if (dstLen < 0) {
 	dstLen = 0;
     }
+    flags = TclEncodingSetProfileFlags(flags);
     if ((flags & TCL_ENCODING_CHAR_LIMIT) && srcLen > *dstCharsPtr) {
 	srcLen = *dstCharsPtr;
     }
@@ -2158,11 +2466,11 @@ BinaryProc(
 /*
  *-------------------------------------------------------------------------
  *
- * UtfIntToUtfExtProc --
+ * UtfToUtfProc --
  *
- *	Convert from UTF-8 to UTF-8. While converting null-bytes from the
- *	Tcl's internal representation (0xC0, 0x80) to the official
- *	representation (0x00). See UtfToUtfProc for details.
+ *	Converts from UTF-8 to UTF-8. Note that the UTF-8 to UTF-8 translation
+ *	is not a no-op, because it turns a stream of improperly formed
+ *	UTF-8 into a properly-formed stream.
  *
  * Results:
  *	Returns TCL_OK if conversion was successful.
@@ -2174,18 +2482,14 @@ BinaryProc(
  */
 
 static int
-UtfIntToUtfExtProc(
-    ClientData clientData,	/* Not used. */
+UtfToUtfProc(
+    void *clientData,		/* additional flags */
     const char *src,		/* Source string in UTF-8. */
     int srcLen,			/* Source string length in bytes. */
-    int flags,			/* Conversion control flags. */
-    Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
-				 * information used during a piecewise
-				 * conversion. Contents of statePtr are
-				 * initialized and/or reset by conversion
-				 * routine under control of flags argument. */
-    char *dst,			/* Output buffer in which converted string
-				 * is stored. */
+    int flags,			/* TCL_ENCODING_* conversion control flags. */
+    TCL_UNUSED(Tcl_EncodingState *),
+    char *dst,			/* Output buffer in which converted string is
+				 * stored. */
     int dstLen,			/* The maximum length of output buffer in
 				 * bytes. */
     int *srcReadPtr,		/* Filled with the number of bytes from the
@@ -2200,18 +2504,195 @@ UtfIntToUtfExtProc(
 				 * correspond to the bytes stored in the
 				 * output buffer. */
 {
-    return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
-	    srcReadPtr, dstWrotePtr, dstCharsPtr, 1);
-}
+    const char *srcStart, *srcEnd, *srcClose;
+    const char *dstStart, *dstEnd;
+    int result, numChars, charLimit = INT_MAX;
+    int ch;
+    int profile;
+
+    result = TCL_OK;
+
+    srcStart = src;
+    srcEnd = src + srcLen;
+    srcClose = srcEnd;
+    flags = TclEncodingSetProfileFlags(flags);
+    if ((flags & TCL_ENCODING_END) == 0) {
+	srcClose -= 6;
+    }
+    if (flags & TCL_ENCODING_CHAR_LIMIT) {
+	charLimit = *dstCharsPtr;
+    }
+
+    dstStart = dst;
+    flags |= PTR2INT(clientData);
+    dstEnd = dst + dstLen - ((flags & ENCODING_UTF) ? TCL_UTF_MAX : 6);
+
+    profile = CHANNEL_PROFILE_GET(flags);
+    for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
+
+	if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
+	    /*
+	     * If there is more string to follow, this will ensure that the
+	     * last UTF-8 character in the source buffer hasn't been cut off.
+	     */
+
+	    result = TCL_CONVERT_MULTIBYTE;
+	    break;
+	}
+	if (dst > dstEnd) {
+	    result = TCL_CONVERT_NOSPACE;
+	    break;
+	}
+	if (UCHAR(*src) < 0x80 && !((UCHAR(*src) == 0) && (flags & ENCODING_INPUT))) {
+	    /*
+	     * Copy 7bit characters, but skip null-bytes when we are in input
+	     * mode, so that they get converted to \xC0\x80.
+	     */
+	    *dst++ = *src++;
+	} else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) &&
+		 (UCHAR(src[1]) == 0x80) &&
+		 (!(flags & ENCODING_INPUT) || PROFILE_STRICT(profile) ||
+		  PROFILE_REPLACE(profile))) {
+	    /* Special sequence \xC0\x80 */
+	    if ((PROFILE_STRICT(profile) || PROFILE_REPLACE(profile)) && (flags & ENCODING_INPUT)) {
+		if (PROFILE_REPLACE(profile)) {
+		   dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
+		   src += 2;
+		} else {
+		   /* PROFILE_STRICT */
+		   result = TCL_CONVERT_SYNTAX;
+		   break;
+		}
+	    } else {
+		/*
+		 * Convert 0xC080 to real nulls when we are in output mode,
+		 * irrespective of the profile.
+		 */
+		*dst++ = 0;
+		src += 2;
+	    }
+
+	} else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
+	    /*
+	     * Incomplete byte sequence.
+	     * Always check before using TclUtfToUCS4. Not doing can so
+	     * cause it run beyond the end of the buffer! If we happen such an
+	     * incomplete char its bytes are made to represent themselves
+	     * unless the user has explicitly asked to be told.
+	     */
+
+	    if (flags & ENCODING_INPUT) {
+		/* Incomplete bytes for modified UTF-8 target */
+		if (PROFILE_STRICT(profile)) {
+		    result = (flags & TCL_ENCODING_CHAR_LIMIT)
+			       ? TCL_CONVERT_MULTIBYTE
+			       : TCL_CONVERT_SYNTAX;
+		    break;
+		}
+	    }
+	    if (PROFILE_REPLACE(profile)) {
+		ch = UNICODE_REPLACE_CHAR;
+		++src;
+	    } else {
+		/* TCL_ENCODING_PROFILE_TCL8 */
+		char chbuf[2];
+		chbuf[0] = UCHAR(*src++); chbuf[1] = 0;
+		TclUtfToUCS4(chbuf, &ch);
+	    }
+	    dst += Tcl_UniCharToUtf(ch, dst);
+	} else {
+	    int low;
+	    int isInvalid = 0;
+	    size_t len = TclUtfToUCS4(src, &ch);
+	    if (flags & ENCODING_INPUT) {
+		if ((len < 2) && (ch != 0)) {
+		    isInvalid = 1;
+		} else if ((ch > 0xFFFF) && !(flags & ENCODING_UTF)) {
+		    isInvalid = 1;
+		}
+		if (isInvalid) {
+		    if (PROFILE_STRICT(profile)) {
+			result = TCL_CONVERT_SYNTAX;
+			break;
+		    } else if (PROFILE_REPLACE(profile)) {
+			ch = UNICODE_REPLACE_CHAR;
+		    }
+		}
+	    }
 
+	    const char *saveSrc = src;
+	    src += len;
+	    if (!(flags & ENCODING_UTF) && !(flags & ENCODING_INPUT) && (ch > 0x3FF)) {
+		if (ch > 0xFFFF) {
+		    /* CESU-8 6-byte sequence for chars > U+FFFF */
+		    ch -= 0x10000;
+		    *dst++ = 0xED;
+		    *dst++ = (char) (((ch >> 16) & 0x0F) | 0xA0);
+		    *dst++ = (char) (((ch >> 10) & 0x3F) | 0x80);
+		    ch = (ch & 0x0CFF) | 0xDC00;
+		}
+		goto cesu8;
+	    } else if ((ch | 0x7FF) == 0xDFFF) {
+		/*
+		 * A surrogate character is detected, handle especially.
+		 */
+		if (PROFILE_STRICT(profile) && (flags & ENCODING_UTF)) {
+		    result = TCL_CONVERT_UNKNOWN;
+		    src = saveSrc;
+		    break;
+		}
+		if (PROFILE_REPLACE(profile)) {
+		    ch = UNICODE_REPLACE_CHAR;
+		} else {
+		    low = ch;
+		    len = (src <= srcEnd - 3) ? TclUtfToUCS4(src, &low) : 0;
+
+		    if ((!LOW_SURROGATE(low)) || (ch & 0x400)) {
+
+			if (PROFILE_STRICT(profile)) {
+			    result = TCL_CONVERT_UNKNOWN;
+			    src = saveSrc;
+			    break;
+			}
+cesu8:
+			*dst++ = (char)(((ch >> 12) | 0xE0) & 0xEF);
+			*dst++ = (char)(((ch >> 6) | 0x80) & 0xBF);
+			*dst++ = (char)((ch | 0x80) & 0xBF);
+			continue;
+		    }
+		    src += len;
+		    dst += Tcl_UniCharToUtf(ch, dst);
+		    ch = low;
+		}
+	    } else if (PROFILE_STRICT(profile) &&
+		       (!(flags & ENCODING_INPUT)) &&
+		       SURROGATE(ch)) {
+		result = TCL_CONVERT_UNKNOWN;
+		src = saveSrc;
+		break;
+	    } else if (PROFILE_STRICT(profile) &&
+		       (flags & ENCODING_INPUT) &&
+		       SURROGATE(ch)) {
+		result = TCL_CONVERT_SYNTAX;
+		src = saveSrc;
+		break;
+	    }
+	    dst += Tcl_UniCharToUtf(ch, dst);
+	}
+    }
+
+    *srcReadPtr = src - srcStart;
+    *dstWrotePtr = dst - dstStart;
+    *dstCharsPtr = numChars;
+    return result;
+}
+
 /*
  *-------------------------------------------------------------------------
  *
- * UtfExtToUtfIntProc --
+ * Utf32ToUtfProc --
  *
- *	Convert from UTF-8 to UTF-8 while converting null-bytes from the
- *	official representation (0x00) to Tcl's internal representation (0xC0,
- *	0x80). See UtfToUtfProc for details.
+ *	Convert from UTF-32 to UTF-8.
  *
  * Results:
  *	Returns TCL_OK if conversion was successful.
@@ -2223,16 +2704,12 @@ UtfIntToUtfExtProc(
  */
 
 static int
-UtfExtToUtfIntProc(
-    ClientData clientData,	/* Not used. */
-    const char *src,		/* Source string in UTF-8. */
+Utf32ToUtfProc(
+    void *clientData,	/* additional flags, e.g. TCL_ENCODING_LE */
+    const char *src,		/* Source string in Unicode. */
     int srcLen,			/* Source string length in bytes. */
     int flags,			/* Conversion control flags. */
-    Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
-				 * information used during a piecewise
-				 * conversion. Contents of statePtr are
-				 * initialized and/or reset by conversion
-				 * routine under control of flags argument. */
+    TCL_UNUSED(Tcl_EncodingState *),
     char *dst,			/* Output buffer in which converted string is
 				 * stored. */
     int dstLen,			/* The maximum length of output buffer in
@@ -2249,18 +2726,128 @@ UtfExtToUtfIntProc(
 				 * correspond to the bytes stored in the
 				 * output buffer. */
 {
-    return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
-	    srcReadPtr, dstWrotePtr, dstCharsPtr, 0);
-}
+    const char *srcStart, *srcEnd;
+    const char *dstEnd, *dstStart;
+    int result, numChars, charLimit = INT_MAX;
+    int ch = 0, bytesLeft = srcLen % 4;
+
+    flags = TclEncodingSetProfileFlags(flags);
+    flags |= PTR2INT(clientData);
+    if (flags & TCL_ENCODING_CHAR_LIMIT) {
+	charLimit = *dstCharsPtr;
+    }
+    result = TCL_OK;
+
+    /*
+     * Check alignment with utf-32 (4 == sizeof(UTF-32))
+     */
+    if (bytesLeft != 0) {
+	/* We have a truncated code unit */
+	result = TCL_CONVERT_MULTIBYTE;
+	srcLen -= bytesLeft;
+    }
+
+    /*
+     * If last code point is a high surrogate, we cannot handle that yet,
+     * unless we are at the end.
+     */
+
+    if (!(flags & TCL_ENCODING_END) && (srcLen >= 4) &&
+	    ((src[srcLen - ((flags & TCL_ENCODING_LE)?3:2)] & 0xFC) == 0xD8) &&
+	    ((src[srcLen - ((flags & TCL_ENCODING_LE)?2:3)]) == 0) &&
+	    ((src[srcLen - ((flags & TCL_ENCODING_LE)?1:4)]) == 0)) {
+	result = TCL_CONVERT_MULTIBYTE;
+	srcLen-= 4;
+    }
+
+    srcStart = src;
+    srcEnd = src + srcLen;
+
+    dstStart = dst;
+    dstEnd = dst + dstLen - TCL_UTF_MAX;
+
+    for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
+	if (dst > dstEnd) {
+	    result = TCL_CONVERT_NOSPACE;
+	    break;
+	}
+
+	int prev = ch;
+	if (flags & TCL_ENCODING_LE) {
+	    ch = (src[3] & 0xFF) << 24 | (src[2] & 0xFF) << 16 | (src[1] & 0xFF) << 8 | (src[0] & 0xFF);
+	} else {
+	    ch = (src[0] & 0xFF) << 24 | (src[1] & 0xFF) << 16 | (src[2] & 0xFF) << 8 | (src[3] & 0xFF);
+	}
+	if (HIGH_SURROGATE(prev) && !LOW_SURROGATE(ch)) {
+	    /* Bug [10c2c17c32]. If Hi surrogate not followed by Lo surrogate, finish 3-byte UTF-8 */
+	    dst += Tcl_UniCharToUtf(-1, dst);
+	}
+
+	if ((unsigned)ch > 0x10FFFF) {
+	    ch = UNICODE_REPLACE_CHAR;
+	    if (PROFILE_STRICT(flags)) {
+		result = TCL_CONVERT_SYNTAX;
+		break;
+	    }
+	} else if (PROFILE_STRICT(flags) && SURROGATE(ch)) {
+	    result = TCL_CONVERT_SYNTAX;
+	    ch = 0;
+	    break;
+	} else if (PROFILE_REPLACE(flags) && SURROGATE(ch)) {
+	    ch = UNICODE_REPLACE_CHAR;
+	}
+
+	/*
+	 * Special case for 1-byte utf chars for speed. Make sure we work with
+	 * unsigned short-size data.
+	 */
+
+	if ((unsigned)ch - 1 < 0x7F) {
+	    *dst++ = (ch & 0xFF);
+	} else {
+	    if (!HIGH_SURROGATE(prev) && LOW_SURROGATE(ch)) {
+		*dst = 0; /* In case of lower surrogate, don't try to combine */
+	    }
+	    dst += Tcl_UniCharToUtf(ch, dst);
+	}
+	src += 4;
+    }
+
+    if (HIGH_SURROGATE(ch)) {
+	/* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */
+	dst += Tcl_UniCharToUtf(-1, dst);
+    }
+
+    if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) {
+	/* We have a code fragment left-over at the end */
+	if (dst > dstEnd) {
+	    result = TCL_CONVERT_NOSPACE;
+	} else {
+	    /* destination is not full, so we really are at the end now */
+	    if (PROFILE_STRICT(flags)) {
+		result = TCL_CONVERT_SYNTAX;
+	    } else {
+		/* PROFILE_REPLACE or PROFILE_TCL8 */
+		result = TCL_OK;
+		dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
+		numChars++;
+		src += bytesLeft; /* Go past truncated code unit */
+	    }
+	}
+    }
 
+    *srcReadPtr = src - srcStart;
+    *dstWrotePtr = dst - dstStart;
+    *dstCharsPtr = numChars;
+    return result;
+}
+
 /*
  *-------------------------------------------------------------------------
  *
- * UtfToUtfProc --
+ * UtfToUtf32Proc --
  *
- *	Converts from UTF-8 to UTF-8. Note that the UTF-8 to UTF-8 translation
- *	is not a no-op, because it turns a stream of improperly formed
- *	UTF-8 into a properly-formed stream.
+ *	Convert from UTF-8 to UTF-32.
  *
  * Results:
  *	Returns TCL_OK if conversion was successful.
@@ -2272,16 +2859,12 @@ UtfExtToUtfIntProc(
  */
 
 static int
-UtfToUtfProc(
-    ClientData clientData,	/* Not used. */
+UtfToUtf32Proc(
+    void *clientData,	/* additional flags, e.g. TCL_ENCODING_LE */
     const char *src,		/* Source string in UTF-8. */
     int srcLen,			/* Source string length in bytes. */
     int flags,			/* Conversion control flags. */
-    Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
-				 * information used during a piecewise
-				 * conversion. Contents of statePtr are
-				 * initialized and/or reset by conversion
-				 * routine under control of flags argument. */
+    TCL_UNUSED(Tcl_EncodingState *),
     char *dst,			/* Output buffer in which converted string is
 				 * stored. */
     int dstLen,			/* The maximum length of output buffer in
@@ -2294,37 +2877,28 @@ UtfToUtfProc(
     int *dstWrotePtr,		/* Filled with the number of bytes that were
 				 * stored in the output buffer as a result of
 				 * the conversion. */
-    int *dstCharsPtr,		/* Filled with the number of characters that
+    int *dstCharsPtr)		/* Filled with the number of characters that
 				 * correspond to the bytes stored in the
 				 * output buffer. */
-    int pureNullMode)		/* Convert embedded nulls from internal
-				 * representation to real null-bytes or vice
-				 * versa. Also combine or separate surrogate pairs */
 {
-    const char *srcStart, *srcEnd, *srcClose;
-    const char *dstStart, *dstEnd;
-    int result, numChars, charLimit = INT_MAX;
-    Tcl_UniChar *chPtr = (Tcl_UniChar *) statePtr;
-
-    if (flags & TCL_ENCODING_START) {
-    	*statePtr = 0;
-    }
-    result = TCL_OK;
+    const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
+    int result, numChars;
+    int ch, len;
 
     srcStart = src;
     srcEnd = src + srcLen;
     srcClose = srcEnd;
+    flags = TclEncodingSetProfileFlags(flags);
     if ((flags & TCL_ENCODING_END) == 0) {
-	srcClose -= 6;
-    }
-    if (flags & TCL_ENCODING_CHAR_LIMIT) {
-	charLimit = *dstCharsPtr;
+	srcClose -= TCL_UTF_MAX;
     }
 
     dstStart = dst;
-    dstEnd = dst + dstLen - ((pureNullMode == 1) ? 4 : TCL_UTF_MAX);
+    dstEnd = dst + dstLen - sizeof(Tcl_UniChar);
+    flags |= PTR2INT(clientData);
 
-    for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
+    result = TCL_OK;
+    for (numChars = 0; src < srcEnd; numChars++) {
 	if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
 	    /*
 	     * If there is more string to follow, this will ensure that the
@@ -2338,77 +2912,27 @@ UtfToUtfProc(
 	    result = TCL_CONVERT_NOSPACE;
 	    break;
 	}
-	if (UCHAR(*src) < 0x80 && !((UCHAR(*src) == 0) && (pureNullMode == 0))) {
-	    /*
-	     * Copy 7bit characters, but skip null-bytes when we are in input
-	     * mode, so that they get converted to 0xC080.
-	     */
-
-	    *dst++ = *src++;
-	    *chPtr = 0; /* reset surrogate handling */
-	} else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd)
-		&& (UCHAR(src[1]) == 0x80) && (pureNullMode == 1)) {
-	    /*
-	     * Convert 0xC080 to real nulls when we are in output mode.
-	     */
-
-	    *dst++ = 0;
-	    *chPtr = 0; /* reset surrogate handling */
-	    src += 2;
-	} else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
-	    /*
-	     * Always check before using TclUtfToUniChar. Not doing can so
-	     * cause it run beyond the end of the buffer! If we happen such an
-	     * incomplete char its bytes are made to represent themselves
-	     * unless the user has explicitly asked to be told.
-	     */
-
-	    if ((flags & TCL_ENCODING_STOPONERROR) && (pureNullMode == 0)) {
-		result = TCL_CONVERT_MULTIBYTE;
+	len = TclUtfToUCS4(src, &ch);
+	if (SURROGATE(ch)) {
+	    if (PROFILE_STRICT(flags)) {
+		result = TCL_CONVERT_UNKNOWN;
 		break;
 	    }
-	    *chPtr = UCHAR(*src);
-	    src += 1;
-	    dst += Tcl_UniCharToUtf(*chPtr, dst);
-	} else {
-	    size_t len = TclUtfToUniChar(src, chPtr);
-	    if ((len < 2) && (*chPtr != 0) && (flags & TCL_ENCODING_STOPONERROR)
-		    && ((*chPtr & ~0x7FF) != 0xD800) && (pureNullMode == 0)) {
-		result = TCL_CONVERT_SYNTAX;
-		break;
+	    if (PROFILE_REPLACE(flags)) {
+		ch = UNICODE_REPLACE_CHAR;
 	    }
-	    src += len;
-	    if ((*chPtr & ~0x7FF) == 0xD800) {
-		Tcl_UniChar low;
-		/* A surrogate character is detected, handle especially */
-#if TCL_UTF_MAX <= 4
-	    if ((len < 3) && ((src[3 - len] & 0xC0) != 0x80)) {
-	    /* It's invalid. See [ed29806ba] */
-		*chPtr = UCHAR(src[-1]);
-		dst += Tcl_UniCharToUtf(*chPtr, dst);
-		continue;
-	    }
-#endif
-		low = *chPtr;
-		len = (src <= srcEnd-3) ? Tcl_UtfToUniChar(src, &low) : 0;
-		if (((low & ~0x3FF) != 0xDC00) || (*chPtr & 0x400)) {
-		    *dst++ = (char) (((*chPtr >> 12) | 0xE0) & 0xEF);
-		    *dst++ = (char) (((*chPtr >> 6) | 0x80) & 0xBF);
-		    *dst++ = (char) ((*chPtr | 0x80) & 0xBF);
-		    *chPtr = 0; /* reset surrogate handling */
-		    continue;
-		} else if ((TCL_UTF_MAX > 3) || (pureNullMode == 1)) {
-		    int full = (((*chPtr & 0x3FF) << 10) | (low & 0x3FF)) + 0x10000;
-		    *dst++ = (char) (((full >> 18) | 0xF0) & 0xF7);
-		    *dst++ = (char) (((full >> 12) | 0x80) & 0xBF);
-		    *dst++ = (char) (((full >> 6) | 0x80) & 0xBF);
-		    *dst++ = (char) ((full | 0x80) & 0xBF);
-			*chPtr = 0; /* reset surrogate handling */
-		    src += len;
-		    continue;
-		}
-	    }
-	    dst += Tcl_UniCharToUtf(*chPtr, dst);
+	}
+	src += len;
+	if (flags & TCL_ENCODING_LE) {
+	    *dst++ = (ch & 0xFF);
+	    *dst++ = ((ch >> 8) & 0xFF);
+	    *dst++ = ((ch >> 16) & 0xFF);
+	    *dst++ = ((ch >> 24) & 0xFF);
+	} else {
+	    *dst++ = ((ch >> 24) & 0xFF);
+	    *dst++ = ((ch >> 16) & 0xFF);
+	    *dst++ = ((ch >> 8) & 0xFF);
+	    *dst++ = (ch & 0xFF);
 	}
     }
 
@@ -2421,7 +2945,7 @@ UtfToUtfProc(
 /*
  *-------------------------------------------------------------------------
  *
- * UnicodeToUtfProc --
+ * Utf16ToUtfProc --
  *
  *	Convert from UTF-16 to UTF-8.
  *
@@ -2435,16 +2959,12 @@ UtfToUtfProc(
  */
 
 static int
-UnicodeToUtfProc(
-    ClientData clientData,	/* != NULL means LE, == NUL means BE */
+Utf16ToUtfProc(
+    void *clientData,	/* additional flags, e.g. TCL_ENCODING_LE */
     const char *src,		/* Source string in Unicode. */
     int srcLen,			/* Source string length in bytes. */
     int flags,			/* Conversion control flags. */
-    Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
-				 * information used during a piecewise
-				 * conversion. Contents of statePtr are
-				 * initialized and/or reset by conversion
-				 * routine under control of flags argument. */
+    TCL_UNUSED(Tcl_EncodingState *),
     char *dst,			/* Output buffer in which converted string is
 				 * stored. */
     int dstLen,			/* The maximum length of output buffer in
@@ -2464,8 +2984,10 @@ UnicodeToUtfProc(
     const char *srcStart, *srcEnd;
     const char *dstEnd, *dstStart;
     int result, numChars, charLimit = INT_MAX;
-    unsigned short ch;
+    unsigned short ch = 0;
 
+    flags = TclEncodingSetProfileFlags(flags);
+    flags |= PTR2INT(clientData);
     if (flags & TCL_ENCODING_CHAR_LIMIT) {
 	charLimit = *dstCharsPtr;
     }
@@ -2480,18 +3002,16 @@ UnicodeToUtfProc(
 	srcLen--;
     }
 
-#if TCL_UTF_MAX > 3
     /*
      * If last code point is a high surrogate, we cannot handle that yet,
      * unless we are at the end.
      */
 
     if (!(flags & TCL_ENCODING_END) && (srcLen >= 2) &&
-	    ((src[srcLen - (clientData?1:2)] & 0xFC) == 0xD8)) {
+	    ((src[srcLen - ((flags & TCL_ENCODING_LE)?1:2)] & 0xFC) == 0xD8)) {
 	result = TCL_CONVERT_MULTIBYTE;
 	srcLen-= 2;
     }
-#endif
 
     srcStart = src;
     srcEnd = src + srcLen;
@@ -2505,37 +3025,76 @@ UnicodeToUtfProc(
 	    break;
 	}
 
-	if (clientData) {
+	unsigned short prev = ch;
+	if (flags & TCL_ENCODING_LE) {
 	    ch = (src[1] & 0xFF) << 8 | (src[0] & 0xFF);
 	} else {
 	    ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF);
 	}
+	if (HIGH_SURROGATE(prev) && !LOW_SURROGATE(ch)) {
+	    if (PROFILE_STRICT(flags)) {
+		result = TCL_CONVERT_SYNTAX;
+		src -= 2; /* Go back to beginning of high surrogate */
+		dst--; /* Also undo writing a single byte too much */
+		numChars--;
+		break;
+	    }
+	    /* Bug [10c2c17c32]. If Hi surrogate not followed by Lo surrogate, finish 3-byte UTF-8 */
+	    dst += Tcl_UniCharToUtf(-1, dst);
+	}
 
 	/*
 	 * Special case for 1-byte utf chars for speed. Make sure we work with
 	 * unsigned short-size data.
 	 */
 
-	if (ch && ch < 0x80) {
+	if ((unsigned)ch - 1 < 0x7F) {
 	    *dst++ = (ch & 0xFF);
+	} else if (HIGH_SURROGATE(prev) || HIGH_SURROGATE(ch)) {
+	    dst += Tcl_UniCharToUtf(ch, dst);
+	} else if (LOW_SURROGATE(ch) && PROFILE_STRICT(flags)) {
+	    /* Lo surrogate not preceded by Hi surrogate */
+	    result = TCL_CONVERT_SYNTAX;
+	    break;
 	} else {
+	    *dst = 0; /* In case of lower surrogate, don't try to combine */
 	    dst += Tcl_UniCharToUtf(ch, dst);
 	}
 	src += sizeof(unsigned short);
     }
 
+    if (HIGH_SURROGATE(ch)) {
+	if (PROFILE_STRICT(flags)) {
+	    result = TCL_CONVERT_SYNTAX;
+	    src -= 2;
+	    dst--;
+	    numChars--;
+	} else {
+	    /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */
+	    dst += Tcl_UniCharToUtf(-1, dst);
+	}
+    }
+
+    /*
+     * If we had a truncated code unit at the end AND this is the last
+     * fragment AND profile is not "strict", stick FFFD in its place.
+     */
     if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) {
-	/* We have a single byte left-over at the end */
 	if (dst > dstEnd) {
 	    result = TCL_CONVERT_NOSPACE;
 	} else {
-	    /* destination is not full, so we really are at the end now */
-	    result = TCL_OK;
-	    dst += Tcl_UniCharToUtf(0xFFFD, dst);
-	    numChars++;
-	    src++;
+	    if (PROFILE_STRICT(flags)) {
+		result = TCL_CONVERT_SYNTAX;
+	    } else {
+		/* PROFILE_REPLACE or PROFILE_TCL8 */
+		result = TCL_OK;
+		dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
+		numChars++;
+		src++; /* Go past truncated code unit */
+	    }
 	}
     }
+
     *srcReadPtr = src - srcStart;
     *dstWrotePtr = dst - dstStart;
     *dstCharsPtr = numChars;
@@ -2545,7 +3104,7 @@ UnicodeToUtfProc(
 /*
  *-------------------------------------------------------------------------
  *
- * UtfToUnicodeProc --
+ * UtfToUtf16Proc --
  *
  *	Convert from UTF-8 to UTF-16.
  *
@@ -2559,16 +3118,12 @@ UnicodeToUtfProc(
  */
 
 static int
-UtfToUnicodeProc(
-    ClientData clientData,	/* != NULL means LE, == NUL means BE */
+UtfToUtf16Proc(
+    void *clientData,	/* additional flags, e.g. TCL_ENCODING_LE */
     const char *src,		/* Source string in UTF-8. */
     int srcLen,			/* Source string length in bytes. */
     int flags,			/* Conversion control flags. */
-    Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
-				 * information used during a piecewise
-				 * conversion. Contents of statePtr are
-				 * initialized and/or reset by conversion
-				 * routine under control of flags argument. */
+    TCL_UNUSED(Tcl_EncodingState *),
     char *dst,			/* Output buffer in which converted string is
 				 * stored. */
     int dstLen,			/* The maximum length of output buffer in
@@ -2587,20 +3142,19 @@ UtfToUnicodeProc(
 {
     const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
     int result, numChars;
-    Tcl_UniChar *chPtr = (Tcl_UniChar *) statePtr;
+    int ch, len;
 
-    if (flags & TCL_ENCODING_START) {
-    	*statePtr = 0;
-    }
     srcStart = src;
     srcEnd = src + srcLen;
     srcClose = srcEnd;
+    flags = TclEncodingSetProfileFlags(flags);
     if ((flags & TCL_ENCODING_END) == 0) {
 	srcClose -= TCL_UTF_MAX;
     }
 
     dstStart = dst;
     dstEnd   = dst + dstLen - sizeof(Tcl_UniChar);
+    flags |= PTR2INT(clientData);
 
     result = TCL_OK;
     for (numChars = 0; src < srcEnd; numChars++) {
@@ -2617,38 +3171,154 @@ UtfToUnicodeProc(
 	    result = TCL_CONVERT_NOSPACE;
 	    break;
 	}
-	src += TclUtfToUniChar(src, chPtr);
-
-	if (clientData) {
-#if TCL_UTF_MAX > 4
-	    if (*chPtr <= 0xFFFF) {
-		*dst++ = (*chPtr & 0xFF);
-		*dst++ = (*chPtr >> 8);
+	len = TclUtfToUCS4(src, &ch);
+	if (SURROGATE(ch)) {
+	    if (PROFILE_STRICT(flags)) {
+		result = TCL_CONVERT_UNKNOWN;
+		break;
+	    }
+	    if (PROFILE_REPLACE(flags)) {
+		ch = UNICODE_REPLACE_CHAR;
+	    }
+	}
+	src += len;
+	if (flags & TCL_ENCODING_LE) {
+	    if (ch <= 0xFFFF) {
+		*dst++ = (ch & 0xFF);
+		*dst++ = (ch >> 8);
 	    } else {
-		*dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF);
-		*dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8;
-		*dst++ = (*chPtr & 0xFF);
-		*dst++ = ((*chPtr >> 8) & 0x3) | 0xDC;
+		*dst++ = (((ch - 0x10000) >> 10) & 0xFF);
+		*dst++ = (((ch - 0x10000) >> 18) & 0x3) | 0xD8;
+		*dst++ = (ch & 0xFF);
+		*dst++ = ((ch >> 8) & 0x3) | 0xDC;
 	    }
-#else
-	    *dst++ = (*chPtr & 0xFF);
-	    *dst++ = (*chPtr >> 8);
-#endif
 	} else {
-#if TCL_UTF_MAX > 4
-	    if (*chPtr <= 0xFFFF) {
-		*dst++ = (*chPtr >> 8);
-		*dst++ = (*chPtr & 0xFF);
+	    if (ch <= 0xFFFF) {
+		*dst++ = (ch >> 8);
+		*dst++ = (ch & 0xFF);
 	    } else {
-		*dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8;
-		*dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF);
-		*dst++ = ((*chPtr >> 8) & 0x3) | 0xDC;
-		*dst++ = (*chPtr & 0xFF);
+		*dst++ = (((ch - 0x10000) >> 18) & 0x3) | 0xD8;
+		*dst++ = (((ch - 0x10000) >> 10) & 0xFF);
+		*dst++ = ((ch >> 8) & 0x3) | 0xDC;
+		*dst++ = (ch & 0xFF);
+	    }
+	}
+    }
+    *srcReadPtr = src - srcStart;
+    *dstWrotePtr = dst - dstStart;
+    *dstCharsPtr = numChars;
+    return result;
+}
+
+/*
+ *-------------------------------------------------------------------------
+ *
+ * UtfToUcs2Proc --
+ *
+ *	Convert from UTF-8 to UCS-2.
+ *
+ * Results:
+ *	Returns TCL_OK if conversion was successful.
+ *
+ * Side effects:
+ *	None.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+static int
+UtfToUcs2Proc(
+    void *clientData,	/* additional flags, e.g. TCL_ENCODING_LE */
+    const char *src,		/* Source string in UTF-8. */
+    int srcLen,			/* Source string length in bytes. */
+    int flags,			/* Conversion control flags. */
+    TCL_UNUSED(Tcl_EncodingState *),
+    char *dst,			/* Output buffer in which converted string is
+				 * stored. */
+    int dstLen,			/* The maximum length of output buffer in
+				 * bytes. */
+    int *srcReadPtr,		/* Filled with the number of bytes from the
+				 * source string that were converted. This may
+				 * be less than the original source length if
+				 * there was a problem converting some source
+				 * characters. */
+    int *dstWrotePtr,		/* Filled with the number of bytes that were
+				 * stored in the output buffer as a result of
+				 * the conversion. */
+    int *dstCharsPtr)		/* Filled with the number of characters that
+				 * correspond to the bytes stored in the
+				 * output buffer. */
+{
+    const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
+    int result, numChars, len;
+    Tcl_UniChar ch = 0;
+
+    flags = TclEncodingSetProfileFlags(flags);
+    flags |= PTR2INT(clientData);
+    srcStart = src;
+    srcEnd = src + srcLen;
+    srcClose = srcEnd;
+    if ((flags & TCL_ENCODING_END) == 0) {
+	srcClose -= TCL_UTF_MAX;
+    }
+
+    dstStart = dst;
+    dstEnd   = dst + dstLen - sizeof(Tcl_UniChar);
+
+    result = TCL_OK;
+    for (numChars = 0; src < srcEnd; numChars++) {
+	if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
+	    /*
+	     * If there is more string to follow, this will ensure that the
+	     * last UTF-8 character in the source buffer hasn't been cut off.
+	     */
+
+	    result = TCL_CONVERT_MULTIBYTE;
+	    break;
+	}
+	if (dst > dstEnd) {
+	    result = TCL_CONVERT_NOSPACE;
+	    break;
+	}
+#if TCL_UTF_MAX < 4
+	len = TclUtfToUniChar(src, &ch);
+	if ((ch >= 0xD800) && (len < 3)) {
+	    if (PROFILE_STRICT(flags)) {
+		result = TCL_CONVERT_UNKNOWN;
+		break;
 	    }
+	    src += len;
+	    src += TclUtfToUniChar(src, &ch);
+	    ch = UNICODE_REPLACE_CHAR;
+	}
 #else
-	    *dst++ = (*chPtr >> 8);
-	    *dst++ = (*chPtr & 0xFF);
+	len = TclUtfToUniChar(src, &ch);
+	if (ch > 0xFFFF) {
+	    if (PROFILE_STRICT(flags)) {
+		result = TCL_CONVERT_UNKNOWN;
+		break;
+	    }
+	    ch = UNICODE_REPLACE_CHAR;
+	}
 #endif
+	if (PROFILE_STRICT(flags) && SURROGATE(ch)) {
+	    result = TCL_CONVERT_SYNTAX;
+	    break;
+	}
+
+	src += len;
+
+	/*
+	 * Need to handle this in a way that won't cause misalignment by
+	 * casting dst to a Tcl_UniChar. [Bug 1122671]
+	 */
+
+	if (flags & TCL_ENCODING_LE) {
+	    *dst++ = (ch & 0xFF);
+	    *dst++ = (ch >> 8);
+	} else {
+	    *dst++ = (ch >> 8);
+	    *dst++ = (ch & 0xFF);
 	}
     }
     *srcReadPtr = src - srcStart;
@@ -2676,16 +3346,12 @@ UtfToUnicodeProc(
 
 static int
 TableToUtfProc(
-    ClientData clientData,	/* TableEncodingData that specifies
+    void *clientData,	/* TableEncodingData that specifies
 				 * encoding. */
     const char *src,		/* Source string in specified encoding. */
     int srcLen,			/* Source string length in bytes. */
     int flags,			/* Conversion control flags. */
-    Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
-				 * information used during a piecewise
-				 * conversion. Contents of statePtr are
-				 * initialized and/or reset by conversion
-				 * routine under control of flags argument. */
+    TCL_UNUSED(Tcl_EncodingState *),
     char *dst,			/* Output buffer in which converted string is
 				 * stored. */
     int dstLen,			/* The maximum length of output buffer in
@@ -2710,6 +3376,7 @@ TableToUtfProc(
     const unsigned short *pageZero;
     TableEncodingData *dataPtr = (TableEncodingData *)clientData;
 
+    flags = TclEncodingSetProfileFlags(flags);
     if (flags & TCL_ENCODING_CHAR_LIMIT) {
 	charLimit = *dstCharsPtr;
     }
@@ -2733,30 +3400,47 @@ TableToUtfProc(
 	if (prefixBytes[byte]) {
 	    src++;
 	    if (src >= srcEnd) {
-		src--;
-		result = TCL_CONVERT_MULTIBYTE;
-		break;
+		if (!(flags & TCL_ENCODING_END)) {
+		    src--;
+		    result = TCL_CONVERT_MULTIBYTE;
+		    break;
+		} else if (PROFILE_STRICT(flags)) {
+		    src--;
+		    result = TCL_CONVERT_SYNTAX;
+		    break;
+		} else if (PROFILE_REPLACE(flags)) {
+		    ch = UNICODE_REPLACE_CHAR;
+		} else {
+		    src--; /* See bug [bdcb5126c0] */
+		    result = TCL_CONVERT_MULTIBYTE;
+		    break;
+		}
+	    } else {
+		ch = toUnicode[byte][*((unsigned char *)src)];
 	    }
-	    ch = toUnicode[byte][*((unsigned char *) src)];
 	} else {
 	    ch = pageZero[byte];
 	}
 	if ((ch == 0) && (byte != 0)) {
-	    if (flags & TCL_ENCODING_STOPONERROR) {
+	    if (PROFILE_STRICT(flags)) {
 		result = TCL_CONVERT_SYNTAX;
 		break;
 	    }
 	    if (prefixBytes[byte]) {
 		src--;
 	    }
-	    ch = (Tcl_UniChar) byte;
+	    if (PROFILE_REPLACE(flags)) {
+		ch = UNICODE_REPLACE_CHAR;
+	    } else {
+		ch = (Tcl_UniChar)byte;
+	    }
 	}
 
 	/*
 	 * Special case for 1-byte utf chars for speed.
 	 */
 
-	if (ch && ch < 0x80) {
+	if ((unsigned)ch - 1 < 0x7F) {
 	    *dst++ = (char) ch;
 	} else {
 	    dst += Tcl_UniCharToUtf(ch, dst);
@@ -2789,16 +3473,12 @@ TableToUtfProc(
 
 static int
 TableFromUtfProc(
-    ClientData clientData,	/* TableEncodingData that specifies
+    void *clientData,	/* TableEncodingData that specifies
 				 * encoding. */
     const char *src,		/* Source string in UTF-8. */
     int srcLen,			/* Source string length in bytes. */
     int flags,			/* Conversion control flags. */
-    Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
-				 * information used during a piecewise
-				 * conversion. Contents of statePtr are
-				 * initialized and/or reset by conversion
-				 * routine under control of flags argument. */
+    TCL_UNUSED(Tcl_EncodingState *),
     char *dst,			/* Output buffer in which converted string is
 				 * stored. */
     int dstLen,			/* The maximum length of output buffer in
@@ -2830,6 +3510,7 @@ TableFromUtfProc(
     srcStart = src;
     srcEnd = src + srcLen;
     srcClose = srcEnd;
+    flags = TclEncodingSetProfileFlags(flags);
     if ((flags & TCL_ENCODING_END) == 0) {
 	srcClose -= TCL_UTF_MAX;
     }
@@ -2849,12 +3530,12 @@ TableFromUtfProc(
 	}
 	len = TclUtfToUniChar(src, &ch);
 
-#if TCL_UTF_MAX > 4
+#if TCL_UTF_MAX > 3
 	/* Unicode chars > +U0FFFF cannot be represented in any table encoding */
 	if (ch & 0xFFFF0000) {
 	    word = 0;
 	} else
-#elif TCL_UTF_MAX == 4
+#else
 	if (!len) {
 	    word = 0;
 	} else
@@ -2862,11 +3543,11 @@ TableFromUtfProc(
 	    word = fromUnicode[(ch >> 8)][ch & 0xFF];
 
 	if ((word == 0) && (ch != 0)) {
-	    if (flags & TCL_ENCODING_STOPONERROR) {
+	    if (PROFILE_STRICT(flags)) {
 		result = TCL_CONVERT_UNKNOWN;
 		break;
 	    }
-	    word = dataPtr->fallback;
+	    word = dataPtr->fallback; /* Both profiles REPLACE and TCL8 */
 	}
 	if (prefixBytes[(word >> 8)] != 0) {
 	    if (dst + 1 > dstEnd) {
@@ -2911,15 +3592,11 @@ TableFromUtfProc(
 
 static int
 Iso88591ToUtfProc(
-    ClientData clientData,	/* Ignored. */
+    TCL_UNUSED(void *),
     const char *src,		/* Source string in specified encoding. */
     int srcLen,			/* Source string length in bytes. */
     int flags,			/* Conversion control flags. */
-    Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
-				 * information used during a piecewise
-				 * conversion. Contents of statePtr are
-				 * initialized and/or reset by conversion
-				 * routine under control of flags argument. */
+    TCL_UNUSED(Tcl_EncodingState *),
     char *dst,			/* Output buffer in which converted string is
 				 * stored. */
     int dstLen,			/* The maximum length of output buffer in
@@ -2940,6 +3617,7 @@ Iso88591ToUtfProc(
     const char *dstEnd, *dstStart;
     int result, numChars, charLimit = INT_MAX;
 
+    flags = TclEncodingSetProfileFlags(flags);
     if (flags & TCL_ENCODING_CHAR_LIMIT) {
 	charLimit = *dstCharsPtr;
     }
@@ -2963,7 +3641,7 @@ Iso88591ToUtfProc(
 	 * Special case for 1-byte utf chars for speed.
 	 */
 
-	if (ch && ch < 0x80) {
+	if ((unsigned)ch - 1 < 0x7F) {
 	    *dst++ = (char) ch;
 	} else {
 	    dst += Tcl_UniCharToUtf(ch, dst);
@@ -2995,15 +3673,11 @@ Iso88591ToUtfProc(
 
 static int
 Iso88591FromUtfProc(
-    ClientData clientData,	/* Ignored. */
+    TCL_UNUSED(void *),
     const char *src,		/* Source string in UTF-8. */
     int srcLen,			/* Source string length in bytes. */
     int flags,			/* Conversion control flags. */
-    Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
-				 * information used during a piecewise
-				 * conversion. Contents of statePtr are
-				 * initialized and/or reset by conversion
-				 * routine under control of flags argument. */
+    TCL_UNUSED(Tcl_EncodingState *),
     char *dst,			/* Output buffer in which converted string is
 				 * stored. */
     int dstLen,			/* The maximum length of output buffer in
@@ -3028,6 +3702,7 @@ Iso88591FromUtfProc(
     srcStart = src;
     srcEnd = src + srcLen;
     srcClose = srcEnd;
+    flags = TclEncodingSetProfileFlags(flags);
     if ((flags & TCL_ENCODING_END) == 0) {
 	srcClose -= TCL_UTF_MAX;
     }
@@ -3054,15 +3729,15 @@ Iso88591FromUtfProc(
 	 */
 
 	if (ch > 0xFF
-#if TCL_UTF_MAX == 4
+#if TCL_UTF_MAX < 4
 		|| ((ch >= 0xD800) && (len < 3))
 #endif
 		) {
-	    if (flags & TCL_ENCODING_STOPONERROR) {
+	    if (PROFILE_STRICT(flags)) {
 		result = TCL_CONVERT_UNKNOWN;
 		break;
 	    }
-#if TCL_UTF_MAX == 4
+#if TCL_UTF_MAX < 4
 	    if ((ch >= 0xD800) && (len < 3)) {
 		len = 4;
 	    }
@@ -3071,7 +3746,7 @@ Iso88591FromUtfProc(
 	     * Plunge on, using '?' as a fallback character.
 	     */
 
-	    ch = (Tcl_UniChar) '?';
+	    ch = (Tcl_UniChar) '?'; /* Profiles TCL8 and REPLACE */
 	}
 
 	if (dst > dstEnd) {
@@ -3107,10 +3782,10 @@ Iso88591FromUtfProc(
 
 static void
 TableFreeProc(
-    ClientData clientData)	/* TableEncodingData that specifies
+    void *clientData)	/* TableEncodingData that specifies
 				 * encoding. */
 {
-    TableEncodingData *dataPtr = (TableEncodingData *) clientData;
+    TableEncodingData *dataPtr = (TableEncodingData *)clientData;
 
     /*
      * Make sure we aren't freeing twice on shutdown. [Bug 219314]
@@ -3142,7 +3817,7 @@ TableFreeProc(
 
 static int
 EscapeToUtfProc(
-    ClientData clientData,	/* EscapeEncodingData that specifies
+    void *clientData,	/* EscapeEncodingData that specifies
 				 * encoding. */
     const char *src,		/* Source string in specified encoding. */
     int srcLen,			/* Source string length in bytes. */
@@ -3168,13 +3843,14 @@ EscapeToUtfProc(
 				 * correspond to the bytes stored in the
 				 * output buffer. */
 {
-    EscapeEncodingData *dataPtr = (EscapeEncodingData *) clientData;
+    EscapeEncodingData *dataPtr = (EscapeEncodingData *)clientData;
     const char *prefixBytes, *tablePrefixBytes, *srcStart, *srcEnd;
     const unsigned short *const *tableToUnicode;
     const Encoding *encodingPtr;
     int state, result, numChars, charLimit = INT_MAX;
     const char *dstStart, *dstEnd;
 
+    flags = TclEncodingSetProfileFlags(flags);
     if (flags & TCL_ENCODING_CHAR_LIMIT) {
 	charLimit = *dstCharsPtr;
     }
@@ -3285,9 +3961,10 @@ EscapeToUtfProc(
 
 	    if ((checked == dataPtr->numSubTables + 2)
 		    || (flags & TCL_ENCODING_END)) {
-		if ((flags & TCL_ENCODING_STOPONERROR) == 0) {
+		if (!PROFILE_STRICT(flags)) {
 		    /*
-		     * Skip the unknown escape sequence.
+		     * Skip the unknown escape sequence. TODO - bug?
+		     * May be replace with UNICODE_REPLACE_CHAR?
 		     */
 
 		    src += longest;
@@ -3356,7 +4033,7 @@ EscapeToUtfProc(
 
 static int
 EscapeFromUtfProc(
-    ClientData clientData,	/* EscapeEncodingData that specifies
+    void *clientData,	/* EscapeEncodingData that specifies
 				 * encoding. */
     const char *src,		/* Source string in UTF-8. */
     int srcLen,			/* Source string length in bytes. */
@@ -3397,6 +4074,7 @@ EscapeFromUtfProc(
     srcStart = src;
     srcEnd = src + srcLen;
     srcClose = srcEnd;
+    flags = TclEncodingSetProfileFlags(flags);
     if ((flags & TCL_ENCODING_END) == 0) {
 	srcClose -= TCL_UTF_MAX;
     }
@@ -3460,7 +4138,7 @@ EscapeFromUtfProc(
 
 	    if (word == 0) {
 		state = oldState;
-		if (flags & TCL_ENCODING_STOPONERROR) {
+		if (PROFILE_STRICT(flags)) {
 		    result = TCL_CONVERT_UNKNOWN;
 		    break;
 		}
@@ -3493,8 +4171,7 @@ EscapeFromUtfProc(
 		    result = TCL_CONVERT_NOSPACE;
 		    break;
 		}
-		memcpy(dst, subTablePtr->sequence,
-			subTablePtr->sequenceLen);
+		memcpy(dst, subTablePtr->sequence, subTablePtr->sequenceLen);
 		dst += subTablePtr->sequenceLen;
 	    }
 	}
@@ -3568,7 +4245,7 @@ EscapeFromUtfProc(
 
 static void
 EscapeFreeProc(
-    ClientData clientData)	/* EscapeEncodingData that specifies
+    void *clientData)	/* EscapeEncodingData that specifies
 				 * encoding. */
 {
     EscapeEncodingData *dataPtr = (EscapeEncodingData *)clientData;
@@ -3645,7 +4322,7 @@ GetTableEncoding(
 /*
  *---------------------------------------------------------------------------
  *
- * unilen --
+ * unilen, unilen4 --
  *
  *	A helper function for the Tcl_ExternalToUtf functions. This function
  *	is similar to strlen for double-byte characters: it returns the number
@@ -3672,6 +4349,19 @@ unilen(
     }
     return (char *) p - src;
 }
+
+static size_t
+unilen4(
+    const char *src)
+{
+    unsigned int *p;
+
+    p = (unsigned int *) src;
+    while (*p != 0x00000000) {
+	p++;
+    }
+    return (char *) p - src;
+}
 
 /*
  *-------------------------------------------------------------------------
@@ -3699,11 +4389,11 @@ unilen(
 static void
 InitializeEncodingSearchPath(
     char **valuePtr,
-    int *lengthPtr,
+    TCL_HASH_TYPE *lengthPtr,
     Tcl_Encoding *encodingPtr)
 {
     const char *bytes;
-    int i, numDirs, numBytes;
+    Tcl_Size i, numDirs, numBytes;
     Tcl_Obj *libPathObj, *encodingObj, *searchPathObj;
 
     TclNewLiteralStringObj(encodingObj, "encoding");
@@ -3712,7 +4402,7 @@ InitializeEncodingSearchPath(
     Tcl_IncrRefCount(searchPathObj);
     libPathObj = TclGetLibraryPath();
     Tcl_IncrRefCount(libPathObj);
-    TclListObjLength(NULL, libPathObj, &numDirs);
+    TclListObjLengthM(NULL, libPathObj, &numDirs);
 
     for (i = 0; i < numDirs; i++) {
 	Tcl_Obj *directoryObj, *pathObj;
@@ -3733,7 +4423,7 @@ InitializeEncodingSearchPath(
     if (*encodingPtr) {
 	((Encoding *)(*encodingPtr))->refCount++;
     }
-    bytes = Tcl_GetStringFromObj(searchPathObj, &numBytes);
+    bytes = TclGetStringFromObj(searchPathObj, &numBytes);
 
     *lengthPtr = numBytes;
     *valuePtr = (char *)ckalloc(numBytes + 1);
@@ -3742,6 +4432,166 @@ InitializeEncodingSearchPath(
 }
 
 /*
+ *------------------------------------------------------------------------
+ *
+ * TclEncodingProfileParseName --
+ *
+ *	Maps an encoding profile name to its integer equivalent.
+ *
+ * Results:
+ *	TCL_OK on success or TCL_ERROR on failure.
+ *
+ * Side effects:
+ *	Returns the profile enum value in *profilePtr
+ *
+ *------------------------------------------------------------------------
+ */
+int
+TclEncodingProfileNameToId(
+    Tcl_Interp *interp,		/* For error messages. May be NULL */
+    const char *profileName,	/* Name of profile */
+    int *profilePtr)  		/* Output */
+{
+    size_t i;
+    size_t numProfiles = sizeof(encodingProfiles) / sizeof(encodingProfiles[0]);
+
+    for (i = 0; i < numProfiles; ++i) {
+	if (!strcmp(profileName, encodingProfiles[i].name)) {
+	    *profilePtr = encodingProfiles[i].value;
+	    return TCL_OK;
+	}
+    }
+    if (interp) {
+	Tcl_Obj *errorObj;
+	/* This code assumes at least two profiles :-) */
+	errorObj =
+	    Tcl_ObjPrintf("bad profile name \"%s\": must be",
+		profileName);
+	for (i = 0; i < (numProfiles - 1); ++i) {
+	    Tcl_AppendStringsToObj(
+		errorObj, " ", encodingProfiles[i].name, ",", NULL);
+	}
+	Tcl_AppendStringsToObj(
+	    errorObj, " or ", encodingProfiles[numProfiles-1].name, NULL);
+
+	Tcl_SetObjResult(interp, errorObj);
+	Tcl_SetErrorCode(
+	    interp, "TCL", "ENCODING", "PROFILE", profileName, NULL);
+    }
+    return TCL_ERROR;
+}
+
+/*
+ *------------------------------------------------------------------------
+ *
+ * TclEncodingProfileValueToName --
+ *
+ *	Maps an encoding profile value to its name.
+ *
+ * Results:
+ *	Pointer to the name or NULL on failure. Caller must not make
+ *	not modify the string and must make a copy to hold on to it.
+ *
+ * Side effects:
+ *	None.
+ *------------------------------------------------------------------------
+ */
+const char *
+TclEncodingProfileIdToName(
+    Tcl_Interp *interp,		/* For error messages. May be NULL */
+    int profileValue)		/* Profile #define value */
+{
+    size_t i;
+
+    for (i = 0; i < sizeof(encodingProfiles) / sizeof(encodingProfiles[0]); ++i) {
+	if (profileValue == encodingProfiles[i].value) {
+	    return encodingProfiles[i].name;
+	}
+    }
+    if (interp) {
+	Tcl_SetObjResult(
+	    interp,
+	    Tcl_ObjPrintf(
+		"Internal error. Bad profile id \"%d\".",
+		profileValue));
+	Tcl_SetErrorCode(
+	    interp, "TCL", "ENCODING", "PROFILEID", NULL);
+    }
+    return NULL;
+}
+
+/*
+ *------------------------------------------------------------------------
+ *
+ * TclEncodingSetProfileFlags --
+ *
+ *	Maps the flags supported in the encoding C API's to internal flags.
+ *
+ *	For backward compatibility reasons, TCL_ENCODING_STOPONERROR is
+ *	is mapped to the TCL_ENCODING_PROFILE_STRICT overwriting any profile
+ *	specified.
+ *
+ *	If no profile or an invalid profile is specified, it is set to
+ *	the default.
+ *
+ * Results:
+ *    Internal encoding flag mask.
+ *
+ * Side effects:
+ *    None.
+ *
+ *------------------------------------------------------------------------
+ */
+int TclEncodingSetProfileFlags(int flags)
+{
+    if (flags & TCL_ENCODING_STOPONERROR) {
+	CHANNEL_PROFILE_SET(flags, TCL_ENCODING_PROFILE_STRICT);
+    } else {
+	int profile = CHANNEL_PROFILE_GET(flags);
+	switch (profile) {
+	case TCL_ENCODING_PROFILE_TCL8:
+	case TCL_ENCODING_PROFILE_STRICT:
+	case TCL_ENCODING_PROFILE_REPLACE:
+	    break;
+	case 0: /* Unspecified by caller */
+	default:
+	    CHANNEL_PROFILE_SET(flags, TCL_ENCODING_PROFILE_TCL8);
+	    break;
+	}
+    }
+    return flags;
+}
+
+/*
+ *------------------------------------------------------------------------
+ *
+ * TclGetEncodingProfiles --
+ *
+ *	Get the list of supported encoding profiles.
+ *
+ * Results:
+ *	None.
+ *
+ * Side effects:
+ *	The list of profile names is stored in the interpreter result.
+ *
+ *------------------------------------------------------------------------
+ */
+void
+TclGetEncodingProfiles(Tcl_Interp *interp)
+{
+    size_t i, n;
+    Tcl_Obj *objPtr;
+    n = sizeof(encodingProfiles) / sizeof(encodingProfiles[0]);
+    objPtr = Tcl_NewListObj(n, NULL);
+    for (i = 0; i < n; ++i) {
+	Tcl_ListObjAppendElement(
+	    interp, objPtr, Tcl_NewStringObj(encodingProfiles[i].name, TCL_INDEX_NONE));
+    }
+    Tcl_SetObjResult(interp, objPtr);
+}
+
+/*
  * Local Variables:
  * mode: c
  * c-basic-offset: 4