1 files changed, 799 insertions, 193 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 23d71ff..a87eb7f 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -10,6 +10,7 @@
  */
 
 #include "tclInt.h"
+#include "tclIO.h"
 
 typedef size_t (LengthProc)(const char *src);
 
@@ -35,7 +36,7 @@ typedef struct {
 				 * encoding is deleted. */
     void *clientData;	/* Arbitrary value associated with encoding
 				 * type. Passed to conversion functions. */
-    size_t nullSize;		/* Number of 0x00 bytes that signify
+    Tcl_Size nullSize;	/* Number of 0x00 bytes that signify
 				 * end-of-string in this encoding. This number
 				 * is used to determine the source string
 				 * length when the srcLen argument is
@@ -187,6 +188,36 @@ static Tcl_Encoding systemEncoding = NULL;
 Tcl_Encoding tclIdentityEncoding = NULL;
 
 /*
+ * Names of encoding profiles and corresponding integer values.
+ * Keep alphabetical order for error messages.
+ */
+static struct TclEncodingProfiles {
+    const char *name;
+    int value;
+} encodingProfiles[] = {
+    {"replace", TCL_ENCODING_PROFILE_REPLACE},
+    {"strict", TCL_ENCODING_PROFILE_STRICT},
+    {"tcl8", TCL_ENCODING_PROFILE_TCL8},
+};
+#define PROFILE_TCL8(flags_)                                           \
+    ((CHANNEL_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_TCL8)   \
+     || (CHANNEL_PROFILE_GET(flags_) == 0                         \
+	 && TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_TCL8))
+#define PROFILE_STRICT(flags_)                                         \
+    ((CHANNEL_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_STRICT) \
+     || (CHANNEL_PROFILE_GET(flags_) == 0                         \
+	 && TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_STRICT))
+#define PROFILE_REPLACE(flags_)                                         \
+    ((CHANNEL_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_REPLACE) \
+     || (CHANNEL_PROFILE_GET(flags_) == 0                          \
+	 && TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_REPLACE))
+
+#define UNICODE_REPLACE_CHAR ((Tcl_UniChar)0xFFFD)
+#define SURROGATE(c_)      (((c_) & ~0x7FF) == 0xD800)
+#define HIGH_SURROGATE(c_) (((c_) & ~0x3FF) == 0xD800)
+#define LOW_SURROGATE(c_)  (((c_) & ~0x3FF) == 0xDC00)
+
+/*
  * The following variable is used in the sparse matrix code for a
  * TableEncoding to represent a page in the table that has no entries.
  */
@@ -229,6 +260,7 @@ static Tcl_EncodingConvertProc	UtfToUtfProc;
 static Tcl_EncodingConvertProc	Iso88591FromUtfProc;
 static Tcl_EncodingConvertProc	Iso88591ToUtfProc;
 
+
 /*
  * A Tcl_ObjType for holding a cached Tcl_Encoding in the twoPtrValue.ptr1 field
  * of the internalrep. This should help the lifetime of encodings be more useful.
@@ -374,7 +406,7 @@ int
 Tcl_SetEncodingSearchPath(
     Tcl_Obj *searchPath)
 {
-    size_t dummy;
+    Tcl_Size dummy;
 
     if (TCL_ERROR == TclListObjLengthM(NULL, searchPath, &dummy)) {
 	return TCL_ERROR;
@@ -421,7 +453,7 @@ void
 TclSetLibraryPath(
     Tcl_Obj *path)
 {
-    size_t dummy;
+    Tcl_Size dummy;
 
     if (TCL_ERROR == TclListObjLengthM(NULL, path, &dummy)) {
 	return;
@@ -457,7 +489,7 @@ TclSetLibraryPath(
 static void
 FillEncodingFileMap(void)
 {
-    size_t i, numDirs = 0;
+    Tcl_Size i, numDirs = 0;
     Tcl_Obj *map, *searchPath;
 
     searchPath = Tcl_GetEncodingSearchPath();
@@ -472,7 +504,7 @@ FillEncodingFileMap(void)
 	 * entries found, we favor files earlier on the search path.
 	 */
 
-	size_t j, numFiles;
+	Tcl_Size j, numFiles;
 	Tcl_Obj *directory, *matchFileList;
 	Tcl_Obj **filev;
 	Tcl_GlobTypeData readableFiles = {
@@ -521,10 +553,16 @@ FillEncodingFileMap(void)
  *---------------------------------------------------------------------------
  */
 
-/* Since TCL_ENCODING_MODIFIED is only used for utf-8/cesu-8 and
- * TCL_ENCODING_LE is only used for  utf-16/utf-32/ucs-2. re-use the same value */
-#define TCL_ENCODING_LE		TCL_ENCODING_MODIFIED	/* Little-endian encoding */
-#define TCL_ENCODING_UTF	0x200	/* For UTF-8 encoding, allow 4-byte output sequences */
+/*
+ * NOTE: THESE BIT DEFINITIONS SHOULD NOT OVERLAP WITH INTERNAL USE BITS
+ * DEFINED IN tcl.h (TCL_ENCODING_* et al). Be cognizant of this
+ * when adding bits. TODO - should really be defined in a single file.
+ *
+ * To prevent conflicting bits, only define bits within 0xff00 mask here.
+ */
+#define TCL_ENCODING_LE	0x100   /* Used to distinguish LE/BE variants */
+#define ENCODING_UTF	0x200	/* For UTF-8 encoding, allow 4-byte output sequences */
+#define ENCODING_INPUT	0x400   /* For UTF-8/CESU-8 encoding, means external -> internal */
 
 void
 TclInitEncodingSubsystem(void)
@@ -537,12 +575,16 @@ TclInitEncodingSubsystem(void)
         char c;
         short s;
     } isLe;
+    int leFlags;
 
     if (encodingsInitialized) {
 	return;
     }
 
-    isLe.s = TCL_ENCODING_LE;
+    /* Detect host byte order: isLe.c reads back nonzero only on little-endian */
+    isLe.s = 1;
+    leFlags = isLe.c ? TCL_ENCODING_LE : 0;
+
     Tcl_MutexLock(&encodingMutex);
     Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS);
     Tcl_MutexUnlock(&encodingMutex);
@@ -566,9 +608,9 @@ TclInitEncodingSubsystem(void)
     type.fromUtfProc	= UtfToUtfProc;
     type.freeProc	= NULL;
     type.nullSize	= 1;
-    type.clientData	= INT2PTR(TCL_ENCODING_UTF);
+    type.clientData	= INT2PTR(ENCODING_UTF);
     Tcl_CreateEncoding(&type);
-    type.clientData	= INT2PTR(TCL_ENCODING_NOCOMPLAIN);
+    type.clientData	= INT2PTR(0);
     type.encodingName	= "cesu-8";
     Tcl_CreateEncoding(&type);
 
@@ -577,13 +619,13 @@ TclInitEncodingSubsystem(void)
     type.freeProc	= NULL;
     type.nullSize	= 2;
     type.encodingName   = "ucs-2le";
-    type.clientData	= INT2PTR(TCL_ENCODING_LE|TCL_ENCODING_NOCOMPLAIN);
+    type.clientData	= INT2PTR(TCL_ENCODING_LE);
     Tcl_CreateEncoding(&type);
     type.encodingName   = "ucs-2be";
-    type.clientData	= INT2PTR(TCL_ENCODING_NOCOMPLAIN);
+    type.clientData	= INT2PTR(0);
     Tcl_CreateEncoding(&type);
     type.encodingName   = "ucs-2";
-    type.clientData	= INT2PTR(isLe.c|TCL_ENCODING_NOCOMPLAIN);
+    type.clientData	= INT2PTR(leFlags);
     Tcl_CreateEncoding(&type);
 
     type.toUtfProc	= Utf32ToUtfProc;
@@ -597,7 +639,7 @@ TclInitEncodingSubsystem(void)
     type.clientData	= INT2PTR(0);
     Tcl_CreateEncoding(&type);
     type.encodingName   = "utf-32";
-    type.clientData	= INT2PTR(isLe.c);
+    type.clientData	= INT2PTR(leFlags);
     Tcl_CreateEncoding(&type);
 
     type.toUtfProc	= Utf16ToUtfProc;
@@ -605,13 +647,13 @@ TclInitEncodingSubsystem(void)
     type.freeProc	= NULL;
     type.nullSize	= 2;
     type.encodingName   = "utf-16le";
-    type.clientData	= INT2PTR(TCL_ENCODING_LE);
+    type.clientData	= INT2PTR(TCL_ENCODING_LE|ENCODING_UTF);
     Tcl_CreateEncoding(&type);
     type.encodingName   = "utf-16be";
-    type.clientData	= INT2PTR(0);
+    type.clientData	= INT2PTR(ENCODING_UTF);
     Tcl_CreateEncoding(&type);
     type.encodingName   = "utf-16";
-    type.clientData	= INT2PTR(isLe.c);
+    type.clientData	= INT2PTR(leFlags|ENCODING_UTF);
     Tcl_CreateEncoding(&type);
 
 #ifndef TCL_NO_DEPRECATED
@@ -895,7 +937,7 @@ Tcl_GetEncodingNames(
 	Encoding *encodingPtr = (Encoding *)Tcl_GetHashValue(hPtr);
 
 	Tcl_CreateHashEntry(&table,
-		Tcl_NewStringObj(encodingPtr->name, -1), &dummy);
+		Tcl_NewStringObj(encodingPtr->name, TCL_INDEX_NONE), &dummy);
     }
     Tcl_MutexUnlock(&encodingMutex);
 
@@ -940,7 +982,7 @@ Tcl_GetEncodingNames(
  *
  *---------------------------------------------------------------------------
  */
-size_t
+Tcl_Size
 Tcl_GetEncodingNulLength(
     Tcl_Encoding encoding)
 {
@@ -1107,12 +1149,13 @@ Tcl_ExternalToUtfDString(
     Tcl_Encoding encoding,	/* The encoding for the source string, or NULL
 				 * for the default system encoding. */
     const char *src,		/* Source string in specified encoding. */
-    size_t srcLen,		/* Source string length in bytes, or -1 for
+    Tcl_Size srcLen,		/* Source string length in bytes, or < 0 for
 				 * encoding-specific string length. */
     Tcl_DString *dstPtr)	/* Uninitialized or free DString in which the
 				 * converted string is stored. */
 {
-    Tcl_ExternalToUtfDStringEx(encoding, src, srcLen, TCL_ENCODING_NOCOMPLAIN, dstPtr);
+    Tcl_ExternalToUtfDStringEx(
+	NULL, encoding, src, srcLen, TCL_ENCODING_PROFILE_TCL8, dstPtr, NULL);
     return Tcl_DStringValue(dstPtr);
 }
 
@@ -1126,50 +1169,82 @@ Tcl_ExternalToUtfDString(
  *	The parameter flags controls the behavior, if any of the bytes in
  *	the source buffer are invalid or cannot be represented in utf-8.
  *	Possible flags values:
- *	TCL_ENCODING_NOCOMPLAIN: replace invalid characters/bytes by a default
- *	fallback character. Always return -1 (Default in Tcl 8.7).
- *	TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00.
- *	Only valid for "utf-8" and "cesu-8". This flag may be used together
- *	with the other flags.
+ *	The value should be composed by OR-ing the following:
+ *	- *At most one* of TCL_ENCODING_PROFILE_{DEFAULT,TCL8,STRICT}
+ *	- TCL_ENCODING_STOPONERROR: Backward compatibility. Sets the profile
+ *	  to TCL_ENCODING_PROFILE_STRICT, overriding any specified profile flags.
+ *	Any other flag bits will cause an error to be returned (for future
+ *	compatibility).
  *
  * Results:
- *	The converted bytes are stored in the DString, which is then NULL
- *	terminated in an encoding-specific manner. The return value is
- *	the error position in the source string or -1 if no conversion error
- *	is reported.
-  *
+ *      The return value is one of
+ *        TCL_OK: success. Converted string in *dstPtr
+ *        TCL_ERROR: error in passed parameters. Error message in interp
+ *        TCL_CONVERT_MULTIBYTE: source ends in truncated multibyte sequence
+ *        TCL_CONVERT_SYNTAX: source is not conformant to encoding definition
+ *        TCL_CONVERT_UNKNOWN: source contained a character that could not
+ *            be represented in target encoding.
+ *
  * Side effects:
- *	None.
+ *
+ *      TCL_OK: The converted bytes are stored in the DString and NUL
+ *          terminated in an encoding-specific manner.
+ *      TCL_ERROR: an error, message is stored in the interp if not NULL.
+ *      TCL_CONVERT_*: if errorLocPtr is NULL, an error message is stored
+ *          in the interpreter (if not NULL). If errorLocPtr is not NULL,
+ *          no error message is stored as it is expected the caller is
+ *          interested in whatever is decoded so far and not treating this
+ *          as an error condition.
+ *
+ *      In addition, *dstPtr is always initialized and must be cleared
+ *      by the caller irrespective of the return code.
  *
  *-------------------------------------------------------------------------
  */
 
-size_t
+int
 Tcl_ExternalToUtfDStringEx(
+    Tcl_Interp *interp,         /* For error messages. May be NULL. */
     Tcl_Encoding encoding,	/* The encoding for the source string, or NULL
 				 * for the default system encoding. */
     const char *src,		/* Source string in specified encoding. */
-    size_t srcLen,			/* Source string length in bytes, or TCL_INDEX_NONE for
+    Tcl_Size srcLen,			/* Source string length in bytes, or < 0 for
 				 * encoding-specific string length. */
     int flags,			/* Conversion control flags. */
-    Tcl_DString *dstPtr)	/* Uninitialized or free DString in which the
+    Tcl_DString *dstPtr,	/* Uninitialized or free DString in which the
 				 * converted string is stored. */
+    Tcl_Size *errorLocPtr)      /* Where to store the error location
+                                   (or TCL_INDEX_NONE if no error). May
+				   be NULL. */
 {
     char *dst;
     Tcl_EncodingState state;
     const Encoding *encodingPtr;
-    int result, soFar, srcRead, dstWrote, dstChars;
-    size_t dstLen;
+    int result;
+    Tcl_Size dstLen, soFar;
     const char *srcStart = src;
 
+    /* DO FIRST - Must always be initialized before returning */
     Tcl_DStringInit(dstPtr);
+
+    if (flags & (TCL_ENCODING_START|TCL_ENCODING_END)) {
+	/* TODO - what other flags are illegal? - See TIP 656 */
+	Tcl_SetObjResult(
+	    interp,
+	    Tcl_NewStringObj(
+		"Parameter error: TCL_ENCODING_{START,STOP} bits set in flags.",
+		TCL_INDEX_NONE));
+	Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALFLAGS", NULL);
+	return TCL_ERROR;
+    }
+
     dst = Tcl_DStringValue(dstPtr);
     dstLen = dstPtr->spaceAvl - 1;
 
     if (encoding == NULL) {
 	encoding = systemEncoding;
     }
-    encodingPtr = (Encoding *) encoding;
+    encodingPtr = (Encoding *)encoding;
 
     if (src == NULL) {
 	srcLen = 0;
@@ -1177,23 +1252,70 @@ Tcl_ExternalToUtfDStringEx(
 	srcLen = encodingPtr->lengthProc(src);
     }
 
-    flags |= TCL_ENCODING_START | TCL_ENCODING_END;
+    flags |= TCL_ENCODING_START;
     if (encodingPtr->toUtfProc == UtfToUtfProc) {
-	flags |= TCL_ENCODING_MODIFIED | TCL_ENCODING_UTF;
+	flags |= ENCODING_INPUT;
     }
 
     while (1) {
-	result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen,
-		flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars);
-	soFar = dst + dstWrote - Tcl_DStringValue(dstPtr);
+	int srcChunkLen, srcChunkRead;
+	int dstChunkLen, dstChunkWrote, dstChunkChars;
+
+	if (srcLen > INT_MAX) {
+	    srcChunkLen = INT_MAX;
+	} else {
+	    srcChunkLen = srcLen;
+	    flags |= TCL_ENCODING_END; /* Last chunk */
+	}
+	dstChunkLen = dstLen > INT_MAX ? INT_MAX : dstLen;
+
+	result = encodingPtr->toUtfProc(encodingPtr->clientData, src,
+		srcChunkLen, flags, &state, dst, dstChunkLen,
+		&srcChunkRead, &dstChunkWrote, &dstChunkChars);
+	soFar = dst + dstChunkWrote - Tcl_DStringValue(dstPtr);
+
+	src += srcChunkRead;
+
+	/*
+	 * Keep looping in two cases: (1) our destination buffer did not have
+	 * enough room; (2) we had not passed in all the data and the error
+	 * indicated a fragment of a multibyte character. In both cases grow the
+	 * buffer, advance the source pointer and loop; otherwise return result.
+	 * NOTE(review): the test below requires TCL_ENCODING_END to be SET for
+	 * case (2), i.e. all data was passed in - confirm which is intended.
+	 */
+	if ((result != TCL_CONVERT_NOSPACE) &&
+	    !(result == TCL_CONVERT_MULTIBYTE && (flags & TCL_ENCODING_END))) {
+	    Tcl_Size nBytesProcessed = (src - srcStart);
 
-	src += srcRead;
-	if (result != TCL_CONVERT_NOSPACE) {
 	    Tcl_DStringSetLength(dstPtr, soFar);
-	    return (result == TCL_OK) ? TCL_INDEX_NONE : (size_t)(src - srcStart);
+	    if (errorLocPtr) {
+		/*
+		 * Do not write error message into interpreter if caller
+		 * wants to know error location.
+		 */
+		*errorLocPtr = result == TCL_OK ? TCL_INDEX_NONE : nBytesProcessed;
+	    } else {
+		/* Caller wants error message on failure */
+		if (result != TCL_OK && interp != NULL) {
+		    char buf[TCL_INTEGER_SPACE];
+		    sprintf(buf, "%" TCL_Z_MODIFIER "u", nBytesProcessed);
+		    Tcl_SetObjResult(
+			interp,
+			Tcl_ObjPrintf("unexpected byte sequence starting at index %"
+				      TCL_Z_MODIFIER "u: '\\x%02X'",
+				      nBytesProcessed,
+				      UCHAR(srcStart[nBytesProcessed])));
+		    Tcl_SetErrorCode(
+			interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", buf, NULL);
+		}
+	    }
+	    return result;
 	}
+
 	flags &= ~TCL_ENCODING_START;
-	srcLen -= srcRead;
+	srcLen -= srcChunkRead;
+
 	if (Tcl_DStringLength(dstPtr) == 0) {
 	    Tcl_DStringSetLength(dstPtr, dstLen);
 	}
@@ -1227,8 +1349,8 @@ Tcl_ExternalToUtf(
     Tcl_Encoding encoding,	/* The encoding for the source string, or NULL
 				 * for the default system encoding. */
     const char *src,		/* Source string in specified encoding. */
-    size_t srcLen,		/* Source string length in bytes, or -1
-				 * for encoding-specific string length. */
+    Tcl_Size srcLen,		/* Source string length in bytes, or TCL_INDEX_NONE for
+				 * encoding-specific string length. */
     int flags,			/* Conversion control flags. */
     Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
 				 * information used during a piecewise
@@ -1237,7 +1359,7 @@ Tcl_ExternalToUtf(
 				 * routine under control of flags argument. */
     char *dst,			/* Output buffer in which converted string is
 				 * stored. */
-    size_t dstLen,		/* The maximum length of output buffer in
+    Tcl_Size dstLen,		/* The maximum length of output buffer in
 				 * bytes. */
     int *srcReadPtr,		/* Filled with the number of bytes from the
 				 * source string that were converted. This may
@@ -1272,6 +1394,13 @@ Tcl_ExternalToUtf(
 	flags |= TCL_ENCODING_START | TCL_ENCODING_END;
 	statePtr = &state;
     }
+    if (srcLen > INT_MAX) {
+	srcLen = INT_MAX;
+	flags &= ~TCL_ENCODING_END;
+    }
+    if (dstLen > INT_MAX) {
+	dstLen = INT_MAX;
+    }
     if (srcReadPtr == NULL) {
 	srcReadPtr = &srcRead;
     }
@@ -1286,6 +1415,9 @@ Tcl_ExternalToUtf(
     }
 
     if (!noTerminate) {
+	if ((int) dstLen < 1) {
+	    return TCL_CONVERT_NOSPACE;
+	}
 	/*
 	 * If there are any null characters in the middle of the buffer,
 	 * they will converted to the UTF-8 null character (\xC0\x80). To get
@@ -1294,9 +1426,13 @@ Tcl_ExternalToUtf(
 	 */
 
 	dstLen--;
+    } else {
+	if (dstLen <= 0 && srcLen > 0) {
+	    return TCL_CONVERT_NOSPACE;
+	}
     }
     if (encodingPtr->toUtfProc == UtfToUtfProc) {
-	flags |= TCL_ENCODING_MODIFIED | TCL_ENCODING_UTF;
+	flags |= ENCODING_INPUT;
     }
     do {
 	Tcl_EncodingState savedState = *statePtr;
@@ -1344,12 +1480,13 @@ Tcl_UtfToExternalDString(
     Tcl_Encoding encoding,	/* The encoding for the converted string, or
 				 * NULL for the default system encoding. */
     const char *src,		/* Source string in UTF-8. */
-    size_t srcLen,		/* Source string length in bytes, or -1 for
+    Tcl_Size srcLen,		/* Source string length in bytes, or < 0 for
 				 * strlen(). */
     Tcl_DString *dstPtr)	/* Uninitialized or free DString in which the
 				 * converted string is stored. */
 {
-    Tcl_UtfToExternalDStringEx(encoding, src, srcLen, TCL_ENCODING_NOCOMPLAIN, dstPtr);
+    Tcl_UtfToExternalDStringEx(
+	NULL, encoding, src, srcLen, TCL_ENCODING_PROFILE_TCL8, dstPtr, NULL);
     return Tcl_DStringValue(dstPtr);
 }
 
@@ -1362,45 +1499,73 @@ Tcl_UtfToExternalDString(
  *	Convert a source buffer from UTF-8 to the specified encoding.
  *	The parameter flags controls the behavior, if any of the bytes in
  *	the source buffer are invalid or cannot be represented in the
- *	target encoding.
- *	Possible flags values:
- *	TCL_ENCODING_NOCOMPLAIN: replace invalid characters/bytes by a default
- *	fallback character. Always return -1 (Default in Tcl 8.7).
- *	TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00.
- *	Only valid for "utf-8" and "cesu-8". This flag may be used together
- *	with the other flags.
+ *	target encoding. It should be composed by OR-ing the following:
+ *	- *At most one* of TCL_ENCODING_PROFILE_{DEFAULT,TCL8,STRICT}
+ *	- TCL_ENCODING_STOPONERROR: Backward compatibility. Sets the profile
+ *	  to TCL_ENCODING_PROFILE_STRICT, overriding any specified profile flags.
  *
  * Results:
- *	The converted bytes are stored in the DString, which is then NULL
- *	terminated in an encoding-specific manner. The return value is
- *	the error position in the source string or -1 if no conversion error
- *	is reported.
+ *      The return value is one of
+ *        TCL_OK: success. Converted string in *dstPtr
+ *        TCL_ERROR: error in passed parameters. Error message in interp
+ *        TCL_CONVERT_MULTIBYTE: source ends in truncated multibyte sequence
+ *        TCL_CONVERT_SYNTAX: source is not conformant to encoding definition
+ *        TCL_CONVERT_UNKNOWN: source contained a character that could not
+ *            be represented in target encoding.
  *
  * Side effects:
- *	None.
+ *
+ *      TCL_OK: The converted bytes are stored in the DString and NUL
+ *          terminated in an encoding-specific manner
+ *      TCL_ERROR: an error, message is stored in the interp if not NULL.
+ *      TCL_CONVERT_*: if errorLocPtr is NULL, an error message is stored
+ *          in the interpreter (if not NULL). If errorLocPtr is not NULL,
+ *          no error message is stored as it is expected the caller is
+ *          interested in whatever is decoded so far and not treating this
+ *          as an error condition.
+ *
+ *      In addition, *dstPtr is always initialized and must be cleared
+ *      by the caller irrespective of the return code.
  *
  *-------------------------------------------------------------------------
  */
 
-size_t
+int
 Tcl_UtfToExternalDStringEx(
+    Tcl_Interp *interp,         /* For error messages. May be NULL. */
     Tcl_Encoding encoding,	/* The encoding for the converted string, or
 				 * NULL for the default system encoding. */
     const char *src,		/* Source string in UTF-8. */
-    size_t srcLen,			/* Source string length in bytes, or < 0 for
+    Tcl_Size srcLen,		/* Source string length in bytes, or < 0 for
 				 * strlen(). */
-    int flags,	/* Conversion control flags. */
-    Tcl_DString *dstPtr)	/* Uninitialized or free DString in which the
+    int flags,			/* Conversion control flags. */
+    Tcl_DString *dstPtr,	/* Uninitialized or free DString in which the
 				 * converted string is stored. */
+    Tcl_Size *errorLocPtr)      /* Where to store the error location
+                                   (or TCL_INDEX_NONE if no error). May
+				   be NULL. */
 {
     char *dst;
     Tcl_EncodingState state;
     const Encoding *encodingPtr;
-    int result, soFar, srcRead, dstWrote, dstChars;
+    int result;
     const char *srcStart = src;
-    size_t dstLen;
+    Tcl_Size dstLen, soFar;
 
+    /* DO FIRST - must always be initialized on return */
     Tcl_DStringInit(dstPtr);
+
+    if (flags & (TCL_ENCODING_START|TCL_ENCODING_END)) {
+	/* TODO - what other flags are illegal? - See TIP 656 */
+	Tcl_SetObjResult(
+	    interp,
+	    Tcl_NewStringObj(
+		"Parameter error: TCL_ENCODING_{START,STOP} bits set in flags.",
+		TCL_INDEX_NONE));
+	Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALFLAGS", NULL);
+	return TCL_ERROR;
+    }
+
     dst = Tcl_DStringValue(dstPtr);
     dstLen = dstPtr->spaceAvl - 1;
 
@@ -1414,24 +1579,75 @@ Tcl_UtfToExternalDStringEx(
     } else if (srcLen == TCL_INDEX_NONE) {
 	srcLen = strlen(src);
     }
-    flags |= TCL_ENCODING_START | TCL_ENCODING_END;
+
+    flags |= TCL_ENCODING_START;
     while (1) {
+	int srcChunkLen, srcChunkRead;
+	int dstChunkLen, dstChunkWrote, dstChunkChars;
+
+	if (srcLen > INT_MAX) {
+	    srcChunkLen = INT_MAX;
+	} else {
+	    srcChunkLen = srcLen;
+	    flags |= TCL_ENCODING_END; /* Last chunk */
+	}
+	dstChunkLen = dstLen > INT_MAX ? INT_MAX : dstLen;
+
 	result = encodingPtr->fromUtfProc(encodingPtr->clientData, src,
-		srcLen, flags, &state, dst, dstLen,
-		&srcRead, &dstWrote, &dstChars);
-	soFar = dst + dstWrote - Tcl_DStringValue(dstPtr);
-
-	src += srcRead;
-	if (result != TCL_CONVERT_NOSPACE) {
-	    int i = soFar + encodingPtr->nullSize - 1;
-	    while (i >= soFar) {
+					  srcChunkLen, flags, &state, dst, dstChunkLen,
+					  &srcChunkRead, &dstChunkWrote, &dstChunkChars);
+	soFar = dst + dstChunkWrote - Tcl_DStringValue(dstPtr);
+
+	/* Move past the part processed in this go around */
+	src += srcChunkRead;
+
+	/*
+	 * Keep looping in two cases: (1) our destination buffer did not have
+	 * enough room; (2) we had not passed in all the data and the error
+	 * indicated a fragment of a multibyte character. In both cases grow the
+	 * buffer, advance the source pointer and loop; otherwise return result.
+	 * NOTE(review): the test below requires TCL_ENCODING_END to be SET for
+	 * case (2), i.e. all data was passed in - confirm which is intended.
+	 */
+	if ((result != TCL_CONVERT_NOSPACE) &&
+	    !(result == TCL_CONVERT_MULTIBYTE && (flags & TCL_ENCODING_END))) {
+	    Tcl_Size nBytesProcessed = (src - srcStart);
+	    size_t i = soFar + encodingPtr->nullSize - 1;
+	    /* Loop as DStringSetLength only stores one nul byte at a time */
+	    while (i+1 >= soFar+1) {
 		Tcl_DStringSetLength(dstPtr, i--);
 	    }
-	    return (result == TCL_OK) ? TCL_INDEX_NONE : (size_t)(src - srcStart);
+	    if (errorLocPtr) {
+		/*
+		 * Do not write error message into interpreter if caller
+		 * wants to know error location.
+		 */
+		*errorLocPtr = result == TCL_OK ? TCL_INDEX_NONE : nBytesProcessed;
+	    } else {
+		/* Caller wants error message on failure */
+		if (result != TCL_OK && interp != NULL) {
+		    Tcl_Size pos = Tcl_NumUtfChars(srcStart, nBytesProcessed);
+		    int ucs4;
+		    char buf[TCL_INTEGER_SPACE];
+		    TclUtfToUCS4(&srcStart[nBytesProcessed], &ucs4);
+		    sprintf(buf, "%" TCL_Z_MODIFIER "u", nBytesProcessed);
+		    Tcl_SetObjResult(
+			interp,
+			Tcl_ObjPrintf(
+			    "unexpected character at index %" TCL_Z_MODIFIER
+			    "u: 'U+%06X'",
+			    pos,
+			    ucs4));
+		    Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE",
+				     buf, NULL);
+		}
+	    }
+	    return result;
 	}
 
 	flags &= ~TCL_ENCODING_START;
-	srcLen -= srcRead;
+	srcLen -= srcChunkRead;
+
 	if (Tcl_DStringLength(dstPtr) == 0) {
 	    Tcl_DStringSetLength(dstPtr, dstLen);
 	}
@@ -1465,8 +1681,8 @@ Tcl_UtfToExternal(
     Tcl_Encoding encoding,	/* The encoding for the converted string, or
 				 * NULL for the default system encoding. */
     const char *src,		/* Source string in UTF-8. */
-    size_t srcLen,		/* Source string length in bytes, or -1
-				 * for strlen(). */
+    Tcl_Size srcLen,		/* Source string length in bytes, or TCL_INDEX_NONE for
+				 * strlen(). */
     int flags,			/* Conversion control flags. */
     Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
 				 * information used during a piecewise
@@ -1475,7 +1691,7 @@ Tcl_UtfToExternal(
 				 * routine under control of flags argument. */
     char *dst,			/* Output buffer in which converted string
 				 * is stored. */
-    size_t dstLen,		/* The maximum length of output buffer in
+    Tcl_Size dstLen,		/* The maximum length of output buffer in
 				 * bytes. */
     int *srcReadPtr,		/* Filled with the number of bytes from the
 				 * source string that were converted. This may
@@ -1507,6 +1723,13 @@ Tcl_UtfToExternal(
 	flags |= TCL_ENCODING_START | TCL_ENCODING_END;
 	statePtr = &state;
     }
+    if (srcLen > INT_MAX) {
+	srcLen = INT_MAX;
+	flags &= ~TCL_ENCODING_END;
+    }
+    if (dstLen > INT_MAX) {
+	dstLen = INT_MAX;
+    }
     if (srcReadPtr == NULL) {
 	srcReadPtr = &srcRead;
     }
@@ -1517,10 +1740,17 @@ Tcl_UtfToExternal(
 	dstCharsPtr = &dstChars;
     }
 
+    if (dstLen < encodingPtr->nullSize) {
+	return TCL_CONVERT_NOSPACE;
+    }
     dstLen -= encodingPtr->nullSize;
     result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, srcLen,
 	    flags, statePtr, dst, dstLen, srcReadPtr,
 	    dstWrotePtr, dstCharsPtr);
+    /*
+     * Buffer is terminated irrespective of result. Not sure this is
+     * reasonable but keep for historical/compatibility reasons.
+     */
     memset(&dst[*dstWrotePtr], '\0', encodingPtr->nullSize);
 
     return result;
@@ -1580,17 +1810,17 @@ OpenEncodingFileChannel(
     const char *name)		/* The name of the encoding file on disk and
 				 * also the name for new encoding. */
 {
-    Tcl_Obj *nameObj = Tcl_NewStringObj(name, -1);
+    Tcl_Obj *nameObj = Tcl_NewStringObj(name, TCL_INDEX_NONE);
     Tcl_Obj *fileNameObj = Tcl_DuplicateObj(nameObj);
     Tcl_Obj *searchPath = Tcl_DuplicateObj(Tcl_GetEncodingSearchPath());
     Tcl_Obj *map = TclGetProcessGlobalValue(&encodingFileMap);
     Tcl_Obj **dir, *path, *directory = NULL;
     Tcl_Channel chan = NULL;
-    size_t i, numDirs;
+    Tcl_Size i, numDirs;
 
     TclListObjGetElementsM(NULL, searchPath, &numDirs, &dir);
     Tcl_IncrRefCount(nameObj);
-    Tcl_AppendToObj(fileNameObj, ".enc", -1);
+    Tcl_AppendToObj(fileNameObj, ".enc", TCL_INDEX_NONE);
     Tcl_IncrRefCount(fileNameObj);
     Tcl_DictObjGet(NULL, map, nameObj, &directory);
 
@@ -1851,7 +2081,7 @@ LoadTableEncoding(
     for (i = 0; i < numPages; i++) {
 	int ch;
 	const char *p;
-	size_t expected = 3 + 16 * (16 * 4 + 1);
+	Tcl_Size expected = 3 + 16 * (16 * 4 + 1);
 
 	if (Tcl_ReadChars(chan, objPtr, expected, 0) != expected) {
 	    return NULL;
@@ -2087,7 +2317,7 @@ LoadEscapeEncoding(
     Tcl_DStringInit(&escapeData);
 
     while (1) {
-	size_t argc;
+	Tcl_Size argc;
 	const char **argv;
 	char *line;
 	Tcl_DString lineString;
@@ -2135,7 +2365,7 @@ LoadEscapeEncoding(
 		Tcl_DStringAppend(&escapeData, (char *) &est, sizeof(est));
 	    }
 	}
-	Tcl_Free((void *)argv);
+	Tcl_Free(argv);
 	Tcl_DStringFree(&lineString);
     }
 
@@ -2222,6 +2452,7 @@ BinaryProc(
     if (dstLen < 0) {
 	dstLen = 0;
     }
+    flags = TclEncodingSetProfileFlags(flags);
     if ((flags & TCL_ENCODING_CHAR_LIMIT) && srcLen > *dstCharsPtr) {
 	srcLen = *dstCharsPtr;
     }
@@ -2242,9 +2473,9 @@ BinaryProc(
  *
  * UtfToUtfProc --
  *
- *	Convert from UTF-8 to UTF-8. Note that the UTF-8 to UTF-8 translation
- *	is not a no-op, because it will turn a stream of improperly formed
- *	UTF-8 into a properly formed stream.
+ *	Converts from UTF-8 to UTF-8. Note that the UTF-8 to UTF-8 translation
+ *	is not a no-op, because it turns a stream of improperly formed
+ *	UTF-8 into a properly-formed stream.
  *
  * Results:
  *	Returns TCL_OK if conversion was successful.
@@ -2255,14 +2486,12 @@ BinaryProc(
  *-------------------------------------------------------------------------
  */
 
-#define STOPONERROR (!(flags & TCL_ENCODING_NOCOMPLAIN))
-
 static int
 UtfToUtfProc(
-    void *clientData,	/* additional flags, e.g. TCL_ENCODING_MODIFIED */
+    void *clientData,		/* additional flags */
     const char *src,		/* Source string in UTF-8. */
     int srcLen,			/* Source string length in bytes. */
-    int flags,			/* Conversion control flags. */
+    int flags,			/* TCL_ENCODING_* conversion control flags. */
     TCL_UNUSED(Tcl_EncodingState *),
     char *dst,			/* Output buffer in which converted string is
 				 * stored. */
@@ -2284,12 +2513,14 @@ UtfToUtfProc(
     const char *dstStart, *dstEnd;
     int result, numChars, charLimit = INT_MAX;
     int ch;
+    int profile;
 
     result = TCL_OK;
 
     srcStart = src;
     srcEnd = src + srcLen;
     srcClose = srcEnd;
+    flags = TclEncodingSetProfileFlags(flags);
     if ((flags & TCL_ENCODING_END) == 0) {
 	srcClose -= 6;
     }
@@ -2299,9 +2530,11 @@ UtfToUtfProc(
 
     dstStart = dst;
     flags |= PTR2INT(clientData);
-    dstEnd = dst + dstLen - ((flags & TCL_ENCODING_UTF) ? TCL_UTF_MAX : 6);
+    dstEnd = dst + dstLen - ((flags & ENCODING_UTF) ? TCL_UTF_MAX : 6);
 
+    profile = CHANNEL_PROFILE_GET(flags);
     for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
+
 	if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
 	    /*
 	     * If there is more string to follow, this will ensure that the
@@ -2315,62 +2548,85 @@ UtfToUtfProc(
 	    result = TCL_CONVERT_NOSPACE;
 	    break;
 	}
-	if (UCHAR(*src) < 0x80 && !((UCHAR(*src) == 0) && (flags & TCL_ENCODING_MODIFIED))) {
+	if (UCHAR(*src) < 0x80 && !((UCHAR(*src) == 0) && (flags & ENCODING_INPUT))) {
 	    /*
 	     * Copy 7bit characters, but skip null-bytes when we are in input
-	     * mode, so that they get converted to 0xC080.
+	     * mode, so that they get converted to \xC0\x80.
 	     */
-
 	    *dst++ = *src++;
-	} else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd)
-		&& (UCHAR(src[1]) == 0x80) && (!(flags & TCL_ENCODING_MODIFIED) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) {
-	    /*
-	     * If in input mode, and -strict is specified: This is an error.
-	     */
-	    if (flags & TCL_ENCODING_MODIFIED) {
-		result = TCL_CONVERT_SYNTAX;
-		break;
+	} else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) &&
+		 (UCHAR(src[1]) == 0x80) &&
+		 (!(flags & ENCODING_INPUT) || PROFILE_STRICT(profile) ||
+		  PROFILE_REPLACE(profile))) {
+	    /* Special sequence \xC0\x80 */
+	    if ((PROFILE_STRICT(profile) || PROFILE_REPLACE(profile)) && (flags & ENCODING_INPUT)) {
+		if (PROFILE_REPLACE(profile)) {
+		   dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
+		   src += 2;
+		} else {
+		   /* PROFILE_STRICT */
+		   result = TCL_CONVERT_SYNTAX;
+		   break;
+		}
+	    } else {
+		/*
+		 * Convert \xC0\x80 to real nulls when we are in output mode,
+		 * irrespective of the profile.
+		 */
+		*dst++ = 0;
+		src += 2;
 	    }
 
-	    /*
-	     * Convert 0xC080 to real nulls when we are in output mode, with or without '-strict'.
-	     */
-	    *dst++ = 0;
-	    src += 2;
 	} else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
 	    /*
+	     * Incomplete byte sequence.
 	     * Always check before using TclUtfToUCS4. Not doing can so
 	     * cause it run beyond the end of the buffer! If we happen such an
 	     * incomplete char its bytes are made to represent themselves
 	     * unless the user has explicitly asked to be told.
 	     */
 
-	    if (flags & TCL_ENCODING_MODIFIED) {
-		if ((STOPONERROR) && (flags & TCL_ENCODING_CHAR_LIMIT)) {
-		    result = TCL_CONVERT_MULTIBYTE;
+	    if (flags & ENCODING_INPUT) {
+		/* Incomplete bytes for modified UTF-8 target */
+		if (PROFILE_STRICT(profile)) {
+		    result = (flags & TCL_ENCODING_CHAR_LIMIT)
+			       ? TCL_CONVERT_MULTIBYTE
+			       : TCL_CONVERT_SYNTAX;
 		    break;
 		}
-	    if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) {
-		result = TCL_CONVERT_SYNTAX;
-		break;
 	    }
-		ch = UCHAR(*src++);
+	    if (PROFILE_REPLACE(profile)) {
+		ch = UNICODE_REPLACE_CHAR;
+		++src;
 	    } else {
+		/* TCL_ENCODING_PROFILE_TCL8 */
 		char chbuf[2];
 		chbuf[0] = UCHAR(*src++); chbuf[1] = 0;
 		TclUtfToUCS4(chbuf, &ch);
 	    }
 	    dst += Tcl_UniCharToUtf(ch, dst);
 	} else {
-	    const char *saveSrc = src;
+	    int isInvalid = 0;
 	    size_t len = TclUtfToUCS4(src, &ch);
-	    if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_MODIFIED)
-		    && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) {
-		result = TCL_CONVERT_SYNTAX;
-		break;
+	    if (flags & ENCODING_INPUT) {
+		if ((len < 2) && (ch != 0)) {
+		    isInvalid = 1;
+		} else if ((ch > 0xFFFF) && !(flags & ENCODING_UTF)) {
+		    isInvalid = 1;
+		}
+		if (isInvalid) {
+		    if (PROFILE_STRICT(profile)) {
+			result = TCL_CONVERT_SYNTAX;
+			break;
+		    } else if (PROFILE_REPLACE(profile)) {
+			ch = UNICODE_REPLACE_CHAR;
+		    }
+		}
 	    }
+
+	    const char *saveSrc = src;
 	    src += len;
-	    if (!(flags & TCL_ENCODING_UTF) && (ch > 0x3FF)) {
+	    if (!(flags & ENCODING_UTF) && !(flags & ENCODING_INPUT) && (ch > 0x3FF)) {
 		if (ch > 0xFFFF) {
 		    /* CESU-8 6-byte sequence for chars > U+FFFF */
 		    ch -= 0x10000;
@@ -2387,33 +2643,45 @@ UtfToUtfProc(
 		*dst++ = (char) ((ch | 0x80) & 0xBF);
 		continue;
 #if TCL_UTF_MAX < 4
-	    } else if ((ch | 0x7FF) == 0xDFFF) {
+	    } else if (SURROGATE(ch)) {
 		/*
 		 * A surrogate character is detected, handle especially.
 		 */
-
-		int low = ch;
-		len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0;
-
-		if (((low & ~0x3FF) != 0xDC00) || (ch & 0x400)) {
-
-		    if (STOPONERROR) {
-			result = TCL_CONVERT_UNKNOWN;
-			src = saveSrc;
-			break;
+		if (PROFILE_STRICT(profile) && (flags & ENCODING_UTF)) {
+		    result = TCL_CONVERT_UNKNOWN;
+		    src = saveSrc;
+		    break;
+		}
+		if (PROFILE_REPLACE(profile)) {
+		    /* TODO - is this right for cesu8 or should we fall through below? */
+		    ch = UNICODE_REPLACE_CHAR;
+		} else {
+		    int low = ch;
+		    len = (src <= srcEnd - 3) ? TclUtfToUCS4(src, &low) : 0;
+
+		    if ((!LOW_SURROGATE(low)) || (ch & 0x400)) {
+
+			if (PROFILE_STRICT(profile)) {
+			    result = TCL_CONVERT_UNKNOWN;
+			    src = saveSrc;
+			    break;
+			}
+			goto cesu8;
 		    }
-		    goto cesu8;
+		    src += len;
+		    dst += Tcl_UniCharToUtf(ch, dst);
+		    ch = low;
 		}
-		src += len;
-		dst += Tcl_UniCharToUtf(ch, dst);
-		ch = low;
 #endif
-	    } else if (STOPONERROR && !(flags & TCL_ENCODING_MODIFIED) && (((ch  & ~0x7FF) == 0xD800))) {
+	    } else if (PROFILE_STRICT(profile) &&
+		       (!(flags & ENCODING_INPUT)) &&
+		       SURROGATE(ch)) {
 		result = TCL_CONVERT_UNKNOWN;
 		src = saveSrc;
 		break;
-	    } else if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)
-		    && (flags & TCL_ENCODING_MODIFIED) && ((ch  & ~0x7FF) == 0xD800)) {
+	    } else if (PROFILE_STRICT(profile) &&
+		       (flags & ENCODING_INPUT) &&
+		       SURROGATE(ch)) {
 		result = TCL_CONVERT_SYNTAX;
 		src = saveSrc;
 		break;
@@ -2470,8 +2738,9 @@ Utf32ToUtfProc(
     const char *srcStart, *srcEnd;
     const char *dstEnd, *dstStart;
     int result, numChars, charLimit = INT_MAX;
-    int ch;
+    int ch = 0, bytesLeft = srcLen % 4;
 
+    flags = TclEncodingSetProfileFlags(flags);
     flags |= PTR2INT(clientData);
     if (flags & TCL_ENCODING_CHAR_LIMIT) {
 	charLimit = *dstCharsPtr;
@@ -2481,11 +2750,26 @@ Utf32ToUtfProc(
     /*
      * Check alignment with utf-32 (4 == sizeof(UTF-32))
      */
+    if (bytesLeft != 0) {
+	/* We have a truncated code unit */
+	result = TCL_CONVERT_MULTIBYTE;
+	srcLen -= bytesLeft;
+    }
 
-    if ((srcLen % 4) != 0) {
+#if TCL_UTF_MAX < 4
+    /*
+     * If last code point is a high surrogate, we cannot handle that yet,
+     * unless we are at the end.
+     */
+
+    if (!(flags & TCL_ENCODING_END) && (srcLen >= 4) &&
+	    ((src[srcLen - ((flags & TCL_ENCODING_LE)?3:2)] & 0xFC) == 0xD8) &&
+	    ((src[srcLen - ((flags & TCL_ENCODING_LE)?2:3)]) == 0) &&
+	    ((src[srcLen - ((flags & TCL_ENCODING_LE)?1:4)]) == 0)) {
 	result = TCL_CONVERT_MULTIBYTE;
-	srcLen &= -4;
+	srcLen-= 4;
     }
+#endif
 
     srcStart = src;
     srcEnd = src + srcLen;
@@ -2499,17 +2783,35 @@ Utf32ToUtfProc(
 	    break;
 	}
 
+#if TCL_UTF_MAX < 4
+	int prev = ch;
+#endif
 	if (flags & TCL_ENCODING_LE) {
 	    ch = (src[3] & 0xFF) << 24 | (src[2] & 0xFF) << 16 | (src[1] & 0xFF) << 8 | (src[0] & 0xFF);
 	} else {
 	    ch = (src[0] & 0xFF) << 24 | (src[1] & 0xFF) << 16 | (src[2] & 0xFF) << 8 | (src[3] & 0xFF);
 	}
-	if  ((unsigned)ch > 0x10FFFF || (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)
-		&& ((ch  & ~0x7FF) == 0xD800))) {
-	    if (STOPONERROR) {
+#if TCL_UTF_MAX < 4
+	if (HIGH_SURROGATE(prev) && !LOW_SURROGATE(ch)) {
+	    /* Bug [10c2c17c32]. If Hi surrogate not followed by Lo surrogate, finish 3-byte UTF-8 */
+	    dst += Tcl_UniCharToUtf(-1, dst);
+	}
+#endif
+
+	if ((unsigned)ch > 0x10FFFF) {
+	    ch = UNICODE_REPLACE_CHAR;
+	    if (PROFILE_STRICT(flags)) {
 		result = TCL_CONVERT_SYNTAX;
 		break;
 	    }
+	} else if (PROFILE_STRICT(flags) && SURROGATE(ch)) {
+	    result = TCL_CONVERT_SYNTAX;
+#if TCL_UTF_MAX < 4
+	    ch = 0;
+#endif
+	    break;
+	} else if (PROFILE_REPLACE(flags) && SURROGATE(ch)) {
+	    ch = UNICODE_REPLACE_CHAR;
 	}
 
 	/*
@@ -2517,12 +2819,39 @@ Utf32ToUtfProc(
 	 * unsigned short-size data.
 	 */
 
-	if ((ch > 0) && (ch < 0x80)) {
+	if ((unsigned)ch - 1 < 0x7F) {
 	    *dst++ = (ch & 0xFF);
 	} else {
 	    dst += Tcl_UniCharToUtf(ch, dst);
 	}
-	src += sizeof(unsigned int);
+	src += 4;
+    }
+
+    /*
+     * If we had a truncated code unit at the end AND this is the last
+     * fragment AND profile is not "strict", stick FFFD in its place.
+     */
+#if TCL_UTF_MAX < 4
+    if (HIGH_SURROGATE(ch)) {
+	/* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */
+	dst += Tcl_UniCharToUtf(-1, dst);
+    }
+#endif
+
+    if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) {
+	if (dst > dstEnd) {
+	    result = TCL_CONVERT_NOSPACE;
+	} else {
+	    if (PROFILE_STRICT(flags)) {
+		result = TCL_CONVERT_SYNTAX;
+	    } else {
+		/* PROFILE_REPLACE or PROFILE_TCL8 */
+		result = TCL_OK;
+		dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
+		numChars++;
+		src += bytesLeft; /* Go past truncated code unit */
+	    }
+	}
     }
 
     *srcReadPtr = src - srcStart;
@@ -2577,6 +2906,7 @@ UtfToUtf32Proc(
     srcStart = src;
     srcEnd = src + srcLen;
     srcClose = srcEnd;
+    flags = TclEncodingSetProfileFlags(flags);
     if ((flags & TCL_ENCODING_END) == 0) {
 	srcClose -= TCL_UTF_MAX;
     }
@@ -2601,11 +2931,14 @@ UtfToUtf32Proc(
 	    break;
 	}
 	len = TclUtfToUCS4(src, &ch);
-	if ((ch  & ~0x7FF) == 0xD800) {
-	    if (STOPONERROR) {
+	if (SURROGATE(ch)) {
+	    if (PROFILE_STRICT(flags)) {
 		result = TCL_CONVERT_UNKNOWN;
 		break;
 	    }
+	    if (PROFILE_REPLACE(flags)) {
+		ch = UNICODE_REPLACE_CHAR;
+	    }
 	}
 	src += len;
 	if (flags & TCL_ENCODING_LE) {
@@ -2669,8 +3002,9 @@ Utf16ToUtfProc(
     const char *srcStart, *srcEnd;
     const char *dstEnd, *dstStart;
     int result, numChars, charLimit = INT_MAX;
-    unsigned short ch;
+    unsigned short ch = 0;
 
+    flags = TclEncodingSetProfileFlags(flags);
     flags |= PTR2INT(clientData);
     if (flags & TCL_ENCODING_CHAR_LIMIT) {
 	charLimit = *dstCharsPtr;
@@ -2687,10 +3021,11 @@ Utf16ToUtfProc(
     }
 
     /*
-     * If last code point is a high surrogate, we cannot handle that yet.
+     * If last code point is a high surrogate, we cannot handle that yet,
+     * unless we are at the end.
      */
 
-    if ((srcLen >= 2) &&
+    if (!(flags & TCL_ENCODING_END) && (srcLen >= 2) &&
 	    ((src[srcLen - ((flags & TCL_ENCODING_LE)?1:2)] & 0xFC) == 0xD8)) {
 	result = TCL_CONVERT_MULTIBYTE;
 	srcLen-= 2;
@@ -2702,29 +3037,100 @@ Utf16ToUtfProc(
     dstStart = dst;
     dstEnd = dst + dstLen - TCL_UTF_MAX;
 
-    for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
+    for (numChars = 0; src < srcEnd && numChars <= charLimit; src += 2, numChars++) {
 	if (dst > dstEnd) {
 	    result = TCL_CONVERT_NOSPACE;
 	    break;
 	}
 
+	unsigned short prev = ch;
 	if (flags & TCL_ENCODING_LE) {
 	    ch = (src[1] & 0xFF) << 8 | (src[0] & 0xFF);
 	} else {
 	    ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF);
 	}
+	if (HIGH_SURROGATE(prev) && !LOW_SURROGATE(ch)) {
+	    if (PROFILE_STRICT(flags)) {
+		result = TCL_CONVERT_SYNTAX;
+		src -= 2; /* Go back to beginning of high surrogate */
+		dst--; /* Also undo writing a single byte too much */
+		numChars--;
+		break;
+	    } else if (PROFILE_REPLACE(flags)) {
+		/*
+		 * Previous loop wrote a single byte to mark the high surrogate.
+		 * Replace it with the replacement character. Further, restart
+		 * current loop iteration since need to recheck destination space
+		 * and reset processing of current character.
+		 */
+		ch = UNICODE_REPLACE_CHAR;
+		dst--;
+		dst += Tcl_UniCharToUtf(ch, dst);
+		src -= 2;
+		numChars--;
+		continue;
+	    } else {
+	    /* Bug [10c2c17c32]. If Hi surrogate not followed by Lo surrogate, finish 3-byte UTF-8 */
+		dst += Tcl_UniCharToUtf(-1, dst);
+	    }
+	}
 
 	/*
 	 * Special case for 1-byte utf chars for speed. Make sure we work with
 	 * unsigned short-size data.
 	 */
 
-	if (ch && ch < 0x80) {
+	if ((unsigned)ch - 1 < 0x7F) {
 	    *dst++ = (ch & 0xFF);
-	} else {
+	} else if (HIGH_SURROGATE(prev) || HIGH_SURROGATE(ch)) {
 	    dst += Tcl_UniCharToUtf(ch | TCL_COMBINE, dst);
+	} else if (LOW_SURROGATE(ch) && !PROFILE_TCL8(flags)) {
+	    /* Lo surrogate not preceded by Hi surrogate and not tcl8 profile */
+	    if (PROFILE_STRICT(flags)) {
+		result = TCL_CONVERT_UNKNOWN;
+		break;
+	    } else {
+		/* PROFILE_REPLACE */
+		dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
+	    }
+	} else {
+	    dst += Tcl_UniCharToUtf(ch, dst);
+	}
+    }
+
+    if (HIGH_SURROGATE(ch)) {
+	if (PROFILE_STRICT(flags)) {
+	    result = TCL_CONVERT_SYNTAX;
+	    src -= 2;
+	    dst--;
+	    numChars--;
+	} else if (PROFILE_REPLACE(flags)) {
+	    dst--;
+	    dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
+	} else {
+	    /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */
+	    dst += Tcl_UniCharToUtf(-1, dst);
+	}
+    }
+
+    /*
+     * If we had a truncated code unit at the end AND this is the last
+     * fragment AND profile is not "strict", stick FFFD in its place.
+     */
+    if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) {
+	if (dst > dstEnd) {
+	    result = TCL_CONVERT_NOSPACE;
+	} else {
+	    if (PROFILE_STRICT(flags)) {
+		result = TCL_CONVERT_SYNTAX;
+	    } else {
+		/* PROFILE_REPLACE or PROFILE_TCL8 */
+		result = TCL_OK;
+		dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
+		numChars++;
+		src++; /* Go past truncated code unit */
+	    }
 	}
-	src += sizeof(unsigned short);
     }
 
     *srcReadPtr = src - srcStart;
@@ -2779,6 +3185,7 @@ UtfToUtf16Proc(
     srcStart = src;
     srcEnd = src + srcLen;
     srcClose = srcEnd;
+    flags = TclEncodingSetProfileFlags(flags);
     if ((flags & TCL_ENCODING_END) == 0) {
 	srcClose -= TCL_UTF_MAX;
     }
@@ -2803,11 +3210,14 @@ UtfToUtf16Proc(
 	    break;
 	}
 	len = TclUtfToUCS4(src, &ch);
-	if ((ch  & ~0x7FF) == 0xD800) {
-	    if (STOPONERROR) {
+	if (SURROGATE(ch)) {
+	    if (PROFILE_STRICT(flags)) {
 		result = TCL_CONVERT_UNKNOWN;
 		break;
 	    }
+	    if (PROFILE_REPLACE(flags)) {
+		ch = UNICODE_REPLACE_CHAR;
+	    }
 	}
 	src += len;
 	if (flags & TCL_ENCODING_LE) {
@@ -2878,12 +3288,10 @@ UtfToUcs2Proc(
 				 * output buffer. */
 {
     const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
-    int result, numChars;
-#if TCL_UTF_MAX < 4
-    int len;
-#endif
+    int result, numChars, len;
     Tcl_UniChar ch = 0;
 
+    flags = TclEncodingSetProfileFlags(flags);
     flags |= PTR2INT(clientData);
     srcStart = src;
     srcEnd = src + srcLen;
@@ -2911,17 +3319,32 @@ UtfToUcs2Proc(
 	    break;
 	}
 #if TCL_UTF_MAX < 4
-	src += (len = TclUtfToUniChar(src, &ch));
+	len = TclUtfToUniChar(src, &ch);
 	if ((ch >= 0xD800) && (len < 3)) {
+	    if (PROFILE_STRICT(flags)) {
+		result = TCL_CONVERT_UNKNOWN;
+		break;
+	    }
+	    src += len;
 	    src += TclUtfToUniChar(src, &ch);
-	    ch = 0xFFFD;
+	    ch = UNICODE_REPLACE_CHAR;
 	}
 #else
-	src += TclUtfToUniChar(src, &ch);
+	len = TclUtfToUniChar(src, &ch);
 	if (ch > 0xFFFF) {
-	    ch = 0xFFFD;
+	    if (PROFILE_STRICT(flags)) {
+		result = TCL_CONVERT_UNKNOWN;
+		break;
+	    }
+	    ch = UNICODE_REPLACE_CHAR;
 	}
 #endif
+	if (PROFILE_STRICT(flags) && SURROGATE(ch)) {
+	    result = TCL_CONVERT_SYNTAX;
+	    break;
+	}
+
+	src += len;
 
 	/*
 	 * Need to handle this in a way that won't cause misalignment by
@@ -2991,6 +3414,7 @@ TableToUtfProc(
     const unsigned short *pageZero;
     TableEncodingData *dataPtr = (TableEncodingData *)clientData;
 
+    flags = TclEncodingSetProfileFlags(flags);
     if (flags & TCL_ENCODING_CHAR_LIMIT) {
 	charLimit = *dstCharsPtr;
     }
@@ -3014,30 +3438,47 @@ TableToUtfProc(
 	if (prefixBytes[byte]) {
 	    src++;
 	    if (src >= srcEnd) {
-		src--;
-		result = TCL_CONVERT_MULTIBYTE;
-		break;
+		if (!(flags & TCL_ENCODING_END)) {
+		    src--;
+		    result = TCL_CONVERT_MULTIBYTE;
+		    break;
+		} else if (PROFILE_STRICT(flags)) {
+		    src--;
+		    result = TCL_CONVERT_SYNTAX;
+		    break;
+		} else if (PROFILE_REPLACE(flags)) {
+		    ch = UNICODE_REPLACE_CHAR;
+		} else {
+		    src--; /* See bug [bdcb5126c0] */
+		    result = TCL_CONVERT_MULTIBYTE;
+		    break;
+		}
+	    } else {
+		ch = toUnicode[byte][*((unsigned char *)src)];
 	    }
-	    ch = toUnicode[byte][*((unsigned char *) src)];
 	} else {
 	    ch = pageZero[byte];
 	}
 	if ((ch == 0) && (byte != 0)) {
-	    if (STOPONERROR) {
+	    if (PROFILE_STRICT(flags)) {
 		result = TCL_CONVERT_SYNTAX;
 		break;
 	    }
 	    if (prefixBytes[byte]) {
 		src--;
 	    }
-	    ch = (Tcl_UniChar) byte;
+	    if (PROFILE_REPLACE(flags)) {
+		ch = UNICODE_REPLACE_CHAR;
+	    } else {
+		ch = (Tcl_UniChar)byte;
+	    }
 	}
 
 	/*
 	 * Special case for 1-byte utf chars for speed.
 	 */
 
-	if (ch && ch < 0x80) {
+	if ((unsigned)ch - 1 < 0x7F) {
 	    *dst++ = (char) ch;
 	} else {
 	    dst += Tcl_UniCharToUtf(ch, dst);
@@ -3107,6 +3548,7 @@ TableFromUtfProc(
     srcStart = src;
     srcEnd = src + srcLen;
     srcClose = srcEnd;
+    flags = TclEncodingSetProfileFlags(flags);
     if ((flags & TCL_ENCODING_END) == 0) {
 	srcClose -= TCL_UTF_MAX;
     }
@@ -3139,11 +3581,11 @@ TableFromUtfProc(
 	    word = fromUnicode[(ch >> 8)][ch & 0xFF];
 
 	if ((word == 0) && (ch != 0)) {
-	    if ((STOPONERROR) && (flags & TCL_ENCODING_CHAR_LIMIT)) {
+	    if (PROFILE_STRICT(flags)) {
 		result = TCL_CONVERT_UNKNOWN;
 		break;
 	    }
-	    word = dataPtr->fallback;
+	    word = dataPtr->fallback; /* Both profiles REPLACE and TCL8 */
 	}
 	if (prefixBytes[(word >> 8)] != 0) {
 	    if (dst + 1 > dstEnd) {
@@ -3213,6 +3655,7 @@ Iso88591ToUtfProc(
     const char *dstEnd, *dstStart;
     int result, numChars, charLimit = INT_MAX;
 
+    flags = TclEncodingSetProfileFlags(flags);
     if (flags & TCL_ENCODING_CHAR_LIMIT) {
 	charLimit = *dstCharsPtr;
     }
@@ -3236,7 +3679,7 @@ Iso88591ToUtfProc(
 	 * Special case for 1-byte utf chars for speed.
 	 */
 
-	if (ch && ch < 0x80) {
+	if ((unsigned)ch - 1 < 0x7F) {
 	    *dst++ = (char) ch;
 	} else {
 	    dst += Tcl_UniCharToUtf(ch, dst);
@@ -3297,6 +3740,7 @@ Iso88591FromUtfProc(
     srcStart = src;
     srcEnd = src + srcLen;
     srcClose = srcEnd;
+    flags = TclEncodingSetProfileFlags(flags);
     if ((flags & TCL_ENCODING_END) == 0) {
 	srcClose -= TCL_UTF_MAX;
     }
@@ -3327,7 +3771,7 @@ Iso88591FromUtfProc(
 		|| ((ch >= 0xD800) && (len < 3))
 #endif
 		) {
-	    if (STOPONERROR) {
+	    if (PROFILE_STRICT(flags)) {
 		result = TCL_CONVERT_UNKNOWN;
 		break;
 	    }
@@ -3340,7 +3784,7 @@ Iso88591FromUtfProc(
 	     * Plunge on, using '?' as a fallback character.
 	     */
 
-	    ch = (Tcl_UniChar) '?';
+	    ch = (Tcl_UniChar) '?'; /* Profiles TCL8 and REPLACE */
 	}
 
 	if (dst > dstEnd) {
@@ -3444,6 +3888,7 @@ EscapeToUtfProc(
     int state, result, numChars, charLimit = INT_MAX;
     const char *dstStart, *dstEnd;
 
+    flags = TclEncodingSetProfileFlags(flags);
     if (flags & TCL_ENCODING_CHAR_LIMIT) {
 	charLimit = *dstCharsPtr;
     }
@@ -3554,9 +3999,10 @@ EscapeToUtfProc(
 
 	    if ((checked == dataPtr->numSubTables + 2)
 		    || (flags & TCL_ENCODING_END)) {
-		if (!STOPONERROR) {
+		if (!PROFILE_STRICT(flags)) {
 		    /*
-		     * Skip the unknown escape sequence.
+		     * Skip the unknown escape sequence. TODO - bug?
+		     * Maybe replace with UNICODE_REPLACE_CHAR?
 		     */
 
 		    src += longest;
@@ -3666,6 +4112,7 @@ EscapeFromUtfProc(
     srcStart = src;
     srcEnd = src + srcLen;
     srcClose = srcEnd;
+    flags = TclEncodingSetProfileFlags(flags);
     if ((flags & TCL_ENCODING_END) == 0) {
 	srcClose -= TCL_UTF_MAX;
     }
@@ -3729,7 +4176,7 @@ EscapeFromUtfProc(
 
 	    if (word == 0) {
 		state = oldState;
-		if (STOPONERROR) {
+		if (PROFILE_STRICT(flags)) {
 		    result = TCL_CONVERT_UNKNOWN;
 		    break;
 		}
@@ -3980,12 +4427,11 @@ unilen4(
 static void
 InitializeEncodingSearchPath(
     char **valuePtr,
-    size_t *lengthPtr,
+    TCL_HASH_TYPE *lengthPtr,
     Tcl_Encoding *encodingPtr)
 {
     const char *bytes;
-    size_t i, numDirs;
-    size_t numBytes;
+    Tcl_Size i, numDirs, numBytes;
     Tcl_Obj *libPathObj, *encodingObj, *searchPathObj;
 
     TclNewLiteralStringObj(encodingObj, "encoding");
@@ -4024,6 +4470,166 @@ InitializeEncodingSearchPath(
 }
 
 /*
+ *------------------------------------------------------------------------
+ *
+ * TclEncodingProfileNameToId --
+ *
+ *	Maps an encoding profile name to its integer equivalent.
+ *
+ * Results:
+ *	TCL_OK on success; TCL_ERROR if profileName is not a known profile.
+ *
+ * Side effects:
+ *	Stores the profile value in *profilePtr on success. On failure,
+ *	leaves an error message and error code in interp if it is non-NULL.
+ *------------------------------------------------------------------------
+ */
+int
+TclEncodingProfileNameToId(
+    Tcl_Interp *interp,		/* For error messages. May be NULL */
+    const char *profileName,	/* Name of profile */
+    int *profilePtr)  		/* Output; written only on success */
+{
+    size_t i;
+    size_t numProfiles = sizeof(encodingProfiles) / sizeof(encodingProfiles[0]);
+
+    for (i = 0; i < numProfiles; ++i) {
+	if (!strcmp(profileName, encodingProfiles[i].name)) {
+	    *profilePtr = encodingProfiles[i].value;
+	    return TCL_OK;
+	}
+    }
+    if (interp) {
+	Tcl_Obj *errorObj;
+	/* Build "must be a, b, or c"; assumes at least two profiles */
+	errorObj =
+	    Tcl_ObjPrintf("bad profile name \"%s\": must be",
+		profileName);
+	for (i = 0; i < (numProfiles - 1); ++i) {
+	    Tcl_AppendStringsToObj(
+		errorObj, " ", encodingProfiles[i].name, ",", NULL);
+	}
+	Tcl_AppendStringsToObj(
+	    errorObj, " or ", encodingProfiles[numProfiles-1].name, NULL);
+
+	Tcl_SetObjResult(interp, errorObj);
+	Tcl_SetErrorCode(
+	    interp, "TCL", "ENCODING", "PROFILE", profileName, NULL);
+    }
+    return TCL_ERROR;
+}
+
+/*
+ *------------------------------------------------------------------------
+ *
+ * TclEncodingProfileIdToName --
+ *
+ *	Maps an encoding profile value to its name.
+ *
+ * Results:
+ *	Pointer to the name or NULL on failure. Caller must not modify
+ *	the string and must make a copy to hold on to it.
+ *
+ * Side effects:
+ *	On failure, leaves an error message in interp if it is non-NULL.
+ *------------------------------------------------------------------------
+ */
+const char *
+TclEncodingProfileIdToName(
+    Tcl_Interp *interp,		/* For error messages. May be NULL */
+    int profileValue)		/* Profile #define value */
+{
+    size_t i;
+
+    for (i = 0; i < sizeof(encodingProfiles) / sizeof(encodingProfiles[0]); ++i) {
+	if (profileValue == encodingProfiles[i].value) {
+	    return encodingProfiles[i].name;
+	}
+    }
+    if (interp) {
+	Tcl_SetObjResult(
+	    interp,
+	    Tcl_ObjPrintf(
+		"Internal error. Bad profile id \"%d\".",
+		profileValue));
+	Tcl_SetErrorCode(
+	    interp, "TCL", "ENCODING", "PROFILEID", NULL);
+    }
+    return NULL;
+}
+
+/*
+ *------------------------------------------------------------------------
+ *
+ * TclEncodingSetProfileFlags --
+ *
+ *	Maps the flags supported in the encoding C APIs to internal flags.
+ *
+ *	For backward compatibility reasons, TCL_ENCODING_STOPONERROR is
+ *	mapped to TCL_ENCODING_PROFILE_STRICT, overwriting any profile
+ *	specified.
+ *
+ *	If no profile or an invalid profile is specified, it is set to
+ *	the default.
+ *
+ * Results:
+ *	The flags with the profile normalized as described above.
+ *
+ * Side effects:
+ *	None.
+ *
+ *------------------------------------------------------------------------
+ */
+int TclEncodingSetProfileFlags(int flags)
+{
+    if (flags & TCL_ENCODING_STOPONERROR) {
+	CHANNEL_PROFILE_SET(flags, TCL_ENCODING_PROFILE_STRICT);
+    } else {
+	int profile = CHANNEL_PROFILE_GET(flags);
+	switch (profile) {
+	case TCL_ENCODING_PROFILE_TCL8:
+	case TCL_ENCODING_PROFILE_STRICT:
+	case TCL_ENCODING_PROFILE_REPLACE:
+	    break; /* Recognized profile; leave flags untouched */
+	case 0: /* Unspecified by caller */
+	default: /* Unrecognized profile value */
+	    CHANNEL_PROFILE_SET(flags, TCL_ENCODING_PROFILE_DEFAULT);
+	    break;
+	}
+    }
+    return flags;
+}
+
+/*
+ *------------------------------------------------------------------------
+ *
+ * TclGetEncodingProfiles --
+ *
+ *	Builds a list of all encoding profile names, in table order.
+ *
+ * Results:
+ *	None.
+ *
+ * Side effects:
+ *	The list of profile names is stored in the interpreter result.
+ *
+ *------------------------------------------------------------------------
+ */
+void
+TclGetEncodingProfiles(Tcl_Interp *interp)
+{
+    size_t i, n;
+    Tcl_Obj *objPtr;
+    n = sizeof(encodingProfiles) / sizeof(encodingProfiles[0]); /* Table entry count */
+    objPtr = Tcl_NewListObj(n, NULL); /* NOTE(review): presumably empty list sized for n - confirm */
+    for (i = 0; i < n; ++i) {
+	Tcl_ListObjAppendElement(
+	    interp, objPtr, Tcl_NewStringObj(encodingProfiles[i].name, TCL_INDEX_NONE));
+    }
+    Tcl_SetObjResult(interp, objPtr);
+}
+
+/*
  * Local Variables:
  * mode: c
  * c-basic-offset: 4