From b185a55c3b335a847e148680c628136c7c16640f Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Thu, 9 Feb 2023 07:55:29 +0000
Subject: Add 4 testcases, showing that the bug fix introduces another (minor)
 problem. To be fixed soon

---
 tests/encoding.test | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tests/encoding.test b/tests/encoding.test
index 05d9918..e42c3b9 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -485,6 +485,18 @@ test encoding-16.8 {Utf32ToUtfProc} -body {
     set val [encoding convertfrom -nocomplain utf-32 \x41\x00\x00\x41]
     list $val [format %x [scan $val %c]]
 } -result "\uFFFD fffd"
+test encoding-16.9 {Utf32ToUtfProc} -body {
+    encoding convertfrom utf-32le \x00\xD8\x00\x00
+} -result \uD800
+test encoding-16.10 {Utf32ToUtfProc} -body {
+    encoding convertfrom utf-32le \x00\xDC\x00\x00
+} -result \uDC00
+test encoding-16.11 {Utf32ToUtfProc} -body {
+    encoding convertfrom utf-32le \x00\xD8\x00\x00\x00\xDC\x00\x00
+} -result \uD800\uDC00
+test encoding-16.12 {Utf32ToUtfProc} -body {
+    encoding convertfrom utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00
+} -result \uDC00\uD800
 
 test encoding-16.9 {
     Utf16ToUtfProc, Tcl_UniCharToUtf, surrogate pairs in utf-16
-- 
cgit v0.12


From fd83fb931e43901b77f4e480ef63841e10b39f22 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Thu, 9 Feb 2023 19:52:00 +0000
Subject: Add 4 more testcases, showing that the same bug is present in utf-16
 as well. Also fix the bug (really, now!)

---
 generic/tclEncoding.c | 44 ++++++++++++++++++++++++++++++++++++--------
 tests/encoding.test   | 12 ++++++++++++
 2 files changed, 48 insertions(+), 8 deletions(-)

diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index d19e237..0941f14 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2531,7 +2531,7 @@ Utf32ToUtfProc(
     const char *srcStart, *srcEnd;
     const char *dstEnd, *dstStart;
     int result, numChars, charLimit = INT_MAX;
-    int ch;
+    int ch = 0;
 
     flags |= PTR2INT(clientData);
     if (flags & TCL_ENCODING_CHAR_LIMIT) {
@@ -2548,6 +2548,19 @@ Utf32ToUtfProc(
 	srcLen &= -4;
     }
 
+    /*
+     * If last code point is a high surrogate, we cannot handle that yet,
+     * unless we are at the end.
+     */
+
+    if (!(flags & TCL_ENCODING_END) && (srcLen >= 4) &&
+	    ((src[srcLen - ((flags & TCL_ENCODING_LE)?3:2)] & 0xFC) == 0xD8) &&
+	    ((src[srcLen - ((flags & TCL_ENCODING_LE)?2:3)]) == 0) &&
+	    ((src[srcLen - ((flags & TCL_ENCODING_LE)?1:4)]) == 0)) {
+	result = TCL_CONVERT_MULTIBYTE;
+	srcLen-= 4;
+    }
+
     srcStart = src;
     srcEnd = src + srcLen;
 
@@ -2560,11 +2573,16 @@ Utf32ToUtfProc(
 	    break;
 	}
 
+	int prev = ch;
 	if (flags & TCL_ENCODING_LE) {
 	    ch = (src[3] & 0xFF) << 24 | (src[2] & 0xFF) << 16 | (src[1] & 0xFF) << 8 | (src[0] & 0xFF);
 	} else {
 	    ch = (src[0] & 0xFF) << 24 | (src[1] & 0xFF) << 16 | (src[2] & 0xFF) << 8 | (src[3] & 0xFF);
 	}
+	if (((prev  & ~0x3FF) == 0xD800) && ((ch  & ~0x3FF) != 0xDC00)) {
+	    /* Bug [10c2c17c32]. If Hi surrogate not followed by Lo surrogate, finish 3-byte UTF-8 */
+	    dst += Tcl_UniCharToUtf(-1, dst);
+	}
 	if  ((unsigned)ch > 0x10FFFF || (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)
 		&& ((ch  & ~0x7FF) == 0xD800))) {
 	    if (STOPONERROR) {
@@ -2582,14 +2600,14 @@ Utf32ToUtfProc(
 	    *dst++ = (ch & 0xFF);
 	} else {
 	    dst += Tcl_UniCharToUtf(ch, dst);
-	    if ((ch  & ~0x3FF) == 0xD800) {
-		/* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */
-		dst += Tcl_UniCharToUtf(-1, dst);
-	    }
 	}
 	src += sizeof(unsigned int);
     }
 
+    if ((ch  & ~0x3FF) == 0xD800) {
+	/* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */
+	dst += Tcl_UniCharToUtf(-1, dst);
+    }
     *srcReadPtr = src - srcStart;
     *dstWrotePtr = dst - dstStart;
     *dstCharsPtr = numChars;
@@ -2734,7 +2752,7 @@ Utf16ToUtfProc(
     const char *srcStart, *srcEnd;
     const char *dstEnd, *dstStart;
     int result, numChars, charLimit = INT_MAX;
-    unsigned short ch;
+    unsigned short ch = 0;
 
     flags |= PTR2INT(clientData);
     if (flags & TCL_ENCODING_CHAR_LIMIT) {
@@ -2752,10 +2770,11 @@ Utf16ToUtfProc(
     }
 
     /*
-     * If last code point is a high surrogate, we cannot handle that yet.
+     * If last code point is a high surrogate, we cannot handle that yet,
+     * unless we are at the end.
      */
 
-    if ((srcLen >= 2) &&
+    if (!(flags & TCL_ENCODING_END) && (srcLen >= 2) &&
 	    ((src[srcLen - ((flags & TCL_ENCODING_LE)?1:2)] & 0xFC) == 0xD8)) {
 	result = TCL_CONVERT_MULTIBYTE;
 	srcLen-= 2;
@@ -2773,11 +2792,16 @@ Utf16ToUtfProc(
 	    break;
 	}
 
+	unsigned short prev = ch;
 	if (flags & TCL_ENCODING_LE) {
 	    ch = (src[1] & 0xFF) << 8 | (src[0] & 0xFF);
 	} else {
 	    ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF);
 	}
+	if (((prev  & ~0x3FF) == 0xD800) && ((ch  & ~0x3FF) != 0xDC00)) {
+	    /* Bug [10c2c17c32]. If Hi surrogate not followed by Lo surrogate, finish 3-byte UTF-8 */
+	    dst += Tcl_UniCharToUtf(-1, dst);
+	}
 
 	/*
 	 * Special case for 1-byte utf chars for speed. Make sure we work with
@@ -2792,6 +2816,10 @@ Utf16ToUtfProc(
 	src += sizeof(unsigned short);
     }
 
+    if ((ch  & ~0x3FF) == 0xD800) {
+	/* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */
+	dst += Tcl_UniCharToUtf(-1, dst);
+    }
     *srcReadPtr = src - srcStart;
     *dstWrotePtr = dst - dstStart;
     *dstCharsPtr = numChars;
diff --git a/tests/encoding.test b/tests/encoding.test
index e42c3b9..b2b029e 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -497,6 +497,18 @@ test encoding-16.11 {Utf32ToUtfProc} -body {
 test encoding-16.12 {Utf32ToUtfProc} -body {
     encoding convertfrom utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00
 } -result \uDC00\uD800
+test encoding-16.13 {Utf16ToUtfProc} -body {
+    encoding convertfrom utf-16le \x00\xD8
+} -result \uD800
+test encoding-16.14 {Utf16ToUtfProc} -body {
+    encoding convertfrom utf-16le \x00\xDC
+} -result \uDC00
+test encoding-16.15 {Utf16ToUtfProc} -body {
+    encoding convertfrom utf-16le \x00\xD8\x00\xDC
+} -result \uD800\uDC00
+test encoding-16.16 {Utf16ToUtfProc} -body {
+    encoding convertfrom utf-16le \x00\xDC\x00\xD8
+} -result \uDC00\uD800
 
 test encoding-16.9 {
     Utf16ToUtfProc, Tcl_UniCharToUtf, surrogate pairs in utf-16
-- 
cgit v0.12


From bab9170bdca67622ada57df9a0e7f55c5ac92b2f Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Fri, 10 Feb 2023 20:22:07 +0000
Subject: Proposed fix (and testcases) for [4a7397e0b3]: Tcl 9: fcopy with
 -strictencoding 1 UTF-8 channels

---
 generic/tclIO.c |  8 +++++
 tests/io.test   | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 105 insertions(+)

diff --git a/generic/tclIO.c b/generic/tclIO.c
index fed469c..2e0cd1f 100644
--- a/generic/tclIO.c
+++ b/generic/tclIO.c
@@ -9820,12 +9820,20 @@ CopyData(
 	    Tcl_SetErrno(inStatePtr->unreportedError);
 	    inStatePtr->unreportedError = 0;
 	    goto readError;
+	} else if (inStatePtr->flags & CHANNEL_ENCODING_ERROR) {
+	    Tcl_SetErrno(EILSEQ);
+	    inStatePtr->flags &= ~CHANNEL_ENCODING_ERROR;
+	    goto readError;
 	}
 	Tcl_GetChannelError(outChan, &msg);
 	if ((outStatePtr->unreportedError != 0) || (msg != NULL)) {
 	    Tcl_SetErrno(outStatePtr->unreportedError);
 	    outStatePtr->unreportedError = 0;
 	    goto writeError;
+	} else if (outStatePtr->flags & CHANNEL_ENCODING_ERROR) {
+	    Tcl_SetErrno(EILSEQ);
+	    outStatePtr->flags &= ~CHANNEL_ENCODING_ERROR;
+	    goto writeError;
 	}
 
 	if (cmdPtr && (mask == 0)) {
diff --git a/tests/io.test b/tests/io.test
index 2708906..7b8182e 100644
--- a/tests/io.test
+++ b/tests/io.test
@@ -7609,6 +7609,103 @@ test io-52.19 {coverage of eofChar handling} {
     close $out
     file size $path(test2)
 } 8
+test io-52.20 {TclCopyChannel & encodings} -setup {
+    set out [open $path(utf8-fcopy.txt) w]
+    fconfigure $out -encoding utf-8 -translation lf
+    puts $out "Á"
+    close $out
+} -constraints {fcopy} -body {
+    # binary to encoding => the input has to be
+    # in utf-8 to make sense to the encoder
+
+    set in  [open $path(utf8-fcopy.txt) r]
+    set out [open $path(kyrillic.txt) w]
+
+    # Using "-encoding ascii" means reading the "Á" gives an error
+    fconfigure $in  -encoding ascii -strictencoding 1
+    fconfigure $out -encoding koi8-r -translation lf
+
+    fcopy $in $out
+} -cleanup {
+    close $in
+    close $out
+} -returnCodes 1 -match glob -result {error reading "file*": illegal byte sequence}
+test io-52.21 {TclCopyChannel & encodings} -setup {
+    set out [open $path(utf8-fcopy.txt) w]
+    fconfigure $out -encoding utf-8 -translation lf
+    puts $out "Á"
+    close $out
+} -constraints {fcopy} -body {
+    # binary to encoding => the input has to be
+    # in utf-8 to make sense to the encoder
+
+    set in  [open $path(utf8-fcopy.txt) r]
+    set out [open $path(kyrillic.txt) w]
+
+    # Using "-encoding ascii" means writing the "Á" gives an error
+    fconfigure $in  -encoding utf-8
+    fconfigure $out -encoding ascii -translation lf -strictencoding 1
+
+    fcopy $in $out
+} -cleanup {
+    close $in
+    close $out
+} -returnCodes 1 -match glob -result {error writing "file*": illegal byte sequence}
+test io-52.22 {TclCopyChannel & encodings} -setup {
+    set out [open $path(utf8-fcopy.txt) w]
+    fconfigure $out -encoding utf-8 -translation lf
+    puts $out "Á"
+    close $out
+} -constraints {fcopy} -body {
+    # binary to encoding => the input has to be
+    # in utf-8 to make sense to the encoder
+
+    set in  [open $path(utf8-fcopy.txt) r]
+    set out [open $path(kyrillic.txt) w]
+
+    # Using "-encoding ascii" means reading the "Á" gives an error
+    fconfigure $in  -encoding ascii -strictencoding 1
+    fconfigure $out -encoding koi8-r -translation lf
+    proc ::xxx args {
+        set ::s0 $args
+    }
+
+    fcopy $in $out -command ::xxx
+    vwait ::s0
+    set ::s0
+} -cleanup {
+    close $in
+    close $out
+    unset ::s0
+} -match glob -result {0 {error reading "file*": illegal byte sequence}}
+test io-52.23 {TclCopyChannel & encodings} -setup {
+    set out [open $path(utf8-fcopy.txt) w]
+    fconfigure $out -encoding utf-8 -translation lf
+    puts $out "Á"
+    close $out
+} -constraints {fcopy} -body {
+    # binary to encoding => the input has to be
+    # in utf-8 to make sense to the encoder
+
+    set in  [open $path(utf8-fcopy.txt) r]
+    set out [open $path(kyrillic.txt) w]
+
+    # Using "-encoding ascii" means writing the "Á" gives an error
+    fconfigure $in  -encoding utf-8
+    fconfigure $out -encoding ascii -translation lf -strictencoding 1
+    proc ::xxx args {
+        set ::s0 $args
+    }
+
+    fcopy $in $out -command ::xxx
+    vwait ::s0
+    set ::s0
+} -cleanup {
+    close $in
+    close $out
+    unset ::s0
+} -match glob -result {0 {error writing "file*": illegal byte sequence}}
+
 
 test io-53.1 {CopyData} {fcopy} {
     file delete $path(test1)
-- 
cgit v0.12


From 2974b5727951737a5b67789f4b7712cf72096ed0 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Tue, 14 Feb 2023 07:29:36 +0000
Subject: Make a start fixing [bd1a60eb9c]. WIP

---
 generic/tclEncoding.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 0941f14..01c4eb1 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -519,7 +519,8 @@ FillEncodingFileMap(void)
 /* Since TCL_ENCODING_MODIFIED is only used for utf-8/cesu-8 and
  * TCL_ENCODING_LE is only used for  utf-16/utf-32/ucs-2. re-use the same value */
 #define TCL_ENCODING_LE		TCL_ENCODING_MODIFIED	/* Little-endian encoding */
-#define TCL_ENCODING_UTF	0x200	/* For UTF-8 encoding, allow 4-byte output sequences */
+#define ENCODING_UTF	0x200	/* For UTF-8 encoding, allow 4-byte output sequences */
+#define ENCODING_INPUT	0x400 /* For UTF-8/CESU-8 encoding, means external -> internal */
 
 void
 TclInitEncodingSubsystem(void)
@@ -561,7 +562,7 @@ TclInitEncodingSubsystem(void)
     type.fromUtfProc	= UtfToUtfProc;
     type.freeProc	= NULL;
     type.nullSize	= 1;
-    type.clientData	= INT2PTR(TCL_ENCODING_UTF);
+    type.clientData	= INT2PTR(ENCODING_UTF);
     Tcl_CreateEncoding(&type);
     type.clientData	= INT2PTR(TCL_ENCODING_NOCOMPLAIN);
     type.encodingName	= "cesu-8";
@@ -1238,7 +1239,7 @@ Tcl_ExternalToUtfDStringEx(
 
     flags |= TCL_ENCODING_START | TCL_ENCODING_END;
     if (encodingPtr->toUtfProc == UtfToUtfProc) {
-	flags |= TCL_ENCODING_MODIFIED | TCL_ENCODING_UTF;
+	flags |= ENCODING_INPUT;
     }
 
     while (1) {
@@ -1355,7 +1356,7 @@ Tcl_ExternalToUtf(
 	dstLen--;
     }
     if (encodingPtr->toUtfProc == UtfToUtfProc) {
-	flags |= TCL_ENCODING_MODIFIED | TCL_ENCODING_UTF;
+	flags |= ENCODING_INPUT;
     }
     do {
 	Tcl_EncodingState savedState = *statePtr;
@@ -1450,7 +1451,7 @@ Tcl_UtfToExternalDStringEx(
     const char *src,		/* Source string in UTF-8. */
     int srcLen,			/* Source string length in bytes, or < 0 for
 				 * strlen(). */
-    int flags,	/* Conversion control flags. */
+    int flags,			/* Conversion control flags. */
     Tcl_DString *dstPtr)	/* Uninitialized or free DString in which the
 				 * converted string is stored. */
 {
@@ -2363,7 +2364,7 @@ UtfToUtfProc(
 
     dstStart = dst;
     flags |= PTR2INT(clientData);
-    dstEnd = dst + dstLen - ((flags & TCL_ENCODING_UTF) ? TCL_UTF_MAX : 6);
+    dstEnd = dst + dstLen - ((flags & ENCODING_UTF) ? TCL_UTF_MAX : 6);
 
     for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
 	if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
@@ -2435,7 +2436,7 @@ UtfToUtfProc(
 		break;
 	    }
 	    src += len;
-	    if (!(flags & TCL_ENCODING_UTF) && (ch > 0x3FF)) {
+	    if (!(flags & ENCODING_UTF) && (ch > 0x3FF)) {
 		if (ch > 0xFFFF) {
 		    /* CESU-8 6-byte sequence for chars > U+FFFF */
 		    ch -= 0x10000;
-- 
cgit v0.12


From 38df35585000fd7245c6604e845663751a7bd524 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Tue, 14 Feb 2023 20:50:26 +0000
Subject: Complete fix for [bd1a60eb9c]. Also fix a bug in the tableencoding.
 With testcases.

---
 generic/tclEncoding.c | 24 ++++++++++++++++--------
 tests/encoding.test   | 38 ++++++++++++++++++++++++++++++++++++--
 2 files changed, 52 insertions(+), 10 deletions(-)

diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 01c4eb1..c5ecc46 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2380,7 +2380,7 @@ UtfToUtfProc(
 	    result = TCL_CONVERT_NOSPACE;
 	    break;
 	}
-	if (UCHAR(*src) < 0x80 && !((UCHAR(*src) == 0) && (flags & TCL_ENCODING_MODIFIED))) {
+	if (UCHAR(*src) < 0x80 && !((UCHAR(*src) == 0) && (flags & ENCODING_INPUT))) {
 	    /*
 	     * Copy 7bit characters, but skip null-bytes when we are in input
 	     * mode, so that they get converted to 0xC080.
@@ -2388,11 +2388,13 @@ UtfToUtfProc(
 
 	    *dst++ = *src++;
 	} else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd)
-		&& (UCHAR(src[1]) == 0x80) && (!(flags & TCL_ENCODING_MODIFIED) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX))) {
+		&& (UCHAR(src[1]) == 0x80) && (flags & ENCODING_UTF) && (!(flags & ENCODING_INPUT)
+			|| ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)
+			|| (flags & ENCODING_FAILINDEX))) {
 	    /*
 	     * If in input mode, and -strict or -failindex is specified: This is an error.
 	     */
-	    if (flags & TCL_ENCODING_MODIFIED) {
+	    if (flags & ENCODING_INPUT) {
 		result = TCL_CONVERT_SYNTAX;
 		break;
 	    }
@@ -2410,7 +2412,7 @@ UtfToUtfProc(
 	     * unless the user has explicitly asked to be told.
 	     */
 
-	    if (flags & TCL_ENCODING_MODIFIED) {
+	    if (flags & ENCODING_INPUT) {
 		if ((STOPONERROR) && (flags & TCL_ENCODING_CHAR_LIMIT)) {
 		    result = TCL_CONVERT_MULTIBYTE;
 		    break;
@@ -2430,7 +2432,7 @@ UtfToUtfProc(
 	    int low;
 	    const char *saveSrc = src;
 	    size_t len = TclUtfToUCS4(src, &ch);
-	    if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_MODIFIED)
+	    if ((len < 2) && (ch != 0) && (flags & ENCODING_INPUT)
 		    && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) {
 		result = TCL_CONVERT_SYNTAX;
 		break;
@@ -2451,6 +2453,11 @@ UtfToUtfProc(
 		 * A surrogate character is detected, handle especially.
 		 */
 
+		if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) && (flags & ENCODING_UTF)) {
+		    result = TCL_CONVERT_UNKNOWN;
+		    src = saveSrc;
+		    break;
+		}
 		low = ch;
 		len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0;
 
@@ -2470,12 +2477,12 @@ UtfToUtfProc(
 		src += len;
 		dst += Tcl_UniCharToUtf(ch, dst);
 		ch = low;
-	    } else if (STOPONERROR && !(flags & TCL_ENCODING_MODIFIED) && (((ch  & ~0x7FF) == 0xD800))) {
+	    } else if (STOPONERROR && !(flags & ENCODING_INPUT) && (((ch  & ~0x7FF) == 0xD800))) {
 		result = TCL_CONVERT_UNKNOWN;
 		src = saveSrc;
 		break;
 	    } else if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)
-		    && (flags & TCL_ENCODING_MODIFIED) && ((ch  & ~0x7FF) == 0xD800)) {
+		    && (flags & ENCODING_INPUT) && ((ch  & ~0x7FF) == 0xD800)) {
 		result = TCL_CONVERT_SYNTAX;
 		src = saveSrc;
 		break;
@@ -3117,7 +3124,8 @@ TableToUtfProc(
 	    ch = pageZero[byte];
 	}
 	if ((ch == 0) && (byte != 0)) {
-	    if (STOPONERROR) {
+	    if ((flags & ENCODING_FAILINDEX)
+		    || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) {
 		result = TCL_CONVERT_SYNTAX;
 		break;
 	    }
diff --git a/tests/encoding.test b/tests/encoding.test
index b2b029e..bbb40d7 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -452,6 +452,24 @@ test encoding-15.24 {UtfToUtfProc CESU-8 bug [048dd20b4171c8da]} {
     binary scan $y H* z
     list [string length $y] $z
 } {2 cfbf}
+test encoding-15.25 {UtfToUtfProc CESU-8} {
+    encoding convertfrom cesu-8 \x00
+} \x00
+test encoding-15.26 {UtfToUtfProc CESU-8} {
+    encoding convertfrom cesu-8 \xC0\x80
+} \x00
+test encoding-15.27 {UtfToUtfProc -strict CESU-8} {
+    encoding convertfrom -strict cesu-8 \x00
+} \x00
+test encoding-15.28 {UtfToUtfProc -strict CESU-8} {
+    encoding convertfrom -strict cesu-8 \xC0\x80
+} \x00
+test encoding-15.29 {UtfToUtfProc CESU-8} {
+    encoding convertto cesu-8 \x00
+} \xC0\x80
+test encoding-15.30 {UtfToUtfProc -strict CESU-8} {
+    encoding convertto -strict cesu-8 \x00
+} \xC0\x80
 
 test encoding-16.1 {Utf16ToUtfProc} -body {
     set val [encoding convertfrom utf-16 NN]
@@ -584,8 +602,21 @@ test encoding-18.6 {TableToUtfProc on invalid input with -nocomplain} -body {
 	list [catch {encoding convertto -nocomplain jis0208 \\} res] $res
 } -result {0 !)}
 
-test encoding-19.1 {TableFromUtfProc} {
-} {}
+test encoding-19.1 {TableFromUtfProc} -body {
+    encoding convertfrom ascii AÁ
+} -result AÁ
+test encoding-19.2 {TableFromUtfProc} -body {
+    encoding convertfrom -nocomplain ascii AÁ
+} -result AÁ
+test encoding-19.3 {TableFromUtfProc} -body {
+    encoding convertfrom -strict ascii AÁ
+} -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\xC1'}
+test encoding-19.4 {TableFromUtfProc} -body {
+    list [encoding convertfrom -failindex idx ascii AÁ] [set idx]
+} -result {A 1}
+test encoding-19.5 {TableFromUtfProc} -body {
+    list [encoding convertfrom -failindex idx -strict ascii AÁ] [set idx]
+} -result {A 1}
 
 test encoding-20.1 {TableFreefProc} {
 } {}
@@ -804,6 +835,9 @@ test encoding-24.39 {Try to generate invalid utf-8 with -strict} -body {
 test encoding-24.40 {Try to generate invalid utf-8 with -nocomplain} -body {
     encoding convertto -nocomplain utf-8 \uD800
 } -result \xED\xA0\x80
+test encoding-24.41 {Parse invalid utf-8 with -strict} -body {
+    encoding convertfrom -strict utf-8 \xED\xA0\x80\xED\xB0\x80
+} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'}
 
 file delete [file join [temporaryDirectory] iso2022.txt]
 
-- 
cgit v0.12