From e9b1c88a4142a059a821fdd2f2b02272a53c9151 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Tue, 14 Apr 2020 20:03:33 +0000
Subject: The function of Tcl_UtfNext() is to advance a pointer. There's
 nothing inherent in that task that requires decoding of the characters, but
 the implementation does that. Let's try a simpler solution for callers that
 do not need the content decoded.

---
 generic/tclUtf.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index fbdba4c..a03fa30 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -644,9 +644,19 @@ CONST char *
 Tcl_UtfNext(
     CONST char *src)		/* The current location in the string. */
 {
-    Tcl_UniChar ch;
-
-    return src + TclUtfToUniChar(src, &ch);
+    int byte = *((unsigned char *) src);
+    int left = totalBytes[byte];
+
+    src++;
+    while (--left) {
+	byte = *((unsigned char *) src);
+	if ((byte & 0xC0) != 0x80) {
+	    /* src points to non-trail byte; return it */
+	    return src;
+	}
+	src++;
+    }
+    return src;
 }
 
 /*
-- 
cgit v0.12


From 279c54dbff724a62d6739a9cc71ba31a83325c98 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Tue, 14 Apr 2020 20:18:55 +0000
Subject: Create and use an optimized macro TclUtfNext() for Tcl_UtfNext().

---
 generic/tclInt.h  | 3 +++
 generic/tclUtil.c | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/generic/tclInt.h b/generic/tclInt.h
index 15bc000..e92cd18 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3691,6 +3691,9 @@ MODULE_SCOPE void	TclDbInitNewObj(Tcl_Obj *objPtr, CONST char *file,
 	    ((*(chPtr) = (unsigned char) *(str)), 1)	\
 	    : Tcl_UtfToUniChar(str, chPtr))
 
+#define TclUtfNext(src)	\
+	((((unsigned char) *(src)) < 0xC0) ? src + 1 : Tcl_UtfNext(src))
+
 /*
  *----------------------------------------------------------------
  * Macro that encapsulates the logic that determines when it is safe to
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 3dd9a32..e87cf83 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -1691,7 +1691,7 @@ TclTrim(
 	 * that we will not trim. Skip over it. */
 	if (numBytes > 0) {
 	    const char *first = bytes + trimLeft;
-	    bytes = Tcl_UtfNext(first);
+	    bytes = TclUtfNext(first);
 	    numBytes -= (bytes - first);
 
 	    if (numBytes > 0) {
-- 
cgit v0.12


From 6df0ccf3997e397b860c47c770ba0fc31a2a9961 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Tue, 14 Apr 2020 21:16:43 +0000
Subject: Replace calls of TclUtfToUniChar() with TclUtfNext() when caller has
 no decoding need. Failing test string-22.14 indicates something is still not
 quite right. Now that Tcl_NumUtfChars() is not paying decoding prices, we let
 it spend to properly protect against overflow. [2738427]

---
 generic/tclCompExpr.c |  5 ++---
 generic/tclUtf.c      | 19 ++++++-------------
 2 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c
index 27d7503..42321af 100644
--- a/generic/tclCompExpr.c
+++ b/generic/tclCompExpr.c
@@ -1801,7 +1801,6 @@ ParseLexeme(
 {
     const char *end;
     int scanned;
-    Tcl_UniChar ch;
     Tcl_Obj *literal = NULL;
     unsigned char byte;
 
@@ -1979,12 +1978,12 @@ ParseLexeme(
 
     if (!TclIsBareword(*start) || *start == '_') {
 	if (Tcl_UtfCharComplete(start, numBytes)) {
-	    scanned = Tcl_UtfToUniChar(start, &ch);
+	    scanned = TclUtfNext(start) - start;
 	} else {
 	    char utfBytes[TCL_UTF_MAX];
 	    memcpy(utfBytes, start, (size_t) numBytes);
 	    utfBytes[numBytes] = '\0';
-	    scanned = Tcl_UtfToUniChar(utfBytes, &ch);
+	    scanned = TclUtfNext(utfBytes) - utfBytes;
 	}
 	*lexemePtr = INVALID;
 	Tcl_DecrRefCount(literal);
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index a03fa30..25d52d0 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -504,7 +504,6 @@ Tcl_NumUtfChars(
     int length)			/* The length of the string in bytes, or -1
 				 * for strlen(string). */
 {
-    Tcl_UniChar ch;
     register int i;
 
     /*
@@ -516,21 +515,20 @@ Tcl_NumUtfChars(
 
     i = 0;
     if (length < 0) {
-	while (*src != '\0') {
-	    src += TclUtfToUniChar(src, &ch);
+	while ((*src != '\0') && (i < INT_MAX)) {
+	    src = TclUtfNext(src);
 	    i++;
 	}
-	if (i < 0) i = INT_MAX; /* Bug [2738427] */
     } else {
 	register const char *endPtr = src + length - TCL_UTF_MAX;
 
 	while (src < endPtr) {
-	    src += TclUtfToUniChar(src, &ch);
+	    src = TclUtfNext(src);
 	    i++;
 	}
 	endPtr += TCL_UTF_MAX;
 	while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
-	    src += TclUtfToUniChar(src, &ch);
+	    src = TclUtfNext(src);
 	    i++;
 	}
 	if (src < endPtr) {
@@ -764,10 +762,7 @@ Tcl_UniCharAtIndex(
 {
     Tcl_UniChar ch;
 
-    while (index >= 0) {
-	index--;
-	src += TclUtfToUniChar(src, &ch);
-    }
+    TclUtfToUniChar(Tcl_UtfAtIndex(src, index), &ch);
     return ch;
 }
 
@@ -793,11 +788,9 @@ Tcl_UtfAtIndex(
     register CONST char *src,	/* The UTF-8 string. */
     register int index)		/* The position of the desired character. */
 {
-    Tcl_UniChar ch;
-
     while (index > 0) {
 	index--;
-	src += TclUtfToUniChar(src, &ch);
+	src = TclUtfNext(src);
     }
     return src;
 }
-- 
cgit v0.12


From e2b7ad1627665c99b128ff5a023e9f772fe467b4 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Tue, 14 Apr 2020 21:25:33 +0000
Subject: Fix the bad logic in Tcl_UtfNext().

---
 generic/tclUtf.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 25d52d0..7dd8598 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -644,17 +644,21 @@ Tcl_UtfNext(
 {
     int byte = *((unsigned char *) src);
     int left = totalBytes[byte];
+    const char *next = src + 1;
 
-    src++;
     while (--left) {
-	byte = *((unsigned char *) src);
+	byte = *((unsigned char *) next);
 	if ((byte & 0xC0) != 0x80) {
-	    /* src points to non-trail byte; return it */
-	    return src;
+	    /*
+	     * src points to non-trail byte; We ran out of trail bytes
+	     * before the needs of the lead bytes were satisfied.
+	     * Let the (malformed) lead byte alone be a character
+	     */
+	    return src + 1;
 	}
-	src++;
+	next++;
     }
-    return src;
+    return next;
 }
 
 /*
-- 
cgit v0.12


From 7aff227882dd8bfaa8972ecaf1e129bb9ef1e6e3 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Tue, 14 Apr 2020 21:32:22 +0000
Subject: typo

---
 generic/tclUtf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 7dd8598..078ecf4 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -651,7 +651,7 @@ Tcl_UtfNext(
 	if ((byte & 0xC0) != 0x80) {
 	    /*
 	     * src points to non-trail byte; We ran out of trail bytes
-	     * before the needs of the lead bytes were satisfied.
+	     * before the needs of the lead byte were satisfied.
 	     * Let the (malformed) lead byte alone be a character
 	     */
 	    return src + 1;
-- 
cgit v0.12


From f0f59ae8a31a818d78cb449dc4532762cfb2bb00 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Tue, 14 Apr 2020 21:39:53 +0000
Subject: New testing command [testutfnext].

---
 generic/tclTest.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 51 insertions(+), 2 deletions(-)

diff --git a/generic/tclTest.c b/generic/tclTest.c
index 31d3a7f..782b9a2 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -433,6 +433,7 @@ static int		SimpleMatchInDirectory(
 			    Tcl_Interp *interp, Tcl_Obj *resultPtr,
 			    Tcl_Obj *dirPtr, const char *pattern,
 			    Tcl_GlobTypeData *types);
+static Tcl_ObjCmdProc	TestUtfNextCmd;
 static Tcl_ObjCmdProc	TestUtfPrevCmd;
 static int		TestNumUtfCharsCmd(ClientData clientData,
 			    Tcl_Interp *interp, int objc,
@@ -697,8 +698,10 @@ Tcltest_Init(
 	    (ClientData) 0, NULL);
     Tcl_CreateObjCommand(interp, "testsetobjerrorcode",
 	    TestsetobjerrorcodeCmd, NULL, NULL);
+    Tcl_CreateObjCommand(interp, "testutfnext",
+	    TestUtfNextCmd, NULL, NULL);
     Tcl_CreateObjCommand(interp, "testutfprev",
-	    TestUtfPrevCmd, (ClientData) 0, NULL);
+	    TestUtfPrevCmd, NULL, NULL);
     Tcl_CreateObjCommand(interp, "testnumutfchars",
 	    TestNumUtfCharsCmd, NULL, NULL);
     Tcl_CreateObjCommand(interp, "testfindfirst",
@@ -7107,6 +7110,52 @@ SimpleListVolumes(void)
 }
 
 /*
+ * Used to check operations of Tcl_UtfNext.
+ *
+ * Usage: testutfnext $bytes $offset
+ */
+
+static int
+TestUtfNextCmd(
+    ClientData clientData,
+    Tcl_Interp *interp,
+    int objc,
+    Tcl_Obj *const objv[])
+{
+    int numBytes, offset = 0;
+    char *bytes;
+    const char *result;
+    Tcl_Obj *copy;
+    
+    if (objc < 2 || objc > 3) {
+	Tcl_WrongNumArgs(interp, 1, objv, "bytes ?offset?");
+	return TCL_ERROR;
+    }
+
+    bytes = (char *) Tcl_GetByteArrayFromObj(objv[1], &numBytes);
+    
+    if (objc == 3) {
+	if (TCL_OK != Tcl_GetIntFromObj(interp, objv[2], &offset)) {
+	    return TCL_ERROR;
+	}
+	if (offset < 0) {
+	    offset = 0;
+	}
+	if (offset > numBytes) {
+	    offset = numBytes;
+	}
+    }
+    copy = Tcl_DuplicateObj(objv[1]);
+    bytes = (char *) Tcl_SetByteArrayLength(copy, numBytes+1);
+    bytes[numBytes] = '\0';
+
+    result = Tcl_UtfNext(bytes + offset);
+    Tcl_SetObjResult(interp, Tcl_NewIntObj(result - bytes));
+
+    Tcl_DecrRefCount(copy);
+    return TCL_OK;
+}
+/*
  * Used to check operations of Tcl_UtfPrev.
  *
  * Usage: testutfprev $bytes $offset
@@ -7149,9 +7198,9 @@ TestUtfPrevCmd(
     bytes[numBytes] = '\0';
 
     result = Tcl_UtfPrev(bytes + offset, bytes);
+    Tcl_SetObjResult(interp, Tcl_NewIntObj(result - bytes));
 
     Tcl_DecrRefCount(copy);
-    Tcl_SetObjResult(interp, Tcl_NewIntObj(result - bytes));
     return TCL_OK;
 }
 
-- 
cgit v0.12


From 532ec4fa923534f592e04cc3c5679ce5771c684c Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Wed, 15 Apr 2020 14:42:41 +0000
Subject: Collection of coverage tests for Tcl_UtfNext.

---
 tests/utf.test | 246 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 244 insertions(+), 2 deletions(-)

diff --git a/tests/utf.test b/tests/utf.test
index 56ca1b9..b5358cc 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -111,8 +111,250 @@ test utf-5.2 {Tcl_UtfFindLast} testfindlast {
     testfindlast [bytestring "abcbc"] 98
 } {bc}
 
-test utf-6.1 {Tcl_UtfNext} {
-} {}
+testConstraint testutfnext [llength [info commands testutfnext]]
+
+test utf-6.1 {Tcl_UtfNext} testutfnext {
+    # This takes the pointer one past the terminating NUL.
+    # This is really an invalid call.
+    testutfnext {}
+} 1
+test utf-6.2 {Tcl_UtfNext} testutfnext {
+    testutfnext A
+} 1
+test utf-6.3 {Tcl_UtfNext} testutfnext {
+    testutfnext AA
+} 1
+test utf-6.4 {Tcl_UtfNext} testutfnext {
+    testutfnext A\xA0
+} 1
+test utf-6.5 {Tcl_UtfNext} testutfnext {
+    testutfnext A\xD0
+} 1
+test utf-6.6 {Tcl_UtfNext} testutfnext {
+    testutfnext A\xE8
+} 1
+test utf-6.7 {Tcl_UtfNext} testutfnext {
+    testutfnext A\xF4
+} 1
+test utf-6.8 {Tcl_UtfNext} testutfnext {
+    testutfnext A\xF8
+} 1
+test utf-6.9 {Tcl_UtfNext} testutfnext {
+    testutfnext \xA0
+} 1
+test utf-6.10 {Tcl_UtfNext} testutfnext {
+    testutfnext \xA0G
+} 1
+test utf-6.11 {Tcl_UtfNext} testutfnext {
+    testutfnext \xA0\xA0
+} 1
+test utf-6.12 {Tcl_UtfNext} testutfnext {
+    testutfnext \xA0\xD0
+} 1
+test utf-6.13 {Tcl_UtfNext} testutfnext {
+    testutfnext \xA0\xE8
+} 1
+test utf-6.14 {Tcl_UtfNext} testutfnext {
+    testutfnext \xA0\xF4
+} 1
+test utf-6.15 {Tcl_UtfNext} testutfnext {
+    testutfnext \xA0\xF8
+} 1
+test utf-6.16 {Tcl_UtfNext} testutfnext {
+    testutfnext \xD0
+} 1
+test utf-6.17 {Tcl_UtfNext} testutfnext {
+    testutfnext \xD0A
+} 1
+test utf-6.18 {Tcl_UtfNext} testutfnext {
+    testutfnext \xD0\xA0
+} 2
+test utf-6.19 {Tcl_UtfNext} testutfnext {
+    testutfnext \xD0\xD0
+} 1
+test utf-6.20 {Tcl_UtfNext} testutfnext {
+    testutfnext \xD0\xE8
+} 1
+test utf-6.21 {Tcl_UtfNext} testutfnext {
+    testutfnext \xD0\xF4
+} 1
+test utf-6.22 {Tcl_UtfNext} testutfnext {
+    testutfnext \xD0\xF8
+} 1
+test utf-6.23 {Tcl_UtfNext} testutfnext {
+    testutfnext \xE8
+} 1
+test utf-6.24 {Tcl_UtfNext} testutfnext {
+    testutfnext \xE8A
+} 1
+test utf-6.25 {Tcl_UtfNext} testutfnext {
+    testutfnext \xE8\xA0
+} 1
+test utf-6.26 {Tcl_UtfNext} testutfnext {
+    testutfnext \xE8\xD0
+} 1
+test utf-6.27 {Tcl_UtfNext} testutfnext {
+    testutfnext \xE8\xE8
+} 1
+test utf-6.28 {Tcl_UtfNext} testutfnext {
+    testutfnext \xE8\xF4
+} 1
+test utf-6.29 {Tcl_UtfNext} testutfnext {
+    testutfnext \xE8\xF8
+} 1
+test utf-6.30 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF4
+} 1
+test utf-6.31 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF4A
+} 1
+test utf-6.32 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF4\xA0
+} 1
+test utf-6.33 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF4\xD0
+} 1
+test utf-6.34 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF4\xE8
+} 1
+test utf-6.35 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF4\xF4
+} 1
+test utf-6.36 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF4\xF8
+} 1
+test utf-6.37 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF8
+} 1
+test utf-6.38 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF8A
+} 1
+test utf-6.39 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF8\xA0
+} 1
+test utf-6.40 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF8\xD0
+} 1
+test utf-6.41 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF8\xE8
+} 1
+test utf-6.42 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF8\xF4
+} 1
+test utf-6.43 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF8\xF8
+} 1
+test utf-6.44 {Tcl_UtfNext} testutfnext {
+    testutfnext \xD0\xA0G
+} 2
+test utf-6.45 {Tcl_UtfNext} testutfnext {
+    testutfnext \xD0\xA0\xA0
+} 2
+test utf-6.46 {Tcl_UtfNext} testutfnext {
+    testutfnext \xD0\xA0\xD0
+} 2
+test utf-6.47 {Tcl_UtfNext} testutfnext {
+    testutfnext \xD0\xA0\xE8
+} 2
+test utf-6.48 {Tcl_UtfNext} testutfnext {
+    testutfnext \xD0\xA0\xF4
+} 2
+test utf-6.49 {Tcl_UtfNext} testutfnext {
+    testutfnext \xD0\xA0\xF8
+} 2
+test utf-6.50 {Tcl_UtfNext} testutfnext {
+    testutfnext \xE8\xA0G
+} 1
+test utf-6.51 {Tcl_UtfNext} testutfnext {
+    testutfnext \xE8\xA0\xA0
+} 3
+test utf-6.52 {Tcl_UtfNext} testutfnext {
+    testutfnext \xE8\xA0\xD0
+} 1
+test utf-6.53 {Tcl_UtfNext} testutfnext {
+    testutfnext \xE8\xA0\xE8
+} 1
+test utf-6.54 {Tcl_UtfNext} testutfnext {
+    testutfnext \xE8\xA0\xF4
+} 1
+test utf-6.55 {Tcl_UtfNext} testutfnext {
+    testutfnext \xE8\xA0\xF8
+} 1
+test utf-6.56 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF4\xA0G
+} 1
+test utf-6.57 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF4\xA0\xA0
+} 1
+test utf-6.58 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF4\xA0\xD0
+} 1
+test utf-6.59 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF4\xA0\xE8
+} 1
+test utf-6.60 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF4\xA0\xF4
+} 1
+test utf-6.61 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF4\xA0\xF8
+} 1
+test utf-6.62 {Tcl_UtfNext} testutfnext {
+    testutfnext \xE8\xA0\xA0G
+} 3
+test utf-6.63 {Tcl_UtfNext} testutfnext {
+    testutfnext \xE8\xA0\xA0\xA0
+} 3
+test utf-6.64 {Tcl_UtfNext} testutfnext {
+    testutfnext \xE8\xA0\xA0\xD0
+} 3
+test utf-6.65 {Tcl_UtfNext} testutfnext {
+    testutfnext \xE8\xA0\xA0\xE8
+} 3
+test utf-6.66 {Tcl_UtfNext} testutfnext {
+    testutfnext \xE8\xA0\xA0\xF4
+} 3
+test utf-6.67 {Tcl_UtfNext} testutfnext {
+    testutfnext \xE8\xA0\xA0\xF8
+} 3
+test utf-6.68 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF4\xA0\xA0G
+} 1
+test utf-6.69 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF4\xA0\xA0\xA0
+} 1
+test utf-6.70 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF4\xA0\xA0\xD0
+} 1
+test utf-6.71 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF4\xA0\xA0\xE8
+} 1
+test utf-6.71 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF4\xA0\xA0\xF4
+} 1
+test utf-6.73 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF4\xA0\xA0\xF8
+} 1
+test utf-6.74 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF4\xA0\xA0\xA0G
+} 1
+test utf-6.75 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF4\xA0\xA0\xA0\xA0
+} 1
+test utf-6.76 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF4\xA0\xA0\xA0\xD0
+} 1
+test utf-6.77 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF4\xA0\xA0\xA0\xE8
+} 1
+test utf-6.78 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF4\xA0\xA0\xA0\xF4
+} 1
+test utf-6.79 {Tcl_UtfNext} testutfnext {
+    testutfnext \xF4\xA0\xA0\xA0G\xF8
+} 1
+
+
+
 
 testConstraint testutfprev [llength [info commands testutfprev]]
 
-- 
cgit v0.12


From e2a3a358c95196a7bf142d591fa5ef729b3b0d69 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Wed, 15 Apr 2020 16:42:32 +0000
Subject: Add test demonstrating that Tcl_UtfNext accepts overlong byte
 sequences, which is in conflict with what Tcl_UtfToUniChar does.

---
 tests/utf.test | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/utf.test b/tests/utf.test
index b5358cc..a930aae 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -352,9 +352,9 @@ test utf-6.78 {Tcl_UtfNext} testutfnext {
 test utf-6.79 {Tcl_UtfNext} testutfnext {
     testutfnext \xF4\xA0\xA0\xA0G\xF8
 } 1
-
-
-
+test utf-6.80 {Tcl_UtfNext - overlong sequences} {
+    testutfnext \xC0\x81
+} 1
 
 testConstraint testutfprev [llength [info commands testutfprev]]
 
-- 
cgit v0.12


From fe893becb4e69049ea40d04930490b24784faeed Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Wed, 15 Apr 2020 17:05:52 +0000
Subject: New tests checking Tcl_UtfPrev handling of overlong sequences. Bug
 demonstrated.

---
 tests/utf.test | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/tests/utf.test b/tests/utf.test
index 56ca1b9..9982f81 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -275,6 +275,33 @@ test utf-7.22 {Tcl_UtfPrev} testutfprev {
 test utf-7.23 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xA0\xA0\xA0\xA0
 } 4
+test utf-7.24 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+    testutfprev A\xC0\x81
+} 2
+test utf-7.25 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+    testutfprev A\xC0\x81 2
+} 1
+test utf-7.26 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+    testutfprev A\xE0\x80\x80
+} 3
+test utf-7.27 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+    testutfprev A\xE0\x80\x80 3
+} 2
+test utf-7.28 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+    testutfprev A\xE0\x80\x80 2
+} 1
+test utf-7.29 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+    testutfprev A\xF0\x80\x80\x80
+} 4
+test utf-7.30 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+    testutfprev A\xF0\x80\x80\x80 4
+} 3
+test utf-7.31 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+    testutfprev A\xF0\x80\x80\x80 3
+} 2
+test utf-7.32 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+    testutfprev A\xF0\x80\x80\x80 2
+} 1
 
 test utf-8.1 {Tcl_UniCharAtIndex: index = 0} {
     string index abcd 0
-- 
cgit v0.12


From e1e00509b78441a068cf98453c40e9af82ab8104 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Wed, 15 Apr 2020 21:48:57 +0000
Subject: Rework Tcl_UtfPrev so it properly handles overlong sequences.

---
 generic/tclUtf.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 113 insertions(+), 19 deletions(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index fbdba4c..a50a7fc 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -703,31 +703,125 @@ Tcl_UtfPrev(
     CONST char *src,		/* A location in a UTF-8 string. */
     CONST char *start)		/* Pointer to the beginning of the string */
 {
-    CONST char *look;
-    int i, byte;
-
-    src--;
-    look = src;
-    for (i = 0; i < TCL_UTF_MAX; i++) {
-	if (look < start) {
-	    if (src < start) {
-		src = start;
-	    }
-	    break;
-	}
-	byte = *((unsigned char *) look);
+    int trailBytesSeen = 0;	/* How many trail bytes have been verified? */
+    CONST char *fallback = src - 1;
+				/* If we cannot find a lead byte that might
+				 * start a prefix of a valid UTF byte sequence,
+				 * we will fallback to a one-byte back step */
+    unsigned char *look = (unsigned char *)fallback;
+				/* Start search at the fallback position */
+
+    /* Quick boundary case exit. */
+    if (fallback <= start) {
+	return start;
+    }
+
+    do {
+	unsigned char byte = look[0];
+
 	if (byte < 0x80) {
-	    break;
+	    /*
+	     * Single byte character. Either this is a correct previous
+	     * character, or it is followed by at least one trail byte
+	     * which indicates a malformed sequence. In either case the
+	     * correct result is to return the fallback.
+	     */
+	    return fallback;
 	}
 	if (byte >= 0xC0) {
-	    if (totalBytes[byte] <= i) {
-		break;
+	    /* Non-trail byte; May be multibyte lead. */
+
+	    if ((trailBytesSeen == 0)
+		/*
+		 * We've seen no trailing context to use to check
+		 * anything. From what we know, this non-trail byte
+		 * is a prefix of a previous character, and accepting
+		 * it (the fallback) is correct.
+		 */
+
+		    || (trailBytesSeen >= totalBytes[byte])) {
+		/*
+		 * That is, (1 + trailBytesSeen > needed).
+		 * We've examined more bytes than needed to complete
+		 * this lead byte. No matter about well-formedness or
+		 * validity, the sequence starting with this lead byte
+		 * will never include the fallback location, so we must
+		 * return the fallback location.
+		 *
+		 * EXAMPLE:	bytes = "ab\C0\x80\x81def";
+		 *		Tcl_UtfPrev(bytes+5, bytes);
+	    	 *
+		 * When we get here, look == bytes+2, trailBytesSeen == 2,
+		 * needed = 2, and we need to return bytes+4 that points to
+		 * the malformed \x81.
+		 */
+		return fallback;
 	    }
-	    return look;
+
+	    /*
+	     * trailBytesSeen > 0, so we can examine look[1] safely.
+	     * Use that capability to screen out overlong sequences.
+	     */
+
+	    switch (byte) {
+	    case 0xC0:
+		if (look[1] == 0x80) {
+		    /* Valid sequence: \xC0\x80 for \u0000 */
+		    return (CONST char *)look;
+		}
+		/* Reject overlong: \xC0\x81 - \xC0\xBF */
+		return fallback;
+	    case 0xC1:
+		/* Reject overlong: \xC1\x80 - \xC1\xBF */
+		return fallback;
+	    case 0xE0:
+		if (look[1] < 0xA0) {
+		    /* Reject overlong: \xE0\x80\x80 - \xE0\x9F\xBF */
+		    return fallback;
+		}
+		/* Valid sequence: \xE0\xA0\x80 for \u0800 , etc. */
+		return (CONST char *)look;
+#if TCL_UTF_MAX > 3
+	    case 0xF0:
+		if (look[1] < 0x90) {
+		    /* Reject overlong: \xF0\x80\x80\x80 - \xF0\x8F\xBF\xBF */
+		    return fallback;
+		}
+		/* Valid sequence: \xF0\x90\x80\x80 for \U10000 , etc. */
+		return (CONST char *)look;
+#endif
+	    default:
+		/* All other lead bytes lead only valid sequences */
+		return (CONST char *)look;
+	    }
+	}
+
+	/* We saw a trail byte. */
+	trailBytesSeen++;
+
+	if ((CONST char *)look == start) {
+	    /*
+	     * Do not read before the start of the string
+	     *
+	     * If we get here, we've examined bytes at every location
+	     * >= start and < src and all of them are trail bytes,
+	     * including (*start).  We need to return our fallback
+	     * and exit this loop before we run past the start of the string.
+	     */
+	    return fallback;
 	}
+
+	/* Continue the search backwards... */
 	look--;
-    }
-    return src;
+    } while (trailBytesSeen < TCL_UTF_MAX);
+
+    /*
+     * We've seen TCL_UTF_MAX trail bytes, so we know there will not be a
+     * properly formed byte sequence to find, and we can stop looking,
+     * accepting the fallback.
+     */
+
+    return fallback;
 }
 
 /*
-- 
cgit v0.12


From 6c579332631ddb93103506d954e17eda60d2d747 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Wed, 15 Apr 2020 22:14:33 +0000
Subject: More test coverage

---
 tests/utf.test | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/tests/utf.test b/tests/utf.test
index 9982f81..62b85a7 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -302,6 +302,48 @@ test utf-7.31 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
 test utf-7.32 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
     testutfprev A\xF0\x80\x80\x80 2
 } 1
+test utf-7.33 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+    testutfprev A\xC0\x80
+} 1
+test utf-7.34 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+    testutfprev A\xC1\x80
+} 2
+test utf-7.35 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+    testutfprev A\xC2\x80
+} 1
+test utf-7.36 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+    testutfprev A\xE0\xA0\x80
+} 1
+test utf-7.37 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+    testutfprev A\xE0\xA0\x80 3
+} 1
+test utf-7.38 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+    testutfprev A\xE0\xA0\x80 2
+} 1
+test utf-7.39 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+    testutfprev A\xF0\x90\x80\x80
+} 4
+test utf-7.40 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+    testutfprev A\xF0\x90\x80\x80 4
+} 3
+test utf-7.41 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+    testutfprev A\xF0\x90\x80\x80 3
+} 2
+test utf-7.42 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+    testutfprev A\xF0\x90\x80\x80 2
+} 1
+test utf-7.43 {Tcl_UtfPrev -- no lead byte at start}  testutfprev {
+    testutfprev \xA0
+} 0
+test utf-7.44 {Tcl_UtfPrev -- no lead byte at start}  testutfprev {
+    testutfprev \xA0\xA0
+} 1
+test utf-7.45 {Tcl_UtfPrev -- no lead byte at start}  testutfprev {
+    testutfprev \xA0\xA0\xA0
+} 2
+test utf-7.46 {Tcl_UtfPrev -- no lead byte at start}  testutfprev {
+    testutfprev \xA0\xA0\xA0\xA0
+} 3
 
 test utf-8.1 {Tcl_UniCharAtIndex: index = 0} {
     string index abcd 0
-- 
cgit v0.12


From 1f6e0698a6ecceec504384a0d228e56b2c1aba42 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Wed, 15 Apr 2020 22:28:18 +0000
Subject: Use test existence to shorten comment.

---
 generic/tclUtf.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index a50a7fc..6003b75 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -746,14 +746,7 @@ Tcl_UtfPrev(
 		 * this lead byte. No matter about well-formedness or
 		 * validity, the sequence starting with this lead byte
 		 * will never include the fallback location, so we must
-		 * return the fallback location.
-		 *
-		 * EXAMPLE:	bytes = "ab\C0\x80\x81def";
-		 *		Tcl_UtfPrev(bytes+5, bytes);
-	    	 *
-		 * When we get here, look == bytes+2, trailBytesSeen == 2,
-		 * needed = 2, and we need to return bytes+4 that points to
-		 * the malformed \x81.
+		 * return the fallback location. See test utf-7.17
 		 */
 		return fallback;
 	    }
-- 
cgit v0.12


From d491ca6385bb8c8e630dea43cc287b5a881233ce Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Wed, 15 Apr 2020 22:39:53 +0000
Subject: Refactor the Overlong test into a utility routine.

---
 generic/tclUtf.c | 85 +++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 56 insertions(+), 29 deletions(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 6003b75..f3b2097 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -81,6 +81,7 @@ static CONST unsigned char totalBytes[256] = {
  */
 
 static int		UtfCount(int ch);
+static int		Overlong(unsigned char *src);
 
 /*
  *---------------------------------------------------------------------------
@@ -115,7 +116,59 @@ UtfCount(
 #endif
     return 3;
 }
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * Overlong --
+ *
+ *	Utility routine to report whether /src/ points to the start of an
+ *	overlong byte sequence that should be rejected.
+ *
+ * Results:
+ *	A boolean.
+ *---------------------------------------------------------------------------
+ */
 
+INLINE static int
+Overlong(
+    unsigned char *src)		/* Points to lead byte of a UTF-8 byte
+				 * sequence. Caller guarantees it is safe
+				 * to read src[0] and src[1]. */
+{
+    switch (*src) {
+    case 0xC0:
+	if (src[1] == 0x80) {
+	    /* Valid sequence: \xC0\x80 for \u0000 */
+	    return 0;
+	}
+	/* Reject overlong: \xC0\x81 - \xC0\xBF */
+	return 1;
+    case 0xC1:
+	/* Reject overlong: \xC1\x80 - \xC1\xBF */
+	return 1;
+    case 0xE0:
+	if (src[1] < 0xA0) {
+	    /* Reject overlong: \xE0\x80\x80 - \xE0\x9F\xBF */
+	    return 1;
+	}
+	/* Valid sequence: \xE0\xA0\x80 for \u0800 , etc. */
+	return 0;
+#if TCL_UTF_MAX > 3
+    case 0xF0:
+	if (src[1] < 0x90) {
+	    /* Reject overlong: \xF0\x80\x80\x80 - \xF0\x8F\xBF\xBF */
+	    return 1;
+	}
+	/* Valid sequence: \xF0\x90\x80\x80 for \U10000 , etc. */
+	return 0
+#endif
+    default:
+	/* All other lead bytes lead only valid sequences */
+	return 0;
+    }
+}
+
 /*
  *---------------------------------------------------------------------------
  *
@@ -756,37 +809,11 @@ Tcl_UtfPrev(
 	     * Use that capability to screen out overlong sequences.
 	     */
 
-	    switch (byte) {
-	    case 0xC0:
-		if (look[1] == 0x80) {
-		    /* Valid sequence: \xC0\x80 for \u0000 */
-		    return (CONST char *)look;
-		}
-		/* Reject overlong: \xC0\x81 - \xC0\xBF */
+	    if (Overlong(look)) {
+		/* Reject */
 		return fallback;
-	    case 0xC1:
-		/* Reject overlong: \xC1\x80 - \xC1\xBF */
-		return fallback;
-	    case 0xE0:
-		if (look[1] < 0xA0) {
-		    /* Reject overlong: \xE0\x80\x80 - \xE0\x9F\xBF */
-		    return fallback;
-		}
-		/* Valid sequence: \xE0\xA0\x80 for \u0800 , etc. */
-		return (CONST char *)look;
-#if TCL_UTF_MAX > 3
-	    case 0xF0:
-		if (look[1] < 0x90) {
-		    /* Reject overlong: \xF0\x80\x80\x80 - \xF0\x8F\xBF\xBF */
-		    return fallback;
-		}
-		/* Valid sequence: \xF0\x90\x80\x80 for \U10000 , etc. */
-		return (CONST char *)look;
-#endif
-	    default:
-		/* All other lead bytes lead only valid sequences */
-		return (CONST char *)look;
 	    }
+	    return (CONST char *)look;
 	}
 
 	/* We saw a trail byte. */
-- 
cgit v0.12


From a0c779ad224a47f5fe74dd94960022dfbd2b7aa4 Mon Sep 17 00:00:00 2001
From: sebres <sebres@users.sourceforge.net>
Date: Thu, 16 Apr 2020 12:55:47 +0000
Subject: zlib, *BO*: fixes possible segfault (or buffer overrun), for instance
 if limit (max 65K) is set larger as allocated buffer for inflate-input
 (default 4K)

---
 generic/tclZlib.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/generic/tclZlib.c b/generic/tclZlib.c
index 002c6ae..ed29ff9 100644
--- a/generic/tclZlib.c
+++ b/generic/tclZlib.c
@@ -3027,7 +3027,9 @@ ZlibTransformInput(
 	 * reading over the border.
 	 */
 
-	readBytes = Tcl_ReadRaw(cd->parent, cd->inBuffer, cd->readAheadLimit);
+	readBytes = Tcl_ReadRaw(cd->parent, cd->inBuffer, 
+		cd->readAheadLimit <= cd->inAllocated ?
+		cd->readAheadLimit  : cd->inAllocated);
 
 	/*
 	 * Three cases here:
-- 
cgit v0.12


From c6eae962e7f9f3c91353bcc12415a110885d12f5 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Thu, 16 Apr 2020 16:22:34 +0000
Subject: When we reject overlong sequences, \xC1 is as invalid a lead byte as
 \xFF.

---
 generic/tclUtf.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 5ba8ff5..556a5a2 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -66,7 +66,7 @@ static CONST unsigned char totalBytes[256] = {
     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
     3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
 #if TCL_UTF_MAX > 3
     4,4,4,4,4,
@@ -123,7 +123,9 @@ UtfCount(
  * Overlong --
  *
  *	Utility routine to report whether /src/ points to the start of an
- *	overlong byte sequence that should be rejected.
+ *	overlong byte sequence that should be rejected. Caller guarantees
+ *	/src/ points to a byte that can lead a multi-byte sequence, and
+ *	that src[0] and src[1] are readable.
  *
  * Results:
  *	A boolean.
@@ -132,9 +134,7 @@ UtfCount(
 
 INLINE static int
 Overlong(
-    unsigned char *src)		/* Points to lead byte of a UTF-8 byte
-				 * sequence. Caller guarantees it is safe
-				 * to read src[0] and src[1]. */
+    unsigned char *src)	/* Points to lead byte of a UTF-8 byte sequence */
 {
     switch (*src) {
     case 0xC0:
@@ -144,9 +144,6 @@ Overlong(
 	}
 	/* Reject overlong: \xC0\x81 - \xC0\xBF */
 	return 1;
-    case 0xC1:
-	/* Reject overlong: \xC1\x80 - \xC1\xBF */
-	return 1;
     case 0xE0:
 	if (src[1] < 0xA0) {
 	    /* Reject overlong: \xE0\x80\x80 - \xE0\x9F\xBF */
-- 
cgit v0.12


From 0f4abadd9600089ea03eb89b0aa5533592a8259a Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Thu, 16 Apr 2020 17:30:29 +0000
Subject: Convert Overlong() to use a lookup table.

---
 generic/tclUtf.c | 50 +++++++++++++++++++++++++++-----------------------
 1 file changed, 27 insertions(+), 23 deletions(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 556a5a2..f8d89f8 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -124,46 +124,50 @@ UtfCount(
  *
  *	Utility routine to report whether /src/ points to the start of an
  *	overlong byte sequence that should be rejected. Caller guarantees
- *	/src/ points to a byte that can lead a multi-byte sequence, and
- *	that src[0] and src[1] are readable.
+ *	that src[0] and src[1] are readable, and
+ *
+ *	(src[0] >= 0xC0) && (src[0] != 0xC1)
+ * 	(src[1] >= 0x80) && (src[1] < 0xC0)
+ *	(src[0] < ((TCL_UTF_MAX > 3) ? 0xF8 : 0xF0))
  *
  * Results:
  *	A boolean.
  *---------------------------------------------------------------------------
  */
 
+static CONST unsigned char overlong[3] = {
+    0x80,	/* \xD0 -- all sequences valid */
+    0xA0,	/* \xE0\x80 through \xE0\x9F are invalid prefixes */
+#if TCL_UTF_MAX > 3
+    0x90	/* \xF0\x80 through \xF0\x8F are invalid prefixes */
+#else
+    0xC0	/* Not used, but reject all again for safety. */
+#endif
+};
+
 INLINE static int
 Overlong(
     unsigned char *src)	/* Points to lead byte of a UTF-8 byte sequence */
 {
-    switch (*src) {
-    case 0xC0:
+    unsigned char byte = *src;
+
+    if (byte % 0x10) {
+	/* Only lead bytes 0xC0, 0xE0, 0xF0 need examination */
+	return 0;
+    }
+    if (byte == 0xC0) {
 	if (src[1] == 0x80) {
 	    /* Valid sequence: \xC0\x80 for \u0000 */
 	    return 0;
 	}
 	/* Reject overlong: \xC0\x81 - \xC0\xBF */
 	return 1;
-    case 0xE0:
-	if (src[1] < 0xA0) {
-	    /* Reject overlong: \xE0\x80\x80 - \xE0\x9F\xBF */
-	    return 1;
-	}
-	/* Valid sequence: \xE0\xA0\x80 for \u0800 , etc. */
-	return 0;
-#if TCL_UTF_MAX > 3
-    case 0xF0:
-	if (src[1] < 0x90) {
-	    /* Reject overlong: \xF0\x80\x80\x80 - \xF0\x8F\xBF\xBF */
-	    return 1;
-	}
-	/* Valid sequence: \xF0\x90\x80\x80 for \U10000 , etc. */
-	return 0
-#endif
-    default:
-	/* All other lead bytes lead only valid sequences */
-	return 0;
     }
+    if (src[1] < overlong[(byte >> 4) - 0x0D]) {
+	/* Reject overlong */
+	return 1;
+    }
+    return 0;
 }
 
 /*
-- 
cgit v0.12


From 2b4c279e0ba71a2727bb76740ba90ea2f800fc90 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Thu, 16 Apr 2020 18:14:55 +0000
Subject: Create and use macro TclUtfPrev for Tcl_UtfPrev.

---
 generic/tclCmdMZ.c     | 2 +-
 generic/tclInt.h       | 5 +++++
 generic/tclStringObj.c | 6 +++---
 generic/tclUtil.c      | 2 +-
 4 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 6515d98..f6bdd3e 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -2406,7 +2406,7 @@ StringStartCmd(
 		break;
 	    }
 
-	    next = Tcl_UtfPrev(p, string);
+	    next = TclUtfPrev(p, string);
 	    do {
 		next += delta;
 		delta = TclUtfToUniChar(next, &ch);
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 15bc000..3dc3d1d 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3691,6 +3691,11 @@ MODULE_SCOPE void	TclDbInitNewObj(Tcl_Obj *objPtr, CONST char *file,
 	    ((*(chPtr) = (unsigned char) *(str)), 1)	\
 	    : Tcl_UtfToUniChar(str, chPtr))
 
+#define TclUtfPrev(src, start) \
+	(((src) < (start)+2) ? (start) : \
+	((unsigned char) *(src - 1)) < 0x80 ? (src)-1 : \
+	Tcl_UtfPrev(src, start))
+
 /*
  *----------------------------------------------------------------
  * Macro that encapsulates the logic that determines when it is safe to
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index c3c85dc..d62fc72 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -1139,10 +1139,10 @@ Tcl_AppendLimitedToObj(
 	}
 	eLen = strlen(ellipsis);
 	while (eLen > limit) {
-	    eLen = Tcl_UtfPrev(ellipsis+eLen, ellipsis) - ellipsis;
+	    eLen = TclUtfPrev(ellipsis+eLen, ellipsis) - ellipsis;
 	}
 
-	toCopy = Tcl_UtfPrev(bytes+limit+1-eLen, bytes) - bytes;
+	toCopy = TclUtfPrev(bytes+limit+1-eLen, bytes) - bytes;
     }
 
     /*
@@ -2585,7 +2585,7 @@ AppendPrintfToObjVA(
 		 * multi-byte characters.
 		 */
 
-		q = Tcl_UtfPrev(end, bytes);
+		q = TclUtfPrev(end, bytes);
 		if (!Tcl_UtfCharComplete(q, (int)(end - q))) {
 		    end = q;
 		}
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 3dd9a32..50922bf 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -1554,7 +1554,7 @@ TclTrimRight(
 	const char *q = trim;
 	int pInc = 0, bytesLeft = numTrim;
 
-	pp = Tcl_UtfPrev(p, bytes);
+	pp = TclUtfPrev(p, bytes);
 	do {
 	    pp += pInc;
  	    pInc = TclUtfToUniChar(pp, &ch1);
-- 
cgit v0.12


From e6faa58e6df3292b2c0735ba4921af4be0e215fa Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Thu, 16 Apr 2020 18:40:40 +0000
Subject: More tests and fix for overlong handling in revised Tcl_UtfNext.

---
 generic/tclUtf.c |  3 +++
 tests/utf.test   | 23 ++++++++++++++++++++++-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index e41e7a5..00ca94e 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -712,6 +712,9 @@ Tcl_UtfNext(
 	}
 	next++;
     }
+    if (Overlong(src)) {
+	return src + 1;
+    }
     return next;
 }
 
diff --git a/tests/utf.test b/tests/utf.test
index 72165f9..02b7002 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -352,9 +352,30 @@ test utf-6.78 {Tcl_UtfNext} testutfnext {
 test utf-6.79 {Tcl_UtfNext} testutfnext {
     testutfnext \xF4\xA0\xA0\xA0G\xF8
 } 1
-test utf-6.80 {Tcl_UtfNext - overlong sequences} {
+test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext {
+    testutfnext \xC0\x80
+} 2
+test utf-6.81 {Tcl_UtfNext - overlong sequences} testutfnext {
     testutfnext \xC0\x81
 } 1
+test utf-6.82 {Tcl_UtfNext - overlong sequences} testutfnext {
+    testutfnext \xC1\x80
+} 1
+test utf-6.83 {Tcl_UtfNext - overlong sequences} testutfnext {
+    testutfnext \xC2\x80
+} 2
+test utf-6.84 {Tcl_UtfNext - overlong sequences} testutfnext {
+    testutfnext \xE0\x80\x80
+} 1
+test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext {
+    testutfnext \xE0\xA0\x80
+} 3
+test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext {
+    testutfnext \xF0\x80\x80\x80
+} 1
+test utf-6.87 {Tcl_UtfNext - overlong sequences} testutfnext {
+    testutfnext \xF0\x90\x80\x80
+} 1
 
 testConstraint testutfprev [llength [info commands testutfprev]]
 
-- 
cgit v0.12


From 443928c10f1ac94e6a6adfafb478eb9fa09ac39a Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Thu, 16 Apr 2020 18:42:37 +0000
Subject: compiler warning

---
 generic/tclUtf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 00ca94e..91e9c73 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -712,7 +712,7 @@ Tcl_UtfNext(
 	}
 	next++;
     }
-    if (Overlong(src)) {
+    if (Overlong((unsigned char *)src)) {
 	return src + 1;
     }
     return next;
-- 
cgit v0.12


From 60d5424069845124e51d4032f295d913a17454a1 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Thu, 16 Apr 2020 19:02:40 +0000
Subject: More detailed comments.

---
 generic/tclUtf.c | 70 +++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 46 insertions(+), 24 deletions(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 91e9c73..67603af 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -678,13 +678,35 @@ Tcl_UtfFindLast(
  *
  * Tcl_UtfNext --
  *
- *	Given a pointer to some current location in a UTF-8 string, move
- *	forward one character. The caller must ensure that they are not asking
- *	for the next character after the last character in the string.
+ *	The aim of this routine is to provide a way to iterate forward
+ *	through a UTF-8 string. The caller is expected to pass a non-NULL
+ *	pointer argument /src/ which points to a location within a string.
+ *	(*src) will be read, so /src/ must not point to an unreadable
+ *	location past the end of the string. If /src/ points to the
+ *	beginning of a complete, well-formed and valid UTF_8 byte sequence
+ *	of no more than TCL_UTF_MAX bytes, Tcl_UtfNext returns the pointer
+ *	just past the end of that sequence. In any other circumstance,
+ *	Tcl_UtfNext returns /src/+1.
+ *
+ *	Because this routine always returns a value > /src/, it is useful
+ *	as a forward iterator that will always make progress. If the string
+ *	is NUL-terminated, Tcl_UtfNext will not read beyond the terminating
+ *	NUL character. If it is not NUL-terminated, the caller must make
+ *	use of the companion routine Tcl_UtfCharComplete to test whether
+ *	there is risk that Tcl_UtfNext will read beyond the end of the string.
+ *	Tcl_UtfNext will never read more than TCL_UTF_MAX bytes.
+ *
+ *	In a string where all characters are complete and properly formed,
+ *	and /src/ points to the first byte of a character, repeated
+ *	Tcl_UtfNext calls will step to the starting bytes of characters, one
+ *	character at a time. Within those limitations, Tcl_UtfPrev and
+ *	Tcl_UtfNext are inverses. If either condition cannot be met,
+ *	Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and the
+ *	caller will have to take greater care.
  *
  * Results:
- *	The return value is the pointer to the next character in the UTF-8
- *	string.
+ *	A pointer to the start of the next character in the string (or to
+ *	the end of the string) as described above.
  *
  * Side effects:
  *	None.
@@ -725,37 +747,37 @@ Tcl_UtfNext(
  *
  *	The aim of this routine is to provide a way to move backward
  *	through a UTF-8 string. The caller is expected to pass non-NULL
- *	pointer arguments start and src. start points to the beginning
- *	of a string, and src >= start points to a location within (or just
- *	past the end) of the string. This routine always returns a
- *	pointer within the string (>= start).  When (src == start), it
- *	returns start. When (src > start), it returns a pointer (< src)
- *	and (>= src - TCL_UTF_MAX).  Subject to these constraints, the
- *	routine returns a pointer to the earliest byte in the string that
- *	starts a character when characters are read starting at start and
+ *	pointer arguments /start/ and /src/. /start/ points to the beginning
+ *	of a string, and /src/ (>= /start/) points to a location within (or
+ *	just past the end) of the string. This routine always returns a
+ *	pointer within the string (>= /start/).  When (/src/ == /start/),
+ *	it returns /start/. When (/src/ > /start/), it returns a pointer
+ *	(< /src/) and (>= /src/ - TCL_UTF_MAX).  Subject to these constraints,
+ *	the routine returns a pointer to the earliest byte in the string that
+ *	starts a character when characters are read starting at /start/ and
  *	that character might include the byte src[-1]. The routine will
  *	examine only those bytes in the range that might be returned.
- *	It will not examine the byte *src, and because of that cannot
+ *	It will not examine the byte (*src), and because of that cannot
  *	determine for certain in all circumstances whether the character
  *	that begins with the returned pointer will or will not include
- *	the byte src[-1]. In the scenario, where src points to the end of
- *	a buffer being filled, the returned pointer point to either the
+ *	the byte src[-1]. In the scenario where /src/ points to the end of
+ *	a buffer being filled, the returned pointer points to either the
  *	final complete character in the string or to the earliest byte
  *	that might start an incomplete character waiting for more bytes to
  *	complete.
  *
- *	Because this routine always returns a value < src until the point
- *	it is forced to return start, it is useful as a backward iterator
+ *	Because this routine always returns a value < /src/ until the point
+ *	it is forced to return /start/, it is useful as a backward iterator
  *	through a string that will always make progress and always be
  *	prevented from running past the beginning of the string.
  *
  *	In a string where all characters are complete and properly formed,
- *	and the value of src points to the first byte of a character,
- *	repeated Tcl_UtfPrev calls will step to the starting bytes of
- *	characters, one character at a time. Within those limitations,
- *	Tcl_UtfPrev and Tcl_UtfNext are inverses. If either condition cannot
- *	be met, Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and
- *	the caller will have to take greater care.
+ *	and /src/ points to the first byte of a character, repeated
+ *	Tcl_UtfPrev calls will step to the starting bytes of characters, one
+ *	character at a time. Within those limitations, Tcl_UtfPrev and
+ *	Tcl_UtfNext are inverses. If either condition cannot be met,
+ *	Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and the
+ *	caller will have to take greater care.
  *
  * Results:
  *	A pointer to the start of a character in the string as described
-- 
cgit v0.12


From c2479a465e6bef08275d47d0277deda87e6e014e Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Thu, 16 Apr 2020 19:04:47 +0000
Subject: delete merge litter

---
 generic/tclTest.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/generic/tclTest.c b/generic/tclTest.c
index b8507bf..6e0fbed 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -325,7 +325,6 @@ static Tcl_FSPathInFilesystemProc SimplePathInFilesystem;
 static Tcl_Obj *	SimpleRedirect(Tcl_Obj *pathPtr);
 static Tcl_FSMatchInDirectoryProc SimpleMatchInDirectory;
 static Tcl_ObjCmdProc	TestUtfNextCmd;
-static Tcl_ObjCmdProc	TestUtfNextCmd;
 static Tcl_ObjCmdProc	TestUtfPrevCmd;
 static Tcl_ObjCmdProc	TestNumUtfCharsCmd;
 static Tcl_ObjCmdProc	TestFindFirstCmd;
-- 
cgit v0.12


From 66197fff215f60690a444b3f2af67a0c3c87c8af Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Thu, 16 Apr 2020 19:39:36 +0000
Subject: Improve the docs for Tcl_UtfNext.

---
 doc/Utf.3 | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/doc/Utf.3 b/doc/Utf.3
index 87d1318..cb82699 100644
--- a/doc/Utf.3
+++ b/doc/Utf.3
@@ -217,11 +217,20 @@ returns a pointer to the last occurrence of the Tcl_UniChar \fIch\fR
 in the null-terminated UTF-8 string \fIsrc\fR.  The null terminator is
 considered part of the UTF-8 string.  
 .PP
-Given \fIsrc\fR, a pointer to some location in a UTF-8 string,
-\fBTcl_UtfNext\fR returns a pointer to the next UTF-8 character in the
-string.  The caller must not ask for the next character after the last
-character in the string if the string is not terminated by a null
-character.
+\fBTcl_UtfNext\fR is used to step forward through a UTF-8 string.
+If the UTF-8 string is made up entirely of complete, well-formed, and
+valid character byte sequences, and \fIsrc\fR points to the lead byte
+of one of those sequences, then repeated calls of \fBTcl_UtfNext\fR will
+return pointers to the lead bytes of each character in the string, one
+character at a time. In any other circumstance, \fBTcl_UtfNext\fR
+returns \fIsrc\fR+1.  \fBTcl_UtfNext\fR will always read \fIsrc[0]\fR
+and may read as many following bytes (up to a total of \fBTCL_UTF_MAX\fR)
+as needed to find the end of the byte sequence. If the string is
+\fBNUL\fR-terminated, \fBTcl_UtfNext\fR will not read beyond the terminating
+\fBNUL\fR byte. If not, the caller must use the companion routine
+\fBTcl_UtfCharComplete\fR to determine whether there is any risk
+\fBTcl_UtfNext\fR might read beyond the readable memory occupied
+by the string.
 .PP
 \fBTcl_UtfPrev\fR is used to step backward through but not beyond the
 UTF-8 string that begins at \fIstart\fR.  If the UTF-8 string is made
-- 
cgit v0.12


From 1924a68fc604e0939f0222a3fe68aa1c35deec7a Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Thu, 16 Apr 2020 20:24:49 +0000
Subject: Add "knownBug" testcase, showing a situation in which Tcl_UtfNext
 doesn't behave as described in the documentation

---
 tests/utf.test | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/utf.test b/tests/utf.test
index 1b7b409..8742a5e 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -153,8 +153,13 @@ test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} {
     testfindlast [testbytestring "abcbc"] 98
 } {bc}
 
+testConstraint testutfnext [llength [info commands testutfnext]]
+
 test utf-6.1 {Tcl_UtfNext} {
 } {}
+test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext knownBug} {
+    testutfnext \xE8\xA0\xA0 1
+} 3
 
 testConstraint testutfprev [llength [info commands testutfprev]]
 
-- 
cgit v0.12


From ae8350301e553be8131de6a4cc47b223fb405386 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Thu, 16 Apr 2020 20:28:15 +0000
Subject: merge litter

---
 generic/tclUtf.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 67603af..b5c430b 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -558,7 +558,7 @@ Tcl_NumUtfChars(
     int length)			/* The length of the string in bytes, or -1
 				 * for strlen(string). */
 {
-    register int i;
+    register int i = 0;
 
     /*
      * The separate implementations are faster.
@@ -567,7 +567,6 @@ Tcl_NumUtfChars(
      * single-byte char case specially.
      */
 
-    i = 0;
     if (length < 0) {
 	while ((*src != '\0') && (i < INT_MAX)) {
 	    src = TclUtfNext(src);
-- 
cgit v0.12


From d168b44ff69105f80110a01cda37279becc68dc5 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Thu, 16 Apr 2020 20:59:11 +0000
Subject: Adjust test results and implementation for Tcl 8.6 current support of
 4-byte sequences in a TCL_UTF_MAX=3 build.

---
 generic/tclUtf.c |  4 ++--
 tests/utf.test   | 14 +++++++-------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index cd57d12..d6ba15c 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -897,10 +897,10 @@ Tcl_UtfPrev(
 
 	/* Continue the search backwards... */
 	look--;
-    } while (trailBytesSeen < TCL_UTF_MAX);
+    } while (trailBytesSeen < /* was TCL_UTF_MAX */ 4);
 
     /*
-     * We've seen TCL_UTF_MAX trail bytes, so we know there will not be a
+     * We've seen 4 (was TCL_UTF_MAX) trail bytes, so we know there will not be a
      * properly formed byte sequence to find, and we can stop looking,
      * accepting the fallback.
      */
diff --git a/tests/utf.test b/tests/utf.test
index 3a2911f..76cf3fe 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -363,7 +363,7 @@ test utf-6.68 {Tcl_UtfNext} testutfnext {
 } 1
 test utf-6.69 {Tcl_UtfNext} testutfnext {
     testutfnext \xF4\xA0\xA0\xA0
-} 1
+} 4
 test utf-6.70 {Tcl_UtfNext} testutfnext {
     testutfnext \xF4\xA0\xA0\xD0
 } 1
@@ -378,22 +378,22 @@ test utf-6.73 {Tcl_UtfNext} testutfnext {
 } 1
 test utf-6.74 {Tcl_UtfNext} testutfnext {
     testutfnext \xF4\xA0\xA0\xA0G
-} 1
+} 4
 test utf-6.75 {Tcl_UtfNext} testutfnext {
     testutfnext \xF4\xA0\xA0\xA0\xA0
-} 1
+} 4
 test utf-6.76 {Tcl_UtfNext} testutfnext {
     testutfnext \xF4\xA0\xA0\xA0\xD0
-} 1
+} 4
 test utf-6.77 {Tcl_UtfNext} testutfnext {
     testutfnext \xF4\xA0\xA0\xA0\xE8
-} 1
+} 4
 test utf-6.78 {Tcl_UtfNext} testutfnext {
     testutfnext \xF4\xA0\xA0\xA0\xF4
-} 1
+} 4
 test utf-6.79 {Tcl_UtfNext} testutfnext {
     testutfnext \xF4\xA0\xA0\xA0G\xF8
-} 1
+} 4
 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext {
     testutfnext \xC0\x80
 } 2
-- 
cgit v0.12


From 335f3c8fb761a9499b9df27bce308a6f631dc082 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Thu, 16 Apr 2020 22:06:54 +0000
Subject: Fix more test-cases for TCL_UTF_MAX=3

---
 generic/tclUtf.c | 18 ++++++++++--------
 tests/utf.test   |  2 +-
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index d6ba15c..b37d55a 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -82,7 +82,7 @@ static const unsigned char complete[256] = {
     3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
     3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
 #endif
-    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
     3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
 #if TCL_UTF_MAX > 4
     4,4,4,4,4,
@@ -589,8 +589,9 @@ Tcl_NumUtfChars(
     int length)			/* The length of the string in bytes, or -1
 				 * for strlen(string). */
 {
-    register int i = 0;
-
+    Tcl_UniChar ch = 0;
+    int i = 0;
+ 
     /*
      * The separate implementations are faster.
      *
@@ -600,19 +601,20 @@ Tcl_NumUtfChars(
 
     if (length < 0) {
 	while ((*src != '\0') && (i < INT_MAX)) {
-	    src = TclUtfNext(src);
+	    src += Tcl_UtfToUniChar(src, &ch);
 	    i++;
 	}
+	if (i < 0) i = INT_MAX; /* Bug [2738427] */
     } else {
-	register const char *endPtr = src + length - TCL_UTF_MAX;
+	const char *endPtr = src + length - TCL_UTF_MAX;
 
 	while (src < endPtr) {
-	    src = TclUtfNext(src);
+	    src += Tcl_UtfToUniChar(src, &ch);
 	    i++;
 	}
 	endPtr += TCL_UTF_MAX;
 	while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
-	    src = TclUtfNext(src);
+	    src += Tcl_UtfToUniChar(src, &ch);
 	    i++;
 	}
 	if (src < endPtr) {
@@ -958,7 +960,7 @@ Tcl_UtfAtIndex(
     register const char *src,	/* The UTF-8 string. */
     register int index)		/* The position of the desired character. */
 {
-#if 0
+#if 1
 /* The Tcl 8.6 implementation */
     Tcl_UniChar ch = 0;
     int len = 0;
diff --git a/tests/utf.test b/tests/utf.test
index 76cf3fe..b5d1f5c 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -418,7 +418,7 @@ test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext {
 test utf-6.87 {Tcl_UtfNext - overlong sequences} testutfnext {
     testutfnext \xF0\x90\x80\x80
 } 1
-test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
+test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext knownBug} {
     testutfnext \xE8\xA0\xA0 1
 } 3
 
-- 
cgit v0.12


From 1045aefa35184677d8bede823e91e80cc17db5ee Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Thu, 16 Apr 2020 22:19:15 +0000
Subject: Fix build for TCL_UTF_MAX=4.  Mark some failing tests with
 "knownBug". Those still need to be fixed!

---
 generic/tclUtf.c | 2 +-
 tests/utf.test   | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index b37d55a..0768992 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -972,7 +972,7 @@ Tcl_UtfAtIndex(
 #if TCL_UTF_MAX == 4
     if ((ch >= 0xD800) && (len < 3)) {
 	/* Index points at character following high Surrogate */
-	src = TclUtfToUniChar(src, &ch);
+	src += TclUtfToUniChar(src, &ch);
     }
 #endif
     return src;
diff --git a/tests/utf.test b/tests/utf.test
index b5d1f5c..a730feb 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -415,7 +415,7 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext {
 test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext {
     testutfnext \xF0\x80\x80\x80
 } 1
-test utf-6.87 {Tcl_UtfNext - overlong sequences} testutfnext {
+test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext knownBug} { # Doesn't work with TCL_UTF_MAX>3
     testutfnext \xF0\x90\x80\x80
 } 1
 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext knownBug} {
@@ -628,13 +628,13 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
 test utf-7.38 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
     testutfprev A\xE0\xA0\x80 2
 } 1
-test utf-7.39 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+test utf-7.39 {Tcl_UtfPrev -- overlong sequence}  {testutfprev knownBug} { # Doesn't work with TCL_UTF_MAX>3
     testutfprev A\xF0\x90\x80\x80
 } 4
-test utf-7.40 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+test utf-7.40 {Tcl_UtfPrev -- overlong sequence}  {testutfprev knownBug} { # Doesn't work with TCL_UTF_MAX>3
     testutfprev A\xF0\x90\x80\x80 4
 } 3
-test utf-7.41 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+test utf-7.41 {Tcl_UtfPrev -- overlong sequence}  {testutfprev knownBug} { # Doesn't work with TCL_UTF_MAX>3
     testutfprev A\xF0\x90\x80\x80 3
 } 2
 test utf-7.42 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
-- 
cgit v0.12


From 882fcc12b24d44674254eabaacfe15be718f3b73 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Fri, 17 Apr 2020 03:54:50 +0000
Subject: Fix the bad tests utf-2.11 and utf-6.88 that expected the wrong
 results. Also reconcile the merge from 8.5 to the new decoupling of
 bytesequence counts from indexed code unit couints. Docs still need an
 update.

---
 generic/tclUtf.c | 50 ++++++++++++++++++++------------------------------
 tests/utf.test   |  4 ++--
 2 files changed, 22 insertions(+), 32 deletions(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index d6ba15c..24fd418 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -589,6 +589,7 @@ Tcl_NumUtfChars(
     int length)			/* The length of the string in bytes, or -1
 				 * for strlen(string). */
 {
+    const char *next;
     register int i = 0;
 
     /*
@@ -600,20 +601,23 @@ Tcl_NumUtfChars(
 
     if (length < 0) {
 	while ((*src != '\0') && (i < INT_MAX)) {
-	    src = TclUtfNext(src);
-	    i++;
+	    next = TclUtfNext(src);
+	    i += 1 + ((next - src) > 3);
+	    src = next;
 	}
     } else {
 	register const char *endPtr = src + length - TCL_UTF_MAX;
 
 	while (src < endPtr) {
-	    src = TclUtfNext(src);
-	    i++;
+	    next = TclUtfNext(src);
+	    i += 1 + ((next - src) > 3);
+	    src = next;
 	}
 	endPtr += TCL_UTF_MAX;
 	while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
-	    src = TclUtfNext(src);
-	    i++;
+	    next = TclUtfNext(src);
+	    i += 1 + ((next - src) > 3);
+	    src = next;
 	}
 	if (src < endPtr) {
 	    i += endPtr - src;
@@ -958,33 +962,19 @@ Tcl_UtfAtIndex(
     register const char *src,	/* The UTF-8 string. */
     register int index)		/* The position of the desired character. */
 {
-#if 0
-/* The Tcl 8.6 implementation */
-    Tcl_UniChar ch = 0;
-    int len = 0;
-
     while (index-- > 0) {
-	len = TclUtfToUniChar(src, &ch);
-	src += len;
-    }
-#if TCL_UTF_MAX == 4
-    if ((ch >= 0xD800) && (len < 3)) {
-	/* Index points at character following high Surrogate */
-	src = TclUtfToUniChar(src, &ch);
-    }
-#endif
-    return src;
-#else
-/* The Tcl 8.5 implementation */
-    while (index > 0) {
-        index--;
-        src = TclUtfNext(src);	/* NOTE: counts each valid byte sequence
-				 * as one character, maybe including those
-				 * that will get stored as two UCS-2 units
-				 * in the UTF-16 encoding. */
+        const char *next = TclUtfNext(src);
+
+	/*
+	 * 4-byte sequences generate two UCS-2 code units in the
+	 * UTF-16 representation, so in the current indexing scheme
+	 * we need to account for an extra index (total of two).
+	 */
+	index -= ((next - src) > 3);
+
+	src = next;
     }
     return src;
-#endif
 }
 
 /*
diff --git a/tests/utf.test b/tests/utf.test
index 76cf3fe..dd94c54 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -96,7 +96,7 @@ test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} t
 } {4}
 test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} testbytestring {
     string length [testbytestring "\xF4\x90\x80\x80"]
-} {4}
+} {2}
 test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestring {
     string length [testbytestring "\xF8\xA2\xA2\xA2\xA2"]
 } {5}
@@ -420,7 +420,7 @@ test utf-6.87 {Tcl_UtfNext - overlong sequences} testutfnext {
 } 1
 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
     testutfnext \xE8\xA0\xA0 1
-} 3
+} 2
 
 testConstraint testutfprev [llength [info commands testutfprev]]
 
-- 
cgit v0.12


From e1f67c4c92c08a2f8dfa465b14b877c7572822ec Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Fri, 17 Apr 2020 04:08:46 +0000
Subject: Bring the single-byte marker for invalid lead byte \xC1 into the
 complete table

---
 generic/tclUtf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 24fd418..fdf2e32 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -82,7 +82,7 @@ static const unsigned char complete[256] = {
     3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
     3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
 #endif
-    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
     3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
 #if TCL_UTF_MAX > 4
     4,4,4,4,4,
-- 
cgit v0.12


From 700b782299b5ac82c829eae5725cdc7b577390a6 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Fri, 17 Apr 2020 04:17:51 +0000
Subject: enable the tests on a bug fix branch

---
 tests/utf.test | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/utf.test b/tests/utf.test
index b89be25..2004625 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -415,7 +415,7 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext {
 test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext {
     testutfnext \xF0\x80\x80\x80
 } 1
-test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext knownBug} { # Doesn't work with TCL_UTF_MAX>3
+test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext} { # Doesn't work with TCL_UTF_MAX>3
     testutfnext \xF0\x90\x80\x80
 } 1
 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
@@ -628,13 +628,13 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
 test utf-7.38 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
     testutfprev A\xE0\xA0\x80 2
 } 1
-test utf-7.39 {Tcl_UtfPrev -- overlong sequence}  {testutfprev knownBug} { # Doesn't work with TCL_UTF_MAX>3
+test utf-7.39 {Tcl_UtfPrev -- overlong sequence}  {testutfprev} { # Doesn't work with TCL_UTF_MAX>3
     testutfprev A\xF0\x90\x80\x80
 } 4
-test utf-7.40 {Tcl_UtfPrev -- overlong sequence}  {testutfprev knownBug} { # Doesn't work with TCL_UTF_MAX>3
+test utf-7.40 {Tcl_UtfPrev -- overlong sequence}  {testutfprev} { # Doesn't work with TCL_UTF_MAX>3
     testutfprev A\xF0\x90\x80\x80 4
 } 3
-test utf-7.41 {Tcl_UtfPrev -- overlong sequence}  {testutfprev knownBug} { # Doesn't work with TCL_UTF_MAX>3
+test utf-7.41 {Tcl_UtfPrev -- overlong sequence}  {testutfprev} { # Doesn't work with TCL_UTF_MAX>3
     testutfprev A\xF0\x90\x80\x80 3
 } 2
 test utf-7.42 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
-- 
cgit v0.12


From c998be1b068916a8009a76867ed4eaed8938d19b Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Fri, 17 Apr 2020 04:45:26 +0000
Subject: When supporting 4-byte sequences, make sure the Overlong test does
 too, and make sure the test results reflect it.

---
 generic/tclUtf.c | 2 +-
 tests/utf.test   | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index fdf2e32..e637263 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -154,7 +154,7 @@ UtfCount(
 static CONST unsigned char overlong[3] = {
     0x80,	/* \xD0 -- all sequences valid */
     0xA0,	/* \xE0\x80 through \xE0\x9F are invalid prefixes */
-#if TCL_UTF_MAX > 3
+#if TCL_UTF_MAX >= 3
     0x90	/* \xF0\x80 through \xF0\x8F are invalid prefixes */
 #else
     0xC0	/* Not used, but reject all again for safety. */
diff --git a/tests/utf.test b/tests/utf.test
index 2004625..e8b1d51 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -630,13 +630,13 @@ test utf-7.38 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
 } 1
 test utf-7.39 {Tcl_UtfPrev -- overlong sequence}  {testutfprev} { # Doesn't work with TCL_UTF_MAX>3
     testutfprev A\xF0\x90\x80\x80
-} 4
+} 1
 test utf-7.40 {Tcl_UtfPrev -- overlong sequence}  {testutfprev} { # Doesn't work with TCL_UTF_MAX>3
     testutfprev A\xF0\x90\x80\x80 4
-} 3
+} 1
 test utf-7.41 {Tcl_UtfPrev -- overlong sequence}  {testutfprev} { # Doesn't work with TCL_UTF_MAX>3
     testutfprev A\xF0\x90\x80\x80 3
-} 2
+} 1
 test utf-7.42 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
     testutfprev A\xF0\x90\x80\x80 2
 } 1
-- 
cgit v0.12


From 1edc023972ada28abd9649c5e153f0705ba1268d Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Fri, 17 Apr 2020 04:51:48 +0000
Subject: more test fixes

---
 tests/utf.test | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/utf.test b/tests/utf.test
index e8b1d51..c584ea1 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -415,9 +415,9 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext {
 test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext {
     testutfnext \xF0\x80\x80\x80
 } 1
-test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext} { # Doesn't work with TCL_UTF_MAX>3
+test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext} {
     testutfnext \xF0\x90\x80\x80
-} 1
+} 4
 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
     testutfnext \xE8\xA0\xA0 1
 } 2
@@ -628,13 +628,13 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
 test utf-7.38 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
     testutfprev A\xE0\xA0\x80 2
 } 1
-test utf-7.39 {Tcl_UtfPrev -- overlong sequence}  {testutfprev} { # Doesn't work with TCL_UTF_MAX>3
+test utf-7.39 {Tcl_UtfPrev -- overlong sequence}  {testutfprev} {
     testutfprev A\xF0\x90\x80\x80
 } 1
-test utf-7.40 {Tcl_UtfPrev -- overlong sequence}  {testutfprev} { # Doesn't work with TCL_UTF_MAX>3
+test utf-7.40 {Tcl_UtfPrev -- overlong sequence}  {testutfprev} {
     testutfprev A\xF0\x90\x80\x80 4
 } 1
-test utf-7.41 {Tcl_UtfPrev -- overlong sequence}  {testutfprev} { # Doesn't work with TCL_UTF_MAX>3
+test utf-7.41 {Tcl_UtfPrev -- overlong sequence}  {testutfprev} {
     testutfprev A\xF0\x90\x80\x80 3
 } 1
 test utf-7.42 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
-- 
cgit v0.12


From ad85391b59eba72ecf8c0af3e83f800c61e52f26 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Fri, 17 Apr 2020 05:14:43 +0000
Subject: When supporting 4-byte sequences even with TCL_UTF_MAX = 3, need to
 paramterize a few things differently. (utf-4.11 failures).

---
 generic/tclUtf.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index e637263..4b5d500 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -84,11 +84,7 @@ static const unsigned char complete[256] = {
 #endif
     2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
     3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
-#if TCL_UTF_MAX > 4
     4,4,4,4,4,
-#else
-    3,3,3,3,3, /* Tcl_UtfCharComplete() only checks TCL_UTF_MAX bytes */
-#endif
     1,1,1,1,1,1,1,1,1,1,1
 };
 
@@ -606,14 +602,14 @@ Tcl_NumUtfChars(
 	    src = next;
 	}
     } else {
-	register const char *endPtr = src + length - TCL_UTF_MAX;
+	register const char *endPtr = src + length - /*TCL_UTF_MAX*/ 4;
 
 	while (src < endPtr) {
 	    next = TclUtfNext(src);
 	    i += 1 + ((next - src) > 3);
 	    src = next;
 	}
-	endPtr += TCL_UTF_MAX;
+	endPtr += /*TCL_UTF_MAX*/ 4;
 	while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
 	    next = TclUtfNext(src);
 	    i += 1 + ((next - src) > 3);
-- 
cgit v0.12


From b4ecc6b747780c309fbf05d525ecb3c5f74f3ff2 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Fri, 17 Apr 2020 09:47:01 +0000
Subject: More test-cases

---
 tests/utf.test | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/tests/utf.test b/tests/utf.test
index 02b7002..7b7b5c2 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -376,6 +376,18 @@ test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext {
 test utf-6.87 {Tcl_UtfNext - overlong sequences} testutfnext {
     testutfnext \xF0\x90\x80\x80
 } 1
+test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
+    testutfnext \xA0\xA0
+} 1
+test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
+    testutfnext \xE8\xA0\xA0 1
+} 2
+test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
+    testutfnext \x80\x80
+} 1
+test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
+    testutfnext \xF0\x80\x80 1
+} 2
 
 testConstraint testutfprev [llength [info commands testutfprev]]
 
@@ -460,6 +472,9 @@ test utf-7.11.1 {Tcl_UtfPrev} testutfprev {
 test utf-7.11.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xE8\xA0\xF8\xA0 3
 } 1
+test utf-7.11.3 {Tcl_UtfPrev} testutfprev {
+    testutfprev A\xE8\xA0\xF8 3
+} 1
 test utf-7.12 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xD0\xA0
 } 1
@@ -548,9 +563,15 @@ test utf-7.26 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
     testutfprev A\xE0\x80\x80
 } 3
 test utf-7.27 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+    testutfprev A\xE0\x80
+} 2
+test utf-7.27.1 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
     testutfprev A\xE0\x80\x80 3
 } 2
 test utf-7.28 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+    testutfprev A\xE0
+} 1
+test utf-7.28.1 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
     testutfprev A\xE0\x80\x80 2
 } 1
 test utf-7.29 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
@@ -607,6 +628,15 @@ test utf-7.45 {Tcl_UtfPrev -- no lead byte at start}  testutfprev {
 test utf-7.46 {Tcl_UtfPrev -- no lead byte at start}  testutfprev {
     testutfprev \xA0\xA0\xA0\xA0
 } 3
+test utf-7.47 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev} {
+    testutfprev \xE8\xA0
+} 0
+test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev} {
+    testutfprev \xE8\xA0\xA0 2
+} 0
+test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} {
+    testutfprev \xE8\xA0\x00 2
+} 0
 
 test utf-8.1 {Tcl_UniCharAtIndex: index = 0} {
     string index abcd 0
-- 
cgit v0.12


From 4de1b924a8f4f41a574d576427acfea490cd06ca Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Fri, 17 Apr 2020 11:02:19 +0000
Subject: Fix implementation of Tcl_UtfAtIndex() for TCL_UTF_MAX=6  (There's no
 test-case for this in the core-8-6-branch, but there is in core-8-branch).

---
 generic/tclUtf.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 94fa5f6..1ba474e 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -971,14 +971,16 @@ Tcl_UtfAtIndex(
     register int index)		/* The position of the desired character. */
 {
     while (index-- > 0) {
-        const char *next = TclUtfNext(src);
+	const char *next = TclUtfNext(src);
 
+#if TCL_UTF_MAX <= 4
 	/*
 	 * 4-byte sequences generate two UCS-2 code units in the
 	 * UTF-16 representation, so in the current indexing scheme
 	 * we need to account for an extra index (total of two).
 	 */
 	index -= ((next - src) > 3);
+#endif
 
 	src = next;
     }
-- 
cgit v0.12


From aea602a07ae4eba9adb21f2d11c7f96b5f632f79 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Fri, 17 Apr 2020 12:49:32 +0000
Subject: Clean-up some unnecessary spacing.

---
 compat/unistd.h                 |  2 +-
 generic/tclCompile.h            |  2 +-
 generic/tclStubInit.c           |  2 +-
 generic/tclStubLib.c            |  2 +-
 generic/tclTomMath.h            | 10 +++++-----
 libtommath/tommath.h            | 12 ++++++------
 libtommath/tommath_superclass.h |  4 ++--
 unix/tclUnixFCmd.c              |  2 +-
 unix/tclUnixFile.c              |  2 +-
 unix/tclUnixPort.h              |  6 +++---
 unix/tclUnixThrd.h              |  2 +-
 unix/tclUnixTime.c              |  4 ++--
 unix/tclXtTest.c                |  2 +-
 win/tclWinInt.h                 | 34 +++++++++++++++++-----------------
 14 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/compat/unistd.h b/compat/unistd.h
index 2de5bd0..a8f14f2 100644
--- a/compat/unistd.h
+++ b/compat/unistd.h
@@ -20,7 +20,7 @@
 #define NULL    0
 #endif
 
-/* 
+/*
  * Strict POSIX stuff goes here. Extensions go down below, in the ifndef
  * _POSIX_SOURCE section.
  */
diff --git a/generic/tclCompile.h b/generic/tclCompile.h
index 9ee60c3..ccfa4be 100644
--- a/generic/tclCompile.h
+++ b/generic/tclCompile.h
@@ -1257,7 +1257,7 @@ MODULE_SCOPE int	TclWordKnownAtCompileTime(Tcl_Token *tokenPtr,
  * If the second macro is defined, logging to file starts immediately,
  * otherwise only after the first call to [tcl::dtrace]. Note that the debug
  * probe data is always computed, even when it is not logged to file.
- * 
+ *
  * Defining the third macro enables debug logging of inst probes (disabled
  * by default due to the significant performance impact).
  */
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index d6f1da9..1a83752 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -1,4 +1,4 @@
-/* 
+/*
  * tclStubInit.c --
  *
  *	This file contains the initializers for the Tcl stub vectors.
diff --git a/generic/tclStubLib.c b/generic/tclStubLib.c
index 31fc865..c895ffe 100644
--- a/generic/tclStubLib.c
+++ b/generic/tclStubLib.c
@@ -142,7 +142,7 @@ TclTomMathInitializeStubs(
     const char* packageName = "tcl::tommath";
     const char* errMsg = NULL;
     ClientData pkgClientData = NULL;
-    const char* actualVersion = 
+    const char* actualVersion =
 	tclStubsPtr->tcl_PkgRequireEx(interp, packageName, version, exact, &pkgClientData);
     TclTomMathStubs* stubsPtr = (TclTomMathStubs*) pkgClientData;
     if (actualVersion == NULL) {
diff --git a/generic/tclTomMath.h b/generic/tclTomMath.h
index b219405..1e455fe 100644
--- a/generic/tclTomMath.h
+++ b/generic/tclTomMath.h
@@ -80,10 +80,10 @@ extern "C" {
    #define DIGIT_BIT          60
 #else
    /* this is the default case, 28-bit digits */
-   
+
    /* this is to make porting into LibTomCrypt easier :-) */
 #ifndef CRYPT
-   #if defined(_MSC_VER) || defined(__BORLANDC__) 
+   #if defined(_MSC_VER) || defined(__BORLANDC__)
       typedef unsigned __int64   ulong64;
       typedef signed __int64     long64;
    #else
@@ -98,21 +98,21 @@ extern "C" {
 #endif
    typedef ulong64            mp_word;
 
-#ifdef MP_31BIT   
+#ifdef MP_31BIT
    /* this is an extension that uses 31-bit digits */
    #define DIGIT_BIT          31
 #else
    /* default case is 28-bit digits, defines MP_28BIT as a handy macro to test */
    #define DIGIT_BIT          28
    #define MP_28BIT
-#endif   
+#endif
 #endif
 
 /* define heap macros */
 #if 0 /* these are macros in tclTomMathDecls.h */
 #ifndef CRYPT
    /* default to libc stuff */
-   #ifndef XMALLOC 
+   #ifndef XMALLOC
        #define XMALLOC  malloc
        #define XFREE    free
        #define XREALLOC realloc
diff --git a/libtommath/tommath.h b/libtommath/tommath.h
index df460f6..babb478 100644
--- a/libtommath/tommath.h
+++ b/libtommath/tommath.h
@@ -36,7 +36,7 @@ extern "C" {
 
 
 /* detect 64-bit mode if possible */
-#if defined(__x86_64__) 
+#if defined(__x86_64__)
    #if !(defined(MP_64BIT) && defined(MP_16BIT) && defined(MP_8BIT))
       #define MP_64BIT
    #endif
@@ -69,10 +69,10 @@ extern "C" {
    #define DIGIT_BIT          60
 #else
    /* this is the default case, 28-bit digits */
-   
+
    /* this is to make porting into LibTomCrypt easier :-) */
 #ifndef CRYPT
-   #if defined(_MSC_VER) || defined(__BORLANDC__) 
+   #if defined(_MSC_VER) || defined(__BORLANDC__)
       typedef unsigned __int64   ulong64;
       typedef signed __int64     long64;
    #else
@@ -84,20 +84,20 @@ extern "C" {
    typedef unsigned long      mp_digit;
    typedef ulong64            mp_word;
 
-#ifdef MP_31BIT   
+#ifdef MP_31BIT
    /* this is an extension that uses 31-bit digits */
    #define DIGIT_BIT          31
 #else
    /* default case is 28-bit digits, defines MP_28BIT as a handy macro to test */
    #define DIGIT_BIT          28
    #define MP_28BIT
-#endif   
+#endif
 #endif
 
 /* define heap macros */
 #ifndef CRYPT
    /* default to libc stuff */
-   #ifndef XMALLOC 
+   #ifndef XMALLOC
        #define XMALLOC  malloc
        #define XFREE    free
        #define XREALLOC realloc
diff --git a/libtommath/tommath_superclass.h b/libtommath/tommath_superclass.h
index e3926df..c4ec19f 100644
--- a/libtommath/tommath_superclass.h
+++ b/libtommath/tommath_superclass.h
@@ -60,9 +60,9 @@
    #undef  BN_FAST_MP_INVMOD_C
 
    /* To safely undefine these you have to make sure your RSA key won't exceed the Comba threshold
-    * which is roughly 255 digits [7140 bits for 32-bit machines, 15300 bits for 64-bit machines] 
+    * which is roughly 255 digits [7140 bits for 32-bit machines, 15300 bits for 64-bit machines]
     * which means roughly speaking you can handle upto 2536-bit RSA keys with these defined without
-    * trouble.  
+    * trouble.
     */
    #undef  BN_S_MP_MUL_DIGS_C
    #undef  BN_S_MP_SQR_C
diff --git a/unix/tclUnixFCmd.c b/unix/tclUnixFCmd.c
index 7360c32..a582223 100644
--- a/unix/tclUnixFCmd.c
+++ b/unix/tclUnixFCmd.c
@@ -1051,7 +1051,7 @@ TraverseUnixTree(
 	unsigned short pathlen = ent->fts_pathlen - sourceLen;
 	int type;
 	Tcl_StatBuf *statBufPtr = NULL;
-	
+
 	if (info == FTS_DNR || info == FTS_ERR || info == FTS_NS) {
 	    errfile = ent->fts_path;
 	    break;
diff --git a/unix/tclUnixFile.c b/unix/tclUnixFile.c
index 038cbf8..3b09799 100644
--- a/unix/tclUnixFile.c
+++ b/unix/tclUnixFile.c
@@ -332,7 +332,7 @@ TclpMatchInDirectory(
 
 	matchHiddenPat = (pattern[0] == '.')
 		|| ((pattern[0] == '\\') && (pattern[1] == '.'));
-	matchHidden = matchHiddenPat 
+	matchHidden = matchHiddenPat
 		|| (types && (types->perm & TCL_GLOB_PERM_HIDDEN));
 	while ((entryPtr = TclOSreaddir(d)) != NULL) {	/* INTL: Native. */
 	    Tcl_DString utfDs;
diff --git a/unix/tclUnixPort.h b/unix/tclUnixPort.h
index 086dd91..0e1bce8 100644
--- a/unix/tclUnixPort.h
+++ b/unix/tclUnixPort.h
@@ -590,8 +590,8 @@ extern char **environ;
 
 /*
  *---------------------------------------------------------------------------
- * The following macros and declarations represent the interface between 
- * generic and unix-specific parts of Tcl.  Some of the macros may override 
+ * The following macros and declarations represent the interface between
+ * generic and unix-specific parts of Tcl.  Some of the macros may override
  * functions declared in tclInt.h.
  *---------------------------------------------------------------------------
  */
@@ -608,7 +608,7 @@ typedef int socklen_t;
 #endif
 
 /*
- * The following macros have trivial definitions, allowing generic code to 
+ * The following macros have trivial definitions, allowing generic code to
  * address platform-specific issues.
  */
 
diff --git a/unix/tclUnixThrd.h b/unix/tclUnixThrd.h
index 6a73132..f03b530 100644
--- a/unix/tclUnixThrd.h
+++ b/unix/tclUnixThrd.h
@@ -8,7 +8,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  */
- 
+
 #ifndef _TCLUNIXTHRD
 #define _TCLUNIXTHRD
 
diff --git a/unix/tclUnixTime.c b/unix/tclUnixTime.c
index 4860876..a6572f9 100644
--- a/unix/tclUnixTime.c
+++ b/unix/tclUnixTime.c
@@ -228,7 +228,7 @@ TclpWideClicksToNanoseconds(
 #ifdef MAC_OSX_TCL
 	static mach_timebase_info_data_t tb;
 	static uint64_t maxClicksForUInt64;
-	
+
 	if (!tb.denom) {
 	    mach_timebase_info(&tb);
 	    maxClicksForUInt64 = UINT64_MAX / tb.numer;
@@ -251,7 +251,7 @@ TclpWideClicksToNanoseconds(
  *
  * TclpWideClickInMicrosec --
  *
- *	This procedure return scale to convert click values from the 
+ *	This procedure return scale to convert click values from the
  *	TclpGetWideClicks native resolution to microsecond resolution
  *	and back.
  *
diff --git a/unix/tclXtTest.c b/unix/tclXtTest.c
index 8437f2a..1393dfe 100644
--- a/unix/tclXtTest.c
+++ b/unix/tclXtTest.c
@@ -1,4 +1,4 @@
-/* 
+/*
  * tclXtTest.c --
  *
  *	Contains commands for Xt notifier specific tests on Unix.
diff --git a/win/tclWinInt.h b/win/tclWinInt.h
index af6619f..39790a0 100644
--- a/win/tclWinInt.h
+++ b/win/tclWinInt.h
@@ -59,7 +59,7 @@ typedef struct TCLEXCEPTION_REGISTRATION {
 #endif
 
 /*
- * The following structure keeps track of whether we are using the 
+ * The following structure keeps track of whether we are using the
  * multi-byte or the wide-character interfaces to the operating system.
  * System calls should be made through the following function table.
  */
@@ -76,10 +76,10 @@ typedef struct TclWinProcs {
     TCHAR *(WINAPI *charLowerProc)(TCHAR *);
     BOOL (WINAPI *copyFileProc)(CONST TCHAR *, CONST TCHAR *, BOOL);
     BOOL (WINAPI *createDirectoryProc)(CONST TCHAR *, LPSECURITY_ATTRIBUTES);
-    HANDLE (WINAPI *createFileProc)(CONST TCHAR *, DWORD, DWORD, 
+    HANDLE (WINAPI *createFileProc)(CONST TCHAR *, DWORD, DWORD,
 	    LPSECURITY_ATTRIBUTES, DWORD, DWORD, HANDLE);
-    BOOL (WINAPI *createProcessProc)(CONST TCHAR *, TCHAR *, 
-	    LPSECURITY_ATTRIBUTES, LPSECURITY_ATTRIBUTES, BOOL, DWORD, 
+    BOOL (WINAPI *createProcessProc)(CONST TCHAR *, TCHAR *,
+	    LPSECURITY_ATTRIBUTES, LPSECURITY_ATTRIBUTES, BOOL, DWORD,
 	    LPVOID, CONST TCHAR *, LPSTARTUPINFOA, LPPROCESS_INFORMATION);
     BOOL (WINAPI *deleteFileProc)(CONST TCHAR *);
     HANDLE (WINAPI *findFirstFileProc)(CONST TCHAR *, WIN32_FIND_DATAT *);
@@ -87,35 +87,35 @@ typedef struct TclWinProcs {
     BOOL (WINAPI *getComputerNameProc)(WCHAR *, LPDWORD);
     DWORD (WINAPI *getCurrentDirectoryProc)(DWORD, WCHAR *);
     DWORD (WINAPI *getFileAttributesProc)(CONST TCHAR *);
-    DWORD (WINAPI *getFullPathNameProc)(CONST TCHAR *, DWORD nBufferLength, 
+    DWORD (WINAPI *getFullPathNameProc)(CONST TCHAR *, DWORD nBufferLength,
 	    WCHAR *, TCHAR **);
     DWORD (WINAPI *getModuleFileNameProc)(HMODULE, WCHAR *, int);
-    DWORD (WINAPI *getShortPathNameProc)(CONST TCHAR *, WCHAR *, DWORD); 
-    UINT (WINAPI *getTempFileNameProc)(CONST TCHAR *, CONST TCHAR *, UINT, 
+    DWORD (WINAPI *getShortPathNameProc)(CONST TCHAR *, WCHAR *, DWORD);
+    UINT (WINAPI *getTempFileNameProc)(CONST TCHAR *, CONST TCHAR *, UINT,
 	    WCHAR *);
     DWORD (WINAPI *getTempPathProc)(DWORD, WCHAR *);
-    BOOL (WINAPI *getVolumeInformationProc)(CONST TCHAR *, WCHAR *, DWORD, 
+    BOOL (WINAPI *getVolumeInformationProc)(CONST TCHAR *, WCHAR *, DWORD,
 	    LPDWORD, LPDWORD, LPDWORD, WCHAR *, DWORD);
     HINSTANCE (WINAPI *loadLibraryExProc)(CONST TCHAR *, HANDLE, DWORD);
     TCHAR (WINAPI *lstrcpyProc)(WCHAR *, CONST TCHAR *);
     BOOL (WINAPI *moveFileProc)(CONST TCHAR *, CONST TCHAR *);
     BOOL (WINAPI *removeDirectoryProc)(CONST TCHAR *);
-    DWORD (WINAPI *searchPathProc)(CONST TCHAR *, CONST TCHAR *, 
+    DWORD (WINAPI *searchPathProc)(CONST TCHAR *, CONST TCHAR *,
 	    CONST TCHAR *, DWORD, WCHAR *, TCHAR **);
     BOOL (WINAPI *setCurrentDirectoryProc)(CONST TCHAR *);
     BOOL (WINAPI *setFileAttributesProc)(CONST TCHAR *, DWORD);
-    /* 
+    /*
      * These two function pointers will only be set when
      * Tcl_FindExecutable is called.  If you don't ever call that
      * function, the application will crash whenever WinTcl tries to call
      * functions through these null pointers.  That is not a bug in Tcl
      * -- Tcl_FindExecutable is obligatory in recent Tcl releases.
      */
-    BOOL (WINAPI *getFileAttributesExProc)(CONST TCHAR *, 
+    BOOL (WINAPI *getFileAttributesExProc)(CONST TCHAR *,
 	    GET_FILEEX_INFO_LEVELS, LPVOID);
-    BOOL (WINAPI *createHardLinkProc)(CONST TCHAR*, CONST TCHAR*, 
+    BOOL (WINAPI *createHardLinkProc)(CONST TCHAR*, CONST TCHAR*,
 				      LPSECURITY_ATTRIBUTES);
-    
+
     /* deleted INT (__cdecl *utimeProc)(CONST TCHAR*, struct _utimbuf *); */
     /* These two are also NULL at start; see comment above */
     HANDLE (WINAPI *findFirstFileExProc)(CONST TCHAR*, UINT,
@@ -123,7 +123,7 @@ typedef struct TclWinProcs {
 					 LPVOID, DWORD);
     BOOL (WINAPI *getVolumeNameForVMPProc)(CONST TCHAR*, TCHAR*, DWORD);
     DWORD (WINAPI *getLongPathNameProc)(CONST TCHAR*, TCHAR*, DWORD);
-    /* 
+    /*
      * These six are for the security sdk to get correct file
      * permissions on NT, 2000, XP, etc.  On 95,98,ME they are
      * always null.
@@ -131,9 +131,9 @@ typedef struct TclWinProcs {
     BOOL (WINAPI *getFileSecurityProc)(LPCTSTR lpFileName,
 		     SECURITY_INFORMATION RequestedInformation,
 		     PSECURITY_DESCRIPTOR pSecurityDescriptor,
-		     DWORD nLength, 
+		     DWORD nLength,
 		     LPDWORD lpnLengthNeeded);
-    BOOL (WINAPI *impersonateSelfProc) (SECURITY_IMPERSONATION_LEVEL 
+    BOOL (WINAPI *impersonateSelfProc) (SECURITY_IMPERSONATION_LEVEL
 		      ImpersonationLevel);
     BOOL (WINAPI *openThreadTokenProc) (HANDLE ThreadHandle,
 		      DWORD DesiredAccess, BOOL OpenAsSelf,
@@ -191,7 +191,7 @@ MODULE_SCOPE HANDLE	TclWinSerialOpen(HANDLE handle, CONST TCHAR *name,
 			    DWORD access);
 MODULE_SCOPE int	TclWinSymLinkCopyDirectory(CONST TCHAR* LinkOriginal,
 			    CONST TCHAR* LinkCopy);
-MODULE_SCOPE int	TclWinSymLinkDelete(CONST TCHAR* LinkOriginal, 
+MODULE_SCOPE int	TclWinSymLinkDelete(CONST TCHAR* LinkOriginal,
 			    int linkOnly);
 #if defined(TCL_THREADS) && defined(USE_THREAD_ALLOC)
 MODULE_SCOPE void	TclWinFreeAllocCache(void);
-- 
cgit v0.12


From 54b62ddd622894072bea921ed8c3a5af0d7d3de9 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Fri, 17 Apr 2020 13:01:35 +0000
Subject: Unbreak shared windows build. Remove some ARGSUSED usage.

---
 generic/tclTest.c  | 49 ++++++++++++++++++++-----------------------------
 unix/tclUnixChan.c |  6 ------
 unix/tclUnixPipe.c |  4 ----
 win/tclWinPipe.c   |  1 -
 win/tclWinSock.c   |  3 ---
 5 files changed, 20 insertions(+), 43 deletions(-)

diff --git a/generic/tclTest.c b/generic/tclTest.c
index 6e0fbed..8c29aa7 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -457,12 +457,6 @@ Tcltest_Init(
 	"-appinitprocclosestderr", "-appinitprocsetrcfile", NULL
     };
 
-    if (Tcl_InitStubs(interp, "8.5", 0) == NULL) {
-	return TCL_ERROR;
-    }
-    if (Tcl_TomMath_InitStubs(interp, "8.5") == NULL) {
-	return TCL_ERROR;
-    }
     /* TIP #268: Full patchlevel instead of just major.minor */
 
     if (Tcl_PkgProvide(interp, "Tcltest", TCL_PATCH_LEVEL) == TCL_ERROR) {
@@ -683,7 +677,6 @@ Tcltest_Init(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
 static int
 TestasyncCmd(
     ClientData dummy,			/* Not used. */
@@ -913,7 +906,7 @@ AsyncThreadProc(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
+
 static int
 TestcmdinfoCmd(
     ClientData dummy,		/* Not used. */
@@ -985,7 +978,6 @@ TestcmdinfoCmd(
     return TCL_OK;
 }
 
-	/*ARGSUSED*/
 static int
 CmdProc1(
     ClientData clientData,	/* String to return. */
@@ -997,7 +989,6 @@ CmdProc1(
     return TCL_OK;
 }
 
-	/*ARGSUSED*/
 static int
 CmdProc2(
     ClientData clientData,	/* String to return. */
@@ -1044,7 +1035,7 @@ CmdDelProc2(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
+
 static int
 TestcmdtokenCmd(
     ClientData dummy,		/* Not used. */
@@ -1108,7 +1099,7 @@ TestcmdtokenCmd(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
+
 static int
 TestcmdtraceCmd(
     ClientData dummy,		/* Not used. */
@@ -1398,7 +1389,7 @@ CreatedCommandProc2(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
+
 static int
 TestdcallCmd(
     ClientData dummy,		/* Not used. */
@@ -1463,7 +1454,7 @@ DelCallbackProc(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
+
 static int
 TestdelCmd(
     ClientData dummy,		/* Not used. */
@@ -1668,7 +1659,7 @@ TestdoubledigitsObjCmd(ClientData unused,
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
+
 static int
 TestdstringCmd(
     ClientData dummy,		/* Not used. */
@@ -1795,7 +1786,7 @@ static void SpecialFree(blockPtr)
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
+
 static int
 TestencodingObjCmd(
     ClientData dummy,		/* Not used. */
@@ -2650,7 +2641,7 @@ TestgetplatformCmd(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
+
 static int
 TestinterpdeleteCmd(
     ClientData dummy,		/* Not used. */
@@ -2691,7 +2682,7 @@ TestinterpdeleteCmd(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
+
 static int
 TestlinkCmd(
     ClientData dummy,		/* Not used. */
@@ -3222,7 +3213,7 @@ TestlocaleCmd(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
+
 static int
 TestMathFunc(
     ClientData clientData,	/* Integer value to return. */
@@ -3252,7 +3243,7 @@ TestMathFunc(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
+
 static int
 TestMathFunc2(
     ClientData clientData,	/* Integer value to return. */
@@ -3359,7 +3350,7 @@ TestMathFunc2(
  *
  *----------------------------------------------------------------------
  */
-	/* ARGSUSED */
+
 static void
 CleanupTestSetassocdataTests(
     ClientData clientData,	/* Data to be released. */
@@ -3693,7 +3684,7 @@ TestparsevarnameObjCmd(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
+
 static int
 TestregexpObjCmd(
     ClientData dummy,		/* Not used. */
@@ -4019,7 +4010,7 @@ TestregexpXflags(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
+
 static int
 TestreturnObjCmd(
     ClientData dummy,		/* Not used. */
@@ -4241,7 +4232,7 @@ TesttranslatefilenameCmd(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
+
 static int
 TestupvarCmd(
     ClientData dummy,		/* Not used. */
@@ -4294,7 +4285,7 @@ TestupvarCmd(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
+
 static int
 TestseterrorcodeCmd(
     ClientData dummy,		/* Not used. */
@@ -4347,7 +4338,7 @@ TestseterrorcodeCmd(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
+
 static int
 TestsetobjerrorcodeCmd(
     ClientData dummy,		/* Not used. */
@@ -4376,7 +4367,7 @@ TestsetobjerrorcodeCmd(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
+
 static int
 TestfeventCmd(
     ClientData clientData,	/* Not used. */
@@ -5768,7 +5759,7 @@ TestOpenFileChannelProc3(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
+
 static int
 TestChannelCmd(
     ClientData clientData,	/* Not used. */
@@ -6237,7 +6228,7 @@ TestChannelCmd(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
+
 static int
 TestChannelEventCmd(
     ClientData dummy,		/* Not used. */
diff --git a/unix/tclUnixChan.c b/unix/tclUnixChan.c
index 9cac4ae..d3207e2 100644
--- a/unix/tclUnixChan.c
+++ b/unix/tclUnixChan.c
@@ -344,7 +344,6 @@ static Tcl_ChannelType tcpChannelType = {
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
 static int
 FileBlockModeProc(
     ClientData instanceData,	/* File state. */
@@ -1840,7 +1839,6 @@ Tcl_MakeFileChannel(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
 static int
 TcpBlockModeProc(
     ClientData instanceData,	/* Socket state. */
@@ -1938,7 +1936,6 @@ WaitForConnect(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
 static int
 TcpInputProc(
     ClientData instanceData,	/* Socket state. */
@@ -2033,7 +2030,6 @@ TcpOutputProc(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
 static int
 TcpCloseProc(
     ClientData instanceData,	/* The socket to close. */
@@ -2331,7 +2327,6 @@ TcpWatchProc(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
 static int
 TcpGetHandleProc(
     ClientData instanceData,	/* The socket state. */
@@ -2853,7 +2848,6 @@ Tcl_OpenTcpServer(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
 static void
 TcpAccept(
     ClientData data,		/* Callback token. */
diff --git a/unix/tclUnixPipe.c b/unix/tclUnixPipe.c
index d0a5e53..f2a2daa 100644
--- a/unix/tclUnixPipe.c
+++ b/unix/tclUnixPipe.c
@@ -410,7 +410,6 @@ TclpCloseFile(
  *---------------------------------------------------------------------------
  */
 
-    /* ARGSUSED */
 int
 TclpCreateProcess(
     Tcl_Interp *interp,		/* Interpreter in which to leave errors that
@@ -889,7 +888,6 @@ TclGetAndDetachPids(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
 static int
 PipeBlockModeProc(
     ClientData instanceData,	/* Pipe state. */
@@ -933,7 +931,6 @@ PipeBlockModeProc(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
 static int
 PipeCloseProc(
     ClientData instanceData,	/* The pipe to close. */
@@ -1231,7 +1228,6 @@ Tcl_WaitPid(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
 int
 Tcl_PidObjCmd(
     ClientData dummy,		/* Not used. */
diff --git a/win/tclWinPipe.c b/win/tclWinPipe.c
index e33273b..cdb955f 100644
--- a/win/tclWinPipe.c
+++ b/win/tclWinPipe.c
@@ -2782,7 +2782,6 @@ TclWinAddProcess(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
 int
 Tcl_PidObjCmd(
     ClientData dummy,		/* Not used. */
diff --git a/win/tclWinSock.c b/win/tclWinSock.c
index 8f565b9..882aa4a 100644
--- a/win/tclWinSock.c
+++ b/win/tclWinSock.c
@@ -358,7 +358,6 @@ InitSockets(void)
  *----------------------------------------------------------------------
  */
 
-    /* ARGSUSED */
 static int
 SocketsEnabled(void)
 {
@@ -387,7 +386,6 @@ SocketsEnabled(void)
  *----------------------------------------------------------------------
  */
 
-    /* ARGSUSED */
 static void
 SocketExitHandler(
     ClientData clientData)		/* Not used. */
@@ -786,7 +784,6 @@ TcpBlockProc(
  *----------------------------------------------------------------------
  */
 
-    /* ARGSUSED */
 static int
 TcpCloseProc(
     ClientData instanceData,	/* The socket to close. */
-- 
cgit v0.12


From d569ea2c8be5fab1d16b51b2bdb862b5fe5a9283 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Fri, 17 Apr 2020 13:31:06 +0000
Subject: Add mem-debug build to Travis. Update Xcode from 8 to 8.3, 9 to 9.2
 and 11.3 to 11.4

---
 .travis.yml | 59 +++++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 49 insertions(+), 10 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 02e3102..e10ca7c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,6 +1,5 @@
 sudo: false
 language: c
-
 matrix:
   include:
 # Testing on Linux with various compilers
@@ -24,6 +23,13 @@ matrix:
       env:
         - BUILD_DIR=unix
         - CFGOPT="--enable-symbols"
+    - name: "Linux/GCC/Mem-Debug"
+      os: linux
+      dist: bionic
+      compiler: gcc
+      env:
+        - BUILD_DIR=unix
+        - CFGOPT="--enable-symbols=mem"
 # Older versions of GCC...
     - name: "Linux/GCC 7/Shared"
       os: linux
@@ -82,15 +88,17 @@ matrix:
       env:
         - BUILD_DIR=unix
         - CFGOPT="--enable-symbols"
-# Testing on Mac, various styles
-    - name: "macOS/Xcode 11.3/Shared/Unix-like"
-      os: osx
-      osx_image: xcode11.3
+    - name: "Linux/Clang/Mem-Debug"
+      os: linux
+      dist: bionic
+      compiler: clang
       env:
         - BUILD_DIR=unix
-    - name: "macOS/Xcode 11.3/Shared"
+        - CFGOPT="--enable-symbols=mem"
+# Testing on Mac, various styles
+    - name: "macOS/Xcode 11.4/Shared"
       os: osx
-      osx_image: xcode11.3
+      osx_image: xcode11.4
       env:
         - BUILD_DIR=macosx
       install: []
@@ -98,6 +106,12 @@ matrix:
         - make all
         # The styles=develop avoids some weird problems on OSX
         - make test styles=develop
+    - name: "macOS/Xcode 11.4/Shared/Unix-like"
+      os: osx
+      osx_image: xcode11.4
+      env:
+        - BUILD_DIR=unix
+# Older MacOS versions
     - name: "macOS/Xcode 11/Shared"
       os: osx
       osx_image: xcode11
@@ -114,14 +128,14 @@ matrix:
       script: *mactest
     - name: "macOS/Xcode 9/Shared"
       os: osx
-      osx_image: xcode9
+      osx_image: xcode9.2
       env:
         - BUILD_DIR=macosx
       install: []
       script: *mactest
     - name: "macOS/Xcode 8/Shared"
       os: osx
-      osx_image: xcode8
+      osx_image: xcode8.3
       env:
         - BUILD_DIR=macosx
       install: []
@@ -197,6 +211,15 @@ matrix:
       script:
         - cmd.exe //C vcvarsall.bat x64 '&&' nmake 'OPTS=symbols,threads' '-f' makefile.vc all tcltest
         - cmd.exe //C vcvarsall.bat x64 '&&' nmake 'OPTS=symbols,threads' '-f' makefile.vc test
+    - name: "Windows/MSVC/Mem-Debug"
+      os: windows
+      compiler: cl
+      env: *vcenv
+      before_install: *vcpreinst
+      install: []
+      script:
+        - cmd.exe //C vcvarsall.bat x64 '&&' nmake 'STATS=memdbg OPTS=threads' '-f' makefile.vc all tcltest
+        - cmd.exe //C vcvarsall.bat x64 '&&' nmake 'STATS=memdbg OPTS=threads' '-f' makefile.vc test
 # Test on Windows with MSVC native (32-bit)
     - name: "Windows/MSVC-x86/Shared"
       os: windows
@@ -225,6 +248,15 @@ matrix:
       script:
         - cmd.exe //C vcvarsall.bat x86 '&&' nmake 'OPTS=symbols,threads' '-f' makefile.vc all tcltest
         - cmd.exe //C vcvarsall.bat x86 '&&' nmake 'OPTS=symbols,threads' '-f' makefile.vc test
+    - name: "Windows/MSVC-x86/Mem-Debug"
+      os: windows
+      compiler: cl
+      env: *vcenv
+      before_install: *vcpreinst
+      install: []
+      script:
+        - cmd.exe //C vcvarsall.bat x86 '&&' nmake 'STATS=memdbg OPTS=threads' '-f' makefile.vc all tcltest
+        - cmd.exe //C vcvarsall.bat x86 '&&' nmake 'STATS=memdbg OPTS=threads' '-f' makefile.vc test
 # Test on Windows with GCC native
     - name: "Windows/GCC/Shared"
       os: windows
@@ -233,7 +265,7 @@ matrix:
         - BUILD_DIR=win
         - CFGOPT="--enable-64bit --enable-threads"
       before_install: &makepreinst
-        - choco install make
+        - choco install -y make
         - cd ${BUILD_DIR}
     - name: "Windows/GCC/Static"
       os: windows
@@ -271,6 +303,13 @@ matrix:
         - BUILD_DIR=win
         - CFGOPT="--enable-threads --enable-symbols"
       before_install: *makepreinst
+    - name: "Windows/GCC-x86/Mem-Debug"
+      os: windows
+      compiler: gcc
+      env:
+        - BUILD_DIR=win
+        - CFGOPT="--enable-threads --enable-symbols=mem"
+      before_install: *makepreinst
 before_install:
   - cd ${BUILD_DIR}
 install:
-- 
cgit v0.12


From 4b0626ddd7e1f7450781deb2508d94a98c8db93d Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Fri, 17 Apr 2020 16:56:51 +0000
Subject: Bring back the test utf-2.11; it fails in a TCL_UTF_MAX=4 build.

---
 tests/utf.test | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/utf.test b/tests/utf.test
index 7b7b5c2..a22dafe 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -13,6 +13,8 @@ if {[lsearch [namespace children] ::tcltest] == -1} {
     namespace import -force ::tcltest::*
 }
 
+testConstraint testbytestring [llength [info commands testbytestring]]
+
 catch {unset x}
 
 test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} {
@@ -59,6 +61,12 @@ test utf-2.8 {Tcl_UtfToUniChar: longer UTF sequences not supported} {
     string length [bytestring "\xF4\xA2\xA2\xA2"]
 } {4}
 
+test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, invalid} testbytestring {
+    # Would decode to U+110000 but that is outside the Unicode range.
+    string length [testbytestring "\xF4\x90\x80\x80"]
+} {4}
+
+
 test utf-3.1 {Tcl_UtfCharComplete} {
 } {}
 
-- 
cgit v0.12


From 1e5043ff451573bf735f6aec84208af7f0c24cc2 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Fri, 17 Apr 2020 20:15:07 +0000
Subject: Backport a collection of tests for consistency between branches.

---
 tests/utf.test | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/tests/utf.test b/tests/utf.test
index a22dafe..ff4f4a9 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -57,15 +57,22 @@ test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} {
 test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} {
     string length [bytestring "\xE4\xb9\x8e"]
 } {1}
-test utf-2.8 {Tcl_UtfToUniChar: longer UTF sequences not supported} {
-    string length [bytestring "\xF4\xA2\xA2\xA2"]
+test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring} -body {
+    string length [testbytestring "\xF0\x90\x80\x80"]
+} -result {4}
+test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring} -body {
+    string length [testbytestring "\xF4\x8F\xBF\xBF"]
+} -result {4}
+test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring {
+    string length [testbytestring "\xF0\x8F\xBF\xBF"]
 } {4}
-
-test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, invalid} testbytestring {
+test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} testbytestring {
     # Would decode to U+110000 but that is outside the Unicode range.
     string length [testbytestring "\xF4\x90\x80\x80"]
 } {4}
-
+test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestring {
+    string length [testbytestring "\xF8\xA2\xA2\xA2\xA2"]
+} {5}
 
 test utf-3.1 {Tcl_UtfCharComplete} {
 } {}
-- 
cgit v0.12


From 7067acad796b0536c589101c3f61fbae9fd268aa Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Fri, 17 Apr 2020 20:23:49 +0000
Subject: Corrections for many tests, changing lead byte \xF4 to \xF2. The
 tested sequences were always intended to be valid 4-byte sequences. Also a
 few errors with greedy \xHHHH .

---
 tests/utf.test | 92 +++++++++++++++++++++++++++++-----------------------------
 1 file changed, 46 insertions(+), 46 deletions(-)

diff --git a/tests/utf.test b/tests/utf.test
index ff4f4a9..7953a68 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -149,7 +149,7 @@ test utf-6.6 {Tcl_UtfNext} testutfnext {
     testutfnext A\xE8
 } 1
 test utf-6.7 {Tcl_UtfNext} testutfnext {
-    testutfnext A\xF4
+    testutfnext A\xF2
 } 1
 test utf-6.8 {Tcl_UtfNext} testutfnext {
     testutfnext A\xF8
@@ -170,7 +170,7 @@ test utf-6.13 {Tcl_UtfNext} testutfnext {
     testutfnext \xA0\xE8
 } 1
 test utf-6.14 {Tcl_UtfNext} testutfnext {
-    testutfnext \xA0\xF4
+    testutfnext \xA0\xF2
 } 1
 test utf-6.15 {Tcl_UtfNext} testutfnext {
     testutfnext \xA0\xF8
@@ -179,7 +179,7 @@ test utf-6.16 {Tcl_UtfNext} testutfnext {
     testutfnext \xD0
 } 1
 test utf-6.17 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0A
+    testutfnext \xD0G
 } 1
 test utf-6.18 {Tcl_UtfNext} testutfnext {
     testutfnext \xD0\xA0
@@ -191,7 +191,7 @@ test utf-6.20 {Tcl_UtfNext} testutfnext {
     testutfnext \xD0\xE8
 } 1
 test utf-6.21 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xF4
+    testutfnext \xD0\xF2
 } 1
 test utf-6.22 {Tcl_UtfNext} testutfnext {
     testutfnext \xD0\xF8
@@ -200,7 +200,7 @@ test utf-6.23 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8
 } 1
 test utf-6.24 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8A
+    testutfnext \xE8G
 } 1
 test utf-6.25 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8\xA0
@@ -212,37 +212,37 @@ test utf-6.27 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8\xE8
 } 1
 test utf-6.28 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xF4
+    testutfnext \xE8\xF2
 } 1
 test utf-6.29 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8\xF8
 } 1
 test utf-6.30 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4
+    testutfnext \xF2
 } 1
 test utf-6.31 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4A
+    testutfnext \xF2G
 } 1
 test utf-6.32 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0
+    testutfnext \xF2\xA0
 } 1
 test utf-6.33 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xD0
+    testutfnext \xF2\xD0
 } 1
 test utf-6.34 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xE8
+    testutfnext \xF2\xE8
 } 1
 test utf-6.35 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xF4
+    testutfnext \xF2\xF2
 } 1
 test utf-6.36 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xF8
+    testutfnext \xF2\xF8
 } 1
 test utf-6.37 {Tcl_UtfNext} testutfnext {
     testutfnext \xF8
 } 1
 test utf-6.38 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF8A
+    testutfnext \xF8G
 } 1
 test utf-6.39 {Tcl_UtfNext} testutfnext {
     testutfnext \xF8\xA0
@@ -254,7 +254,7 @@ test utf-6.41 {Tcl_UtfNext} testutfnext {
     testutfnext \xF8\xE8
 } 1
 test utf-6.42 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF8\xF4
+    testutfnext \xF8\xF2
 } 1
 test utf-6.43 {Tcl_UtfNext} testutfnext {
     testutfnext \xF8\xF8
@@ -272,7 +272,7 @@ test utf-6.47 {Tcl_UtfNext} testutfnext {
     testutfnext \xD0\xA0\xE8
 } 2
 test utf-6.48 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xA0\xF4
+    testutfnext \xD0\xA0\xF2
 } 2
 test utf-6.49 {Tcl_UtfNext} testutfnext {
     testutfnext \xD0\xA0\xF8
@@ -290,28 +290,28 @@ test utf-6.53 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8\xA0\xE8
 } 1
 test utf-6.54 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xF4
+    testutfnext \xE8\xA0\xF2
 } 1
 test utf-6.55 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8\xA0\xF8
 } 1
 test utf-6.56 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0G
+    testutfnext \xF2\xA0G
 } 1
 test utf-6.57 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0
+    testutfnext \xF2\xA0\xA0
 } 1
 test utf-6.58 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xD0
+    testutfnext \xF2\xA0\xD0
 } 1
 test utf-6.59 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xE8
+    testutfnext \xF2\xA0\xE8
 } 1
 test utf-6.60 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xF4
+    testutfnext \xF2\xA0\xF2
 } 1
 test utf-6.61 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xF8
+    testutfnext \xF2\xA0\xF8
 } 1
 test utf-6.62 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8\xA0\xA0G
@@ -326,46 +326,46 @@ test utf-6.65 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8\xA0\xA0\xE8
 } 3
 test utf-6.66 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xA0\xF4
+    testutfnext \xE8\xA0\xA0\xF2
 } 3
 test utf-6.67 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8\xA0\xA0\xF8
 } 3
 test utf-6.68 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0G
+    testutfnext \xF2\xA0\xA0G
 } 1
 test utf-6.69 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xA0
+    testutfnext \xF2\xA0\xA0\xA0
 } 1
 test utf-6.70 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xD0
+    testutfnext \xF2\xA0\xA0\xD0
 } 1
 test utf-6.71 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xE8
+    testutfnext \xF2\xA0\xA0\xE8
 } 1
 test utf-6.71 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xF4
+    testutfnext \xF2\xA0\xA0\xF2
 } 1
 test utf-6.73 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xF8
+    testutfnext \xF2\xA0\xA0\xF8
 } 1
 test utf-6.74 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xA0G
+    testutfnext \xF2\xA0\xA0\xA0G
 } 1
 test utf-6.75 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xA0\xA0
+    testutfnext \xF2\xA0\xA0\xA0\xA0
 } 1
 test utf-6.76 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xA0\xD0
+    testutfnext \xF2\xA0\xA0\xA0\xD0
 } 1
 test utf-6.77 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xA0\xE8
+    testutfnext \xF2\xA0\xA0\xA0\xE8
 } 1
 test utf-6.78 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xA0\xF4
+    testutfnext \xF2\xA0\xA0\xA0\xF2
 } 1
 test utf-6.79 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xA0G\xF8
+    testutfnext \xF2\xA0\xA0\xA0G\xF8
 } 1
 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext {
     testutfnext \xC0\x80
@@ -425,13 +425,13 @@ test utf-7.4.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xF8\xA0\xA0 2
 } 1
 test utf-7.5 {Tcl_UtfPrev} testutfprev {
-    testutfprev A\xF4
+    testutfprev A\xF2
 } 1
 test utf-7.5.1 {Tcl_UtfPrev} testutfprev {
-    testutfprev A\xF4\xA0\xA0\xA0 2
+    testutfprev A\xF2\xA0\xA0\xA0 2
 } 1
 test utf-7.5.2 {Tcl_UtfPrev} testutfprev {
-    testutfprev A\xF4\xF8\xA0\xA0 2
+    testutfprev A\xF2\xF8\xA0\xA0 2
 } 1
 test utf-7.6 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xE8
@@ -470,13 +470,13 @@ test utf-7.9.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xF8\xA0 3
 } 2
 test utf-7.10 {Tcl_UtfPrev} testutfprev {
-    testutfprev A\xF4\xA0
+    testutfprev A\xF2\xA0
 } 2
 test utf-7.10.1 {Tcl_UtfPrev} testutfprev {
-    testutfprev A\xF4\xA0\xA0\xA0 3
+    testutfprev A\xF2\xA0\xA0\xA0 3
 } 2
 test utf-7.10.2 {Tcl_UtfPrev} testutfprev {
-    testutfprev A\xF4\xA0\xF8\xA0 3
+    testutfprev A\xF2\xA0\xF8\xA0 3
 } 2
 test utf-7.11 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xE8\xA0
@@ -518,13 +518,13 @@ test utf-7.14.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xA0\xF8 4
 } 3
 test utf-7.15 {Tcl_UtfPrev} testutfprev {
-    testutfprev A\xF4\xA0\xA0
+    testutfprev A\xF2\xA0\xA0
 } 3
 test utf-7.15.1 {Tcl_UtfPrev} testutfprev {
-    testutfprev A\xF4\xA0\xA0\xA0 4
+    testutfprev A\xF2\xA0\xA0\xA0 4
 } 3
 test utf-7.15.2 {Tcl_UtfPrev} testutfprev {
-    testutfprev A\xF4\xA0\xA0\xF8 4
+    testutfprev A\xF2\xA0\xA0\xF8 4
 } 3
 test utf-7.16 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xE8\xA0\xA0
@@ -557,7 +557,7 @@ test utf-7.19 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xA0\xA0
 } 4
 test utf-7.20 {Tcl_UtfPrev} testutfprev {
-    testutfprev A\xF4\xA0\xA0\xA0
+    testutfprev A\xF2\xA0\xA0\xA0
 } 4
 test utf-7.21 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xE8\xA0\xA0\xA0
-- 
cgit v0.12


From e19b1f1306ccd36f01270f8594f9315dbdf39846 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Fri, 17 Apr 2020 20:38:38 +0000
Subject: [493dccc2de] Coverage that Tcl_UtfPrev also checks the upper range
 validity.

---
 tests/utf.test | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tests/utf.test b/tests/utf.test
index 7953a68..6d87928 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -652,6 +652,30 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {te
 test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} {
     testutfprev \xE8\xA0\x00 2
 } 0
+test utf-7.48 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+    testutfprev A\xF4\x8F\xBF\xBF
+} 4
+test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+    testutfprev A\xF4\x8F\xBF\xBF 4
+} 3
+test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+    testutfprev A\xF4\x8F\xBF\xBF 3
+} 2
+test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+    testutfprev A\xF4\x8F\xBF\xBF 2
+} 1
+test utf-7.49 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+    testutfprev A\xF4\x90\x80\x80
+} 4
+test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+    testutfprev A\xF4\x90\x80\x80 4
+} 3
+test utf-7.49.2 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+    testutfprev A\xF4\x90\x80\x80 3
+} 2
+test utf-7.49.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+    testutfprev A\xF4\x90\x80\x80 2
+} 1
 
 test utf-8.1 {Tcl_UniCharAtIndex: index = 0} {
     string index abcd 0
-- 
cgit v0.12


From 4520aa1ca30a7b09dc9cfc4bc9007aa262793711 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Fri, 17 Apr 2020 21:03:30 +0000
Subject: More tests explicitly for Tcl_UtfNext near validity boundary U+110000

---
 tests/utf.test | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/utf.test b/tests/utf.test
index 6d87928..01e0bb2 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -403,6 +403,12 @@ test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {te
 test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
     testutfnext \xF0\x80\x80 1
 } 2
+test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext {
+    testutfnext \xF4\x8F\xBF\xBF
+} 1
+test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext {
+    testutfnext \xF4\x90\x80\x80
+} 1
 
 testConstraint testutfprev [llength [info commands testutfprev]]
 
-- 
cgit v0.12


From aa9bb7f9e401573bc8c79e8336fdb74636b2702f Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Fri, 17 Apr 2020 21:07:09 +0000
Subject: [493dccc2de] Revise sequence validity check to reject out of range
 decodes too.

---
 generic/tclUtf.c | 53 +++++++++++++++++++++++++++--------------------------
 1 file changed, 27 insertions(+), 26 deletions(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index b5c430b..1883804 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -81,7 +81,7 @@ static CONST unsigned char totalBytes[256] = {
  */
 
 static int		UtfCount(int ch);
-static int		Overlong(unsigned char *src);
+static int		Invalid(unsigned char *src);
 
 /*
  *---------------------------------------------------------------------------
@@ -120,51 +120,52 @@ UtfCount(
 /*
  *---------------------------------------------------------------------------
  *
- * Overlong --
+ * Invalid --
  *
  *	Utility routine to report whether /src/ points to the start of an
- *	overlong byte sequence that should be rejected. Caller guarantees
- *	that src[0] and src[1] are readable, and
+ *	invald byte sequence that should be rejected. This might be because
+ *	it is an overlong encoding, or because it encodes something out of
+ *	the proper range. Caller guarantees that src[0] and src[1] are
+ *	readable, and
  *
  *	(src[0] >= 0xC0) && (src[0] != 0xC1)
  * 	(src[1] >= 0x80) && (src[1] < 0xC0)
- *	(src[0] < ((TCL_UTF_MAX > 3) ? 0xF8 : 0xF0))
+ *	(src[0] < ((TCL_UTF_MAX > 3) ? 0xF5 : 0xF0))
  *
  * Results:
  *	A boolean.
  *---------------------------------------------------------------------------
  */
 
-static CONST unsigned char overlong[3] = {
-    0x80,	/* \xD0 -- all sequences valid */
-    0xA0,	/* \xE0\x80 through \xE0\x9F are invalid prefixes */
+static CONST unsigned char bounds[28] = {
+    0x80, 0x80,		/* \xC0 accepts \x80 only */
+    0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF,
+    0x80, 0xBF,		/* (\xC4 - \xDC) -- all sequences valid */
+    0xA0, 0xBF,	/* \xE0\x80 through \xE0\x9F are invalid prefixes */
+    0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, /* (\xE4 - \xEC) -- all valid */
 #if TCL_UTF_MAX > 3
-    0x90	/* \xF0\x80 through \xF0\x8F are invalid prefixes */
+    0x90, 0xBF,	/* \xF0\x80 through \xF0\x8F are invalid prefixes */
+    0x80, 0x8F  /* \xF4\x90 and higher are invalid prefixes */
 #else
-    0xC0	/* Not used, but reject all again for safety. */
+    0xC0, 0xBF,	/* Not used, but reject all again for safety. */
+    0xC0, 0xBF	/* Not used, but reject all again for safety. */
 #endif
 };
 
 INLINE static int
-Overlong(
+Invalid(
     unsigned char *src)	/* Points to lead byte of a UTF-8 byte sequence */
 {
     unsigned char byte = *src;
+    int index;
 
-    if (byte % 0x10) {
-	/* Only lead bytes 0xC0, 0xE0, 0xF0 need examination */
+    if (byte % 0x04) {
+	/* Only lead bytes 0xC0, 0xE0, 0xF0, 0xF4 need examination */
 	return 0;
     }
-    if (byte == 0xC0) {
-	if (src[1] == 0x80) {
-	    /* Valid sequence: \xC0\x80 for \u0000 */
-	    return 0;
-	}
-	/* Reject overlong: \xC0\x81 - \xC0\xBF */
-	return 1;
-    }
-    if (src[1] < overlong[(byte >> 4) - 0x0D]) {
-	/* Reject overlong */
+    index = (byte - 0xC0) >> 1;
+    if (src[1] < bounds[index] || src[1] > bounds[index+1]) {
+	/* Out of bounds - report invalid. */
 	return 1;
     }
     return 0;
@@ -733,7 +734,7 @@ Tcl_UtfNext(
 	}
 	next++;
     }
-    if (Overlong((unsigned char *)src)) {
+    if (Invalid((unsigned char *)src)) {
 	return src + 1;
     }
     return next;
@@ -843,10 +844,10 @@ Tcl_UtfPrev(
 
 	    /*
 	     * trailBytesSeen > 0, so we can examine look[1] safely.
-	     * Use that capability to screen out overlong sequences.
+	     * Use that capability to screen out invalid sequences.
 	     */
 
-	    if (Overlong(look)) {
+	    if (Invalid(look)) {
 		/* Reject */
 		return fallback;
 	    }
-- 
cgit v0.12


From ec1723eeadcf9efe52b0f81a65d683dff9b160c5 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Sat, 18 Apr 2020 12:46:54 +0000
Subject: Update documentation of Tcl_UtfPrev/Tcl_UtfNext back to how it was.
 Will be updated later, when implementation is ready and agreed upon.

---
 doc/Utf.3        | 37 +++++++++++------------------
 generic/tclUtf.c | 72 ++++++++++++++++++++------------------------------------
 2 files changed, 39 insertions(+), 70 deletions(-)

diff --git a/doc/Utf.3 b/doc/Utf.3
index cb82699..334fa6f 100644
--- a/doc/Utf.3
+++ b/doc/Utf.3
@@ -3,7 +3,7 @@
 '\"
 '\" See the file "license.terms" for information on usage and redistribution
 '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
-'\" 
+'\"
 .TH Utf 3 "8.1" Tcl "Tcl Library Procedures"
 .so man.macros
 .BS
@@ -13,7 +13,7 @@ Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UniCharToUtfDString, Tcl_Ut
 .nf
 \fB#include <tcl.h>\fR
 .sp
-typedef ... Tcl_UniChar;
+typedef ... \fBTcl_UniChar\fR;
 .sp
 int
 \fBTcl_UniCharToUtf\fR(\fIch, buf\fR)
@@ -48,7 +48,7 @@ int
 int
 \fBTcl_UtfCharComplete\fR(\fIsrc, length\fR)
 .sp
-int 
+int
 \fBTcl_NumUtfChars\fR(\fIsrc, length\fR)
 .sp
 const char *
@@ -109,7 +109,7 @@ Pointer to the beginning of a UTF-8 string.
 .AP int index in
 The index of a character (not byte) in the UTF-8 string.
 .AP int *readPtr out
-If non-NULL, filled with the number of bytes in the backslash sequence, 
+If non-NULL, filled with the number of bytes in the backslash sequence,
 including the backslash character.
 .AP char *dst out
 Buffer in which the bytes represented by the backslash sequence are stored.
@@ -141,8 +141,8 @@ source buffer is long enough such that this routine does not run off the
 end and dereference non-existent or random memory; if the source buffer
 is known to be null-terminated, this will not happen.  If the input is
 not in proper UTF-8 format, \fBTcl_UtfToUniChar\fR will store the first
-byte of \fIsrc\fR in \fI*chPtr\fR as a Tcl_UniChar between 0x0000 and
-0x00FF and return 1.  
+byte of \fIsrc\fR in \fI*chPtr\fR as a Tcl_UniChar between 0x0080 and
+0x00FF and return 1.
 .PP
 \fBTcl_UniCharToUtfDString\fR converts the given Unicode string
 to UTF-8, storing the result in a previously initialized \fBTcl_DString\fR.
@@ -210,27 +210,18 @@ length is negative, all bytes up to the first null byte are used.
 \fBTcl_UtfFindFirst\fR corresponds to \fBstrchr\fR for UTF-8 strings.  It
 returns a pointer to the first occurrence of the Tcl_UniChar \fIch\fR
 in the null-terminated UTF-8 string \fIsrc\fR.  The null terminator is
-considered part of the UTF-8 string.  
+considered part of the UTF-8 string.
 .PP
 \fBTcl_UtfFindLast\fR corresponds to \fBstrrchr\fR for UTF-8 strings.  It
 returns a pointer to the last occurrence of the Tcl_UniChar \fIch\fR
 in the null-terminated UTF-8 string \fIsrc\fR.  The null terminator is
-considered part of the UTF-8 string.  
+considered part of the UTF-8 string.
 .PP
-\fBTcl_UtfNext\fR is used to step forward through a UTF-8 string.
-If the UTF-8 string is made up entirely of complete, well-formed, and
-valid character byte sequences, and \fIsrc\fR points to the lead byte
-of one of those sequences, then repeated calls of \fBTcl_UtfNext\fR will
-return pointers to the lead bytes of each character in the string, one
-character at a time. In any other circumstance, \fBTcl_UtfNext\fR
-returns \fIsrc\fR+1.  \fBTcl_UtfNext\fR will always read \fIsrc[0]\fR
-and may read as many following bytes (up to a total of \fBTCL_UTF_MAX\fR)
-as needed to find the end of the byte sequence. If the string is
-\fBNUL\fR-terminated, \fBTcl_UtfNext\fR will not read beyond the terminating
-\fBNUL\fR byte. If not, the caller must use the companion routine
-\fBTcl_UtfCharComplete\fR to determine whether there is any risk
-\fBTcl_UtfNext\fR might read beyond the readable memory occupied
-by the string.
+Given \fIsrc\fR, a pointer to some location in a UTF-8 string,
+\fBTcl_UtfNext\fR returns a pointer to the next UTF-8 character in the
+string.  The caller must not ask for the next character after the last
+character in the string if the string is not terminated by a null
+character.
 .PP
 \fBTcl_UtfPrev\fR is used to step backward through but not beyond the
 UTF-8 string that begins at \fIstart\fR.  If the UTF-8 string is made
@@ -262,7 +253,7 @@ characters.  Behavior is undefined if a negative \fIindex\fR is given.
 .PP
 \fBTcl_UtfAtIndex\fR returns a pointer to the specified character (not
 byte) \fIindex\fR in the UTF-8 string \fIsrc\fR.  The source string must
-contain at least \fIindex\fR characters.  This is equivalent to calling 
+contain at least \fIindex\fR characters.  This is equivalent to calling
 \fBTcl_UtfNext\fR \fIindex\fR times.  If a negative \fIindex\fR is given,
 the return pointer points to the first character in the source string.
 .PP
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 1883804..64ee0a8 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -678,35 +678,13 @@ Tcl_UtfFindLast(
  *
  * Tcl_UtfNext --
  *
- *	The aim of this routine is to provide a way to iterate forward
- *	through a UTF-8 string. The caller is expected to pass a non-NULL
- *	pointer argument /src/ which points to a location within a string.
- *	(*src) will be read, so /src/ must not point to an unreadable
- *	location past the end of the string. If /src/ points to the
- *	beginning of a complete, well-formed and valid UTF_8 byte sequence
- *	of no more than TCL_UTF_MAX bytes, Tcl_UtfNext returns the pointer
- *	just past the end of that sequence. In any other circumstance,
- *	Tcl_UtfNext returns /src/+1.
- *
- *	Because this routine always returns a value > /src/, it is useful
- *	as a forward iterator that will always make progress. If the string
- *	is NUL-terminated, Tcl_UtfNext will not read beyond the terminating
- *	NUL character. If it is not NUL-terminated, the caller must make
- *	use of the companion routine Tcl_UtfCharComplete to test whether
- *	there is risk that Tcl_UtfNext will read beyond the end of the string.
- *	Tcl_UtfNext will never read more than TCL_UTF_MAX bytes.
- *
- *	In a string where all characters are complete and properly formed,
- *	and /src/ points to the first byte of a character, repeated
- *	Tcl_UtfNext calls will step to the starting bytes of characters, one
- *	character at a time. Within those limitations, Tcl_UtfPrev and
- *	Tcl_UtfNext are inverses. If either condition cannot be met,
- *	Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and the
- *	caller will have to take greater care.
+ *	Given a pointer to some current location in a UTF-8 string, move
+ *	forward one character. The caller must ensure that they are not asking
+ *	for the next character after the last character in the string.
  *
  * Results:
- *	A pointer to the start of the next character in the string (or to
- *	the end of the string) as described above.
+ *	The return value is the pointer to the next character in the UTF-8
+ *	string.
  *
  * Side effects:
  *	None.
@@ -747,37 +725,37 @@ Tcl_UtfNext(
  *
  *	The aim of this routine is to provide a way to move backward
  *	through a UTF-8 string. The caller is expected to pass non-NULL
- *	pointer arguments /start/ and /src/. /start/ points to the beginning
- *	of a string, and /src/ (>= /start/) points to a location within (or
- *	just past the end) of the string. This routine always returns a
- *	pointer within the string (>= /start/).  When (/src/ == /start/),
- *	it returns /start/. When (/src/ > /start/), it returns a pointer
- *	(< /src/) and (>= /src/ - TCL_UTF_MAX).  Subject to these constraints,
- *	the routine returns a pointer to the earliest byte in the string that
- *	starts a character when characters are read starting at /start/ and
+ *	pointer arguments start and src. start points to the beginning
+ *	of a string, and src >= start points to a location within (or just
+ *	past the end) of the string. This routine always returns a
+ *	pointer within the string (>= start).  When (src == start), it
+ *	returns start. When (src > start), it returns a pointer (< src)
+ *	and (>= src - TCL_UTF_MAX).  Subject to these constraints, the
+ *	routine returns a pointer to the earliest byte in the string that
+ *	starts a character when characters are read starting at start and
  *	that character might include the byte src[-1]. The routine will
  *	examine only those bytes in the range that might be returned.
- *	It will not examine the byte (*src), and because of that cannot
+ *	It will not examine the byte *src, and because of that cannot
  *	determine for certain in all circumstances whether the character
  *	that begins with the returned pointer will or will not include
- *	the byte src[-1]. In the scenario where /src/ points to the end of
- *	a buffer being filled, the returned pointer points to either the
+ *	the byte src[-1]. In the scenario, where src points to the end of
+ *	a buffer being filled, the returned pointer point to either the
  *	final complete character in the string or to the earliest byte
  *	that might start an incomplete character waiting for more bytes to
  *	complete.
  *
- *	Because this routine always returns a value < /src/ until the point
- *	it is forced to return /start/, it is useful as a backward iterator
+ *	Because this routine always returns a value < src until the point
+ *	it is forced to return start, it is useful as a backward iterator
  *	through a string that will always make progress and always be
  *	prevented from running past the beginning of the string.
  *
  *	In a string where all characters are complete and properly formed,
- *	and /src/ points to the first byte of a character, repeated
- *	Tcl_UtfPrev calls will step to the starting bytes of characters, one
- *	character at a time. Within those limitations, Tcl_UtfPrev and
- *	Tcl_UtfNext are inverses. If either condition cannot be met,
- *	Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and the
- *	caller will have to take greater care.
+ *	and the value of src points to the first byte of a character,
+ *	repeated Tcl_UtfPrev calls will step to the starting bytes of
+ *	characters, one character at a time. Within those limitations,
+ *	Tcl_UtfPrev and Tcl_UtfNext are inverses. If either condition cannot
+ *	be met, Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and
+ *	the caller will have to take greater care.
  *
  * Results:
  *	A pointer to the start of a character in the string as described
@@ -887,7 +865,7 @@ Tcl_UtfPrev(
  *
  * Tcl_UniCharAtIndex --
  *
- *	Returns the Unicode character represented at the specified character
+ *	Returns the Tcl_UniChar represented at the specified character
  *	(not byte) position in the UTF-8 string.
  *
  * Results:
-- 
cgit v0.12


From 6f00fef31d332688308f392fd5df4cab98d05161 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Sat, 18 Apr 2020 13:47:06 +0000
Subject: Fix [c574e50a3b30e76f]: CRASH: utf-2.[89] in 8.5 built with
 TCL_UTF_MAX=4

---
 generic/regcustom.h |   2 +-
 generic/tcl.h       |   2 +-
 generic/tclUtf.c    |  84 +----------------
 tests/utf.test      | 259 ++++++++++++++++++++++++++--------------------------
 tests/util.test     |   1 +
 5 files changed, 132 insertions(+), 216 deletions(-)

diff --git a/generic/regcustom.h b/generic/regcustom.h
index 57a2d47..ac33087 100644
--- a/generic/regcustom.h
+++ b/generic/regcustom.h
@@ -97,7 +97,7 @@ typedef int celt;		/* Type to hold chr, or NOCELT */
 #define	NOCELT (-1)		/* Celt value which is not valid chr */
 #define	CHR(c) (UCHAR(c))	/* Turn char literal into chr literal */
 #define	DIGITVAL(c) ((c)-'0')	/* Turn chr digit into its value */
-#if TCL_UTF_MAX > 4
+#if TCL_UTF_MAX > 3
 #define	CHRBITS	32		/* Bits in a chr; must not use sizeof */
 #define	CHR_MIN	0x00000000	/* Smallest and largest chr; the value */
 #define	CHR_MAX	0xffffffff	/* CHR_MAX-CHR_MIN+1 should fit in uchr */
diff --git a/generic/tcl.h b/generic/tcl.h
index 7378a8f..d7d064c 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -2148,7 +2148,7 @@ typedef struct Tcl_Parse {
  * reflected in regcustom.h.
  */
 
-#if TCL_UTF_MAX > 4
+#if TCL_UTF_MAX > 3
     /*
      * unsigned int isn't 100% accurate as it should be a strict 4-byte value
      * (perhaps wchar_t). 64-bit systems may have troubles. The size of this
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 64ee0a8..3741d70 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -209,30 +209,6 @@ Tcl_UniCharToUtf(
 	    return 2;
 	}
 	if (ch <= 0xFFFF) {
-#if TCL_UTF_MAX == 4
-	    if ((ch & 0xF800) == 0xD800) {
-		if (ch & 0x0400) {
-		    /* Low surrogate */
-		    if (((buf[0] & 0xF8) == 0xF0) && ((buf[1] & 0xC0) == 0x80)
-			    && ((buf[2] & 0xCF) == 0)) {
-			/* Previous Tcl_UniChar was a High surrogate, so combine */
-			buf[3] = (char) ((ch & 0x3F) | 0x80);
-			buf[2] |= (char) (((ch >> 6) & 0x0F) | 0x80);
-			return 4;
-		    }
-		    /* Previous Tcl_UniChar was not a High surrogate, so just output */
-		} else {
-		    /* High surrogate */
-		    ch += 0x40;
-		    /* Fill buffer with specific 3-byte (invalid) byte combination,
-		       so following Low surrogate can recognize it and combine */
-		    buf[2] = (char) ((ch << 4) & 0x30);
-		    buf[1] = (char) (((ch >> 2) & 0x3F) | 0x80);
-		    buf[0] = (char) (((ch >> 8) & 0x07) | 0xF0);
-		    return 0;
-		}
-	    }
-#endif
 	    goto three;
 	}
 
@@ -321,15 +297,6 @@ Tcl_UniCharToUtfDString(
  *	Tcl_UtfCharComplete() before calling this routine to ensure that
  *	enough bytes remain in the string.
  *
- *	If TCL_UTF_MAX == 4, special handling of Surrogate pairs is done:
- *	For any UTF-8 string containing a character outside of the BMP, the
- *	first call to this function will fill *chPtr with the high surrogate
- *	and generate a return value of 0. Calling Tcl_UtfToUniChar again
- *	will produce the low surrogate and a return value of 4. Because *chPtr
- *	is used to remember whether the high surrogate is already produced, it
- *	is recommended to initialize the variable it points to as 0 before
- *	the first call to Tcl_UtfToUniChar is done.
- *
  * Results:
  *	*chPtr is filled with the Tcl_UniChar, and the return value is the
  *	number of bytes from the UTF-8 string that were consumed.
@@ -402,34 +369,15 @@ Tcl_UtfToUniChar(
 	    /*
 	     * Four-byte-character lead byte followed by three trail bytes.
 	     */
-#if TCL_UTF_MAX == 4
-	    Tcl_UniChar surrogate;
-
-	    byte = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
-		    | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)) - 0x10000;
-	    surrogate = (Tcl_UniChar) (0xD800 + (byte >> 10));
-	    if (byte & 0x100000) {
-		/* out of range, < 0x10000 or > 0x10ffff */
-	    } else if (*chPtr != surrogate) {
-		/* produce high surrogate, but don't advance source pointer */
-		*chPtr = surrogate;
-		return 0;
-	    } else {
-		/* produce low surrogate, and advance source pointer */
-		*chPtr = (Tcl_UniChar) (0xDC00 | (byte & 0x3FF));
-		return 4;
-	    }
-#else
 	    *chPtr = (Tcl_UniChar) (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
 		    | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F));
 	    if ((unsigned)(*chPtr - 0x10000) <= 0xFFFFF) {
 		return 4;
 	    }
-#endif
 	}
 
 	/*
-	 * A four-byte-character lead-byte not followed by two trail-bytes
+	 * A four-byte-character lead-byte not followed by three trail-bytes
 	 * represents itself.
 	 */
     }
@@ -1230,16 +1178,6 @@ Tcl_UtfNcmp(
 	cs += TclUtfToUniChar(cs, &ch1);
 	ct += TclUtfToUniChar(ct, &ch2);
 	if (ch1 != ch2) {
-#if TCL_UTF_MAX == 4
-	    /* Surrogates always report higher than non-surrogates */
-	    if (((ch1 & 0xFC00) == 0xD800)) {
-	    if ((ch2 & 0xFC00) != 0xD800) {
-		return ch1;
-	    }
-	    } else if ((ch2 & 0xFC00) == 0xD800) {
-		return -ch2;
-	    }
-#endif
 	    return (ch1 - ch2);
 	}
     }
@@ -1280,16 +1218,6 @@ Tcl_UtfNcasecmp(
 	cs += TclUtfToUniChar(cs, &ch1);
 	ct += TclUtfToUniChar(ct, &ch2);
 	if (ch1 != ch2) {
-#if TCL_UTF_MAX == 4
-	    /* Surrogates always report higher than non-surrogates */
-	    if (((ch1 & 0xFC00) == 0xD800)) {
-	    if ((ch2 & 0xFC00) != 0xD800) {
-		return ch1;
-	    }
-	    } else if ((ch2 & 0xFC00) == 0xD800) {
-		return -ch2;
-	    }
-#endif
 	    ch1 = Tcl_UniCharToLower(ch1);
 	    ch2 = Tcl_UniCharToLower(ch2);
 	    if (ch1 != ch2) {
@@ -1329,16 +1257,6 @@ TclUtfCasecmp(
 	cs += TclUtfToUniChar(cs, &ch1);
 	ct += TclUtfToUniChar(ct, &ch2);
 	if (ch1 != ch2) {
-#if TCL_UTF_MAX == 4
-	    /* Surrogates always report higher than non-surrogates */
-	    if (((ch1 & 0xFC00) == 0xD800)) {
-	    if ((ch2 & 0xFC00) != 0xD800) {
-		return ch1;
-	    }
-	    } else if ((ch2 & 0xFC00) == 0xD800) {
-		return -ch2;
-	    }
-#endif
 	    ch1 = Tcl_UniCharToLower(ch1);
 	    ch2 = Tcl_UniCharToLower(ch2);
 	    if (ch1 != ch2) {
diff --git a/tests/utf.test b/tests/utf.test
index 01e0bb2..9a55729 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -13,54 +13,61 @@ if {[lsearch [namespace children] ::tcltest] == -1} {
     namespace import -force ::tcltest::*
 }
 
+testConstraint compat85 [expr {[format %c 0x010000] == "\uFFFD"}]
 testConstraint testbytestring [llength [info commands testbytestring]]
+testConstraint testfindfirst [llength [info commands testfindfirst]]
+testConstraint testfindlast [llength [info commands testfindlast]]
+testConstraint testnumutfchars [llength [info commands testnumutfchars]]
+testConstraint teststringobj [llength [info commands teststringobj]]
+testConstraint testutfnext [llength [info commands testutfnext]]
+testConstraint testutfprev [llength [info commands testutfprev]]
 
 catch {unset x}
 
-test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} {
+test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} testbytestring {
     set x \x01
-} [bytestring "\x01"]
-test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} {
+} [testbytestring "\x01"]
+test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} testbytestring {
     set x "\x00"
-} [bytestring "\xc0\x80"]
-test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} {
+} [testbytestring "\xC0\x80"]
+test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} testbytestring {
     set x "\xe0"
-} [bytestring "\xc3\xa0"]
-test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} {
-    set x "\u4e4e"
-} [bytestring "\xe4\xb9\x8e"]
-test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} {
+} [testbytestring "\xC3\xA0"]
+test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} testbytestring {
+    set x "\u4E4E"
+} [testbytestring "\xE4\xB9\x8E"]
+test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} testbytestring {
     format %c 0x110000
-} [bytestring "\xef\xbf\xbd"]
-test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} {
+} [testbytestring "\xEF\xBF\xBD"]
+test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring {
     format %c -1
-} [bytestring "\xef\xbf\xbd"]
+} [testbytestring "\xEF\xBF\xBD"]
 
 test utf-2.1 {Tcl_UtfToUniChar: low ascii} {
     string length "abc"
 } {3}
-test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} {
-    string length [bytestring "\x82\x83\x84"]
+test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} testbytestring {
+    string length [testbytestring "\x82\x83\x84"]
 } {3}
-test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} {
-    string length [bytestring "\xC2"]
+test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} testbytestring {
+    string length [testbytestring "\xC2"]
 } {1}
-test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} {
-    string length [bytestring "\xC2\xa2"]
+test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} testbytestring {
+    string length [testbytestring "\xC2\xA2"]
 } {1}
-test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} {
-    string length [bytestring "\xE2"]
+test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} testbytestring {
+    string length [testbytestring "\xE2"]
 } {1}
-test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} {
-    string length [bytestring "\xE2\xA2"]
+test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} testbytestring {
+    string length [testbytestring "\xE2\xA2"]
 } {2}
-test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} {
-    string length [bytestring "\xE4\xb9\x8e"]
+test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring {
+    string length [testbytestring "\xE4\xb9\x8E"]
 } {1}
-test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring} -body {
+test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body {
     string length [testbytestring "\xF0\x90\x80\x80"]
 } -result {4}
-test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring} -body {
+test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body {
     string length [testbytestring "\xF4\x8F\xBF\xBF"]
 } -result {4}
 test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring {
@@ -77,57 +84,51 @@ test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestr
 test utf-3.1 {Tcl_UtfCharComplete} {
 } {}
 
-testConstraint testnumutfchars [llength [info commands testnumutfchars]]
-testConstraint testfindfirst [llength [info commands testfindfirst]]
-testConstraint testfindlast [llength [info commands testfindlast]]
-
 test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars {
     testnumutfchars ""
 } {0}
-test utf-4.2 {Tcl_NumUtfChars: length 1} testnumutfchars {
-    testnumutfchars [bytestring "\xC2\xA2"]
+test utf-4.2 {Tcl_NumUtfChars: length 1} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "\xC2\xA2"]
 } {1}
-test utf-4.3 {Tcl_NumUtfChars: long string} testnumutfchars {
-    testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"]
+test utf-4.3 {Tcl_NumUtfChars: long string} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8e\uA2\u4E4E"]
 } {7}
-test utf-4.4 {Tcl_NumUtfChars: #u0000} testnumutfchars {
-    testnumutfchars [bytestring "\xC0\x80"]
+test utf-4.4 {Tcl_NumUtfChars: #u0000} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "\xC0\x80"]
 } {1}
 test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars {
     testnumutfchars "" 0
 } {0}
-test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} testnumutfchars {
-    testnumutfchars [bytestring "\xC2\xA2"] 1
+test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "\xC2\xA2"] 1
 } {1}
-test utf-4.7 {Tcl_NumUtfChars: long string, calc len} testnumutfchars {
-    testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] 10
+test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\u4E4E"] 10
 } {7}
-test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} testnumutfchars {
-    testnumutfchars [bytestring "\xC0\x80"] 1
+test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "\xC0\x80"] 1
 } {1}
 # Bug [2738427]: Tcl_NumUtfChars(...) no overflow check
-test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} testnumutfchars {
-    testnumutfchars [bytestring "\xE2\x82\xAC"] 2
+test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "\xE2\x82\xAC"] 2
 } {2}
-test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} testnumutfchars {
-    testnumutfchars [bytestring "\x00"] 2
+test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "\x00"] 2
 } {2}
-test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} testnumutfchars {
-    testnumutfchars [bytestring \xf0\x9f\x92\xa9] 3
+test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring \xf0\x9f\x92\xA9] 3
 } {3}
-test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} testnumutfchars {
-    testnumutfchars [bytestring \xf0\x9f\x92\xa9] 4
+test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring compat85} {
+    testnumutfchars [testbytestring \xf0\x9f\x92\xA9] 4
 } {4}
 
-test utf-5.1 {Tcl_UtfFindFirst} testfindfirst {
-    testfindfirst [bytestring "abcbc"] 98
+test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} {
+    testfindfirst [testbytestring "abcbc"] 98
 } {bcbc}
-test utf-5.2 {Tcl_UtfFindLast} testfindlast {
-    testfindlast [bytestring "abcbc"] 98
+test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} {
+    testfindlast [testbytestring "abcbc"] 98
 } {bc}
 
-testConstraint testutfnext [llength [info commands testutfnext]]
-
 test utf-6.1 {Tcl_UtfNext} testutfnext {
     # This takes the pointer one past the terminating NUL.
     # This is really an invalid call.
@@ -334,7 +335,7 @@ test utf-6.67 {Tcl_UtfNext} testutfnext {
 test utf-6.68 {Tcl_UtfNext} testutfnext {
     testutfnext \xF2\xA0\xA0G
 } 1
-test utf-6.69 {Tcl_UtfNext} testutfnext {
+test utf-6.69 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0
 } 1
 test utf-6.70 {Tcl_UtfNext} testutfnext {
@@ -349,22 +350,22 @@ test utf-6.71 {Tcl_UtfNext} testutfnext {
 test utf-6.73 {Tcl_UtfNext} testutfnext {
     testutfnext \xF2\xA0\xA0\xF8
 } 1
-test utf-6.74 {Tcl_UtfNext} testutfnext {
+test utf-6.74 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0G
 } 1
-test utf-6.75 {Tcl_UtfNext} testutfnext {
+test utf-6.75 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0\xA0
 } 1
-test utf-6.76 {Tcl_UtfNext} testutfnext {
+test utf-6.76 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0\xD0
 } 1
-test utf-6.77 {Tcl_UtfNext} testutfnext {
+test utf-6.77 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0\xE8
 } 1
-test utf-6.78 {Tcl_UtfNext} testutfnext {
+test utf-6.78 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0\xF2
 } 1
-test utf-6.79 {Tcl_UtfNext} testutfnext {
+test utf-6.79 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0G\xF8
 } 1
 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext {
@@ -388,7 +389,7 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext {
 test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext {
     testutfnext \xF0\x80\x80\x80
 } 1
-test utf-6.87 {Tcl_UtfNext - overlong sequences} testutfnext {
+test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext compat85} {
     testutfnext \xF0\x90\x80\x80
 } 1
 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
@@ -403,15 +404,13 @@ test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {te
 test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
     testutfnext \xF0\x80\x80 1
 } 2
-test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext {
+test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext compat85} {
     testutfnext \xF4\x8F\xBF\xBF
 } 1
 test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext {
     testutfnext \xF4\x90\x80\x80
 } 1
 
-testConstraint testutfprev [llength [info commands testutfprev]]
-
 test utf-7.1 {Tcl_UtfPrev} testutfprev {
     testutfprev {}
 } 0
@@ -475,13 +474,13 @@ test utf-7.9.1 {Tcl_UtfPrev} testutfprev {
 test utf-7.9.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xF8\xA0 3
 } 2
-test utf-7.10 {Tcl_UtfPrev} testutfprev {
+test utf-7.10 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0
 } 2
-test utf-7.10.1 {Tcl_UtfPrev} testutfprev {
+test utf-7.10.1 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0\xA0 3
 } 2
-test utf-7.10.2 {Tcl_UtfPrev} testutfprev {
+test utf-7.10.2 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xF8\xA0 3
 } 2
 test utf-7.11 {Tcl_UtfPrev} testutfprev {
@@ -523,13 +522,13 @@ test utf-7.14.1 {Tcl_UtfPrev} testutfprev {
 test utf-7.14.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xA0\xF8 4
 } 3
-test utf-7.15 {Tcl_UtfPrev} testutfprev {
+test utf-7.15 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0
 } 3
-test utf-7.15.1 {Tcl_UtfPrev} testutfprev {
+test utf-7.15.1 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0\xA0 4
 } 3
-test utf-7.15.2 {Tcl_UtfPrev} testutfprev {
+test utf-7.15.2 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0\xF8 4
 } 3
 test utf-7.16 {Tcl_UtfPrev} testutfprev {
@@ -562,7 +561,7 @@ test utf-7.18.2 {Tcl_UtfPrev} testutfprev {
 test utf-7.19 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xA0\xA0
 } 4
-test utf-7.20 {Tcl_UtfPrev} testutfprev {
+test utf-7.20 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0\xA0
 } 4
 test utf-7.21 {Tcl_UtfPrev} testutfprev {
@@ -622,16 +621,16 @@ test utf-7.36 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
 test utf-7.37 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
     testutfprev A\xE0\xA0\x80 3
 } 1
-test utf-7.38 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+test utf-7.38 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
     testutfprev A\xE0\xA0\x80 2
 } 1
-test utf-7.39 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+test utf-7.39 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
     testutfprev A\xF0\x90\x80\x80
 } 4
-test utf-7.40 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+test utf-7.40 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
     testutfprev A\xF0\x90\x80\x80 4
 } 3
-test utf-7.41 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+test utf-7.41 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
     testutfprev A\xF0\x90\x80\x80 3
 } 2
 test utf-7.42 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
@@ -658,13 +657,13 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {te
 test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} {
     testutfprev \xE8\xA0\x00 2
 } 0
-test utf-7.48 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+test utf-7.48 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
     testutfprev A\xF4\x8F\xBF\xBF
 } 4
-test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
     testutfprev A\xF4\x8F\xBF\xBF 4
 } 3
-test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
     testutfprev A\xF4\x8F\xBF\xBF 3
 } 2
 test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
@@ -708,18 +707,18 @@ test utf-10.1 {Tcl_UtfBackslash: dst == NULL} {
     set x \n
 } {
 }
-test utf-10.2 {Tcl_UtfBackslash: \u subst} {
-    set x \ua2
-} [bytestring "\xc2\xa2"]
-test utf-10.3 {Tcl_UtfBackslash: longer \u subst} {
-    set x \u4e21
-} [bytestring "\xe4\xb8\xa1"]
-test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} {
-    set x \u4e2k
-} "[bytestring \xd3\xa2]k"
-test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} {
-    set x \u4e216
-} "[bytestring \xe4\xb8\xa1]6"
+test utf-10.2 {Tcl_UtfBackslash: \u subst} testbytestring {
+    set x \uA2
+} [testbytestring "\xC2\xA2"]
+test utf-10.3 {Tcl_UtfBackslash: longer \u subst} testbytestring {
+    set x \u4E21
+} [testbytestring "\xE4\xB8\xA1"]
+test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} testbytestring {
+    set x \u4E2k
+} "[testbytestring \xD3\xA2]k"
+test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} testbytestring {
+    set x \u4E216
+} "[testbytestring \xE4\xB8\xA1]6"
 proc bsCheck {char num} {
     global errNum
     test utf-10.$errNum {backslash substitution} {
@@ -774,11 +773,11 @@ test utf-11.2 {Tcl_UtfToUpper} {
     string toupper abc
 } ABC
 test utf-11.3 {Tcl_UtfToUpper} {
-    string toupper \u00e3ab
-} \u00c3AB
+    string toupper \u00E3ab
+} \u00C3AB
 test utf-11.4 {Tcl_UtfToUpper} {
-    string toupper \u01e3ab
-} \u01e2AB
+    string toupper \u01E3ab
+} \u01E2AB
 
 test utf-12.1 {Tcl_UtfToLower} {
     string tolower {}
@@ -787,11 +786,11 @@ test utf-12.2 {Tcl_UtfToLower} {
     string tolower ABC
 } abc
 test utf-12.3 {Tcl_UtfToLower} {
-    string tolower \u00c3AB
-} \u00e3ab
+    string tolower \u00C3AB
+} \u00E3ab
 test utf-12.4 {Tcl_UtfToLower} {
-    string tolower \u01e2AB
-} \u01e3ab
+    string tolower \u01E2AB
+} \u01E3ab
 
 test utf-13.1 {Tcl_UtfToTitle} {
     string totitle {}
@@ -800,11 +799,11 @@ test utf-13.2 {Tcl_UtfToTitle} {
     string totitle abc
 } Abc
 test utf-13.3 {Tcl_UtfToTitle} {
-    string totitle \u00e3ab
-} \u00c3ab
+    string totitle \u00E3ab
+} \u00C3ab
 test utf-13.4 {Tcl_UtfToTitle} {
-    string totitle \u01f3ab
-} \u01f2ab
+    string totitle \u01F3ab
+} \u01F2ab
 
 test utf-14.1 {Tcl_UtfNcasecmp} {
     string compare -nocase a b
@@ -823,7 +822,7 @@ test utf-15.1 {Tcl_UniCharToUpper, negative delta} {
     string toupper aA
 } AA
 test utf-15.2 {Tcl_UniCharToUpper, positive delta} {
-    string toupper \u0178\u00ff
+    string toupper \u0178\xFF
 } \u0178\u0178
 test utf-15.3 {Tcl_UniCharToUpper, no delta} {
     string toupper !
@@ -833,24 +832,24 @@ test utf-16.1 {Tcl_UniCharToLower, negative delta} {
     string tolower aA
 } aa
 test utf-16.2 {Tcl_UniCharToLower, positive delta} {
-    string tolower \u0178\u00ff\uA78D\u01c5
-} \u00ff\u00ff\u0265\u01c6
+    string tolower \u0178\xFF\uA78D\u01C5
+} \xFF\xFF\u0265\u01C6
 
 test utf-17.1 {Tcl_UniCharToLower, no delta} {
     string tolower !
 } !
 
 test utf-18.1 {Tcl_UniCharToTitle, add one for title} {
-    string totitle \u01c4
-} \u01c5
+    string totitle \u01C4
+} \u01C5
 test utf-18.2 {Tcl_UniCharToTitle, subtract one for title} {
-    string totitle \u01c6
-} \u01c5
+    string totitle \u01C6
+} \u01C5
 test utf-18.3 {Tcl_UniCharToTitle, subtract delta for title (positive)} {
-    string totitle \u017f
+    string totitle \u017F
 } \u0053
 test utf-18.4 {Tcl_UniCharToTitle, subtract delta for title (negative)} {
-    string totitle \u00ff
+    string totitle \xFF
 } \u0178
 test utf-18.5 {Tcl_UniCharToTitle, no delta} {
     string totitle !
@@ -865,15 +864,15 @@ test utf-20.1 {TclUniCharNcmp} {
 
 test utf-21.1 {TclUniCharIsAlnum} {
     # this returns 1 with Unicode 7 compliance
-    string is alnum \u1040\u021f\u0220
+    string is alnum \u1040\u021F\u0220
 } {1}
 test utf-21.2 {unicode alnum char in regc_locale.c} {
     # this returns 1 with Unicode 7 compliance
-    list [regexp {^[[:alnum:]]+$} \u1040\u021f\u0220] [regexp {^\w+$} \u1040\u021f\u0220_\u203f\u2040\u2054\ufe33\ufe34\ufe4d\ufe4e\ufe4f\uff3f]
+    list [regexp {^[[:alnum:]]+$} \u1040\u021F\u0220] [regexp {^\w+$} \u1040\u021F\u0220_\u203F\u2040\u2054\uFE33\uFE34\uFE4D\uFE4E\uFE4F\uFF3F]
 } {1 1}
 test utf-21.3 {unicode print char in regc_locale.c} {
     # this returns 1 with Unicode 7 compliance
-    regexp {^[[:print:]]+$} \ufbc1
+    regexp {^[[:print:]]+$} \uFBC1
 } 1
 test utf-21.4 {TclUniCharIsGraph} {
     # [Bug 3464428]
@@ -885,11 +884,11 @@ test utf-21.5 {unicode graph char in regc_locale.c} {
 } {1}
 test utf-21.6 {TclUniCharIsGraph} {
     # [Bug 3464428]
-    string is graph \u00a0
+    string is graph \xA0
 } {0}
 test utf-21.7 {unicode graph char in regc_locale.c} {
     # [Bug 3464428]
-    regexp {[[:graph:]]} \u0020\u00a0\u2028\u2029
+    regexp {[[:graph:]]} \x20\xA0\u2028\u2029
 } {0}
 test utf-21.8 {TclUniCharIsPrint} {
     # [Bug 3464428]
@@ -905,49 +904,47 @@ test utf-21.10 {unicode print char in regc_locale.c} {
 } {0}
 test utf-21.11 {TclUniCharIsControl} {
     # [Bug 3464428]
-    string is control \u0000\u001f\u00ad\u0605\u061c\u180e\u2066\ufeff
+    string is control \x00\x1F\xad\u0605\u061C\u180E\u2066\uFEFF
 } {1}
 test utf-21.12 {unicode control char in regc_locale.c} {
     # [Bug 3464428], [Bug a876646efe]
-    regexp {^[[:cntrl:]]*$} \u0000\u001f\u00ad\u0605\u061c\u180e\u2066\ufeff
+    regexp {^[[:cntrl:]]*$} \x00\x1F\xad\u0605\u061C\u180E\u2066\uFEFF
 } {1}
 
 test utf-22.1 {TclUniCharIsWordChar} {
     string wordend "xyz123_bar fg" 0
 } 10
 test utf-22.2 {TclUniCharIsWordChar} {
-    string wordend "x\u5080z123_bar\u203c fg" 0
+    string wordend "x\u5080z123_bar\u203C fg" 0
 } 10
 
 test utf-23.1 {TclUniCharIsAlpha} {
     # this returns 1 with Unicode 7 compliance
-    string is alpha \u021f\u0220\u037f\u052f
+    string is alpha \u021F\u0220\u037F\u052F
 } {1}
 test utf-23.2 {unicode alpha char in regc_locale.c} {
     # this returns 1 with Unicode 7 compliance
-    regexp {^[[:alpha:]]+$} \u021f\u0220\u037f\u052f
+    regexp {^[[:alpha:]]+$} \u021F\u0220\u037F\u052F
 } {1}
 
 test utf-24.1 {TclUniCharIsDigit} {
     # this returns 1 with Unicode 7 compliance
-    string is digit \u1040\uabf0
+    string is digit \u1040\uABF0
 } {1}
 test utf-24.2 {unicode digit char in regc_locale.c} {
     # this returns 1 with Unicode 7 compliance
-    list [regexp {^[[:digit:]]+$} \u1040\uabf0] [regexp {^\d+$} \u1040\uabf0]
+    list [regexp {^[[:digit:]]+$} \u1040\uABF0] [regexp {^\d+$} \u1040\uABF0]
 } {1 1}
 
 test utf-24.3 {TclUniCharIsSpace} {
     # this returns 1 with Unicode 7 compliance
-    string is space \u1680\u180e\u202f
+    string is space \u1680\u180E\u202F
 } {1}
 test utf-24.4 {unicode space char in regc_locale.c} {
     # this returns 1 with Unicode 7 compliance
-    list [regexp {^[[:space:]]+$} \u1680\u180e\u202f] [regexp {^\s+$} \u1680\u180e\u202f]
+    list [regexp {^[[:space:]]+$} \u1680\u180E\u202F] [regexp {^\s+$} \u1680\u180E\u202F]
 } {1 1}
 
-testConstraint teststringobj [llength [info commands teststringobj]]
-
 test utf-25.1 {Tcl_UniCharNcasecmp} -constraints teststringobj \
     -setup {
 	testobj freeallvars
diff --git a/tests/util.test b/tests/util.test
index 85c06dd..a483de1 100644
--- a/tests/util.test
+++ b/tests/util.test
@@ -15,6 +15,7 @@ if {[lsearch [namespace children] ::tcltest] == -1} {
 testConstraint testdstring [llength [info commands testdstring]]
 testConstraint testconcatobj [llength [info commands testconcatobj]]
 testConstraint testdoubledigits [llength [info commands testdoubledigits]]
+testConstraint compat85 [expr {[format %c 0x010000] == "\uFFFD"}]
 
 # Big test for correct ordering of data in [expr]
 
-- 
cgit v0.12


From ea9467702aecb854ba8cd803edbb38c4590aa928 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Sat, 18 Apr 2020 15:02:51 +0000
Subject: Make TCL_UTF_MAX=4 build test clean again.

---
 generic/tcl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/generic/tcl.h b/generic/tcl.h
index d7d064c..7378a8f 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -2148,7 +2148,7 @@ typedef struct Tcl_Parse {
  * reflected in regcustom.h.
  */
 
-#if TCL_UTF_MAX > 3
+#if TCL_UTF_MAX > 4
     /*
      * unsigned int isn't 100% accurate as it should be a strict 4-byte value
      * (perhaps wchar_t). 64-bit systems may have troubles. The size of this
-- 
cgit v0.12


From f866e98a39dc53d4864e3b04119b7dc2fd65078d Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Sat, 18 Apr 2020 15:11:22 +0000
Subject: regexp engine has to agree about the sizeof(Tcl_UniChar).

---
 generic/regcustom.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/generic/regcustom.h b/generic/regcustom.h
index ac33087..57a2d47 100644
--- a/generic/regcustom.h
+++ b/generic/regcustom.h
@@ -97,7 +97,7 @@ typedef int celt;		/* Type to hold chr, or NOCELT */
 #define	NOCELT (-1)		/* Celt value which is not valid chr */
 #define	CHR(c) (UCHAR(c))	/* Turn char literal into chr literal */
 #define	DIGITVAL(c) ((c)-'0')	/* Turn chr digit into its value */
-#if TCL_UTF_MAX > 3
+#if TCL_UTF_MAX > 4
 #define	CHRBITS	32		/* Bits in a chr; must not use sizeof */
 #define	CHR_MIN	0x00000000	/* Smallest and largest chr; the value */
 #define	CHR_MAX	0xffffffff	/* CHR_MAX-CHR_MIN+1 should fit in uchr */
-- 
cgit v0.12


From 4f3cc7f661e8ae301fd9b4aaf7a4c66d94897ec3 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Sat, 18 Apr 2020 19:54:08 +0000
Subject: Clean-up testcases: Constant use of uppercase in hex-values. Use
 "testbytestring" in stead of "bytestring". Mark tests not working with
 TCL_UTF_MAX>3 with "compat85"

---
 tests/utf.test | 278 ++++++++++++++++++++++++++++-----------------------------
 1 file changed, 137 insertions(+), 141 deletions(-)

diff --git a/tests/utf.test b/tests/utf.test
index 01e0bb2..189b85d 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -13,54 +13,60 @@ if {[lsearch [namespace children] ::tcltest] == -1} {
     namespace import -force ::tcltest::*
 }
 
+testConstraint compat85 [expr {[format %c 0x010000] == "\uFFFD"}]
 testConstraint testbytestring [llength [info commands testbytestring]]
+testConstraint testfindfirst [llength [info commands testfindfirst]]
+testConstraint testfindlast [llength [info commands testfindlast]]
+testConstraint testnumutfchars [llength [info commands testnumutfchars]]
+testConstraint teststringobj [llength [info commands teststringobj]]
+testConstraint testutfnext [llength [info commands testutfnext]]
+testConstraint testutfprev [llength [info commands testutfprev]]
 
 catch {unset x}
 
-test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} {
-    set x \x01
-} [bytestring "\x01"]
-test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} {
-    set x "\x00"
-} [bytestring "\xc0\x80"]
-test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} {
-    set x "\xe0"
-} [bytestring "\xc3\xa0"]
-test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} {
-    set x "\u4e4e"
-} [bytestring "\xe4\xb9\x8e"]
-test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} {
-    format %c 0x110000
-} [bytestring "\xef\xbf\xbd"]
-test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} {
-    format %c -1
-} [bytestring "\xef\xbf\xbd"]
-
+test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} testbytestring {
+    expr {"\x01" eq [testbytestring "\x01"]}
+} 1
+test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} testbytestring {
+    expr {"\x00" eq [testbytestring "\xC0\x80"]}
+} 1
+test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} testbytestring {
+    expr {"\xE0" eq [testbytestring "\xC3\xA0"]}
+} 1
+test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} testbytestring {
+    expr {"\u4E4E" eq [testbytestring "\xE4\xB9\x8E"]}
+} 1
+test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} testbytestring {
+    expr {[format %c 0x110000] eq [testbytestring "\xEF\xBF\xBD"]}
+} 1
+test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring {
+    expr {[format %c -1] eq [testbytestring "\xEF\xBF\xBD"]}
+} 1
 test utf-2.1 {Tcl_UtfToUniChar: low ascii} {
     string length "abc"
 } {3}
-test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} {
-    string length [bytestring "\x82\x83\x84"]
+test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} testbytestring {
+    string length [testbytestring "\x82\x83\x84"]
 } {3}
-test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} {
-    string length [bytestring "\xC2"]
+test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} testbytestring {
+    string length [testbytestring "\xC2"]
 } {1}
-test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} {
-    string length [bytestring "\xC2\xa2"]
+test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} testbytestring {
+    string length [testbytestring "\xC2\xA2"]
 } {1}
-test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} {
-    string length [bytestring "\xE2"]
+test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} testbytestring {
+    string length [testbytestring "\xE2"]
 } {1}
-test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} {
-    string length [bytestring "\xE2\xA2"]
+test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} testbytestring {
+    string length [testbytestring "\xE2\xA2"]
 } {2}
-test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} {
-    string length [bytestring "\xE4\xb9\x8e"]
+test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring {
+    string length [testbytestring "\xE4\xB9\x8E"]
 } {1}
-test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring} -body {
+test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body {
     string length [testbytestring "\xF0\x90\x80\x80"]
 } -result {4}
-test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring} -body {
+test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body {
     string length [testbytestring "\xF4\x8F\xBF\xBF"]
 } -result {4}
 test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring {
@@ -77,57 +83,51 @@ test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestr
 test utf-3.1 {Tcl_UtfCharComplete} {
 } {}
 
-testConstraint testnumutfchars [llength [info commands testnumutfchars]]
-testConstraint testfindfirst [llength [info commands testfindfirst]]
-testConstraint testfindlast [llength [info commands testfindlast]]
-
 test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars {
     testnumutfchars ""
 } {0}
-test utf-4.2 {Tcl_NumUtfChars: length 1} testnumutfchars {
-    testnumutfchars [bytestring "\xC2\xA2"]
+test utf-4.2 {Tcl_NumUtfChars: length 1} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "\xC2\xA2"]
 } {1}
-test utf-4.3 {Tcl_NumUtfChars: long string} testnumutfchars {
-    testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"]
+test utf-4.3 {Tcl_NumUtfChars: long string} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"]
 } {7}
-test utf-4.4 {Tcl_NumUtfChars: #u0000} testnumutfchars {
-    testnumutfchars [bytestring "\xC0\x80"]
+test utf-4.4 {Tcl_NumUtfChars: #u0000} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "\xC0\x80"]
 } {1}
 test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars {
     testnumutfchars "" 0
 } {0}
-test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} testnumutfchars {
-    testnumutfchars [bytestring "\xC2\xA2"] 1
+test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "\xC2\xA2"] 1
 } {1}
-test utf-4.7 {Tcl_NumUtfChars: long string, calc len} testnumutfchars {
-    testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] 10
+test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"] 10
 } {7}
-test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} testnumutfchars {
-    testnumutfchars [bytestring "\xC0\x80"] 1
+test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "\xC0\x80"] 1
 } {1}
 # Bug [2738427]: Tcl_NumUtfChars(...) no overflow check
-test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} testnumutfchars {
-    testnumutfchars [bytestring "\xE2\x82\xAC"] 2
+test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "\xE2\x82\xAC"] 2
 } {2}
-test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} testnumutfchars {
-    testnumutfchars [bytestring "\x00"] 2
+test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "\x00"] 2
 } {2}
-test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} testnumutfchars {
-    testnumutfchars [bytestring \xf0\x9f\x92\xa9] 3
+test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 3
 } {3}
-test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} testnumutfchars {
-    testnumutfchars [bytestring \xf0\x9f\x92\xa9] 4
+test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring compat85} {
+    testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4
 } {4}
 
-test utf-5.1 {Tcl_UtfFindFirst} testfindfirst {
-    testfindfirst [bytestring "abcbc"] 98
+test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} {
+    testfindfirst [testbytestring "abcbc"] 98
 } {bcbc}
-test utf-5.2 {Tcl_UtfFindLast} testfindlast {
-    testfindlast [bytestring "abcbc"] 98
+test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} {
+    testfindlast [testbytestring "abcbc"] 98
 } {bc}
 
-testConstraint testutfnext [llength [info commands testutfnext]]
-
 test utf-6.1 {Tcl_UtfNext} testutfnext {
     # This takes the pointer one past the terminating NUL.
     # This is really an invalid call.
@@ -334,7 +334,7 @@ test utf-6.67 {Tcl_UtfNext} testutfnext {
 test utf-6.68 {Tcl_UtfNext} testutfnext {
     testutfnext \xF2\xA0\xA0G
 } 1
-test utf-6.69 {Tcl_UtfNext} testutfnext {
+test utf-6.69 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0
 } 1
 test utf-6.70 {Tcl_UtfNext} testutfnext {
@@ -349,22 +349,22 @@ test utf-6.71 {Tcl_UtfNext} testutfnext {
 test utf-6.73 {Tcl_UtfNext} testutfnext {
     testutfnext \xF2\xA0\xA0\xF8
 } 1
-test utf-6.74 {Tcl_UtfNext} testutfnext {
+test utf-6.74 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0G
 } 1
-test utf-6.75 {Tcl_UtfNext} testutfnext {
+test utf-6.75 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0\xA0
 } 1
-test utf-6.76 {Tcl_UtfNext} testutfnext {
+test utf-6.76 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0\xD0
 } 1
-test utf-6.77 {Tcl_UtfNext} testutfnext {
+test utf-6.77 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0\xE8
 } 1
-test utf-6.78 {Tcl_UtfNext} testutfnext {
+test utf-6.78 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0\xF2
 } 1
-test utf-6.79 {Tcl_UtfNext} testutfnext {
+test utf-6.79 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0G\xF8
 } 1
 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext {
@@ -388,7 +388,7 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext {
 test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext {
     testutfnext \xF0\x80\x80\x80
 } 1
-test utf-6.87 {Tcl_UtfNext - overlong sequences} testutfnext {
+test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext compat85} {
     testutfnext \xF0\x90\x80\x80
 } 1
 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
@@ -403,15 +403,13 @@ test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {te
 test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
     testutfnext \xF0\x80\x80 1
 } 2
-test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext {
+test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext compat85} {
     testutfnext \xF4\x8F\xBF\xBF
 } 1
 test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext {
     testutfnext \xF4\x90\x80\x80
 } 1
 
-testConstraint testutfprev [llength [info commands testutfprev]]
-
 test utf-7.1 {Tcl_UtfPrev} testutfprev {
     testutfprev {}
 } 0
@@ -475,13 +473,13 @@ test utf-7.9.1 {Tcl_UtfPrev} testutfprev {
 test utf-7.9.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xF8\xA0 3
 } 2
-test utf-7.10 {Tcl_UtfPrev} testutfprev {
+test utf-7.10 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0
 } 2
-test utf-7.10.1 {Tcl_UtfPrev} testutfprev {
+test utf-7.10.1 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0\xA0 3
 } 2
-test utf-7.10.2 {Tcl_UtfPrev} testutfprev {
+test utf-7.10.2 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xF8\xA0 3
 } 2
 test utf-7.11 {Tcl_UtfPrev} testutfprev {
@@ -523,13 +521,13 @@ test utf-7.14.1 {Tcl_UtfPrev} testutfprev {
 test utf-7.14.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xA0\xF8 4
 } 3
-test utf-7.15 {Tcl_UtfPrev} testutfprev {
+test utf-7.15 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0
 } 3
-test utf-7.15.1 {Tcl_UtfPrev} testutfprev {
+test utf-7.15.1 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0\xA0 4
 } 3
-test utf-7.15.2 {Tcl_UtfPrev} testutfprev {
+test utf-7.15.2 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0\xF8 4
 } 3
 test utf-7.16 {Tcl_UtfPrev} testutfprev {
@@ -562,7 +560,7 @@ test utf-7.18.2 {Tcl_UtfPrev} testutfprev {
 test utf-7.19 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xA0\xA0
 } 4
-test utf-7.20 {Tcl_UtfPrev} testutfprev {
+test utf-7.20 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0\xA0
 } 4
 test utf-7.21 {Tcl_UtfPrev} testutfprev {
@@ -622,16 +620,16 @@ test utf-7.36 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
 test utf-7.37 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
     testutfprev A\xE0\xA0\x80 3
 } 1
-test utf-7.38 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+test utf-7.38 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
     testutfprev A\xE0\xA0\x80 2
 } 1
-test utf-7.39 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+test utf-7.39 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
     testutfprev A\xF0\x90\x80\x80
 } 4
-test utf-7.40 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+test utf-7.40 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
     testutfprev A\xF0\x90\x80\x80 4
 } 3
-test utf-7.41 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+test utf-7.41 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
     testutfprev A\xF0\x90\x80\x80 3
 } 2
 test utf-7.42 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
@@ -658,13 +656,13 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {te
 test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} {
     testutfprev \xE8\xA0\x00 2
 } 0
-test utf-7.48 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+test utf-7.48 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
     testutfprev A\xF4\x8F\xBF\xBF
 } 4
-test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
     testutfprev A\xF4\x8F\xBF\xBF 4
 } 3
-test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
     testutfprev A\xF4\x8F\xBF\xBF 3
 } 2
 test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
@@ -708,18 +706,18 @@ test utf-10.1 {Tcl_UtfBackslash: dst == NULL} {
     set x \n
 } {
 }
-test utf-10.2 {Tcl_UtfBackslash: \u subst} {
-    set x \ua2
-} [bytestring "\xc2\xa2"]
-test utf-10.3 {Tcl_UtfBackslash: longer \u subst} {
-    set x \u4e21
-} [bytestring "\xe4\xb8\xa1"]
-test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} {
-    set x \u4e2k
-} "[bytestring \xd3\xa2]k"
-test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} {
-    set x \u4e216
-} "[bytestring \xe4\xb8\xa1]6"
+test utf-10.2 {Tcl_UtfBackslash: \u subst} testbytestring {
+    expr {"\uA2" eq [testbytestring "\xC2\xA2"]}
+} 1
+test utf-10.3 {Tcl_UtfBackslash: longer \u subst} testbytestring {
+    expr {"\u4E21" eq [testbytestring "\xE4\xB8\xA1"]}
+} 1
+test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} testbytestring {
+    expr {"\u4E2k" eq "[testbytestring \xD3\xA2]k"}
+} 1
+test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} testbytestring {
+    expr {"\u4E216" eq [testbytestring "\xE4\xB8\xA1"]6}
+} 1
 proc bsCheck {char num} {
     global errNum
     test utf-10.$errNum {backslash substitution} {
@@ -774,11 +772,11 @@ test utf-11.2 {Tcl_UtfToUpper} {
     string toupper abc
 } ABC
 test utf-11.3 {Tcl_UtfToUpper} {
-    string toupper \u00e3ab
-} \u00c3AB
+    string toupper \xE3gh
+} \xC3GH
 test utf-11.4 {Tcl_UtfToUpper} {
-    string toupper \u01e3ab
-} \u01e2AB
+    string toupper \u01E3ab
+} \u01E2AB
 
 test utf-12.1 {Tcl_UtfToLower} {
     string tolower {}
@@ -787,11 +785,11 @@ test utf-12.2 {Tcl_UtfToLower} {
     string tolower ABC
 } abc
 test utf-12.3 {Tcl_UtfToLower} {
-    string tolower \u00c3AB
-} \u00e3ab
+    string tolower \xC3GH
+} \xE3gh
 test utf-12.4 {Tcl_UtfToLower} {
-    string tolower \u01e2AB
-} \u01e3ab
+    string tolower \u01E2AB
+} \u01E3ab
 
 test utf-13.1 {Tcl_UtfToTitle} {
     string totitle {}
@@ -800,11 +798,11 @@ test utf-13.2 {Tcl_UtfToTitle} {
     string totitle abc
 } Abc
 test utf-13.3 {Tcl_UtfToTitle} {
-    string totitle \u00e3ab
-} \u00c3ab
+    string totitle \xE3GH
+} \xC3gh
 test utf-13.4 {Tcl_UtfToTitle} {
-    string totitle \u01f3ab
-} \u01f2ab
+    string totitle \u01F3AB
+} \u01F2ab
 
 test utf-14.1 {Tcl_UtfNcasecmp} {
     string compare -nocase a b
@@ -823,7 +821,7 @@ test utf-15.1 {Tcl_UniCharToUpper, negative delta} {
     string toupper aA
 } AA
 test utf-15.2 {Tcl_UniCharToUpper, positive delta} {
-    string toupper \u0178\u00ff
+    string toupper \u0178\xFF
 } \u0178\u0178
 test utf-15.3 {Tcl_UniCharToUpper, no delta} {
     string toupper !
@@ -833,24 +831,24 @@ test utf-16.1 {Tcl_UniCharToLower, negative delta} {
     string tolower aA
 } aa
 test utf-16.2 {Tcl_UniCharToLower, positive delta} {
-    string tolower \u0178\u00ff\uA78D\u01c5
-} \u00ff\u00ff\u0265\u01c6
+    string tolower \u0178\xFF\uA78D\u01C5
+} \xFF\xFF\u0265\u01C6
 
 test utf-17.1 {Tcl_UniCharToLower, no delta} {
     string tolower !
 } !
 
 test utf-18.1 {Tcl_UniCharToTitle, add one for title} {
-    string totitle \u01c4
-} \u01c5
+    string totitle \u01C4
+} \u01C5
 test utf-18.2 {Tcl_UniCharToTitle, subtract one for title} {
-    string totitle \u01c6
-} \u01c5
+    string totitle \u01C6
+} \u01C5
 test utf-18.3 {Tcl_UniCharToTitle, subtract delta for title (positive)} {
-    string totitle \u017f
-} \u0053
+    string totitle \u017F
+} \x53
 test utf-18.4 {Tcl_UniCharToTitle, subtract delta for title (negative)} {
-    string totitle \u00ff
+    string totitle \xFF
 } \u0178
 test utf-18.5 {Tcl_UniCharToTitle, no delta} {
     string totitle !
@@ -865,15 +863,15 @@ test utf-20.1 {TclUniCharNcmp} {
 
 test utf-21.1 {TclUniCharIsAlnum} {
     # this returns 1 with Unicode 7 compliance
-    string is alnum \u1040\u021f\u0220
+    string is alnum \u1040\u021F\u0220
 } {1}
 test utf-21.2 {unicode alnum char in regc_locale.c} {
     # this returns 1 with Unicode 7 compliance
-    list [regexp {^[[:alnum:]]+$} \u1040\u021f\u0220] [regexp {^\w+$} \u1040\u021f\u0220_\u203f\u2040\u2054\ufe33\ufe34\ufe4d\ufe4e\ufe4f\uff3f]
+    list [regexp {^[[:alnum:]]+$} \u1040\u021F\u0220] [regexp {^\w+$} \u1040\u021F\u0220_\u203F\u2040\u2054\uFE33\uFE34\uFE4D\uFE4E\uFE4F\uFF3F]
 } {1 1}
 test utf-21.3 {unicode print char in regc_locale.c} {
     # this returns 1 with Unicode 7 compliance
-    regexp {^[[:print:]]+$} \ufbc1
+    regexp {^[[:print:]]+$} \uFBC1
 } 1
 test utf-21.4 {TclUniCharIsGraph} {
     # [Bug 3464428]
@@ -885,69 +883,67 @@ test utf-21.5 {unicode graph char in regc_locale.c} {
 } {1}
 test utf-21.6 {TclUniCharIsGraph} {
     # [Bug 3464428]
-    string is graph \u00a0
+    string is graph \xA0
 } {0}
 test utf-21.7 {unicode graph char in regc_locale.c} {
     # [Bug 3464428]
-    regexp {[[:graph:]]} \u0020\u00a0\u2028\u2029
+    regexp {[[:graph:]]} \x20\xA0\u2028\u2029
 } {0}
 test utf-21.8 {TclUniCharIsPrint} {
     # [Bug 3464428]
-    string is print \u0009
+    string is print \x09
 } {0}
 test utf-21.9 {unicode print char in regc_locale.c} {
     # [Bug 3464428]
-    regexp {[[:print:]]} \u0009
+    regexp {[[:print:]]} \x09
 } {0}
 test utf-21.10 {unicode print char in regc_locale.c} {
     # [Bug 3464428]
-    regexp {[[:print:]]} \u0009
+    regexp {[[:print:]]} \x09
 } {0}
 test utf-21.11 {TclUniCharIsControl} {
     # [Bug 3464428]
-    string is control \u0000\u001f\u00ad\u0605\u061c\u180e\u2066\ufeff
+    string is control \x00\x1F\xAD\u0605\u061C\u180E\u2066\uFEFF
 } {1}
 test utf-21.12 {unicode control char in regc_locale.c} {
     # [Bug 3464428], [Bug a876646efe]
-    regexp {^[[:cntrl:]]*$} \u0000\u001f\u00ad\u0605\u061c\u180e\u2066\ufeff
+    regexp {^[[:cntrl:]]*$} \x00\x1F\xAD\u0605\u061C\u180E\u2066\uFEFF
 } {1}
 
 test utf-22.1 {TclUniCharIsWordChar} {
     string wordend "xyz123_bar fg" 0
 } 10
 test utf-22.2 {TclUniCharIsWordChar} {
-    string wordend "x\u5080z123_bar\u203c fg" 0
+    string wordend "x\u5080z123_bar\u203C fg" 0
 } 10
 
 test utf-23.1 {TclUniCharIsAlpha} {
     # this returns 1 with Unicode 7 compliance
-    string is alpha \u021f\u0220\u037f\u052f
+    string is alpha \u021F\u0220\u037F\u052F
 } {1}
 test utf-23.2 {unicode alpha char in regc_locale.c} {
     # this returns 1 with Unicode 7 compliance
-    regexp {^[[:alpha:]]+$} \u021f\u0220\u037f\u052f
+    regexp {^[[:alpha:]]+$} \u021F\u0220\u037F\u052F
 } {1}
 
 test utf-24.1 {TclUniCharIsDigit} {
     # this returns 1 with Unicode 7 compliance
-    string is digit \u1040\uabf0
+    string is digit \u1040\uABF0
 } {1}
 test utf-24.2 {unicode digit char in regc_locale.c} {
     # this returns 1 with Unicode 7 compliance
-    list [regexp {^[[:digit:]]+$} \u1040\uabf0] [regexp {^\d+$} \u1040\uabf0]
+    list [regexp {^[[:digit:]]+$} \u1040\uABF0] [regexp {^\d+$} \u1040\uABF0]
 } {1 1}
 
 test utf-24.3 {TclUniCharIsSpace} {
     # this returns 1 with Unicode 7 compliance
-    string is space \u1680\u180e\u202f
+    string is space \u1680\u180E\u202F
 } {1}
 test utf-24.4 {unicode space char in regc_locale.c} {
     # this returns 1 with Unicode 7 compliance
-    list [regexp {^[[:space:]]+$} \u1680\u180e\u202f] [regexp {^\s+$} \u1680\u180e\u202f]
+    list [regexp {^[[:space:]]+$} \u1680\u180E\u202F] [regexp {^\s+$} \u1680\u180E\u202F]
 } {1 1}
 
-testConstraint teststringobj [llength [info commands teststringobj]]
-
 test utf-25.1 {Tcl_UniCharNcasecmp} -constraints teststringobj \
     -setup {
 	testobj freeallvars
-- 
cgit v0.12


From 1a343cd043776b8acc3c4a047a10556c70f077dd Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Sun, 19 Apr 2020 19:37:05 +0000
Subject: More test-cases. Fix wrong quoting in testcase utf-10.5

---
 tests/utf.test | 52 ++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 44 insertions(+), 8 deletions(-)

diff --git a/tests/utf.test b/tests/utf.test
index 189b85d..946aa83 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -42,6 +42,7 @@ test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} testbytestring {
 test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring {
     expr {[format %c -1] eq [testbytestring "\xEF\xBF\xBD"]}
 } 1
+
 test utf-2.1 {Tcl_UtfToUniChar: low ascii} {
     string length "abc"
 } {3}
@@ -90,7 +91,7 @@ test utf-4.2 {Tcl_NumUtfChars: length 1} {testnumutfchars testbytestring} {
     testnumutfchars [testbytestring "\xC2\xA2"]
 } {1}
 test utf-4.3 {Tcl_NumUtfChars: long string} {testnumutfchars testbytestring} {
-    testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"]
+    testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E"]
 } {7}
 test utf-4.4 {Tcl_NumUtfChars: #u0000} {testnumutfchars testbytestring} {
     testnumutfchars [testbytestring "\xC0\x80"]
@@ -693,6 +694,18 @@ test utf-8.3 {Tcl_UniCharAtIndex: index > 0} {
 test utf-8.4 {Tcl_UniCharAtIndex: index > 0} {
     string index \u4E4E\u25A\xFF\u543 2
 } "\uFF"
+test utf-8.5 {Tcl_UniCharAtIndex: high surrogate} {
+    string index \uD842 0
+} "\uD842"
+test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} {
+    string index \uDC42 0
+} "\uDC42"
+test utf-8.7 {Tcl_UniCharAtIndex: Emoji} compat85 {
+    string index \uD83D\uDE00 0
+} "\uD83D"
+test utf-8.8 {Tcl_UniCharAtIndex: Emoji} compat85 {
+    string index \uD83D\uDE00 1
+} "\uDE00"
 
 test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
     string range abcd 0 2
@@ -700,6 +713,12 @@ test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
 test utf-9.2 {Tcl_UtfAtIndex: index > 0} {
     string range \u4E4E\u25A\xFF\u543klmnop 1 5
 } "\u25A\xFF\u543kl"
+test utf-9.3 {Tcl_UtfAtIndex: index = 0, Emoji} compat85 {
+    string range \uD83D\uDE00G 0 0
+} "\uD83D"
+test utf-9.4 {Tcl_UtfAtIndex: index > 0, Emoji} compat85 {
+    string range \uD83D\uDE00G 1 1
+} "\uDE00"
 
 
 test utf-10.1 {Tcl_UtfBackslash: dst == NULL} {
@@ -716,7 +735,7 @@ test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} testbytestring {
     expr {"\u4E2k" eq "[testbytestring \xD3\xA2]k"}
 } 1
 test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} testbytestring {
-    expr {"\u4E216" eq [testbytestring "\xE4\xB8\xA1"]6}
+    expr {"\u4E216" eq "[testbytestring \xE4\xB8\xA1]6"}
 } 1
 proc bsCheck {char num} {
     global errNum
@@ -775,8 +794,8 @@ test utf-11.3 {Tcl_UtfToUpper} {
     string toupper \xE3gh
 } \xC3GH
 test utf-11.4 {Tcl_UtfToUpper} {
-    string toupper \u01E3ab
-} \u01E2AB
+    string toupper \u01E3gh
+} \u01E2GH
 
 test utf-12.1 {Tcl_UtfToLower} {
     string tolower {}
@@ -788,8 +807,14 @@ test utf-12.3 {Tcl_UtfToLower} {
     string tolower \xC3GH
 } \xE3gh
 test utf-12.4 {Tcl_UtfToLower} {
-    string tolower \u01E2AB
-} \u01E3ab
+    string tolower \u01E2GH
+} \u01E3gh
+test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} {
+    string tolower \u10D0\u1C90
+} \u10D0\u10D0
+test utf-12.6 {Tcl_UtfToUpper low/high surrogate)} {
+    string tolower \uDC24\uD824
+} \uDC24\uD824
 
 test utf-13.1 {Tcl_UtfToTitle} {
     string totitle {}
@@ -803,6 +828,15 @@ test utf-13.3 {Tcl_UtfToTitle} {
 test utf-13.4 {Tcl_UtfToTitle} {
     string totitle \u01F3AB
 } \u01F2ab
+test utf-13.5 {Tcl_UtfToTitle Georgian (new in Unicode 11)} {
+    string totitle \u10D0\u1C90
+} \u10D0\u1C90
+test utf-13.6 {Tcl_UtfToTitle Georgian (new in Unicode 11)} {
+    string totitle \u1C90\u10D0
+} \u1C90\u10D0
+test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} {
+    string totitle \uDC24\uD824
+} \uDC24\uD824
 
 test utf-14.1 {Tcl_UtfNcasecmp} {
     string compare -nocase a b
@@ -854,9 +888,11 @@ test utf-18.5 {Tcl_UniCharToTitle, no delta} {
     string totitle !
 } !
 
-test utf-19.1 {TclUniCharLen} {
+test utf-19.1 {TclUniCharLen} -body {
     list [regexp \\d abc456def foo] $foo
-} {1 4}
+} -cleanup {
+    unset -nocomplain foo
+} -result {1 4}
 
 test utf-20.1 {TclUniCharNcmp} {
 } {}
-- 
cgit v0.12


From 9bcd48e5ffd32d1858e4d0f90a4eaee550ede17f Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Sun, 19 Apr 2020 22:02:10 +0000
Subject: typo

---
 tests/utf.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/utf.test b/tests/utf.test
index 946aa83..07863b9 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -103,7 +103,7 @@ test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {testnumutfchars testbytestri
     testnumutfchars [testbytestring "\xC2\xA2"] 1
 } {1}
 test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {testnumutfchars testbytestring} {
-    testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"] 10
+    testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E"] 10
 } {7}
 test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {testnumutfchars testbytestring} {
     testnumutfchars [testbytestring "\xC0\x80"] 1
-- 
cgit v0.12


From bb5381f946565a91e146910d62c56b40c02c5193 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Mon, 20 Apr 2020 05:35:54 +0000
Subject: Reconcile tests to the 8.5 branch history.

---
 tests/utf.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/utf.test b/tests/utf.test
index 07863b9..1ca3647 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -621,7 +621,7 @@ test utf-7.36 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
 test utf-7.37 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
     testutfprev A\xE0\xA0\x80 3
 } 1
-test utf-7.38 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
+test utf-7.38 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
     testutfprev A\xE0\xA0\x80 2
 } 1
 test utf-7.39 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
-- 
cgit v0.12


From 534db753aefcbe8cbdbec69611e9c6e31ea3deec Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Mon, 20 Apr 2020 06:45:47 +0000
Subject: Backport the encoding fix for source-7.2 in TCL_UTF_MAX=6 build.

---
 generic/tclEncoding.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 6c16827..5a9d2d5 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2470,20 +2470,33 @@ UtfToUnicodeProc(
 	if (dst > dstEnd) {
 	    result = TCL_CONVERT_NOSPACE;
 	    break;
-        }
+	}
 	src += TclUtfToUniChar(src, &ch);
 	/*
 	 * Need to handle this in a way that won't cause misalignment
 	 * by casting dst to a Tcl_UniChar. [Bug 1122671]
-	 * XXX: This hard-codes the assumed size of Tcl_UniChar as 2.
 	 */
 #ifdef WORDS_BIGENDIAN
+#if TCL_UTF_MAX > 4
+	*dst++ = (ch >> 24);
+	*dst++ = ((ch >> 16) & 0xFF);
+	*dst++ = ((ch >> 8) & 0xFF);
+	*dst++ = (ch & 0xFF);
+#else
 	*dst++ = (ch >> 8);
 	*dst++ = (ch & 0xFF);
+#endif
+#else
+#if TCL_UTF_MAX > 4
+	*dst++ = (ch & 0xFF);
+	*dst++ = ((ch >> 8) & 0xFF);
+	*dst++ = ((ch >> 16) & 0xFF);
+	*dst++ = (ch >> 24);
 #else
 	*dst++ = (ch & 0xFF);
 	*dst++ = (ch >> 8);
 #endif
+#endif
     }
     *srcReadPtr = src - srcStart;
     *dstWrotePtr = dst - dstStart;
-- 
cgit v0.12


From 0424b820bc8101075ba4673a8d07df870348f134 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Mon, 20 Apr 2020 07:34:28 +0000
Subject: Backport the fix for encoding-16.1 in a TCL_UTF_MAX=6 build.

---
 generic/tclEncoding.c | 240 +++++++++++++++++++++++++++-----------------------
 1 file changed, 128 insertions(+), 112 deletions(-)

diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 5a9d2d5..da03055 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -83,7 +83,7 @@ typedef struct TableEncodingData {
 } TableEncodingData;
 
 /*
- * The following structures is the clientData for a dynamically-loaded,
+ * Each of the following structures is the clientData for a dynamically-loaded
  * escape-driven encoding that is itself comprised of other simpler encodings.
  * An example is "iso-2022-jp", which uses escape sequences to switch between
  * ascii, jis0208, jis0212, gb2312, and ksc5601. Note that "escape-driven"
@@ -117,8 +117,8 @@ typedef struct EscapeEncodingData {
 				 * 0. */
     int numSubTables;		/* Length of following array. */
     EscapeSubTable subTables[1];/* Information about each EscapeSubTable used
-				 * by this encoding type. The actual size will
-				 * be as large as necessary to hold all
+				 * by this encoding type. The actual size is
+				 * as large as necessary to hold all
 				 * EscapeSubTables. */
 } EscapeEncodingData;
 
@@ -156,7 +156,7 @@ static ProcessGlobalValue encodingFileMap = {
  * A list of directories making up the "library path". Historically this
  * search path has served many uses, but the only one remaining is a base for
  * the encodingSearchPath above. If the application does not explicitly set
- * the encodingSearchPath, then it will be initialized by appending /encoding
+ * the encodingSearchPath, then it is initialized by appending /encoding
  * to each directory in this "libraryPath".
  */
 
@@ -177,7 +177,7 @@ TCL_DECLARE_MUTEX(encodingMutex)
 /*
  * The following are used to hold the default and current system encodings.
  * If NULL is passed to one of the conversion routines, the current setting of
- * the system encoding will be used to perform the conversion.
+ * the system encoding is used to perform the conversion.
  */
 
 static Tcl_Encoding defaultEncoding;
@@ -429,9 +429,8 @@ TclGetLibraryPath(void)
  *	Keeps the per-thread copy of the library path current with changes to
  *	the global copy.
  *
- *	NOTE: this routine returns void, so there's no way to report the error
- *	that searchPath is not a valid list. In that case, this routine will
- *	silently do nothing.
+ *	Since the result of this routine is void, if searchPath is not a valid
+ *	list this routine silently does nothing.
  *
  *----------------------------------------------------------------------
  */
@@ -453,17 +452,16 @@ TclSetLibraryPath(
  *
  * FillEncodingFileMap --
  *
- * 	Called to bring the encoding file map in sync with the current value
- * 	of the encoding search path.
+ *	Called to update the encoding file map with the current value
+ *	of the encoding search path.
  *
- *	Scan the directories on the encoding search path, find the *.enc
- *	files, and store the found pathnames in a map associated with the
- *	encoding name.
+ *	Finds *.end files in the directories on the encoding search path and
+ *	stores the found pathnames in a map associated with the encoding name.
  *
- *	In particular, if $dir is on the encoding search path, and the file
- *	$dir/foo.enc is found, then store a "foo" -> $dir entry in the map.
- *	Later, any need for the "foo" encoding will quickly * be able to
- *	construct the $dir/foo.enc pathname for reading the encoding data.
+ *	If $dir is on the encoding search path and the file $dir/foo.enc is
+ *	found, stores a "foo" -> $dir entry in the map.  if the "foo" encoding
+ *	is needed later, the $dir/foo.enc name can be quickly constructed in
+ *	order to read the encoding data.
  *
  * Results:
  *	None.
@@ -544,19 +542,24 @@ void
 TclInitEncodingSubsystem(void)
 {
     Tcl_EncodingType type;
+    union {
+        char c;
+        short s;
+    } isLe;
 
     if (encodingsInitialized) {
 	return;
     }
 
+    isLe.s = 1;
     Tcl_MutexLock(&encodingMutex);
     Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS);
     Tcl_MutexUnlock(&encodingMutex);
 
     /*
-     * Create a few initial encodings. Note that the UTF-8 to UTF-8
-     * translation is not a no-op, because it will turn a stream of improperly
-     * formed UTF-8 into a properly formed stream.
+     * Create a few initial encodings.  UTF-8 to UTF-8 translation is not a
+     * no-op because it turns a stream of improperly formed UTF-8 into a
+     * properly formed stream.
      */
 
     type.encodingName	= "identity";
@@ -583,7 +586,7 @@ TclInitEncodingSubsystem(void)
     type.fromUtfProc    = UtfToUnicodeProc;
     type.freeProc	= NULL;
     type.nullSize	= 2;
-    type.clientData	= NULL;
+    type.clientData	= INT2PTR(isLe.c);
     Tcl_CreateEncoding(&type);
 
     /*
@@ -755,11 +758,7 @@ Tcl_SetDefaultEncodingDir(
  *	interp was NULL.
  *
  * Side effects:
- *	The new encoding type is entered into a table visible to all
- *	interpreters, keyed off the encoding's name. For each call to this
- *	function, there should eventually be a call to Tcl_FreeEncoding, so
- *	that the database can be cleaned up when encodings aren't needed
- *	anymore.
+ *	LoadEncodingFile is called if necessary.
  *
  *-------------------------------------------------------------------------
  */
@@ -797,15 +796,15 @@ Tcl_GetEncoding(
  *
  * Tcl_FreeEncoding --
  *
- *	This function is called to release an encoding allocated by
- *	Tcl_CreateEncoding() or Tcl_GetEncoding().
+ *	Releases an encoding allocated by Tcl_CreateEncoding() or
+ *	Tcl_GetEncoding().
  *
  * Results:
  *	None.
  *
  * Side effects:
  *	The reference count associated with the encoding is decremented and
- *	the encoding may be deleted if nothing is using it anymore.
+ *	the encoding is deleted if nothing is using it anymore.
  *
  *---------------------------------------------------------------------------
  */
@@ -824,13 +823,14 @@ Tcl_FreeEncoding(
  *
  * FreeEncoding --
  *
- *	This function is called to release an encoding by functions that
- *	already have the encodingMutex.
+ *	Decrements the reference count of an encoding.  The caller must hold
+ *	encodingMutes.
  *
  * Results:
  *	None.
  *
  * Side effects:
+ *	Releases the resource for an encoding if it is now unused.
  *	The reference count associated with the encoding is decremented and
  *	the encoding may be deleted if nothing is using it anymore.
  *
@@ -850,16 +850,17 @@ FreeEncoding(
     if (encodingPtr->refCount<=0) {
 	Tcl_Panic("FreeEncoding: refcount problem !!!");
     }
-    encodingPtr->refCount--;
-    if (encodingPtr->refCount == 0) {
+    if (encodingPtr->refCount-- <= 1) {
 	if (encodingPtr->freeProc != NULL) {
 	    (*encodingPtr->freeProc)(encodingPtr->clientData);
 	}
 	if (encodingPtr->hPtr != NULL) {
 	    Tcl_DeleteHashEntry(encodingPtr->hPtr);
 	}
-	ckfree((char *) encodingPtr->name);
-	ckfree((char *) encodingPtr);
+	if (encodingPtr->name) {
+	    ckfree((char *)encodingPtr->name);
+	}
+	ckfree((char *)encodingPtr);
     }
 }
 
@@ -1020,23 +1021,22 @@ Tcl_SetSystemEncoding(
  *
  * Tcl_CreateEncoding --
  *
- *	This function is called to define a new encoding and the functions
- *	that are used to convert between the specified encoding and Unicode.
+ *	Defines a new encoding, along with the functions that are used to
+ *	convert to and from Unicode.
  *
  * Results:
  *	Returns a token that represents the encoding. If an encoding with the
  *	same name already existed, the old encoding token remains valid and
- *	continues to behave as it used to, and will eventually be garbage
- *	collected when the last reference to it goes away. Any subsequent
- *	calls to Tcl_GetEncoding with the specified name will retrieve the
- *	most recent encoding token.
+ *	continues to behave as it used to, and is eventually garbage collected
+ *	when the last reference to it goes away. Any subsequent calls to
+ *	Tcl_GetEncoding with the specified name retrieve the most recent
+ *	encoding token.
  *
  * Side effects:
- *	The new encoding type is entered into a table visible to all
- *	interpreters, keyed off the encoding's name. For each call to this
- *	function, there should eventually be a call to Tcl_FreeEncoding, so
- *	that the database can be cleaned up when encodings aren't needed
- *	anymore.
+ *	A new record having the name of the encoding is entered into a table of
+ *	encodings visible to all interpreters.  For each call to this function,
+ *	there should eventually be a call to Tcl_FreeEncoding, which cleans
+ *	deletes the record in the table when an encoding is no longer needed.
  *
  *---------------------------------------------------------------------------
  */
@@ -1258,10 +1258,9 @@ Tcl_ExternalToUtf(
  *
  * Tcl_UtfToExternalDString --
  *
- *	Convert a source buffer from UTF-8 into the specified encoding. If any
+ *	Convert a source buffer from UTF-8 to the specified encoding. If any
  *	of the bytes in the source buffer are invalid or cannot be represented
- *	in the target encoding, a default fallback character will be
- *	substituted.
+ *	in the target encoding, a default fallback character is substituted.
  *
  * Results:
  *	The converted bytes are stored in the DString, which is then NULL
@@ -1570,13 +1569,13 @@ OpenEncodingFileChannel(
  *	the data.
  *
  * Results:
- *	The return value is the newly loaded Encoding, or NULL if the file
- *	didn't exist of was in the incorrect format. If NULL was returned, an
- *	error message is left in interp's result object, unless interp was
- *	NULL.
+ *	The return value is the newly loaded Tcl_Encoding or NULL if the file
+ *	didn't exist or could not be processed. If NULL is returned and interp
+ *	is not NULL, an error message is left in interp's result object.
  *
  * Side effects:
- *	File read from disk.
+ *	A corresponding encoding file might be read from persistent storage, in
+ *	which case LoadTableEncoding is called.
  *
  *---------------------------------------------------------------------------
  */
@@ -1584,8 +1583,8 @@ OpenEncodingFileChannel(
 static Tcl_Encoding
 LoadEncodingFile(
     Tcl_Interp *interp,		/* Interp for error reporting, if not NULL. */
-    const char *name)		/* The name of the encoding file on disk and
-				 * also the name for new encoding. */
+    const char *name)		/* The name of both the encoding file
+				 * and the new encoding. */
 {
     Tcl_Channel chan = NULL;
     Tcl_Encoding encoding = NULL;
@@ -1637,27 +1636,27 @@ LoadEncodingFile(
  *
  * LoadTableEncoding --
  *
- *	Helper function for LoadEncodingTable(). Loads a table to that
- *	converts between Unicode and some other encoding and creates an
- *	encoding (using a TableEncoding structure) from that information.
+ *	Helper function for LoadEncodingFile().  Creates a Tcl_EncodingType
+ *	structure along with its corresponding TableEncodingData structure, and
+ *	passes it to Tcl_Createncoding.
  *
- *	File contains binary data, but begins with a marker to indicate
- *	byte-ordering, so that same binary file can be read on either endian
- *	platforms.
+ *	The file contains binary data but begins with a marker to indicate
+ *	byte-ordering so a single binary file can be read on big or
+ *	little-endian systems.
  *
  * Results:
- *	The return value is the new encoding, or NULL if the encoding could
- *	not be created (because the file contained invalid data).
+ *	Returns the new Tcl_Encoding,  or NULL if it could could
+ *	not be created because the file contained invalid data.
  *
  * Side effects:
- *	None.
+ *	See Tcl_CreateEncoding().
  *
  *-------------------------------------------------------------------------
  */
 
 static Tcl_Encoding
 LoadTableEncoding(
-    const char *name,		/* Name for new encoding. */
+    const char *name,		/* Name of the new encoding. */
     int type,			/* Type of encoding (ENCODING_?????). */
     Tcl_Channel chan)		/* File containing new encoding. */
 {
@@ -1769,10 +1768,10 @@ LoadTableEncoding(
     }
 
     /*
-     * Invert toUnicode array to produce the fromUnicode array. Performs a
+     * Invert the toUnicode array to produce the fromUnicode array. Performs a
      * single malloc to get the memory for the array and all the pages needed
-     * by the array. While reading in the toUnicode array, we remembered what
-     * pages that would be needed for the fromUnicode array.
+     * by the array. While reading in the toUnicode array remember what
+     * pages are needed for the fromUnicode array.
      */
 
     if (symbol) {
@@ -1814,8 +1813,8 @@ LoadTableEncoding(
     if (type == ENCODING_MULTIBYTE) {
 	/*
 	 * If multibyte encodings don't have a backslash character, define
-	 * one. Otherwise, on Windows, native file names won't work because
-	 * the backslash in the file name will map to the unknown character
+	 * one. Otherwise, on Windows, native file names don't work because
+	 * the backslash in the file name maps to the unknown character
 	 * (question mark) when converting from UTF-8 to external encoding.
 	 */
 
@@ -1829,13 +1828,13 @@ LoadTableEncoding(
 	unsigned short *page;
 
 	/*
-	 * Make a special symbol encoding that not only maps the symbol
-	 * characters from their Unicode code points down into page 0, but
-	 * also ensure that the characters on page 0 map to themselves. This
-	 * is so that a symbol font can be used to display a simple string
-	 * like "abcd" and have alpha, beta, chi, delta show up, rather than
-	 * have "unknown" chars show up because strictly speaking the symbol
-	 * font doesn't have glyphs for those low ascii chars.
+	 * Make a special symbol encoding that maps each symbol character from
+	 * its Unicode code point down into page 0, and also ensure that each
+	 * characters on page 0 maps to itself so that a symbol font can be
+	 * used to display a simple string like "abcd" and have alpha, beta,
+	 * chi, delta show up, rather than have "unknown" chars show up because
+	 * strictly speaking the symbol font doesn't have glyphs for those low
+	 * ASCII chars.
 	 */
 
 	page = dataPtr->fromUnicode[0];
@@ -1939,7 +1938,7 @@ LoadTableEncoding(
 
 static Tcl_Encoding
 LoadEscapeEncoding(
-    const char *name,		/* Name for new encoding. */
+    const char *name,		/* Name of the new encoding. */
     Tcl_Channel chan)		/* File containing new encoding. */
 {
     int i;
@@ -2318,7 +2317,7 @@ UtfToUtfProc(
  *
  * UnicodeToUtfProc --
  *
- *	Convert from Unicode to UTF-8.
+ *	Convert from UTF-16 to UTF-8.
  *
  * Results:
  *	Returns TCL_OK if conversion was successful.
@@ -2331,7 +2330,7 @@ UtfToUtfProc(
 
 static int
 UnicodeToUtfProc(
-    ClientData clientData,	/* Not used. */
+    ClientData clientData,	/* != NULL means LE, == NUL means BE */
     const char *src,		/* Source string in Unicode. */
     int srcLen,			/* Source string length in bytes. */
     int flags,			/* Conversion control flags. */
@@ -2359,13 +2358,19 @@ UnicodeToUtfProc(
     const char *srcStart, *srcEnd;
     char *dstEnd, *dstStart;
     int result, numChars;
-    Tcl_UniChar ch;
+    unsigned short ch;
 
     result = TCL_OK;
-    if ((srcLen % sizeof(Tcl_UniChar)) != 0) {
+
+    /* check alignment with utf-16 (2 == sizeof(UTF-16)) */
+    if ((srcLen % 2) != 0) {
+	result = TCL_CONVERT_MULTIBYTE;
+	srcLen--;
+    }
+    /* If last code point is a high surrogate, we cannot handle that yet */
+    if ((srcLen >= 2) && ((src[srcLen - (clientData?1:2)] & 0xFC) == 0xD8)) {
 	result = TCL_CONVERT_MULTIBYTE;
-	srcLen /= sizeof(Tcl_UniChar);
-	srcLen *= sizeof(Tcl_UniChar);
+	srcLen-= 2;
     }
 
     srcStart = src;
@@ -2379,17 +2384,21 @@ UnicodeToUtfProc(
 	    result = TCL_CONVERT_NOSPACE;
 	    break;
 	}
+	if (clientData) {
+	    ch = (src[1] & 0xFF) << 8 | (src[0] & 0xFF);
+	} else {
+	    ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF);
+	}
 	/*
-	 * Special case for 1-byte utf chars for speed.  Make sure we
-	 * work with Tcl_UniChar-size data.
+	 * Special case for 1-byte utf chars for speed. Make sure we work with
+	 * unsigned short-size data.
 	 */
-	ch = *(Tcl_UniChar *)src;
 	if (ch && ch < 0x80) {
 	    *dst++ = (ch & 0xFF);
 	} else {
 	    dst += Tcl_UniCharToUtf(ch, dst);
 	}
-	src += sizeof(Tcl_UniChar);
+	src += sizeof(unsigned short);
     }
 
     *srcReadPtr = src - srcStart;
@@ -2403,7 +2412,7 @@ UnicodeToUtfProc(
  *
  * UtfToUnicodeProc --
  *
- *	Convert from UTF-8 to Unicode.
+ *	Convert from UTF-8 to UTF-16.
  *
  * Results:
  *	Returns TCL_OK if conversion was successful.
@@ -2416,8 +2425,7 @@ UnicodeToUtfProc(
 
 static int
 UtfToUnicodeProc(
-    ClientData clientData,	/* TableEncodingData that specifies
-				 * encoding. */
+    ClientData clientData,	/* != NULL means LE, == NUL means BE */
     const char *src,		/* Source string in UTF-8. */
     int srcLen,			/* Source string length in bytes. */
     int flags,			/* Conversion control flags. */
@@ -2444,7 +2452,7 @@ UtfToUnicodeProc(
 {
     const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
     int result, numChars;
-    Tcl_UniChar ch;
+    Tcl_UniChar ch = 0;
 
     srcStart = src;
     srcEnd = src + srcLen;
@@ -2476,27 +2484,37 @@ UtfToUnicodeProc(
 	 * Need to handle this in a way that won't cause misalignment
 	 * by casting dst to a Tcl_UniChar. [Bug 1122671]
 	 */
-#ifdef WORDS_BIGENDIAN
+	if (clientData) {
 #if TCL_UTF_MAX > 4
-	*dst++ = (ch >> 24);
-	*dst++ = ((ch >> 16) & 0xFF);
-	*dst++ = ((ch >> 8) & 0xFF);
-	*dst++ = (ch & 0xFF);
+	    if (ch <= 0xFFFF) {
+		*dst++ = (ch & 0xFF);
+		*dst++ = (ch >> 8);
+	    } else {
+		*dst++ = (((ch - 0x10000) >> 10) & 0xFF);
+		*dst++ = (((ch - 0x10000) >> 18) & 0x3) | 0xD8;
+		*dst++ = (ch & 0xFF);
+		*dst++ = ((ch & 0x3) >> 8) | 0xDC;
+	    }
 #else
-	*dst++ = (ch >> 8);
-	*dst++ = (ch & 0xFF);
+	    *dst++ = (ch & 0xFF);
+	    *dst++ = (ch >> 8);
 #endif
-#else
+	} else {
 #if TCL_UTF_MAX > 4
-	*dst++ = (ch & 0xFF);
-	*dst++ = ((ch >> 8) & 0xFF);
-	*dst++ = ((ch >> 16) & 0xFF);
-	*dst++ = (ch >> 24);
+	    if (ch <= 0xFFFF) {
+		*dst++ = (ch >> 8);
+		*dst++ = (ch & 0xFF);
+	    } else {
+		*dst++ = ((ch & 0x3) >> 8) | 0xDC;
+		*dst++ = (ch & 0xFF);
+		*dst++ = (((ch - 0x10000) >> 18) & 0x3) | 0xD8;
+		*dst++ = (((ch - 0x10000) >> 10) & 0xFF);
+	    }
 #else
-	*dst++ = (ch & 0xFF);
-	*dst++ = (ch >> 8);
-#endif
+	    *dst++ = (ch >> 8);
+	    *dst++ = (ch & 0xFF);
 #endif
+	}
     }
     *srcReadPtr = src - srcStart;
     *dstWrotePtr = dst - dstStart;
@@ -2899,7 +2917,6 @@ Iso88591FromUtfProc(
 		result = TCL_CONVERT_UNKNOWN;
 		break;
 	    }
-
 	    /*
 	     * Plunge on, using '?' as a fallback character.
 	     */
@@ -3387,14 +3404,13 @@ EscapeFromUtfProc(
  *
  * EscapeFreeProc --
  *
- *	This function is invoked when an EscapeEncodingData encoding is
- *	deleted. It deletes the memory used by the encoding.
+ *	Frees resources used by the encoding.
  *
  * Results:
  *	None.
  *
  * Side effects:
- *	Memory freed.
+ *	Memory is freed.
  *
  *---------------------------------------------------------------------------
  */
-- 
cgit v0.12


From 2958d5196de3452ea46a083603d4ce1dc0d05d2a Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Mon, 20 Apr 2020 07:50:22 +0000
Subject: Move the needed apt package in .travis.yml to the top, so they can be
 shared between the images.

---
 .travis.yml | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index e10ca7c..5672c0b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,5 +1,15 @@
 sudo: false
 language: c
+addons:
+  apt:
+    packages:
+      - binutils-mingw-w64-i686
+      - binutils-mingw-w64-x86-64
+      - gcc-mingw-w64
+      - gcc-mingw-w64-base
+      - gcc-mingw-w64-i686
+      - gcc-mingw-w64-x86-64
+      - gcc-multilib
 matrix:
   include:
 # Testing on Linux with various compilers
@@ -146,13 +156,6 @@ matrix:
       os: linux
       dist: bionic
       compiler: x86_64-w64-mingw32-gcc
-      addons:
-        apt:
-          packages:
-            - gcc-mingw-w64-base
-            - binutils-mingw-w64-x86-64
-            - gcc-mingw-w64-x86-64
-            - gcc-mingw-w64
       env:
         - BUILD_DIR=win
         - CFGOPT="--host=x86_64-w64-mingw32 --enable-64bit --enable-threads"
@@ -167,14 +170,6 @@ matrix:
       os: linux
       dist: bionic
       compiler: i686-w64-mingw32-gcc
-      addons:
-        apt:
-          packages:
-            - gcc-mingw-w64-base
-            - binutils-mingw-w64-i686
-            - gcc-mingw-w64-i686
-            - gcc-mingw-w64
-            - gcc-multilib
       env:
         - BUILD_DIR=win
         - CFGOPT="--host=i686-w64-mingw32 --enable-threads"
-- 
cgit v0.12


From eb263e1e057977a19cf246eeda844bb4bd31342a Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Mon, 20 Apr 2020 11:30:31 +0000
Subject: Code cleanup (more typecasts), making it more compatible with
 -Wc++-compat. Less use of /* ARGUSED */.

---
 generic/tclEncoding.c | 116 ++++++++++++++++++++++----------------------------
 generic/tclUtil.c     |  61 +++++++++++++-------------
 generic/tclZlib.c     |  64 ++++++++++++++--------------
 win/tclWinPipe.c      |   1 -
 4 files changed, 114 insertions(+), 128 deletions(-)

diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index b879ec8..e012570 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -18,7 +18,7 @@ typedef size_t (LengthProc)(const char *src);
  * convert between various character sets and UTF-8.
  */
 
-typedef struct Encoding {
+typedef struct {
     char *name;			/* Name of encoding. Malloced because (1) hash
 				 * table entry that owns this encoding may be
 				 * freed prior to this encoding being freed,
@@ -57,7 +57,7 @@ typedef struct Encoding {
  * encoding.
  */
 
-typedef struct TableEncodingData {
+typedef struct {
     int fallback;		/* Character (in this encoding) to substitute
 				 * when this encoding cannot represent a UTF-8
 				 * character. */
@@ -91,7 +91,7 @@ typedef struct TableEncodingData {
  * for switching character sets.
  */
 
-typedef struct EscapeSubTable {
+typedef struct {
     unsigned sequenceLen;	/* Length of following string. */
     char sequence[16];		/* Escape code that marks this encoding. */
     char name[32];		/* Name for encoding. */
@@ -100,7 +100,7 @@ typedef struct EscapeSubTable {
 				 * yet. */
 } EscapeSubTable;
 
-typedef struct EscapeEncodingData {
+typedef struct {
     int fallback;		/* Character (in this encoding) to substitute
 				 * when this encoding cannot represent a UTF-8
 				 * character. */
@@ -195,35 +195,25 @@ static unsigned short emptyPage[256];
  * Functions used only in this module.
  */
 
-static int		BinaryProc(ClientData clientData,
-			    const char *src, int srcLen, int flags,
-			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
-			    int *srcReadPtr, int *dstWrotePtr,
-			    int *dstCharsPtr);
-static void		DupEncodingIntRep(Tcl_Obj *srcPtr, Tcl_Obj *dupPtr);
-static void		EscapeFreeProc(ClientData clientData);
-static int		EscapeFromUtfProc(ClientData clientData,
-			    const char *src, int srcLen, int flags,
-			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
-			    int *srcReadPtr, int *dstWrotePtr,
-			    int *dstCharsPtr);
-static int		EscapeToUtfProc(ClientData clientData,
-			    const char *src, int srcLen, int flags,
-			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
-			    int *srcReadPtr, int *dstWrotePtr,
-			    int *dstCharsPtr);
-static void		FillEncodingFileMap(void);
-static void		FreeEncoding(Tcl_Encoding encoding);
-static void		FreeEncodingIntRep(Tcl_Obj *objPtr);
-static Encoding *	GetTableEncoding(EscapeEncodingData *dataPtr,
-			    int state);
-static Tcl_Encoding	LoadEncodingFile(Tcl_Interp *interp, const char *name);
-static Tcl_Encoding	LoadTableEncoding(const char *name, int type,
-			    Tcl_Channel chan);
-static Tcl_Encoding	LoadEscapeEncoding(const char *name, Tcl_Channel chan);
-static Tcl_Channel	OpenEncodingFileChannel(Tcl_Interp *interp,
-			    const char *name);
-static void		TableFreeProc(ClientData clientData);
+static Tcl_EncodingConvertProc	BinaryProc;
+static Tcl_DupInternalRepProc	DupEncodingIntRep;
+static Tcl_EncodingFreeProc	EscapeFreeProc;
+static Tcl_EncodingConvertProc	EscapeFromUtfProc;
+static Tcl_EncodingConvertProc	EscapeToUtfProc;
+static void			FillEncodingFileMap(void);
+static void			FreeEncoding(Tcl_Encoding encoding);
+static Tcl_FreeInternalRepProc	FreeEncodingIntRep;
+static Encoding *		GetTableEncoding(EscapeEncodingData *dataPtr,
+				    int state);
+static Tcl_Encoding		LoadEncodingFile(Tcl_Interp *interp,
+				    const char *name);
+static Tcl_Encoding		LoadTableEncoding(const char *name, int type,
+				    Tcl_Channel chan);
+static Tcl_Encoding		LoadEscapeEncoding(const char *name,
+				    Tcl_Channel chan);
+static Tcl_Channel		OpenEncodingFileChannel(Tcl_Interp *interp,
+				    const char *name);
+static Tcl_EncodingFreeProc	TableFreeProc;
 static int		TableFromUtfProc(ClientData clientData,
 			    const char *src, int srcLen, int flags,
 			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
@@ -335,7 +325,7 @@ static void
 FreeEncodingIntRep(
     Tcl_Obj *objPtr)
 {
-    Tcl_FreeEncoding(objPtr->internalRep.twoPtrValue.ptr1);
+    Tcl_FreeEncoding((Tcl_Encoding)objPtr->internalRep.twoPtrValue.ptr1);
     objPtr->typePtr = NULL;
 }
 
@@ -596,14 +586,14 @@ TclInitEncodingSubsystem(void)
      * code to duplicate the structure of a table encoding here.
      */
 
-    dataPtr = ckalloc(sizeof(TableEncodingData));
+    dataPtr = (TableEncodingData *)ckalloc(sizeof(TableEncodingData));
     memset(dataPtr, 0, sizeof(TableEncodingData));
     dataPtr->fallback = '?';
 
     size = 256*(sizeof(unsigned short *) + sizeof(unsigned short));
-    dataPtr->toUnicode = ckalloc(size);
+    dataPtr->toUnicode = (unsigned short **)ckalloc(size);
     memset(dataPtr->toUnicode, 0, size);
-    dataPtr->fromUnicode = ckalloc(size);
+    dataPtr->fromUnicode = (unsigned short **)ckalloc(size);
     memset(dataPtr->fromUnicode, 0, size);
 
     dataPtr->toUnicode[0] = (unsigned short *) (dataPtr->toUnicode + 256);
@@ -669,7 +659,7 @@ TclFinalizeEncodingSubsystem(void)
 	 * cleaned up.
 	 */
 
-	FreeEncoding(Tcl_GetHashValue(hPtr));
+	FreeEncoding((Tcl_Encoding)Tcl_GetHashValue(hPtr));
 	hPtr = Tcl_FirstHashEntry(&encodingTable, &search);
     }
 
@@ -778,7 +768,7 @@ Tcl_GetEncoding(
 
     hPtr = Tcl_FindHashEntry(&encodingTable, name);
     if (hPtr != NULL) {
-	encodingPtr = Tcl_GetHashValue(hPtr);
+	encodingPtr = (Encoding *)Tcl_GetHashValue(hPtr);
 	encodingPtr->refCount++;
 	Tcl_MutexUnlock(&encodingMutex);
 	return (Tcl_Encoding) encodingPtr;
@@ -926,7 +916,7 @@ Tcl_GetEncodingNames(
     Tcl_MutexLock(&encodingMutex);
     for (hPtr = Tcl_FirstHashEntry(&encodingTable, &search); hPtr != NULL;
 	    hPtr = Tcl_NextHashEntry(&search)) {
-	Encoding *encodingPtr = Tcl_GetHashValue(hPtr);
+	Encoding *encodingPtr = (Encoding *)Tcl_GetHashValue(hPtr);
 
 	Tcl_CreateHashEntry(&table,
 		Tcl_NewStringObj(encodingPtr->name, -1), &dummy);
@@ -1056,13 +1046,13 @@ Tcl_CreateEncoding(
 	 * reference goes away.
 	 */
 
-	encodingPtr = Tcl_GetHashValue(hPtr);
+	encodingPtr = (Encoding *)Tcl_GetHashValue(hPtr);
 	encodingPtr->hPtr = NULL;
     }
 
-    name = ckalloc(strlen(typePtr->encodingName) + 1);
+    name = (char *)ckalloc(strlen(typePtr->encodingName) + 1);
 
-    encodingPtr = ckalloc(sizeof(Encoding));
+    encodingPtr = (Encoding *)ckalloc(sizeof(Encoding));
     encodingPtr->name		= strcpy(name, typePtr->encodingName);
     encodingPtr->toUtfProc	= typePtr->toUtfProc;
     encodingPtr->fromUtfProc	= typePtr->fromUtfProc;
@@ -1740,7 +1730,7 @@ LoadTableEncoding(
 #undef PAGESIZE
 #define PAGESIZE    (256 * sizeof(unsigned short))
 
-    dataPtr = ckalloc(sizeof(TableEncodingData));
+    dataPtr = (TableEncodingData *)ckalloc(sizeof(TableEncodingData));
     memset(dataPtr, 0, sizeof(TableEncodingData));
 
     dataPtr->fallback = fallback;
@@ -1752,7 +1742,7 @@ LoadTableEncoding(
      */
 
     size = 256 * sizeof(unsigned short *) + numPages * PAGESIZE;
-    dataPtr->toUnicode = ckalloc(size);
+    dataPtr->toUnicode = (unsigned short **)ckalloc(size);
     memset(dataPtr->toUnicode, 0, size);
     pageMemPtr = (unsigned short *) (dataPtr->toUnicode + 256);
 
@@ -1813,7 +1803,7 @@ LoadTableEncoding(
 	}
     }
     size = 256 * sizeof(unsigned short *) + numPages * PAGESIZE;
-    dataPtr->fromUnicode = ckalloc(size);
+    dataPtr->fromUnicode = (unsigned short **)ckalloc(size);
     memset(dataPtr->fromUnicode, 0, size);
     pageMemPtr = (unsigned short *) (dataPtr->fromUnicode + 256);
 
@@ -2051,7 +2041,7 @@ LoadEscapeEncoding(
 
     size = sizeof(EscapeEncodingData) - sizeof(EscapeSubTable)
 	    + Tcl_DStringLength(&escapeData);
-    dataPtr = ckalloc(size);
+    dataPtr = (EscapeEncodingData *)ckalloc(size);
     dataPtr->initLen = strlen(init);
     memcpy(dataPtr->init, init, dataPtr->initLen + 1);
     dataPtr->finalLen = strlen(final);
@@ -2661,7 +2651,7 @@ TableToUtfProc(
     Tcl_UniChar ch = 0;
     const unsigned short *const *toUnicode;
     const unsigned short *pageZero;
-    TableEncodingData *dataPtr = clientData;
+    TableEncodingData *dataPtr = (TableEncodingData *)clientData;
 
     if (flags & TCL_ENCODING_CHAR_LIMIT) {
 	charLimit = *dstCharsPtr;
@@ -2772,7 +2762,7 @@ TableFromUtfProc(
     const char *dstStart, *dstEnd, *prefixBytes;
     Tcl_UniChar ch = 0;
     int result, len, word, numChars;
-    TableEncodingData *dataPtr = clientData;
+    TableEncodingData *dataPtr = (TableEncodingData *)clientData;
     const unsigned short *const *fromUnicode;
 
     result = TCL_OK;
@@ -2979,11 +2969,9 @@ Iso88591FromUtfProc(
 {
     const char *srcStart, *srcEnd, *srcClose;
     const char *dstStart, *dstEnd;
-    int result, numChars;
+    int result = TCL_OK, numChars;
     Tcl_UniChar ch = 0;
 
-    result = TCL_OK;
-
     srcStart = src;
     srcEnd = src + srcLen;
     srcClose = srcEnd;
@@ -3067,7 +3055,7 @@ TableFreeProc(
     ClientData clientData)	/* TableEncodingData that specifies
 				 * encoding. */
 {
-    TableEncodingData *dataPtr = clientData;
+    TableEncodingData *dataPtr = (TableEncodingData *)clientData;
 
     /*
      * Make sure we aren't freeing twice on shutdown. [Bug 219314]
@@ -3125,7 +3113,7 @@ EscapeToUtfProc(
 				 * correspond to the bytes stored in the
 				 * output buffer. */
 {
-    EscapeEncodingData *dataPtr = clientData;
+    EscapeEncodingData *dataPtr = (EscapeEncodingData *)clientData;
     const char *prefixBytes, *tablePrefixBytes, *srcStart, *srcEnd;
     const unsigned short *const *tableToUnicode;
     const Encoding *encodingPtr;
@@ -3136,8 +3124,8 @@ EscapeToUtfProc(
 	charLimit = *dstCharsPtr;
     }
     result = TCL_OK;
-    tablePrefixBytes = NULL;	/* lint. */
-    tableToUnicode = NULL;	/* lint. */
+    tablePrefixBytes = NULL;
+    tableToUnicode = NULL;
     prefixBytes = dataPtr->prefixBytes;
     encodingPtr = NULL;
 
@@ -3261,7 +3249,7 @@ EscapeToUtfProc(
 	    TableEncodingData *tableDataPtr;
 
 	    encodingPtr = GetTableEncoding(dataPtr, state);
-	    tableDataPtr = encodingPtr->clientData;
+	    tableDataPtr = (TableEncodingData *)encodingPtr->clientData;
 	    tablePrefixBytes = tableDataPtr->prefixBytes;
 	    tableToUnicode = (const unsigned short *const*)
 		    tableDataPtr->toUnicode;
@@ -3339,7 +3327,7 @@ EscapeFromUtfProc(
 				 * correspond to the bytes stored in the
 				 * output buffer. */
 {
-    EscapeEncodingData *dataPtr = clientData;
+    EscapeEncodingData *dataPtr = (EscapeEncodingData *)clientData;
     const Encoding *encodingPtr;
     const char *srcStart, *srcEnd, *srcClose;
     const char *dstStart, *dstEnd;
@@ -3380,7 +3368,7 @@ EscapeFromUtfProc(
     }
 
     encodingPtr = GetTableEncoding(dataPtr, state);
-    tableDataPtr = encodingPtr->clientData;
+    tableDataPtr = (const TableEncodingData *)encodingPtr->clientData;
     tablePrefixBytes = tableDataPtr->prefixBytes;
     tableFromUnicode = (const unsigned short *const *)
 	    tableDataPtr->fromUnicode;
@@ -3408,7 +3396,7 @@ EscapeFromUtfProc(
 	    oldState = state;
 	    for (state = 0; state < dataPtr->numSubTables; state++) {
 		encodingPtr = GetTableEncoding(dataPtr, state);
-		tableDataPtr = encodingPtr->clientData;
+		tableDataPtr = (const TableEncodingData *)encodingPtr->clientData;
 		word = tableDataPtr->fromUnicode[(ch >> 8)][ch & 0xFF];
 		if (word != 0) {
 		    break;
@@ -3422,7 +3410,7 @@ EscapeFromUtfProc(
 		    break;
 		}
 		encodingPtr = GetTableEncoding(dataPtr, state);
-		tableDataPtr = encodingPtr->clientData;
+		tableDataPtr = (const TableEncodingData *)encodingPtr->clientData;
 		word = tableDataPtr->fallback;
 	    }
 
@@ -3528,7 +3516,7 @@ EscapeFreeProc(
     ClientData clientData)	/* EscapeEncodingData that specifies
 				 * encoding. */
 {
-    EscapeEncodingData *dataPtr = clientData;
+    EscapeEncodingData *dataPtr = (EscapeEncodingData *)clientData;
     EscapeSubTable *subTablePtr;
     int i;
 
@@ -3693,8 +3681,8 @@ InitializeEncodingSearchPath(
     bytes = Tcl_GetStringFromObj(searchPathObj, &numBytes);
 
     *lengthPtr = numBytes;
-    *valuePtr = ckalloc(numBytes + 1);
-    memcpy(*valuePtr, bytes, (size_t) numBytes + 1);
+    *valuePtr = (char *)ckalloc(numBytes + 1);
+    memcpy(*valuePtr, bytes, numBytes + 1);
     Tcl_DecrRefCount(searchPathObj);
 }
 
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 4139804..13b0d55 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -577,7 +577,7 @@ FindElement(
     const char *limit;		/* Points just after list/dict's last byte. */
     int openBraces = 0;		/* Brace nesting level during parse. */
     int inQuotes = 0;
-    int size = 0;		/* lint. */
+    int size = 0;
     int numChars;
     int literal = 1;
     const char *p2;
@@ -869,7 +869,7 @@ Tcl_SplitList(
 
     size = TclMaxListLength(list, -1, &end) + 1;
     length = end - list;
-    argv = ckalloc((size * sizeof(char *)) + length + 1);
+    argv = (const char **)ckalloc((size * sizeof(char *)) + length + 1);
 
     for (i = 0, p = ((char *) argv) + size*sizeof(char *);
 	    *list != 0;  i++) {
@@ -1577,7 +1577,7 @@ Tcl_Merge(
      */
 
     if (argc == 0) {
-	result = ckalloc(1);
+	result = (char *)ckalloc(1);
 	result[0] = '\0';
 	return result;
     }
@@ -1589,7 +1589,7 @@ Tcl_Merge(
     if (argc <= LOCAL_SIZE) {
 	flagPtr = localFlags;
     } else {
-	flagPtr = ckalloc(argc);
+	flagPtr = (char *)ckalloc(argc);
     }
     for (i = 0; i < argc; i++) {
 	flagPtr[i] = ( i ? TCL_DONT_QUOTE_HASH : 0 );
@@ -1607,7 +1607,7 @@ Tcl_Merge(
      * Pass two: copy into the result area.
      */
 
-    result = ckalloc(bytesNeeded);
+    result = (char *)ckalloc(bytesNeeded);
     dst = result;
     for (i = 0; i < argc; i++) {
 	flagPtr[i] |= ( i ? TCL_DONT_QUOTE_HASH : 0 );
@@ -1931,7 +1931,7 @@ Tcl_Concat(
      * All element bytes + (argc - 1) spaces + 1 terminating NULL.
      */
 
-    result = ckalloc(bytesNeeded + argc);
+    result = (char *)ckalloc(bytesNeeded + argc);
 
     for (p = result, i = 0;  i < argc;  i++) {
 	int triml, trimr, elemLength;
@@ -2666,7 +2666,7 @@ Tcl_DStringAppend(
     if (newSize >= dsPtr->spaceAvl) {
 	dsPtr->spaceAvl = newSize * 2;
 	if (dsPtr->string == dsPtr->staticSpace) {
-	    char *newString = ckalloc(dsPtr->spaceAvl);
+	    char *newString = (char *)ckalloc(dsPtr->spaceAvl);
 
 	    memcpy(newString, dsPtr->string, dsPtr->length);
 	    dsPtr->string = newString;
@@ -2679,7 +2679,7 @@ Tcl_DStringAppend(
 		offset = bytes - dsPtr->string;
 	    }
 
-	    dsPtr->string = ckrealloc(dsPtr->string, dsPtr->spaceAvl);
+	    dsPtr->string = (char *)ckrealloc(dsPtr->string, dsPtr->spaceAvl);
 
 	    if (offset >= 0) {
 		bytes = dsPtr->string + offset;
@@ -2797,7 +2797,7 @@ Tcl_DStringAppendElement(
     if (newSize >= dsPtr->spaceAvl) {
 	dsPtr->spaceAvl = newSize * 2;
 	if (dsPtr->string == dsPtr->staticSpace) {
-	    char *newString = ckalloc(dsPtr->spaceAvl);
+	    char *newString = (char *)ckalloc(dsPtr->spaceAvl);
 
 	    memcpy(newString, dsPtr->string, dsPtr->length);
 	    dsPtr->string = newString;
@@ -2810,7 +2810,7 @@ Tcl_DStringAppendElement(
 		offset = element - dsPtr->string;
 	    }
 
-	    dsPtr->string = ckrealloc(dsPtr->string, dsPtr->spaceAvl);
+	    dsPtr->string = (char *)ckrealloc(dsPtr->string, dsPtr->spaceAvl);
 
 	    if (offset >= 0) {
 		element = dsPtr->string + offset;
@@ -2884,12 +2884,12 @@ Tcl_DStringSetLength(
 	    dsPtr->spaceAvl = length + 1;
 	}
 	if (dsPtr->string == dsPtr->staticSpace) {
-	    char *newString = ckalloc(dsPtr->spaceAvl);
+	    char *newString = (char *)ckalloc(dsPtr->spaceAvl);
 
 	    memcpy(newString, dsPtr->string, dsPtr->length);
 	    dsPtr->string = newString;
 	} else {
-	    dsPtr->string = ckrealloc(dsPtr->string, dsPtr->spaceAvl);
+	    dsPtr->string = (char *)ckrealloc(dsPtr->string, dsPtr->spaceAvl);
 	}
     }
     dsPtr->length = length;
@@ -3034,7 +3034,7 @@ Tcl_DStringGetResult(
 	    dsPtr->string = iPtr->result;
 	    dsPtr->spaceAvl = dsPtr->length+1;
 	} else {
-	    dsPtr->string = ckalloc(dsPtr->length+1);
+	    dsPtr->string = (char *)ckalloc(dsPtr->length+1);
 	    memcpy(dsPtr->string, iPtr->result, dsPtr->length+1);
 	    iPtr->freeProc(iPtr->result);
 	}
@@ -3045,7 +3045,7 @@ Tcl_DStringGetResult(
 	    dsPtr->string = dsPtr->staticSpace;
 	    dsPtr->spaceAvl = TCL_DSTRING_STATIC_SIZE;
 	} else {
-	    dsPtr->string = ckalloc(dsPtr->length+1);
+	    dsPtr->string = (char *)ckalloc(dsPtr->length+1);
 	    dsPtr->spaceAvl = dsPtr->length + 1;
 	}
 	memcpy(dsPtr->string, iPtr->result, dsPtr->length+1);
@@ -3203,7 +3203,7 @@ Tcl_PrintDouble(
     int signum;
     char *digits;
     char *end;
-    int *precisionPtr = Tcl_GetThreadData(&precisionKey, sizeof(int));
+    int *precisionPtr = (int *)Tcl_GetThreadData(&precisionKey, sizeof(int));
 
     /*
      * Handle NaN.
@@ -3365,7 +3365,6 @@ Tcl_PrintDouble(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
 char *
 TclPrecTraceProc(
     ClientData clientData,	/* Not used. */
@@ -3376,7 +3375,7 @@ TclPrecTraceProc(
 {
     Tcl_Obj *value;
     int prec;
-    int *precisionPtr = Tcl_GetThreadData(&precisionKey, (int) sizeof(int));
+    int *precisionPtr = (int *)Tcl_GetThreadData(&precisionKey, sizeof(int));
 
     /*
      * If the variable is unset, then recreate the trace.
@@ -3724,8 +3723,8 @@ UpdateStringOfEndOffset(
 	buffer[len++] = '-';
 	len += TclFormatInt(buffer+len, -(objPtr->internalRep.longValue));
     }
-    objPtr->bytes = ckalloc((unsigned) len+1);
-    memcpy(objPtr->bytes, buffer, (unsigned) len+1);
+    objPtr->bytes = (char *)ckalloc(len+1);
+    memcpy(objPtr->bytes, buffer, len+1);
     objPtr->length = len;
 }
 
@@ -3734,14 +3733,14 @@ UpdateStringOfEndOffset(
  *
  * GetEndOffsetFromObj --
  *
- *      Look for a string of the form "end[+-]offset" and convert it to an
- *      internal representation holding the offset.
+ *	Look for a string of the form "end[+-]offset" and convert it to an
+ *	internal representation holding the offset.
  *
  * Results:
- *      Tcl return code.
+ *	Tcl return code.
  *
  * Side effects:
- *      May store a Tcl_ObjType.
+ *	May store a Tcl_ObjType.
  *
  *----------------------------------------------------------------------
  */
@@ -4086,7 +4085,7 @@ ClearHash(
 
     for (hPtr = Tcl_FirstHashEntry(tablePtr, &search); hPtr != NULL;
 	    hPtr = Tcl_NextHashEntry(&search)) {
-	Tcl_Obj *objPtr = Tcl_GetHashValue(hPtr);
+	Tcl_Obj *objPtr = (Tcl_Obj *)Tcl_GetHashValue(hPtr);
 
 	Tcl_DecrRefCount(objPtr);
 	Tcl_DeleteHashEntry(hPtr);
@@ -4116,10 +4115,10 @@ GetThreadHash(
     Tcl_ThreadDataKey *keyPtr)
 {
     Tcl_HashTable **tablePtrPtr =
-	    Tcl_GetThreadData(keyPtr, sizeof(Tcl_HashTable *));
+	    (Tcl_HashTable **)Tcl_GetThreadData(keyPtr, sizeof(Tcl_HashTable *));
 
     if (NULL == *tablePtrPtr) {
-	*tablePtrPtr = ckalloc(sizeof(Tcl_HashTable));
+	*tablePtrPtr = (Tcl_HashTable *)ckalloc(sizeof(Tcl_HashTable));
 	Tcl_CreateThreadExitHandler(FreeThreadHash, *tablePtrPtr);
 	Tcl_InitHashTable(*tablePtrPtr, TCL_ONE_WORD_KEYS);
     }
@@ -4144,7 +4143,7 @@ static void
 FreeThreadHash(
     ClientData clientData)
 {
-    Tcl_HashTable *tablePtr = clientData;
+    Tcl_HashTable *tablePtr = (Tcl_HashTable *)clientData;
 
     ClearHash(tablePtr);
     Tcl_DeleteHashTable(tablePtr);
@@ -4166,7 +4165,7 @@ static void
 FreeProcessGlobalValue(
     ClientData clientData)
 {
-    ProcessGlobalValue *pgvPtr = clientData;
+    ProcessGlobalValue *pgvPtr = (ProcessGlobalValue *)clientData;
 
     pgvPtr->epoch++;
     pgvPtr->numBytes = 0;
@@ -4214,7 +4213,7 @@ TclSetProcessGlobalValue(
 	Tcl_CreateExitHandler(FreeProcessGlobalValue, pgvPtr);
     }
     bytes = Tcl_GetStringFromObj(newValue, &pgvPtr->numBytes);
-    pgvPtr->value = ckalloc(pgvPtr->numBytes + 1);
+    pgvPtr->value = (char *)ckalloc(pgvPtr->numBytes + 1);
     memcpy(pgvPtr->value, bytes, pgvPtr->numBytes + 1);
     if (pgvPtr->encoding) {
 	Tcl_FreeEncoding(pgvPtr->encoding);
@@ -4278,7 +4277,7 @@ TclGetProcessGlobalValue(
 	    Tcl_DStringLength(&native), &newValue);
 	    Tcl_DStringFree(&native);
 	    ckfree(pgvPtr->value);
-	    pgvPtr->value = ckalloc(Tcl_DStringLength(&newValue) + 1);
+	    pgvPtr->value = (char *)ckalloc(Tcl_DStringLength(&newValue) + 1);
 	    memcpy(pgvPtr->value, Tcl_DStringValue(&newValue),
 		    Tcl_DStringLength(&newValue) + 1);
 	    Tcl_DStringFree(&newValue);
@@ -4328,7 +4327,7 @@ TclGetProcessGlobalValue(
 	Tcl_SetHashValue(hPtr, value);
 	Tcl_IncrRefCount(value);
     }
-    return Tcl_GetHashValue(hPtr);
+    return (Tcl_Obj *)Tcl_GetHashValue(hPtr);
 }
 
 /*
diff --git a/generic/tclZlib.c b/generic/tclZlib.c
index ed29ff9..86fda86 100644
--- a/generic/tclZlib.c
+++ b/generic/tclZlib.c
@@ -593,7 +593,7 @@ SetInflateDictionary(
 	int length;
 	unsigned char *bytes = Tcl_GetByteArrayFromObj(compDictObj, &length);
 
-	return inflateSetDictionary(strm, bytes, (unsigned) length);
+	return inflateSetDictionary(strm, bytes, length);
     }
     return Z_OK;
 }
@@ -607,7 +607,7 @@ SetDeflateDictionary(
 	int length;
 	unsigned char *bytes = Tcl_GetByteArrayFromObj(compDictObj, &length);
 
-	return deflateSetDictionary(strm, bytes, (unsigned) length);
+	return deflateSetDictionary(strm, bytes, length);
     }
     return Z_OK;
 }
@@ -623,7 +623,7 @@ Deflate(
     int e;
 
     strm->next_out = (Bytef *) bufferPtr;
-    strm->avail_out = (unsigned) bufferSize;
+    strm->avail_out = bufferSize;
     e = deflate(strm, flush);
     if (writtenPtr != NULL) {
 	*writtenPtr = bufferSize - strm->avail_out;
@@ -697,7 +697,7 @@ Tcl_ZlibStreamInit(
 	case TCL_ZLIB_FORMAT_GZIP:
 	    wbits = WBITS_GZIP;
 	    if (dictObj) {
-		gzHeaderPtr = ckalloc(sizeof(GzipHeader));
+		gzHeaderPtr = (GzipHeader *)ckalloc(sizeof(GzipHeader));
 		memset(gzHeaderPtr, 0, sizeof(GzipHeader));
 		if (GenerateHeader(interp, dictObj, gzHeaderPtr,
 			NULL) != TCL_OK) {
@@ -731,7 +731,7 @@ Tcl_ZlibStreamInit(
 	    break;
 	case TCL_ZLIB_FORMAT_GZIP:
 	    wbits = WBITS_GZIP;
-	    gzHeaderPtr = ckalloc(sizeof(GzipHeader));
+	    gzHeaderPtr = (GzipHeader *)ckalloc(sizeof(GzipHeader));
 	    memset(gzHeaderPtr, 0, sizeof(GzipHeader));
 	    gzHeaderPtr->header.name = (Bytef *)
 		    gzHeaderPtr->nativeFilenameBuf;
@@ -757,7 +757,7 @@ Tcl_ZlibStreamInit(
 		" TCL_ZLIB_STREAM_INFLATE");
     }
 
-    zshPtr = ckalloc(sizeof(ZlibStreamHandle));
+    zshPtr = (ZlibStreamHandle *)ckalloc(sizeof(ZlibStreamHandle));
     zshPtr->interp = interp;
     zshPtr->mode = mode;
     zshPtr->format = format;
@@ -884,7 +884,7 @@ static void
 ZlibStreamCmdDelete(
     ClientData cd)
 {
-    ZlibStreamHandle *zshPtr = cd;
+    ZlibStreamHandle *zshPtr = (ZlibStreamHandle *)cd;
 
     zshPtr->cmd = NULL;
     ZlibStreamCleanup(zshPtr);
@@ -1231,7 +1231,7 @@ Tcl_ZlibStreamPut(
 	if (outSize > BUFFER_SIZE_LIMIT) {
 	    outSize = BUFFER_SIZE_LIMIT;
 	}
-	dataTmp = ckalloc(outSize);
+	dataTmp = (char *)ckalloc(outSize);
 
 	while (1) {
 	    e = Deflate(&zshPtr->stream, dataTmp, outSize, flush, &toStore);
@@ -1265,7 +1265,7 @@ Tcl_ZlibStreamPut(
 	    if (outSize < BUFFER_SIZE_LIMIT) {
 		outSize = BUFFER_SIZE_LIMIT;
 		/* There may be *lots* of data left to output... */
-		dataTmp = ckrealloc(dataTmp, outSize);
+		dataTmp = (char *)ckrealloc(dataTmp, outSize);
 	    }
 	}
 
@@ -1750,10 +1750,10 @@ Tcl_ZlibInflate(
     if (gzipHeaderDictObj) {
 	headerPtr = &header;
 	memset(headerPtr, 0, sizeof(gz_header));
-	nameBuf = ckalloc(MAXPATHLEN);
+	nameBuf = (char *)ckalloc(MAXPATHLEN);
 	header.name = (Bytef *) nameBuf;
 	header.name_max = MAXPATHLEN - 1;
-	commentBuf = ckalloc(MAX_COMMENT_LEN);
+	commentBuf = (char *)ckalloc(MAX_COMMENT_LEN);
 	header.comment = (Bytef *) commentBuf;
 	header.comm_max = MAX_COMMENT_LEN - 1;
     }
@@ -1858,7 +1858,7 @@ Tcl_ZlibInflate(
     if (headerPtr != NULL) {
 	ExtractHeader(&header, gzipHeaderDictObj);
 	SetValue(gzipHeaderDictObj, "size",
-		Tcl_NewLongObj((long) stream.total_out));
+		Tcl_NewLongObj(stream.total_out));
 	ckfree(nameBuf);
 	ckfree(commentBuf);
     }
@@ -1894,7 +1894,7 @@ Tcl_ZlibCRC32(
     int len)
 {
     /* Nothing much to do, just wrap the crc32(). */
-    return crc32(crc, (Bytef *) buf, (unsigned) len);
+    return crc32(crc, (Bytef *) buf, len);
 }
 
 unsigned int
@@ -1903,7 +1903,7 @@ Tcl_ZlibAdler32(
     const unsigned char *buf,
     int len)
 {
-    return adler32(adler, (Bytef *) buf, (unsigned) len);
+    return adler32(adler, (Bytef *) buf, len);
 }
 
 /*
@@ -2519,7 +2519,7 @@ ZlibStreamCmd(
     int objc,
     Tcl_Obj *const objv[])
 {
-    Tcl_ZlibStream zstream = cd;
+    Tcl_ZlibStream zstream = (Tcl_ZlibStream)cd;
     int command, count, code;
     Tcl_Obj *obj;
     static const char *const cmds[] = {
@@ -2645,7 +2645,7 @@ ZlibStreamAddCmd(
     int objc,
     Tcl_Obj *const objv[])
 {
-    Tcl_ZlibStream zstream = cd;
+    Tcl_ZlibStream zstream = (Tcl_ZlibStream)cd;
     int index, code, buffersize = -1, flush = -1, i;
     Tcl_Obj *obj, *compDictObj = NULL;
     static const char *const add_options[] = {
@@ -2769,7 +2769,7 @@ ZlibStreamPutCmd(
     int objc,
     Tcl_Obj *const objv[])
 {
-    Tcl_ZlibStream zstream = cd;
+    Tcl_ZlibStream zstream = (Tcl_ZlibStream)cd;
     int index, flush = -1, i;
     Tcl_Obj *compDictObj = NULL;
     static const char *const put_options[] = {
@@ -2858,7 +2858,7 @@ ZlibStreamHeaderCmd(
     int objc,
     Tcl_Obj *const objv[])
 {
-    ZlibStreamHandle *zshPtr = cd;
+    ZlibStreamHandle *zshPtr = (ZlibStreamHandle *)cd;
     Tcl_Obj *resultObj;
 
     if (objc != 2) {
@@ -2895,7 +2895,7 @@ ZlibTransformClose(
     ClientData instanceData,
     Tcl_Interp *interp)
 {
-    ZlibChannelData *cd = instanceData;
+    ZlibChannelData *cd = (ZlibChannelData *)instanceData;
     int e, written, result = TCL_OK;
 
     /*
@@ -2988,7 +2988,7 @@ ZlibTransformInput(
     int toRead,
     int *errorCodePtr)
 {
-    ZlibChannelData *cd = instanceData;
+    ZlibChannelData *cd = (ZlibChannelData *)instanceData;
     Tcl_DriverInputProc *inProc =
 	    Tcl_ChannelInputProc(Tcl_GetChannelType(cd->parent));
     int readBytes, gotBytes, copied;
@@ -3027,7 +3027,7 @@ ZlibTransformInput(
 	 * reading over the border.
 	 */
 
-	readBytes = Tcl_ReadRaw(cd->parent, cd->inBuffer, 
+	readBytes = Tcl_ReadRaw(cd->parent, cd->inBuffer,
 		cd->readAheadLimit <= cd->inAllocated ?
 		cd->readAheadLimit  : cd->inAllocated);
 
@@ -3104,7 +3104,7 @@ ZlibTransformOutput(
     int toWrite,
     int *errorCodePtr)
 {
-    ZlibChannelData *cd = instanceData;
+    ZlibChannelData *cd = (ZlibChannelData *)instanceData;
     Tcl_DriverOutputProc *outProc =
 	    Tcl_ChannelOutputProc(Tcl_GetChannelType(cd->parent));
     int e, produced;
@@ -3225,7 +3225,7 @@ ZlibTransformSetOption(			/* not used */
     const char *optionName,
     const char *value)
 {
-    ZlibChannelData *cd = instanceData;
+    ZlibChannelData *cd = (ZlibChannelData *)instanceData;
     Tcl_DriverSetOptionProc *setOptionProc =
 	    Tcl_ChannelSetOptionProc(Tcl_GetChannelType(cd->parent));
     static const char *compressChanOptions = "dictionary flush";
@@ -3338,7 +3338,7 @@ ZlibTransformGetOption(
     const char *optionName,
     Tcl_DString *dsPtr)
 {
-    ZlibChannelData *cd = instanceData;
+    ZlibChannelData *cd = (ZlibChannelData *)instanceData;
     Tcl_DriverGetOptionProc *getOptionProc =
 	    Tcl_ChannelGetOptionProc(Tcl_GetChannelType(cd->parent));
     static const char *compressChanOptions = "checksum dictionary";
@@ -3456,7 +3456,7 @@ ZlibTransformWatch(
     ClientData instanceData,
     int mask)
 {
-    ZlibChannelData *cd = instanceData;
+    ZlibChannelData *cd = (ZlibChannelData *)instanceData;
     Tcl_DriverWatchProc *watchProc;
 
     /*
@@ -3479,7 +3479,7 @@ ZlibTransformEventHandler(
     ClientData instanceData,
     int interestMask)
 {
-    ZlibChannelData *cd = instanceData;
+    ZlibChannelData *cd = (ZlibChannelData *)instanceData;
 
     ZlibTransformEventTimerKill(cd);
     return interestMask;
@@ -3499,7 +3499,7 @@ static void
 ZlibTransformTimerRun(
     ClientData clientData)
 {
-    ZlibChannelData *cd = clientData;
+    ZlibChannelData *cd = (ZlibChannelData *)clientData;
 
     cd->timer = NULL;
     Tcl_NotifyChannel(cd->chan, TCL_READABLE);
@@ -3522,7 +3522,7 @@ ZlibTransformGetHandle(
     int direction,
     ClientData *handlePtr)
 {
-    ZlibChannelData *cd = instanceData;
+    ZlibChannelData *cd = (ZlibChannelData *)instanceData;
 
     return Tcl_GetChannelHandle(cd->parent, direction, handlePtr);
 }
@@ -3542,7 +3542,7 @@ ZlibTransformBlockMode(
     ClientData instanceData,
     int mode)
 {
-    ZlibChannelData *cd = instanceData;
+    ZlibChannelData *cd = (ZlibChannelData *)instanceData;
 
     if (mode == TCL_MODE_NONBLOCKING) {
 	cd->flags |= ASYNC;
@@ -3592,7 +3592,7 @@ ZlibStackChannelTransform(
 				 * dictionary (not dictObj!) to use if
 				 * necessary. */
 {
-    ZlibChannelData *cd = ckalloc(sizeof(ZlibChannelData));
+    ZlibChannelData *cd = (ZlibChannelData *)ckalloc(sizeof(ZlibChannelData));
     Tcl_Channel chan;
     int wbits = 0;
 
@@ -3652,7 +3652,7 @@ ZlibStackChannelTransform(
 	    goto error;
 	}
 	cd->inAllocated = DEFAULT_BUFFER_SIZE;
-	cd->inBuffer = ckalloc(cd->inAllocated);
+	cd->inBuffer = (char *)ckalloc(cd->inAllocated);
 	if (cd->flags & IN_HEADER) {
 	    if (inflateGetHeader(&cd->inStream, &cd->inHeader.header) != Z_OK) {
 		goto error;
@@ -3669,7 +3669,7 @@ ZlibStackChannelTransform(
 	    goto error;
 	}
 	cd->outAllocated = DEFAULT_BUFFER_SIZE;
-	cd->outBuffer = ckalloc(cd->outAllocated);
+	cd->outBuffer = (char *)ckalloc(cd->outAllocated);
 	if (cd->flags & OUT_HEADER) {
 	    if (deflateSetHeader(&cd->outStream, &cd->outHeader.header) != Z_OK) {
 		goto error;
diff --git a/win/tclWinPipe.c b/win/tclWinPipe.c
index 69c38d4..04c371e 100644
--- a/win/tclWinPipe.c
+++ b/win/tclWinPipe.c
@@ -2752,7 +2752,6 @@ TclWinAddProcess(
  *----------------------------------------------------------------------
  */
 
-	/* ARGSUSED */
 int
 Tcl_PidObjCmd(
     ClientData dummy,		/* Not used. */
-- 
cgit v0.12


From effab20437479dde3420488467f7ab773743d849 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Mon, 20 Apr 2020 11:56:27 +0000
Subject: (cherry-pick): Proposed fix for [27944a3661]: Taming test utf-6.88.

---
 generic/tclUtf.c | 38 ++++++++++++--------------------------
 tests/utf.test   | 22 +++++++++++++++++-----
 2 files changed, 29 insertions(+), 31 deletions(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 1ba474e..aa949ca 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -716,31 +716,11 @@ Tcl_UtfFindLast(
  *
  * Tcl_UtfNext --
  *
- *	The aim of this routine is to provide a way to iterate forward
- *	through a UTF-8 string. The caller is expected to pass a non-NULL
- *	pointer argument /src/ which points to a location within a string.
- *	(*src) will be read, so /src/ must not point to an unreadable
- *	location past the end of the string. If /src/ points to the
- *	beginning of a complete, well-formed and valid UTF_8 byte sequence
- *	of no more than TCL_UTF_MAX bytes, Tcl_UtfNext returns the pointer
- *	just past the end of that sequence. In any other circumstance,
- *	Tcl_UtfNext returns /src/+1.
- *
- *	Because this routine always returns a value > /src/, it is useful
- *	as a forward iterator that will always make progress. If the string
- *	is NUL-terminated, Tcl_UtfNext will not read beyond the terminating
- *	NUL character. If it is not NUL-terminated, the caller must make
- *	use of the companion routine Tcl_UtfCharComplete to test whether
- *	there is risk that Tcl_UtfNext will read beyond the end of the string.
- *	Tcl_UtfNext will never read more than TCL_UTF_MAX bytes.
- *
- *	In a string where all characters are complete and properly formed,
- *	and /src/ points to the first byte of a character, repeated
- *	Tcl_UtfNext calls will step to the starting bytes of characters, one
- *	character at a time. Within those limitations, Tcl_UtfPrev and
- *	Tcl_UtfNext are inverses. If either condition cannot be met,
- *	Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and the
- *	caller will have to take greater care.
+ *  Given a pointer to some location in a UTF-8 string, Tcl_UtfNext
+ *  returns a pointer to the next UTF-8 character in the string.
+ *  The caller must not ask for the next character after the last
+ *	character in the string if the string is not terminated by a null
+ *  character.
  *
  * Results:
  *	A pointer to the start of the next character in the string (or to
@@ -760,13 +740,19 @@ Tcl_UtfNext(
     int left = totalBytes[byte];
     const char *next = src + 1;
 
+    if (((*src) & 0xC0) == 0x80) {
+	if ((((*++src) & 0xC0) == 0x80) && (((*++src) & 0xC0) == 0x80)) {
+	    ++src;
+	}
+	return src;
+    }
+
     while (--left) {
 	byte = *((unsigned char *) next);
 	if ((byte & 0xC0) != 0x80) {
 	    /*
 	     * src points to non-trail byte; We ran out of trail bytes
 	     * before the needs of the lead byte were satisfied.
-	     * Let the (malformed) lead byte alone be a character
 	     */
 	    return src + 1;
 	}
diff --git a/tests/utf.test b/tests/utf.test
index 0ba2b85..f56fabc 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -189,7 +189,7 @@ test utf-6.10 {Tcl_UtfNext} testutfnext {
 } 1
 test utf-6.11 {Tcl_UtfNext} testutfnext {
     testutfnext \xA0\xA0
-} 1
+} 2
 test utf-6.12 {Tcl_UtfNext} testutfnext {
     testutfnext \xA0\xD0
 } 1
@@ -420,18 +420,30 @@ test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext} {
 } 4
 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
     testutfnext \xA0\xA0
-} 1
+} 2
 test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
     testutfnext \xE8\xA0\xA0 1
-} 2
+} 3
 test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
     testutfnext \x80\x80
-} 1
+} 2
 test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
     testutfnext \xF0\x80\x80 1
-} 2
+} 3
 
 testConstraint testutfprev [llength [info commands testutfprev]]
+test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext {
+    testutfnext \xA0\xA0\xA0
+} 3
+test utf-6.92.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext {
+    testutfnext \xF2\xA0\xA0\xA0 1
+} 4
+test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext {
+    testutfnext \x80\x80\x80
+} 3
+test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext {
+    testutfnext \xF0\x80\x80\x80 1
+} 4
 
 test utf-7.1 {Tcl_UtfPrev} testutfprev {
     testutfprev {}
-- 
cgit v0.12


From 4f2621d9d59b2df9183fc4a90bb530dbccd18fc2 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Mon, 20 Apr 2020 15:20:55 +0000
Subject: Proposed fix for [c11e0c5ce4]: Regression in Tcl_UtfCharComplete.

---
 generic/tclUtf.c |  32 ++++++-------
 tests/utf.test   | 136 ++++++++++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 123 insertions(+), 45 deletions(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index aa949ca..842744d 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -64,17 +64,6 @@ static const unsigned char totalBytes[256] = {
     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1
-};
-
-static const unsigned char complete[256] = {
-    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 #if TCL_UTF_MAX > 4
     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@@ -84,7 +73,11 @@ static const unsigned char complete[256] = {
 #endif
     2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
     3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
-    4,4,4,4,4,
+#if TCL_UTF_MAX > 3
+	4,4,4,4,4,
+#else
+    1,1,1,1,1,
+#endif
     1,1,1,1,1,1,1,1,1,1,1
 };
 
@@ -558,7 +551,7 @@ Tcl_UtfCharComplete(
 				 * a complete UTF-8 character. */
     int length)			/* Length of above string in bytes. */
 {
-    return length >= complete[(unsigned char)*src];
+    return length >= totalBytes[(unsigned char)*src];
 }
 
 /*
@@ -606,7 +599,7 @@ Tcl_NumUtfChars(
 	    src = next;
 	}
     } else {
-	register const char *endPtr = src + length - /*TCL_UTF_MAX*/ 4;
+	register const char *endPtr = src + length - TCL_UTF_MAX;
 
 	while (src < endPtr) {
 	    next = TclUtfNext(src);
@@ -617,7 +610,7 @@ Tcl_NumUtfChars(
 #endif
 	    src = next;
 	}
-	endPtr += /*TCL_UTF_MAX*/ 4;
+	endPtr += TCL_UTF_MAX;
 	while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
 	    next = TclUtfNext(src);
 #if TCL_UTF_MAX > 4
@@ -895,15 +888,18 @@ Tcl_UtfPrev(
 
 	/* Continue the search backwards... */
 	look--;
-    } while (trailBytesSeen < /* was TCL_UTF_MAX */ 4);
+    } while (trailBytesSeen < TCL_UTF_MAX);
 
     /*
-     * We've seen 4 (was TCL_UTF_MAX) trail bytes, so we know there will not be a
+     * We've seen TCL_UTF_MAX trail bytes, so we know there will not be a
      * properly formed byte sequence to find, and we can stop looking,
      * accepting the fallback.
      */
-
+#if TCL_UTF_MAX < 4
+    return src - TCL_UTF_MAX;
+#else
     return fallback;
+#endif
 }
 
 /*
diff --git a/tests/utf.test b/tests/utf.test
index f56fabc..3301dde 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -21,6 +21,7 @@ testConstraint testbytestring [llength [info commands testbytestring]]
 catch {unset x}
 
 # Some tests require support for 4-byte UTF-8 sequences
+testConstraint smallutf [expr {[format %c 0x010000] == "\uFFFD"}]
 testConstraint fullutf [expr {[format %c 0x010000] != "\uFFFD"}]
 testConstraint tip389 [expr {[string length \U010000] == 2}]
 
@@ -361,7 +362,10 @@ test utf-6.67 {Tcl_UtfNext} testutfnext {
 test utf-6.68 {Tcl_UtfNext} testutfnext {
     testutfnext \xF4\xA0\xA0G
 } 1
-test utf-6.69 {Tcl_UtfNext} testutfnext {
+test utf-6.69 {Tcl_UtfNext} {testutfnext smallutf} {
+    testutfnext \xF4\xA0\xA0\xA0
+} 1
+test utf-6.69.1 {Tcl_UtfNext} {testutfnext fullutf} {
     testutfnext \xF4\xA0\xA0\xA0
 } 4
 test utf-6.70 {Tcl_UtfNext} testutfnext {
@@ -376,22 +380,40 @@ test utf-6.71 {Tcl_UtfNext} testutfnext {
 test utf-6.73 {Tcl_UtfNext} testutfnext {
     testutfnext \xF4\xA0\xA0\xF8
 } 1
-test utf-6.74 {Tcl_UtfNext} testutfnext {
+test utf-6.74 {Tcl_UtfNext} {testutfnext smallutf} {
+    testutfnext \xF4\xA0\xA0\xA0G
+} 1
+test utf-6.74.1 {Tcl_UtfNext} {testutfnext fullutf} {
     testutfnext \xF4\xA0\xA0\xA0G
 } 4
-test utf-6.75 {Tcl_UtfNext} testutfnext {
+test utf-6.75 {Tcl_UtfNext} {testutfnext smallutf} {
+    testutfnext \xF4\xA0\xA0\xA0\xA0
+} 1
+test utf-6.75.1 {Tcl_UtfNext} {testutfnext fullutf} {
     testutfnext \xF4\xA0\xA0\xA0\xA0
 } 4
-test utf-6.76 {Tcl_UtfNext} testutfnext {
+test utf-6.76 {Tcl_UtfNext} {testutfnext smallutf} {
+    testutfnext \xF4\xA0\xA0\xA0\xD0
+} 1
+test utf-6.76.1 {Tcl_UtfNext} {testutfnext fullutf} {
     testutfnext \xF4\xA0\xA0\xA0\xD0
 } 4
-test utf-6.77 {Tcl_UtfNext} testutfnext {
+test utf-6.77 {Tcl_UtfNext} {testutfnext smallutf} {
+    testutfnext \xF4\xA0\xA0\xA0\xE8
+} 1
+test utf-6.77.1 {Tcl_UtfNext} {testutfnext fullutf} {
     testutfnext \xF4\xA0\xA0\xA0\xE8
 } 4
-test utf-6.78 {Tcl_UtfNext} testutfnext {
+test utf-6.78 {Tcl_UtfNext} {testutfnext smallutf} {
+    testutfnext \xF4\xA0\xA0\xA0\xF4
+} 1
+test utf-6.78.1 {Tcl_UtfNext} {testutfnext fullutf} {
     testutfnext \xF4\xA0\xA0\xA0\xF4
 } 4
-test utf-6.79 {Tcl_UtfNext} testutfnext {
+test utf-6.79 {Tcl_UtfNext} {testutfnext smallutf} {
+    testutfnext \xF4\xA0\xA0\xA0G\xF8
+} 1
+test utf-6.79.1 {Tcl_UtfNext} {testutfnext fullutf} {
     testutfnext \xF4\xA0\xA0\xA0G\xF8
 } 4
 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext {
@@ -415,7 +437,10 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext {
 test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext {
     testutfnext \xF0\x80\x80\x80
 } 1
-test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext} {
+test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext smallutf} {
+    testutfnext \xF0\x90\x80\x80
+} 1
+test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} {
     testutfnext \xF0\x90\x80\x80
 } 4
 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
@@ -508,13 +533,22 @@ test utf-7.9.1 {Tcl_UtfPrev} testutfprev {
 test utf-7.9.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xF8\xA0 3
 } 2
-test utf-7.10 {Tcl_UtfPrev} testutfprev {
+test utf-7.10 {Tcl_UtfPrev} {testutfprev smallutf} {
+    testutfprev A\xF4\xA0
+} 2
+test utf-7.10.1 {Tcl_UtfPrev} {testutfprev smallutf} {
+    testutfprev A\xF4\xA0\xA0\xA0 3
+} 2
+test utf-7.10.2 {Tcl_UtfPrev} {testutfprev smallutf} {
+    testutfprev A\xF4\xA0\xF8\xA0 3
+} 2
+test utf-7.10 {Tcl_UtfPrev} {testutfprev fullutf} {
     testutfprev A\xF4\xA0
 } 1
-test utf-7.10.1 {Tcl_UtfPrev} testutfprev {
+test utf-7.10.1 {Tcl_UtfPrev} {testutfprev fullutf} {
     testutfprev A\xF4\xA0\xA0\xA0 3
 } 1
-test utf-7.10.2 {Tcl_UtfPrev} testutfprev {
+test utf-7.10.2 {Tcl_UtfPrev} {testutfprev fullutf} {
     testutfprev A\xF4\xA0\xF8\xA0 3
 } 1
 test utf-7.11 {Tcl_UtfPrev} testutfprev {
@@ -556,13 +590,22 @@ test utf-7.14.1 {Tcl_UtfPrev} testutfprev {
 test utf-7.14.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xA0\xF8 4
 } 3
-test utf-7.15 {Tcl_UtfPrev} testutfprev {
+test utf-7.15 {Tcl_UtfPrev} {testutfprev smallutf} {
+    testutfprev A\xF4\xA0\xA0
+} 3
+test utf-7.15.1 {Tcl_UtfPrev} {testutfprev smallutf} {
+    testutfprev A\xF4\xA0\xA0\xA0 4
+} 3
+test utf-7.15.2 {Tcl_UtfPrev} {testutfprev smallutf} {
+    testutfprev A\xF4\xA0\xA0\xF8 4
+} 3
+test utf-7.15.3 {Tcl_UtfPrev} {testutfprev fullutf} {
     testutfprev A\xF4\xA0\xA0
 } 1
-test utf-7.15.1 {Tcl_UtfPrev} testutfprev {
+test utf-7.15.4 {Tcl_UtfPrev} {testutfprev fullutf} {
     testutfprev A\xF4\xA0\xA0\xA0 4
 } 1
-test utf-7.15.2 {Tcl_UtfPrev} testutfprev {
+test utf-7.15.5 {Tcl_UtfPrev} {testutfprev fullutf} {
     testutfprev A\xF4\xA0\xA0\xF8 4
 } 1
 test utf-7.16 {Tcl_UtfPrev} testutfprev {
@@ -583,28 +626,52 @@ test utf-7.17.1 {Tcl_UtfPrev} testutfprev {
 test utf-7.17.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xD0\xA0\xA0\xF8 4
 } 3
-test utf-7.18 {Tcl_UtfPrev} testutfprev {
+test utf-7.18 {Tcl_UtfPrev} {testutfprev smallutf} {
+    testutfprev A\xA0\xA0\xA0
+} 1
+test utf-7.18.1 {Tcl_UtfPrev} {testutfprev smallutf} {
+    testutfprev A\xA0\xA0\xA0\xA0 4
+} 1
+test utf-7.18.2 {Tcl_UtfPrev} {testutfprev smallutf} {
+    testutfprev A\xA0\xA0\xA0\xF8 4
+} 1
+test utf-7.18.3 {Tcl_UtfPrev} {testutfprev fullutf} {
     testutfprev A\xA0\xA0\xA0
 } 3
-test utf-7.18.1 {Tcl_UtfPrev} testutfprev {
+test utf-7.18.4 {Tcl_UtfPrev} {testutfprev fullutf} {
     testutfprev A\xA0\xA0\xA0\xA0 4
 } 3
-test utf-7.18.2 {Tcl_UtfPrev} testutfprev {
+test utf-7.18.5 {Tcl_UtfPrev} {testutfprev fullutf} {
     testutfprev A\xA0\xA0\xA0\xF8 4
 } 3
-test utf-7.19 {Tcl_UtfPrev} testutfprev {
+test utf-7.19 {Tcl_UtfPrev} {testutfprev smallutf} {
+    testutfprev A\xF8\xA0\xA0\xA0
+} 2
+test utf-7.19.1 {Tcl_UtfPrev} {testutfprev fullutf} {
     testutfprev A\xF8\xA0\xA0\xA0
 } 4
-test utf-7.20 {Tcl_UtfPrev} testutfprev {
+test utf-7.20 {Tcl_UtfPrev} {testutfprev smallutf} {
+    testutfprev A\xF4\xA0\xA0\xA0
+} 2
+test utf-7.20.1 {Tcl_UtfPrev} {testutfprev fullutf} {
     testutfprev A\xF4\xA0\xA0\xA0
 } 1
-test utf-7.21 {Tcl_UtfPrev} testutfprev {
+test utf-7.21 {Tcl_UtfPrev} {testutfprev smallutf} {
+    testutfprev A\xE8\xA0\xA0\xA0
+} 2
+test utf-7.21.1 {Tcl_UtfPrev} {testutfprev fullutf} {
     testutfprev A\xE8\xA0\xA0\xA0
 } 4
-test utf-7.22 {Tcl_UtfPrev} testutfprev {
+test utf-7.22 {Tcl_UtfPrev} {testutfprev smallutf} {
+    testutfprev A\xD0\xA0\xA0\xA0
+} 2
+test utf-7.22.1 {Tcl_UtfPrev} {testutfprev fullutf} {
     testutfprev A\xD0\xA0\xA0\xA0
 } 4
-test utf-7.23 {Tcl_UtfPrev} testutfprev {
+test utf-7.23 {Tcl_UtfPrev} {testutfprev smallutf} {
+    testutfprev A\xA0\xA0\xA0\xA0
+} 2
+test utf-7.23.1 {Tcl_UtfPrev} {testutfprev fullutf} {
     testutfprev A\xA0\xA0\xA0\xA0
 } 4
 test utf-7.24 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
@@ -628,7 +695,10 @@ test utf-7.28 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
 test utf-7.28.1 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
     testutfprev A\xE0\x80\x80 2
 } 1
-test utf-7.29 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+test utf-7.29 {Tcl_UtfPrev -- overlong sequence}  {testutfprev smallutf} {
+    testutfprev A\xF0\x80\x80\x80
+} 2
+test utf-7.29.1 {Tcl_UtfPrev -- overlong sequence}  {testutfprev fullutf} {
     testutfprev A\xF0\x80\x80\x80
 } 4
 test utf-7.30 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
@@ -658,13 +728,22 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
 test utf-7.38 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
     testutfprev A\xE0\xA0\x80 2
 } 1
-test utf-7.39 {Tcl_UtfPrev -- overlong sequence}  {testutfprev} {
+test utf-7.39 {Tcl_UtfPrev -- overlong sequence}  {testutfprev smallutf} {
+    testutfprev A\xF0\x90\x80\x80
+} 2
+test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence}  {testutfprev fullutf} {
     testutfprev A\xF0\x90\x80\x80
 } 1
-test utf-7.40 {Tcl_UtfPrev -- overlong sequence}  {testutfprev} {
+test utf-7.40 {Tcl_UtfPrev -- overlong sequence}  {testutfprev smallutf} {
+    testutfprev A\xF0\x90\x80\x80 4
+} 3
+test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence}  {testutfprev fullutf} {
     testutfprev A\xF0\x90\x80\x80 4
 } 1
-test utf-7.41 {Tcl_UtfPrev -- overlong sequence}  {testutfprev} {
+test utf-7.41 {Tcl_UtfPrev -- overlong sequence}  {testutfprev smallutf} {
+    testutfprev A\xF0\x90\x80\x80 3
+} 2
+test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence}  {testutfprev fullutf} {
     testutfprev A\xF0\x90\x80\x80 3
 } 1
 test utf-7.42 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
@@ -679,7 +758,10 @@ test utf-7.44 {Tcl_UtfPrev -- no lead byte at start}  testutfprev {
 test utf-7.45 {Tcl_UtfPrev -- no lead byte at start}  testutfprev {
     testutfprev \xA0\xA0\xA0
 } 2
-test utf-7.46 {Tcl_UtfPrev -- no lead byte at start}  testutfprev {
+test utf-7.46 {Tcl_UtfPrev -- no lead byte at start}  {testutfprev smallutf} {
+    testutfprev \xA0\xA0\xA0\xA0
+} 1
+test utf-7.46 {Tcl_UtfPrev -- no lead byte at start}  {testutfprev fullutf} {
     testutfprev \xA0\xA0\xA0\xA0
 } 3
 test utf-7.47 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev} {
-- 
cgit v0.12


From 58cf4db1ccb0602f9bd023ecc4e56830aea2453a Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Mon, 20 Apr 2020 22:35:39 +0000
Subject: Tie together the TCL_UTF_MAX=4 and TCL_UTF_MAX=6 builds to mean the
 same thing on the 8.5 branch -- use internal UCS-4 storage.

---
 generic/regcustom.h | 2 +-
 generic/tcl.h       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/generic/regcustom.h b/generic/regcustom.h
index 57a2d47..ac33087 100644
--- a/generic/regcustom.h
+++ b/generic/regcustom.h
@@ -97,7 +97,7 @@ typedef int celt;		/* Type to hold chr, or NOCELT */
 #define	NOCELT (-1)		/* Celt value which is not valid chr */
 #define	CHR(c) (UCHAR(c))	/* Turn char literal into chr literal */
 #define	DIGITVAL(c) ((c)-'0')	/* Turn chr digit into its value */
-#if TCL_UTF_MAX > 4
+#if TCL_UTF_MAX > 3
 #define	CHRBITS	32		/* Bits in a chr; must not use sizeof */
 #define	CHR_MIN	0x00000000	/* Smallest and largest chr; the value */
 #define	CHR_MAX	0xffffffff	/* CHR_MAX-CHR_MIN+1 should fit in uchr */
diff --git a/generic/tcl.h b/generic/tcl.h
index 7378a8f..d7d064c 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -2148,7 +2148,7 @@ typedef struct Tcl_Parse {
  * reflected in regcustom.h.
  */
 
-#if TCL_UTF_MAX > 4
+#if TCL_UTF_MAX > 3
     /*
      * unsigned int isn't 100% accurate as it should be a strict 4-byte value
      * (perhaps wchar_t). 64-bit systems may have troubles. The size of this
-- 
cgit v0.12


From 4c9bc32c393e100ae9caf7f06e57c798f96ada6d Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Mon, 20 Apr 2020 23:00:30 +0000
Subject: Pair every compat85 test with a fullutf test so that we cover all
 variants.

---
 tests/utf.test | 137 ++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 107 insertions(+), 30 deletions(-)

diff --git a/tests/utf.test b/tests/utf.test
index 1ca3647..1c79f32 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -13,7 +13,9 @@ if {[lsearch [namespace children] ::tcltest] == -1} {
     namespace import -force ::tcltest::*
 }
 
-testConstraint compat85 [expr {[format %c 0x010000] == "\uFFFD"}]
+testConstraint compat85 [expr {[format %c 0x010000] eq "\uFFFD"}]
+testConstraint fullutf  [expr {[format %c 0x010000] ne "\uFFFD"}]
+
 testConstraint testbytestring [llength [info commands testbytestring]]
 testConstraint testfindfirst [llength [info commands testfindfirst]]
 testConstraint testfindlast [llength [info commands testfindlast]]
@@ -64,12 +66,18 @@ test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} testbytestrin
 test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring {
     string length [testbytestring "\xE4\xB9\x8E"]
 } {1}
-test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body {
+test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body {
     string length [testbytestring "\xF0\x90\x80\x80"]
 } -result {4}
-test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body {
+test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring fullutf} -body {
+    string length [testbytestring "\xF0\x90\x80\x80"]
+} -result {1}
+test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body {
     string length [testbytestring "\xF4\x8F\xBF\xBF"]
 } -result {4}
+test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring fullutf} -body {
+    string length [testbytestring "\xF4\x8F\xBF\xBF"]
+} -result {1}
 test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring {
     string length [testbytestring "\xF0\x8F\xBF\xBF"]
 } {4}
@@ -118,9 +126,12 @@ test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars
 test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} {
     testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 3
 } {3}
-test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring compat85} {
+test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring compat85} {
     testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4
 } {4}
+test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring fullutf} {
+    testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4
+} {1}
 
 test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} {
     testfindfirst [testbytestring "abcbc"] 98
@@ -335,9 +346,12 @@ test utf-6.67 {Tcl_UtfNext} testutfnext {
 test utf-6.68 {Tcl_UtfNext} testutfnext {
     testutfnext \xF2\xA0\xA0G
 } 1
-test utf-6.69 {Tcl_UtfNext} {testutfnext compat85} {
+test utf-6.69.0 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0
 } 1
+test utf-6.69.1 {Tcl_UtfNext} {testutfnext fullutf} {
+    testutfnext \xF2\xA0\xA0\xA0
+} 4
 test utf-6.70 {Tcl_UtfNext} testutfnext {
     testutfnext \xF2\xA0\xA0\xD0
 } 1
@@ -350,24 +364,42 @@ test utf-6.71 {Tcl_UtfNext} testutfnext {
 test utf-6.73 {Tcl_UtfNext} testutfnext {
     testutfnext \xF2\xA0\xA0\xF8
 } 1
-test utf-6.74 {Tcl_UtfNext} {testutfnext compat85} {
+test utf-6.74.0 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0G
 } 1
-test utf-6.75 {Tcl_UtfNext} {testutfnext compat85} {
+test utf-6.74.1 {Tcl_UtfNext} {testutfnext fullutf} {
+    testutfnext \xF2\xA0\xA0\xA0G
+} 4
+test utf-6.75.0 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0\xA0
 } 1
-test utf-6.76 {Tcl_UtfNext} {testutfnext compat85} {
+test utf-6.75.1 {Tcl_UtfNext} {testutfnext fullutf} {
+    testutfnext \xF2\xA0\xA0\xA0\xA0
+} 4
+test utf-6.76.0 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0\xD0
 } 1
-test utf-6.77 {Tcl_UtfNext} {testutfnext compat85} {
+test utf-6.76.1 {Tcl_UtfNext} {testutfnext fullutf} {
+    testutfnext \xF2\xA0\xA0\xA0\xD0
+} 4
+test utf-6.77.0 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0\xE8
 } 1
-test utf-6.78 {Tcl_UtfNext} {testutfnext compat85} {
+test utf-6.77.1 {Tcl_UtfNext} {testutfnext fullutf} {
+    testutfnext \xF2\xA0\xA0\xA0\xE8
+} 4
+test utf-6.78.0 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0\xF2
 } 1
-test utf-6.79 {Tcl_UtfNext} {testutfnext compat85} {
+test utf-6.78.1 {Tcl_UtfNext} {testutfnext fullutf} {
+    testutfnext \xF2\xA0\xA0\xA0\xF2
+} 4
+test utf-6.79.0 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0G\xF8
 } 1
+test utf-6.79.1 {Tcl_UtfNext} {testutfnext fullutf} {
+    testutfnext \xF2\xA0\xA0\xA0G\xF8
+} 4
 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext {
     testutfnext \xC0\x80
 } 2
@@ -389,9 +421,12 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext {
 test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext {
     testutfnext \xF0\x80\x80\x80
 } 1
-test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext compat85} {
+test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext compat85} {
     testutfnext \xF0\x90\x80\x80
 } 1
+test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} {
+    testutfnext \xF0\x90\x80\x80
+} 4
 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
     testutfnext \xA0\xA0
 } 1
@@ -404,9 +439,12 @@ test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {te
 test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
     testutfnext \xF0\x80\x80 1
 } 2
-test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext compat85} {
+test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext compat85} {
     testutfnext \xF4\x8F\xBF\xBF
 } 1
+test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} {
+    testutfnext \xF4\x8F\xBF\xBF
+} 4
 test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext {
     testutfnext \xF4\x90\x80\x80
 } 1
@@ -474,15 +512,24 @@ test utf-7.9.1 {Tcl_UtfPrev} testutfprev {
 test utf-7.9.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xF8\xA0 3
 } 2
-test utf-7.10 {Tcl_UtfPrev} {testutfprev compat85} {
+test utf-7.10.0 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0
 } 2
-test utf-7.10.1 {Tcl_UtfPrev} {testutfprev compat85} {
+test utf-7.10.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+    testutfprev A\xF2\xA0
+} 1
+test utf-7.10.1.0 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0\xA0 3
 } 2
-test utf-7.10.2 {Tcl_UtfPrev} {testutfprev compat85} {
+test utf-7.10.1.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+    testutfprev A\xF2\xA0\xA0\xA0 3
+} 1
+test utf-7.10.2.0 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xF8\xA0 3
 } 2
+test utf-7.10.2.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+    testutfprev A\xF2\xA0\xF8\xA0 3
+} 1
 test utf-7.11 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xE8\xA0
 } 1
@@ -522,15 +569,24 @@ test utf-7.14.1 {Tcl_UtfPrev} testutfprev {
 test utf-7.14.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xA0\xF8 4
 } 3
-test utf-7.15 {Tcl_UtfPrev} {testutfprev compat85} {
+test utf-7.15.0 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0
 } 3
-test utf-7.15.1 {Tcl_UtfPrev} {testutfprev compat85} {
+test utf-7.15.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+    testutfprev A\xF2\xA0\xA0
+} 1
+test utf-7.15.1.0 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0\xA0 4
 } 3
-test utf-7.15.2 {Tcl_UtfPrev} {testutfprev compat85} {
+test utf-7.15.1.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+    testutfprev A\xF2\xA0\xA0\xA0 4
+} 1
+test utf-7.15.2.0 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0\xF8 4
 } 3
+test utf-7.15.2.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+    testutfprev A\xF2\xA0\xA0\xF8 4
+} 1
 test utf-7.16 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xE8\xA0\xA0
 } 1
@@ -561,9 +617,12 @@ test utf-7.18.2 {Tcl_UtfPrev} testutfprev {
 test utf-7.19 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xA0\xA0
 } 4
-test utf-7.20 {Tcl_UtfPrev} {testutfprev compat85} {
+test utf-7.20.0 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0\xA0
 } 4
+test utf-7.20.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+    testutfprev A\xF2\xA0\xA0\xA0
+} 1
 test utf-7.21 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xE8\xA0\xA0\xA0
 } 4
@@ -624,15 +683,24 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
 test utf-7.38 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
     testutfprev A\xE0\xA0\x80 2
 } 1
-test utf-7.39 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
+test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
     testutfprev A\xF0\x90\x80\x80
 } 4
-test utf-7.40 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
+test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence}  {testutfprev fullutf} {
+    testutfprev A\xF0\x90\x80\x80
+} 1
+test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
     testutfprev A\xF0\x90\x80\x80 4
 } 3
-test utf-7.41 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
+test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence}  {testutfprev fullutf} {
+    testutfprev A\xF0\x90\x80\x80 4
+} 1
+test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
     testutfprev A\xF0\x90\x80\x80 3
 } 2
+test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence}  {testutfprev fullutf} {
+    testutfprev A\xF0\x90\x80\x80 3
+} 1
 test utf-7.42 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
     testutfprev A\xF0\x90\x80\x80 2
 } 1
@@ -657,15 +725,24 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {te
 test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} {
     testutfprev \xE8\xA0\x00 2
 } 0
-test utf-7.48 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
+test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
     testutfprev A\xF4\x8F\xBF\xBF
 } 4
-test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
+test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} {
+    testutfprev A\xF4\x8F\xBF\xBF
+} 1
+test utf-7.48.1.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
     testutfprev A\xF4\x8F\xBF\xBF 4
 } 3
-test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
+test utf-7.48.1.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} {
+    testutfprev A\xF4\x8F\xBF\xBF 4
+} 1
+test utf-7.48.2.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
     testutfprev A\xF4\x8F\xBF\xBF 3
 } 2
+test utf-7.48.2.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} {
+    testutfprev A\xF4\x8F\xBF\xBF 3
+} 1
 test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
     testutfprev A\xF4\x8F\xBF\xBF 2
 } 1
@@ -700,10 +777,10 @@ test utf-8.5 {Tcl_UniCharAtIndex: high surrogate} {
 test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} {
     string index \uDC42 0
 } "\uDC42"
-test utf-8.7 {Tcl_UniCharAtIndex: Emoji} compat85 {
+test utf-8.7 {Tcl_UniCharAtIndex: Emoji} {
     string index \uD83D\uDE00 0
 } "\uD83D"
-test utf-8.8 {Tcl_UniCharAtIndex: Emoji} compat85 {
+test utf-8.8 {Tcl_UniCharAtIndex: Emoji} {
     string index \uD83D\uDE00 1
 } "\uDE00"
 
@@ -713,10 +790,10 @@ test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
 test utf-9.2 {Tcl_UtfAtIndex: index > 0} {
     string range \u4E4E\u25A\xFF\u543klmnop 1 5
 } "\u25A\xFF\u543kl"
-test utf-9.3 {Tcl_UtfAtIndex: index = 0, Emoji} compat85 {
+test utf-9.3 {Tcl_UtfAtIndex: index = 0, Emoji} {
     string range \uD83D\uDE00G 0 0
 } "\uD83D"
-test utf-9.4 {Tcl_UtfAtIndex: index > 0, Emoji} compat85 {
+test utf-9.4 {Tcl_UtfAtIndex: index > 0, Emoji} {
     string range \uD83D\uDE00G 1 1
 } "\uDE00"
 
-- 
cgit v0.12


From d8bc590eef94f9e9ec24150cf8208f38638290ef Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Tue, 21 Apr 2020 02:50:23 +0000
Subject: Revert the backport to tclEncoding.c that seems to redefine the
 "unicode" encoding to mean UTF-16. Don't want that behavior change in 8.5.

---
 generic/tclEncoding.c | 240 +++++++++++++++++++++++---------------------------
 1 file changed, 112 insertions(+), 128 deletions(-)

diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index da03055..5a9d2d5 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -83,7 +83,7 @@ typedef struct TableEncodingData {
 } TableEncodingData;
 
 /*
- * Each of the following structures is the clientData for a dynamically-loaded
+ * The following structures is the clientData for a dynamically-loaded,
  * escape-driven encoding that is itself comprised of other simpler encodings.
  * An example is "iso-2022-jp", which uses escape sequences to switch between
  * ascii, jis0208, jis0212, gb2312, and ksc5601. Note that "escape-driven"
@@ -117,8 +117,8 @@ typedef struct EscapeEncodingData {
 				 * 0. */
     int numSubTables;		/* Length of following array. */
     EscapeSubTable subTables[1];/* Information about each EscapeSubTable used
-				 * by this encoding type. The actual size is
-				 * as large as necessary to hold all
+				 * by this encoding type. The actual size will
+				 * be as large as necessary to hold all
 				 * EscapeSubTables. */
 } EscapeEncodingData;
 
@@ -156,7 +156,7 @@ static ProcessGlobalValue encodingFileMap = {
  * A list of directories making up the "library path". Historically this
  * search path has served many uses, but the only one remaining is a base for
  * the encodingSearchPath above. If the application does not explicitly set
- * the encodingSearchPath, then it is initialized by appending /encoding
+ * the encodingSearchPath, then it will be initialized by appending /encoding
  * to each directory in this "libraryPath".
  */
 
@@ -177,7 +177,7 @@ TCL_DECLARE_MUTEX(encodingMutex)
 /*
  * The following are used to hold the default and current system encodings.
  * If NULL is passed to one of the conversion routines, the current setting of
- * the system encoding is used to perform the conversion.
+ * the system encoding will be used to perform the conversion.
  */
 
 static Tcl_Encoding defaultEncoding;
@@ -429,8 +429,9 @@ TclGetLibraryPath(void)
  *	Keeps the per-thread copy of the library path current with changes to
  *	the global copy.
  *
- *	Since the result of this routine is void, if searchPath is not a valid
- *	list this routine silently does nothing.
+ *	NOTE: this routine returns void, so there's no way to report the error
+ *	that searchPath is not a valid list. In that case, this routine will
+ *	silently do nothing.
  *
  *----------------------------------------------------------------------
  */
@@ -452,16 +453,17 @@ TclSetLibraryPath(
  *
  * FillEncodingFileMap --
  *
- *	Called to update the encoding file map with the current value
- *	of the encoding search path.
+ * 	Called to bring the encoding file map in sync with the current value
+ * 	of the encoding search path.
  *
- *	Finds *.end files in the directories on the encoding search path and
- *	stores the found pathnames in a map associated with the encoding name.
+ *	Scan the directories on the encoding search path, find the *.enc
+ *	files, and store the found pathnames in a map associated with the
+ *	encoding name.
  *
- *	If $dir is on the encoding search path and the file $dir/foo.enc is
- *	found, stores a "foo" -> $dir entry in the map.  if the "foo" encoding
- *	is needed later, the $dir/foo.enc name can be quickly constructed in
- *	order to read the encoding data.
+ *	In particular, if $dir is on the encoding search path, and the file
+ *	$dir/foo.enc is found, then store a "foo" -> $dir entry in the map.
+ *	Later, any need for the "foo" encoding will quickly * be able to
+ *	construct the $dir/foo.enc pathname for reading the encoding data.
  *
  * Results:
  *	None.
@@ -542,24 +544,19 @@ void
 TclInitEncodingSubsystem(void)
 {
     Tcl_EncodingType type;
-    union {
-        char c;
-        short s;
-    } isLe;
 
     if (encodingsInitialized) {
 	return;
     }
 
-    isLe.s = 1;
     Tcl_MutexLock(&encodingMutex);
     Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS);
     Tcl_MutexUnlock(&encodingMutex);
 
     /*
-     * Create a few initial encodings.  UTF-8 to UTF-8 translation is not a
-     * no-op because it turns a stream of improperly formed UTF-8 into a
-     * properly formed stream.
+     * Create a few initial encodings. Note that the UTF-8 to UTF-8
+     * translation is not a no-op, because it will turn a stream of improperly
+     * formed UTF-8 into a properly formed stream.
      */
 
     type.encodingName	= "identity";
@@ -586,7 +583,7 @@ TclInitEncodingSubsystem(void)
     type.fromUtfProc    = UtfToUnicodeProc;
     type.freeProc	= NULL;
     type.nullSize	= 2;
-    type.clientData	= INT2PTR(isLe.c);
+    type.clientData	= NULL;
     Tcl_CreateEncoding(&type);
 
     /*
@@ -758,7 +755,11 @@ Tcl_SetDefaultEncodingDir(
  *	interp was NULL.
  *
  * Side effects:
- *	LoadEncodingFile is called if necessary.
+ *	The new encoding type is entered into a table visible to all
+ *	interpreters, keyed off the encoding's name. For each call to this
+ *	function, there should eventually be a call to Tcl_FreeEncoding, so
+ *	that the database can be cleaned up when encodings aren't needed
+ *	anymore.
  *
  *-------------------------------------------------------------------------
  */
@@ -796,15 +797,15 @@ Tcl_GetEncoding(
  *
  * Tcl_FreeEncoding --
  *
- *	Releases an encoding allocated by Tcl_CreateEncoding() or
- *	Tcl_GetEncoding().
+ *	This function is called to release an encoding allocated by
+ *	Tcl_CreateEncoding() or Tcl_GetEncoding().
  *
  * Results:
  *	None.
  *
  * Side effects:
  *	The reference count associated with the encoding is decremented and
- *	the encoding is deleted if nothing is using it anymore.
+ *	the encoding may be deleted if nothing is using it anymore.
  *
  *---------------------------------------------------------------------------
  */
@@ -823,14 +824,13 @@ Tcl_FreeEncoding(
  *
  * FreeEncoding --
  *
- *	Decrements the reference count of an encoding.  The caller must hold
- *	encodingMutes.
+ *	This function is called to release an encoding by functions that
+ *	already have the encodingMutex.
  *
  * Results:
  *	None.
  *
  * Side effects:
- *	Releases the resource for an encoding if it is now unused.
  *	The reference count associated with the encoding is decremented and
  *	the encoding may be deleted if nothing is using it anymore.
  *
@@ -850,17 +850,16 @@ FreeEncoding(
     if (encodingPtr->refCount<=0) {
 	Tcl_Panic("FreeEncoding: refcount problem !!!");
     }
-    if (encodingPtr->refCount-- <= 1) {
+    encodingPtr->refCount--;
+    if (encodingPtr->refCount == 0) {
 	if (encodingPtr->freeProc != NULL) {
 	    (*encodingPtr->freeProc)(encodingPtr->clientData);
 	}
 	if (encodingPtr->hPtr != NULL) {
 	    Tcl_DeleteHashEntry(encodingPtr->hPtr);
 	}
-	if (encodingPtr->name) {
-	    ckfree((char *)encodingPtr->name);
-	}
-	ckfree((char *)encodingPtr);
+	ckfree((char *) encodingPtr->name);
+	ckfree((char *) encodingPtr);
     }
 }
 
@@ -1021,22 +1020,23 @@ Tcl_SetSystemEncoding(
  *
  * Tcl_CreateEncoding --
  *
- *	Defines a new encoding, along with the functions that are used to
- *	convert to and from Unicode.
+ *	This function is called to define a new encoding and the functions
+ *	that are used to convert between the specified encoding and Unicode.
  *
  * Results:
  *	Returns a token that represents the encoding. If an encoding with the
  *	same name already existed, the old encoding token remains valid and
- *	continues to behave as it used to, and is eventually garbage collected
- *	when the last reference to it goes away. Any subsequent calls to
- *	Tcl_GetEncoding with the specified name retrieve the most recent
- *	encoding token.
+ *	continues to behave as it used to, and will eventually be garbage
+ *	collected when the last reference to it goes away. Any subsequent
+ *	calls to Tcl_GetEncoding with the specified name will retrieve the
+ *	most recent encoding token.
  *
  * Side effects:
- *	A new record having the name of the encoding is entered into a table of
- *	encodings visible to all interpreters.  For each call to this function,
- *	there should eventually be a call to Tcl_FreeEncoding, which cleans
- *	deletes the record in the table when an encoding is no longer needed.
+ *	The new encoding type is entered into a table visible to all
+ *	interpreters, keyed off the encoding's name. For each call to this
+ *	function, there should eventually be a call to Tcl_FreeEncoding, so
+ *	that the database can be cleaned up when encodings aren't needed
+ *	anymore.
  *
  *---------------------------------------------------------------------------
  */
@@ -1258,9 +1258,10 @@ Tcl_ExternalToUtf(
  *
  * Tcl_UtfToExternalDString --
  *
- *	Convert a source buffer from UTF-8 to the specified encoding. If any
+ *	Convert a source buffer from UTF-8 into the specified encoding. If any
  *	of the bytes in the source buffer are invalid or cannot be represented
- *	in the target encoding, a default fallback character is substituted.
+ *	in the target encoding, a default fallback character will be
+ *	substituted.
  *
  * Results:
  *	The converted bytes are stored in the DString, which is then NULL
@@ -1569,13 +1570,13 @@ OpenEncodingFileChannel(
  *	the data.
  *
  * Results:
- *	The return value is the newly loaded Tcl_Encoding or NULL if the file
- *	didn't exist or could not be processed. If NULL is returned and interp
- *	is not NULL, an error message is left in interp's result object.
+ *	The return value is the newly loaded Encoding, or NULL if the file
+ *	didn't exist of was in the incorrect format. If NULL was returned, an
+ *	error message is left in interp's result object, unless interp was
+ *	NULL.
  *
  * Side effects:
- *	A corresponding encoding file might be read from persistent storage, in
- *	which case LoadTableEncoding is called.
+ *	File read from disk.
  *
  *---------------------------------------------------------------------------
  */
@@ -1583,8 +1584,8 @@ OpenEncodingFileChannel(
 static Tcl_Encoding
 LoadEncodingFile(
     Tcl_Interp *interp,		/* Interp for error reporting, if not NULL. */
-    const char *name)		/* The name of both the encoding file
-				 * and the new encoding. */
+    const char *name)		/* The name of the encoding file on disk and
+				 * also the name for new encoding. */
 {
     Tcl_Channel chan = NULL;
     Tcl_Encoding encoding = NULL;
@@ -1636,27 +1637,27 @@ LoadEncodingFile(
  *
  * LoadTableEncoding --
  *
- *	Helper function for LoadEncodingFile().  Creates a Tcl_EncodingType
- *	structure along with its corresponding TableEncodingData structure, and
- *	passes it to Tcl_Createncoding.
+ *	Helper function for LoadEncodingTable(). Loads a table to that
+ *	converts between Unicode and some other encoding and creates an
+ *	encoding (using a TableEncoding structure) from that information.
  *
- *	The file contains binary data but begins with a marker to indicate
- *	byte-ordering so a single binary file can be read on big or
- *	little-endian systems.
+ *	File contains binary data, but begins with a marker to indicate
+ *	byte-ordering, so that same binary file can be read on either endian
+ *	platforms.
  *
  * Results:
- *	Returns the new Tcl_Encoding,  or NULL if it could could
- *	not be created because the file contained invalid data.
+ *	The return value is the new encoding, or NULL if the encoding could
+ *	not be created (because the file contained invalid data).
  *
  * Side effects:
- *	See Tcl_CreateEncoding().
+ *	None.
  *
  *-------------------------------------------------------------------------
  */
 
 static Tcl_Encoding
 LoadTableEncoding(
-    const char *name,		/* Name of the new encoding. */
+    const char *name,		/* Name for new encoding. */
     int type,			/* Type of encoding (ENCODING_?????). */
     Tcl_Channel chan)		/* File containing new encoding. */
 {
@@ -1768,10 +1769,10 @@ LoadTableEncoding(
     }
 
     /*
-     * Invert the toUnicode array to produce the fromUnicode array. Performs a
+     * Invert toUnicode array to produce the fromUnicode array. Performs a
      * single malloc to get the memory for the array and all the pages needed
-     * by the array. While reading in the toUnicode array remember what
-     * pages are needed for the fromUnicode array.
+     * by the array. While reading in the toUnicode array, we remembered what
+     * pages that would be needed for the fromUnicode array.
      */
 
     if (symbol) {
@@ -1813,8 +1814,8 @@ LoadTableEncoding(
     if (type == ENCODING_MULTIBYTE) {
 	/*
 	 * If multibyte encodings don't have a backslash character, define
-	 * one. Otherwise, on Windows, native file names don't work because
-	 * the backslash in the file name maps to the unknown character
+	 * one. Otherwise, on Windows, native file names won't work because
+	 * the backslash in the file name will map to the unknown character
 	 * (question mark) when converting from UTF-8 to external encoding.
 	 */
 
@@ -1828,13 +1829,13 @@ LoadTableEncoding(
 	unsigned short *page;
 
 	/*
-	 * Make a special symbol encoding that maps each symbol character from
-	 * its Unicode code point down into page 0, and also ensure that each
-	 * characters on page 0 maps to itself so that a symbol font can be
-	 * used to display a simple string like "abcd" and have alpha, beta,
-	 * chi, delta show up, rather than have "unknown" chars show up because
-	 * strictly speaking the symbol font doesn't have glyphs for those low
-	 * ASCII chars.
+	 * Make a special symbol encoding that not only maps the symbol
+	 * characters from their Unicode code points down into page 0, but
+	 * also ensure that the characters on page 0 map to themselves. This
+	 * is so that a symbol font can be used to display a simple string
+	 * like "abcd" and have alpha, beta, chi, delta show up, rather than
+	 * have "unknown" chars show up because strictly speaking the symbol
+	 * font doesn't have glyphs for those low ascii chars.
 	 */
 
 	page = dataPtr->fromUnicode[0];
@@ -1938,7 +1939,7 @@ LoadTableEncoding(
 
 static Tcl_Encoding
 LoadEscapeEncoding(
-    const char *name,		/* Name of the new encoding. */
+    const char *name,		/* Name for new encoding. */
     Tcl_Channel chan)		/* File containing new encoding. */
 {
     int i;
@@ -2317,7 +2318,7 @@ UtfToUtfProc(
  *
  * UnicodeToUtfProc --
  *
- *	Convert from UTF-16 to UTF-8.
+ *	Convert from Unicode to UTF-8.
  *
  * Results:
  *	Returns TCL_OK if conversion was successful.
@@ -2330,7 +2331,7 @@ UtfToUtfProc(
 
 static int
 UnicodeToUtfProc(
-    ClientData clientData,	/* != NULL means LE, == NUL means BE */
+    ClientData clientData,	/* Not used. */
     const char *src,		/* Source string in Unicode. */
     int srcLen,			/* Source string length in bytes. */
     int flags,			/* Conversion control flags. */
@@ -2358,19 +2359,13 @@ UnicodeToUtfProc(
     const char *srcStart, *srcEnd;
     char *dstEnd, *dstStart;
     int result, numChars;
-    unsigned short ch;
+    Tcl_UniChar ch;
 
     result = TCL_OK;
-
-    /* check alignment with utf-16 (2 == sizeof(UTF-16)) */
-    if ((srcLen % 2) != 0) {
-	result = TCL_CONVERT_MULTIBYTE;
-	srcLen--;
-    }
-    /* If last code point is a high surrogate, we cannot handle that yet */
-    if ((srcLen >= 2) && ((src[srcLen - (clientData?1:2)] & 0xFC) == 0xD8)) {
+    if ((srcLen % sizeof(Tcl_UniChar)) != 0) {
 	result = TCL_CONVERT_MULTIBYTE;
-	srcLen-= 2;
+	srcLen /= sizeof(Tcl_UniChar);
+	srcLen *= sizeof(Tcl_UniChar);
     }
 
     srcStart = src;
@@ -2384,21 +2379,17 @@ UnicodeToUtfProc(
 	    result = TCL_CONVERT_NOSPACE;
 	    break;
 	}
-	if (clientData) {
-	    ch = (src[1] & 0xFF) << 8 | (src[0] & 0xFF);
-	} else {
-	    ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF);
-	}
 	/*
-	 * Special case for 1-byte utf chars for speed. Make sure we work with
-	 * unsigned short-size data.
+	 * Special case for 1-byte utf chars for speed.  Make sure we
+	 * work with Tcl_UniChar-size data.
 	 */
+	ch = *(Tcl_UniChar *)src;
 	if (ch && ch < 0x80) {
 	    *dst++ = (ch & 0xFF);
 	} else {
 	    dst += Tcl_UniCharToUtf(ch, dst);
 	}
-	src += sizeof(unsigned short);
+	src += sizeof(Tcl_UniChar);
     }
 
     *srcReadPtr = src - srcStart;
@@ -2412,7 +2403,7 @@ UnicodeToUtfProc(
  *
  * UtfToUnicodeProc --
  *
- *	Convert from UTF-8 to UTF-16.
+ *	Convert from UTF-8 to Unicode.
  *
  * Results:
  *	Returns TCL_OK if conversion was successful.
@@ -2425,7 +2416,8 @@ UnicodeToUtfProc(
 
 static int
 UtfToUnicodeProc(
-    ClientData clientData,	/* != NULL means LE, == NUL means BE */
+    ClientData clientData,	/* TableEncodingData that specifies
+				 * encoding. */
     const char *src,		/* Source string in UTF-8. */
     int srcLen,			/* Source string length in bytes. */
     int flags,			/* Conversion control flags. */
@@ -2452,7 +2444,7 @@ UtfToUnicodeProc(
 {
     const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
     int result, numChars;
-    Tcl_UniChar ch = 0;
+    Tcl_UniChar ch;
 
     srcStart = src;
     srcEnd = src + srcLen;
@@ -2484,37 +2476,27 @@ UtfToUnicodeProc(
 	 * Need to handle this in a way that won't cause misalignment
 	 * by casting dst to a Tcl_UniChar. [Bug 1122671]
 	 */
-	if (clientData) {
+#ifdef WORDS_BIGENDIAN
 #if TCL_UTF_MAX > 4
-	    if (ch <= 0xFFFF) {
-		*dst++ = (ch & 0xFF);
-		*dst++ = (ch >> 8);
-	    } else {
-		*dst++ = (((ch - 0x10000) >> 10) & 0xFF);
-		*dst++ = (((ch - 0x10000) >> 18) & 0x3) | 0xD8;
-		*dst++ = (ch & 0xFF);
-		*dst++ = ((ch & 0x3) >> 8) | 0xDC;
-	    }
+	*dst++ = (ch >> 24);
+	*dst++ = ((ch >> 16) & 0xFF);
+	*dst++ = ((ch >> 8) & 0xFF);
+	*dst++ = (ch & 0xFF);
 #else
-	    *dst++ = (ch & 0xFF);
-	    *dst++ = (ch >> 8);
+	*dst++ = (ch >> 8);
+	*dst++ = (ch & 0xFF);
 #endif
-	} else {
+#else
 #if TCL_UTF_MAX > 4
-	    if (ch <= 0xFFFF) {
-		*dst++ = (ch >> 8);
-		*dst++ = (ch & 0xFF);
-	    } else {
-		*dst++ = ((ch & 0x3) >> 8) | 0xDC;
-		*dst++ = (ch & 0xFF);
-		*dst++ = (((ch - 0x10000) >> 18) & 0x3) | 0xD8;
-		*dst++ = (((ch - 0x10000) >> 10) & 0xFF);
-	    }
+	*dst++ = (ch & 0xFF);
+	*dst++ = ((ch >> 8) & 0xFF);
+	*dst++ = ((ch >> 16) & 0xFF);
+	*dst++ = (ch >> 24);
 #else
-	    *dst++ = (ch >> 8);
-	    *dst++ = (ch & 0xFF);
+	*dst++ = (ch & 0xFF);
+	*dst++ = (ch >> 8);
+#endif
 #endif
-	}
     }
     *srcReadPtr = src - srcStart;
     *dstWrotePtr = dst - dstStart;
@@ -2917,6 +2899,7 @@ Iso88591FromUtfProc(
 		result = TCL_CONVERT_UNKNOWN;
 		break;
 	    }
+
 	    /*
 	     * Plunge on, using '?' as a fallback character.
 	     */
@@ -3404,13 +3387,14 @@ EscapeFromUtfProc(
  *
  * EscapeFreeProc --
  *
- *	Frees resources used by the encoding.
+ *	This function is invoked when an EscapeEncodingData encoding is
+ *	deleted. It deletes the memory used by the encoding.
  *
  * Results:
  *	None.
  *
  * Side effects:
- *	Memory is freed.
+ *	Memory freed.
  *
  *---------------------------------------------------------------------------
  */
-- 
cgit v0.12


From 5ccd380c46e3e74f3273ecfa83b0686bca5e8056 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Tue, 21 Apr 2020 02:57:41 +0000
Subject: We've settled on using (TCL_UTF_MAX > 3) to indicate 4-byte
 Tcl_UniChar.

---
 generic/tclEncoding.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 5a9d2d5..66bec44 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2477,7 +2477,7 @@ UtfToUnicodeProc(
 	 * by casting dst to a Tcl_UniChar. [Bug 1122671]
 	 */
 #ifdef WORDS_BIGENDIAN
-#if TCL_UTF_MAX > 4
+#if TCL_UTF_MAX > 3
 	*dst++ = (ch >> 24);
 	*dst++ = ((ch >> 16) & 0xFF);
 	*dst++ = ((ch >> 8) & 0xFF);
@@ -2487,7 +2487,7 @@ UtfToUnicodeProc(
 	*dst++ = (ch & 0xFF);
 #endif
 #else
-#if TCL_UTF_MAX > 4
+#if TCL_UTF_MAX > 3
 	*dst++ = (ch & 0xFF);
 	*dst++ = ((ch >> 8) & 0xFF);
 	*dst++ = ((ch >> 16) & 0xFF);
-- 
cgit v0.12


From 108ee434235c4aed8698d7624c7b7c8bce4dae55 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Tue, 21 Apr 2020 07:03:46 +0000
Subject: Add more test-cases for TCL_UTF_MAX>3

---
 generic/tclUtf.c |   2 +-
 tests/utf.test   | 341 ++++++++++++++++++++++++++++++++++---------------------
 2 files changed, 210 insertions(+), 133 deletions(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 842744d..35a98a1 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -74,7 +74,7 @@ static const unsigned char totalBytes[256] = {
     2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
     3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
 #if TCL_UTF_MAX > 3
-	4,4,4,4,4,
+    4,4,4,4,4,
 #else
     1,1,1,1,1,
 #endif
diff --git a/tests/utf.test b/tests/utf.test
index 3301dde..35772ae 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -16,15 +16,20 @@ if {[lsearch [namespace children] ::tcltest] == -1} {
 ::tcltest::loadTestedCommands
 catch [list package require -exact Tcltest [info patchlevel]]
 
-testConstraint testbytestring [llength [info commands testbytestring]]
-
-catch {unset x}
-
-# Some tests require support for 4-byte UTF-8 sequences
 testConstraint smallutf [expr {[format %c 0x010000] == "\uFFFD"}]
 testConstraint fullutf [expr {[format %c 0x010000] != "\uFFFD"}]
 testConstraint tip389 [expr {[string length \U010000] == 2}]
 
+testConstraint testbytestring [llength [info commands testbytestring]]
+testConstraint testfindfirst [llength [info commands testfindfirst]]
+testConstraint testfindlast [llength [info commands testfindlast]]
+testConstraint testnumutfchars [llength [info commands testnumutfchars]]
+testConstraint teststringobj [llength [info commands teststringobj]]
+testConstraint testutfnext [llength [info commands testutfnext]]
+testConstraint testutfprev [llength [info commands testutfprev]]
+
+catch {unset x}
+
 test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} testbytestring {
     expr {"\x01" eq [testbytestring "\x01"]}
 } 1
@@ -96,6 +101,7 @@ test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} t
     string length [testbytestring "\xF0\x8F\xBF\xBF"]
 } {4}
 test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} {testbytestring knownBug} {# Doesn't work with any TCL_UTF_MAX value
+    # Would decode to U+110000 but that is outside the Unicode range.
     string length [testbytestring "\xF4\x90\x80\x80"]
 } {4}
 test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestring {
@@ -105,10 +111,6 @@ test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestr
 test utf-3.1 {Tcl_UtfCharComplete} {
 } {}
 
-testConstraint testnumutfchars [llength [info commands testnumutfchars]]
-testConstraint testfindfirst [llength [info commands testfindfirst]]
-testConstraint testfindlast [llength [info commands testfindlast]]
-
 test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars {
     testnumutfchars ""
 } {0}
@@ -116,7 +118,7 @@ test utf-4.2 {Tcl_NumUtfChars: length 1} {testnumutfchars testbytestring} {
     testnumutfchars [testbytestring "\xC2\xA2"]
 } {1}
 test utf-4.3 {Tcl_NumUtfChars: long string} {testnumutfchars testbytestring} {
-    testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"]
+    testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E"]
 } {7}
 test utf-4.4 {Tcl_NumUtfChars: #u0000} {testnumutfchars testbytestring} {
     testnumutfchars [testbytestring "\xC0\x80"]
@@ -125,13 +127,13 @@ test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars {
     testnumutfchars "" 0
 } {0}
 test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {testnumutfchars testbytestring} {
-    testnumutfchars [testbytestring "\xC2\xA2"] 2
+    testnumutfchars [testbytestring "\xC2\xA2"] 1
 } {1}
 test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {testnumutfchars testbytestring} {
-    testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"] 10
+    testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E"] 10
 } {7}
 test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {testnumutfchars testbytestring} {
-    testnumutfchars [testbytestring "\xC0\x80"] 2
+    testnumutfchars [testbytestring "\xC0\x80"] 1
 } {1}
 # Bug [2738427]: Tcl_NumUtfChars(...) no overflow check
 test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} {testnumutfchars testbytestring} {
@@ -141,10 +143,13 @@ test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars
     testnumutfchars [testbytestring "\x00"] 2
 } {2}
 test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} {
-    testnumutfchars [testbytestring \xf0\x9f\x92\xa9] 3
+    testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 3
 } {3}
-test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring tip389} {
-    testnumutfchars [testbytestring \xf0\x9f\x92\xa9] 4
+test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring smallutf} {
+    testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4
+} {4}
+test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring tip389} {
+    testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4
 } {2}
 
 test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} {
@@ -154,8 +159,6 @@ test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} {
     testfindlast [testbytestring "abcbc"] 98
 } {bc}
 
-testConstraint testutfnext [llength [info commands testutfnext]]
-
 test utf-6.1 {Tcl_UtfNext} testutfnext {
     # This takes the pointer one past the terminating NUL.
     # This is really an invalid call.
@@ -177,7 +180,7 @@ test utf-6.6 {Tcl_UtfNext} testutfnext {
     testutfnext A\xE8
 } 1
 test utf-6.7 {Tcl_UtfNext} testutfnext {
-    testutfnext A\xF4
+    testutfnext A\xF2
 } 1
 test utf-6.8 {Tcl_UtfNext} testutfnext {
     testutfnext A\xF8
@@ -198,7 +201,7 @@ test utf-6.13 {Tcl_UtfNext} testutfnext {
     testutfnext \xA0\xE8
 } 1
 test utf-6.14 {Tcl_UtfNext} testutfnext {
-    testutfnext \xA0\xF4
+    testutfnext \xA0\xF2
 } 1
 test utf-6.15 {Tcl_UtfNext} testutfnext {
     testutfnext \xA0\xF8
@@ -207,7 +210,7 @@ test utf-6.16 {Tcl_UtfNext} testutfnext {
     testutfnext \xD0
 } 1
 test utf-6.17 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0A
+    testutfnext \xD0G
 } 1
 test utf-6.18 {Tcl_UtfNext} testutfnext {
     testutfnext \xD0\xA0
@@ -219,7 +222,7 @@ test utf-6.20 {Tcl_UtfNext} testutfnext {
     testutfnext \xD0\xE8
 } 1
 test utf-6.21 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xF4
+    testutfnext \xD0\xF2
 } 1
 test utf-6.22 {Tcl_UtfNext} testutfnext {
     testutfnext \xD0\xF8
@@ -228,7 +231,7 @@ test utf-6.23 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8
 } 1
 test utf-6.24 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8A
+    testutfnext \xE8G
 } 1
 test utf-6.25 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8\xA0
@@ -240,37 +243,37 @@ test utf-6.27 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8\xE8
 } 1
 test utf-6.28 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xF4
+    testutfnext \xE8\xF2
 } 1
 test utf-6.29 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8\xF8
 } 1
 test utf-6.30 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4
+    testutfnext \xF2
 } 1
 test utf-6.31 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4A
+    testutfnext \xF2G
 } 1
 test utf-6.32 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0
+    testutfnext \xF2\xA0
 } 1
 test utf-6.33 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xD0
+    testutfnext \xF2\xD0
 } 1
 test utf-6.34 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xE8
+    testutfnext \xF2\xE8
 } 1
 test utf-6.35 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xF4
+    testutfnext \xF2\xF2
 } 1
 test utf-6.36 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xF8
+    testutfnext \xF2\xF8
 } 1
 test utf-6.37 {Tcl_UtfNext} testutfnext {
     testutfnext \xF8
 } 1
 test utf-6.38 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF8A
+    testutfnext \xF8G
 } 1
 test utf-6.39 {Tcl_UtfNext} testutfnext {
     testutfnext \xF8\xA0
@@ -282,7 +285,7 @@ test utf-6.41 {Tcl_UtfNext} testutfnext {
     testutfnext \xF8\xE8
 } 1
 test utf-6.42 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF8\xF4
+    testutfnext \xF8\xF2
 } 1
 test utf-6.43 {Tcl_UtfNext} testutfnext {
     testutfnext \xF8\xF8
@@ -300,7 +303,7 @@ test utf-6.47 {Tcl_UtfNext} testutfnext {
     testutfnext \xD0\xA0\xE8
 } 2
 test utf-6.48 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xA0\xF4
+    testutfnext \xD0\xA0\xF2
 } 2
 test utf-6.49 {Tcl_UtfNext} testutfnext {
     testutfnext \xD0\xA0\xF8
@@ -318,28 +321,28 @@ test utf-6.53 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8\xA0\xE8
 } 1
 test utf-6.54 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xF4
+    testutfnext \xE8\xA0\xF2
 } 1
 test utf-6.55 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8\xA0\xF8
 } 1
 test utf-6.56 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0G
+    testutfnext \xF2\xA0G
 } 1
 test utf-6.57 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0
+    testutfnext \xF2\xA0\xA0
 } 1
 test utf-6.58 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xD0
+    testutfnext \xF2\xA0\xD0
 } 1
 test utf-6.59 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xE8
+    testutfnext \xF2\xA0\xE8
 } 1
 test utf-6.60 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xF4
+    testutfnext \xF2\xA0\xF2
 } 1
 test utf-6.61 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xF8
+    testutfnext \xF2\xA0\xF8
 } 1
 test utf-6.62 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8\xA0\xA0G
@@ -354,67 +357,67 @@ test utf-6.65 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8\xA0\xA0\xE8
 } 3
 test utf-6.66 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xA0\xF4
+    testutfnext \xE8\xA0\xA0\xF2
 } 3
 test utf-6.67 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8\xA0\xA0\xF8
 } 3
 test utf-6.68 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0G
+    testutfnext \xF2\xA0\xA0G
 } 1
-test utf-6.69 {Tcl_UtfNext} {testutfnext smallutf} {
-    testutfnext \xF4\xA0\xA0\xA0
+test utf-6.69.0 {Tcl_UtfNext} {testutfnext smallutf} {
+    testutfnext \xF2\xA0\xA0\xA0
 } 1
 test utf-6.69.1 {Tcl_UtfNext} {testutfnext fullutf} {
-    testutfnext \xF4\xA0\xA0\xA0
+    testutfnext \xF2\xA0\xA0\xA0
 } 4
 test utf-6.70 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xD0
+    testutfnext \xF2\xA0\xA0\xD0
 } 1
 test utf-6.71 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xE8
+    testutfnext \xF2\xA0\xA0\xE8
 } 1
 test utf-6.71 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xF4
+    testutfnext \xF2\xA0\xA0\xF2
 } 1
 test utf-6.73 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xF8
+    testutfnext \xF2\xA0\xA0\xF8
 } 1
-test utf-6.74 {Tcl_UtfNext} {testutfnext smallutf} {
-    testutfnext \xF4\xA0\xA0\xA0G
+test utf-6.74.0 {Tcl_UtfNext} {testutfnext smallutf} {
+    testutfnext \xF2\xA0\xA0\xA0G
 } 1
 test utf-6.74.1 {Tcl_UtfNext} {testutfnext fullutf} {
-    testutfnext \xF4\xA0\xA0\xA0G
+    testutfnext \xF2\xA0\xA0\xA0G
 } 4
-test utf-6.75 {Tcl_UtfNext} {testutfnext smallutf} {
-    testutfnext \xF4\xA0\xA0\xA0\xA0
+test utf-6.75.0 {Tcl_UtfNext} {testutfnext smallutf} {
+    testutfnext \xF2\xA0\xA0\xA0\xA0
 } 1
 test utf-6.75.1 {Tcl_UtfNext} {testutfnext fullutf} {
-    testutfnext \xF4\xA0\xA0\xA0\xA0
+    testutfnext \xF2\xA0\xA0\xA0\xA0
 } 4
-test utf-6.76 {Tcl_UtfNext} {testutfnext smallutf} {
-    testutfnext \xF4\xA0\xA0\xA0\xD0
+test utf-6.76.0 {Tcl_UtfNext} {testutfnext smallutf} {
+    testutfnext \xF2\xA0\xA0\xA0\xD0
 } 1
 test utf-6.76.1 {Tcl_UtfNext} {testutfnext fullutf} {
-    testutfnext \xF4\xA0\xA0\xA0\xD0
+    testutfnext \xF2\xA0\xA0\xA0\xD0
 } 4
-test utf-6.77 {Tcl_UtfNext} {testutfnext smallutf} {
-    testutfnext \xF4\xA0\xA0\xA0\xE8
+test utf-6.77.0 {Tcl_UtfNext} {testutfnext smallutf} {
+    testutfnext \xF2\xA0\xA0\xA0\xE8
 } 1
 test utf-6.77.1 {Tcl_UtfNext} {testutfnext fullutf} {
-    testutfnext \xF4\xA0\xA0\xA0\xE8
+    testutfnext \xF2\xA0\xA0\xA0\xE8
 } 4
-test utf-6.78 {Tcl_UtfNext} {testutfnext smallutf} {
-    testutfnext \xF4\xA0\xA0\xA0\xF4
+test utf-6.78.0 {Tcl_UtfNext} {testutfnext smallutf} {
+    testutfnext \xF2\xA0\xA0\xA0\xF2
 } 1
 test utf-6.78.1 {Tcl_UtfNext} {testutfnext fullutf} {
-    testutfnext \xF4\xA0\xA0\xA0\xF4
+    testutfnext \xF2\xA0\xA0\xA0\xF2
 } 4
-test utf-6.79 {Tcl_UtfNext} {testutfnext smallutf} {
-    testutfnext \xF4\xA0\xA0\xA0G\xF8
+test utf-6.79.0 {Tcl_UtfNext} {testutfnext smallutf} {
+    testutfnext \xF2\xA0\xA0\xA0G\xF8
 } 1
 test utf-6.79.1 {Tcl_UtfNext} {testutfnext fullutf} {
-    testutfnext \xF4\xA0\xA0\xA0G\xF8
+    testutfnext \xF2\xA0\xA0\xA0G\xF8
 } 4
 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext {
     testutfnext \xC0\x80
@@ -437,10 +440,10 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext {
 test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext {
     testutfnext \xF0\x80\x80\x80
 } 1
-test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext smallutf} {
+test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext smallutf} {
     testutfnext \xF0\x90\x80\x80
 } 1
-test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} {
+test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} {
     testutfnext \xF0\x90\x80\x80
 } 4
 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
@@ -455,8 +458,18 @@ test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {te
 test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
     testutfnext \xF0\x80\x80 1
 } 3
-
-testConstraint testutfprev [llength [info commands testutfprev]]
+test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext smallutf} {
+    testutfnext \xF4\x8F\xBF\xBF
+} 1
+test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} {
+    testutfnext \xF4\x8F\xBF\xBF
+} 4
+test utf-6.91.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext smallutf} {
+    testutfnext \xF4\x90\x80\x80
+} 1
+test utf-6.91.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} {
+    testutfnext \xF4\x90\x80\x80
+} 4
 test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext {
     testutfnext \xA0\xA0\xA0
 } 3
@@ -489,13 +502,13 @@ test utf-7.4.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xF8\xA0\xA0 2
 } 1
 test utf-7.5 {Tcl_UtfPrev} testutfprev {
-    testutfprev A\xF4
+    testutfprev A\xF2
 } 1
 test utf-7.5.1 {Tcl_UtfPrev} testutfprev {
-    testutfprev A\xF4\xA0\xA0\xA0 2
+    testutfprev A\xF2\xA0\xA0\xA0 2
 } 1
 test utf-7.5.2 {Tcl_UtfPrev} testutfprev {
-    testutfprev A\xF4\xF8\xA0\xA0 2
+    testutfprev A\xF2\xF8\xA0\xA0 2
 } 1
 test utf-7.6 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xE8
@@ -533,23 +546,23 @@ test utf-7.9.1 {Tcl_UtfPrev} testutfprev {
 test utf-7.9.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xF8\xA0 3
 } 2
-test utf-7.10 {Tcl_UtfPrev} {testutfprev smallutf} {
-    testutfprev A\xF4\xA0
+test utf-7.10.0 {Tcl_UtfPrev} {testutfprev smallutf} {
+    testutfprev A\xF2\xA0
 } 2
-test utf-7.10.1 {Tcl_UtfPrev} {testutfprev smallutf} {
-    testutfprev A\xF4\xA0\xA0\xA0 3
-} 2
-test utf-7.10.2 {Tcl_UtfPrev} {testutfprev smallutf} {
-    testutfprev A\xF4\xA0\xF8\xA0 3
-} 2
-test utf-7.10 {Tcl_UtfPrev} {testutfprev fullutf} {
-    testutfprev A\xF4\xA0
-} 1
 test utf-7.10.1 {Tcl_UtfPrev} {testutfprev fullutf} {
-    testutfprev A\xF4\xA0\xA0\xA0 3
+    testutfprev A\xF2\xA0
 } 1
-test utf-7.10.2 {Tcl_UtfPrev} {testutfprev fullutf} {
-    testutfprev A\xF4\xA0\xF8\xA0 3
+test utf-7.10.1.0 {Tcl_UtfPrev} {testutfprev smallutf} {
+    testutfprev A\xF2\xA0\xA0\xA0 3
+} 2
+test utf-7.10.1.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+    testutfprev A\xF2\xA0\xA0\xA0 3
+} 1
+test utf-7.10.2.0 {Tcl_UtfPrev} {testutfprev smallutf} {
+    testutfprev A\xF2\xA0\xF8\xA0 3
+} 2
+test utf-7.10.2.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+    testutfprev A\xF2\xA0\xF8\xA0 3
 } 1
 test utf-7.11 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xE8\xA0
@@ -590,23 +603,23 @@ test utf-7.14.1 {Tcl_UtfPrev} testutfprev {
 test utf-7.14.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xA0\xF8 4
 } 3
-test utf-7.15 {Tcl_UtfPrev} {testutfprev smallutf} {
-    testutfprev A\xF4\xA0\xA0
-} 3
-test utf-7.15.1 {Tcl_UtfPrev} {testutfprev smallutf} {
-    testutfprev A\xF4\xA0\xA0\xA0 4
+test utf-7.15.0 {Tcl_UtfPrev} {testutfprev smallutf} {
+    testutfprev A\xF2\xA0\xA0
 } 3
-test utf-7.15.2 {Tcl_UtfPrev} {testutfprev smallutf} {
-    testutfprev A\xF4\xA0\xA0\xF8 4
-} 3
-test utf-7.15.3 {Tcl_UtfPrev} {testutfprev fullutf} {
-    testutfprev A\xF4\xA0\xA0
+test utf-7.15.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+    testutfprev A\xF2\xA0\xA0
 } 1
-test utf-7.15.4 {Tcl_UtfPrev} {testutfprev fullutf} {
-    testutfprev A\xF4\xA0\xA0\xA0 4
+test utf-7.15.1.0 {Tcl_UtfPrev} {testutfprev smallutf} {
+    testutfprev A\xF2\xA0\xA0\xA0 4
+} 3
+test utf-7.15.1.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+    testutfprev A\xF2\xA0\xA0\xA0 4
 } 1
-test utf-7.15.5 {Tcl_UtfPrev} {testutfprev fullutf} {
-    testutfprev A\xF4\xA0\xA0\xF8 4
+test utf-7.15.2.0 {Tcl_UtfPrev} {testutfprev smallutf} {
+    testutfprev A\xF2\xA0\xA0\xF8 4
+} 3
+test utf-7.15.2.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+    testutfprev A\xF2\xA0\xA0\xF8 4
 } 1
 test utf-7.16 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xE8\xA0\xA0
@@ -728,19 +741,19 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
 test utf-7.38 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
     testutfprev A\xE0\xA0\x80 2
 } 1
-test utf-7.39 {Tcl_UtfPrev -- overlong sequence}  {testutfprev smallutf} {
+test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence}  {testutfprev smallutf} {
     testutfprev A\xF0\x90\x80\x80
 } 2
 test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence}  {testutfprev fullutf} {
     testutfprev A\xF0\x90\x80\x80
 } 1
-test utf-7.40 {Tcl_UtfPrev -- overlong sequence}  {testutfprev smallutf} {
+test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence}  {testutfprev smallutf} {
     testutfprev A\xF0\x90\x80\x80 4
 } 3
 test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence}  {testutfprev fullutf} {
     testutfprev A\xF0\x90\x80\x80 4
 } 1
-test utf-7.41 {Tcl_UtfPrev -- overlong sequence}  {testutfprev smallutf} {
+test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence}  {testutfprev smallutf} {
     testutfprev A\xF0\x90\x80\x80 3
 } 2
 test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence}  {testutfprev fullutf} {
@@ -773,6 +786,48 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {te
 test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} {
     testutfprev \xE8\xA0\x00 2
 } 0
+test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} {
+    testutfprev A\xF4\x8F\xBF\xBF
+} 2
+test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} {
+    testutfprev A\xF4\x8F\xBF\xBF
+} 1
+test utf-7.48.1.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} {
+    testutfprev A\xF4\x8F\xBF\xBF 4
+} 3
+test utf-7.48.1.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} {
+    testutfprev A\xF4\x8F\xBF\xBF 4
+} 1
+test utf-7.48.2.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} {
+    testutfprev A\xF4\x8F\xBF\xBF 3
+} 2
+test utf-7.48.2.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} {
+    testutfprev A\xF4\x8F\xBF\xBF 3
+} 1
+test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+    testutfprev A\xF4\x8F\xBF\xBF 2
+} 1
+test utf-7.49.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} {
+    testutfprev A\xF4\x90\x80\x80
+} 2
+test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} {
+    testutfprev A\xF4\x90\x80\x80
+} 1
+test utf-7.49.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} {
+    testutfprev A\xF4\x90\x80\x80 4
+} 3
+test utf-7.49.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} {
+    testutfprev A\xF4\x90\x80\x80 4
+} 1
+test utf-7.49.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev smallutf} {
+    testutfprev A\xF4\x90\x80\x80 3
+} 2
+test utf-7.49.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} {
+    testutfprev A\xF4\x90\x80\x80 3
+} 1
+test utf-7.49.6 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+    testutfprev A\xF4\x90\x80\x80 2
+} 1
 
 test utf-8.1 {Tcl_UniCharAtIndex: index = 0} {
     string index abcd 0
@@ -786,6 +841,18 @@ test utf-8.3 {Tcl_UniCharAtIndex: index > 0} {
 test utf-8.4 {Tcl_UniCharAtIndex: index > 0} {
     string index \u4E4E\u25A\xFF\u543 2
 } "\uFF"
+test utf-8.5 {Tcl_UniCharAtIndex: high surrogate} smallutf {
+    string index \uD842 0
+} "\uD842"
+test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} {
+    string index \uDC42 0
+} "\uDC42"
+test utf-8.7 {Tcl_UniCharAtIndex: Emoji} smallutf {
+    string index \uD83D\uDE00 0
+} "\uD83D"
+test utf-8.8 {Tcl_UniCharAtIndex: Emoji} {
+    string index \uD83D\uDE00 1
+} "\uDE00"
 
 test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
     string range abcd 0 2
@@ -793,6 +860,12 @@ test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
 test utf-9.2 {Tcl_UtfAtIndex: index > 0} {
     string range \u4E4E\u25A\xFF\u543klmnop 1 5
 } "\u25A\xFF\u543kl"
+test utf-9.3 {Tcl_UtfAtIndex: index = 0, Emoji} smallutf {
+    string range \uD83D\uDE00G 0 0
+} "\uD83D"
+test utf-9.4 {Tcl_UtfAtIndex: index > 0, Emoji} smallutf {
+    string range \uD83D\uDE00G 1 1
+} "\uDE00"
 
 
 test utf-10.1 {Tcl_UtfBackslash: dst == NULL} {
@@ -891,11 +964,11 @@ test utf-11.2 {Tcl_UtfToUpper} {
     string toupper abc
 } ABC
 test utf-11.3 {Tcl_UtfToUpper} {
-    string toupper \u00E3AB
-} \u00C3AB
+    string toupper \xE3gh
+} \xC3GH
 test utf-11.4 {Tcl_UtfToUpper} {
-    string toupper \u01E3AB
-} \u01E2AB
+    string toupper \u01E3gh
+} \u01E2GH
 test utf-11.5 {Tcl_UtfToUpper Georgian (new in Unicode 11)} {
     string toupper \u10D0\u1C90
 } \u1C90\u1C90
@@ -907,14 +980,17 @@ test utf-12.2 {Tcl_UtfToLower} {
     string tolower ABC
 } abc
 test utf-12.3 {Tcl_UtfToLower} {
-    string tolower \u00C3AB
-} \u00E3ab
+    string tolower \xC3GH
+} \xE3gh
 test utf-12.4 {Tcl_UtfToLower} {
-    string tolower \u01E2AB
-} \u01E3ab
+    string tolower \u01E2GH
+} \u01E3gh
 test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} {
     string tolower \u10D0\u1C90
 } \u10D0\u10D0
+test utf-12.6 {Tcl_UtfToUpper low/high surrogate)} smallutf {
+    string tolower \uDC24\uD824
+} \uDC24\uD824
 
 test utf-13.1 {Tcl_UtfToTitle} {
     string totitle {}
@@ -923,8 +999,8 @@ test utf-13.2 {Tcl_UtfToTitle} {
     string totitle abc
 } Abc
 test utf-13.3 {Tcl_UtfToTitle} {
-    string totitle \u00E3AB
-} \u00C3ab
+    string totitle \xE3GH
+} \xC3gh
 test utf-13.4 {Tcl_UtfToTitle} {
     string totitle \u01F3AB
 } \u01F2ab
@@ -934,6 +1010,9 @@ test utf-13.5 {Tcl_UtfToTitle Georgian (new in Unicode 11)} {
 test utf-13.6 {Tcl_UtfToTitle Georgian (new in Unicode 11)} {
     string totitle \u1C90\u10D0
 } \u1C90\u10D0
+test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} smallutf {
+    string totitle \uDC24\uD824
+} \uDC24\uD824
 
 test utf-14.1 {Tcl_UtfNcasecmp} {
     string compare -nocase a b
@@ -952,7 +1031,7 @@ test utf-15.1 {Tcl_UniCharToUpper, negative delta} {
     string toupper aA
 } AA
 test utf-15.2 {Tcl_UniCharToUpper, positive delta} {
-    string toupper \u0178\u00FF
+    string toupper \u0178\xFF
 } \u0178\u0178
 test utf-15.3 {Tcl_UniCharToUpper, no delta} {
     string toupper !
@@ -962,8 +1041,8 @@ test utf-16.1 {Tcl_UniCharToLower, negative delta} {
     string tolower aA
 } aa
 test utf-16.2 {Tcl_UniCharToLower, positive delta} {
-    string tolower \u0178\u00FF\uA78D\u01C5
-} \u00FF\u00FF\u0265\u01C6
+    string tolower \u0178\xFF\uA78D\u01C5
+} \xFF\xFF\u0265\u01C6
 
 test utf-17.1 {Tcl_UniCharToLower, no delta} {
     string tolower !
@@ -977,9 +1056,9 @@ test utf-18.2 {Tcl_UniCharToTitle, subtract one for title} {
 } \u01C5
 test utf-18.3 {Tcl_UniCharToTitle, subtract delta for title (positive)} {
     string totitle \u017F
-} \u0053
+} \x53
 test utf-18.4 {Tcl_UniCharToTitle, subtract delta for title (negative)} {
-    string totitle \u00FF
+    string totitle \xFF
 } \u0178
 test utf-18.5 {Tcl_UniCharToTitle, no delta} {
     string totitle !
@@ -1016,31 +1095,31 @@ test utf-21.5 {unicode graph char in regc_locale.c} {
 } {1}
 test utf-21.6 {TclUniCharIsGraph} {
     # [Bug 3464428]
-    string is graph \u00A0
+    string is graph \xA0
 } {0}
 test utf-21.7 {unicode graph char in regc_locale.c} {
     # [Bug 3464428]
-    regexp {[[:graph:]]} \u0020\u00A0\u2028\u2029
+    regexp {[[:graph:]]} \x20\xA0\u2028\u2029
 } {0}
 test utf-21.8 {TclUniCharIsPrint} {
     # [Bug 3464428]
-    string is print \u0009
+    string is print \x09
 } {0}
 test utf-21.9 {unicode print char in regc_locale.c} {
     # [Bug 3464428]
-    regexp {[[:print:]]} \u0009
+    regexp {[[:print:]]} \x09
 } {0}
 test utf-21.10 {unicode print char in regc_locale.c} {
     # [Bug 3464428]
-    regexp {[[:print:]]} \u0009
+    regexp {[[:print:]]} \x09
 } {0}
 test utf-21.11 {TclUniCharIsControl} {
     # [Bug 3464428]
-    string is control \u0000\u001F\u00AD\u0605\u061C\u180E\u2066\uFEFF
+    string is control \x00\x1F\xAD\u0605\u061C\u180E\u2066\uFEFF
 } {1}
 test utf-21.12 {unicode control char in regc_locale.c} {
     # [Bug 3464428], [Bug a876646efe]
-    regexp {^[[:cntrl:]]*$} \u0000\u001F\u00AD\u0605\u061C\u180E\u2066\uFEFF
+    regexp {^[[:cntrl:]]*$} \x00\x1F\xAD\u0605\u061C\u180E\u2066\uFEFF
 } {1}
 
 test utf-22.1 {TclUniCharIsWordChar} {
@@ -1070,15 +1149,13 @@ test utf-24.2 {unicode digit char in regc_locale.c} {
 
 test utf-24.3 {TclUniCharIsSpace} {
     # this returns 1 with Unicode 7/TIP 413 compliance
-    string is space \u0085\u1680\u180E\u200B\u202F\u2060
+    string is space \x85\u1680\u180E\u200B\u202F\u2060
 } {1}
 test utf-24.4 {unicode space char in regc_locale.c} {
     # this returns 1 with Unicode 7/TIP 413 compliance
-    list [regexp {^[[:space:]]+$} \u0085\u1680\u180E\u200B\u202F\u2060] [regexp {^\s+$} \u0085\u1680\u180E\u200B\u202F\u2060]
+    list [regexp {^[[:space:]]+$} \x85\u1680\u180E\u200B\u202F\u2060] [regexp {^\s+$} \x85\u1680\u180E\u200B\u202F\u2060]
 } {1 1}
 
-testConstraint teststringobj [llength [info commands teststringobj]]
-
 test utf-25.1 {Tcl_UniCharNcasecmp} -constraints teststringobj \
     -setup {
 	testobj freeallvars
-- 
cgit v0.12


From 941ef44a3fce68b1dc81abb397a80f209b2ca982 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Tue, 21 Apr 2020 14:52:48 +0000
Subject: Revert the other encoding system backport.

The blocking and failing tests are illustrations of existing tickets
[1004065] and [1122671], recording that the encoding machinery hardcodes
assumptions in multiple places that sizeof(Tcl_UniChar) == 2.

Closing the segfault bug fix should not be hostage to fixing those old bugs.
---
 generic/tclEncoding.c | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 66bec44..6c16827 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2470,33 +2470,20 @@ UtfToUnicodeProc(
 	if (dst > dstEnd) {
 	    result = TCL_CONVERT_NOSPACE;
 	    break;
-	}
+        }
 	src += TclUtfToUniChar(src, &ch);
 	/*
 	 * Need to handle this in a way that won't cause misalignment
 	 * by casting dst to a Tcl_UniChar. [Bug 1122671]
+	 * XXX: This hard-codes the assumed size of Tcl_UniChar as 2.
 	 */
 #ifdef WORDS_BIGENDIAN
-#if TCL_UTF_MAX > 3
-	*dst++ = (ch >> 24);
-	*dst++ = ((ch >> 16) & 0xFF);
-	*dst++ = ((ch >> 8) & 0xFF);
-	*dst++ = (ch & 0xFF);
-#else
 	*dst++ = (ch >> 8);
 	*dst++ = (ch & 0xFF);
-#endif
-#else
-#if TCL_UTF_MAX > 3
-	*dst++ = (ch & 0xFF);
-	*dst++ = ((ch >> 8) & 0xFF);
-	*dst++ = ((ch >> 16) & 0xFF);
-	*dst++ = (ch >> 24);
 #else
 	*dst++ = (ch & 0xFF);
 	*dst++ = (ch >> 8);
 #endif
-#endif
     }
     *srcReadPtr = src - srcStart;
     *dstWrotePtr = dst - dstStart;
-- 
cgit v0.12


From d48bca33242b3f10d21a25a6c6a91c27ae707b96 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Tue, 21 Apr 2020 15:41:01 +0000
Subject: Move testing command [testsize] from Windows to generic. Extend it to
 report sizeof(Tcl_UniChar).

---
 generic/tclTest.c | 31 +++++++++++++++++++++++++++++++
 win/tclWinTest.c  | 28 ----------------------------
 2 files changed, 31 insertions(+), 28 deletions(-)

diff --git a/generic/tclTest.c b/generic/tclTest.c
index 8c29aa7..b9fd204 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -281,6 +281,7 @@ static Tcl_CmdProc	Testset2Cmd;
 static Tcl_CmdProc	TestseterrorcodeCmd;
 static Tcl_ObjCmdProc	TestsetobjerrorcodeCmd;
 static Tcl_CmdProc	TestsetplatformCmd;
+static Tcl_ObjCmdProc	TestsizeCmd;
 static Tcl_CmdProc	TeststaticpkgCmd;
 static Tcl_CmdProc	TesttranslatefilenameCmd;
 static Tcl_CmdProc	TestupvarCmd;
@@ -592,6 +593,7 @@ Tcltest_Init(
 	    TestFindLastCmd, NULL, NULL);
     Tcl_CreateCommand(interp, "testsetplatform", TestsetplatformCmd,
 	    NULL, NULL);
+    Tcl_CreateObjCommand(interp, "testsize", TestsizeCmd, NULL, NULL);
     Tcl_CreateCommand(interp, "teststaticpkg", TeststaticpkgCmd,
 	    NULL, NULL);
     Tcl_CreateCommand(interp, "testtranslatefilename",
@@ -4122,6 +4124,35 @@ TestsetplatformCmd(
     return TCL_OK;
 }
 
+static int
+TestsizeCmd(
+    ClientData clientData,      /* Unused */
+    Tcl_Interp* interp,         /* Tcl interpreter */
+    int objc,                   /* Parameter count */
+    Tcl_Obj *const * objv)      /* Parameter vector */
+{
+    if (objc != 2) {
+        goto syntax;
+    }
+    if (strcmp(Tcl_GetString(objv[1]), "time_t") == 0) {
+        Tcl_SetObjResult(interp, Tcl_NewWideIntObj(sizeof(time_t)));
+        return TCL_OK;
+    }
+    if (strcmp(Tcl_GetString(objv[1]), "st_mtime") == 0) {
+        Tcl_StatBuf *statPtr;
+        Tcl_SetObjResult(interp, Tcl_NewWideIntObj(sizeof(statPtr->st_mtime)));
+        return TCL_OK;
+    }
+    if (strcmp(Tcl_GetString(objv[1]), "unichar") == 0) {
+        Tcl_SetObjResult(interp, Tcl_NewWideIntObj(sizeof(Tcl_UniChar)));
+        return TCL_OK;
+    }
+
+syntax:
+    Tcl_WrongNumArgs(interp, 1, objv, "time_t|st_mtime|unichar");
+    return TCL_ERROR;
+}
+
 /*
  *----------------------------------------------------------------------
  *
diff --git a/win/tclWinTest.c b/win/tclWinTest.c
index 04878fe..7f49b63 100644
--- a/win/tclWinTest.c
+++ b/win/tclWinTest.c
@@ -39,8 +39,6 @@ static int		TestwinclockCmd(ClientData dummy, Tcl_Interp* interp,
 			    int objc, Tcl_Obj *const objv[]);
 static int		TestwinsleepCmd(ClientData dummy, Tcl_Interp* interp,
 			    int objc, Tcl_Obj *const objv[]);
-static int		TestSizeCmd(ClientData dummy, Tcl_Interp* interp,
-			    int objc, Tcl_Obj *const objv[]);
 static Tcl_ObjCmdProc	TestExceptionCmd;
 static int		TestplatformChmod(const char *nativePath, int pmode);
 static int		TestchmodCmd(ClientData dummy,
@@ -78,7 +76,6 @@ TclplatformtestInit(
     Tcl_CreateObjCommand(interp, "testwinclock", TestwinclockCmd, NULL, NULL);
     Tcl_CreateObjCommand(interp, "testwinsleep", TestwinsleepCmd, NULL, NULL);
     Tcl_CreateObjCommand(interp, "testexcept", TestExceptionCmd, NULL, NULL);
-    Tcl_CreateObjCommand(interp, "testsize", TestSizeCmd, NULL, NULL);
     return TCL_OK;
 }
 
@@ -312,31 +309,6 @@ TestwinsleepCmd(
     return TCL_OK;
 }
 
-static int
-TestSizeCmd(
-    ClientData clientData,	/* Unused */
-    Tcl_Interp* interp,		/* Tcl interpreter */
-    int objc,			/* Parameter count */
-    Tcl_Obj *const * objv)	/* Parameter vector */
-{
-    if (objc != 2) {
-	goto syntax;
-    }
-    if (strcmp(Tcl_GetString(objv[1]), "time_t") == 0) {
-	Tcl_SetObjResult(interp, Tcl_NewWideIntObj(sizeof(time_t)));
-	return TCL_OK;
-    }
-    if (strcmp(Tcl_GetString(objv[1]), "st_mtime") == 0) {
-        Tcl_StatBuf *statPtr;
-        Tcl_SetObjResult(interp, Tcl_NewWideIntObj(sizeof(statPtr->st_mtime)));
-        return TCL_OK;
-    }
-
-syntax:
-    Tcl_WrongNumArgs(interp, 1, objv, "time_t|st_mtime");
-    return TCL_ERROR;
-}
-
 /*
  *----------------------------------------------------------------------
  *
-- 
cgit v0.12


From 1588b8475d8a1378e6e9504e10913d756d84983b Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Tue, 21 Apr 2020 15:52:03 +0000
Subject: Use new testing command to constrain tests to (sizeof(Tcl_UniChar) ==
 2) until bugs are fixed when (sizeof(Tcl_UniChar == 4).

---
 tests/chanio.test   | 4 +++-
 tests/encoding.test | 6 +++++-
 tests/io.test       | 5 ++++-
 tests/source.test   | 5 ++++-
 4 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/tests/chanio.test b/tests/chanio.test
index 5fae431..c2f561b 100644
--- a/tests/chanio.test
+++ b/tests/chanio.test
@@ -29,6 +29,8 @@ namespace eval ::tcl::test::io {
     variable msg
     variable expected
 
+    testConstraint ucs2	[expr {	[llength [info commands testsize]] &&
+				([testsize unichar] == 2) }]
     testConstraint testchannel      [llength [info commands testchannel]]
     testConstraint exec             [llength [info commands exec]]
     testConstraint openpipe         1
@@ -875,7 +877,7 @@ test chan-io-6.44 {Tcl_GetsObj: input saw cr, not followed by cr} {stdio testcha
     chan close $f
     set x
 } [list "bbbbbbbbbbbbbbb" 15 "123456789abcdef" 1 4 "abcd" 0 3 "efg"]
-test chan-io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} {stdio testchannel openpipe fileevent} {
+test chan-io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} {stdio testchannel openpipe fileevent ucs2} {
     # Tcl_ExternalToUtf()
 
     set f [open "|[list [interpreter] $path(cat)]" w+]
diff --git a/tests/encoding.test b/tests/encoding.test
index 8722a93..ad55e26 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -32,6 +32,10 @@ proc runtests {} {
 testConstraint testencoding [llength [info commands testencoding]]
 testConstraint exec [llength [info commands exec]]
 
+testConstraint ucs2 [expr { [llength [info commands testsize]] &&
+                                ([testsize unichar] == 2) }]
+
+
 # TclInitEncodingSubsystem is tested by the rest of this file
 # TclFinalizeEncodingSubsystem is not currently tested
 
@@ -316,7 +320,7 @@ test encoding-15.3 {UtfToUtfProc null character input} {
     list [string bytelength $x] [string bytelength $y] $z
 } {1 2 c080}
 
-test encoding-16.1 {UnicodeToUtfProc} {
+test encoding-16.1 {UnicodeToUtfProc} ucs2 {
     set val [encoding convertfrom unicode NN]
     list $val [format %x [scan $val %c]]
 } "\u4e4e 4e4e"
diff --git a/tests/io.test b/tests/io.test
index 04fa1d2..1c18576 100644
--- a/tests/io.test
+++ b/tests/io.test
@@ -29,6 +29,9 @@ namespace eval ::tcl::test::io {
     variable msg
     variable expected
 
+testConstraint ucs2 [expr { [llength [info commands testsize]] &&
+                            ([testsize unichar] == 2) }]
+
 testConstraint testchannel      [llength [info commands testchannel]]
 testConstraint exec             [llength [info commands exec]]
 testConstraint openpipe         1
@@ -910,7 +913,7 @@ test io-6.44 {Tcl_GetsObj: input saw cr, not followed by cr} {stdio testchannel
     close $f
     set x
 } [list "bbbbbbbbbbbbbbb" 15 "123456789abcdef" 1 4 "abcd" 0 3 "efg"]
-test io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} {stdio testchannel openpipe fileevent} {
+test io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} {stdio testchannel openpipe fileevent ucs2} {
     # Tcl_ExternalToUtf()
 
     set f [open "|[list [interpreter] $path(cat)]" w+]
diff --git a/tests/source.test b/tests/source.test
index dc3c2d8..8511004 100644
--- a/tests/source.test
+++ b/tests/source.test
@@ -20,6 +20,9 @@ if {[catch {package require tcltest 2.1}]} {
 namespace eval ::tcl::test::source {
     namespace import ::tcltest::*
 
+testConstraint ucs2 [expr { [llength [info commands testsize]] &&
+                                ([testsize unichar] == 2) }]
+
 test source-1.1 {source command} -setup {
     set x "old x value"
     set y "old y value"
@@ -232,7 +235,7 @@ test source-7.1 {source -encoding test} -setup {
 } -cleanup {
     removeFile source.file
 } -result correct
-test source-7.2 {source -encoding test} -setup {
+test source-7.2 {source -encoding test} -constraints ucs2 -setup {
     # This tests for bad interactions between [source -encoding]
     # and use of the Control-Z character (\u001A) as a cross-platform
     # EOF character by [source].  Here we write out and the [source] a
-- 
cgit v0.12


From 4da0e252257e24143039784510363d066545be27 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Tue, 21 Apr 2020 15:56:34 +0000
Subject: remove merge litter

---
 tests/util.test | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/util.test b/tests/util.test
index a483de1..85c06dd 100644
--- a/tests/util.test
+++ b/tests/util.test
@@ -15,7 +15,6 @@ if {[lsearch [namespace children] ::tcltest] == -1} {
 testConstraint testdstring [llength [info commands testdstring]]
 testConstraint testconcatobj [llength [info commands testconcatobj]]
 testConstraint testdoubledigits [llength [info commands testdoubledigits]]
-testConstraint compat85 [expr {[format %c 0x010000] == "\uFFFD"}]
 
 # Big test for correct ordering of data in [expr]
 
-- 
cgit v0.12


From ce76e24a88d8c6c8abfd5da63402691c072e697b Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Tue, 21 Apr 2020 19:45:34 +0000
Subject: Improve the "testutfnext" command. It can now accept both bytes and
 strings, and it will test whether src[-1] is read without needing
 test-variations for it.

---
 generic/tclTest.c |  48 +++++++------
 tests/utf.test    | 208 +++++++++++++++++++++++++++---------------------------
 2 files changed, 131 insertions(+), 125 deletions(-)

diff --git a/generic/tclTest.c b/generic/tclTest.c
index b9fd204..7a531b4 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -7113,7 +7113,7 @@ SimpleListVolumes(void)
 /*
  * Used to check operations of Tcl_UtfNext.
  *
- * Usage: testutfnext $bytes $offset
+ * Usage: testutfnext -bytestring $bytes
  */
 
 static int
@@ -7123,37 +7123,43 @@ TestUtfNextCmd(
     int objc,
     Tcl_Obj *const objv[])
 {
-    int numBytes, offset = 0;
+    int numBytes;
     char *bytes;
-    const char *result;
-    Tcl_Obj *copy;
+    const char *result, *first;
+    char buffer[32];
+    static const char tobetested[] = "\xFF\xFE\xF4\xF2\xF0\xEF\xE8\xE3\xE2\xE1\xE0\xC2\xC1\xC0\x82";
+    const char *p = tobetested;
+
+    if (objc != 3 || strcmp(Tcl_GetString(objv[1]), "-bytestring")) {
+	if (objc != 2) {
+	    Tcl_WrongNumArgs(interp, 1, objv, "?-bytestring? bytes");
+	    return TCL_ERROR;
+	}
+	bytes = Tcl_GetStringFromObj(objv[1], &numBytes);
+    } else {
+	bytes = (char *) Tcl_GetByteArrayFromObj(objv[2], &numBytes);
+    }
 
-    if (objc < 2 || objc > 3) {
-	Tcl_WrongNumArgs(interp, 1, objv, "bytes ?offset?");
+    if (numBytes > sizeof(buffer)-2) {
+	Tcl_AppendResult(interp, "\"testutfnext\" can only handle 30 bytes", NULL);
 	return TCL_ERROR;
     }
 
-    bytes = (char *) Tcl_GetByteArrayFromObj(objv[1], &numBytes);
+    memcpy(buffer + 1, bytes, numBytes);
+    buffer[0] = buffer[numBytes + 1] = '\x00';
 
-    if (objc == 3) {
-	if (TCL_OK != TclGetIntForIndex(interp, objv[2], numBytes, &offset)) {
+    first = Tcl_UtfNext(buffer + 1);
+    while ((buffer[0] = *p++) != '\0') {
+	/* Run Tcl_UtfNext with many more possible bytes at src[-1], all should give the same result */
+	result = Tcl_UtfNext(buffer + 1);
+	if (first != result) {
+	    Tcl_AppendResult(interp, "Tcl_UtfNext is not supposed to read src[-1]", NULL);
 	    return TCL_ERROR;
 	}
-	if (offset < 0) {
-	    offset = 0;
-	}
-	if (offset > numBytes) {
-	    offset = numBytes;
-	}
     }
-    copy = Tcl_DuplicateObj(objv[1]);
-    bytes = (char *) Tcl_SetByteArrayLength(copy, numBytes+1);
-    bytes[numBytes] = '\0';
 
-    result = Tcl_UtfNext(bytes + offset);
-    Tcl_SetObjResult(interp, Tcl_NewIntObj(result - bytes));
+    Tcl_SetObjResult(interp, Tcl_NewIntObj(result - buffer - 1));
 
-    Tcl_DecrRefCount(copy);
     return TCL_OK;
 }
 /*
diff --git a/tests/utf.test b/tests/utf.test
index 1c79f32..0a81ae3 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -143,7 +143,7 @@ test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} {
 test utf-6.1 {Tcl_UtfNext} testutfnext {
     # This takes the pointer one past the terminating NUL.
     # This is really an invalid call.
-    testutfnext {}
+    testutfnext -bytestring {}
 } 1
 test utf-6.2 {Tcl_UtfNext} testutfnext {
     testutfnext A
@@ -152,301 +152,301 @@ test utf-6.3 {Tcl_UtfNext} testutfnext {
     testutfnext AA
 } 1
 test utf-6.4 {Tcl_UtfNext} testutfnext {
-    testutfnext A\xA0
+    testutfnext -bytestring A\xA0
 } 1
 test utf-6.5 {Tcl_UtfNext} testutfnext {
-    testutfnext A\xD0
+    testutfnext -bytestring A\xD0
 } 1
 test utf-6.6 {Tcl_UtfNext} testutfnext {
-    testutfnext A\xE8
+    testutfnext -bytestring A\xE8
 } 1
 test utf-6.7 {Tcl_UtfNext} testutfnext {
-    testutfnext A\xF2
+    testutfnext -bytestring A\xF2
 } 1
 test utf-6.8 {Tcl_UtfNext} testutfnext {
-    testutfnext A\xF8
+    testutfnext -bytestring A\xF8
 } 1
 test utf-6.9 {Tcl_UtfNext} testutfnext {
-    testutfnext \xA0
+    testutfnext -bytestring \xA0
 } 1
 test utf-6.10 {Tcl_UtfNext} testutfnext {
-    testutfnext \xA0G
+    testutfnext -bytestring \xA0G
 } 1
 test utf-6.11 {Tcl_UtfNext} testutfnext {
-    testutfnext \xA0\xA0
+    testutfnext -bytestring \xA0\xA0
 } 1
 test utf-6.12 {Tcl_UtfNext} testutfnext {
-    testutfnext \xA0\xD0
+    testutfnext -bytestring \xA0\xD0
 } 1
 test utf-6.13 {Tcl_UtfNext} testutfnext {
-    testutfnext \xA0\xE8
+    testutfnext -bytestring \xA0\xE8
 } 1
 test utf-6.14 {Tcl_UtfNext} testutfnext {
-    testutfnext \xA0\xF2
+    testutfnext -bytestring \xA0\xF2
 } 1
 test utf-6.15 {Tcl_UtfNext} testutfnext {
-    testutfnext \xA0\xF8
+    testutfnext -bytestring \xA0\xF8
 } 1
 test utf-6.16 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0
+    testutfnext -bytestring \xD0
 } 1
 test utf-6.17 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0G
+    testutfnext -bytestring \xD0G
 } 1
 test utf-6.18 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xA0
+    testutfnext -bytestring \xD0\xA0
 } 2
 test utf-6.19 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xD0
+    testutfnext -bytestring \xD0\xD0
 } 1
 test utf-6.20 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xE8
+    testutfnext -bytestring \xD0\xE8
 } 1
 test utf-6.21 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xF2
+    testutfnext -bytestring \xD0\xF2
 } 1
 test utf-6.22 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xF8
+    testutfnext -bytestring \xD0\xF8
 } 1
 test utf-6.23 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8
+    testutfnext -bytestring \xE8
 } 1
 test utf-6.24 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8G
+    testutfnext -bytestring \xE8G
 } 1
 test utf-6.25 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0
+    testutfnext -bytestring \xE8\xA0
 } 1
 test utf-6.26 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xD0
+    testutfnext -bytestring \xE8\xD0
 } 1
 test utf-6.27 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xE8
+    testutfnext -bytestring \xE8\xE8
 } 1
 test utf-6.28 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xF2
+    testutfnext -bytestring \xE8\xF2
 } 1
 test utf-6.29 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xF8
+    testutfnext -bytestring \xE8\xF8
 } 1
 test utf-6.30 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2
+    testutfnext -bytestring \xF2
 } 1
 test utf-6.31 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2G
+    testutfnext -bytestring \xF2G
 } 1
 test utf-6.32 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xA0
+    testutfnext -bytestring \xF2\xA0
 } 1
 test utf-6.33 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xD0
+    testutfnext -bytestring \xF2\xD0
 } 1
 test utf-6.34 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xE8
+    testutfnext -bytestring \xF2\xE8
 } 1
 test utf-6.35 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xF2
+    testutfnext -bytestring \xF2\xF2
 } 1
 test utf-6.36 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xF8
+    testutfnext -bytestring \xF2\xF8
 } 1
 test utf-6.37 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF8
+    testutfnext -bytestring \xF8
 } 1
 test utf-6.38 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF8G
+    testutfnext -bytestring \xF8G
 } 1
 test utf-6.39 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF8\xA0
+    testutfnext -bytestring \xF8\xA0
 } 1
 test utf-6.40 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF8\xD0
+    testutfnext -bytestring \xF8\xD0
 } 1
 test utf-6.41 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF8\xE8
+    testutfnext -bytestring \xF8\xE8
 } 1
 test utf-6.42 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF8\xF2
+    testutfnext -bytestring \xF8\xF2
 } 1
 test utf-6.43 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF8\xF8
+    testutfnext -bytestring \xF8\xF8
 } 1
 test utf-6.44 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xA0G
+    testutfnext -bytestring \xD0\xA0G
 } 2
 test utf-6.45 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xA0\xA0
+    testutfnext -bytestring \xD0\xA0\xA0
 } 2
 test utf-6.46 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xA0\xD0
+    testutfnext -bytestring \xD0\xA0\xD0
 } 2
 test utf-6.47 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xA0\xE8
+    testutfnext -bytestring \xD0\xA0\xE8
 } 2
 test utf-6.48 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xA0\xF2
+    testutfnext -bytestring \xD0\xA0\xF2
 } 2
 test utf-6.49 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xA0\xF8
+    testutfnext -bytestring \xD0\xA0\xF8
 } 2
 test utf-6.50 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0G
+    testutfnext -bytestring \xE8\xA0G
 } 1
 test utf-6.51 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xA0
+    testutfnext -bytestring \xE8\xA0\xA0
 } 3
 test utf-6.52 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xD0
+    testutfnext -bytestring \xE8\xA0\xD0
 } 1
 test utf-6.53 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xE8
+    testutfnext -bytestring \xE8\xA0\xE8
 } 1
 test utf-6.54 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xF2
+    testutfnext -bytestring \xE8\xA0\xF2
 } 1
 test utf-6.55 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xF8
+    testutfnext -bytestring \xE8\xA0\xF8
 } 1
 test utf-6.56 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xA0G
+    testutfnext -bytestring \xF2\xA0G
 } 1
 test utf-6.57 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xA0\xA0
+    testutfnext -bytestring \xF2\xA0\xA0
 } 1
 test utf-6.58 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xA0\xD0
+    testutfnext -bytestring \xF2\xA0\xD0
 } 1
 test utf-6.59 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xA0\xE8
+    testutfnext -bytestring \xF2\xA0\xE8
 } 1
 test utf-6.60 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xA0\xF2
+    testutfnext -bytestring \xF2\xA0\xF2
 } 1
 test utf-6.61 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xA0\xF8
+    testutfnext -bytestring \xF2\xA0\xF8
 } 1
 test utf-6.62 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xA0G
+    testutfnext -bytestring \xE8\xA0\xA0G
 } 3
 test utf-6.63 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xA0\xA0
+    testutfnext -bytestring \xE8\xA0\xA0\xA0
 } 3
 test utf-6.64 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xA0\xD0
+    testutfnext -bytestring \xE8\xA0\xA0\xD0
 } 3
 test utf-6.65 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xA0\xE8
+    testutfnext -bytestring \xE8\xA0\xA0\xE8
 } 3
 test utf-6.66 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xA0\xF2
+    testutfnext -bytestring \xE8\xA0\xA0\xF2
 } 3
 test utf-6.67 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xA0\xF8
+    testutfnext -bytestring \xE8\xA0\xA0\xF8
 } 3
 test utf-6.68 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xA0\xA0G
+    testutfnext -bytestring \xF2\xA0\xA0G
 } 1
 test utf-6.69.0 {Tcl_UtfNext} {testutfnext compat85} {
-    testutfnext \xF2\xA0\xA0\xA0
+    testutfnext -bytestring \xF2\xA0\xA0\xA0
 } 1
 test utf-6.69.1 {Tcl_UtfNext} {testutfnext fullutf} {
-    testutfnext \xF2\xA0\xA0\xA0
+    testutfnext -bytestring \xF2\xA0\xA0\xA0
 } 4
 test utf-6.70 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xA0\xA0\xD0
+    testutfnext -bytestring \xF2\xA0\xA0\xD0
 } 1
 test utf-6.71 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xA0\xA0\xE8
+    testutfnext -bytestring \xF2\xA0\xA0\xE8
 } 1
 test utf-6.71 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xA0\xA0\xF2
+    testutfnext -bytestring \xF2\xA0\xA0\xF2
 } 1
 test utf-6.73 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xA0\xA0\xF8
+    testutfnext -bytestring \xF2\xA0\xA0\xF8
 } 1
 test utf-6.74.0 {Tcl_UtfNext} {testutfnext compat85} {
-    testutfnext \xF2\xA0\xA0\xA0G
+    testutfnext -bytestring \xF2\xA0\xA0\xA0G
 } 1
 test utf-6.74.1 {Tcl_UtfNext} {testutfnext fullutf} {
-    testutfnext \xF2\xA0\xA0\xA0G
+    testutfnext -bytestring \xF2\xA0\xA0\xA0G
 } 4
 test utf-6.75.0 {Tcl_UtfNext} {testutfnext compat85} {
-    testutfnext \xF2\xA0\xA0\xA0\xA0
+    testutfnext -bytestring \xF2\xA0\xA0\xA0\xA0
 } 1
 test utf-6.75.1 {Tcl_UtfNext} {testutfnext fullutf} {
-    testutfnext \xF2\xA0\xA0\xA0\xA0
+    testutfnext -bytestring \xF2\xA0\xA0\xA0\xA0
 } 4
 test utf-6.76.0 {Tcl_UtfNext} {testutfnext compat85} {
-    testutfnext \xF2\xA0\xA0\xA0\xD0
+    testutfnext -bytestring \xF2\xA0\xA0\xA0\xD0
 } 1
 test utf-6.76.1 {Tcl_UtfNext} {testutfnext fullutf} {
-    testutfnext \xF2\xA0\xA0\xA0\xD0
+    testutfnext -bytestring \xF2\xA0\xA0\xA0\xD0
 } 4
 test utf-6.77.0 {Tcl_UtfNext} {testutfnext compat85} {
-    testutfnext \xF2\xA0\xA0\xA0\xE8
+    testutfnext -bytestring \xF2\xA0\xA0\xA0\xE8
 } 1
 test utf-6.77.1 {Tcl_UtfNext} {testutfnext fullutf} {
-    testutfnext \xF2\xA0\xA0\xA0\xE8
+    testutfnext -bytestring \xF2\xA0\xA0\xA0\xE8
 } 4
 test utf-6.78.0 {Tcl_UtfNext} {testutfnext compat85} {
-    testutfnext \xF2\xA0\xA0\xA0\xF2
+    testutfnext -bytestring \xF2\xA0\xA0\xA0\xF2
 } 1
 test utf-6.78.1 {Tcl_UtfNext} {testutfnext fullutf} {
-    testutfnext \xF2\xA0\xA0\xA0\xF2
+    testutfnext -bytestring \xF2\xA0\xA0\xA0\xF2
 } 4
 test utf-6.79.0 {Tcl_UtfNext} {testutfnext compat85} {
-    testutfnext \xF2\xA0\xA0\xA0G\xF8
+    testutfnext -bytestring \xF2\xA0\xA0\xA0G\xF8
 } 1
 test utf-6.79.1 {Tcl_UtfNext} {testutfnext fullutf} {
-    testutfnext \xF2\xA0\xA0\xA0G\xF8
+    testutfnext -bytestring \xF2\xA0\xA0\xA0G\xF8
 } 4
 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext {
-    testutfnext \xC0\x80
+    testutfnext -bytestring \xC0\x80
 } 2
 test utf-6.81 {Tcl_UtfNext - overlong sequences} testutfnext {
-    testutfnext \xC0\x81
+    testutfnext -bytestring \xC0\x81
 } 1
 test utf-6.82 {Tcl_UtfNext - overlong sequences} testutfnext {
-    testutfnext \xC1\x80
+    testutfnext -bytestring \xC1\x80
 } 1
 test utf-6.83 {Tcl_UtfNext - overlong sequences} testutfnext {
-    testutfnext \xC2\x80
+    testutfnext -bytestring \xC2\x80
 } 2
 test utf-6.84 {Tcl_UtfNext - overlong sequences} testutfnext {
-    testutfnext \xE0\x80\x80
+    testutfnext -bytestring \xE0\x80\x80
 } 1
 test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext {
-    testutfnext \xE0\xA0\x80
+    testutfnext -bytestring \xE0\xA0\x80
 } 3
 test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext {
-    testutfnext \xF0\x80\x80\x80
+    testutfnext -bytestring \xF0\x80\x80\x80
 } 1
 test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext compat85} {
-    testutfnext \xF0\x90\x80\x80
+    testutfnext -bytestring \xF0\x90\x80\x80
 } 1
 test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} {
-    testutfnext \xF0\x90\x80\x80
+    testutfnext -bytestring \xF0\x90\x80\x80
 } 4
 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
-    testutfnext \xA0\xA0
+    testutfnext -bytestring \xA0\xA0
 } 1
-test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
-    testutfnext \xE8\xA0\xA0 1
-} 2
 test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
-    testutfnext \x80\x80
+    testutfnext -bytestring \x80\x80
 } 1
-test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
-    testutfnext \xF0\x80\x80 1
-} 2
 test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext compat85} {
-    testutfnext \xF4\x8F\xBF\xBF
+    testutfnext -bytestring \xF4\x8F\xBF\xBF
 } 1
 test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} {
-    testutfnext \xF4\x8F\xBF\xBF
+    testutfnext -bytestring \xF4\x8F\xBF\xBF
 } 4
 test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext {
-    testutfnext \xF4\x90\x80\x80
+    testutfnext -bytestring \xF4\x90\x80\x80
+} 1
+test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext {
+    testutfnext -bytestring \xA0\xA0\xA0
+} 1
+test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext {
+    testutfnext -bytestring \x80\x80\x80
 } 1
 
 test utf-7.1 {Tcl_UtfPrev} testutfprev {
-- 
cgit v0.12


From 070fa22acbfab9614a41de82797ab8709a43ae36 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Wed, 22 Apr 2020 10:02:06 +0000
Subject: Determine "testConstraint ucs2" without the need for a testcommand.
 Rename "compat85" testConstraint to "ucs2", because that's what it actually
 is.

---
 tests/chanio.test   |  3 +--
 tests/encoding.test |  3 +--
 tests/io.test       |  3 +--
 tests/source.test   |  3 +--
 tests/utf.test      | 52 ++++++++++++++++++++++++++--------------------------
 5 files changed, 30 insertions(+), 34 deletions(-)

diff --git a/tests/chanio.test b/tests/chanio.test
index c2f561b..db4544c 100644
--- a/tests/chanio.test
+++ b/tests/chanio.test
@@ -29,8 +29,7 @@ namespace eval ::tcl::test::io {
     variable msg
     variable expected
 
-    testConstraint ucs2	[expr {	[llength [info commands testsize]] &&
-				([testsize unichar] == 2) }]
+    testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}]
     testConstraint testchannel      [llength [info commands testchannel]]
     testConstraint exec             [llength [info commands exec]]
     testConstraint openpipe         1
diff --git a/tests/encoding.test b/tests/encoding.test
index ad55e26..af325c1 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -32,8 +32,7 @@ proc runtests {} {
 testConstraint testencoding [llength [info commands testencoding]]
 testConstraint exec [llength [info commands exec]]
 
-testConstraint ucs2 [expr { [llength [info commands testsize]] &&
-                                ([testsize unichar] == 2) }]
+testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}]
 
 
 # TclInitEncodingSubsystem is tested by the rest of this file
diff --git a/tests/io.test b/tests/io.test
index 1c18576..06be982 100644
--- a/tests/io.test
+++ b/tests/io.test
@@ -29,8 +29,7 @@ namespace eval ::tcl::test::io {
     variable msg
     variable expected
 
-testConstraint ucs2 [expr { [llength [info commands testsize]] &&
-                            ([testsize unichar] == 2) }]
+testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}]
 
 testConstraint testchannel      [llength [info commands testchannel]]
 testConstraint exec             [llength [info commands exec]]
diff --git a/tests/source.test b/tests/source.test
index 8511004..877921e 100644
--- a/tests/source.test
+++ b/tests/source.test
@@ -20,8 +20,7 @@ if {[catch {package require tcltest 2.1}]} {
 namespace eval ::tcl::test::source {
     namespace import ::tcltest::*
 
-testConstraint ucs2 [expr { [llength [info commands testsize]] &&
-                                ([testsize unichar] == 2) }]
+testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}]
 
 test source-1.1 {source command} -setup {
     set x "old x value"
diff --git a/tests/utf.test b/tests/utf.test
index 0a81ae3..fd4e396 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -13,7 +13,7 @@ if {[lsearch [namespace children] ::tcltest] == -1} {
     namespace import -force ::tcltest::*
 }
 
-testConstraint compat85 [expr {[format %c 0x010000] eq "\uFFFD"}]
+testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}]
 testConstraint fullutf  [expr {[format %c 0x010000] ne "\uFFFD"}]
 
 testConstraint testbytestring [llength [info commands testbytestring]]
@@ -66,13 +66,13 @@ test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} testbytestrin
 test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring {
     string length [testbytestring "\xE4\xB9\x8E"]
 } {1}
-test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body {
+test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring ucs2} -body {
     string length [testbytestring "\xF0\x90\x80\x80"]
 } -result {4}
 test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring fullutf} -body {
     string length [testbytestring "\xF0\x90\x80\x80"]
 } -result {1}
-test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body {
+test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring ucs2} -body {
     string length [testbytestring "\xF4\x8F\xBF\xBF"]
 } -result {4}
 test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring fullutf} -body {
@@ -126,7 +126,7 @@ test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars
 test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} {
     testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 3
 } {3}
-test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring compat85} {
+test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs2} {
     testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4
 } {4}
 test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring fullutf} {
@@ -346,7 +346,7 @@ test utf-6.67 {Tcl_UtfNext} testutfnext {
 test utf-6.68 {Tcl_UtfNext} testutfnext {
     testutfnext -bytestring \xF2\xA0\xA0G
 } 1
-test utf-6.69.0 {Tcl_UtfNext} {testutfnext compat85} {
+test utf-6.69.0 {Tcl_UtfNext} {testutfnext ucs2} {
     testutfnext -bytestring \xF2\xA0\xA0\xA0
 } 1
 test utf-6.69.1 {Tcl_UtfNext} {testutfnext fullutf} {
@@ -364,37 +364,37 @@ test utf-6.71 {Tcl_UtfNext} testutfnext {
 test utf-6.73 {Tcl_UtfNext} testutfnext {
     testutfnext -bytestring \xF2\xA0\xA0\xF8
 } 1
-test utf-6.74.0 {Tcl_UtfNext} {testutfnext compat85} {
+test utf-6.74.0 {Tcl_UtfNext} {testutfnext ucs2} {
     testutfnext -bytestring \xF2\xA0\xA0\xA0G
 } 1
 test utf-6.74.1 {Tcl_UtfNext} {testutfnext fullutf} {
     testutfnext -bytestring \xF2\xA0\xA0\xA0G
 } 4
-test utf-6.75.0 {Tcl_UtfNext} {testutfnext compat85} {
+test utf-6.75.0 {Tcl_UtfNext} {testutfnext ucs2} {
     testutfnext -bytestring \xF2\xA0\xA0\xA0\xA0
 } 1
 test utf-6.75.1 {Tcl_UtfNext} {testutfnext fullutf} {
     testutfnext -bytestring \xF2\xA0\xA0\xA0\xA0
 } 4
-test utf-6.76.0 {Tcl_UtfNext} {testutfnext compat85} {
+test utf-6.76.0 {Tcl_UtfNext} {testutfnext ucs2} {
     testutfnext -bytestring \xF2\xA0\xA0\xA0\xD0
 } 1
 test utf-6.76.1 {Tcl_UtfNext} {testutfnext fullutf} {
     testutfnext -bytestring \xF2\xA0\xA0\xA0\xD0
 } 4
-test utf-6.77.0 {Tcl_UtfNext} {testutfnext compat85} {
+test utf-6.77.0 {Tcl_UtfNext} {testutfnext ucs2} {
     testutfnext -bytestring \xF2\xA0\xA0\xA0\xE8
 } 1
 test utf-6.77.1 {Tcl_UtfNext} {testutfnext fullutf} {
     testutfnext -bytestring \xF2\xA0\xA0\xA0\xE8
 } 4
-test utf-6.78.0 {Tcl_UtfNext} {testutfnext compat85} {
+test utf-6.78.0 {Tcl_UtfNext} {testutfnext ucs2} {
     testutfnext -bytestring \xF2\xA0\xA0\xA0\xF2
 } 1
 test utf-6.78.1 {Tcl_UtfNext} {testutfnext fullutf} {
     testutfnext -bytestring \xF2\xA0\xA0\xA0\xF2
 } 4
-test utf-6.79.0 {Tcl_UtfNext} {testutfnext compat85} {
+test utf-6.79.0 {Tcl_UtfNext} {testutfnext ucs2} {
     testutfnext -bytestring \xF2\xA0\xA0\xA0G\xF8
 } 1
 test utf-6.79.1 {Tcl_UtfNext} {testutfnext fullutf} {
@@ -421,7 +421,7 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext {
 test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext {
     testutfnext -bytestring \xF0\x80\x80\x80
 } 1
-test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext compat85} {
+test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext ucs2} {
     testutfnext -bytestring \xF0\x90\x80\x80
 } 1
 test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} {
@@ -433,7 +433,7 @@ test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {test
 test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
     testutfnext -bytestring \x80\x80
 } 1
-test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext compat85} {
+test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs2} {
     testutfnext -bytestring \xF4\x8F\xBF\xBF
 } 1
 test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} {
@@ -512,19 +512,19 @@ test utf-7.9.1 {Tcl_UtfPrev} testutfprev {
 test utf-7.9.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xF8\xA0 3
 } 2
-test utf-7.10.0 {Tcl_UtfPrev} {testutfprev compat85} {
+test utf-7.10.0 {Tcl_UtfPrev} {testutfprev ucs2} {
     testutfprev A\xF2\xA0
 } 2
 test utf-7.10.1 {Tcl_UtfPrev} {testutfprev fullutf} {
     testutfprev A\xF2\xA0
 } 1
-test utf-7.10.1.0 {Tcl_UtfPrev} {testutfprev compat85} {
+test utf-7.10.1.0 {Tcl_UtfPrev} {testutfprev ucs2} {
     testutfprev A\xF2\xA0\xA0\xA0 3
 } 2
 test utf-7.10.1.1 {Tcl_UtfPrev} {testutfprev fullutf} {
     testutfprev A\xF2\xA0\xA0\xA0 3
 } 1
-test utf-7.10.2.0 {Tcl_UtfPrev} {testutfprev compat85} {
+test utf-7.10.2.0 {Tcl_UtfPrev} {testutfprev ucs2} {
     testutfprev A\xF2\xA0\xF8\xA0 3
 } 2
 test utf-7.10.2.1 {Tcl_UtfPrev} {testutfprev fullutf} {
@@ -569,19 +569,19 @@ test utf-7.14.1 {Tcl_UtfPrev} testutfprev {
 test utf-7.14.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xA0\xF8 4
 } 3
-test utf-7.15.0 {Tcl_UtfPrev} {testutfprev compat85} {
+test utf-7.15.0 {Tcl_UtfPrev} {testutfprev ucs2} {
     testutfprev A\xF2\xA0\xA0
 } 3
 test utf-7.15.1 {Tcl_UtfPrev} {testutfprev fullutf} {
     testutfprev A\xF2\xA0\xA0
 } 1
-test utf-7.15.1.0 {Tcl_UtfPrev} {testutfprev compat85} {
+test utf-7.15.1.0 {Tcl_UtfPrev} {testutfprev ucs2} {
     testutfprev A\xF2\xA0\xA0\xA0 4
 } 3
 test utf-7.15.1.1 {Tcl_UtfPrev} {testutfprev fullutf} {
     testutfprev A\xF2\xA0\xA0\xA0 4
 } 1
-test utf-7.15.2.0 {Tcl_UtfPrev} {testutfprev compat85} {
+test utf-7.15.2.0 {Tcl_UtfPrev} {testutfprev ucs2} {
     testutfprev A\xF2\xA0\xA0\xF8 4
 } 3
 test utf-7.15.2.1 {Tcl_UtfPrev} {testutfprev fullutf} {
@@ -617,7 +617,7 @@ test utf-7.18.2 {Tcl_UtfPrev} testutfprev {
 test utf-7.19 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xA0\xA0
 } 4
-test utf-7.20.0 {Tcl_UtfPrev} {testutfprev compat85} {
+test utf-7.20.0 {Tcl_UtfPrev} {testutfprev ucs2} {
     testutfprev A\xF2\xA0\xA0\xA0
 } 4
 test utf-7.20.1 {Tcl_UtfPrev} {testutfprev fullutf} {
@@ -683,19 +683,19 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
 test utf-7.38 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
     testutfprev A\xE0\xA0\x80 2
 } 1
-test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
+test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence}  {testutfprev ucs2} {
     testutfprev A\xF0\x90\x80\x80
 } 4
 test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence}  {testutfprev fullutf} {
     testutfprev A\xF0\x90\x80\x80
 } 1
-test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
+test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence}  {testutfprev ucs2} {
     testutfprev A\xF0\x90\x80\x80 4
 } 3
 test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence}  {testutfprev fullutf} {
     testutfprev A\xF0\x90\x80\x80 4
 } 1
-test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
+test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence}  {testutfprev ucs2} {
     testutfprev A\xF0\x90\x80\x80 3
 } 2
 test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence}  {testutfprev fullutf} {
@@ -725,19 +725,19 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {te
 test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} {
     testutfprev \xE8\xA0\x00 2
 } 0
-test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
+test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs2} {
     testutfprev A\xF4\x8F\xBF\xBF
 } 4
 test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} {
     testutfprev A\xF4\x8F\xBF\xBF
 } 1
-test utf-7.48.1.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
+test utf-7.48.1.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs2} {
     testutfprev A\xF4\x8F\xBF\xBF 4
 } 3
 test utf-7.48.1.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} {
     testutfprev A\xF4\x8F\xBF\xBF 4
 } 1
-test utf-7.48.2.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
+test utf-7.48.2.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs2} {
     testutfprev A\xF4\x8F\xBF\xBF 3
 } 2
 test utf-7.48.2.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} {
-- 
cgit v0.12


From 05fe66d4ee8a8ae3847782fba4717855d1961ceb Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Wed, 22 Apr 2020 22:13:53 +0000
Subject: (cherry-pick): Update documentation of Tcl_UtfPrev/Tcl_UtfNext back
 to how it was. Will be updated later, when implementation is ready and agreed
 upon.

---
 doc/Utf.3        | 21 +++++------------
 generic/tclUtf.c | 70 +++++++++++++++++++-------------------------------------
 2 files changed, 30 insertions(+), 61 deletions(-)

diff --git a/doc/Utf.3 b/doc/Utf.3
index 55ef80d..e9bfaa7 100644
--- a/doc/Utf.3
+++ b/doc/Utf.3
@@ -141,7 +141,7 @@ source buffer is long enough such that this routine does not run off the
 end and dereference non-existent or random memory; if the source buffer
 is known to be null-terminated, this will not happen.  If the input is
 not in proper UTF-8 format, \fBTcl_UtfToUniChar\fR will store the first
-byte of \fIsrc\fR in \fI*chPtr\fR as a Tcl_UniChar between 0x0000 and
+byte of \fIsrc\fR in \fI*chPtr\fR as a Tcl_UniChar between 0x0080 and
 0x00FF and return 1.
 .PP
 \fBTcl_UniCharToUtfDString\fR converts the given Unicode string
@@ -217,20 +217,11 @@ returns a pointer to the last occurrence of the Tcl_UniChar \fIch\fR
 in the null-terminated UTF-8 string \fIsrc\fR.  The null terminator is
 considered part of the UTF-8 string.
 .PP
-\fBTcl_UtfNext\fR is used to step forward through a UTF-8 string.
-If the UTF-8 string is made up entirely of complete, well-formed, and
-valid character byte sequences, and \fIsrc\fR points to the lead byte
-of one of those sequences, then repeated calls of \fBTcl_UtfNext\fR will
-return pointers to the lead bytes of each character in the string, one
-character at a time. In any other circumstance, \fBTcl_UtfNext\fR
-returns \fIsrc\fR+1.  \fBTcl_UtfNext\fR will always read \fIsrc[0]\fR
-and may read as many following bytes (up to a total of \fBTCL_UTF_MAX\fR)
-as needed to find the end of the byte sequence. If the string is
-\fBNUL\fR-terminated, \fBTcl_UtfNext\fR will not read beyond the terminating
-\fBNUL\fR byte. If not, the caller must use the companion routine
-\fBTcl_UtfCharComplete\fR to determine whether there is any risk
-\fBTcl_UtfNext\fR might read beyond the readable memory occupied
-by the string.
+Given \fIsrc\fR, a pointer to some location in a UTF-8 string,
+\fBTcl_UtfNext\fR returns a pointer to the next UTF-8 character in the
+string.  The caller must not ask for the next character after the last
+character in the string if the string is not terminated by a null
+character.
 .PP
 \fBTcl_UtfPrev\fR is used to step backward through but not beyond the
 UTF-8 string that begins at \fIstart\fR.  If the UTF-8 string is made
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index a5e4fd4..e7048ee 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -717,35 +717,13 @@ Tcl_UtfFindLast(
  *
  * Tcl_UtfNext --
  *
- *	The aim of this routine is to provide a way to iterate forward
- *	through a UTF-8 string. The caller is expected to pass a non-NULL
- *	pointer argument /src/ which points to a location within a string.
- *	(*src) will be read, so /src/ must not point to an unreadable
- *	location past the end of the string. If /src/ points to the
- *	beginning of a complete, well-formed and valid UTF_8 byte sequence
- *	of no more than TCL_UTF_MAX bytes, Tcl_UtfNext returns the pointer
- *	just past the end of that sequence. In any other circumstance,
- *	Tcl_UtfNext returns /src/+1.
- *
- *	Because this routine always returns a value > /src/, it is useful
- *	as a forward iterator that will always make progress. If the string
- *	is NUL-terminated, Tcl_UtfNext will not read beyond the terminating
- *	NUL character. If it is not NUL-terminated, the caller must make
- *	use of the companion routine Tcl_UtfCharComplete to test whether
- *	there is risk that Tcl_UtfNext will read beyond the end of the string.
- *	Tcl_UtfNext will never read more than TCL_UTF_MAX bytes.
- *
- *	In a string where all characters are complete and properly formed,
- *	and /src/ points to the first byte of a character, repeated
- *	Tcl_UtfNext calls will step to the starting bytes of characters, one
- *	character at a time. Within those limitations, Tcl_UtfPrev and
- *	Tcl_UtfNext are inverses. If either condition cannot be met,
- *	Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and the
- *	caller will have to take greater care.
+ *	Given a pointer to some current location in a UTF-8 string, move
+ *	forward one character. The caller must ensure that they are not asking
+ *	for the next character after the last character in the string.
  *
  * Results:
- *	A pointer to the start of the next character in the string (or to
- *	the end of the string) as described above.
+ *	The return value is the pointer to the next character in the UTF-8
+ *	string.
  *
  * Side effects:
  *	None.
@@ -786,37 +764,37 @@ Tcl_UtfNext(
  *
  *	The aim of this routine is to provide a way to move backward
  *	through a UTF-8 string. The caller is expected to pass non-NULL
- *	pointer arguments /start/ and /src/. /start/ points to the beginning
- *	of a string, and /src/ (>= /start/) points to a location within (or
- *	just past the end) of the string. This routine always returns a
- *	pointer within the string (>= /start/).  When (/src/ == /start/),
- *	it returns /start/. When (/src/ > /start/), it returns a pointer
- *	(< /src/) and (>= /src/ - TCL_UTF_MAX).  Subject to these constraints,
- *	the routine returns a pointer to the earliest byte in the string that
- *	starts a character when characters are read starting at /start/ and
+ *	pointer arguments start and src. start points to the beginning
+ *	of a string, and src >= start points to a location within (or just
+ *	past the end) of the string. This routine always returns a
+ *	pointer within the string (>= start).  When (src == start), it
+ *	returns start. When (src > start), it returns a pointer (< src)
+ *	and (>= src - TCL_UTF_MAX).  Subject to these constraints, the
+ *	routine returns a pointer to the earliest byte in the string that
+ *	starts a character when characters are read starting at start and
  *	that character might include the byte src[-1]. The routine will
  *	examine only those bytes in the range that might be returned.
- *	It will not examine the byte (*src), and because of that cannot
+ *	It will not examine the byte *src, and because of that cannot
  *	determine for certain in all circumstances whether the character
  *	that begins with the returned pointer will or will not include
- *	the byte src[-1]. In the scenario where /src/ points to the end of
- *	a buffer being filled, the returned pointer points to either the
+ *	the byte src[-1]. In the scenario, where src points to the end of
+ *	a buffer being filled, the returned pointer point to either the
  *	final complete character in the string or to the earliest byte
  *	that might start an incomplete character waiting for more bytes to
  *	complete.
  *
- *	Because this routine always returns a value < /src/ until the point
- *	it is forced to return /start/, it is useful as a backward iterator
+ *	Because this routine always returns a value < src until the point
+ *	it is forced to return start, it is useful as a backward iterator
  *	through a string that will always make progress and always be
  *	prevented from running past the beginning of the string.
  *
  *	In a string where all characters are complete and properly formed,
- *	and /src/ points to the first byte of a character, repeated
- *	Tcl_UtfPrev calls will step to the starting bytes of characters, one
- *	character at a time. Within those limitations, Tcl_UtfPrev and
- *	Tcl_UtfNext are inverses. If either condition cannot be met,
- *	Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and the
- *	caller will have to take greater care.
+ *	and the value of src points to the first byte of a character,
+ *	repeated Tcl_UtfPrev calls will step to the starting bytes of
+ *	characters, one character at a time. Within those limitations,
+ *	Tcl_UtfPrev and Tcl_UtfNext are inverses. If either condition cannot
+ *	be met, Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and
+ *	the caller will have to take greater care.
  *
  * Results:
  *	A pointer to the start of a character in the string as described
-- 
cgit v0.12


From d016b9e8e12b19aeb60ac6654100f0a3cfdb4b05 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Thu, 23 Apr 2020 12:19:42 +0000
Subject: Testcase cleanup

---
 tests/utf.test | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/utf.test b/tests/utf.test
index 3af70c4..cb650f4 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -16,9 +16,9 @@ if {[lsearch [namespace children] ::tcltest] == -1} {
 ::tcltest::loadTestedCommands
 catch [list package require -exact Tcltest [info patchlevel]]
 
-testConstraint ucs2 [expr {[format %c 0x010000] == "\uFFFD"}]
-testConstraint fullutf [expr {[format %c 0x010000] != "\uFFFD"}]
-testConstraint tip389 [expr {[string length \U010000] == 2}]
+testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}]
+testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}]
+testConstraint tip389 [expr {[string length \U010000] eq 2}]
 
 testConstraint testbytestring [llength [info commands testbytestring]]
 testConstraint testfindfirst [llength [info commands testfindfirst]]
@@ -377,7 +377,7 @@ test utf-6.70 {Tcl_UtfNext} testutfnext {
 test utf-6.71 {Tcl_UtfNext} testutfnext {
     testutfnext -bytestring \xF2\xA0\xA0\xE8
 } 1
-test utf-6.71 {Tcl_UtfNext} testutfnext {
+test utf-6.72 {Tcl_UtfNext} testutfnext {
     testutfnext -bytestring \xF2\xA0\xA0\xF2
 } 1
 test utf-6.73 {Tcl_UtfNext} testutfnext {
@@ -1064,7 +1064,7 @@ test utf-20.1 {TclUniCharNcmp} {
 test utf-21.1 {TclUniCharIsAlnum} {
     # this returns 1 with Unicode 7 compliance
     string is alnum \u1040\u021F\u0220
-} {1}
+} 1
 test utf-21.2 {unicode alnum char in regc_locale.c} {
     # this returns 1 with Unicode 7 compliance
     list [regexp {^[[:alnum:]]+$} \u1040\u021F\u0220] [regexp {^\w+$} \u1040\u021F\u0220_\u203F\u2040\u2054\uFE33\uFE34\uFE4D\uFE4E\uFE4F\uFF3F]
-- 
cgit v0.12


From 2ca7ab9af0d59c9907dde3d844e1785d33df4812 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Thu, 23 Apr 2020 13:53:54 +0000
Subject: Fix TclUtfNext() macro. Use it in tclTest.c, so such a mistake can be
 detected next time.

---
 generic/tclInt.h  | 2 +-
 generic/tclTest.c | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/generic/tclInt.h b/generic/tclInt.h
index 11c9ec8..780ea30 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -4472,7 +4472,7 @@ MODULE_SCOPE void	TclDbInitNewObj(Tcl_Obj *objPtr, const char *file,
 	Tcl_UtfPrev(src, start))
 
 #define TclUtfNext(src)	\
-	((((unsigned char) *(src)) < 0xC0) ? src + 1 : Tcl_UtfNext(src))
+	((((unsigned char) *(src)) < 0x80) ? src + 1 : Tcl_UtfNext(src))
 
 /*
  *----------------------------------------------------------------
diff --git a/generic/tclTest.c b/generic/tclTest.c
index 539d188..856e9ea 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -6739,10 +6739,10 @@ TestUtfNextCmd(
     memcpy(buffer + 1, bytes, numBytes);
     buffer[0] = buffer[numBytes + 1] = '\x00';
 
-    first = Tcl_UtfNext(buffer + 1);
+    first = TclUtfNext(buffer + 1);
     while ((buffer[0] = *p++) != '\0') {
 	/* Run Tcl_UtfNext with many more possible bytes at src[-1], all should give the same result */
-	result = Tcl_UtfNext(buffer + 1);
+	result = TclUtfNext(buffer + 1);
 	if (first != result) {
 	    Tcl_AppendResult(interp, "Tcl_UtfNext is not supposed to read src[-1]", NULL);
 	    return TCL_ERROR;
@@ -6795,7 +6795,7 @@ TestUtfPrevCmd(
     bytes = (char *) Tcl_SetByteArrayLength(copy, numBytes+1);
     bytes[numBytes] = '\0';
 
-    result = Tcl_UtfPrev(bytes + offset, bytes);
+    result = TclUtfPrev(bytes + offset, bytes);
     Tcl_SetObjResult(interp, Tcl_NewIntObj(result - bytes));
 
     Tcl_DecrRefCount(copy);
-- 
cgit v0.12