From 4b0626ddd7e1f7450781deb2508d94a98c8db93d Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Fri, 17 Apr 2020 16:56:51 +0000
Subject: Bring back the test utf-2.11; it fails in a TCL_UTF_MAX=4 build.

---
 tests/utf.test | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/utf.test b/tests/utf.test
index 7b7b5c2..a22dafe 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -13,6 +13,8 @@ if {[lsearch [namespace children] ::tcltest] == -1} {
     namespace import -force ::tcltest::*
 }
 
+testConstraint testbytestring [llength [info commands testbytestring]]
+
 catch {unset x}
 
 test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} {
@@ -59,6 +61,12 @@ test utf-2.8 {Tcl_UtfToUniChar: longer UTF sequences not supported} {
     string length [bytestring "\xF4\xA2\xA2\xA2"]
 } {4}
 
+test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, invalid} testbytestring {
+    # Would decode to U+110000 but that is outside the Unicode range.
+    string length [testbytestring "\xF4\x90\x80\x80"]
+} {4}
+
+
 test utf-3.1 {Tcl_UtfCharComplete} {
 } {}
 
-- 
cgit v0.12


From 1e5043ff451573bf735f6aec84208af7f0c24cc2 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Fri, 17 Apr 2020 20:15:07 +0000
Subject: Backport a collection of tests for consistency between branches.

---
 tests/utf.test | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/tests/utf.test b/tests/utf.test
index a22dafe..ff4f4a9 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -57,15 +57,22 @@ test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} {
 test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} {
     string length [bytestring "\xE4\xb9\x8e"]
 } {1}
-test utf-2.8 {Tcl_UtfToUniChar: longer UTF sequences not supported} {
-    string length [bytestring "\xF4\xA2\xA2\xA2"]
+test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring} -body {
+    string length [testbytestring "\xF0\x90\x80\x80"]
+} -result {4}
+test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring} -body {
+    string length [testbytestring "\xF4\x8F\xBF\xBF"]
+} -result {4}
+test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring {
+    string length [testbytestring "\xF0\x8F\xBF\xBF"]
 } {4}
-
-test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, invalid} testbytestring {
+test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} testbytestring {
     # Would decode to U+110000 but that is outside the Unicode range.
     string length [testbytestring "\xF4\x90\x80\x80"]
 } {4}
-
+test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestring {
+    string length [testbytestring "\xF8\xA2\xA2\xA2\xA2"]
+} {5}
 
 test utf-3.1 {Tcl_UtfCharComplete} {
 } {}
-- 
cgit v0.12


From 7067acad796b0536c589101c3f61fbae9fd268aa Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Fri, 17 Apr 2020 20:23:49 +0000
Subject: Corrections for many tests, changing lead byte \xF4 to \xF2. The
 tested sequences were always intended to be valid 4-byte sequences. Also a
 few errors with greedy \xHHHH .

---
 tests/utf.test | 92 +++++++++++++++++++++++++++++-----------------------------
 1 file changed, 46 insertions(+), 46 deletions(-)

diff --git a/tests/utf.test b/tests/utf.test
index ff4f4a9..7953a68 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -149,7 +149,7 @@ test utf-6.6 {Tcl_UtfNext} testutfnext {
     testutfnext A\xE8
 } 1
 test utf-6.7 {Tcl_UtfNext} testutfnext {
-    testutfnext A\xF4
+    testutfnext A\xF2
 } 1
 test utf-6.8 {Tcl_UtfNext} testutfnext {
     testutfnext A\xF8
@@ -170,7 +170,7 @@ test utf-6.13 {Tcl_UtfNext} testutfnext {
     testutfnext \xA0\xE8
 } 1
 test utf-6.14 {Tcl_UtfNext} testutfnext {
-    testutfnext \xA0\xF4
+    testutfnext \xA0\xF2
 } 1
 test utf-6.15 {Tcl_UtfNext} testutfnext {
     testutfnext \xA0\xF8
@@ -179,7 +179,7 @@ test utf-6.16 {Tcl_UtfNext} testutfnext {
     testutfnext \xD0
 } 1
 test utf-6.17 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0A
+    testutfnext \xD0G
 } 1
 test utf-6.18 {Tcl_UtfNext} testutfnext {
     testutfnext \xD0\xA0
@@ -191,7 +191,7 @@ test utf-6.20 {Tcl_UtfNext} testutfnext {
     testutfnext \xD0\xE8
 } 1
 test utf-6.21 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xF4
+    testutfnext \xD0\xF2
 } 1
 test utf-6.22 {Tcl_UtfNext} testutfnext {
     testutfnext \xD0\xF8
@@ -200,7 +200,7 @@ test utf-6.23 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8
 } 1
 test utf-6.24 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8A
+    testutfnext \xE8G
 } 1
 test utf-6.25 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8\xA0
@@ -212,37 +212,37 @@ test utf-6.27 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8\xE8
 } 1
 test utf-6.28 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xF4
+    testutfnext \xE8\xF2
 } 1
 test utf-6.29 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8\xF8
 } 1
 test utf-6.30 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4
+    testutfnext \xF2
 } 1
 test utf-6.31 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4A
+    testutfnext \xF2G
 } 1
 test utf-6.32 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0
+    testutfnext \xF2\xA0
 } 1
 test utf-6.33 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xD0
+    testutfnext \xF2\xD0
 } 1
 test utf-6.34 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xE8
+    testutfnext \xF2\xE8
 } 1
 test utf-6.35 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xF4
+    testutfnext \xF2\xF2
 } 1
 test utf-6.36 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xF8
+    testutfnext \xF2\xF8
 } 1
 test utf-6.37 {Tcl_UtfNext} testutfnext {
     testutfnext \xF8
 } 1
 test utf-6.38 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF8A
+    testutfnext \xF8G
 } 1
 test utf-6.39 {Tcl_UtfNext} testutfnext {
     testutfnext \xF8\xA0
@@ -254,7 +254,7 @@ test utf-6.41 {Tcl_UtfNext} testutfnext {
     testutfnext \xF8\xE8
 } 1
 test utf-6.42 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF8\xF4
+    testutfnext \xF8\xF2
 } 1
 test utf-6.43 {Tcl_UtfNext} testutfnext {
     testutfnext \xF8\xF8
@@ -272,7 +272,7 @@ test utf-6.47 {Tcl_UtfNext} testutfnext {
     testutfnext \xD0\xA0\xE8
 } 2
 test utf-6.48 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xA0\xF4
+    testutfnext \xD0\xA0\xF2
 } 2
 test utf-6.49 {Tcl_UtfNext} testutfnext {
     testutfnext \xD0\xA0\xF8
@@ -290,28 +290,28 @@ test utf-6.53 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8\xA0\xE8
 } 1
 test utf-6.54 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xF4
+    testutfnext \xE8\xA0\xF2
 } 1
 test utf-6.55 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8\xA0\xF8
 } 1
 test utf-6.56 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0G
+    testutfnext \xF2\xA0G
 } 1
 test utf-6.57 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0
+    testutfnext \xF2\xA0\xA0
 } 1
 test utf-6.58 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xD0
+    testutfnext \xF2\xA0\xD0
 } 1
 test utf-6.59 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xE8
+    testutfnext \xF2\xA0\xE8
 } 1
 test utf-6.60 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xF4
+    testutfnext \xF2\xA0\xF2
 } 1
 test utf-6.61 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xF8
+    testutfnext \xF2\xA0\xF8
 } 1
 test utf-6.62 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8\xA0\xA0G
@@ -326,46 +326,46 @@ test utf-6.65 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8\xA0\xA0\xE8
 } 3
 test utf-6.66 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xA0\xF4
+    testutfnext \xE8\xA0\xA0\xF2
 } 3
 test utf-6.67 {Tcl_UtfNext} testutfnext {
     testutfnext \xE8\xA0\xA0\xF8
 } 3
 test utf-6.68 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0G
+    testutfnext \xF2\xA0\xA0G
 } 1
 test utf-6.69 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xA0
+    testutfnext \xF2\xA0\xA0\xA0
 } 1
 test utf-6.70 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xD0
+    testutfnext \xF2\xA0\xA0\xD0
 } 1
 test utf-6.71 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xE8
+    testutfnext \xF2\xA0\xA0\xE8
 } 1
 test utf-6.71 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xF4
+    testutfnext \xF2\xA0\xA0\xF2
 } 1
 test utf-6.73 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xF8
+    testutfnext \xF2\xA0\xA0\xF8
 } 1
 test utf-6.74 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xA0G
+    testutfnext \xF2\xA0\xA0\xA0G
 } 1
 test utf-6.75 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xA0\xA0
+    testutfnext \xF2\xA0\xA0\xA0\xA0
 } 1
 test utf-6.76 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xA0\xD0
+    testutfnext \xF2\xA0\xA0\xA0\xD0
 } 1
 test utf-6.77 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xA0\xE8
+    testutfnext \xF2\xA0\xA0\xA0\xE8
 } 1
 test utf-6.78 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xA0\xF4
+    testutfnext \xF2\xA0\xA0\xA0\xF2
 } 1
 test utf-6.79 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF4\xA0\xA0\xA0G\xF8
+    testutfnext \xF2\xA0\xA0\xA0G\xF8
 } 1
 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext {
     testutfnext \xC0\x80
@@ -425,13 +425,13 @@ test utf-7.4.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xF8\xA0\xA0 2
 } 1
 test utf-7.5 {Tcl_UtfPrev} testutfprev {
-    testutfprev A\xF4
+    testutfprev A\xF2
 } 1
 test utf-7.5.1 {Tcl_UtfPrev} testutfprev {
-    testutfprev A\xF4\xA0\xA0\xA0 2
+    testutfprev A\xF2\xA0\xA0\xA0 2
 } 1
 test utf-7.5.2 {Tcl_UtfPrev} testutfprev {
-    testutfprev A\xF4\xF8\xA0\xA0 2
+    testutfprev A\xF2\xF8\xA0\xA0 2
 } 1
 test utf-7.6 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xE8
@@ -470,13 +470,13 @@ test utf-7.9.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xF8\xA0 3
 } 2
 test utf-7.10 {Tcl_UtfPrev} testutfprev {
-    testutfprev A\xF4\xA0
+    testutfprev A\xF2\xA0
 } 2
 test utf-7.10.1 {Tcl_UtfPrev} testutfprev {
-    testutfprev A\xF4\xA0\xA0\xA0 3
+    testutfprev A\xF2\xA0\xA0\xA0 3
 } 2
 test utf-7.10.2 {Tcl_UtfPrev} testutfprev {
-    testutfprev A\xF4\xA0\xF8\xA0 3
+    testutfprev A\xF2\xA0\xF8\xA0 3
 } 2
 test utf-7.11 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xE8\xA0
@@ -518,13 +518,13 @@ test utf-7.14.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xA0\xF8 4
 } 3
 test utf-7.15 {Tcl_UtfPrev} testutfprev {
-    testutfprev A\xF4\xA0\xA0
+    testutfprev A\xF2\xA0\xA0
 } 3
 test utf-7.15.1 {Tcl_UtfPrev} testutfprev {
-    testutfprev A\xF4\xA0\xA0\xA0 4
+    testutfprev A\xF2\xA0\xA0\xA0 4
 } 3
 test utf-7.15.2 {Tcl_UtfPrev} testutfprev {
-    testutfprev A\xF4\xA0\xA0\xF8 4
+    testutfprev A\xF2\xA0\xA0\xF8 4
 } 3
 test utf-7.16 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xE8\xA0\xA0
@@ -557,7 +557,7 @@ test utf-7.19 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xA0\xA0
 } 4
 test utf-7.20 {Tcl_UtfPrev} testutfprev {
-    testutfprev A\xF4\xA0\xA0\xA0
+    testutfprev A\xF2\xA0\xA0\xA0
 } 4
 test utf-7.21 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xE8\xA0\xA0\xA0
-- 
cgit v0.12


From e19b1f1306ccd36f01270f8594f9315dbdf39846 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Fri, 17 Apr 2020 20:38:38 +0000
Subject: [493dccc2de] Coverage that Tcl_UtfPrev also checks the upper range
 validity.

---
 tests/utf.test | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tests/utf.test b/tests/utf.test
index 7953a68..6d87928 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -652,6 +652,30 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {te
 test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} {
     testutfprev \xE8\xA0\x00 2
 } 0
+test utf-7.48 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+    testutfprev A\xF4\x8F\xBF\xBF
+} 4
+test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+    testutfprev A\xF4\x8F\xBF\xBF 4
+} 3
+test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+    testutfprev A\xF4\x8F\xBF\xBF 3
+} 2
+test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+    testutfprev A\xF4\x8F\xBF\xBF 2
+} 1
+test utf-7.49 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+    testutfprev A\xF4\x90\x80\x80
+} 4
+test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+    testutfprev A\xF4\x90\x80\x80 4
+} 3
+test utf-7.49.2 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+    testutfprev A\xF4\x90\x80\x80 3
+} 2
+test utf-7.49.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+    testutfprev A\xF4\x90\x80\x80 2
+} 1
 
 test utf-8.1 {Tcl_UniCharAtIndex: index = 0} {
     string index abcd 0
-- 
cgit v0.12


From 4520aa1ca30a7b09dc9cfc4bc9007aa262793711 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Fri, 17 Apr 2020 21:03:30 +0000
Subject: More tests explicitly for Tcl_UtfNext near validity boundary U+110000

---
 tests/utf.test | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/utf.test b/tests/utf.test
index 6d87928..01e0bb2 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -403,6 +403,12 @@ test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {te
 test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
     testutfnext \xF0\x80\x80 1
 } 2
+test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext {
+    testutfnext \xF4\x8F\xBF\xBF
+} 1
+test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext {
+    testutfnext \xF4\x90\x80\x80
+} 1
 
 testConstraint testutfprev [llength [info commands testutfprev]]
 
-- 
cgit v0.12


From aa9bb7f9e401573bc8c79e8336fdb74636b2702f Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Fri, 17 Apr 2020 21:07:09 +0000
Subject: [493dccc2de] Revise sequence validity check to reject out of range
 decodes too.

---
 generic/tclUtf.c | 53 +++++++++++++++++++++++++++--------------------------
 1 file changed, 27 insertions(+), 26 deletions(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index b5c430b..1883804 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -81,7 +81,7 @@ static CONST unsigned char totalBytes[256] = {
  */
 
 static int		UtfCount(int ch);
-static int		Overlong(unsigned char *src);
+static int		Invalid(unsigned char *src);
 
 /*
  *---------------------------------------------------------------------------
@@ -120,51 +120,52 @@ UtfCount(
 /*
  *---------------------------------------------------------------------------
  *
- * Overlong --
+ * Invalid --
  *
  *	Utility routine to report whether /src/ points to the start of an
- *	overlong byte sequence that should be rejected. Caller guarantees
- *	that src[0] and src[1] are readable, and
+ *	invalid byte sequence that should be rejected. This might be because
+ *	it is an overlong encoding, or because it encodes something out of
+ *	the proper range. Caller guarantees that src[0] and src[1] are
+ *	readable, and
  *
  *	(src[0] >= 0xC0) && (src[0] != 0xC1)
  * 	(src[1] >= 0x80) && (src[1] < 0xC0)
- *	(src[0] < ((TCL_UTF_MAX > 3) ? 0xF8 : 0xF0))
+ *	(src[0] < ((TCL_UTF_MAX > 3) ? 0xF5 : 0xF0))
  *
  * Results:
  *	A boolean.
  *---------------------------------------------------------------------------
  */
 
-static CONST unsigned char overlong[3] = {
-    0x80,	/* \xD0 -- all sequences valid */
-    0xA0,	/* \xE0\x80 through \xE0\x9F are invalid prefixes */
+static CONST unsigned char bounds[28] = {
+    0x80, 0x80,		/* \xC0 accepts \x80 only */
+    0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF,
+    0x80, 0xBF,		/* (\xC4 - \xDC) -- all sequences valid */
+    0xA0, 0xBF,	/* \xE0\x80 through \xE0\x9F are invalid prefixes */
+    0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, /* (\xE4 - \xEC) -- all valid */
 #if TCL_UTF_MAX > 3
-    0x90	/* \xF0\x80 through \xF0\x8F are invalid prefixes */
+    0x90, 0xBF,	/* \xF0\x80 through \xF0\x8F are invalid prefixes */
+    0x80, 0x8F  /* \xF4\x90 and higher are invalid prefixes */
 #else
-    0xC0	/* Not used, but reject all again for safety. */
+    0xC0, 0xBF,	/* Not used, but reject all again for safety. */
+    0xC0, 0xBF	/* Not used, but reject all again for safety. */
 #endif
 };
 
 INLINE static int
-Overlong(
+Invalid(
     unsigned char *src)	/* Points to lead byte of a UTF-8 byte sequence */
 {
     unsigned char byte = *src;
+    int index;
 
-    if (byte % 0x10) {
-	/* Only lead bytes 0xC0, 0xE0, 0xF0 need examination */
+    if (byte % 0x04) {
+	/* Only lead bytes 0xC0, 0xE0, 0xF0, 0xF4 need examination */
 	return 0;
     }
-    if (byte == 0xC0) {
-	if (src[1] == 0x80) {
-	    /* Valid sequence: \xC0\x80 for \u0000 */
-	    return 0;
-	}
-	/* Reject overlong: \xC0\x81 - \xC0\xBF */
-	return 1;
-    }
-    if (src[1] < overlong[(byte >> 4) - 0x0D]) {
-	/* Reject overlong */
+    index = (byte - 0xC0) >> 1;
+    if (src[1] < bounds[index] || src[1] > bounds[index+1]) {
+	/* Out of bounds - report invalid. */
 	return 1;
     }
     return 0;
@@ -733,7 +734,7 @@ Tcl_UtfNext(
 	}
 	next++;
     }
-    if (Overlong((unsigned char *)src)) {
+    if (Invalid((unsigned char *)src)) {
 	return src + 1;
     }
     return next;
@@ -843,10 +844,10 @@ Tcl_UtfPrev(
 
 	    /*
 	     * trailBytesSeen > 0, so we can examine look[1] safely.
-	     * Use that capability to screen out overlong sequences.
+	     * Use that capability to screen out invalid sequences.
 	     */
 
-	    if (Overlong(look)) {
+	    if (Invalid(look)) {
 		/* Reject */
 		return fallback;
 	    }
-- 
cgit v0.12


From ec1723eeadcf9efe52b0f81a65d683dff9b160c5 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Sat, 18 Apr 2020 12:46:54 +0000
Subject: Update documentation of Tcl_UtfPrev/Tcl_UtfNext back to how it was.
 Will be updated later, when implementation is ready and agreed upon.

---
 doc/Utf.3        | 37 +++++++++++------------------
 generic/tclUtf.c | 72 ++++++++++++++++++++------------------------------------
 2 files changed, 39 insertions(+), 70 deletions(-)

diff --git a/doc/Utf.3 b/doc/Utf.3
index cb82699..334fa6f 100644
--- a/doc/Utf.3
+++ b/doc/Utf.3
@@ -3,7 +3,7 @@
 '\"
 '\" See the file "license.terms" for information on usage and redistribution
 '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
-'\" 
+'\"
 .TH Utf 3 "8.1" Tcl "Tcl Library Procedures"
 .so man.macros
 .BS
@@ -13,7 +13,7 @@ Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UniCharToUtfDString, Tcl_Ut
 .nf
 \fB#include <tcl.h>\fR
 .sp
-typedef ... Tcl_UniChar;
+typedef ... \fBTcl_UniChar\fR;
 .sp
 int
 \fBTcl_UniCharToUtf\fR(\fIch, buf\fR)
@@ -48,7 +48,7 @@ int
 int
 \fBTcl_UtfCharComplete\fR(\fIsrc, length\fR)
 .sp
-int 
+int
 \fBTcl_NumUtfChars\fR(\fIsrc, length\fR)
 .sp
 const char *
@@ -109,7 +109,7 @@ Pointer to the beginning of a UTF-8 string.
 .AP int index in
 The index of a character (not byte) in the UTF-8 string.
 .AP int *readPtr out
-If non-NULL, filled with the number of bytes in the backslash sequence, 
+If non-NULL, filled with the number of bytes in the backslash sequence,
 including the backslash character.
 .AP char *dst out
 Buffer in which the bytes represented by the backslash sequence are stored.
@@ -141,8 +141,8 @@ source buffer is long enough such that this routine does not run off the
 end and dereference non-existent or random memory; if the source buffer
 is known to be null-terminated, this will not happen.  If the input is
 not in proper UTF-8 format, \fBTcl_UtfToUniChar\fR will store the first
-byte of \fIsrc\fR in \fI*chPtr\fR as a Tcl_UniChar between 0x0000 and
-0x00FF and return 1.  
+byte of \fIsrc\fR in \fI*chPtr\fR as a Tcl_UniChar between 0x0080 and
+0x00FF and return 1.
 .PP
 \fBTcl_UniCharToUtfDString\fR converts the given Unicode string
 to UTF-8, storing the result in a previously initialized \fBTcl_DString\fR.
@@ -210,27 +210,18 @@ length is negative, all bytes up to the first null byte are used.
 \fBTcl_UtfFindFirst\fR corresponds to \fBstrchr\fR for UTF-8 strings.  It
 returns a pointer to the first occurrence of the Tcl_UniChar \fIch\fR
 in the null-terminated UTF-8 string \fIsrc\fR.  The null terminator is
-considered part of the UTF-8 string.  
+considered part of the UTF-8 string.
 .PP
 \fBTcl_UtfFindLast\fR corresponds to \fBstrrchr\fR for UTF-8 strings.  It
 returns a pointer to the last occurrence of the Tcl_UniChar \fIch\fR
 in the null-terminated UTF-8 string \fIsrc\fR.  The null terminator is
-considered part of the UTF-8 string.  
+considered part of the UTF-8 string.
 .PP
-\fBTcl_UtfNext\fR is used to step forward through a UTF-8 string.
-If the UTF-8 string is made up entirely of complete, well-formed, and
-valid character byte sequences, and \fIsrc\fR points to the lead byte
-of one of those sequences, then repeated calls of \fBTcl_UtfNext\fR will
-return pointers to the lead bytes of each character in the string, one
-character at a time. In any other circumstance, \fBTcl_UtfNext\fR
-returns \fIsrc\fR+1.  \fBTcl_UtfNext\fR will always read \fIsrc[0]\fR
-and may read as many following bytes (up to a total of \fBTCL_UTF_MAX\fR)
-as needed to find the end of the byte sequence. If the string is
-\fBNUL\fR-terminated, \fBTcl_UtfNext\fR will not read beyond the terminating
-\fBNUL\fR byte. If not, the caller must use the companion routine
-\fBTcl_UtfCharComplete\fR to determine whether there is any risk
-\fBTcl_UtfNext\fR might read beyond the readable memory occupied
-by the string.
+Given \fIsrc\fR, a pointer to some location in a UTF-8 string,
+\fBTcl_UtfNext\fR returns a pointer to the next UTF-8 character in the
+string.  The caller must not ask for the next character after the last
+character in the string if the string is not terminated by a null
+character.
 .PP
 \fBTcl_UtfPrev\fR is used to step backward through but not beyond the
 UTF-8 string that begins at \fIstart\fR.  If the UTF-8 string is made
@@ -262,7 +253,7 @@ characters.  Behavior is undefined if a negative \fIindex\fR is given.
 .PP
 \fBTcl_UtfAtIndex\fR returns a pointer to the specified character (not
 byte) \fIindex\fR in the UTF-8 string \fIsrc\fR.  The source string must
-contain at least \fIindex\fR characters.  This is equivalent to calling 
+contain at least \fIindex\fR characters.  This is equivalent to calling
 \fBTcl_UtfNext\fR \fIindex\fR times.  If a negative \fIindex\fR is given,
 the return pointer points to the first character in the source string.
 .PP
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 1883804..64ee0a8 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -678,35 +678,13 @@ Tcl_UtfFindLast(
  *
  * Tcl_UtfNext --
  *
- *	The aim of this routine is to provide a way to iterate forward
- *	through a UTF-8 string. The caller is expected to pass a non-NULL
- *	pointer argument /src/ which points to a location within a string.
- *	(*src) will be read, so /src/ must not point to an unreadable
- *	location past the end of the string. If /src/ points to the
- *	beginning of a complete, well-formed and valid UTF_8 byte sequence
- *	of no more than TCL_UTF_MAX bytes, Tcl_UtfNext returns the pointer
- *	just past the end of that sequence. In any other circumstance,
- *	Tcl_UtfNext returns /src/+1.
- *
- *	Because this routine always returns a value > /src/, it is useful
- *	as a forward iterator that will always make progress. If the string
- *	is NUL-terminated, Tcl_UtfNext will not read beyond the terminating
- *	NUL character. If it is not NUL-terminated, the caller must make
- *	use of the companion routine Tcl_UtfCharComplete to test whether
- *	there is risk that Tcl_UtfNext will read beyond the end of the string.
- *	Tcl_UtfNext will never read more than TCL_UTF_MAX bytes.
- *
- *	In a string where all characters are complete and properly formed,
- *	and /src/ points to the first byte of a character, repeated
- *	Tcl_UtfNext calls will step to the starting bytes of characters, one
- *	character at a time. Within those limitations, Tcl_UtfPrev and
- *	Tcl_UtfNext are inverses. If either condition cannot be met,
- *	Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and the
- *	caller will have to take greater care.
+ *	Given a pointer to some current location in a UTF-8 string, move
+ *	forward one character. The caller must ensure that they are not asking
+ *	for the next character after the last character in the string.
  *
  * Results:
- *	A pointer to the start of the next character in the string (or to
- *	the end of the string) as described above.
+ *	The return value is the pointer to the next character in the UTF-8
+ *	string.
  *
  * Side effects:
  *	None.
@@ -747,37 +725,37 @@ Tcl_UtfNext(
  *
  *	The aim of this routine is to provide a way to move backward
  *	through a UTF-8 string. The caller is expected to pass non-NULL
- *	pointer arguments /start/ and /src/. /start/ points to the beginning
- *	of a string, and /src/ (>= /start/) points to a location within (or
- *	just past the end) of the string. This routine always returns a
- *	pointer within the string (>= /start/).  When (/src/ == /start/),
- *	it returns /start/. When (/src/ > /start/), it returns a pointer
- *	(< /src/) and (>= /src/ - TCL_UTF_MAX).  Subject to these constraints,
- *	the routine returns a pointer to the earliest byte in the string that
- *	starts a character when characters are read starting at /start/ and
+ *	pointer arguments start and src. start points to the beginning
+ *	of a string, and src >= start points to a location within (or just
+ *	past the end) of the string. This routine always returns a
+ *	pointer within the string (>= start).  When (src == start), it
+ *	returns start. When (src > start), it returns a pointer (< src)
+ *	and (>= src - TCL_UTF_MAX).  Subject to these constraints, the
+ *	routine returns a pointer to the earliest byte in the string that
+ *	starts a character when characters are read starting at start and
  *	that character might include the byte src[-1]. The routine will
  *	examine only those bytes in the range that might be returned.
- *	It will not examine the byte (*src), and because of that cannot
+ *	It will not examine the byte *src, and because of that cannot
  *	determine for certain in all circumstances whether the character
  *	that begins with the returned pointer will or will not include
- *	the byte src[-1]. In the scenario where /src/ points to the end of
- *	a buffer being filled, the returned pointer points to either the
+ *	the byte src[-1]. In the scenario where src points to the end of
+ *	a buffer being filled, the returned pointer points to either the
  *	final complete character in the string or to the earliest byte
  *	that might start an incomplete character waiting for more bytes to
  *	complete.
  *
- *	Because this routine always returns a value < /src/ until the point
- *	it is forced to return /start/, it is useful as a backward iterator
+ *	Because this routine always returns a value < src until the point
+ *	it is forced to return start, it is useful as a backward iterator
  *	through a string that will always make progress and always be
  *	prevented from running past the beginning of the string.
  *
  *	In a string where all characters are complete and properly formed,
- *	and /src/ points to the first byte of a character, repeated
- *	Tcl_UtfPrev calls will step to the starting bytes of characters, one
- *	character at a time. Within those limitations, Tcl_UtfPrev and
- *	Tcl_UtfNext are inverses. If either condition cannot be met,
- *	Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and the
- *	caller will have to take greater care.
+ *	and the value of src points to the first byte of a character,
+ *	repeated Tcl_UtfPrev calls will step to the starting bytes of
+ *	characters, one character at a time. Within those limitations,
+ *	Tcl_UtfPrev and Tcl_UtfNext are inverses. If either condition cannot
+ *	be met, Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and
+ *	the caller will have to take greater care.
  *
  * Results:
  *	A pointer to the start of a character in the string as described
@@ -887,7 +865,7 @@ Tcl_UtfPrev(
  *
  * Tcl_UniCharAtIndex --
  *
- *	Returns the Unicode character represented at the specified character
+ *	Returns the Tcl_UniChar represented at the specified character
  *	(not byte) position in the UTF-8 string.
  *
  * Results:
-- 
cgit v0.12


From 6f00fef31d332688308f392fd5df4cab98d05161 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Sat, 18 Apr 2020 13:47:06 +0000
Subject: Fix [c574e50a3b30e76f]: CRASH: utf-2.[89] in 8.5 built with
 TCL_UTF_MAX=4

---
 generic/regcustom.h |   2 +-
 generic/tcl.h       |   2 +-
 generic/tclUtf.c    |  84 +----------------
 tests/utf.test      | 259 ++++++++++++++++++++++++++--------------------------
 tests/util.test     |   1 +
 5 files changed, 132 insertions(+), 216 deletions(-)

diff --git a/generic/regcustom.h b/generic/regcustom.h
index 57a2d47..ac33087 100644
--- a/generic/regcustom.h
+++ b/generic/regcustom.h
@@ -97,7 +97,7 @@ typedef int celt;		/* Type to hold chr, or NOCELT */
 #define	NOCELT (-1)		/* Celt value which is not valid chr */
 #define	CHR(c) (UCHAR(c))	/* Turn char literal into chr literal */
 #define	DIGITVAL(c) ((c)-'0')	/* Turn chr digit into its value */
-#if TCL_UTF_MAX > 4
+#if TCL_UTF_MAX > 3
 #define	CHRBITS	32		/* Bits in a chr; must not use sizeof */
 #define	CHR_MIN	0x00000000	/* Smallest and largest chr; the value */
 #define	CHR_MAX	0xffffffff	/* CHR_MAX-CHR_MIN+1 should fit in uchr */
diff --git a/generic/tcl.h b/generic/tcl.h
index 7378a8f..d7d064c 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -2148,7 +2148,7 @@ typedef struct Tcl_Parse {
  * reflected in regcustom.h.
  */
 
-#if TCL_UTF_MAX > 4
+#if TCL_UTF_MAX > 3
     /*
      * unsigned int isn't 100% accurate as it should be a strict 4-byte value
      * (perhaps wchar_t). 64-bit systems may have troubles. The size of this
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 64ee0a8..3741d70 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -209,30 +209,6 @@ Tcl_UniCharToUtf(
 	    return 2;
 	}
 	if (ch <= 0xFFFF) {
-#if TCL_UTF_MAX == 4
-	    if ((ch & 0xF800) == 0xD800) {
-		if (ch & 0x0400) {
-		    /* Low surrogate */
-		    if (((buf[0] & 0xF8) == 0xF0) && ((buf[1] & 0xC0) == 0x80)
-			    && ((buf[2] & 0xCF) == 0)) {
-			/* Previous Tcl_UniChar was a High surrogate, so combine */
-			buf[3] = (char) ((ch & 0x3F) | 0x80);
-			buf[2] |= (char) (((ch >> 6) & 0x0F) | 0x80);
-			return 4;
-		    }
-		    /* Previous Tcl_UniChar was not a High surrogate, so just output */
-		} else {
-		    /* High surrogate */
-		    ch += 0x40;
-		    /* Fill buffer with specific 3-byte (invalid) byte combination,
-		       so following Low surrogate can recognize it and combine */
-		    buf[2] = (char) ((ch << 4) & 0x30);
-		    buf[1] = (char) (((ch >> 2) & 0x3F) | 0x80);
-		    buf[0] = (char) (((ch >> 8) & 0x07) | 0xF0);
-		    return 0;
-		}
-	    }
-#endif
 	    goto three;
 	}
 
@@ -321,15 +297,6 @@ Tcl_UniCharToUtfDString(
  *	Tcl_UtfCharComplete() before calling this routine to ensure that
  *	enough bytes remain in the string.
  *
- *	If TCL_UTF_MAX == 4, special handling of Surrogate pairs is done:
- *	For any UTF-8 string containing a character outside of the BMP, the
- *	first call to this function will fill *chPtr with the high surrogate
- *	and generate a return value of 0. Calling Tcl_UtfToUniChar again
- *	will produce the low surrogate and a return value of 4. Because *chPtr
- *	is used to remember whether the high surrogate is already produced, it
- *	is recommended to initialize the variable it points to as 0 before
- *	the first call to Tcl_UtfToUniChar is done.
- *
  * Results:
  *	*chPtr is filled with the Tcl_UniChar, and the return value is the
  *	number of bytes from the UTF-8 string that were consumed.
@@ -402,34 +369,15 @@ Tcl_UtfToUniChar(
 	    /*
 	     * Four-byte-character lead byte followed by three trail bytes.
 	     */
-#if TCL_UTF_MAX == 4
-	    Tcl_UniChar surrogate;
-
-	    byte = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
-		    | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)) - 0x10000;
-	    surrogate = (Tcl_UniChar) (0xD800 + (byte >> 10));
-	    if (byte & 0x100000) {
-		/* out of range, < 0x10000 or > 0x10ffff */
-	    } else if (*chPtr != surrogate) {
-		/* produce high surrogate, but don't advance source pointer */
-		*chPtr = surrogate;
-		return 0;
-	    } else {
-		/* produce low surrogate, and advance source pointer */
-		*chPtr = (Tcl_UniChar) (0xDC00 | (byte & 0x3FF));
-		return 4;
-	    }
-#else
 	    *chPtr = (Tcl_UniChar) (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
 		    | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F));
 	    if ((unsigned)(*chPtr - 0x10000) <= 0xFFFFF) {
 		return 4;
 	    }
-#endif
 	}
 
 	/*
-	 * A four-byte-character lead-byte not followed by two trail-bytes
+	 * A four-byte-character lead-byte not followed by three trail-bytes
 	 * represents itself.
 	 */
     }
@@ -1230,16 +1178,6 @@ Tcl_UtfNcmp(
 	cs += TclUtfToUniChar(cs, &ch1);
 	ct += TclUtfToUniChar(ct, &ch2);
 	if (ch1 != ch2) {
-#if TCL_UTF_MAX == 4
-	    /* Surrogates always report higher than non-surrogates */
-	    if (((ch1 & 0xFC00) == 0xD800)) {
-	    if ((ch2 & 0xFC00) != 0xD800) {
-		return ch1;
-	    }
-	    } else if ((ch2 & 0xFC00) == 0xD800) {
-		return -ch2;
-	    }
-#endif
 	    return (ch1 - ch2);
 	}
     }
@@ -1280,16 +1218,6 @@ Tcl_UtfNcasecmp(
 	cs += TclUtfToUniChar(cs, &ch1);
 	ct += TclUtfToUniChar(ct, &ch2);
 	if (ch1 != ch2) {
-#if TCL_UTF_MAX == 4
-	    /* Surrogates always report higher than non-surrogates */
-	    if (((ch1 & 0xFC00) == 0xD800)) {
-	    if ((ch2 & 0xFC00) != 0xD800) {
-		return ch1;
-	    }
-	    } else if ((ch2 & 0xFC00) == 0xD800) {
-		return -ch2;
-	    }
-#endif
 	    ch1 = Tcl_UniCharToLower(ch1);
 	    ch2 = Tcl_UniCharToLower(ch2);
 	    if (ch1 != ch2) {
@@ -1329,16 +1257,6 @@ TclUtfCasecmp(
 	cs += TclUtfToUniChar(cs, &ch1);
 	ct += TclUtfToUniChar(ct, &ch2);
 	if (ch1 != ch2) {
-#if TCL_UTF_MAX == 4
-	    /* Surrogates always report higher than non-surrogates */
-	    if (((ch1 & 0xFC00) == 0xD800)) {
-	    if ((ch2 & 0xFC00) != 0xD800) {
-		return ch1;
-	    }
-	    } else if ((ch2 & 0xFC00) == 0xD800) {
-		return -ch2;
-	    }
-#endif
 	    ch1 = Tcl_UniCharToLower(ch1);
 	    ch2 = Tcl_UniCharToLower(ch2);
 	    if (ch1 != ch2) {
diff --git a/tests/utf.test b/tests/utf.test
index 01e0bb2..9a55729 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -13,54 +13,61 @@ if {[lsearch [namespace children] ::tcltest] == -1} {
     namespace import -force ::tcltest::*
 }
 
+testConstraint compat85 [expr {[format %c 0x010000] == "\uFFFD"}]
 testConstraint testbytestring [llength [info commands testbytestring]]
+testConstraint testfindfirst [llength [info commands testfindfirst]]
+testConstraint testfindlast [llength [info commands testfindlast]]
+testConstraint testnumutfchars [llength [info commands testnumutfchars]]
+testConstraint teststringobj [llength [info commands teststringobj]]
+testConstraint testutfnext [llength [info commands testutfnext]]
+testConstraint testutfprev [llength [info commands testutfprev]]
 
 catch {unset x}
 
-test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} {
+test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} testbytestring {
     set x \x01
-} [bytestring "\x01"]
-test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} {
+} [testbytestring "\x01"]
+test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} testbytestring {
     set x "\x00"
-} [bytestring "\xc0\x80"]
-test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} {
+} [testbytestring "\xC0\x80"]
+test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} testbytestring {
     set x "\xe0"
-} [bytestring "\xc3\xa0"]
-test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} {
-    set x "\u4e4e"
-} [bytestring "\xe4\xb9\x8e"]
-test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} {
+} [testbytestring "\xC3\xA0"]
+test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} testbytestring {
+    set x "\u4E4E"
+} [testbytestring "\xE4\xB9\x8E"]
+test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} testbytestring {
     format %c 0x110000
-} [bytestring "\xef\xbf\xbd"]
-test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} {
+} [testbytestring "\xEF\xBF\xBD"]
+test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring {
     format %c -1
-} [bytestring "\xef\xbf\xbd"]
+} [testbytestring "\xEF\xBF\xBD"]
 
 test utf-2.1 {Tcl_UtfToUniChar: low ascii} {
     string length "abc"
 } {3}
-test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} {
-    string length [bytestring "\x82\x83\x84"]
+test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} testbytestring {
+    string length [testbytestring "\x82\x83\x84"]
 } {3}
-test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} {
-    string length [bytestring "\xC2"]
+test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} testbytestring {
+    string length [testbytestring "\xC2"]
 } {1}
-test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} {
-    string length [bytestring "\xC2\xa2"]
+test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} testbytestring {
+    string length [testbytestring "\xC2\xA2"]
 } {1}
-test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} {
-    string length [bytestring "\xE2"]
+test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} testbytestring {
+    string length [testbytestring "\xE2"]
 } {1}
-test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} {
-    string length [bytestring "\xE2\xA2"]
+test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} testbytestring {
+    string length [testbytestring "\xE2\xA2"]
 } {2}
-test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} {
-    string length [bytestring "\xE4\xb9\x8e"]
+test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring {
+    string length [testbytestring "\xE4\xb9\x8E"]
 } {1}
-test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring} -body {
+test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body {
     string length [testbytestring "\xF0\x90\x80\x80"]
 } -result {4}
-test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring} -body {
+test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body {
     string length [testbytestring "\xF4\x8F\xBF\xBF"]
 } -result {4}
 test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring {
@@ -77,57 +84,51 @@ test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestr
 test utf-3.1 {Tcl_UtfCharComplete} {
 } {}
 
-testConstraint testnumutfchars [llength [info commands testnumutfchars]]
-testConstraint testfindfirst [llength [info commands testfindfirst]]
-testConstraint testfindlast [llength [info commands testfindlast]]
-
 test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars {
     testnumutfchars ""
 } {0}
-test utf-4.2 {Tcl_NumUtfChars: length 1} testnumutfchars {
-    testnumutfchars [bytestring "\xC2\xA2"]
+test utf-4.2 {Tcl_NumUtfChars: length 1} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "\xC2\xA2"]
 } {1}
-test utf-4.3 {Tcl_NumUtfChars: long string} testnumutfchars {
-    testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"]
+test utf-4.3 {Tcl_NumUtfChars: long string} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8e\uA2\u4E4E"]
 } {7}
-test utf-4.4 {Tcl_NumUtfChars: #u0000} testnumutfchars {
-    testnumutfchars [bytestring "\xC0\x80"]
+test utf-4.4 {Tcl_NumUtfChars: #u0000} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "\xC0\x80"]
 } {1}
 test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars {
     testnumutfchars "" 0
 } {0}
-test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} testnumutfchars {
-    testnumutfchars [bytestring "\xC2\xA2"] 1
+test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "\xC2\xA2"] 1
 } {1}
-test utf-4.7 {Tcl_NumUtfChars: long string, calc len} testnumutfchars {
-    testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] 10
+test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\u4E4E"] 10
 } {7}
-test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} testnumutfchars {
-    testnumutfchars [bytestring "\xC0\x80"] 1
+test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "\xC0\x80"] 1
 } {1}
 # Bug [2738427]: Tcl_NumUtfChars(...) no overflow check
-test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} testnumutfchars {
-    testnumutfchars [bytestring "\xE2\x82\xAC"] 2
+test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "\xE2\x82\xAC"] 2
 } {2}
-test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} testnumutfchars {
-    testnumutfchars [bytestring "\x00"] 2
+test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "\x00"] 2
 } {2}
-test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} testnumutfchars {
-    testnumutfchars [bytestring \xf0\x9f\x92\xa9] 3
+test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring \xf0\x9f\x92\xA9] 3
 } {3}
-test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} testnumutfchars {
-    testnumutfchars [bytestring \xf0\x9f\x92\xa9] 4
+test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring compat85} {
+    testnumutfchars [testbytestring \xf0\x9f\x92\xA9] 4
 } {4}
 
-test utf-5.1 {Tcl_UtfFindFirst} testfindfirst {
-    testfindfirst [bytestring "abcbc"] 98
+test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} {
+    testfindfirst [testbytestring "abcbc"] 98
 } {bcbc}
-test utf-5.2 {Tcl_UtfFindLast} testfindlast {
-    testfindlast [bytestring "abcbc"] 98
+test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} {
+    testfindlast [testbytestring "abcbc"] 98
 } {bc}
 
-testConstraint testutfnext [llength [info commands testutfnext]]
-
 test utf-6.1 {Tcl_UtfNext} testutfnext {
     # This takes the pointer one past the terminating NUL.
     # This is really an invalid call.
@@ -334,7 +335,7 @@ test utf-6.67 {Tcl_UtfNext} testutfnext {
 test utf-6.68 {Tcl_UtfNext} testutfnext {
     testutfnext \xF2\xA0\xA0G
 } 1
-test utf-6.69 {Tcl_UtfNext} testutfnext {
+test utf-6.69 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0
 } 1
 test utf-6.70 {Tcl_UtfNext} testutfnext {
@@ -349,22 +350,22 @@ test utf-6.71 {Tcl_UtfNext} testutfnext {
 test utf-6.73 {Tcl_UtfNext} testutfnext {
     testutfnext \xF2\xA0\xA0\xF8
 } 1
-test utf-6.74 {Tcl_UtfNext} testutfnext {
+test utf-6.74 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0G
 } 1
-test utf-6.75 {Tcl_UtfNext} testutfnext {
+test utf-6.75 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0\xA0
 } 1
-test utf-6.76 {Tcl_UtfNext} testutfnext {
+test utf-6.76 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0\xD0
 } 1
-test utf-6.77 {Tcl_UtfNext} testutfnext {
+test utf-6.77 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0\xE8
 } 1
-test utf-6.78 {Tcl_UtfNext} testutfnext {
+test utf-6.78 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0\xF2
 } 1
-test utf-6.79 {Tcl_UtfNext} testutfnext {
+test utf-6.79 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0G\xF8
 } 1
 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext {
@@ -388,7 +389,7 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext {
 test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext {
     testutfnext \xF0\x80\x80\x80
 } 1
-test utf-6.87 {Tcl_UtfNext - overlong sequences} testutfnext {
+test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext compat85} {
     testutfnext \xF0\x90\x80\x80
 } 1
 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
@@ -403,15 +404,13 @@ test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {te
 test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
     testutfnext \xF0\x80\x80 1
 } 2
-test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext {
+test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext compat85} {
     testutfnext \xF4\x8F\xBF\xBF
 } 1
 test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext {
     testutfnext \xF4\x90\x80\x80
 } 1
 
-testConstraint testutfprev [llength [info commands testutfprev]]
-
 test utf-7.1 {Tcl_UtfPrev} testutfprev {
     testutfprev {}
 } 0
@@ -475,13 +474,13 @@ test utf-7.9.1 {Tcl_UtfPrev} testutfprev {
 test utf-7.9.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xF8\xA0 3
 } 2
-test utf-7.10 {Tcl_UtfPrev} testutfprev {
+test utf-7.10 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0
 } 2
-test utf-7.10.1 {Tcl_UtfPrev} testutfprev {
+test utf-7.10.1 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0\xA0 3
 } 2
-test utf-7.10.2 {Tcl_UtfPrev} testutfprev {
+test utf-7.10.2 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xF8\xA0 3
 } 2
 test utf-7.11 {Tcl_UtfPrev} testutfprev {
@@ -523,13 +522,13 @@ test utf-7.14.1 {Tcl_UtfPrev} testutfprev {
 test utf-7.14.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xA0\xF8 4
 } 3
-test utf-7.15 {Tcl_UtfPrev} testutfprev {
+test utf-7.15 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0
 } 3
-test utf-7.15.1 {Tcl_UtfPrev} testutfprev {
+test utf-7.15.1 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0\xA0 4
 } 3
-test utf-7.15.2 {Tcl_UtfPrev} testutfprev {
+test utf-7.15.2 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0\xF8 4
 } 3
 test utf-7.16 {Tcl_UtfPrev} testutfprev {
@@ -562,7 +561,7 @@ test utf-7.18.2 {Tcl_UtfPrev} testutfprev {
 test utf-7.19 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xA0\xA0
 } 4
-test utf-7.20 {Tcl_UtfPrev} testutfprev {
+test utf-7.20 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0\xA0
 } 4
 test utf-7.21 {Tcl_UtfPrev} testutfprev {
@@ -622,16 +621,16 @@ test utf-7.36 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
 test utf-7.37 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
     testutfprev A\xE0\xA0\x80 3
 } 1
-test utf-7.38 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+test utf-7.38 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
     testutfprev A\xE0\xA0\x80 2
 } 1
-test utf-7.39 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+test utf-7.39 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
     testutfprev A\xF0\x90\x80\x80
 } 4
-test utf-7.40 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+test utf-7.40 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
     testutfprev A\xF0\x90\x80\x80 4
 } 3
-test utf-7.41 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+test utf-7.41 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
     testutfprev A\xF0\x90\x80\x80 3
 } 2
 test utf-7.42 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
@@ -658,13 +657,13 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {te
 test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} {
     testutfprev \xE8\xA0\x00 2
 } 0
-test utf-7.48 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+test utf-7.48 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
     testutfprev A\xF4\x8F\xBF\xBF
 } 4
-test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
     testutfprev A\xF4\x8F\xBF\xBF 4
 } 3
-test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
     testutfprev A\xF4\x8F\xBF\xBF 3
 } 2
 test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
@@ -708,18 +707,18 @@ test utf-10.1 {Tcl_UtfBackslash: dst == NULL} {
     set x \n
 } {
 }
-test utf-10.2 {Tcl_UtfBackslash: \u subst} {
-    set x \ua2
-} [bytestring "\xc2\xa2"]
-test utf-10.3 {Tcl_UtfBackslash: longer \u subst} {
-    set x \u4e21
-} [bytestring "\xe4\xb8\xa1"]
-test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} {
-    set x \u4e2k
-} "[bytestring \xd3\xa2]k"
-test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} {
-    set x \u4e216
-} "[bytestring \xe4\xb8\xa1]6"
+test utf-10.2 {Tcl_UtfBackslash: \u subst} testbytestring {
+    set x \uA2
+} [testbytestring "\xC2\xA2"]
+test utf-10.3 {Tcl_UtfBackslash: longer \u subst} testbytestring {
+    set x \u4E21
+} [testbytestring "\xE4\xB8\xA1"]
+test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} testbytestring {
+    set x \u4E2k
+} "[testbytestring \xD3\xA2]k"
+test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} testbytestring {
+    set x \u4E216
+} "[testbytestring \xE4\xB8\xA1]6"
 proc bsCheck {char num} {
     global errNum
     test utf-10.$errNum {backslash substitution} {
@@ -774,11 +773,11 @@ test utf-11.2 {Tcl_UtfToUpper} {
     string toupper abc
 } ABC
 test utf-11.3 {Tcl_UtfToUpper} {
-    string toupper \u00e3ab
-} \u00c3AB
+    string toupper \u00E3ab
+} \u00C3AB
 test utf-11.4 {Tcl_UtfToUpper} {
-    string toupper \u01e3ab
-} \u01e2AB
+    string toupper \u01E3ab
+} \u01E2AB
 
 test utf-12.1 {Tcl_UtfToLower} {
     string tolower {}
@@ -787,11 +786,11 @@ test utf-12.2 {Tcl_UtfToLower} {
     string tolower ABC
 } abc
 test utf-12.3 {Tcl_UtfToLower} {
-    string tolower \u00c3AB
-} \u00e3ab
+    string tolower \u00C3AB
+} \u00E3ab
 test utf-12.4 {Tcl_UtfToLower} {
-    string tolower \u01e2AB
-} \u01e3ab
+    string tolower \u01E2AB
+} \u01E3ab
 
 test utf-13.1 {Tcl_UtfToTitle} {
     string totitle {}
@@ -800,11 +799,11 @@ test utf-13.2 {Tcl_UtfToTitle} {
     string totitle abc
 } Abc
 test utf-13.3 {Tcl_UtfToTitle} {
-    string totitle \u00e3ab
-} \u00c3ab
+    string totitle \u00E3ab
+} \u00C3ab
 test utf-13.4 {Tcl_UtfToTitle} {
-    string totitle \u01f3ab
-} \u01f2ab
+    string totitle \u01F3ab
+} \u01F2ab
 
 test utf-14.1 {Tcl_UtfNcasecmp} {
     string compare -nocase a b
@@ -823,7 +822,7 @@ test utf-15.1 {Tcl_UniCharToUpper, negative delta} {
     string toupper aA
 } AA
 test utf-15.2 {Tcl_UniCharToUpper, positive delta} {
-    string toupper \u0178\u00ff
+    string toupper \u0178\xFF
 } \u0178\u0178
 test utf-15.3 {Tcl_UniCharToUpper, no delta} {
     string toupper !
@@ -833,24 +832,24 @@ test utf-16.1 {Tcl_UniCharToLower, negative delta} {
     string tolower aA
 } aa
 test utf-16.2 {Tcl_UniCharToLower, positive delta} {
-    string tolower \u0178\u00ff\uA78D\u01c5
-} \u00ff\u00ff\u0265\u01c6
+    string tolower \u0178\xFF\uA78D\u01C5
+} \xFF\xFF\u0265\u01C6
 
 test utf-17.1 {Tcl_UniCharToLower, no delta} {
     string tolower !
 } !
 
 test utf-18.1 {Tcl_UniCharToTitle, add one for title} {
-    string totitle \u01c4
-} \u01c5
+    string totitle \u01C4
+} \u01C5
 test utf-18.2 {Tcl_UniCharToTitle, subtract one for title} {
-    string totitle \u01c6
-} \u01c5
+    string totitle \u01C6
+} \u01C5
 test utf-18.3 {Tcl_UniCharToTitle, subtract delta for title (positive)} {
-    string totitle \u017f
+    string totitle \u017F
 } \u0053
 test utf-18.4 {Tcl_UniCharToTitle, subtract delta for title (negative)} {
-    string totitle \u00ff
+    string totitle \xFF
 } \u0178
 test utf-18.5 {Tcl_UniCharToTitle, no delta} {
     string totitle !
@@ -865,15 +864,15 @@ test utf-20.1 {TclUniCharNcmp} {
 
 test utf-21.1 {TclUniCharIsAlnum} {
     # this returns 1 with Unicode 7 compliance
-    string is alnum \u1040\u021f\u0220
+    string is alnum \u1040\u021F\u0220
 } {1}
 test utf-21.2 {unicode alnum char in regc_locale.c} {
     # this returns 1 with Unicode 7 compliance
-    list [regexp {^[[:alnum:]]+$} \u1040\u021f\u0220] [regexp {^\w+$} \u1040\u021f\u0220_\u203f\u2040\u2054\ufe33\ufe34\ufe4d\ufe4e\ufe4f\uff3f]
+    list [regexp {^[[:alnum:]]+$} \u1040\u021F\u0220] [regexp {^\w+$} \u1040\u021F\u0220_\u203F\u2040\u2054\uFE33\uFE34\uFE4D\uFE4E\uFE4F\uFF3F]
 } {1 1}
 test utf-21.3 {unicode print char in regc_locale.c} {
     # this returns 1 with Unicode 7 compliance
-    regexp {^[[:print:]]+$} \ufbc1
+    regexp {^[[:print:]]+$} \uFBC1
 } 1
 test utf-21.4 {TclUniCharIsGraph} {
     # [Bug 3464428]
@@ -885,11 +884,11 @@ test utf-21.5 {unicode graph char in regc_locale.c} {
 } {1}
 test utf-21.6 {TclUniCharIsGraph} {
     # [Bug 3464428]
-    string is graph \u00a0
+    string is graph \xA0
 } {0}
 test utf-21.7 {unicode graph char in regc_locale.c} {
     # [Bug 3464428]
-    regexp {[[:graph:]]} \u0020\u00a0\u2028\u2029
+    regexp {[[:graph:]]} \x20\xA0\u2028\u2029
 } {0}
 test utf-21.8 {TclUniCharIsPrint} {
     # [Bug 3464428]
@@ -905,49 +904,47 @@ test utf-21.10 {unicode print char in regc_locale.c} {
 } {0}
 test utf-21.11 {TclUniCharIsControl} {
     # [Bug 3464428]
-    string is control \u0000\u001f\u00ad\u0605\u061c\u180e\u2066\ufeff
+    string is control \x00\x1F\xad\u0605\u061C\u180E\u2066\uFEFF
 } {1}
 test utf-21.12 {unicode control char in regc_locale.c} {
     # [Bug 3464428], [Bug a876646efe]
-    regexp {^[[:cntrl:]]*$} \u0000\u001f\u00ad\u0605\u061c\u180e\u2066\ufeff
+    regexp {^[[:cntrl:]]*$} \x00\x1F\xad\u0605\u061C\u180E\u2066\uFEFF
 } {1}
 
 test utf-22.1 {TclUniCharIsWordChar} {
     string wordend "xyz123_bar fg" 0
 } 10
 test utf-22.2 {TclUniCharIsWordChar} {
-    string wordend "x\u5080z123_bar\u203c fg" 0
+    string wordend "x\u5080z123_bar\u203C fg" 0
 } 10
 
 test utf-23.1 {TclUniCharIsAlpha} {
     # this returns 1 with Unicode 7 compliance
-    string is alpha \u021f\u0220\u037f\u052f
+    string is alpha \u021F\u0220\u037F\u052F
 } {1}
 test utf-23.2 {unicode alpha char in regc_locale.c} {
     # this returns 1 with Unicode 7 compliance
-    regexp {^[[:alpha:]]+$} \u021f\u0220\u037f\u052f
+    regexp {^[[:alpha:]]+$} \u021F\u0220\u037F\u052F
 } {1}
 
 test utf-24.1 {TclUniCharIsDigit} {
     # this returns 1 with Unicode 7 compliance
-    string is digit \u1040\uabf0
+    string is digit \u1040\uABF0
 } {1}
 test utf-24.2 {unicode digit char in regc_locale.c} {
     # this returns 1 with Unicode 7 compliance
-    list [regexp {^[[:digit:]]+$} \u1040\uabf0] [regexp {^\d+$} \u1040\uabf0]
+    list [regexp {^[[:digit:]]+$} \u1040\uABF0] [regexp {^\d+$} \u1040\uABF0]
 } {1 1}
 
 test utf-24.3 {TclUniCharIsSpace} {
     # this returns 1 with Unicode 7 compliance
-    string is space \u1680\u180e\u202f
+    string is space \u1680\u180E\u202F
 } {1}
 test utf-24.4 {unicode space char in regc_locale.c} {
     # this returns 1 with Unicode 7 compliance
-    list [regexp {^[[:space:]]+$} \u1680\u180e\u202f] [regexp {^\s+$} \u1680\u180e\u202f]
+    list [regexp {^[[:space:]]+$} \u1680\u180E\u202F] [regexp {^\s+$} \u1680\u180E\u202F]
 } {1 1}
 
-testConstraint teststringobj [llength [info commands teststringobj]]
-
 test utf-25.1 {Tcl_UniCharNcasecmp} -constraints teststringobj \
     -setup {
 	testobj freeallvars
diff --git a/tests/util.test b/tests/util.test
index 85c06dd..a483de1 100644
--- a/tests/util.test
+++ b/tests/util.test
@@ -15,6 +15,7 @@ if {[lsearch [namespace children] ::tcltest] == -1} {
 testConstraint testdstring [llength [info commands testdstring]]
 testConstraint testconcatobj [llength [info commands testconcatobj]]
 testConstraint testdoubledigits [llength [info commands testdoubledigits]]
+testConstraint compat85 [expr {[format %c 0x010000] == "\uFFFD"}]
 
 # Big test for correct ordering of data in [expr]
 
-- 
cgit v0.12


From ea9467702aecb854ba8cd803edbb38c4590aa928 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Sat, 18 Apr 2020 15:02:51 +0000
Subject: Make TCL_UTF_MAX=4 build test clean again.

---
 generic/tcl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/generic/tcl.h b/generic/tcl.h
index d7d064c..7378a8f 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -2148,7 +2148,7 @@ typedef struct Tcl_Parse {
  * reflected in regcustom.h.
  */
 
-#if TCL_UTF_MAX > 3
+#if TCL_UTF_MAX > 4
     /*
      * unsigned int isn't 100% accurate as it should be a strict 4-byte value
      * (perhaps wchar_t). 64-bit systems may have troubles. The size of this
-- 
cgit v0.12


From f866e98a39dc53d4864e3b04119b7dc2fd65078d Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Sat, 18 Apr 2020 15:11:22 +0000
Subject: regexp engine has to agree about the sizeof(Tcl_UniChar).

---
 generic/regcustom.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/generic/regcustom.h b/generic/regcustom.h
index ac33087..57a2d47 100644
--- a/generic/regcustom.h
+++ b/generic/regcustom.h
@@ -97,7 +97,7 @@ typedef int celt;		/* Type to hold chr, or NOCELT */
 #define	NOCELT (-1)		/* Celt value which is not valid chr */
 #define	CHR(c) (UCHAR(c))	/* Turn char literal into chr literal */
 #define	DIGITVAL(c) ((c)-'0')	/* Turn chr digit into its value */
-#if TCL_UTF_MAX > 3
+#if TCL_UTF_MAX > 4
 #define	CHRBITS	32		/* Bits in a chr; must not use sizeof */
 #define	CHR_MIN	0x00000000	/* Smallest and largest chr; the value */
 #define	CHR_MAX	0xffffffff	/* CHR_MAX-CHR_MIN+1 should fit in uchr */
-- 
cgit v0.12


From 4f3cc7f661e8ae301fd9b4aaf7a4c66d94897ec3 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Sat, 18 Apr 2020 19:54:08 +0000
Subject: Clean-up testcases: Consistent use of uppercase in hex-values. Use
 "testbytestring" instead of "bytestring". Mark tests not working with
 TCL_UTF_MAX>3 with "compat85"

---
 tests/utf.test | 278 ++++++++++++++++++++++++++++-----------------------------
 1 file changed, 137 insertions(+), 141 deletions(-)

diff --git a/tests/utf.test b/tests/utf.test
index 01e0bb2..189b85d 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -13,54 +13,60 @@ if {[lsearch [namespace children] ::tcltest] == -1} {
     namespace import -force ::tcltest::*
 }
 
+testConstraint compat85 [expr {[format %c 0x010000] == "\uFFFD"}]
 testConstraint testbytestring [llength [info commands testbytestring]]
+testConstraint testfindfirst [llength [info commands testfindfirst]]
+testConstraint testfindlast [llength [info commands testfindlast]]
+testConstraint testnumutfchars [llength [info commands testnumutfchars]]
+testConstraint teststringobj [llength [info commands teststringobj]]
+testConstraint testutfnext [llength [info commands testutfnext]]
+testConstraint testutfprev [llength [info commands testutfprev]]
 
 catch {unset x}
 
-test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} {
-    set x \x01
-} [bytestring "\x01"]
-test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} {
-    set x "\x00"
-} [bytestring "\xc0\x80"]
-test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} {
-    set x "\xe0"
-} [bytestring "\xc3\xa0"]
-test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} {
-    set x "\u4e4e"
-} [bytestring "\xe4\xb9\x8e"]
-test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} {
-    format %c 0x110000
-} [bytestring "\xef\xbf\xbd"]
-test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} {
-    format %c -1
-} [bytestring "\xef\xbf\xbd"]
-
+test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} testbytestring {
+    expr {"\x01" eq [testbytestring "\x01"]}
+} 1
+test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} testbytestring {
+    expr {"\x00" eq [testbytestring "\xC0\x80"]}
+} 1
+test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} testbytestring {
+    expr {"\xE0" eq [testbytestring "\xC3\xA0"]}
+} 1
+test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} testbytestring {
+    expr {"\u4E4E" eq [testbytestring "\xE4\xB9\x8E"]}
+} 1
+test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} testbytestring {
+    expr {[format %c 0x110000] eq [testbytestring "\xEF\xBF\xBD"]}
+} 1
+test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring {
+    expr {[format %c -1] eq [testbytestring "\xEF\xBF\xBD"]}
+} 1
 test utf-2.1 {Tcl_UtfToUniChar: low ascii} {
     string length "abc"
 } {3}
-test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} {
-    string length [bytestring "\x82\x83\x84"]
+test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} testbytestring {
+    string length [testbytestring "\x82\x83\x84"]
 } {3}
-test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} {
-    string length [bytestring "\xC2"]
+test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} testbytestring {
+    string length [testbytestring "\xC2"]
 } {1}
-test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} {
-    string length [bytestring "\xC2\xa2"]
+test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} testbytestring {
+    string length [testbytestring "\xC2\xA2"]
 } {1}
-test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} {
-    string length [bytestring "\xE2"]
+test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} testbytestring {
+    string length [testbytestring "\xE2"]
 } {1}
-test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} {
-    string length [bytestring "\xE2\xA2"]
+test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} testbytestring {
+    string length [testbytestring "\xE2\xA2"]
 } {2}
-test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} {
-    string length [bytestring "\xE4\xb9\x8e"]
+test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring {
+    string length [testbytestring "\xE4\xB9\x8E"]
 } {1}
-test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring} -body {
+test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body {
     string length [testbytestring "\xF0\x90\x80\x80"]
 } -result {4}
-test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring} -body {
+test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body {
     string length [testbytestring "\xF4\x8F\xBF\xBF"]
 } -result {4}
 test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring {
@@ -77,57 +83,51 @@ test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestr
 test utf-3.1 {Tcl_UtfCharComplete} {
 } {}
 
-testConstraint testnumutfchars [llength [info commands testnumutfchars]]
-testConstraint testfindfirst [llength [info commands testfindfirst]]
-testConstraint testfindlast [llength [info commands testfindlast]]
-
 test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars {
     testnumutfchars ""
 } {0}
-test utf-4.2 {Tcl_NumUtfChars: length 1} testnumutfchars {
-    testnumutfchars [bytestring "\xC2\xA2"]
+test utf-4.2 {Tcl_NumUtfChars: length 1} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "\xC2\xA2"]
 } {1}
-test utf-4.3 {Tcl_NumUtfChars: long string} testnumutfchars {
-    testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"]
+test utf-4.3 {Tcl_NumUtfChars: long string} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"]
 } {7}
-test utf-4.4 {Tcl_NumUtfChars: #u0000} testnumutfchars {
-    testnumutfchars [bytestring "\xC0\x80"]
+test utf-4.4 {Tcl_NumUtfChars: #u0000} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "\xC0\x80"]
 } {1}
 test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars {
     testnumutfchars "" 0
 } {0}
-test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} testnumutfchars {
-    testnumutfchars [bytestring "\xC2\xA2"] 1
+test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "\xC2\xA2"] 1
 } {1}
-test utf-4.7 {Tcl_NumUtfChars: long string, calc len} testnumutfchars {
-    testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] 10
+test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"] 10
 } {7}
-test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} testnumutfchars {
-    testnumutfchars [bytestring "\xC0\x80"] 1
+test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "\xC0\x80"] 1
 } {1}
 # Bug [2738427]: Tcl_NumUtfChars(...) no overflow check
-test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} testnumutfchars {
-    testnumutfchars [bytestring "\xE2\x82\xAC"] 2
+test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "\xE2\x82\xAC"] 2
 } {2}
-test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} testnumutfchars {
-    testnumutfchars [bytestring "\x00"] 2
+test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring "\x00"] 2
 } {2}
-test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} testnumutfchars {
-    testnumutfchars [bytestring \xf0\x9f\x92\xa9] 3
+test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} {
+    testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 3
 } {3}
-test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} testnumutfchars {
-    testnumutfchars [bytestring \xf0\x9f\x92\xa9] 4
+test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring compat85} {
+    testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4
 } {4}
 
-test utf-5.1 {Tcl_UtfFindFirst} testfindfirst {
-    testfindfirst [bytestring "abcbc"] 98
+test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} {
+    testfindfirst [testbytestring "abcbc"] 98
 } {bcbc}
-test utf-5.2 {Tcl_UtfFindLast} testfindlast {
-    testfindlast [bytestring "abcbc"] 98
+test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} {
+    testfindlast [testbytestring "abcbc"] 98
 } {bc}
 
-testConstraint testutfnext [llength [info commands testutfnext]]
-
 test utf-6.1 {Tcl_UtfNext} testutfnext {
     # This takes the pointer one past the terminating NUL.
     # This is really an invalid call.
@@ -334,7 +334,7 @@ test utf-6.67 {Tcl_UtfNext} testutfnext {
 test utf-6.68 {Tcl_UtfNext} testutfnext {
     testutfnext \xF2\xA0\xA0G
 } 1
-test utf-6.69 {Tcl_UtfNext} testutfnext {
+test utf-6.69 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0
 } 1
 test utf-6.70 {Tcl_UtfNext} testutfnext {
@@ -349,22 +349,22 @@ test utf-6.71 {Tcl_UtfNext} testutfnext {
 test utf-6.73 {Tcl_UtfNext} testutfnext {
     testutfnext \xF2\xA0\xA0\xF8
 } 1
-test utf-6.74 {Tcl_UtfNext} testutfnext {
+test utf-6.74 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0G
 } 1
-test utf-6.75 {Tcl_UtfNext} testutfnext {
+test utf-6.75 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0\xA0
 } 1
-test utf-6.76 {Tcl_UtfNext} testutfnext {
+test utf-6.76 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0\xD0
 } 1
-test utf-6.77 {Tcl_UtfNext} testutfnext {
+test utf-6.77 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0\xE8
 } 1
-test utf-6.78 {Tcl_UtfNext} testutfnext {
+test utf-6.78 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0\xF2
 } 1
-test utf-6.79 {Tcl_UtfNext} testutfnext {
+test utf-6.79 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0G\xF8
 } 1
 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext {
@@ -388,7 +388,7 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext {
 test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext {
     testutfnext \xF0\x80\x80\x80
 } 1
-test utf-6.87 {Tcl_UtfNext - overlong sequences} testutfnext {
+test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext compat85} {
     testutfnext \xF0\x90\x80\x80
 } 1
 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
@@ -403,15 +403,13 @@ test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {te
 test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
     testutfnext \xF0\x80\x80 1
 } 2
-test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext {
+test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext compat85} {
     testutfnext \xF4\x8F\xBF\xBF
 } 1
 test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext {
     testutfnext \xF4\x90\x80\x80
 } 1
 
-testConstraint testutfprev [llength [info commands testutfprev]]
-
 test utf-7.1 {Tcl_UtfPrev} testutfprev {
     testutfprev {}
 } 0
@@ -475,13 +473,13 @@ test utf-7.9.1 {Tcl_UtfPrev} testutfprev {
 test utf-7.9.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xF8\xA0 3
 } 2
-test utf-7.10 {Tcl_UtfPrev} testutfprev {
+test utf-7.10 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0
 } 2
-test utf-7.10.1 {Tcl_UtfPrev} testutfprev {
+test utf-7.10.1 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0\xA0 3
 } 2
-test utf-7.10.2 {Tcl_UtfPrev} testutfprev {
+test utf-7.10.2 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xF8\xA0 3
 } 2
 test utf-7.11 {Tcl_UtfPrev} testutfprev {
@@ -523,13 +521,13 @@ test utf-7.14.1 {Tcl_UtfPrev} testutfprev {
 test utf-7.14.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xA0\xF8 4
 } 3
-test utf-7.15 {Tcl_UtfPrev} testutfprev {
+test utf-7.15 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0
 } 3
-test utf-7.15.1 {Tcl_UtfPrev} testutfprev {
+test utf-7.15.1 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0\xA0 4
 } 3
-test utf-7.15.2 {Tcl_UtfPrev} testutfprev {
+test utf-7.15.2 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0\xF8 4
 } 3
 test utf-7.16 {Tcl_UtfPrev} testutfprev {
@@ -562,7 +560,7 @@ test utf-7.18.2 {Tcl_UtfPrev} testutfprev {
 test utf-7.19 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xA0\xA0
 } 4
-test utf-7.20 {Tcl_UtfPrev} testutfprev {
+test utf-7.20 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0\xA0
 } 4
 test utf-7.21 {Tcl_UtfPrev} testutfprev {
@@ -622,16 +620,16 @@ test utf-7.36 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
 test utf-7.37 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
     testutfprev A\xE0\xA0\x80 3
 } 1
-test utf-7.38 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+test utf-7.38 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
     testutfprev A\xE0\xA0\x80 2
 } 1
-test utf-7.39 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+test utf-7.39 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
     testutfprev A\xF0\x90\x80\x80
 } 4
-test utf-7.40 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+test utf-7.40 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
     testutfprev A\xF0\x90\x80\x80 4
 } 3
-test utf-7.41 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
+test utf-7.41 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
     testutfprev A\xF0\x90\x80\x80 3
 } 2
 test utf-7.42 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
@@ -658,13 +656,13 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {te
 test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} {
     testutfprev \xE8\xA0\x00 2
 } 0
-test utf-7.48 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+test utf-7.48 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
     testutfprev A\xF4\x8F\xBF\xBF
 } 4
-test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
     testutfprev A\xF4\x8F\xBF\xBF 4
 } 3
-test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
+test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
     testutfprev A\xF4\x8F\xBF\xBF 3
 } 2
 test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
@@ -708,18 +706,18 @@ test utf-10.1 {Tcl_UtfBackslash: dst == NULL} {
     set x \n
 } {
 }
-test utf-10.2 {Tcl_UtfBackslash: \u subst} {
-    set x \ua2
-} [bytestring "\xc2\xa2"]
-test utf-10.3 {Tcl_UtfBackslash: longer \u subst} {
-    set x \u4e21
-} [bytestring "\xe4\xb8\xa1"]
-test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} {
-    set x \u4e2k
-} "[bytestring \xd3\xa2]k"
-test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} {
-    set x \u4e216
-} "[bytestring \xe4\xb8\xa1]6"
+test utf-10.2 {Tcl_UtfBackslash: \u subst} testbytestring {
+    expr {"\uA2" eq [testbytestring "\xC2\xA2"]}
+} 1
+test utf-10.3 {Tcl_UtfBackslash: longer \u subst} testbytestring {
+    expr {"\u4E21" eq [testbytestring "\xE4\xB8\xA1"]}
+} 1
+test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} testbytestring {
+    expr {"\u4E2k" eq "[testbytestring \xD3\xA2]k"}
+} 1
+test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} testbytestring {
+    expr {"\u4E216" eq [testbytestring "\xE4\xB8\xA1"]6}
+} 1
 proc bsCheck {char num} {
     global errNum
     test utf-10.$errNum {backslash substitution} {
@@ -774,11 +772,11 @@ test utf-11.2 {Tcl_UtfToUpper} {
     string toupper abc
 } ABC
 test utf-11.3 {Tcl_UtfToUpper} {
-    string toupper \u00e3ab
-} \u00c3AB
+    string toupper \xE3gh
+} \xC3GH
 test utf-11.4 {Tcl_UtfToUpper} {
-    string toupper \u01e3ab
-} \u01e2AB
+    string toupper \u01E3ab
+} \u01E2AB
 
 test utf-12.1 {Tcl_UtfToLower} {
     string tolower {}
@@ -787,11 +785,11 @@ test utf-12.2 {Tcl_UtfToLower} {
     string tolower ABC
 } abc
 test utf-12.3 {Tcl_UtfToLower} {
-    string tolower \u00c3AB
-} \u00e3ab
+    string tolower \xC3GH
+} \xE3gh
 test utf-12.4 {Tcl_UtfToLower} {
-    string tolower \u01e2AB
-} \u01e3ab
+    string tolower \u01E2AB
+} \u01E3ab
 
 test utf-13.1 {Tcl_UtfToTitle} {
     string totitle {}
@@ -800,11 +798,11 @@ test utf-13.2 {Tcl_UtfToTitle} {
     string totitle abc
 } Abc
 test utf-13.3 {Tcl_UtfToTitle} {
-    string totitle \u00e3ab
-} \u00c3ab
+    string totitle \xE3GH
+} \xC3gh
 test utf-13.4 {Tcl_UtfToTitle} {
-    string totitle \u01f3ab
-} \u01f2ab
+    string totitle \u01F3AB
+} \u01F2ab
 
 test utf-14.1 {Tcl_UtfNcasecmp} {
     string compare -nocase a b
@@ -823,7 +821,7 @@ test utf-15.1 {Tcl_UniCharToUpper, negative delta} {
     string toupper aA
 } AA
 test utf-15.2 {Tcl_UniCharToUpper, positive delta} {
-    string toupper \u0178\u00ff
+    string toupper \u0178\xFF
 } \u0178\u0178
 test utf-15.3 {Tcl_UniCharToUpper, no delta} {
     string toupper !
@@ -833,24 +831,24 @@ test utf-16.1 {Tcl_UniCharToLower, negative delta} {
     string tolower aA
 } aa
 test utf-16.2 {Tcl_UniCharToLower, positive delta} {
-    string tolower \u0178\u00ff\uA78D\u01c5
-} \u00ff\u00ff\u0265\u01c6
+    string tolower \u0178\xFF\uA78D\u01C5
+} \xFF\xFF\u0265\u01C6
 
 test utf-17.1 {Tcl_UniCharToLower, no delta} {
     string tolower !
 } !
 
 test utf-18.1 {Tcl_UniCharToTitle, add one for title} {
-    string totitle \u01c4
-} \u01c5
+    string totitle \u01C4
+} \u01C5
 test utf-18.2 {Tcl_UniCharToTitle, subtract one for title} {
-    string totitle \u01c6
-} \u01c5
+    string totitle \u01C6
+} \u01C5
 test utf-18.3 {Tcl_UniCharToTitle, subtract delta for title (positive)} {
-    string totitle \u017f
-} \u0053
+    string totitle \u017F
+} \x53
 test utf-18.4 {Tcl_UniCharToTitle, subtract delta for title (negative)} {
-    string totitle \u00ff
+    string totitle \xFF
 } \u0178
 test utf-18.5 {Tcl_UniCharToTitle, no delta} {
     string totitle !
@@ -865,15 +863,15 @@ test utf-20.1 {TclUniCharNcmp} {
 
 test utf-21.1 {TclUniCharIsAlnum} {
     # this returns 1 with Unicode 7 compliance
-    string is alnum \u1040\u021f\u0220
+    string is alnum \u1040\u021F\u0220
 } {1}
 test utf-21.2 {unicode alnum char in regc_locale.c} {
     # this returns 1 with Unicode 7 compliance
-    list [regexp {^[[:alnum:]]+$} \u1040\u021f\u0220] [regexp {^\w+$} \u1040\u021f\u0220_\u203f\u2040\u2054\ufe33\ufe34\ufe4d\ufe4e\ufe4f\uff3f]
+    list [regexp {^[[:alnum:]]+$} \u1040\u021F\u0220] [regexp {^\w+$} \u1040\u021F\u0220_\u203F\u2040\u2054\uFE33\uFE34\uFE4D\uFE4E\uFE4F\uFF3F]
 } {1 1}
 test utf-21.3 {unicode print char in regc_locale.c} {
     # this returns 1 with Unicode 7 compliance
-    regexp {^[[:print:]]+$} \ufbc1
+    regexp {^[[:print:]]+$} \uFBC1
 } 1
 test utf-21.4 {TclUniCharIsGraph} {
     # [Bug 3464428]
@@ -885,69 +883,67 @@ test utf-21.5 {unicode graph char in regc_locale.c} {
 } {1}
 test utf-21.6 {TclUniCharIsGraph} {
     # [Bug 3464428]
-    string is graph \u00a0
+    string is graph \xA0
 } {0}
 test utf-21.7 {unicode graph char in regc_locale.c} {
     # [Bug 3464428]
-    regexp {[[:graph:]]} \u0020\u00a0\u2028\u2029
+    regexp {[[:graph:]]} \x20\xA0\u2028\u2029
 } {0}
 test utf-21.8 {TclUniCharIsPrint} {
     # [Bug 3464428]
-    string is print \u0009
+    string is print \x09
 } {0}
 test utf-21.9 {unicode print char in regc_locale.c} {
     # [Bug 3464428]
-    regexp {[[:print:]]} \u0009
+    regexp {[[:print:]]} \x09
 } {0}
 test utf-21.10 {unicode print char in regc_locale.c} {
     # [Bug 3464428]
-    regexp {[[:print:]]} \u0009
+    regexp {[[:print:]]} \x09
 } {0}
 test utf-21.11 {TclUniCharIsControl} {
     # [Bug 3464428]
-    string is control \u0000\u001f\u00ad\u0605\u061c\u180e\u2066\ufeff
+    string is control \x00\x1F\xAD\u0605\u061C\u180E\u2066\uFEFF
 } {1}
 test utf-21.12 {unicode control char in regc_locale.c} {
     # [Bug 3464428], [Bug a876646efe]
-    regexp {^[[:cntrl:]]*$} \u0000\u001f\u00ad\u0605\u061c\u180e\u2066\ufeff
+    regexp {^[[:cntrl:]]*$} \x00\x1F\xAD\u0605\u061C\u180E\u2066\uFEFF
 } {1}
 
 test utf-22.1 {TclUniCharIsWordChar} {
     string wordend "xyz123_bar fg" 0
 } 10
 test utf-22.2 {TclUniCharIsWordChar} {
-    string wordend "x\u5080z123_bar\u203c fg" 0
+    string wordend "x\u5080z123_bar\u203C fg" 0
 } 10
 
 test utf-23.1 {TclUniCharIsAlpha} {
     # this returns 1 with Unicode 7 compliance
-    string is alpha \u021f\u0220\u037f\u052f
+    string is alpha \u021F\u0220\u037F\u052F
 } {1}
 test utf-23.2 {unicode alpha char in regc_locale.c} {
     # this returns 1 with Unicode 7 compliance
-    regexp {^[[:alpha:]]+$} \u021f\u0220\u037f\u052f
+    regexp {^[[:alpha:]]+$} \u021F\u0220\u037F\u052F
 } {1}
 
 test utf-24.1 {TclUniCharIsDigit} {
     # this returns 1 with Unicode 7 compliance
-    string is digit \u1040\uabf0
+    string is digit \u1040\uABF0
 } {1}
 test utf-24.2 {unicode digit char in regc_locale.c} {
     # this returns 1 with Unicode 7 compliance
-    list [regexp {^[[:digit:]]+$} \u1040\uabf0] [regexp {^\d+$} \u1040\uabf0]
+    list [regexp {^[[:digit:]]+$} \u1040\uABF0] [regexp {^\d+$} \u1040\uABF0]
 } {1 1}
 
 test utf-24.3 {TclUniCharIsSpace} {
     # this returns 1 with Unicode 7 compliance
-    string is space \u1680\u180e\u202f
+    string is space \u1680\u180E\u202F
 } {1}
 test utf-24.4 {unicode space char in regc_locale.c} {
     # this returns 1 with Unicode 7 compliance
-    list [regexp {^[[:space:]]+$} \u1680\u180e\u202f] [regexp {^\s+$} \u1680\u180e\u202f]
+    list [regexp {^[[:space:]]+$} \u1680\u180E\u202F] [regexp {^\s+$} \u1680\u180E\u202F]
 } {1 1}
 
-testConstraint teststringobj [llength [info commands teststringobj]]
-
 test utf-25.1 {Tcl_UniCharNcasecmp} -constraints teststringobj \
     -setup {
 	testobj freeallvars
-- 
cgit v0.12


From 1a343cd043776b8acc3c4a047a10556c70f077dd Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Sun, 19 Apr 2020 19:37:05 +0000
Subject: More test-cases. Fix wrong quoting in testcase utf-10.5

---
 tests/utf.test | 52 ++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 44 insertions(+), 8 deletions(-)

diff --git a/tests/utf.test b/tests/utf.test
index 189b85d..946aa83 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -42,6 +42,7 @@ test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} testbytestring {
 test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring {
     expr {[format %c -1] eq [testbytestring "\xEF\xBF\xBD"]}
 } 1
+
 test utf-2.1 {Tcl_UtfToUniChar: low ascii} {
     string length "abc"
 } {3}
@@ -90,7 +91,7 @@ test utf-4.2 {Tcl_NumUtfChars: length 1} {testnumutfchars testbytestring} {
     testnumutfchars [testbytestring "\xC2\xA2"]
 } {1}
 test utf-4.3 {Tcl_NumUtfChars: long string} {testnumutfchars testbytestring} {
-    testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"]
+    testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E"]
 } {7}
 test utf-4.4 {Tcl_NumUtfChars: #u0000} {testnumutfchars testbytestring} {
     testnumutfchars [testbytestring "\xC0\x80"]
@@ -693,6 +694,18 @@ test utf-8.3 {Tcl_UniCharAtIndex: index > 0} {
 test utf-8.4 {Tcl_UniCharAtIndex: index > 0} {
     string index \u4E4E\u25A\xFF\u543 2
 } "\uFF"
+test utf-8.5 {Tcl_UniCharAtIndex: high surrogate} {
+    string index \uD842 0
+} "\uD842"
+test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} {
+    string index \uDC42 0
+} "\uDC42"
+test utf-8.7 {Tcl_UniCharAtIndex: Emoji} compat85 {
+    string index \uD83D\uDE00 0
+} "\uD83D"
+test utf-8.8 {Tcl_UniCharAtIndex: Emoji} compat85 {
+    string index \uD83D\uDE00 1
+} "\uDE00"
 
 test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
     string range abcd 0 2
@@ -700,6 +713,12 @@ test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
 test utf-9.2 {Tcl_UtfAtIndex: index > 0} {
     string range \u4E4E\u25A\xFF\u543klmnop 1 5
 } "\u25A\xFF\u543kl"
+test utf-9.3 {Tcl_UtfAtIndex: index = 0, Emoji} compat85 {
+    string range \uD83D\uDE00G 0 0
+} "\uD83D"
+test utf-9.4 {Tcl_UtfAtIndex: index > 0, Emoji} compat85 {
+    string range \uD83D\uDE00G 1 1
+} "\uDE00"
 
 
 test utf-10.1 {Tcl_UtfBackslash: dst == NULL} {
@@ -716,7 +735,7 @@ test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} testbytestring {
     expr {"\u4E2k" eq "[testbytestring \xD3\xA2]k"}
 } 1
 test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} testbytestring {
-    expr {"\u4E216" eq [testbytestring "\xE4\xB8\xA1"]6}
+    expr {"\u4E216" eq "[testbytestring \xE4\xB8\xA1]6"}
 } 1
 proc bsCheck {char num} {
     global errNum
@@ -775,8 +794,8 @@ test utf-11.3 {Tcl_UtfToUpper} {
     string toupper \xE3gh
 } \xC3GH
 test utf-11.4 {Tcl_UtfToUpper} {
-    string toupper \u01E3ab
-} \u01E2AB
+    string toupper \u01E3gh
+} \u01E2GH
 
 test utf-12.1 {Tcl_UtfToLower} {
     string tolower {}
@@ -788,8 +807,14 @@ test utf-12.3 {Tcl_UtfToLower} {
     string tolower \xC3GH
 } \xE3gh
 test utf-12.4 {Tcl_UtfToLower} {
-    string tolower \u01E2AB
-} \u01E3ab
+    string tolower \u01E2GH
+} \u01E3gh
+test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} {
+    string tolower \u10D0\u1C90
+} \u10D0\u10D0
+test utf-12.6 {Tcl_UtfToUpper low/high surrogate)} {
+    string tolower \uDC24\uD824
+} \uDC24\uD824
 
 test utf-13.1 {Tcl_UtfToTitle} {
     string totitle {}
@@ -803,6 +828,15 @@ test utf-13.3 {Tcl_UtfToTitle} {
 test utf-13.4 {Tcl_UtfToTitle} {
     string totitle \u01F3AB
 } \u01F2ab
+test utf-13.5 {Tcl_UtfToTitle Georgian (new in Unicode 11)} {
+    string totitle \u10D0\u1C90
+} \u10D0\u1C90
+test utf-13.6 {Tcl_UtfToTitle Georgian (new in Unicode 11)} {
+    string totitle \u1C90\u10D0
+} \u1C90\u10D0
+test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} {
+    string totitle \uDC24\uD824
+} \uDC24\uD824
 
 test utf-14.1 {Tcl_UtfNcasecmp} {
     string compare -nocase a b
@@ -854,9 +888,11 @@ test utf-18.5 {Tcl_UniCharToTitle, no delta} {
     string totitle !
 } !
 
-test utf-19.1 {TclUniCharLen} {
+test utf-19.1 {TclUniCharLen} -body {
     list [regexp \\d abc456def foo] $foo
-} {1 4}
+} -cleanup {
+    unset -nocomplain foo
+} -result {1 4}
 
 test utf-20.1 {TclUniCharNcmp} {
 } {}
-- 
cgit v0.12


From 9bcd48e5ffd32d1858e4d0f90a4eaee550ede17f Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Sun, 19 Apr 2020 22:02:10 +0000
Subject: typo

---
 tests/utf.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/utf.test b/tests/utf.test
index 946aa83..07863b9 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -103,7 +103,7 @@ test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {testnumutfchars testbytestri
     testnumutfchars [testbytestring "\xC2\xA2"] 1
 } {1}
 test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {testnumutfchars testbytestring} {
-    testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"] 10
+    testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E"] 10
 } {7}
 test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {testnumutfchars testbytestring} {
     testnumutfchars [testbytestring "\xC0\x80"] 1
-- 
cgit v0.12


From bb5381f946565a91e146910d62c56b40c02c5193 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Mon, 20 Apr 2020 05:35:54 +0000
Subject: Reconcile tests to the 8.5 branch history.

---
 tests/utf.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/utf.test b/tests/utf.test
index 07863b9..1ca3647 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -621,7 +621,7 @@ test utf-7.36 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
 test utf-7.37 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
     testutfprev A\xE0\xA0\x80 3
 } 1
-test utf-7.38 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
+test utf-7.38 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
     testutfprev A\xE0\xA0\x80 2
 } 1
 test utf-7.39 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
-- 
cgit v0.12


From 534db753aefcbe8cbdbec69611e9c6e31ea3deec Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Mon, 20 Apr 2020 06:45:47 +0000
Subject: Backport the encoding fix for source-7.2 in TCL_UTF_MAX=6 build.

---
 generic/tclEncoding.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 6c16827..5a9d2d5 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2470,20 +2470,33 @@ UtfToUnicodeProc(
 	if (dst > dstEnd) {
 	    result = TCL_CONVERT_NOSPACE;
 	    break;
-        }
+	}
 	src += TclUtfToUniChar(src, &ch);
 	/*
 	 * Need to handle this in a way that won't cause misalignment
 	 * by casting dst to a Tcl_UniChar. [Bug 1122671]
-	 * XXX: This hard-codes the assumed size of Tcl_UniChar as 2.
 	 */
 #ifdef WORDS_BIGENDIAN
+#if TCL_UTF_MAX > 4
+	*dst++ = (ch >> 24);
+	*dst++ = ((ch >> 16) & 0xFF);
+	*dst++ = ((ch >> 8) & 0xFF);
+	*dst++ = (ch & 0xFF);
+#else
 	*dst++ = (ch >> 8);
 	*dst++ = (ch & 0xFF);
+#endif
+#else
+#if TCL_UTF_MAX > 4
+	*dst++ = (ch & 0xFF);
+	*dst++ = ((ch >> 8) & 0xFF);
+	*dst++ = ((ch >> 16) & 0xFF);
+	*dst++ = (ch >> 24);
 #else
 	*dst++ = (ch & 0xFF);
 	*dst++ = (ch >> 8);
 #endif
+#endif
     }
     *srcReadPtr = src - srcStart;
     *dstWrotePtr = dst - dstStart;
-- 
cgit v0.12


From 0424b820bc8101075ba4673a8d07df870348f134 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Mon, 20 Apr 2020 07:34:28 +0000
Subject: Backport the fix for encoding-16.1 in a TCL_UTF_MAX=6 build.

---
 generic/tclEncoding.c | 240 +++++++++++++++++++++++++++-----------------------
 1 file changed, 128 insertions(+), 112 deletions(-)

diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 5a9d2d5..da03055 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -83,7 +83,7 @@ typedef struct TableEncodingData {
 } TableEncodingData;
 
 /*
- * The following structures is the clientData for a dynamically-loaded,
+ * Each of the following structures is the clientData for a dynamically-loaded
  * escape-driven encoding that is itself comprised of other simpler encodings.
  * An example is "iso-2022-jp", which uses escape sequences to switch between
  * ascii, jis0208, jis0212, gb2312, and ksc5601. Note that "escape-driven"
@@ -117,8 +117,8 @@ typedef struct EscapeEncodingData {
 				 * 0. */
     int numSubTables;		/* Length of following array. */
     EscapeSubTable subTables[1];/* Information about each EscapeSubTable used
-				 * by this encoding type. The actual size will
-				 * be as large as necessary to hold all
+				 * by this encoding type. The actual size is
+				 * as large as necessary to hold all
 				 * EscapeSubTables. */
 } EscapeEncodingData;
 
@@ -156,7 +156,7 @@ static ProcessGlobalValue encodingFileMap = {
  * A list of directories making up the "library path". Historically this
  * search path has served many uses, but the only one remaining is a base for
  * the encodingSearchPath above. If the application does not explicitly set
- * the encodingSearchPath, then it will be initialized by appending /encoding
+ * the encodingSearchPath, then it is initialized by appending /encoding
  * to each directory in this "libraryPath".
  */
 
@@ -177,7 +177,7 @@ TCL_DECLARE_MUTEX(encodingMutex)
 /*
  * The following are used to hold the default and current system encodings.
  * If NULL is passed to one of the conversion routines, the current setting of
- * the system encoding will be used to perform the conversion.
+ * the system encoding is used to perform the conversion.
  */
 
 static Tcl_Encoding defaultEncoding;
@@ -429,9 +429,8 @@ TclGetLibraryPath(void)
  *	Keeps the per-thread copy of the library path current with changes to
  *	the global copy.
  *
- *	NOTE: this routine returns void, so there's no way to report the error
- *	that searchPath is not a valid list. In that case, this routine will
- *	silently do nothing.
+ *	Since the result of this routine is void, if searchPath is not a valid
+ *	list this routine silently does nothing.
  *
  *----------------------------------------------------------------------
  */
@@ -453,17 +452,16 @@ TclSetLibraryPath(
  *
  * FillEncodingFileMap --
  *
- * 	Called to bring the encoding file map in sync with the current value
- * 	of the encoding search path.
+ *	Called to update the encoding file map with the current value
+ *	of the encoding search path.
  *
- *	Scan the directories on the encoding search path, find the *.enc
- *	files, and store the found pathnames in a map associated with the
- *	encoding name.
+ *	Finds *.enc files in the directories on the encoding search path and
+ *	stores the found pathnames in a map associated with the encoding name.
  *
- *	In particular, if $dir is on the encoding search path, and the file
- *	$dir/foo.enc is found, then store a "foo" -> $dir entry in the map.
- *	Later, any need for the "foo" encoding will quickly * be able to
- *	construct the $dir/foo.enc pathname for reading the encoding data.
+ *	If $dir is on the encoding search path and the file $dir/foo.enc is
+ *	found, stores a "foo" -> $dir entry in the map.  If the "foo" encoding
+ *	is needed later, the $dir/foo.enc name can be quickly constructed in
+ *	order to read the encoding data.
  *
  * Results:
  *	None.
@@ -544,19 +542,24 @@ void
 TclInitEncodingSubsystem(void)
 {
     Tcl_EncodingType type;
+    union {
+        char c;
+        short s;
+    } isLe;
 
     if (encodingsInitialized) {
 	return;
     }
 
+    isLe.s = 1;
     Tcl_MutexLock(&encodingMutex);
     Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS);
     Tcl_MutexUnlock(&encodingMutex);
 
     /*
-     * Create a few initial encodings. Note that the UTF-8 to UTF-8
-     * translation is not a no-op, because it will turn a stream of improperly
-     * formed UTF-8 into a properly formed stream.
+     * Create a few initial encodings.  UTF-8 to UTF-8 translation is not a
+     * no-op because it turns a stream of improperly formed UTF-8 into a
+     * properly formed stream.
      */
 
     type.encodingName	= "identity";
@@ -583,7 +586,7 @@ TclInitEncodingSubsystem(void)
     type.fromUtfProc    = UtfToUnicodeProc;
     type.freeProc	= NULL;
     type.nullSize	= 2;
-    type.clientData	= NULL;
+    type.clientData	= INT2PTR(isLe.c);
     Tcl_CreateEncoding(&type);
 
     /*
@@ -755,11 +758,7 @@ Tcl_SetDefaultEncodingDir(
  *	interp was NULL.
  *
  * Side effects:
- *	The new encoding type is entered into a table visible to all
- *	interpreters, keyed off the encoding's name. For each call to this
- *	function, there should eventually be a call to Tcl_FreeEncoding, so
- *	that the database can be cleaned up when encodings aren't needed
- *	anymore.
+ *	LoadEncodingFile is called if necessary.
  *
  *-------------------------------------------------------------------------
  */
@@ -797,15 +796,15 @@ Tcl_GetEncoding(
  *
  * Tcl_FreeEncoding --
  *
- *	This function is called to release an encoding allocated by
- *	Tcl_CreateEncoding() or Tcl_GetEncoding().
+ *	Releases an encoding allocated by Tcl_CreateEncoding() or
+ *	Tcl_GetEncoding().
  *
  * Results:
  *	None.
  *
  * Side effects:
  *	The reference count associated with the encoding is decremented and
- *	the encoding may be deleted if nothing is using it anymore.
+ *	the encoding is deleted if nothing is using it anymore.
  *
  *---------------------------------------------------------------------------
  */
@@ -824,13 +823,14 @@ Tcl_FreeEncoding(
  *
  * FreeEncoding --
  *
- *	This function is called to release an encoding by functions that
- *	already have the encodingMutex.
+ *	Decrements the reference count of an encoding.  The caller must hold
+ *	encodingMutex.
  *
  * Results:
  *	None.
  *
  * Side effects:
+ *	Releases the resource for an encoding if it is now unused.
  *	The reference count associated with the encoding is decremented and
  *	the encoding may be deleted if nothing is using it anymore.
  *
@@ -850,16 +850,17 @@ FreeEncoding(
     if (encodingPtr->refCount<=0) {
 	Tcl_Panic("FreeEncoding: refcount problem !!!");
     }
-    encodingPtr->refCount--;
-    if (encodingPtr->refCount == 0) {
+    if (encodingPtr->refCount-- <= 1) {
 	if (encodingPtr->freeProc != NULL) {
 	    (*encodingPtr->freeProc)(encodingPtr->clientData);
 	}
 	if (encodingPtr->hPtr != NULL) {
 	    Tcl_DeleteHashEntry(encodingPtr->hPtr);
 	}
-	ckfree((char *) encodingPtr->name);
-	ckfree((char *) encodingPtr);
+	if (encodingPtr->name) {
+	    ckfree((char *)encodingPtr->name);
+	}
+	ckfree((char *)encodingPtr);
     }
 }
 
@@ -1020,23 +1021,22 @@ Tcl_SetSystemEncoding(
  *
  * Tcl_CreateEncoding --
  *
- *	This function is called to define a new encoding and the functions
- *	that are used to convert between the specified encoding and Unicode.
+ *	Defines a new encoding, along with the functions that are used to
+ *	convert to and from Unicode.
  *
  * Results:
  *	Returns a token that represents the encoding. If an encoding with the
  *	same name already existed, the old encoding token remains valid and
- *	continues to behave as it used to, and will eventually be garbage
- *	collected when the last reference to it goes away. Any subsequent
- *	calls to Tcl_GetEncoding with the specified name will retrieve the
- *	most recent encoding token.
+ *	continues to behave as it used to, and is eventually garbage collected
+ *	when the last reference to it goes away. Any subsequent calls to
+ *	Tcl_GetEncoding with the specified name retrieve the most recent
+ *	encoding token.
  *
  * Side effects:
- *	The new encoding type is entered into a table visible to all
- *	interpreters, keyed off the encoding's name. For each call to this
- *	function, there should eventually be a call to Tcl_FreeEncoding, so
- *	that the database can be cleaned up when encodings aren't needed
- *	anymore.
+ *	A new record having the name of the encoding is entered into a table of
+ *	encodings visible to all interpreters.  For each call to this function,
+ *	there should eventually be a call to Tcl_FreeEncoding, which cleans
+ *	deletes the record in the table when an encoding is no longer needed.
  *
  *---------------------------------------------------------------------------
  */
@@ -1258,10 +1258,9 @@ Tcl_ExternalToUtf(
  *
  * Tcl_UtfToExternalDString --
  *
- *	Convert a source buffer from UTF-8 into the specified encoding. If any
+ *	Convert a source buffer from UTF-8 to the specified encoding. If any
  *	of the bytes in the source buffer are invalid or cannot be represented
- *	in the target encoding, a default fallback character will be
- *	substituted.
+ *	in the target encoding, a default fallback character is substituted.
  *
  * Results:
  *	The converted bytes are stored in the DString, which is then NULL
@@ -1570,13 +1569,13 @@ OpenEncodingFileChannel(
  *	the data.
  *
  * Results:
- *	The return value is the newly loaded Encoding, or NULL if the file
- *	didn't exist of was in the incorrect format. If NULL was returned, an
- *	error message is left in interp's result object, unless interp was
- *	NULL.
+ *	The return value is the newly loaded Tcl_Encoding or NULL if the file
+ *	didn't exist or could not be processed. If NULL is returned and interp
+ *	is not NULL, an error message is left in interp's result object.
  *
  * Side effects:
- *	File read from disk.
+ *	A corresponding encoding file might be read from persistent storage, in
+ *	which case LoadTableEncoding is called.
  *
  *---------------------------------------------------------------------------
  */
@@ -1584,8 +1583,8 @@ OpenEncodingFileChannel(
 static Tcl_Encoding
 LoadEncodingFile(
     Tcl_Interp *interp,		/* Interp for error reporting, if not NULL. */
-    const char *name)		/* The name of the encoding file on disk and
-				 * also the name for new encoding. */
+    const char *name)		/* The name of both the encoding file
+				 * and the new encoding. */
 {
     Tcl_Channel chan = NULL;
     Tcl_Encoding encoding = NULL;
@@ -1637,27 +1636,27 @@ LoadEncodingFile(
  *
  * LoadTableEncoding --
  *
- *	Helper function for LoadEncodingTable(). Loads a table to that
- *	converts between Unicode and some other encoding and creates an
- *	encoding (using a TableEncoding structure) from that information.
+ *	Helper function for LoadEncodingFile().  Creates a Tcl_EncodingType
+ *	structure along with its corresponding TableEncodingData structure, and
+ *	passes it to Tcl_Createncoding.
  *
- *	File contains binary data, but begins with a marker to indicate
- *	byte-ordering, so that same binary file can be read on either endian
- *	platforms.
+ *	The file contains binary data but begins with a marker to indicate
+ *	byte-ordering so a single binary file can be read on big or
+ *	little-endian systems.
  *
  * Results:
- *	The return value is the new encoding, or NULL if the encoding could
- *	not be created (because the file contained invalid data).
+ *	Returns the new Tcl_Encoding,  or NULL if it could could
+ *	not be created because the file contained invalid data.
  *
  * Side effects:
- *	None.
+ *	See Tcl_CreateEncoding().
  *
  *-------------------------------------------------------------------------
  */
 
 static Tcl_Encoding
 LoadTableEncoding(
-    const char *name,		/* Name for new encoding. */
+    const char *name,		/* Name of the new encoding. */
     int type,			/* Type of encoding (ENCODING_?????). */
     Tcl_Channel chan)		/* File containing new encoding. */
 {
@@ -1769,10 +1768,10 @@ LoadTableEncoding(
     }
 
     /*
-     * Invert toUnicode array to produce the fromUnicode array. Performs a
+     * Invert the toUnicode array to produce the fromUnicode array. Performs a
      * single malloc to get the memory for the array and all the pages needed
-     * by the array. While reading in the toUnicode array, we remembered what
-     * pages that would be needed for the fromUnicode array.
+     * by the array. While reading in the toUnicode array remember what
+     * pages are needed for the fromUnicode array.
      */
 
     if (symbol) {
@@ -1814,8 +1813,8 @@ LoadTableEncoding(
     if (type == ENCODING_MULTIBYTE) {
 	/*
 	 * If multibyte encodings don't have a backslash character, define
-	 * one. Otherwise, on Windows, native file names won't work because
-	 * the backslash in the file name will map to the unknown character
+	 * one. Otherwise, on Windows, native file names don't work because
+	 * the backslash in the file name maps to the unknown character
 	 * (question mark) when converting from UTF-8 to external encoding.
 	 */
 
@@ -1829,13 +1828,13 @@ LoadTableEncoding(
 	unsigned short *page;
 
 	/*
-	 * Make a special symbol encoding that not only maps the symbol
-	 * characters from their Unicode code points down into page 0, but
-	 * also ensure that the characters on page 0 map to themselves. This
-	 * is so that a symbol font can be used to display a simple string
-	 * like "abcd" and have alpha, beta, chi, delta show up, rather than
-	 * have "unknown" chars show up because strictly speaking the symbol
-	 * font doesn't have glyphs for those low ascii chars.
+	 * Make a special symbol encoding that maps each symbol character from
+	 * its Unicode code point down into page 0, and also ensure that each
+	 * characters on page 0 maps to itself so that a symbol font can be
+	 * used to display a simple string like "abcd" and have alpha, beta,
+	 * chi, delta show up, rather than have "unknown" chars show up because
+	 * strictly speaking the symbol font doesn't have glyphs for those low
+	 * ASCII chars.
 	 */
 
 	page = dataPtr->fromUnicode[0];
@@ -1939,7 +1938,7 @@ LoadTableEncoding(
 
 static Tcl_Encoding
 LoadEscapeEncoding(
-    const char *name,		/* Name for new encoding. */
+    const char *name,		/* Name of the new encoding. */
     Tcl_Channel chan)		/* File containing new encoding. */
 {
     int i;
@@ -2318,7 +2317,7 @@ UtfToUtfProc(
  *
  * UnicodeToUtfProc --
  *
- *	Convert from Unicode to UTF-8.
+ *	Convert from UTF-16 to UTF-8.
  *
  * Results:
  *	Returns TCL_OK if conversion was successful.
@@ -2331,7 +2330,7 @@ UtfToUtfProc(
 
 static int
 UnicodeToUtfProc(
-    ClientData clientData,	/* Not used. */
+    ClientData clientData,	/* != NULL means LE, == NUL means BE */
     const char *src,		/* Source string in Unicode. */
     int srcLen,			/* Source string length in bytes. */
     int flags,			/* Conversion control flags. */
@@ -2359,13 +2358,19 @@ UnicodeToUtfProc(
     const char *srcStart, *srcEnd;
     char *dstEnd, *dstStart;
     int result, numChars;
-    Tcl_UniChar ch;
+    unsigned short ch;
 
     result = TCL_OK;
-    if ((srcLen % sizeof(Tcl_UniChar)) != 0) {
+
+    /* check alignment with utf-16 (2 == sizeof(UTF-16)) */
+    if ((srcLen % 2) != 0) {
+	result = TCL_CONVERT_MULTIBYTE;
+	srcLen--;
+    }
+    /* If last code point is a high surrogate, we cannot handle that yet */
+    if ((srcLen >= 2) && ((src[srcLen - (clientData?1:2)] & 0xFC) == 0xD8)) {
 	result = TCL_CONVERT_MULTIBYTE;
-	srcLen /= sizeof(Tcl_UniChar);
-	srcLen *= sizeof(Tcl_UniChar);
+	srcLen-= 2;
     }
 
     srcStart = src;
@@ -2379,17 +2384,21 @@ UnicodeToUtfProc(
 	    result = TCL_CONVERT_NOSPACE;
 	    break;
 	}
+	if (clientData) {
+	    ch = (src[1] & 0xFF) << 8 | (src[0] & 0xFF);
+	} else {
+	    ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF);
+	}
 	/*
-	 * Special case for 1-byte utf chars for speed.  Make sure we
-	 * work with Tcl_UniChar-size data.
+	 * Special case for 1-byte utf chars for speed. Make sure we work with
+	 * unsigned short-size data.
 	 */
-	ch = *(Tcl_UniChar *)src;
 	if (ch && ch < 0x80) {
 	    *dst++ = (ch & 0xFF);
 	} else {
 	    dst += Tcl_UniCharToUtf(ch, dst);
 	}
-	src += sizeof(Tcl_UniChar);
+	src += sizeof(unsigned short);
     }
 
     *srcReadPtr = src - srcStart;
@@ -2403,7 +2412,7 @@ UnicodeToUtfProc(
  *
  * UtfToUnicodeProc --
  *
- *	Convert from UTF-8 to Unicode.
+ *	Convert from UTF-8 to UTF-16.
  *
  * Results:
  *	Returns TCL_OK if conversion was successful.
@@ -2416,8 +2425,7 @@ UnicodeToUtfProc(
 
 static int
 UtfToUnicodeProc(
-    ClientData clientData,	/* TableEncodingData that specifies
-				 * encoding. */
+    ClientData clientData,	/* != NULL means LE, == NUL means BE */
     const char *src,		/* Source string in UTF-8. */
     int srcLen,			/* Source string length in bytes. */
     int flags,			/* Conversion control flags. */
@@ -2444,7 +2452,7 @@ UtfToUnicodeProc(
 {
     const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
     int result, numChars;
-    Tcl_UniChar ch;
+    Tcl_UniChar ch = 0;
 
     srcStart = src;
     srcEnd = src + srcLen;
@@ -2476,27 +2484,37 @@ UtfToUnicodeProc(
 	 * Need to handle this in a way that won't cause misalignment
 	 * by casting dst to a Tcl_UniChar. [Bug 1122671]
 	 */
-#ifdef WORDS_BIGENDIAN
+	if (clientData) {
 #if TCL_UTF_MAX > 4
-	*dst++ = (ch >> 24);
-	*dst++ = ((ch >> 16) & 0xFF);
-	*dst++ = ((ch >> 8) & 0xFF);
-	*dst++ = (ch & 0xFF);
+	    if (ch <= 0xFFFF) {
+		*dst++ = (ch & 0xFF);
+		*dst++ = (ch >> 8);
+	    } else {
+		*dst++ = (((ch - 0x10000) >> 10) & 0xFF);
+		*dst++ = (((ch - 0x10000) >> 18) & 0x3) | 0xD8;
+		*dst++ = (ch & 0xFF);
+		*dst++ = ((ch & 0x3) >> 8) | 0xDC;
+	    }
 #else
-	*dst++ = (ch >> 8);
-	*dst++ = (ch & 0xFF);
+	    *dst++ = (ch & 0xFF);
+	    *dst++ = (ch >> 8);
 #endif
-#else
+	} else {
 #if TCL_UTF_MAX > 4
-	*dst++ = (ch & 0xFF);
-	*dst++ = ((ch >> 8) & 0xFF);
-	*dst++ = ((ch >> 16) & 0xFF);
-	*dst++ = (ch >> 24);
+	    if (ch <= 0xFFFF) {
+		*dst++ = (ch >> 8);
+		*dst++ = (ch & 0xFF);
+	    } else {
+		*dst++ = ((ch & 0x3) >> 8) | 0xDC;
+		*dst++ = (ch & 0xFF);
+		*dst++ = (((ch - 0x10000) >> 18) & 0x3) | 0xD8;
+		*dst++ = (((ch - 0x10000) >> 10) & 0xFF);
+	    }
 #else
-	*dst++ = (ch & 0xFF);
-	*dst++ = (ch >> 8);
-#endif
+	    *dst++ = (ch >> 8);
+	    *dst++ = (ch & 0xFF);
 #endif
+	}
     }
     *srcReadPtr = src - srcStart;
     *dstWrotePtr = dst - dstStart;
@@ -2899,7 +2917,6 @@ Iso88591FromUtfProc(
 		result = TCL_CONVERT_UNKNOWN;
 		break;
 	    }
-
 	    /*
 	     * Plunge on, using '?' as a fallback character.
 	     */
@@ -3387,14 +3404,13 @@ EscapeFromUtfProc(
  *
  * EscapeFreeProc --
  *
- *	This function is invoked when an EscapeEncodingData encoding is
- *	deleted. It deletes the memory used by the encoding.
+ *	Frees resources used by the encoding.
  *
  * Results:
  *	None.
  *
  * Side effects:
- *	Memory freed.
+ *	Memory is freed.
  *
  *---------------------------------------------------------------------------
  */
-- 
cgit v0.12


From 2958d5196de3452ea46a083603d4ce1dc0d05d2a Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Mon, 20 Apr 2020 07:50:22 +0000
Subject: Move the needed apt packages in .travis.yml to the top, so they can be
 shared between the images.

---
 .travis.yml | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index e10ca7c..5672c0b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,5 +1,15 @@
 sudo: false
 language: c
+addons:
+  apt:
+    packages:
+      - binutils-mingw-w64-i686
+      - binutils-mingw-w64-x86-64
+      - gcc-mingw-w64
+      - gcc-mingw-w64-base
+      - gcc-mingw-w64-i686
+      - gcc-mingw-w64-x86-64
+      - gcc-multilib
 matrix:
   include:
 # Testing on Linux with various compilers
@@ -146,13 +156,6 @@ matrix:
       os: linux
       dist: bionic
       compiler: x86_64-w64-mingw32-gcc
-      addons:
-        apt:
-          packages:
-            - gcc-mingw-w64-base
-            - binutils-mingw-w64-x86-64
-            - gcc-mingw-w64-x86-64
-            - gcc-mingw-w64
       env:
         - BUILD_DIR=win
         - CFGOPT="--host=x86_64-w64-mingw32 --enable-64bit --enable-threads"
@@ -167,14 +170,6 @@ matrix:
       os: linux
       dist: bionic
       compiler: i686-w64-mingw32-gcc
-      addons:
-        apt:
-          packages:
-            - gcc-mingw-w64-base
-            - binutils-mingw-w64-i686
-            - gcc-mingw-w64-i686
-            - gcc-mingw-w64
-            - gcc-multilib
       env:
         - BUILD_DIR=win
         - CFGOPT="--host=i686-w64-mingw32 --enable-threads"
-- 
cgit v0.12


From 58cf4db1ccb0602f9bd023ecc4e56830aea2453a Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Mon, 20 Apr 2020 22:35:39 +0000
Subject: Tie together the TCL_UTF_MAX=4 and TCL_UTF_MAX=6 builds to mean the
 same thing on the 8.5 branch -- use internal UCS-4 storage.

---
 generic/regcustom.h | 2 +-
 generic/tcl.h       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/generic/regcustom.h b/generic/regcustom.h
index 57a2d47..ac33087 100644
--- a/generic/regcustom.h
+++ b/generic/regcustom.h
@@ -97,7 +97,7 @@ typedef int celt;		/* Type to hold chr, or NOCELT */
 #define	NOCELT (-1)		/* Celt value which is not valid chr */
 #define	CHR(c) (UCHAR(c))	/* Turn char literal into chr literal */
 #define	DIGITVAL(c) ((c)-'0')	/* Turn chr digit into its value */
-#if TCL_UTF_MAX > 4
+#if TCL_UTF_MAX > 3
 #define	CHRBITS	32		/* Bits in a chr; must not use sizeof */
 #define	CHR_MIN	0x00000000	/* Smallest and largest chr; the value */
 #define	CHR_MAX	0xffffffff	/* CHR_MAX-CHR_MIN+1 should fit in uchr */
diff --git a/generic/tcl.h b/generic/tcl.h
index 7378a8f..d7d064c 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -2148,7 +2148,7 @@ typedef struct Tcl_Parse {
  * reflected in regcustom.h.
  */
 
-#if TCL_UTF_MAX > 4
+#if TCL_UTF_MAX > 3
     /*
      * unsigned int isn't 100% accurate as it should be a strict 4-byte value
      * (perhaps wchar_t). 64-bit systems may have troubles. The size of this
-- 
cgit v0.12


From 4c9bc32c393e100ae9caf7f06e57c798f96ada6d Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Mon, 20 Apr 2020 23:00:30 +0000
Subject: Pair every compat85 test with a fullutf test so that we cover all
 variants.

---
 tests/utf.test | 137 ++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 107 insertions(+), 30 deletions(-)

diff --git a/tests/utf.test b/tests/utf.test
index 1ca3647..1c79f32 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -13,7 +13,9 @@ if {[lsearch [namespace children] ::tcltest] == -1} {
     namespace import -force ::tcltest::*
 }
 
-testConstraint compat85 [expr {[format %c 0x010000] == "\uFFFD"}]
+testConstraint compat85 [expr {[format %c 0x010000] eq "\uFFFD"}]
+testConstraint fullutf  [expr {[format %c 0x010000] ne "\uFFFD"}]
+
 testConstraint testbytestring [llength [info commands testbytestring]]
 testConstraint testfindfirst [llength [info commands testfindfirst]]
 testConstraint testfindlast [llength [info commands testfindlast]]
@@ -64,12 +66,18 @@ test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} testbytestrin
 test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring {
     string length [testbytestring "\xE4\xB9\x8E"]
 } {1}
-test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body {
+test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body {
     string length [testbytestring "\xF0\x90\x80\x80"]
 } -result {4}
-test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body {
+test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring fullutf} -body {
+    string length [testbytestring "\xF0\x90\x80\x80"]
+} -result {1}
+test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring compat85} -body {
     string length [testbytestring "\xF4\x8F\xBF\xBF"]
 } -result {4}
+test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {testbytestring fullutf} -body {
+    string length [testbytestring "\xF4\x8F\xBF\xBF"]
+} -result {1}
 test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring {
     string length [testbytestring "\xF0\x8F\xBF\xBF"]
 } {4}
@@ -118,9 +126,12 @@ test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars
 test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} {
     testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 3
 } {3}
-test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring compat85} {
+test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring compat85} {
     testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4
 } {4}
+test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring fullutf} {
+    testnumutfchars [testbytestring \xF0\x9F\x92\xA9] 4
+} {1}
 
 test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} {
     testfindfirst [testbytestring "abcbc"] 98
@@ -335,9 +346,12 @@ test utf-6.67 {Tcl_UtfNext} testutfnext {
 test utf-6.68 {Tcl_UtfNext} testutfnext {
     testutfnext \xF2\xA0\xA0G
 } 1
-test utf-6.69 {Tcl_UtfNext} {testutfnext compat85} {
+test utf-6.69.0 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0
 } 1
+test utf-6.69.1 {Tcl_UtfNext} {testutfnext fullutf} {
+    testutfnext \xF2\xA0\xA0\xA0
+} 4
 test utf-6.70 {Tcl_UtfNext} testutfnext {
     testutfnext \xF2\xA0\xA0\xD0
 } 1
@@ -350,24 +364,42 @@ test utf-6.71 {Tcl_UtfNext} testutfnext {
 test utf-6.73 {Tcl_UtfNext} testutfnext {
     testutfnext \xF2\xA0\xA0\xF8
 } 1
-test utf-6.74 {Tcl_UtfNext} {testutfnext compat85} {
+test utf-6.74.0 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0G
 } 1
-test utf-6.75 {Tcl_UtfNext} {testutfnext compat85} {
+test utf-6.74.1 {Tcl_UtfNext} {testutfnext fullutf} {
+    testutfnext \xF2\xA0\xA0\xA0G
+} 4
+test utf-6.75.0 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0\xA0
 } 1
-test utf-6.76 {Tcl_UtfNext} {testutfnext compat85} {
+test utf-6.75.1 {Tcl_UtfNext} {testutfnext fullutf} {
+    testutfnext \xF2\xA0\xA0\xA0\xA0
+} 4
+test utf-6.76.0 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0\xD0
 } 1
-test utf-6.77 {Tcl_UtfNext} {testutfnext compat85} {
+test utf-6.76.1 {Tcl_UtfNext} {testutfnext fullutf} {
+    testutfnext \xF2\xA0\xA0\xA0\xD0
+} 4
+test utf-6.77.0 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0\xE8
 } 1
-test utf-6.78 {Tcl_UtfNext} {testutfnext compat85} {
+test utf-6.77.1 {Tcl_UtfNext} {testutfnext fullutf} {
+    testutfnext \xF2\xA0\xA0\xA0\xE8
+} 4
+test utf-6.78.0 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0\xF2
 } 1
-test utf-6.79 {Tcl_UtfNext} {testutfnext compat85} {
+test utf-6.78.1 {Tcl_UtfNext} {testutfnext fullutf} {
+    testutfnext \xF2\xA0\xA0\xA0\xF2
+} 4
+test utf-6.79.0 {Tcl_UtfNext} {testutfnext compat85} {
     testutfnext \xF2\xA0\xA0\xA0G\xF8
 } 1
+test utf-6.79.1 {Tcl_UtfNext} {testutfnext fullutf} {
+    testutfnext \xF2\xA0\xA0\xA0G\xF8
+} 4
 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext {
     testutfnext \xC0\x80
 } 2
@@ -389,9 +421,12 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext {
 test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext {
     testutfnext \xF0\x80\x80\x80
 } 1
-test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext compat85} {
+test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext compat85} {
     testutfnext \xF0\x90\x80\x80
 } 1
+test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} {
+    testutfnext \xF0\x90\x80\x80
+} 4
 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
     testutfnext \xA0\xA0
 } 1
@@ -404,9 +439,12 @@ test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {te
 test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
     testutfnext \xF0\x80\x80 1
 } 2
-test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext compat85} {
+test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext compat85} {
     testutfnext \xF4\x8F\xBF\xBF
 } 1
+test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} {
+    testutfnext \xF4\x8F\xBF\xBF
+} 4
 test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext {
     testutfnext \xF4\x90\x80\x80
 } 1
@@ -474,15 +512,24 @@ test utf-7.9.1 {Tcl_UtfPrev} testutfprev {
 test utf-7.9.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xF8\xA0 3
 } 2
-test utf-7.10 {Tcl_UtfPrev} {testutfprev compat85} {
+test utf-7.10.0 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0
 } 2
-test utf-7.10.1 {Tcl_UtfPrev} {testutfprev compat85} {
+test utf-7.10.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+    testutfprev A\xF2\xA0
+} 1
+test utf-7.10.1.0 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0\xA0 3
 } 2
-test utf-7.10.2 {Tcl_UtfPrev} {testutfprev compat85} {
+test utf-7.10.1.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+    testutfprev A\xF2\xA0\xA0\xA0 3
+} 1
+test utf-7.10.2.0 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xF8\xA0 3
 } 2
+test utf-7.10.2.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+    testutfprev A\xF2\xA0\xF8\xA0 3
+} 1
 test utf-7.11 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xE8\xA0
 } 1
@@ -522,15 +569,24 @@ test utf-7.14.1 {Tcl_UtfPrev} testutfprev {
 test utf-7.14.2 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xA0\xF8 4
 } 3
-test utf-7.15 {Tcl_UtfPrev} {testutfprev compat85} {
+test utf-7.15.0 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0
 } 3
-test utf-7.15.1 {Tcl_UtfPrev} {testutfprev compat85} {
+test utf-7.15.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+    testutfprev A\xF2\xA0\xA0
+} 1
+test utf-7.15.1.0 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0\xA0 4
 } 3
-test utf-7.15.2 {Tcl_UtfPrev} {testutfprev compat85} {
+test utf-7.15.1.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+    testutfprev A\xF2\xA0\xA0\xA0 4
+} 1
+test utf-7.15.2.0 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0\xF8 4
 } 3
+test utf-7.15.2.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+    testutfprev A\xF2\xA0\xA0\xF8 4
+} 1
 test utf-7.16 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xE8\xA0\xA0
 } 1
@@ -561,9 +617,12 @@ test utf-7.18.2 {Tcl_UtfPrev} testutfprev {
 test utf-7.19 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xF8\xA0\xA0\xA0
 } 4
-test utf-7.20 {Tcl_UtfPrev} {testutfprev compat85} {
+test utf-7.20.0 {Tcl_UtfPrev} {testutfprev compat85} {
     testutfprev A\xF2\xA0\xA0\xA0
 } 4
+test utf-7.20.1 {Tcl_UtfPrev} {testutfprev fullutf} {
+    testutfprev A\xF2\xA0\xA0\xA0
+} 1
 test utf-7.21 {Tcl_UtfPrev} testutfprev {
     testutfprev A\xE8\xA0\xA0\xA0
 } 4
@@ -624,15 +683,24 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
 test utf-7.38 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
     testutfprev A\xE0\xA0\x80 2
 } 1
-test utf-7.39 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
+test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
     testutfprev A\xF0\x90\x80\x80
 } 4
-test utf-7.40 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
+test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence}  {testutfprev fullutf} {
+    testutfprev A\xF0\x90\x80\x80
+} 1
+test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
     testutfprev A\xF0\x90\x80\x80 4
 } 3
-test utf-7.41 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
+test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence}  {testutfprev fullutf} {
+    testutfprev A\xF0\x90\x80\x80 4
+} 1
+test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence}  {testutfprev compat85} {
     testutfprev A\xF0\x90\x80\x80 3
 } 2
+test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence}  {testutfprev fullutf} {
+    testutfprev A\xF0\x90\x80\x80 3
+} 1
 test utf-7.42 {Tcl_UtfPrev -- overlong sequence}  testutfprev {
     testutfprev A\xF0\x90\x80\x80 2
 } 1
@@ -657,15 +725,24 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {te
 test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} {
     testutfprev \xE8\xA0\x00 2
 } 0
-test utf-7.48 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
+test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
     testutfprev A\xF4\x8F\xBF\xBF
 } 4
-test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
+test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} {
+    testutfprev A\xF4\x8F\xBF\xBF
+} 1
+test utf-7.48.1.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
     testutfprev A\xF4\x8F\xBF\xBF 4
 } 3
-test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
+test utf-7.48.1.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} {
+    testutfprev A\xF4\x8F\xBF\xBF 4
+} 1
+test utf-7.48.2.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev compat85} {
     testutfprev A\xF4\x8F\xBF\xBF 3
 } 2
+test utf-7.48.2.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} {
+    testutfprev A\xF4\x8F\xBF\xBF 3
+} 1
 test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev {
     testutfprev A\xF4\x8F\xBF\xBF 2
 } 1
@@ -700,10 +777,10 @@ test utf-8.5 {Tcl_UniCharAtIndex: high surrogate} {
 test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} {
     string index \uDC42 0
 } "\uDC42"
-test utf-8.7 {Tcl_UniCharAtIndex: Emoji} compat85 {
+test utf-8.7 {Tcl_UniCharAtIndex: Emoji} {
     string index \uD83D\uDE00 0
 } "\uD83D"
-test utf-8.8 {Tcl_UniCharAtIndex: Emoji} compat85 {
+test utf-8.8 {Tcl_UniCharAtIndex: Emoji} {
     string index \uD83D\uDE00 1
 } "\uDE00"
 
@@ -713,10 +790,10 @@ test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
 test utf-9.2 {Tcl_UtfAtIndex: index > 0} {
     string range \u4E4E\u25A\xFF\u543klmnop 1 5
 } "\u25A\xFF\u543kl"
-test utf-9.3 {Tcl_UtfAtIndex: index = 0, Emoji} compat85 {
+test utf-9.3 {Tcl_UtfAtIndex: index = 0, Emoji} {
     string range \uD83D\uDE00G 0 0
 } "\uD83D"
-test utf-9.4 {Tcl_UtfAtIndex: index > 0, Emoji} compat85 {
+test utf-9.4 {Tcl_UtfAtIndex: index > 0, Emoji} {
     string range \uD83D\uDE00G 1 1
 } "\uDE00"
 
-- 
cgit v0.12


From d8bc590eef94f9e9ec24150cf8208f38638290ef Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Tue, 21 Apr 2020 02:50:23 +0000
Subject: Revert the backport to tclEncoding.c that seems to redefine the
 "unicode" encoding to mean UTF-16. Don't want that behavior change in 8.5.

---
 generic/tclEncoding.c | 240 +++++++++++++++++++++++---------------------------
 1 file changed, 112 insertions(+), 128 deletions(-)

diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index da03055..5a9d2d5 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -83,7 +83,7 @@ typedef struct TableEncodingData {
 } TableEncodingData;
 
 /*
- * Each of the following structures is the clientData for a dynamically-loaded
+ * The following structures is the clientData for a dynamically-loaded,
  * escape-driven encoding that is itself comprised of other simpler encodings.
  * An example is "iso-2022-jp", which uses escape sequences to switch between
  * ascii, jis0208, jis0212, gb2312, and ksc5601. Note that "escape-driven"
@@ -117,8 +117,8 @@ typedef struct EscapeEncodingData {
 				 * 0. */
     int numSubTables;		/* Length of following array. */
     EscapeSubTable subTables[1];/* Information about each EscapeSubTable used
-				 * by this encoding type. The actual size is
-				 * as large as necessary to hold all
+				 * by this encoding type. The actual size will
+				 * be as large as necessary to hold all
 				 * EscapeSubTables. */
 } EscapeEncodingData;
 
@@ -156,7 +156,7 @@ static ProcessGlobalValue encodingFileMap = {
  * A list of directories making up the "library path". Historically this
  * search path has served many uses, but the only one remaining is a base for
  * the encodingSearchPath above. If the application does not explicitly set
- * the encodingSearchPath, then it is initialized by appending /encoding
+ * the encodingSearchPath, then it will be initialized by appending /encoding
  * to each directory in this "libraryPath".
  */
 
@@ -177,7 +177,7 @@ TCL_DECLARE_MUTEX(encodingMutex)
 /*
  * The following are used to hold the default and current system encodings.
  * If NULL is passed to one of the conversion routines, the current setting of
- * the system encoding is used to perform the conversion.
+ * the system encoding will be used to perform the conversion.
  */
 
 static Tcl_Encoding defaultEncoding;
@@ -429,8 +429,9 @@ TclGetLibraryPath(void)
  *	Keeps the per-thread copy of the library path current with changes to
  *	the global copy.
  *
- *	Since the result of this routine is void, if searchPath is not a valid
- *	list this routine silently does nothing.
+ *	NOTE: this routine returns void, so there's no way to report the error
+ *	that searchPath is not a valid list. In that case, this routine will
+ *	silently do nothing.
  *
  *----------------------------------------------------------------------
  */
@@ -452,16 +453,17 @@ TclSetLibraryPath(
  *
  * FillEncodingFileMap --
  *
- *	Called to update the encoding file map with the current value
- *	of the encoding search path.
+ * 	Called to bring the encoding file map in sync with the current value
+ * 	of the encoding search path.
  *
- *	Finds *.end files in the directories on the encoding search path and
- *	stores the found pathnames in a map associated with the encoding name.
+ *	Scan the directories on the encoding search path, find the *.enc
+ *	files, and store the found pathnames in a map associated with the
+ *	encoding name.
  *
- *	If $dir is on the encoding search path and the file $dir/foo.enc is
- *	found, stores a "foo" -> $dir entry in the map.  if the "foo" encoding
- *	is needed later, the $dir/foo.enc name can be quickly constructed in
- *	order to read the encoding data.
+ *	In particular, if $dir is on the encoding search path, and the file
+ *	$dir/foo.enc is found, then store a "foo" -> $dir entry in the map.
+ *	Later, any need for the "foo" encoding will quickly * be able to
+ *	construct the $dir/foo.enc pathname for reading the encoding data.
  *
  * Results:
  *	None.
@@ -542,24 +544,19 @@ void
 TclInitEncodingSubsystem(void)
 {
     Tcl_EncodingType type;
-    union {
-        char c;
-        short s;
-    } isLe;
 
     if (encodingsInitialized) {
 	return;
     }
 
-    isLe.s = 1;
     Tcl_MutexLock(&encodingMutex);
     Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS);
     Tcl_MutexUnlock(&encodingMutex);
 
     /*
-     * Create a few initial encodings.  UTF-8 to UTF-8 translation is not a
-     * no-op because it turns a stream of improperly formed UTF-8 into a
-     * properly formed stream.
+     * Create a few initial encodings. Note that the UTF-8 to UTF-8
+     * translation is not a no-op, because it will turn a stream of improperly
+     * formed UTF-8 into a properly formed stream.
      */
 
     type.encodingName	= "identity";
@@ -586,7 +583,7 @@ TclInitEncodingSubsystem(void)
     type.fromUtfProc    = UtfToUnicodeProc;
     type.freeProc	= NULL;
     type.nullSize	= 2;
-    type.clientData	= INT2PTR(isLe.c);
+    type.clientData	= NULL;
     Tcl_CreateEncoding(&type);
 
     /*
@@ -758,7 +755,11 @@ Tcl_SetDefaultEncodingDir(
  *	interp was NULL.
  *
  * Side effects:
- *	LoadEncodingFile is called if necessary.
+ *	The new encoding type is entered into a table visible to all
+ *	interpreters, keyed off the encoding's name. For each call to this
+ *	function, there should eventually be a call to Tcl_FreeEncoding, so
+ *	that the database can be cleaned up when encodings aren't needed
+ *	anymore.
  *
  *-------------------------------------------------------------------------
  */
@@ -796,15 +797,15 @@ Tcl_GetEncoding(
  *
  * Tcl_FreeEncoding --
  *
- *	Releases an encoding allocated by Tcl_CreateEncoding() or
- *	Tcl_GetEncoding().
+ *	This function is called to release an encoding allocated by
+ *	Tcl_CreateEncoding() or Tcl_GetEncoding().
  *
  * Results:
  *	None.
  *
  * Side effects:
  *	The reference count associated with the encoding is decremented and
- *	the encoding is deleted if nothing is using it anymore.
+ *	the encoding may be deleted if nothing is using it anymore.
  *
  *---------------------------------------------------------------------------
  */
@@ -823,14 +824,13 @@ Tcl_FreeEncoding(
  *
  * FreeEncoding --
  *
- *	Decrements the reference count of an encoding.  The caller must hold
- *	encodingMutes.
+ *	This function is called to release an encoding by functions that
+ *	already have the encodingMutex.
  *
  * Results:
  *	None.
  *
  * Side effects:
- *	Releases the resource for an encoding if it is now unused.
  *	The reference count associated with the encoding is decremented and
  *	the encoding may be deleted if nothing is using it anymore.
  *
@@ -850,17 +850,16 @@ FreeEncoding(
     if (encodingPtr->refCount<=0) {
 	Tcl_Panic("FreeEncoding: refcount problem !!!");
     }
-    if (encodingPtr->refCount-- <= 1) {
+    encodingPtr->refCount--;
+    if (encodingPtr->refCount == 0) {
 	if (encodingPtr->freeProc != NULL) {
 	    (*encodingPtr->freeProc)(encodingPtr->clientData);
 	}
 	if (encodingPtr->hPtr != NULL) {
 	    Tcl_DeleteHashEntry(encodingPtr->hPtr);
 	}
-	if (encodingPtr->name) {
-	    ckfree((char *)encodingPtr->name);
-	}
-	ckfree((char *)encodingPtr);
+	ckfree((char *) encodingPtr->name);
+	ckfree((char *) encodingPtr);
     }
 }
 
@@ -1021,22 +1020,23 @@ Tcl_SetSystemEncoding(
  *
  * Tcl_CreateEncoding --
  *
- *	Defines a new encoding, along with the functions that are used to
- *	convert to and from Unicode.
+ *	This function is called to define a new encoding and the functions
+ *	that are used to convert between the specified encoding and Unicode.
  *
  * Results:
  *	Returns a token that represents the encoding. If an encoding with the
  *	same name already existed, the old encoding token remains valid and
- *	continues to behave as it used to, and is eventually garbage collected
- *	when the last reference to it goes away. Any subsequent calls to
- *	Tcl_GetEncoding with the specified name retrieve the most recent
- *	encoding token.
+ *	continues to behave as it used to, and will eventually be garbage
+ *	collected when the last reference to it goes away. Any subsequent
+ *	calls to Tcl_GetEncoding with the specified name will retrieve the
+ *	most recent encoding token.
  *
  * Side effects:
- *	A new record having the name of the encoding is entered into a table of
- *	encodings visible to all interpreters.  For each call to this function,
- *	there should eventually be a call to Tcl_FreeEncoding, which cleans
- *	deletes the record in the table when an encoding is no longer needed.
+ *	The new encoding type is entered into a table visible to all
+ *	interpreters, keyed off the encoding's name. For each call to this
+ *	function, there should eventually be a call to Tcl_FreeEncoding, so
+ *	that the database can be cleaned up when encodings aren't needed
+ *	anymore.
  *
  *---------------------------------------------------------------------------
  */
@@ -1258,9 +1258,10 @@ Tcl_ExternalToUtf(
  *
  * Tcl_UtfToExternalDString --
  *
- *	Convert a source buffer from UTF-8 to the specified encoding. If any
+ *	Convert a source buffer from UTF-8 into the specified encoding. If any
  *	of the bytes in the source buffer are invalid or cannot be represented
- *	in the target encoding, a default fallback character is substituted.
+ *	in the target encoding, a default fallback character will be
+ *	substituted.
  *
  * Results:
  *	The converted bytes are stored in the DString, which is then NULL
@@ -1569,13 +1570,13 @@ OpenEncodingFileChannel(
  *	the data.
  *
  * Results:
- *	The return value is the newly loaded Tcl_Encoding or NULL if the file
- *	didn't exist or could not be processed. If NULL is returned and interp
- *	is not NULL, an error message is left in interp's result object.
+ *	The return value is the newly loaded Encoding, or NULL if the file
+ *	didn't exist of was in the incorrect format. If NULL was returned, an
+ *	error message is left in interp's result object, unless interp was
+ *	NULL.
  *
  * Side effects:
- *	A corresponding encoding file might be read from persistent storage, in
- *	which case LoadTableEncoding is called.
+ *	File read from disk.
  *
  *---------------------------------------------------------------------------
  */
@@ -1583,8 +1584,8 @@ OpenEncodingFileChannel(
 static Tcl_Encoding
 LoadEncodingFile(
     Tcl_Interp *interp,		/* Interp for error reporting, if not NULL. */
-    const char *name)		/* The name of both the encoding file
-				 * and the new encoding. */
+    const char *name)		/* The name of the encoding file on disk and
+				 * also the name for new encoding. */
 {
     Tcl_Channel chan = NULL;
     Tcl_Encoding encoding = NULL;
@@ -1636,27 +1637,27 @@ LoadEncodingFile(
  *
  * LoadTableEncoding --
  *
- *	Helper function for LoadEncodingFile().  Creates a Tcl_EncodingType
- *	structure along with its corresponding TableEncodingData structure, and
- *	passes it to Tcl_Createncoding.
+ *	Helper function for LoadEncodingTable(). Loads a table to that
+ *	converts between Unicode and some other encoding and creates an
+ *	encoding (using a TableEncoding structure) from that information.
  *
- *	The file contains binary data but begins with a marker to indicate
- *	byte-ordering so a single binary file can be read on big or
- *	little-endian systems.
+ *	File contains binary data, but begins with a marker to indicate
+ *	byte-ordering, so that same binary file can be read on either endian
+ *	platforms.
  *
  * Results:
- *	Returns the new Tcl_Encoding,  or NULL if it could could
- *	not be created because the file contained invalid data.
+ *	The return value is the new encoding, or NULL if the encoding could
+ *	not be created (because the file contained invalid data).
  *
  * Side effects:
- *	See Tcl_CreateEncoding().
+ *	None.
  *
  *-------------------------------------------------------------------------
  */
 
 static Tcl_Encoding
 LoadTableEncoding(
-    const char *name,		/* Name of the new encoding. */
+    const char *name,		/* Name for new encoding. */
     int type,			/* Type of encoding (ENCODING_?????). */
     Tcl_Channel chan)		/* File containing new encoding. */
 {
@@ -1768,10 +1769,10 @@ LoadTableEncoding(
     }
 
     /*
-     * Invert the toUnicode array to produce the fromUnicode array. Performs a
+     * Invert toUnicode array to produce the fromUnicode array. Performs a
      * single malloc to get the memory for the array and all the pages needed
-     * by the array. While reading in the toUnicode array remember what
-     * pages are needed for the fromUnicode array.
+     * by the array. While reading in the toUnicode array, we remembered what
+     * pages that would be needed for the fromUnicode array.
      */
 
     if (symbol) {
@@ -1813,8 +1814,8 @@ LoadTableEncoding(
     if (type == ENCODING_MULTIBYTE) {
 	/*
 	 * If multibyte encodings don't have a backslash character, define
-	 * one. Otherwise, on Windows, native file names don't work because
-	 * the backslash in the file name maps to the unknown character
+	 * one. Otherwise, on Windows, native file names won't work because
+	 * the backslash in the file name will map to the unknown character
 	 * (question mark) when converting from UTF-8 to external encoding.
 	 */
 
@@ -1828,13 +1829,13 @@ LoadTableEncoding(
 	unsigned short *page;
 
 	/*
-	 * Make a special symbol encoding that maps each symbol character from
-	 * its Unicode code point down into page 0, and also ensure that each
-	 * characters on page 0 maps to itself so that a symbol font can be
-	 * used to display a simple string like "abcd" and have alpha, beta,
-	 * chi, delta show up, rather than have "unknown" chars show up because
-	 * strictly speaking the symbol font doesn't have glyphs for those low
-	 * ASCII chars.
+	 * Make a special symbol encoding that not only maps the symbol
+	 * characters from their Unicode code points down into page 0, but
+	 * also ensure that the characters on page 0 map to themselves. This
+	 * is so that a symbol font can be used to display a simple string
+	 * like "abcd" and have alpha, beta, chi, delta show up, rather than
+	 * have "unknown" chars show up because strictly speaking the symbol
+	 * font doesn't have glyphs for those low ascii chars.
 	 */
 
 	page = dataPtr->fromUnicode[0];
@@ -1938,7 +1939,7 @@ LoadTableEncoding(
 
 static Tcl_Encoding
 LoadEscapeEncoding(
-    const char *name,		/* Name of the new encoding. */
+    const char *name,		/* Name for new encoding. */
     Tcl_Channel chan)		/* File containing new encoding. */
 {
     int i;
@@ -2317,7 +2318,7 @@ UtfToUtfProc(
  *
  * UnicodeToUtfProc --
  *
- *	Convert from UTF-16 to UTF-8.
+ *	Convert from Unicode to UTF-8.
  *
  * Results:
  *	Returns TCL_OK if conversion was successful.
@@ -2330,7 +2331,7 @@ UtfToUtfProc(
 
 static int
 UnicodeToUtfProc(
-    ClientData clientData,	/* != NULL means LE, == NUL means BE */
+    ClientData clientData,	/* Not used. */
     const char *src,		/* Source string in Unicode. */
     int srcLen,			/* Source string length in bytes. */
     int flags,			/* Conversion control flags. */
@@ -2358,19 +2359,13 @@ UnicodeToUtfProc(
     const char *srcStart, *srcEnd;
     char *dstEnd, *dstStart;
     int result, numChars;
-    unsigned short ch;
+    Tcl_UniChar ch;
 
     result = TCL_OK;
-
-    /* check alignment with utf-16 (2 == sizeof(UTF-16)) */
-    if ((srcLen % 2) != 0) {
-	result = TCL_CONVERT_MULTIBYTE;
-	srcLen--;
-    }
-    /* If last code point is a high surrogate, we cannot handle that yet */
-    if ((srcLen >= 2) && ((src[srcLen - (clientData?1:2)] & 0xFC) == 0xD8)) {
+    if ((srcLen % sizeof(Tcl_UniChar)) != 0) {
 	result = TCL_CONVERT_MULTIBYTE;
-	srcLen-= 2;
+	srcLen /= sizeof(Tcl_UniChar);
+	srcLen *= sizeof(Tcl_UniChar);
     }
 
     srcStart = src;
@@ -2384,21 +2379,17 @@ UnicodeToUtfProc(
 	    result = TCL_CONVERT_NOSPACE;
 	    break;
 	}
-	if (clientData) {
-	    ch = (src[1] & 0xFF) << 8 | (src[0] & 0xFF);
-	} else {
-	    ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF);
-	}
 	/*
-	 * Special case for 1-byte utf chars for speed. Make sure we work with
-	 * unsigned short-size data.
+	 * Special case for 1-byte utf chars for speed.  Make sure we
+	 * work with Tcl_UniChar-size data.
 	 */
+	ch = *(Tcl_UniChar *)src;
 	if (ch && ch < 0x80) {
 	    *dst++ = (ch & 0xFF);
 	} else {
 	    dst += Tcl_UniCharToUtf(ch, dst);
 	}
-	src += sizeof(unsigned short);
+	src += sizeof(Tcl_UniChar);
     }
 
     *srcReadPtr = src - srcStart;
@@ -2412,7 +2403,7 @@ UnicodeToUtfProc(
  *
  * UtfToUnicodeProc --
  *
- *	Convert from UTF-8 to UTF-16.
+ *	Convert from UTF-8 to Unicode.
  *
  * Results:
  *	Returns TCL_OK if conversion was successful.
@@ -2425,7 +2416,8 @@ UnicodeToUtfProc(
 
 static int
 UtfToUnicodeProc(
-    ClientData clientData,	/* != NULL means LE, == NUL means BE */
+    ClientData clientData,	/* TableEncodingData that specifies
+				 * encoding. */
     const char *src,		/* Source string in UTF-8. */
     int srcLen,			/* Source string length in bytes. */
     int flags,			/* Conversion control flags. */
@@ -2452,7 +2444,7 @@ UtfToUnicodeProc(
 {
     const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
     int result, numChars;
-    Tcl_UniChar ch = 0;
+    Tcl_UniChar ch;
 
     srcStart = src;
     srcEnd = src + srcLen;
@@ -2484,37 +2476,27 @@ UtfToUnicodeProc(
 	 * Need to handle this in a way that won't cause misalignment
 	 * by casting dst to a Tcl_UniChar. [Bug 1122671]
 	 */
-	if (clientData) {
+#ifdef WORDS_BIGENDIAN
 #if TCL_UTF_MAX > 4
-	    if (ch <= 0xFFFF) {
-		*dst++ = (ch & 0xFF);
-		*dst++ = (ch >> 8);
-	    } else {
-		*dst++ = (((ch - 0x10000) >> 10) & 0xFF);
-		*dst++ = (((ch - 0x10000) >> 18) & 0x3) | 0xD8;
-		*dst++ = (ch & 0xFF);
-		*dst++ = ((ch & 0x3) >> 8) | 0xDC;
-	    }
+	*dst++ = (ch >> 24);
+	*dst++ = ((ch >> 16) & 0xFF);
+	*dst++ = ((ch >> 8) & 0xFF);
+	*dst++ = (ch & 0xFF);
 #else
-	    *dst++ = (ch & 0xFF);
-	    *dst++ = (ch >> 8);
+	*dst++ = (ch >> 8);
+	*dst++ = (ch & 0xFF);
 #endif
-	} else {
+#else
 #if TCL_UTF_MAX > 4
-	    if (ch <= 0xFFFF) {
-		*dst++ = (ch >> 8);
-		*dst++ = (ch & 0xFF);
-	    } else {
-		*dst++ = ((ch & 0x3) >> 8) | 0xDC;
-		*dst++ = (ch & 0xFF);
-		*dst++ = (((ch - 0x10000) >> 18) & 0x3) | 0xD8;
-		*dst++ = (((ch - 0x10000) >> 10) & 0xFF);
-	    }
+	*dst++ = (ch & 0xFF);
+	*dst++ = ((ch >> 8) & 0xFF);
+	*dst++ = ((ch >> 16) & 0xFF);
+	*dst++ = (ch >> 24);
 #else
-	    *dst++ = (ch >> 8);
-	    *dst++ = (ch & 0xFF);
+	*dst++ = (ch & 0xFF);
+	*dst++ = (ch >> 8);
+#endif
 #endif
-	}
     }
     *srcReadPtr = src - srcStart;
     *dstWrotePtr = dst - dstStart;
@@ -2917,6 +2899,7 @@ Iso88591FromUtfProc(
 		result = TCL_CONVERT_UNKNOWN;
 		break;
 	    }
+
 	    /*
 	     * Plunge on, using '?' as a fallback character.
 	     */
@@ -3404,13 +3387,14 @@ EscapeFromUtfProc(
  *
  * EscapeFreeProc --
  *
- *	Frees resources used by the encoding.
+ *	This function is invoked when an EscapeEncodingData encoding is
+ *	deleted. It deletes the memory used by the encoding.
  *
  * Results:
  *	None.
  *
  * Side effects:
- *	Memory is freed.
+ *	Memory freed.
  *
  *---------------------------------------------------------------------------
  */
-- 
cgit v0.12


From 5ccd380c46e3e74f3273ecfa83b0686bca5e8056 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Tue, 21 Apr 2020 02:57:41 +0000
Subject: We've settled on using (TCL_UTF_MAX > 3) to indicate 4-byte
 Tcl_UniChar.

---
 generic/tclEncoding.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 5a9d2d5..66bec44 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2477,7 +2477,7 @@ UtfToUnicodeProc(
 	 * by casting dst to a Tcl_UniChar. [Bug 1122671]
 	 */
 #ifdef WORDS_BIGENDIAN
-#if TCL_UTF_MAX > 4
+#if TCL_UTF_MAX > 3
 	*dst++ = (ch >> 24);
 	*dst++ = ((ch >> 16) & 0xFF);
 	*dst++ = ((ch >> 8) & 0xFF);
@@ -2487,7 +2487,7 @@ UtfToUnicodeProc(
 	*dst++ = (ch & 0xFF);
 #endif
 #else
-#if TCL_UTF_MAX > 4
+#if TCL_UTF_MAX > 3
 	*dst++ = (ch & 0xFF);
 	*dst++ = ((ch >> 8) & 0xFF);
 	*dst++ = ((ch >> 16) & 0xFF);
-- 
cgit v0.12


From 941ef44a3fce68b1dc81abb397a80f209b2ca982 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Tue, 21 Apr 2020 14:52:48 +0000
Subject: Revert the other encoding system backport.

The blocking and failing tests are illustrations of existing tickets
[1004065] and [1122671], recording that the encoding machinery hardcodes
assumptions in multiple places that sizeof(Tcl_UniChar) == 2.

Closing the segfault bug fix should not be hostage to fixing those old bugs.
---
 generic/tclEncoding.c | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 66bec44..6c16827 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2470,33 +2470,20 @@ UtfToUnicodeProc(
 	if (dst > dstEnd) {
 	    result = TCL_CONVERT_NOSPACE;
 	    break;
-	}
+        }
 	src += TclUtfToUniChar(src, &ch);
 	/*
 	 * Need to handle this in a way that won't cause misalignment
 	 * by casting dst to a Tcl_UniChar. [Bug 1122671]
+	 * XXX: This hard-codes the assumed size of Tcl_UniChar as 2.
 	 */
 #ifdef WORDS_BIGENDIAN
-#if TCL_UTF_MAX > 3
-	*dst++ = (ch >> 24);
-	*dst++ = ((ch >> 16) & 0xFF);
-	*dst++ = ((ch >> 8) & 0xFF);
-	*dst++ = (ch & 0xFF);
-#else
 	*dst++ = (ch >> 8);
 	*dst++ = (ch & 0xFF);
-#endif
-#else
-#if TCL_UTF_MAX > 3
-	*dst++ = (ch & 0xFF);
-	*dst++ = ((ch >> 8) & 0xFF);
-	*dst++ = ((ch >> 16) & 0xFF);
-	*dst++ = (ch >> 24);
 #else
 	*dst++ = (ch & 0xFF);
 	*dst++ = (ch >> 8);
 #endif
-#endif
     }
     *srcReadPtr = src - srcStart;
     *dstWrotePtr = dst - dstStart;
-- 
cgit v0.12


From d48bca33242b3f10d21a25a6c6a91c27ae707b96 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Tue, 21 Apr 2020 15:41:01 +0000
Subject: Move testing command [testsize] from Windows to generic. Extend it to
 report sizeof(Tcl_UniChar).

---
 generic/tclTest.c | 31 +++++++++++++++++++++++++++++++
 win/tclWinTest.c  | 28 ----------------------------
 2 files changed, 31 insertions(+), 28 deletions(-)

diff --git a/generic/tclTest.c b/generic/tclTest.c
index 8c29aa7..b9fd204 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -281,6 +281,7 @@ static Tcl_CmdProc	Testset2Cmd;
 static Tcl_CmdProc	TestseterrorcodeCmd;
 static Tcl_ObjCmdProc	TestsetobjerrorcodeCmd;
 static Tcl_CmdProc	TestsetplatformCmd;
+static Tcl_ObjCmdProc	TestsizeCmd;
 static Tcl_CmdProc	TeststaticpkgCmd;
 static Tcl_CmdProc	TesttranslatefilenameCmd;
 static Tcl_CmdProc	TestupvarCmd;
@@ -592,6 +593,7 @@ Tcltest_Init(
 	    TestFindLastCmd, NULL, NULL);
     Tcl_CreateCommand(interp, "testsetplatform", TestsetplatformCmd,
 	    NULL, NULL);
+    Tcl_CreateObjCommand(interp, "testsize", TestsizeCmd, NULL, NULL);
     Tcl_CreateCommand(interp, "teststaticpkg", TeststaticpkgCmd,
 	    NULL, NULL);
     Tcl_CreateCommand(interp, "testtranslatefilename",
@@ -4122,6 +4124,35 @@ TestsetplatformCmd(
     return TCL_OK;
 }
 
+static int
+TestsizeCmd(
+    ClientData clientData,      /* Unused */
+    Tcl_Interp* interp,         /* Tcl interpreter */
+    int objc,                   /* Parameter count */
+    Tcl_Obj *const * objv)      /* Parameter vector */
+{
+    if (objc != 2) {
+        goto syntax;
+    }
+    if (strcmp(Tcl_GetString(objv[1]), "time_t") == 0) {
+        Tcl_SetObjResult(interp, Tcl_NewWideIntObj(sizeof(time_t)));
+        return TCL_OK;
+    }
+    if (strcmp(Tcl_GetString(objv[1]), "st_mtime") == 0) {
+        Tcl_StatBuf *statPtr;
+        Tcl_SetObjResult(interp, Tcl_NewWideIntObj(sizeof(statPtr->st_mtime)));
+        return TCL_OK;
+    }
+    if (strcmp(Tcl_GetString(objv[1]), "unichar") == 0) {
+        Tcl_SetObjResult(interp, Tcl_NewWideIntObj(sizeof(Tcl_UniChar)));
+        return TCL_OK;
+    }
+
+syntax:
+    Tcl_WrongNumArgs(interp, 1, objv, "time_t|st_mtime|unichar");
+    return TCL_ERROR;
+}
+
 /*
  *----------------------------------------------------------------------
  *
diff --git a/win/tclWinTest.c b/win/tclWinTest.c
index 04878fe..7f49b63 100644
--- a/win/tclWinTest.c
+++ b/win/tclWinTest.c
@@ -39,8 +39,6 @@ static int		TestwinclockCmd(ClientData dummy, Tcl_Interp* interp,
 			    int objc, Tcl_Obj *const objv[]);
 static int		TestwinsleepCmd(ClientData dummy, Tcl_Interp* interp,
 			    int objc, Tcl_Obj *const objv[]);
-static int		TestSizeCmd(ClientData dummy, Tcl_Interp* interp,
-			    int objc, Tcl_Obj *const objv[]);
 static Tcl_ObjCmdProc	TestExceptionCmd;
 static int		TestplatformChmod(const char *nativePath, int pmode);
 static int		TestchmodCmd(ClientData dummy,
@@ -78,7 +76,6 @@ TclplatformtestInit(
     Tcl_CreateObjCommand(interp, "testwinclock", TestwinclockCmd, NULL, NULL);
     Tcl_CreateObjCommand(interp, "testwinsleep", TestwinsleepCmd, NULL, NULL);
     Tcl_CreateObjCommand(interp, "testexcept", TestExceptionCmd, NULL, NULL);
-    Tcl_CreateObjCommand(interp, "testsize", TestSizeCmd, NULL, NULL);
     return TCL_OK;
 }
 
@@ -312,31 +309,6 @@ TestwinsleepCmd(
     return TCL_OK;
 }
 
-static int
-TestSizeCmd(
-    ClientData clientData,	/* Unused */
-    Tcl_Interp* interp,		/* Tcl interpreter */
-    int objc,			/* Parameter count */
-    Tcl_Obj *const * objv)	/* Parameter vector */
-{
-    if (objc != 2) {
-	goto syntax;
-    }
-    if (strcmp(Tcl_GetString(objv[1]), "time_t") == 0) {
-	Tcl_SetObjResult(interp, Tcl_NewWideIntObj(sizeof(time_t)));
-	return TCL_OK;
-    }
-    if (strcmp(Tcl_GetString(objv[1]), "st_mtime") == 0) {
-        Tcl_StatBuf *statPtr;
-        Tcl_SetObjResult(interp, Tcl_NewWideIntObj(sizeof(statPtr->st_mtime)));
-        return TCL_OK;
-    }
-
-syntax:
-    Tcl_WrongNumArgs(interp, 1, objv, "time_t|st_mtime");
-    return TCL_ERROR;
-}
-
 /*
  *----------------------------------------------------------------------
  *
-- 
cgit v0.12


From 1588b8475d8a1378e6e9504e10913d756d84983b Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Tue, 21 Apr 2020 15:52:03 +0000
Subject: Use new testing command to constrain tests to (sizeof(Tcl_UniChar) ==
 2) until bugs are fixed when (sizeof(Tcl_UniChar) == 4).

---
 tests/chanio.test   | 4 +++-
 tests/encoding.test | 6 +++++-
 tests/io.test       | 5 ++++-
 tests/source.test   | 5 ++++-
 4 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/tests/chanio.test b/tests/chanio.test
index 5fae431..c2f561b 100644
--- a/tests/chanio.test
+++ b/tests/chanio.test
@@ -29,6 +29,8 @@ namespace eval ::tcl::test::io {
     variable msg
     variable expected
 
+    testConstraint ucs2	[expr {	[llength [info commands testsize]] &&
+				([testsize unichar] == 2) }]
     testConstraint testchannel      [llength [info commands testchannel]]
     testConstraint exec             [llength [info commands exec]]
     testConstraint openpipe         1
@@ -875,7 +877,7 @@ test chan-io-6.44 {Tcl_GetsObj: input saw cr, not followed by cr} {stdio testcha
     chan close $f
     set x
 } [list "bbbbbbbbbbbbbbb" 15 "123456789abcdef" 1 4 "abcd" 0 3 "efg"]
-test chan-io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} {stdio testchannel openpipe fileevent} {
+test chan-io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} {stdio testchannel openpipe fileevent ucs2} {
     # Tcl_ExternalToUtf()
 
     set f [open "|[list [interpreter] $path(cat)]" w+]
diff --git a/tests/encoding.test b/tests/encoding.test
index 8722a93..ad55e26 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -32,6 +32,10 @@ proc runtests {} {
 testConstraint testencoding [llength [info commands testencoding]]
 testConstraint exec [llength [info commands exec]]
 
+testConstraint ucs2 [expr { [llength [info commands testsize]] &&
+                                ([testsize unichar] == 2) }]
+
+
 # TclInitEncodingSubsystem is tested by the rest of this file
 # TclFinalizeEncodingSubsystem is not currently tested
 
@@ -316,7 +320,7 @@ test encoding-15.3 {UtfToUtfProc null character input} {
     list [string bytelength $x] [string bytelength $y] $z
 } {1 2 c080}
 
-test encoding-16.1 {UnicodeToUtfProc} {
+test encoding-16.1 {UnicodeToUtfProc} ucs2 {
     set val [encoding convertfrom unicode NN]
     list $val [format %x [scan $val %c]]
 } "\u4e4e 4e4e"
diff --git a/tests/io.test b/tests/io.test
index 04fa1d2..1c18576 100644
--- a/tests/io.test
+++ b/tests/io.test
@@ -29,6 +29,9 @@ namespace eval ::tcl::test::io {
     variable msg
     variable expected
 
+testConstraint ucs2 [expr { [llength [info commands testsize]] &&
+                            ([testsize unichar] == 2) }]
+
 testConstraint testchannel      [llength [info commands testchannel]]
 testConstraint exec             [llength [info commands exec]]
 testConstraint openpipe         1
@@ -910,7 +913,7 @@ test io-6.44 {Tcl_GetsObj: input saw cr, not followed by cr} {stdio testchannel
     close $f
     set x
 } [list "bbbbbbbbbbbbbbb" 15 "123456789abcdef" 1 4 "abcd" 0 3 "efg"]
-test io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} {stdio testchannel openpipe fileevent} {
+test io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} {stdio testchannel openpipe fileevent ucs2} {
     # Tcl_ExternalToUtf()
 
     set f [open "|[list [interpreter] $path(cat)]" w+]
diff --git a/tests/source.test b/tests/source.test
index dc3c2d8..8511004 100644
--- a/tests/source.test
+++ b/tests/source.test
@@ -20,6 +20,9 @@ if {[catch {package require tcltest 2.1}]} {
 namespace eval ::tcl::test::source {
     namespace import ::tcltest::*
 
+testConstraint ucs2 [expr { [llength [info commands testsize]] &&
+                                ([testsize unichar] == 2) }]
+
 test source-1.1 {source command} -setup {
     set x "old x value"
     set y "old y value"
@@ -232,7 +235,7 @@ test source-7.1 {source -encoding test} -setup {
 } -cleanup {
     removeFile source.file
 } -result correct
-test source-7.2 {source -encoding test} -setup {
+test source-7.2 {source -encoding test} -constraints ucs2 -setup {
     # This tests for bad interactions between [source -encoding]
     # and use of the Control-Z character (\u001A) as a cross-platform
     # EOF character by [source].  Here we write out and the [source] a
-- 
cgit v0.12


From 4da0e252257e24143039784510363d066545be27 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Tue, 21 Apr 2020 15:56:34 +0000
Subject: remove merge litter

---
 tests/util.test | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/util.test b/tests/util.test
index a483de1..85c06dd 100644
--- a/tests/util.test
+++ b/tests/util.test
@@ -15,7 +15,6 @@ if {[lsearch [namespace children] ::tcltest] == -1} {
 testConstraint testdstring [llength [info commands testdstring]]
 testConstraint testconcatobj [llength [info commands testconcatobj]]
 testConstraint testdoubledigits [llength [info commands testdoubledigits]]
-testConstraint compat85 [expr {[format %c 0x010000] == "\uFFFD"}]
 
 # Big test for correct ordering of data in [expr]
 
-- 
cgit v0.12


From ce76e24a88d8c6c8abfd5da63402691c072e697b Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Tue, 21 Apr 2020 19:45:34 +0000
Subject: Improve the "testutfnext" command. It can now accept both bytes and
 strings, and it will test whether src[-1] is read without needing
 test-variations for it.

---
 generic/tclTest.c |  48 +++++++------
 tests/utf.test    | 208 +++++++++++++++++++++++++++---------------------------
 2 files changed, 131 insertions(+), 125 deletions(-)

diff --git a/generic/tclTest.c b/generic/tclTest.c
index b9fd204..7a531b4 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -7113,7 +7113,7 @@ SimpleListVolumes(void)
 /*
  * Used to check operations of Tcl_UtfNext.
  *
- * Usage: testutfnext $bytes $offset
+ * Usage: testutfnext -bytestring $bytes
  */
 
 static int
@@ -7123,37 +7123,43 @@ TestUtfNextCmd(
     int objc,
     Tcl_Obj *const objv[])
 {
-    int numBytes, offset = 0;
+    int numBytes;
     char *bytes;
-    const char *result;
-    Tcl_Obj *copy;
+    const char *result, *first;
+    char buffer[32];
+    static const char tobetested[] = "\xFF\xFE\xF4\xF2\xF0\xEF\xE8\xE3\xE2\xE1\xE0\xC2\xC1\xC0\x82";
+    const char *p = tobetested;
+
+    if (objc != 3 || strcmp(Tcl_GetString(objv[1]), "-bytestring")) {
+	if (objc != 2) {
+	    Tcl_WrongNumArgs(interp, 1, objv, "?-bytestring? bytes");
+	    return TCL_ERROR;
+	}
+	bytes = Tcl_GetStringFromObj(objv[1], &numBytes);
+    } else {
+	bytes = (char *) Tcl_GetByteArrayFromObj(objv[2], &numBytes);
+    }
 
-    if (objc < 2 || objc > 3) {
-	Tcl_WrongNumArgs(interp, 1, objv, "bytes ?offset?");
+    if (numBytes > sizeof(buffer)-2) {
+	Tcl_AppendResult(interp, "\"testutfnext\" can only handle 30 bytes", NULL);
 	return TCL_ERROR;
     }
 
-    bytes = (char *) Tcl_GetByteArrayFromObj(objv[1], &numBytes);
+    memcpy(buffer + 1, bytes, numBytes);
+    buffer[0] = buffer[numBytes + 1] = '\x00';
 
-    if (objc == 3) {
-	if (TCL_OK != TclGetIntForIndex(interp, objv[2], numBytes, &offset)) {
+    first = Tcl_UtfNext(buffer + 1);
+    while ((buffer[0] = *p++) != '\0') {
+	/* Run Tcl_UtfNext with many more possible bytes at src[-1], all should give the same result */
+	result = Tcl_UtfNext(buffer + 1);
+	if (first != result) {
+	    Tcl_AppendResult(interp, "Tcl_UtfNext is not supposed to read src[-1]", NULL);
 	    return TCL_ERROR;
 	}
-	if (offset < 0) {
-	    offset = 0;
-	}
-	if (offset > numBytes) {
-	    offset = numBytes;
-	}
     }
-    copy = Tcl_DuplicateObj(objv[1]);
-    bytes = (char *) Tcl_SetByteArrayLength(copy, numBytes+1);
-    bytes[numBytes] = '\0';
 
-    result = Tcl_UtfNext(bytes + offset);
-    Tcl_SetObjResult(interp, Tcl_NewIntObj(result - bytes));
+    Tcl_SetObjResult(interp, Tcl_NewIntObj(result - buffer - 1));
 
-    Tcl_DecrRefCount(copy);
     return TCL_OK;
 }
 /*
diff --git a/tests/utf.test b/tests/utf.test
index 1c79f32..0a81ae3 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -143,7 +143,7 @@ test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} {
 test utf-6.1 {Tcl_UtfNext} testutfnext {
     # This takes the pointer one past the terminating NUL.
     # This is really an invalid call.
-    testutfnext {}
+    testutfnext -bytestring {}
 } 1
 test utf-6.2 {Tcl_UtfNext} testutfnext {
     testutfnext A
@@ -152,301 +152,301 @@ test utf-6.3 {Tcl_UtfNext} testutfnext {
     testutfnext AA
 } 1
 test utf-6.4 {Tcl_UtfNext} testutfnext {
-    testutfnext A\xA0
+    testutfnext -bytestring A\xA0
 } 1
 test utf-6.5 {Tcl_UtfNext} testutfnext {
-    testutfnext A\xD0
+    testutfnext -bytestring A\xD0
 } 1
 test utf-6.6 {Tcl_UtfNext} testutfnext {
-    testutfnext A\xE8
+    testutfnext -bytestring A\xE8
 } 1
 test utf-6.7 {Tcl_UtfNext} testutfnext {
-    testutfnext A\xF2
+    testutfnext -bytestring A\xF2
 } 1
 test utf-6.8 {Tcl_UtfNext} testutfnext {
-    testutfnext A\xF8
+    testutfnext -bytestring A\xF8
 } 1
 test utf-6.9 {Tcl_UtfNext} testutfnext {
-    testutfnext \xA0
+    testutfnext -bytestring \xA0
 } 1
 test utf-6.10 {Tcl_UtfNext} testutfnext {
-    testutfnext \xA0G
+    testutfnext -bytestring \xA0G
 } 1
 test utf-6.11 {Tcl_UtfNext} testutfnext {
-    testutfnext \xA0\xA0
+    testutfnext -bytestring \xA0\xA0
 } 1
 test utf-6.12 {Tcl_UtfNext} testutfnext {
-    testutfnext \xA0\xD0
+    testutfnext -bytestring \xA0\xD0
 } 1
 test utf-6.13 {Tcl_UtfNext} testutfnext {
-    testutfnext \xA0\xE8
+    testutfnext -bytestring \xA0\xE8
 } 1
 test utf-6.14 {Tcl_UtfNext} testutfnext {
-    testutfnext \xA0\xF2
+    testutfnext -bytestring \xA0\xF2
 } 1
 test utf-6.15 {Tcl_UtfNext} testutfnext {
-    testutfnext \xA0\xF8
+    testutfnext -bytestring \xA0\xF8
 } 1
 test utf-6.16 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0
+    testutfnext -bytestring \xD0
 } 1
 test utf-6.17 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0G
+    testutfnext -bytestring \xD0G
 } 1
 test utf-6.18 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xA0
+    testutfnext -bytestring \xD0\xA0
 } 2
 test utf-6.19 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xD0
+    testutfnext -bytestring \xD0\xD0
 } 1
 test utf-6.20 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xE8
+    testutfnext -bytestring \xD0\xE8
 } 1
 test utf-6.21 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xF2
+    testutfnext -bytestring \xD0\xF2
 } 1
 test utf-6.22 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xF8
+    testutfnext -bytestring \xD0\xF8
 } 1
 test utf-6.23 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8
+    testutfnext -bytestring \xE8
 } 1
 test utf-6.24 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8G
+    testutfnext -bytestring \xE8G
 } 1
 test utf-6.25 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0
+    testutfnext -bytestring \xE8\xA0
 } 1
 test utf-6.26 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xD0
+    testutfnext -bytestring \xE8\xD0
 } 1
 test utf-6.27 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xE8
+    testutfnext -bytestring \xE8\xE8
 } 1
 test utf-6.28 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xF2
+    testutfnext -bytestring \xE8\xF2
 } 1
 test utf-6.29 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xF8
+    testutfnext -bytestring \xE8\xF8
 } 1
 test utf-6.30 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2
+    testutfnext -bytestring \xF2
 } 1
 test utf-6.31 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2G
+    testutfnext -bytestring \xF2G
 } 1
 test utf-6.32 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xA0
+    testutfnext -bytestring \xF2\xA0
 } 1
 test utf-6.33 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xD0
+    testutfnext -bytestring \xF2\xD0
 } 1
 test utf-6.34 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xE8
+    testutfnext -bytestring \xF2\xE8
 } 1
 test utf-6.35 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xF2
+    testutfnext -bytestring \xF2\xF2
 } 1
 test utf-6.36 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xF8
+    testutfnext -bytestring \xF2\xF8
 } 1
 test utf-6.37 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF8
+    testutfnext -bytestring \xF8
 } 1
 test utf-6.38 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF8G
+    testutfnext -bytestring \xF8G
 } 1
 test utf-6.39 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF8\xA0
+    testutfnext -bytestring \xF8\xA0
 } 1
 test utf-6.40 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF8\xD0
+    testutfnext -bytestring \xF8\xD0
 } 1
 test utf-6.41 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF8\xE8
+    testutfnext -bytestring \xF8\xE8
 } 1
 test utf-6.42 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF8\xF2
+    testutfnext -bytestring \xF8\xF2
 } 1
 test utf-6.43 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF8\xF8
+    testutfnext -bytestring \xF8\xF8
 } 1
 test utf-6.44 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xA0G
+    testutfnext -bytestring \xD0\xA0G
 } 2
 test utf-6.45 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xA0\xA0
+    testutfnext -bytestring \xD0\xA0\xA0
 } 2
 test utf-6.46 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xA0\xD0
+    testutfnext -bytestring \xD0\xA0\xD0
 } 2
 test utf-6.47 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xA0\xE8
+    testutfnext -bytestring \xD0\xA0\xE8
 } 2
 test utf-6.48 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xA0\xF2
+    testutfnext -bytestring \xD0\xA0\xF2
 } 2
 test utf-6.49 {Tcl_UtfNext} testutfnext {
-    testutfnext \xD0\xA0\xF8
+    testutfnext -bytestring \xD0\xA0\xF8
 } 2
 test utf-6.50 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0G
+    testutfnext -bytestring \xE8\xA0G
 } 1
 test utf-6.51 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xA0
+    testutfnext -bytestring \xE8\xA0\xA0
 } 3
 test utf-6.52 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xD0
+    testutfnext -bytestring \xE8\xA0\xD0
 } 1
 test utf-6.53 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xE8
+    testutfnext -bytestring \xE8\xA0\xE8
 } 1
 test utf-6.54 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xF2
+    testutfnext -bytestring \xE8\xA0\xF2
 } 1
 test utf-6.55 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xF8
+    testutfnext -bytestring \xE8\xA0\xF8
 } 1
 test utf-6.56 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xA0G
+    testutfnext -bytestring \xF2\xA0G
 } 1
 test utf-6.57 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xA0\xA0
+    testutfnext -bytestring \xF2\xA0\xA0
 } 1
 test utf-6.58 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xA0\xD0
+    testutfnext -bytestring \xF2\xA0\xD0
 } 1
 test utf-6.59 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xA0\xE8
+    testutfnext -bytestring \xF2\xA0\xE8
 } 1
 test utf-6.60 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xA0\xF2
+    testutfnext -bytestring \xF2\xA0\xF2
 } 1
 test utf-6.61 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xA0\xF8
+    testutfnext -bytestring \xF2\xA0\xF8
 } 1
 test utf-6.62 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xA0G
+    testutfnext -bytestring \xE8\xA0\xA0G
 } 3
 test utf-6.63 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xA0\xA0
+    testutfnext -bytestring \xE8\xA0\xA0\xA0
 } 3
 test utf-6.64 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xA0\xD0
+    testutfnext -bytestring \xE8\xA0\xA0\xD0
 } 3
 test utf-6.65 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xA0\xE8
+    testutfnext -bytestring \xE8\xA0\xA0\xE8
 } 3
 test utf-6.66 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xA0\xF2
+    testutfnext -bytestring \xE8\xA0\xA0\xF2
 } 3
 test utf-6.67 {Tcl_UtfNext} testutfnext {
-    testutfnext \xE8\xA0\xA0\xF8
+    testutfnext -bytestring \xE8\xA0\xA0\xF8
 } 3
 test utf-6.68 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xA0\xA0G
+    testutfnext -bytestring \xF2\xA0\xA0G
 } 1
 test utf-6.69.0 {Tcl_UtfNext} {testutfnext compat85} {
-    testutfnext \xF2\xA0\xA0\xA0
+    testutfnext -bytestring \xF2\xA0\xA0\xA0
 } 1
 test utf-6.69.1 {Tcl_UtfNext} {testutfnext fullutf} {
-    testutfnext \xF2\xA0\xA0\xA0
+    testutfnext -bytestring \xF2\xA0\xA0\xA0
 } 4
 test utf-6.70 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xA0\xA0\xD0
+    testutfnext -bytestring \xF2\xA0\xA0\xD0
 } 1
 test utf-6.71 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xA0\xA0\xE8
+    testutfnext -bytestring \xF2\xA0\xA0\xE8
 } 1
 test utf-6.71 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xA0\xA0\xF2
+    testutfnext -bytestring \xF2\xA0\xA0\xF2
 } 1
 test utf-6.73 {Tcl_UtfNext} testutfnext {
-    testutfnext \xF2\xA0\xA0\xF8
+    testutfnext -bytestring \xF2\xA0\xA0\xF8
 } 1
 test utf-6.74.0 {Tcl_UtfNext} {testutfnext compat85} {
-    testutfnext \xF2\xA0\xA0\xA0G
+    testutfnext -bytestring \xF2\xA0\xA0\xA0G
 } 1
 test utf-6.74.1 {Tcl_UtfNext} {testutfnext fullutf} {
-    testutfnext \xF2\xA0\xA0\xA0G
+    testutfnext -bytestring \xF2\xA0\xA0\xA0G
 } 4
 test utf-6.75.0 {Tcl_UtfNext} {testutfnext compat85} {
-    testutfnext \xF2\xA0\xA0\xA0\xA0
+    testutfnext -bytestring \xF2\xA0\xA0\xA0\xA0
 } 1
 test utf-6.75.1 {Tcl_UtfNext} {testutfnext fullutf} {
-    testutfnext \xF2\xA0\xA0\xA0\xA0
+    testutfnext -bytestring \xF2\xA0\xA0\xA0\xA0
 } 4
 test utf-6.76.0 {Tcl_UtfNext} {testutfnext compat85} {
-    testutfnext \xF2\xA0\xA0\xA0\xD0
+    testutfnext -bytestring \xF2\xA0\xA0\xA0\xD0
 } 1
 test utf-6.76.1 {Tcl_UtfNext} {testutfnext fullutf} {
-    testutfnext \xF2\xA0\xA0\xA0\xD0
+    testutfnext -bytestring \xF2\xA0\xA0\xA0\xD0
 } 4
 test utf-6.77.0 {Tcl_UtfNext} {testutfnext compat85} {
-    testutfnext \xF2\xA0\xA0\xA0\xE8
+    testutfnext -bytestring \xF2\xA0\xA0\xA0\xE8
 } 1
 test utf-6.77.1 {Tcl_UtfNext} {testutfnext fullutf} {
-    testutfnext \xF2\xA0\xA0\xA0\xE8
+    testutfnext -bytestring \xF2\xA0\xA0\xA0\xE8
 } 4
 test utf-6.78.0 {Tcl_UtfNext} {testutfnext compat85} {
-    testutfnext \xF2\xA0\xA0\xA0\xF2
+    testutfnext -bytestring \xF2\xA0\xA0\xA0\xF2
 } 1
 test utf-6.78.1 {Tcl_UtfNext} {testutfnext fullutf} {
-    testutfnext \xF2\xA0\xA0\xA0\xF2
+    testutfnext -bytestring \xF2\xA0\xA0\xA0\xF2
 } 4
 test utf-6.79.0 {Tcl_UtfNext} {testutfnext compat85} {
-    testutfnext \xF2\xA0\xA0\xA0G\xF8
+    testutfnext -bytestring \xF2\xA0\xA0\xA0G\xF8
 } 1
 test utf-6.79.1 {Tcl_UtfNext} {testutfnext fullutf} {
-    testutfnext \xF2\xA0\xA0\xA0G\xF8
+    testutfnext -bytestring \xF2\xA0\xA0\xA0G\xF8
 } 4
 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext {
-    testutfnext \xC0\x80
+    testutfnext -bytestring \xC0\x80
 } 2
 test utf-6.81 {Tcl_UtfNext - overlong sequences} testutfnext {
-    testutfnext \xC0\x81
+    testutfnext -bytestring \xC0\x81
 } 1
 test utf-6.82 {Tcl_UtfNext - overlong sequences} testutfnext {
-    testutfnext \xC1\x80
+    testutfnext -bytestring \xC1\x80
 } 1
 test utf-6.83 {Tcl_UtfNext - overlong sequences} testutfnext {
-    testutfnext \xC2\x80
+    testutfnext -bytestring \xC2\x80
 } 2
 test utf-6.84 {Tcl_UtfNext - overlong sequences} testutfnext {
-    testutfnext \xE0\x80\x80
+    testutfnext -bytestring \xE0\x80\x80
 } 1
 test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext {
-    testutfnext \xE0\xA0\x80
+    testutfnext -bytestring \xE0\xA0\x80
 } 3
 test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext {
-    testutfnext \xF0\x80\x80\x80
+    testutfnext -bytestring \xF0\x80\x80\x80
 } 1
 test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext compat85} {
-    testutfnext \xF0\x90\x80\x80
+    testutfnext -bytestring \xF0\x90\x80\x80
 } 1
 test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} {
-    testutfnext \xF0\x90\x80\x80
+    testutfnext -bytestring \xF0\x90\x80\x80
 } 4
 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
-    testutfnext \xA0\xA0
+    testutfnext -bytestring \xA0\xA0
 } 1
-test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
-    testutfnext \xE8\xA0\xA0 1
-} 2
 test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
-    testutfnext \x80\x80
+    testutfnext -bytestring \x80\x80
 } 1
-test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
-    testutfnext \xF0\x80\x80 1
-} 2
 test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext compat85} {
-    testutfnext \xF4\x8F\xBF\xBF
+    testutfnext -bytestring \xF4\x8F\xBF\xBF
 } 1
 test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} {
-    testutfnext \xF4\x8F\xBF\xBF
+    testutfnext -bytestring \xF4\x8F\xBF\xBF
 } 4
 test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext {
-    testutfnext \xF4\x90\x80\x80
+    testutfnext -bytestring \xF4\x90\x80\x80
+} 1
+test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext {
+    testutfnext -bytestring \xA0\xA0\xA0
+} 1
+test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext {
+    testutfnext -bytestring \x80\x80\x80
 } 1
 
 test utf-7.1 {Tcl_UtfPrev} testutfprev {
-- 
cgit v0.12