From 8c3587a6e899c6fd12fd0563312c4a20c289d8fd Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Mon, 4 May 2020 08:35:44 +0000
Subject: (partial) fix for [9d0cb35bb2]: Various issues with core-8-6-branch,
 TCL_UTF_MAX=4. (Even though TCL_UTF_MAX=4 is unsupported, it would be nice to
 make it work.) Marked various test-cases as "knownBug"; those work correctly
 in core-8-branch (8.7). The fix there could be backported. Low priority.

---
 generic/tclUtf.c | 34 ++++++++++++++++++----------------
 tests/utf.test   | 16 ++++++++--------
 2 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index a2080dd..ab3c577 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -359,8 +359,8 @@ Tcl_UniCharToUtfDString(
 
 int
 Tcl_UtfToUniChar(
-    register const char *src,	/* The UTF-8 string. */
-    register Tcl_UniChar *chPtr)/* Filled with the Tcl_UniChar represented by
+    const char *src,	/* The UTF-8 string. */
+    Tcl_UniChar *chPtr)/* Filled with the Tcl_UniChar represented by
 				 * the UTF-8 string. */
 {
     Tcl_UniChar byte;
@@ -580,12 +580,12 @@ Tcl_UtfCharComplete(
 
 int
 Tcl_NumUtfChars(
-    register const char *src,	/* The UTF-8 string to measure. */
+    const char *src,	/* The UTF-8 string to measure. */
     int length)			/* The length of the string in bytes, or -1
 				 * for strlen(string). */
 {
     Tcl_UniChar ch = 0;
-    register int i = 0;
+    int i = 0;
 
     /*
      * The separate implementations are faster.
@@ -601,27 +601,29 @@ Tcl_NumUtfChars(
 	}
 	if (i < 0) i = INT_MAX; /* Bug [2738427] */
     } else {
-	register const char *endPtr = src + length - TCL_UTF_MAX;
+	const char *endPtr = src + length - TCL_UTF_MAX;
 
 	while (src < endPtr) {
+#if TCL_UTF_MAX < 4
 	    if (((unsigned)UCHAR(*src) - 0xF0) < 5) {
 		/* treat F0 - F4 as single character */
 		ch = 0;
 		src++;
-	    } else {
-		src += TclUtfToUniChar(src, &ch);
-	    }
+	    } else
+#endif
+	    src += TclUtfToUniChar(src, &ch);
 	    i++;
 	}
 	endPtr += TCL_UTF_MAX;
 	while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
+#if TCL_UTF_MAX < 4
 	    if (((unsigned)UCHAR(*src) - 0xF0) < 5) {
 		/* treat F0 - F4 as single character */
 		ch = 0;
 		src++;
-	    } else {
-		src += TclUtfToUniChar(src, &ch);
-	    }
+	    } else
+#endif
+	    src += TclUtfToUniChar(src, &ch);
 	    i++;
 	}
 	if (src < endPtr) {
@@ -890,8 +892,8 @@ Tcl_UtfPrev(
 
 Tcl_UniChar
 Tcl_UniCharAtIndex(
-    register const char *src,	/* The UTF-8 string to dereference. */
-    register int index)		/* The position of the desired character. */
+    const char *src,	/* The UTF-8 string to dereference. */
+    int index)		/* The position of the desired character. */
 {
     Tcl_UniChar ch = 0;
 
@@ -918,8 +920,8 @@ Tcl_UniCharAtIndex(
 
 const char *
 Tcl_UtfAtIndex(
-    register const char *src,	/* The UTF-8 string. */
-    register int index)		/* The position of the desired character. */
+    const char *src,	/* The UTF-8 string. */
+    int index)		/* The position of the desired character. */
 {
     Tcl_UniChar ch = 0;
     int len = 0;
@@ -1191,7 +1193,7 @@ TclpUtfNcmp2(
      * fine in the strcmp manner.
      */
 
-    register int result = 0;
+    int result = 0;
 
     for ( ; numBytes != 0; numBytes--, cs++, ct++) {
 	if (*cs != *ct) {
diff --git a/tests/utf.test b/tests/utf.test
index c0fed6f..a3c049d 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -499,9 +499,9 @@ test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testu
 test utf-6.93.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext ucs2} {
     testutfnext \x80\x80\x80
 } 1
-test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext fullutf} {
+test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext fullutf knownBug} {
     testutfnext \x80\x80\x80
-} 1
+} 3
 test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext {
     testutfnext \xA0\xA0\xA0\xA0
 } 1
@@ -987,10 +987,10 @@ test utf-8.4 {Tcl_UniCharAtIndex: index > 0} {
 test utf-8.5.0 {Tcl_UniCharAtIndex: high surrogate} ucs2 {
     string index \uD842 0
 } \uD842
-test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} ucs4 {
+test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} {ucs4 knownBug} {
     string index \uD842 0
 } \uD842
-test utf-8.5.2 {Tcl_UniCharAtIndex: high surrogate} tip389 {
+test utf-8.5.2 {Tcl_UniCharAtIndex: high surrogate} {tip389 knownBug} {
     string index \uD842 0
 } \uD842
 test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} {
@@ -1002,7 +1002,7 @@ test utf-8.7.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
 test utf-8.7.1 {Tcl_UniCharAtIndex: Emoji} ucs4 {
     string index \uD83D\uDE00G 0
 } \U1F600
-test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} tip389 {
+test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} {tip389 knownBug} {
     string index \uD83D\uDE00G 0
 } \U1F600
 test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
@@ -1011,7 +1011,7 @@ test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
 test utf-8.8.1 {Tcl_UniCharAtIndex: Emoji} ucs4 {
     string index \uD83D\uDE00G 1
 } G
-test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} tip389 {
+test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} {tip389 knownBug} {
     string index \uD83D\uDE00G 1
 } {}
 test utf-8.9.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
@@ -1029,7 +1029,7 @@ test utf-8.10.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} {
 test utf-8.10.1 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs4} {
     string index \U1F600G 0
 } \U1F600
-test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389} {
+test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389 knownBug} {
     string index \U1F600G 0
 } \U1F600
 test utf-8.11.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} {
@@ -1038,7 +1038,7 @@ test utf-8.11.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} {
 test utf-8.11.1 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs4} {
     string index \U1F600G 1
 } G
-test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389} {
+test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389 knownBug} {
     string index \U1F600G 1
 } {}
 test utf-8.12.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} {
-- 
cgit v0.12