From 0187afa965d2276476598016ea28d8fcd96d48ea Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Thu, 7 May 2020 18:56:37 +0000
Subject: Test demonstrating bug in ticket [b2816a3afe].

---
 tests/utf.test | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/utf.test b/tests/utf.test
index 8aa3757..f48299d 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -175,6 +175,9 @@ test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars test
 test utf-4.12.2 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring utf16} {
     testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end
 } 2
+test utf-4.13 {Tcl_NumUtfChars: end of string} {testnumutfchars testbytestring} { 
+    testnumutfchars foobar[testbytestring \xF2\xC2\xA0] end
+} 8
 
 test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} {
     testfindfirst [testbytestring abcbc] 98
-- 
cgit v0.12


From 2f7461f649fe3b5d78645f8efea56d24693f1bef Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Thu, 7 May 2020 19:08:24 +0000
Subject: Fix.  Note that just because we get one positive detection of an
 incomplete character, we cannot conclude that the next byte also will be, or
 can be taken as a single byte.  At least we cannot when TCL_UTF_MAX > 3 so
 that we have room for valid two-byte sequences after incomplete sequence
 detection. No need for conditional code, just use an algorithm that always
 works.

---
 generic/tclUtf.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 4147bbc..80f3be8 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -548,17 +548,18 @@ Tcl_NumUtfChars(
 	    i++;
 	}
 	/* Loop over the remaining string where call must happen */
-	while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
-	    src += TclUtfToUniChar(src, &ch);
+	while (src < endPtr) {
+	    if (Tcl_UtfCharComplete(src, endPtr - src)) {
+		src += TclUtfToUniChar(src, &ch);
+	    } else {
+		/*
+		 * src points to an incomplete UTF-8 sequence.
+		 * Treat its first byte as a character and count it.
+		 */
+		src++;
+	    }
 	    i++;
 	}
-	if (src < endPtr) {
-	    /*
-	     * String ends in an incomplete UTF-8 sequence.
-	     * Count every byte in it.
-	     */
-	    i += endPtr - src;
-	}
     }
     return i;
 }
-- 
cgit v0.12