From 2f7461f649fe3b5d78645f8efea56d24693f1bef Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Thu, 7 May 2020 19:08:24 +0000
Subject: Fix.  Note that just because we get one positive detection of an
 incomplete character, we cannot conclude that the next byte also will be, or
 can be taken as a single byte.  At least we cannot when TCL_UTF_MAX > 3 so
 that we have room for valid two-byte sequences after incomplete sequence
 detection. No need for conditional code, just use an algorithm that always
 works.

---
 generic/tclUtf.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 4147bbc..80f3be8 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -548,17 +548,18 @@ Tcl_NumUtfChars(
 	    i++;
 	}
 	/* Loop over the remaining string where call must happen */
-	while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
-	    src += TclUtfToUniChar(src, &ch);
+	while (src < endPtr) {
+	    if (Tcl_UtfCharComplete(src, endPtr - src)) {
+		src += TclUtfToUniChar(src, &ch);
+	    } else {
+		/*
+		 * src points to incomplete UTF-8 sequence 
+		 * Treat first byte as character and count it
+		 */
+		src++;
+	    }
 	    i++;
 	}
-	if (src < endPtr) {
-	    /*
-	     * String ends in an incomplete UTF-8 sequence.
-	     * Count every byte in it.
-	     */
-	    i += endPtr - src;
-	}
     }
     return i;
 }
-- 
cgit v0.12