2 files changed, 49 insertions, 35 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 8ae4b15..b855844 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -525,12 +525,13 @@ Tcl_UtfToUniCharDString(
 	p += TclUtfToUniChar(p, &ch);
 	*w++ = ch;
     }
-    while ((p < endPtr) && Tcl_UtfCharComplete(p, endPtr-p)) {
-	p += TclUtfToUniChar(p, &ch);
-	*w++ = ch;
-    }
     while (p < endPtr) {
-	*w++ = UCHAR(*p++);
+	if (Tcl_UtfCharComplete(p, endPtr-p)) {
+	    p += TclUtfToUniChar(p, &ch);
+	    *w++ = ch;
+	} else {
+	    *w++ = UCHAR(*p++);
+	}
     }
     *w = '\0';
     Tcl_DStringSetLength(dsPtr,
@@ -621,17 +622,18 @@ Tcl_NumUtfChars(
 	    i++;
 	}
 	/* Loop over the remaining string where call must happen */
-	while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
-	    src += TclUtfToUniChar(src, &ch);
+	while (src < endPtr) {
+	    if (Tcl_UtfCharComplete(src, endPtr - src)) {
+		src += TclUtfToUniChar(src, &ch);
+	    } else {
+		/*
+		 * src points to an incomplete UTF-8 sequence.
+		 * Treat its first byte as one character and count it.
+		 */
+		src++;
+	    }
 	    i++;
 	}
-	if (src < endPtr) {
-	    /*
-	     * String ends in an incomplete UTF-8 sequence.
-	     * Count every byte in it.
-	     */
-	    i += endPtr - src;
-	}
     }
     return i;
 }
diff --git a/tests/utf.test b/tests/utf.test
index 7b6bf1c..502aa37 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -92,8 +92,8 @@ test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} testbytestring {
 test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} testbytestring {
     string length [testbytestring \xC2]
 } 1
-test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} testbytestring {
-    string length [testbytestring \xC2\xA2]
+test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} {
+    string length \xA2
 } 1
 test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} testbytestring {
     string length [testbytestring \xE2]
@@ -139,32 +139,32 @@ test utf-3.1 {Tcl_UtfCharComplete} {
 test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars {
     testnumutfchars ""
 } 0
-test utf-4.2 {Tcl_NumUtfChars: length 1} {testnumutfchars testbytestring} {
-    testnumutfchars [testbytestring \xC2\xA2]
+test utf-4.2 {Tcl_NumUtfChars: length 1} testnumutfchars {
+    testnumutfchars \xA2
 } 1
 test utf-4.3 {Tcl_NumUtfChars: long string} {testnumutfchars testbytestring} {
-    testnumutfchars [testbytestring abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E]
+    testnumutfchars abc\xA2[testbytestring \xE4\xB9\x8E\xA2\x4E]
 } 7
-test utf-4.4 {Tcl_NumUtfChars: #u0000} {testnumutfchars testbytestring} {
-    testnumutfchars [testbytestring \xC0\x80]
+test utf-4.4 {Tcl_NumUtfChars: #x00} testnumutfchars {
+    testnumutfchars \x00
 } 1
 test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars {
     testnumutfchars "" 0
 } 0
 test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {testnumutfchars testbytestring} {
-    testnumutfchars [testbytestring \xC2\xA2] end
+    testnumutfchars \xA2 end
 } 1
 test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {testnumutfchars testbytestring} {
-    testnumutfchars [testbytestring abc\xC2\xA2\xE4\xB9\x8E\xA2\x4E] end
+    testnumutfchars abc\xA2[testbytestring \xE4\xB9\x8E\xA2\x4E] end
 } 7
-test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {testnumutfchars testbytestring} {
-    testnumutfchars [testbytestring \xC0\x80] end
+test utf-4.8 {Tcl_NumUtfChars: #x00, calc len} testnumutfchars {
+    testnumutfchars \x00 end
 } 1
 # Bug [2738427]: Tcl_NumUtfChars(...) no overflow check
 test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} {testnumutfchars testbytestring} {
     testnumutfchars [testbytestring \xE2\x82\xAC] end-1
 } 2
-test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars testbytestring} {
+test utf-4.10 {Tcl_NumUtfChars: #x00, calc len, overcomplete} {testnumutfchars testbytestring} {
     testnumutfchars [testbytestring \x00] end+1
 } 2
 test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} {
@@ -179,6 +179,9 @@ test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars test
 test utf-4.12.2 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs4} {
     testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end
 } 1
+test utf-4.13 {Tcl_NumUtfChars: end of string} {testnumutfchars testbytestring} { 
+    testnumutfchars foobar[testbytestring \xF2\xC2\xA0] end
+} 8
 
 test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} {
     testfindfirst [testbytestring abcbc] 98
@@ -187,15 +190,15 @@ test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} {
     testfindlast [testbytestring abcbc] 98
 } bc
 
-test utf-6.1 {Tcl_UtfNext} {testutfnext testbytestring} {
+test utf-6.1 {Tcl_UtfNext} testutfnext {
     # This takes the pointer one past the terminating NUL.
     # This is really an invalid call.
     testutfnext {}
 } 1
-test utf-6.2 {Tcl_UtfNext} {testutfnext testbytestring} {
+test utf-6.2 {Tcl_UtfNext} testutfnext {
     testutfnext A
 } 1
-test utf-6.3 {Tcl_UtfNext} {testutfnext testbytestring} {
+test utf-6.3 {Tcl_UtfNext} testutfnext {
     testutfnext AA
 } 1
 test utf-6.4 {Tcl_UtfNext} {testutfnext testbytestring} {
@@ -447,8 +450,8 @@ test utf-6.79.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
 test utf-6.79.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
     testutfnext [testbytestring \xF2\xA0\xA0\xA0G\xF8]
 } 4
-test utf-6.80 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} {
-    testutfnext [testbytestring \xC0\x80]
+test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext {
+    testutfnext \x00
 } 2
 test utf-6.81 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} {
     testutfnext [testbytestring \xC0\x81]
@@ -859,8 +862,8 @@ test utf-7.31 {Tcl_UtfPrev -- overlong sequence}  {testutfprev testbytestring} {
 test utf-7.32 {Tcl_UtfPrev -- overlong sequence}  {testutfprev testbytestring} {
     testutfprev A[testbytestring \xF0\x80\x80\x80] 2
 } 1
-test utf-7.33 {Tcl_UtfPrev -- overlong sequence}  {testutfprev testbytestring} {
-    testutfprev A[testbytestring \xC0\x80]
+test utf-7.33 {Tcl_UtfPrev -- overlong sequence}  {testutfprev} {
+    testutfprev A\x00
 } 1
 test utf-7.34 {Tcl_UtfPrev -- overlong sequence}  {testutfprev testbytestring} {
     testutfprev A[testbytestring \xC1\x80]
@@ -916,7 +919,7 @@ test utf-7.46.1 {Tcl_UtfPrev -- no lead byte at start}  {testutfprev testbytestr
 test utf-7.47 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev testbytestring} {
     testutfprev [testbytestring \xE8\xA0]
 } 0
-test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev testbytestring} {
+test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} testutfprev {
     testutfprev \u8820 2
 } 0
 test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev testbytestring} {
@@ -1459,7 +1462,16 @@ UniCharCaseCmpTest > [format %c 0x10000] \uFFFF	ucs4
 UniCharCaseCmpTest > \U10000 \uFFFF		{Uesc ucs4}
 
 
-
+test utf-26.1 {Tcl_UniCharDString} -setup {
+    testobj freeallvars
+} -constraints {teststringobj} -cleanup {
+    testobj freeallvars
+} -body {
+    teststringobj set 1 foo
+    teststringobj getunicode 1
+    teststringobj append 1 [testbytestring barsoom\xF2\xC2\x80] 10
+    scan [string index [teststringobj get 1] 11] %c
+} -result 128
 
 
 unset count