summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- generic/tclUtf.c 19
-rw-r--r-- tests/utf.test 3
2 files changed, 13 insertions, 9 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 4147bbc..80f3be8 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -548,17 +548,18 @@ Tcl_NumUtfChars(
i++;
}
/* Loop over the remaining string where call must happen */
- while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
- src += TclUtfToUniChar(src, &ch);
+ while (src < endPtr) {
+ if (Tcl_UtfCharComplete(src, endPtr - src)) {
+ src += TclUtfToUniChar(src, &ch);
+ } else {
+ /*
+ * src points to incomplete UTF-8 sequence
+ * Treat first byte as character and count it
+ */
+ src++;
+ }
i++;
}
- if (src < endPtr) {
- /*
- * String ends in an incomplete UTF-8 sequence.
- * Count every byte in it.
- */
- i += endPtr - src;
- }
}
return i;
}
diff --git a/tests/utf.test b/tests/utf.test
index 8aa3757..f48299d 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -175,6 +175,9 @@ test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars test
test utf-4.12.2 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring utf16} {
testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end
} 2
+test utf-4.13 {Tcl_NumUtfChars: end of string} {testnumutfchars testbytestring} {
+ testnumutfchars foobar[testbytestring \xF2\xC2\xA0] end
+} 8
test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} {
testfindfirst [testbytestring abcbc] 98