summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author dgp <dgp@users.sourceforge.net> 2020-05-07 19:12:36 (GMT)
committer dgp <dgp@users.sourceforge.net> 2020-05-07 19:12:36 (GMT)
commit 18b39d376e420191428ecf3ad9491ebf0bd9f128 (patch)
tree f8b2c44309a3ab961b7018250e0bb75d091418c0
parent 846b23ab0a9dfcfb2675ae37c1451ee573609196 (diff)
parent 2f7461f649fe3b5d78645f8efea56d24693f1bef (diff)
download tcl-18b39d376e420191428ecf3ad9491ebf0bd9f128.zip
tcl-18b39d376e420191428ecf3ad9491ebf0bd9f128.tar.gz
tcl-18b39d376e420191428ecf3ad9491ebf0bd9f128.tar.bz2
[b2816a3afe] Fix counting of malformed sequences at end of string.
-rw-r--r-- generic/tclUtf.c 19
-rw-r--r-- tests/utf.test 3
2 files changed, 13 insertions, 9 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 4147bbc..80f3be8 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -548,17 +548,18 @@ Tcl_NumUtfChars(
i++;
}
/* Loop over the remaining string where call must happen */
- while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
- src += TclUtfToUniChar(src, &ch);
+ while (src < endPtr) {
+ if (Tcl_UtfCharComplete(src, endPtr - src)) {
+ src += TclUtfToUniChar(src, &ch);
+ } else {
+ /*
+ * src points to incomplete UTF-8 sequence
+ * Treat first byte as character and count it
+ */
+ src++;
+ }
i++;
}
- if (src < endPtr) {
- /*
- * String ends in an incomplete UTF-8 sequence.
- * Count every byte in it.
- */
- i += endPtr - src;
- }
}
return i;
}
diff --git a/tests/utf.test b/tests/utf.test
index 8aa3757..f48299d 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -175,6 +175,9 @@ test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars test
test utf-4.12.2 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring utf16} {
testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end
} 2
+test utf-4.13 {Tcl_NumUtfChars: end of string} {testnumutfchars testbytestring} {
+ testnumutfchars foobar[testbytestring \xF2\xC2\xA0] end
+} 8
test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} {
testfindfirst [testbytestring abcbc] 98