summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordgp <dgp@users.sourceforge.net>2020-05-07 19:08:24 (GMT)
committerdgp <dgp@users.sourceforge.net>2020-05-07 19:08:24 (GMT)
commit2f7461f649fe3b5d78645f8efea56d24693f1bef (patch)
treef8b2c44309a3ab961b7018250e0bb75d091418c0
parent0187afa965d2276476598016ea28d8fcd96d48ea (diff)
downloadtcl-2f7461f649fe3b5d78645f8efea56d24693f1bef.zip
tcl-2f7461f649fe3b5d78645f8efea56d24693f1bef.tar.gz
tcl-2f7461f649fe3b5d78645f8efea56d24693f1bef.tar.bz2
Fix. Note that just because we get one positive detection of an incomplete
character, we cannot conclude that the next byte will also be incomplete, or can be taken as a single byte. At least we cannot when TCL_UTF_MAX > 3, so that we have room for valid two-byte sequences after incomplete sequence detection. No need for conditional code; just use an algorithm that always works.
-rw-r--r--generic/tclUtf.c19
1 files changed, 10 insertions, 9 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 4147bbc..80f3be8 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -548,17 +548,18 @@ Tcl_NumUtfChars(
i++;
}
/* Loop over the remaining string where call must happen */
- while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
- src += TclUtfToUniChar(src, &ch);
+ while (src < endPtr) {
+ if (Tcl_UtfCharComplete(src, endPtr - src)) {
+ src += TclUtfToUniChar(src, &ch);
+ } else {
+ /*
+ * src points to incomplete UTF-8 sequence
+ * Treat first byte as character and count it
+ */
+ src++;
+ }
i++;
}
- if (src < endPtr) {
- /*
- * String ends in an incomplete UTF-8 sequence.
- * Count every byte in it.
- */
- i += endPtr - src;
- }
}
return i;
}