summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: dgp <dgp@users.sourceforge.net> 2020-05-07 19:22:06 (GMT)
committer: dgp <dgp@users.sourceforge.net> 2020-05-07 19:22:06 (GMT)
commit: 1b11028e45d3369ecbe447dd685246ceae1eda04 (patch)
tree: 9443c14cf1c04fe357c6823c69c2f92701fa6d83
parent: 13faf8b1d4ae63413f8d1c301c422aeab3eb977b (diff)
parent: 18b39d376e420191428ecf3ad9491ebf0bd9f128 (diff)
download: tcl-1b11028e45d3369ecbe447dd685246ceae1eda04.zip
tcl-1b11028e45d3369ecbe447dd685246ceae1eda04.tar.gz
tcl-1b11028e45d3369ecbe447dd685246ceae1eda04.tar.bz2
merge 8.5
-rw-r--r-- generic/tclUtf.c 29
-rw-r--r-- tests/utf.test 3
2 files changed, 18 insertions, 14 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 46a18e4..4103eff 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -631,24 +631,25 @@ Tcl_NumUtfChars(
i++;
}
/* Loop over the remaining string where call must happen */
- while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
+ while (src < endPtr) {
+ if (Tcl_UtfCharComplete(src, endPtr - src)) {
#if TCL_UTF_MAX < 4
- if (((unsigned)UCHAR(*src) - 0xF0) < 5) {
- /* treat F0 - F4 as single character */
- ch = 0;
- src++;
- } else
+ if (((unsigned)UCHAR(*src) - 0xF0) < 5) {
+ /* treat F0 - F4 as single character */
+ ch = 0;
+ src++;
+ } else
#endif
- src += TclUtfToUniChar(src, &ch);
+ src += TclUtfToUniChar(src, &ch);
+ } else {
+ /*
+ * src points to incomplete UTF-8 sequence
+ * Treat first byte as character and count it
+ */
+ src++;
+ }
i++;
}
- if (src < endPtr) {
- /*
- * String ends in an incomplete UTF-8 sequence.
- * Count every byte in it.
- */
- i += endPtr - src;
- }
}
return i;
}
diff --git a/tests/utf.test b/tests/utf.test
index 0acbb76..0e5aba6 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -179,6 +179,9 @@ test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars test
test utf-4.12.2 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring utf16} {
testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end
} 2
+test utf-4.13 {Tcl_NumUtfChars: end of string} {testnumutfchars testbytestring} {
+ testnumutfchars foobar[testbytestring \xF2\xC2\xA0] end
+} 8
test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} {
testfindfirst [testbytestring abcbc] 98