diff options
author | dgp <dgp@users.sourceforge.net> | 2020-05-07 19:08:24 (GMT) |
---|---|---|
committer | dgp <dgp@users.sourceforge.net> | 2020-05-07 19:08:24 (GMT) |
commit | 2f7461f649fe3b5d78645f8efea56d24693f1bef (patch) | |
tree | f8b2c44309a3ab961b7018250e0bb75d091418c0 | |
parent | 0187afa965d2276476598016ea28d8fcd96d48ea (diff) | |
download | tcl-2f7461f649fe3b5d78645f8efea56d24693f1bef.zip tcl-2f7461f649fe3b5d78645f8efea56d24693f1bef.tar.gz tcl-2f7461f649fe3b5d78645f8efea56d24693f1bef.tar.bz2 |
Fix. Note that just because we get one positive detection of an incomplete
character, we cannot conclude that the next byte also will be, or can be
taken as a single byte. At least we cannot when TCL_UTF_MAX > 3 so that we
have room for valid two-byte sequences after incomplete sequence detection.
No need for conditional code, just use an algorithm that always works.
-rw-r--r-- | generic/tclUtf.c | 19 |
1 files changed, 10 insertions, 9 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 4147bbc..80f3be8 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -548,17 +548,18 @@ Tcl_NumUtfChars(
             i++;
         }
         /* Loop over the remaining string where call must happen */
-        while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
-            src += TclUtfToUniChar(src, &ch);
+        while (src < endPtr) {
+            if (Tcl_UtfCharComplete(src, endPtr - src)) {
+                src += TclUtfToUniChar(src, &ch);
+            } else {
+                /*
+                 * src points to incomplete UTF-8 sequence
+                 * Treat first byte as character and count it
+                 */
+                src++;
+            }
             i++;
         }
-        if (src < endPtr) {
-            /*
-             * String ends in an incomplete UTF-8 sequence.
-             * Count every byte in it.
-             */
-            i += endPtr - src;
-        }
     }
     return i;
 }