diff options
| author | dgp <dgp@users.sourceforge.net> | 2020-05-07 19:12:36 (GMT) |
|---|---|---|
| committer | dgp <dgp@users.sourceforge.net> | 2020-05-07 19:12:36 (GMT) |
| commit | 18b39d376e420191428ecf3ad9491ebf0bd9f128 (patch) | |
| tree | f8b2c44309a3ab961b7018250e0bb75d091418c0 | |
| parent | 846b23ab0a9dfcfb2675ae37c1451ee573609196 (diff) | |
| parent | 2f7461f649fe3b5d78645f8efea56d24693f1bef (diff) | |
| download | tcl-18b39d376e420191428ecf3ad9491ebf0bd9f128.zip tcl-18b39d376e420191428ecf3ad9491ebf0bd9f128.tar.gz tcl-18b39d376e420191428ecf3ad9491ebf0bd9f128.tar.bz2 | |
[b2816a3afe] Fix counting of malformed sequences at end of string.
| -rw-r--r-- | generic/tclUtf.c | 19 | ||||
| -rw-r--r-- | tests/utf.test | 3 |
2 files changed, 13 insertions, 9 deletions
```diff
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 4147bbc..80f3be8 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -548,17 +548,18 @@ Tcl_NumUtfChars(
             i++;
         }
         /* Loop over the remaining string where call must happen */
-        while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
-            src += TclUtfToUniChar(src, &ch);
+        while (src < endPtr) {
+            if (Tcl_UtfCharComplete(src, endPtr - src)) {
+                src += TclUtfToUniChar(src, &ch);
+            } else {
+                /*
+                 * src points to incomplete UTF-8 sequence
+                 * Treat first byte as character and count it
+                 */
+                src++;
+            }
             i++;
         }
-        if (src < endPtr) {
-            /*
-             * String ends in an incomplete UTF-8 sequence.
-             * Count every byte in it.
-             */
-            i += endPtr - src;
-        }
     }
     return i;
 }
diff --git a/tests/utf.test b/tests/utf.test
index 8aa3757..f48299d 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -175,6 +175,9 @@ test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars test
 test utf-4.12.2 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring utf16} {
     testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end
 } 2
+test utf-4.13 {Tcl_NumUtfChars: end of string} {testnumutfchars testbytestring} {
+    testnumutfchars foobar[testbytestring \xF2\xC2\xA0] end
+} 8
 
 test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} {
     testfindfirst [testbytestring abcbc] 98
```
