diff options
| field | value | date |
|---|---|---|
| author | dgp <dgp@users.sourceforge.net> | 2020-05-07 19:22:06 (GMT) |
| committer | dgp <dgp@users.sourceforge.net> | 2020-05-07 19:22:06 (GMT) |
| commit | 1b11028e45d3369ecbe447dd685246ceae1eda04 (patch) | |
| tree | 9443c14cf1c04fe357c6823c69c2f92701fa6d83 | |
| parent | 13faf8b1d4ae63413f8d1c301c422aeab3eb977b (diff) | |
| parent | 18b39d376e420191428ecf3ad9491ebf0bd9f128 (diff) | |
| download | tcl-1b11028e45d3369ecbe447dd685246ceae1eda04.zip tcl-1b11028e45d3369ecbe447dd685246ceae1eda04.tar.gz tcl-1b11028e45d3369ecbe447dd685246ceae1eda04.tar.bz2 | |
merge 8.5
| -rw-r--r-- | generic/tclUtf.c | 29 | ||||
| -rw-r--r-- | tests/utf.test | 3 |
2 files changed, 18 insertions, 14 deletions
```diff
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 46a18e4..4103eff 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -631,24 +631,25 @@ Tcl_NumUtfChars(
             i++;
         }
         /* Loop over the remaining string where call must happen */
-        while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
+        while (src < endPtr) {
+            if (Tcl_UtfCharComplete(src, endPtr - src)) {
 #if TCL_UTF_MAX < 4
-            if (((unsigned)UCHAR(*src) - 0xF0) < 5) {
-                /* treat F0 - F4 as single character */
-                ch = 0;
-                src++;
-            } else
+                if (((unsigned)UCHAR(*src) - 0xF0) < 5) {
+                    /* treat F0 - F4 as single character */
+                    ch = 0;
+                    src++;
+                } else
 #endif
-            src += TclUtfToUniChar(src, &ch);
+                src += TclUtfToUniChar(src, &ch);
+            } else {
+                /*
+                 * src points to incomplete UTF-8 sequence
+                 * Treat first byte as character and count it
+                 */
+                src++;
+            }
             i++;
         }
-        if (src < endPtr) {
-            /*
-             * String ends in an incomplete UTF-8 sequence.
-             * Count every byte in it.
-             */
-            i += endPtr - src;
-        }
     }
     return i;
 }
diff --git a/tests/utf.test b/tests/utf.test
index 0acbb76..0e5aba6 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -179,6 +179,9 @@ test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars test
 test utf-4.12.2 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring utf16} {
     testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end
 } 2
+test utf-4.13 {Tcl_NumUtfChars: end of string} {testnumutfchars testbytestring} {
+    testnumutfchars foobar[testbytestring \xF2\xC2\xA0] end
+} 8
 
 test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} {
     testfindfirst [testbytestring abcbc] 98
```
