diff options
-rw-r--r-- | generic/tclUtf.c | 27 | ||||
-rw-r--r-- | tests/utf.test | 6 |
2 files changed, 19 insertions, 14 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 52b4291..b13ad75 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -465,7 +465,6 @@ Tcl_NumUtfChars( * for strlen(string). */ { Tcl_UniChar ch; - register Tcl_UniChar *chPtr = &ch; register int i; /* @@ -478,23 +477,25 @@ Tcl_NumUtfChars( i = 0; if (length < 0) { while (*src != '\0') { - src += TclUtfToUniChar(src, chPtr); + src += TclUtfToUniChar(src, &ch); i++; } + if (i < 0) i = INT_MAX; /* Bug [2738427] */ } else { - register int n; - - while (length > 0) { - if (UCHAR(*src) < 0xC0) { - length--; - src++; - } else { - n = Tcl_UtfToUniChar(src, chPtr); - length -= n; - src += n; - } + register const char *endPtr = src + length - TCL_UTF_MAX; + + while (src < endPtr) { + src += TclUtfToUniChar(src, &ch); i++; } + endPtr += TCL_UTF_MAX; + while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { + src += TclUtfToUniChar(src, &ch); + i++; + } + if (src < endPtr) { + i += endPtr - src; + } } return i; } diff --git a/tests/utf.test b/tests/utf.test index f677438..422ab08 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -110,9 +110,13 @@ test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {testnumutfchars testbytes test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {testnumutfchars testbytestring} { testnumutfchars [testbytestring "\xC0\x80"] 2 } {1} -test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} {knownBug testnumutfchars testbytestring} { +# Bug [2738427]: Tcl_NumUtfChars(...) no overflow check +test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} {testnumutfchars testbytestring} { testnumutfchars [testbytestring "\xE2\x82\xAC"] 2 } {2} +test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars testbytestring} { + testnumutfchars [testbytestring "\x00"] 2 +} {2} test utf-5.1 {Tcl_UtfFindFirsts} { } {} |