diff options
-rw-r--r-- | generic/tclTest.c | 2 | ||||
-rw-r--r-- | generic/tclUtf.c | 27 | ||||
-rw-r--r-- | tests/utf.test | 15 |
3 files changed, 26 insertions, 18 deletions
diff --git a/generic/tclTest.c b/generic/tclTest.c index 547dc9a..ebd90ae 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -6836,7 +6836,7 @@ TestNumUtfCharsCmd( int len = -1; if (objc > 2) { - (void) Tcl_GetStringFromObj(objv[1], &len); + (void) Tcl_GetIntFromObj(interp, objv[2], &len); } len = Tcl_NumUtfChars(Tcl_GetString(objv[1]), len); Tcl_SetObjResult(interp, Tcl_NewIntObj(len)); diff --git a/generic/tclUtf.c b/generic/tclUtf.c index c46d250..161a4bd 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -459,7 +459,6 @@ Tcl_NumUtfChars( * for strlen(string). */ { Tcl_UniChar ch; - register Tcl_UniChar *chPtr = &ch; register int i; /* @@ -472,23 +471,25 @@ Tcl_NumUtfChars( i = 0; if (length < 0) { while (*src != '\0') { - src += TclUtfToUniChar(src, chPtr); + src += TclUtfToUniChar(src, &ch); i++; } + if (i < 0) i = INT_MAX; /* Bug [2738427] */ } else { - register int n; - - while (length > 0) { - if (UCHAR(*src) < 0xC0) { - length--; - src++; - } else { - n = Tcl_UtfToUniChar(src, chPtr); - length -= n; - src += n; - } + register const char *endPtr = src + length - TCL_UTF_MAX; + + while (src < endPtr) { + src += TclUtfToUniChar(src, &ch); i++; } + endPtr += TCL_UTF_MAX; + while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { + src += TclUtfToUniChar(src, &ch); + i++; + } + if (src < endPtr) { + i += endPtr - src; + } } return i; } diff --git a/tests/utf.test b/tests/utf.test index 28981d6..422ab08 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -99,17 +99,24 @@ test utf-4.4 {Tcl_NumUtfChars: #u0000} {testnumutfchars testbytestring} { testnumutfchars [testbytestring "\xC0\x80"] } {1} test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars { - testnumutfchars "" 1 + testnumutfchars "" 0 } {0} test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "\xC2\xA2"] 1 + testnumutfchars [testbytestring "\xC2\xA2"] 2 } {1} test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] 1 + testnumutfchars [testbytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] 10 } {7} test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "\xC0\x80"] 1 + testnumutfchars [testbytestring "\xC0\x80"] 2 } {1} +# Bug [2738427]: Tcl_NumUtfChars(...) no overflow check +test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} {testnumutfchars testbytestring} { + testnumutfchars [testbytestring "\xE2\x82\xAC"] 2 +} {2} +test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars testbytestring} { + testnumutfchars [testbytestring "\x00"] 2 +} {2} test utf-5.1 {Tcl_UtfFindFirsts} { } {} |