diff options
| -rw-r--r-- | doc/ListObj.3 | 4 | ||||
| -rw-r--r-- | generic/tclUtf.c | 11 | ||||
| -rw-r--r-- | tests/utf.test | 18 |
3 files changed, 14 insertions, 19 deletions
diff --git a/doc/ListObj.3 b/doc/ListObj.3 index dc1ba53..ab836d8 100644 --- a/doc/ListObj.3 +++ b/doc/ListObj.3 @@ -138,7 +138,9 @@ create a new value or modify an existing value to hold the \fIobjc\fR elements of the array referenced by \fIobjv\fR where each element is a pointer to a Tcl value. If \fIobjc\fR is less than or equal to zero, -they return an empty value. +they return an empty value. If \fIobjv\fR is NULL, the resulting list +contains 0 elements, with reserved space in an internal representation +for \fIobjc\fR more elements (to avoid its reallocation later). The new value's string representation is left invalid. The two procedures increment the reference counts of the elements in \fIobjc\fR since the list value now refers to them. diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 0e11e0e..96953e2 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -64,7 +64,7 @@ static const unsigned char totalBytes[256] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX != 4 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, #else /* Tcl_UtfCharComplete() might point to 2nd byte of valid 4-byte sequence */ @@ -733,13 +733,6 @@ Tcl_UtfNext( int left = totalBytes[UCHAR(*src)]; const char *next = src + 1; - if (((*src) & 0xC0) == 0x80) { - if ((((*++src) & 0xC0) == 0x80) && (((*++src) & 0xC0) == 0x80)) { - ++src; - } - return src; - } - while (--left) { if ((*next & 0xC0) != 0x80) { /* @@ -751,7 +744,7 @@ Tcl_UtfNext( } next++; } - if (Invalid((unsigned char *)src)) { + if ((next == src + 1) || Invalid((unsigned char *)src)) { return src + 1; } return next; diff --git a/tests/utf.test b/tests/utf.test index cb650f4..fc0766d 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -97,14 +97,14 @@ test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {tip389 testb test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {tip389 testbytestring} { string length [testbytestring "\xF4\x8F\xBF\xBF"] } 2 -test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring { +test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} {testbytestring ucs2} { string length [testbytestring "\xF0\x8F\xBF\xBF"] } 4 -test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} testbytestring { +test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} {testbytestring ucs2} { # Would decode to U+110000 but that is outside the Unicode range. string length [testbytestring "\xF4\x90\x80\x80"] } 4 -test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestring { +test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} {testbytestring ucs2} { string length [testbytestring "\xF8\xA2\xA2\xA2\xA2"] } 5 @@ -193,7 +193,7 @@ test utf-6.10 {Tcl_UtfNext} testutfnext { } 1 test utf-6.11 {Tcl_UtfNext} testutfnext { testutfnext -bytestring \xA0\xA0 -} 2 +} 1 test utf-6.12 {Tcl_UtfNext} testutfnext { testutfnext -bytestring \xA0\xD0 } 1 @@ -448,10 +448,10 @@ test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} { } 4 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} testutfnext { testutfnext -bytestring \xA0\xA0 -} 2 +} 1 test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} testutfnext { testutfnext -bytestring \x80\x80 -} 2 +} 1 test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs2} { testutfnext -bytestring \xF4\x8F\xBF\xBF } 1 @@ -466,10 +466,10 @@ test utf-6.91.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} } 1 test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { testutfnext -bytestring \xA0\xA0\xA0 -} 3 -test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { +} 1 +test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext ucs2} { testutfnext -bytestring \x80\x80\x80 -} 3 +} 1 test utf-7.1 {Tcl_UtfPrev} testutfprev { testutfprev {} |
