diff options
-rw-r--r-- | generic/tclUtf.c | 36 | ||||
-rw-r--r-- | tests/encoding.test | 2 |
2 files changed, 19 insertions, 19 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index eec4068..c46d250 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -103,7 +103,7 @@ TclUtfCount( return 2; } #if TCL_UTF_MAX > 3 - if (((unsigned)(ch - 0x10000) <= 0xfffff)) { + if (((unsigned)(ch - 0x10000) <= 0xFFFFF)) { return 4; } #endif @@ -292,7 +292,7 @@ Tcl_UtfToUniChar( */ *chPtr = (Tcl_UniChar) (((byte & 0x1F) << 6) | (src[1] & 0x3F)); - if ((*chPtr == 0) || (*chPtr > 0x7f)) { + if ((unsigned)(*chPtr - 1) >= (UNICODE_SELF - 1)) { return 2; } } @@ -309,7 +309,7 @@ Tcl_UtfToUniChar( *chPtr = (Tcl_UniChar) (((byte & 0x0F) << 12) | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F)); - if (*chPtr > 0x7ff) { + if (*chPtr > 0x7FF) { return 3; } } @@ -326,15 +326,15 @@ Tcl_UtfToUniChar( * Four-byte-character lead byte followed by three trail bytes. */ - *chPtr = (Tcl_UniChar) (((byte & 0x0E) << 18) | ((src[1] & 0x3F) << 12) + *chPtr = (Tcl_UniChar) (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12) | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)); - if ((*chPtr <= 0x10ffff) && (*chPtr > 0xffff)) { + if ((unsigned)(*chPtr - 0x10000) <= 0xFFFFF) { return 4; } } /* - * A three-byte-character lead-byte not followed by two trail-bytes + * A four-byte-character lead-byte not followed by three trail-bytes * represents itself. */ } @@ -1010,7 +1010,7 @@ Tcl_UtfNcmp( /* * Cannot use 'memcmp(cs, ct, n);' as byte representation of \u0000 (the - * pair of bytes 0xc0,0x80) is larger than byte representation of \u0001 + * pair of bytes 0xC0,0x80) is larger than byte representation of \u0001 * (the byte 0x01.) 
*/ @@ -1394,11 +1394,11 @@ Tcl_UniCharIsControl( { #if TCL_UTF_MAX > 3 if (UNICODE_OUT_OF_RANGE(ch)) { - ch &= 0x1fffff; - if ((ch == 0xe0001) || ((ch >= 0xe0020) && (ch <= 0xe007f))) { + ch &= 0x1FFFFF; + if ((ch == 0xE0001) || ((ch >= 0xE0020) && (ch <= 0xE007f))) { return 1; } - if ((ch >= 0xf0000) && ((ch & 0xffff) <= 0xfffd)) { + if ((ch >= 0xF0000) && ((ch & 0xFFFF) <= 0xFFFD)) { return 1; } return 0; @@ -1457,8 +1457,8 @@ Tcl_UniCharIsGraph( { #if TCL_UTF_MAX > 3 if (UNICODE_OUT_OF_RANGE(ch)) { - ch &= 0x1fffff; - return (ch >= 0xe0100) && (ch <= 0xe01ef); + ch &= 0x1FFFFF; + return (ch >= 0xE0100) && (ch <= 0xE01EF); } #endif return ((GRAPH_BITS >> GetCategory(ch)) & 1); @@ -1514,8 +1514,8 @@ Tcl_UniCharIsPrint( { #if TCL_UTF_MAX > 3 if (UNICODE_OUT_OF_RANGE(ch)) { - ch &= 0x1fffff; - return (ch >= 0xe0100) && (ch <= 0xe01ef); + ch &= 0x1FFFFF; + return (ch >= 0xE0100) && (ch <= 0xE01EF); } #endif return (((GRAPH_BITS|SPACE_BITS) >> GetCategory(ch)) & 1); @@ -1571,10 +1571,10 @@ Tcl_UniCharIsSpace( { #if TCL_UTF_MAX > 3 /* Ignore upper 11 bits. */ - ch &= 0x1fffff; + ch &= 0x1FFFFF; #else /* Ignore upper 16 bits. 
*/ - ch &= 0xffff; + ch &= 0xFFFF; #endif /* @@ -1588,8 +1588,8 @@ Tcl_UniCharIsSpace( } else if (UNICODE_OUT_OF_RANGE(ch)) { return 0; #endif - } else if (ch == 0x0085 || ch == 0x180e || ch == 0x200b - || ch == 0x202f || ch == 0x2060 || ch == 0xfeff) { + } else if (ch == 0x0085 || ch == 0x180E || ch == 0x200B + || ch == 0x202F || ch == 0x2060 || ch == 0xFEFF) { return 1; } else { return ((SPACE_BITS >> GetCategory(ch)) & 1); diff --git a/tests/encoding.test b/tests/encoding.test index eb2a08f..49555b6 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -469,7 +469,7 @@ test encoding-24.9 {Parse valid or invalid utf-8} { test encoding-24.10 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xe0\xa0\x80"] } 1 -test encoding-24.10 {Parse valid or invalid utf-8} { +test encoding-24.11 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xef\xbf\xbf"] } 1 |