Fix first part of [ed29806baf]: Tcl_UtfToUniChar reads more than TCL_UTF_MAX bytes.

Tcl_UtfToUniChar() now never reads more than TCL_UTF_MAX bytes. The UtfToUtf encoder/decoder is adapted to do additional checks (more tricky than in Tcl 8.7, since we want compatibility with earlier 8.6 releases). Other callers of Tcl_UtfToUniChar() need to be revised for the same problem. Most callers will need to change Tcl_UtfToUniChar() -> TclUtfToUCS4() and Tcl_UtfCharComplete() -> TclUCS4Complete(), but that's not done yet.
author: jan.nijtmans <nijtmans@users.sourceforge.net> 2020-05-01 13:38:22 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2020-05-01 13:38:22 (GMT)
commit: 9eaf82b745ac07bc55f7238813c449fc5a447cf8 (patch)
tree: f421a15863ac0ae1148013bf95a401b8eeba0357 /generic/tclUtf.c
parent: ba28f4892362a62309d8809b4dc5099a888a9f91 (diff)
parent: 62c00ac54a6f93ad1324d7e7aa5ef43623ca2415 (diff)
download: tcl-9eaf82b745ac07bc55f7238813c449fc5a447cf8.zip
tcl-9eaf82b745ac07bc55f7238813c449fc5a447cf8.tar.gz
tcl-9eaf82b745ac07bc55f7238813c449fc5a447cf8.tar.bz2
1 files changed, 10 insertions, 10 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 712beaa..9ffbfba 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -431,17 +431,17 @@ Tcl_UtfToUniChar(
 	if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80)) {
 	    /*
 	     * Four-byte-character lead byte followed by at least two trail bytes.
-	     * (validity of 3th trail byte will be tested later)
+	     * We don't test the validity of 3th trail byte, see [ed29806ba]
 	     */
 #if TCL_UTF_MAX <= 4
 	    Tcl_UniChar high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2)
 		    | ((src[2] & 0x3F) >> 4)) - 0x40;
-	    if ((high < 0x400) && ((src[3] & 0xC0) == 0x80)) {
+	    if (high < 0x400) {
 		/* produce high surrogate, advance source pointer */
 		*chPtr = 0xD800 + high;
 		return 1;
 	    }
-	    /* out of range, < 0x10000 or > 0x10FFFF or invalid 3th byte */
+	    /* out of range, < 0x10000 or > 0x10FFFF */
 #else
 	    if ((src[3] & 0xC0) == 0x80) {
 		*chPtr = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
@@ -557,7 +557,7 @@ Tcl_UtfCharComplete(
 				 * a complete UTF-8 character. */
     int length)			/* Length of above string in bytes. */
 {
-    return length >= totalBytes[(unsigned char)*src];
+    return length >= totalBytes[UCHAR(*src)];
 }
 
 /*
@@ -604,7 +604,7 @@ Tcl_NumUtfChars(
 	register const char *endPtr = src + length - TCL_UTF_MAX;
 
 	while (src < endPtr) {
-	    if (((unsigned)(unsigned char)*src - 0xF0) < 5) {
+	    if (((unsigned)UCHAR(*src) - 0xF0) < 5) {
 		/* treat F0 - F4 as single character */
 		ch = 0;
 		src++;
@@ -615,7 +615,7 @@ Tcl_NumUtfChars(
 	}
 	endPtr += TCL_UTF_MAX;
 	while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
-	    if (((unsigned)(unsigned char)*src - 0xF0) < 5) {
+	    if (((unsigned)UCHAR(*src) - 0xF0) < 5) {
 		/* treat F0 - F4 as single character */
 		ch = 0;
 		src++;
@@ -1031,7 +1031,7 @@ Tcl_UtfToUpper(
 	 * char to dst if its size is <= the original char.
 	 */
 
-	if (len < UtfCount(upChar) || ((upChar & 0xF800) == 0xD800)) {
+	if (len < UtfCount(upChar) || ((upChar & ~0x7FF) == 0xD800)) {
 	    memmove(dst, src, len);
 	    dst += len;
 	} else {
@@ -1084,7 +1084,7 @@ Tcl_UtfToLower(
 	 * char to dst if its size is <= the original char.
 	 */
 
-	if (len < UtfCount(lowChar) || ((lowChar & 0xF800) == 0xD800)) {
+	if (len < UtfCount(lowChar) || ((lowChar & ~0x7FF) == 0xD800)) {
 	    memmove(dst, src, len);
 	    dst += len;
 	} else {
@@ -1134,7 +1134,7 @@ Tcl_UtfToTitle(
 	len = TclUtfToUCS4(src, &ch);
 	titleChar = UCS4ToTitle(ch);
 
-	if (len < UtfCount(titleChar) || ((titleChar & 0xF800) == 0xD800)) {
+	if (len < UtfCount(titleChar) || ((titleChar & ~0x7FF) == 0xD800)) {
 	    memmove(dst, src, len);
 	    dst += len;
 	} else {
@@ -1150,7 +1150,7 @@ Tcl_UtfToTitle(
 	    lowChar = UCS4ToLower(lowChar);
 	}
 
-	if (len < UtfCount(lowChar) || ((lowChar & 0xF800) == 0xD800)) {
+	if (len < UtfCount(lowChar) || ((lowChar & ~0x7FF) == 0xD800)) {
 	    memmove(dst, src, len);
 	    dst += len;
 	} else {
author	jan.nijtmans <nijtmans@users.sourceforge.net>	2020-05-01 13:38:22 (GMT)
committer	jan.nijtmans <nijtmans@users.sourceforge.net>	2020-05-01 13:38:22 (GMT)
commit	9eaf82b745ac07bc55f7238813c449fc5a447cf8 (patch)
tree	f421a15863ac0ae1148013bf95a401b8eeba0357 /generic/tclUtf.c
parent	ba28f4892362a62309d8809b4dc5099a888a9f91 (diff)
parent	62c00ac54a6f93ad1324d7e7aa5ef43623ca2415 (diff)
download	tcl-9eaf82b745ac07bc55f7238813c449fc5a447cf8.zip tcl-9eaf82b745ac07bc55f7238813c449fc5a447cf8.tar.gz tcl-9eaf82b745ac07bc55f7238813c449fc5a447cf8.tar.bz2