Minor optimizations

author: jan.nijtmans <nijtmans@users.sourceforge.net> 2019-02-19 19:38:10 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2019-02-19 19:38:10 (GMT)
commit: 9589c85462da7e8d01fe0154de892c6d30d92f0d (patch)
tree: 748e6bc9110e6e510384467e0bddc482b5037aaa /generic/tclUtf.c
parent: 2473a591bfbd5b346e1900e3c1088496b0d17590 (diff)
download: tcl-9589c85462da7e8d01fe0154de892c6d30d92f0d.zip
tcl-9589c85462da7e8d01fe0154de892c6d30d92f0d.tar.gz
tcl-9589c85462da7e8d01fe0154de892c6d30d92f0d.tar.bz2
1 files changed, 58 insertions, 55 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 2227d45..6b63ecb 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -147,17 +147,17 @@ Tcl_UniCharToUtf(
 		    /* Low surrogate */
 		    if (((buf[0] & 0xF8) == 0xF0) && ((buf[1] & 0xC0) == 0x80)
 			    && ((buf[2] & 0xCF) == 0)) {
-			/* Previous Tcl_UniChar was a High surrogate, so combine */
+			/* Previous Tcl_UniChar was a high surrogate, so combine */
 			buf[3] = (char) ((ch & 0x3F) | 0x80);
 			buf[2] |= (char) (((ch >> 6) & 0x0F) | 0x80);
 			return 4;
 		    }
-		    /* Previous Tcl_UniChar was not a High surrogate, so just output */
+		    /* Previous Tcl_UniChar was not a high surrogate, so just output */
 		} else {
 		    /* High surrogate */
 		    ch += 0x40;
 		    /* Fill buffer with specific 3-byte (invalid) byte combination,
-		       so following Low surrogate can recognize it and combine */
+		       so following low surrogate can recognize it and combine */
 		    buf[2] = (char) ((ch << 4) & 0x30);
 		    buf[1] = (char) (((ch >> 2) & 0x3F) | 0x80);
 		    buf[0] = (char) (((ch >> 8) & 0x07) | 0xF0);
@@ -232,15 +232,18 @@ Tcl_UniCharToUtfDString(
     wEnd = uniStr + uniLength;
     for (w = uniStr; w < wEnd; ) {
 	if (!len && ((*w & 0xFC00) != 0xDC00)) {
-	    /* Special case for handling upper surrogates. */
+	    /* Special case for handling high surrogates. */
 	    p += Tcl_UniCharToUtf(-1, p);
 	}
 	len = Tcl_UniCharToUtf(*w, p);
 	p += len;
+	if ((*w >= 0xD800) && (len < 3)) {
+	    len = 0; /* Indication that high surrogate was found */
+	}
 	w++;
     }
     if (!len) {
-	/* Special case for handling upper surrogates. */
+	/* Special case for handling high surrogates. */
 	p += Tcl_UniCharToUtf(-1, p);
     }
     Tcl_DStringSetLength(dsPtr, oldLength + (p - string));
@@ -296,7 +299,7 @@ Tcl_UtfToUniChar(
     register Tcl_UniChar *chPtr)/* Filled with the Tcl_UniChar represented by
 				 * the UTF-8 string. */
 {
-    register int byte;
+    Tcl_UniChar byte;
 
     /*
      * Unroll 1 to 3 (or 4) byte UTF-8 sequences.
@@ -326,10 +329,10 @@ Tcl_UtfToUniChar(
 	    return 3;
 	}
 #endif
-	if ((unsigned)(byte-0x80) < (unsigned) 0x20) {
-	    *chPtr = (Tcl_UniChar) cp1252[byte-0x80];
+	if (byte-0x80 < 0x20) {
+	    *chPtr = cp1252[byte-0x80];
 	} else {
-	    *chPtr = (Tcl_UniChar) byte;
+	    *chPtr = byte;
 	}
 	return 1;
     } else if (byte < 0xE0) {
@@ -338,7 +341,7 @@ Tcl_UtfToUniChar(
 	     * Two-byte-character lead-byte followed by a trail-byte.
 	     */
 
-	    *chPtr = (Tcl_UniChar) (((byte & 0x1F) << 6) | (src[1] & 0x3F));
+	    *chPtr = (((byte & 0x1F) << 6) | (src[1] & 0x3F));
 	    if ((unsigned)(*chPtr - 1) >= (UNICODE_SELF - 1)) {
 		return 2;
 	    }
@@ -354,7 +357,7 @@ Tcl_UtfToUniChar(
 	     * Three-byte-character lead byte followed by two trail bytes.
 	     */
 
-	    *chPtr = (Tcl_UniChar) (((byte & 0x0F) << 12)
+	    *chPtr = (((byte & 0x0F) << 12)
 		    | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F));
 	    if (*chPtr > 0x7FF) {
 		return 3;
@@ -374,7 +377,7 @@ Tcl_UtfToUniChar(
 #if TCL_UTF_MAX <= 4
 	    byte = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2)
 		    | ((src[2] & 0x3F) >> 4)) - 0x40;
-	    if ((unsigned) byte >= 0x400) {
+	    if (byte >= 0x400) {
 		/* out of range, < 0x10000 or > 0x10ffff */
 	    } else {
 		/* produce high surrogate, advance source pointer */
@@ -382,9 +385,9 @@ Tcl_UtfToUniChar(
 		return 1;
 	    }
 #else
-	    *chPtr = (Tcl_UniChar) (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
+	    *chPtr = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
 		    | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F));
-	    if ((unsigned)(*chPtr - 0x10000) <= 0xFFFFF) {
+	    if ((*chPtr - 0x10000) <= 0xFFFFF) {
 		return 4;
 	    }
 #endif
@@ -396,7 +399,7 @@ Tcl_UtfToUniChar(
 	 */
     }
 
-    *chPtr = (Tcl_UniChar) byte;
+    *chPtr = byte;
     return 1;
 }
 
@@ -457,8 +460,8 @@ Tcl_UtfToUniCharDString(
     while (p < end) {
 	if (Tcl_UtfCharComplete(p, end-p)) {
 	    p += TclUtfToUniChar(p, &ch);
-	} else if ((unsigned)((UCHAR(*p)-0x80)) < (unsigned) 0x20) {
-	    ch = (Tcl_UniChar) cp1252[UCHAR(*p++)-0x80];
+	} else if (((UCHAR(*p)-0x80)) < 0x20) {
+	    ch = cp1252[UCHAR(*p++)-0x80];
 	} else {
 	    ch = UCHAR(*p++);
 	}
@@ -589,7 +592,7 @@ Tcl_UtfFindFirst(
 	len = TclUtfToUniChar(src, &find);
 	fullchar = find;
 #if TCL_UTF_MAX <= 4
-	if ((len == 1) && ((ch & 0xFC00) == 0xD800)) {
+	if ((ch >= 0xD800) && (len < 3)) {
 	    len += TclUtfToUniChar(src + len, &find);
 	    fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000;
 	}
@@ -637,7 +640,7 @@ Tcl_UtfFindLast(
 	len = TclUtfToUniChar(src, &find);
 	fullchar = find;
 #if TCL_UTF_MAX <= 4
-	if ((len == 1) && ((ch & 0xFC00) == 0xD800)) {
+	if ((ch >= 0xD800) && (len < 3)) {
 	    len += TclUtfToUniChar(src + len, &find);
 	    fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000;
 	}
@@ -680,8 +683,8 @@ Tcl_UtfNext(
     int len = TclUtfToUniChar(src, &ch);
 
 #if TCL_UTF_MAX <= 4
-    if ((len == 1) && ((ch & 0xFC00) == 0xD800)) {
-      len += TclUtfToUniChar(src + len, &ch);
+    if ((ch >= 0xD800) && (len < 3)) {
+	len += TclUtfToUniChar(src + len, &ch);
     }
 #endif
     return src + len;
@@ -774,8 +777,8 @@ Tcl_UniCharAtIndex(
     }
     fullchar = ch;
 #if TCL_UTF_MAX <= 4
-    if ((len == 1) && ((ch & 0xFC00) == 0xD800)) {
-	/* If last Tcl_UniChar was an upper surrogate, combine with lower surrogate */
+    if ((ch >= 0xD800) && (len < 3)) {
+	/* If last Tcl_UniChar was an high surrogate, combine with low surrogate */
 	(void)TclUtfToUniChar(src + len, &ch);
 	fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
     }
@@ -815,7 +818,7 @@ Tcl_UtfAtIndex(
 	src += len;
     }
 #if TCL_UTF_MAX <= 4
-    if ((len == 1) && ((ch & 0xFC00) == 0xD800)) {
+    if ((ch >= 0xD800) && (len < 3)) {
 	/* Index points at character following High Surrogate */
 	src += TclUtfToUniChar(src, &ch);
     }
@@ -901,7 +904,7 @@ Tcl_UtfToUpper(
     Tcl_UniChar ch = 0;
     int upChar;
     char *src, *dst;
-    int bytes;
+    int len;
 
     /*
      * Iterate over the string until we hit the terminating null.
@@ -909,11 +912,11 @@ Tcl_UtfToUpper(
 
     src = dst = str;
     while (*src) {
-	bytes = TclUtfToUniChar(src, &ch);
+	len = TclUtfToUniChar(src, &ch);
 	upChar = ch;
 #if TCL_UTF_MAX <= 4
-	if ((bytes == 1) && ((ch & 0xFC00) == 0xD800)) {
-	    bytes += TclUtfToUniChar(src + bytes, &ch);
+	if ((ch >= 0xD800) && (len < 3)) {
+	    len += TclUtfToUniChar(src + len, &ch);
 	    /* Combine surrogates */
 	    upChar = (((upChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
 	}
@@ -926,13 +929,13 @@ Tcl_UtfToUpper(
 	 * char to dst if its size is <= the original char.
 	 */
 
-	if ((bytes < TclUtfCount(upChar)) || ((upChar & 0xF800) == 0xD800)) {
-	    memcpy(dst, src, (size_t) bytes);
-	    dst += bytes;
+	if ((len < TclUtfCount(upChar)) || ((upChar & 0xF800) == 0xD800)) {
+	    memcpy(dst, src, len);
+	    dst += len;
 	} else {
 	    dst += Tcl_UniCharToUtf(upChar, dst);
 	}
-	src += bytes;
+	src += len;
     }
     *dst = '\0';
     return (dst - str);
@@ -963,7 +966,7 @@ Tcl_UtfToLower(
     Tcl_UniChar ch = 0;
     int lowChar;
     char *src, *dst;
-    int bytes;
+    int len;
 
     /*
      * Iterate over the string until we hit the terminating null.
@@ -971,11 +974,11 @@ Tcl_UtfToLower(
 
     src = dst = str;
     while (*src) {
-	bytes = TclUtfToUniChar(src, &ch);
+	len = TclUtfToUniChar(src, &ch);
 	lowChar = ch;
 #if TCL_UTF_MAX <= 4
-	if ((bytes == 1) && ((ch & 0xFC00) == 0xD800)) {
-	    bytes += TclUtfToUniChar(src + bytes, &ch);
+	if ((ch >= 0xD800) && (len < 3)) {
+	    len += TclUtfToUniChar(src + len, &ch);
 	    /* Combine surrogates */
 	    lowChar = (((lowChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
 	}
@@ -988,13 +991,13 @@ Tcl_UtfToLower(
 	 * char to dst if its size is <= the original char.
 	 */
 
-	if ((bytes < TclUtfCount(lowChar)) || ((lowChar & 0xF800) == 0xD800)) {
-	    memcpy(dst, src, (size_t) bytes);
-	    dst += bytes;
+	if ((len < TclUtfCount(lowChar)) || ((lowChar & 0xF800) == 0xD800)) {
+	    memcpy(dst, src, len);
+	    dst += len;
 	} else {
 	    dst += Tcl_UniCharToUtf(lowChar, dst);
 	}
-	src += bytes;
+	src += len;
     }
     *dst = '\0';
     return (dst - str);
@@ -1026,7 +1029,7 @@ Tcl_UtfToTitle(
     Tcl_UniChar ch = 0;
     int titleChar, lowChar;
     char *src, *dst;
-    int bytes;
+    int len;
 
     /*
      * Capitalize the first character and then lowercase the rest of the
@@ -1036,31 +1039,31 @@ Tcl_UtfToTitle(
     src = dst = str;
 
     if (*src) {
-	bytes = TclUtfToUniChar(src, &ch);
+	len = TclUtfToUniChar(src, &ch);
 	titleChar = ch;
 #if TCL_UTF_MAX <= 4
-	if ((bytes == 1) && ((ch & 0xFC00) == 0xD800)) {
-	    bytes += TclUtfToUniChar(src + bytes, &ch);
+	if ((ch >= 0xD800) && (len < 3)) {
+	    len += TclUtfToUniChar(src + len, &ch);
 	    /* Combine surrogates */
 	    titleChar = (((titleChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
 	}
 #endif
 	titleChar = Tcl_UniCharToTitle(titleChar);
 
-	if ((bytes < TclUtfCount(titleChar)) || ((titleChar & 0xF800) == 0xD800)) {
-	    memcpy(dst, src, (size_t) bytes);
-	    dst += bytes;
+	if ((len < TclUtfCount(titleChar)) || ((titleChar & 0xF800) == 0xD800)) {
+	    memcpy(dst, src, len);
+	    dst += len;
 	} else {
 	    dst += Tcl_UniCharToUtf(titleChar, dst);
 	}
-	src += bytes;
+	src += len;
     }
     while (*src) {
-	bytes = TclUtfToUniChar(src, &ch);
+	len = TclUtfToUniChar(src, &ch);
 	lowChar = ch;
 #if TCL_UTF_MAX <= 4
-	if ((bytes == 1) && ((ch & 0xFC00) == 0xD800)) {
-	    bytes += TclUtfToUniChar(src + bytes, &ch);
+	if ((ch >= 0xD800) && (len < 3)) {
+	    len += TclUtfToUniChar(src + len, &ch);
 	    /* Combine surrogates */
 	    lowChar = (((lowChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
 	}
@@ -1070,13 +1073,13 @@ Tcl_UtfToTitle(
 	    lowChar = Tcl_UniCharToLower(lowChar);
 	}
 
-	if ((bytes < TclUtfCount(lowChar)) || ((lowChar & 0xF800) == 0xD800)) {
-	    memcpy(dst, src, (size_t) bytes);
-	    dst += bytes;
+	if ((len < TclUtfCount(lowChar)) || ((lowChar & 0xF800) == 0xD800)) {
+	    memcpy(dst, src, len);
+	    dst += len;
 	} else {
 	    dst += Tcl_UniCharToUtf(lowChar, dst);
 	}
-	src += bytes;
+	src += len;
     }
     *dst = '\0';
     return (dst - str);
author	jan.nijtmans <nijtmans@users.sourceforge.net>	2019-02-19 19:38:10 (GMT)
committer	jan.nijtmans <nijtmans@users.sourceforge.net>	2019-02-19 19:38:10 (GMT)
commit	9589c85462da7e8d01fe0154de892c6d30d92f0d (patch)
tree	748e6bc9110e6e510384467e0bddc482b5037aaa /generic/tclUtf.c
parent	2473a591bfbd5b346e1900e3c1088496b0d17590 (diff)
download	tcl-9589c85462da7e8d01fe0154de892c6d30d92f0d.zip tcl-9589c85462da7e8d01fe0154de892c6d30d92f0d.tar.gz tcl-9589c85462da7e8d01fe0154de892c6d30d92f0d.tar.bz2