diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2019-02-19 19:38:10 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2019-02-19 19:38:10 (GMT) |
commit | 9589c85462da7e8d01fe0154de892c6d30d92f0d (patch) | |
tree | 748e6bc9110e6e510384467e0bddc482b5037aaa /generic/tclUtf.c | |
parent | 2473a591bfbd5b346e1900e3c1088496b0d17590 (diff) | |
download | tcl-9589c85462da7e8d01fe0154de892c6d30d92f0d.zip tcl-9589c85462da7e8d01fe0154de892c6d30d92f0d.tar.gz tcl-9589c85462da7e8d01fe0154de892c6d30d92f0d.tar.bz2 |
Minor optimizations
Diffstat (limited to 'generic/tclUtf.c')
-rw-r--r-- | generic/tclUtf.c | 113 |
1 files changed, 58 insertions, 55 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 2227d45..6b63ecb 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -147,17 +147,17 @@ Tcl_UniCharToUtf( /* Low surrogate */ if (((buf[0] & 0xF8) == 0xF0) && ((buf[1] & 0xC0) == 0x80) && ((buf[2] & 0xCF) == 0)) { - /* Previous Tcl_UniChar was a High surrogate, so combine */ + /* Previous Tcl_UniChar was a high surrogate, so combine */ buf[3] = (char) ((ch & 0x3F) | 0x80); buf[2] |= (char) (((ch >> 6) & 0x0F) | 0x80); return 4; } - /* Previous Tcl_UniChar was not a High surrogate, so just output */ + /* Previous Tcl_UniChar was not a high surrogate, so just output */ } else { /* High surrogate */ ch += 0x40; /* Fill buffer with specific 3-byte (invalid) byte combination, - so following Low surrogate can recognize it and combine */ + so following low surrogate can recognize it and combine */ buf[2] = (char) ((ch << 4) & 0x30); buf[1] = (char) (((ch >> 2) & 0x3F) | 0x80); buf[0] = (char) (((ch >> 8) & 0x07) | 0xF0); @@ -232,15 +232,18 @@ Tcl_UniCharToUtfDString( wEnd = uniStr + uniLength; for (w = uniStr; w < wEnd; ) { if (!len && ((*w & 0xFC00) != 0xDC00)) { - /* Special case for handling upper surrogates. */ + /* Special case for handling high surrogates. */ p += Tcl_UniCharToUtf(-1, p); } len = Tcl_UniCharToUtf(*w, p); p += len; + if ((*w >= 0xD800) && (len < 3)) { + len = 0; /* Indication that high surrogate was found */ + } w++; } if (!len) { - /* Special case for handling upper surrogates. */ + /* Special case for handling high surrogates. */ p += Tcl_UniCharToUtf(-1, p); } Tcl_DStringSetLength(dsPtr, oldLength + (p - string)); @@ -296,7 +299,7 @@ Tcl_UtfToUniChar( register Tcl_UniChar *chPtr)/* Filled with the Tcl_UniChar represented by * the UTF-8 string. */ { - register int byte; + Tcl_UniChar byte; /* * Unroll 1 to 3 (or 4) byte UTF-8 sequences. @@ -326,10 +329,10 @@ Tcl_UtfToUniChar( return 3; } #endif - if ((unsigned)(byte-0x80) < (unsigned) 0x20) { - *chPtr = (Tcl_UniChar) cp1252[byte-0x80]; + if (byte-0x80 < 0x20) { + *chPtr = cp1252[byte-0x80]; } else { - *chPtr = (Tcl_UniChar) byte; + *chPtr = byte; } return 1; } else if (byte < 0xE0) { @@ -338,7 +341,7 @@ Tcl_UtfToUniChar( * Two-byte-character lead-byte followed by a trail-byte. */ - *chPtr = (Tcl_UniChar) (((byte & 0x1F) << 6) | (src[1] & 0x3F)); + *chPtr = (((byte & 0x1F) << 6) | (src[1] & 0x3F)); if ((unsigned)(*chPtr - 1) >= (UNICODE_SELF - 1)) { return 2; } @@ -354,7 +357,7 @@ Tcl_UtfToUniChar( * Three-byte-character lead byte followed by two trail bytes. */ - *chPtr = (Tcl_UniChar) (((byte & 0x0F) << 12) + *chPtr = (((byte & 0x0F) << 12) | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F)); if (*chPtr > 0x7FF) { return 3; @@ -374,7 +377,7 @@ Tcl_UtfToUniChar( #if TCL_UTF_MAX <= 4 byte = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2) | ((src[2] & 0x3F) >> 4)) - 0x40; - if ((unsigned) byte >= 0x400) { + if (byte >= 0x400) { /* out of range, < 0x10000 or > 0x10ffff */ } else { /* produce high surrogate, advance source pointer */ @@ -382,9 +385,9 @@ Tcl_UtfToUniChar( return 1; } #else - *chPtr = (Tcl_UniChar) (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12) + *chPtr = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12) | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)); - if ((unsigned)(*chPtr - 0x10000) <= 0xFFFFF) { + if ((*chPtr - 0x10000) <= 0xFFFFF) { return 4; } #endif @@ -396,7 +399,7 @@ Tcl_UtfToUniChar( */ } - *chPtr = (Tcl_UniChar) byte; + *chPtr = byte; return 1; } @@ -457,8 +460,8 @@ Tcl_UtfToUniCharDString( while (p < end) { if (Tcl_UtfCharComplete(p, end-p)) { p += TclUtfToUniChar(p, &ch); - } else if ((unsigned)((UCHAR(*p)-0x80)) < (unsigned) 0x20) { - ch = (Tcl_UniChar) cp1252[UCHAR(*p++)-0x80]; + } else if (((UCHAR(*p)-0x80)) < 0x20) { + ch = cp1252[UCHAR(*p++)-0x80]; } else { ch = UCHAR(*p++); } @@ -589,7 +592,7 @@ Tcl_UtfFindFirst( len = TclUtfToUniChar(src, &find); fullchar = find; #if TCL_UTF_MAX <= 4 - if ((len == 1) && ((ch & 0xFC00) == 0xD800)) { + if ((ch >= 0xD800) && (len < 3)) { len += TclUtfToUniChar(src + len, &find); fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000; } @@ -637,7 +640,7 @@ Tcl_UtfFindLast( len = TclUtfToUniChar(src, &find); fullchar = find; #if TCL_UTF_MAX <= 4 - if ((len == 1) && ((ch & 0xFC00) == 0xD800)) { + if ((ch >= 0xD800) && (len < 3)) { len += TclUtfToUniChar(src + len, &find); fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000; } @@ -680,8 +683,8 @@ Tcl_UtfNext( int len = TclUtfToUniChar(src, &ch); #if TCL_UTF_MAX <= 4 - if ((len == 1) && ((ch & 0xFC00) == 0xD800)) { - len += TclUtfToUniChar(src + len, &ch); + if ((ch >= 0xD800) && (len < 3)) { + len += TclUtfToUniChar(src + len, &ch); } #endif return src + len; @@ -774,8 +777,8 @@ Tcl_UniCharAtIndex( } fullchar = ch; #if TCL_UTF_MAX <= 4 - if ((len == 1) && ((ch & 0xFC00) == 0xD800)) { - /* If last Tcl_UniChar was an upper surrogate, combine with lower surrogate */ + if ((ch >= 0xD800) && (len < 3)) { + /* If last Tcl_UniChar was an high surrogate, combine with low surrogate */ (void)TclUtfToUniChar(src + len, &ch); fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; } @@ -815,7 +818,7 @@ Tcl_UtfAtIndex( src += len; } #if TCL_UTF_MAX <= 4 - if ((len == 1) && ((ch & 0xFC00) == 0xD800)) { + if ((ch >= 0xD800) && (len < 3)) { /* Index points at character following High Surrogate */ src += TclUtfToUniChar(src, &ch); } @@ -901,7 +904,7 @@ Tcl_UtfToUpper( Tcl_UniChar ch = 0; int upChar; char *src, *dst; - int bytes; + int len; /* * Iterate over the string until we hit the terminating null. @@ -909,11 +912,11 @@ Tcl_UtfToUpper( src = dst = str; while (*src) { - bytes = TclUtfToUniChar(src, &ch); + len = TclUtfToUniChar(src, &ch); upChar = ch; #if TCL_UTF_MAX <= 4 - if ((bytes == 1) && ((ch & 0xFC00) == 0xD800)) { - bytes += TclUtfToUniChar(src + bytes, &ch); + if ((ch >= 0xD800) && (len < 3)) { + len += TclUtfToUniChar(src + len, &ch); /* Combine surrogates */ upChar = (((upChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; } @@ -926,13 +929,13 @@ Tcl_UtfToUpper( * char to dst if its size is <= the original char. */ - if ((bytes < TclUtfCount(upChar)) || ((upChar & 0xF800) == 0xD800)) { - memcpy(dst, src, (size_t) bytes); - dst += bytes; + if ((len < TclUtfCount(upChar)) || ((upChar & 0xF800) == 0xD800)) { + memcpy(dst, src, len); + dst += len; } else { dst += Tcl_UniCharToUtf(upChar, dst); } - src += bytes; + src += len; } *dst = '\0'; return (dst - str); @@ -963,7 +966,7 @@ Tcl_UtfToLower( Tcl_UniChar ch = 0; int lowChar; char *src, *dst; - int bytes; + int len; /* * Iterate over the string until we hit the terminating null. @@ -971,11 +974,11 @@ Tcl_UtfToLower( src = dst = str; while (*src) { - bytes = TclUtfToUniChar(src, &ch); + len = TclUtfToUniChar(src, &ch); lowChar = ch; #if TCL_UTF_MAX <= 4 - if ((bytes == 1) && ((ch & 0xFC00) == 0xD800)) { - bytes += TclUtfToUniChar(src + bytes, &ch); + if ((ch >= 0xD800) && (len < 3)) { + len += TclUtfToUniChar(src + len, &ch); /* Combine surrogates */ lowChar = (((lowChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; } @@ -988,13 +991,13 @@ Tcl_UtfToLower( * char to dst if its size is <= the original char. */ - if ((bytes < TclUtfCount(lowChar)) || ((lowChar & 0xF800) == 0xD800)) { - memcpy(dst, src, (size_t) bytes); - dst += bytes; + if ((len < TclUtfCount(lowChar)) || ((lowChar & 0xF800) == 0xD800)) { + memcpy(dst, src, len); + dst += len; } else { dst += Tcl_UniCharToUtf(lowChar, dst); } - src += bytes; + src += len; } *dst = '\0'; return (dst - str); @@ -1026,7 +1029,7 @@ Tcl_UtfToTitle( Tcl_UniChar ch = 0; int titleChar, lowChar; char *src, *dst; - int bytes; + int len; /* * Capitalize the first character and then lowercase the rest of the @@ -1036,31 +1039,31 @@ Tcl_UtfToTitle( src = dst = str; if (*src) { - bytes = TclUtfToUniChar(src, &ch); + len = TclUtfToUniChar(src, &ch); titleChar = ch; #if TCL_UTF_MAX <= 4 - if ((bytes == 1) && ((ch & 0xFC00) == 0xD800)) { - bytes += TclUtfToUniChar(src + bytes, &ch); + if ((ch >= 0xD800) && (len < 3)) { + len += TclUtfToUniChar(src + len, &ch); /* Combine surrogates */ titleChar = (((titleChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; } #endif titleChar = Tcl_UniCharToTitle(titleChar); - if ((bytes < TclUtfCount(titleChar)) || ((titleChar & 0xF800) == 0xD800)) { - memcpy(dst, src, (size_t) bytes); - dst += bytes; + if ((len < TclUtfCount(titleChar)) || ((titleChar & 0xF800) == 0xD800)) { + memcpy(dst, src, len); + dst += len; } else { dst += Tcl_UniCharToUtf(titleChar, dst); } - src += bytes; + src += len; } while (*src) { - bytes = TclUtfToUniChar(src, &ch); + len = TclUtfToUniChar(src, &ch); lowChar = ch; #if TCL_UTF_MAX <= 4 - if ((bytes == 1) && ((ch & 0xFC00) == 0xD800)) { - bytes += TclUtfToUniChar(src + bytes, &ch); + if ((ch >= 0xD800) && (len < 3)) { + len += TclUtfToUniChar(src + len, &ch); /* Combine surrogates */ lowChar = (((lowChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; } @@ -1070,13 +1073,13 @@ Tcl_UtfToTitle( lowChar = Tcl_UniCharToLower(lowChar); } - if ((bytes < TclUtfCount(lowChar)) || ((lowChar & 0xF800) == 0xD800)) { - memcpy(dst, src, (size_t) bytes); - dst += bytes; + if ((len < TclUtfCount(lowChar)) || ((lowChar & 0xF800) == 0xD800)) { + memcpy(dst, src, len); + dst += len; } else { dst += Tcl_UniCharToUtf(lowChar, dst); } - src += bytes; + src += len; } *dst = '\0'; return (dst - str); |