diff options
Diffstat (limited to 'generic/tclUtf.c')
-rw-r--r-- | generic/tclUtf.c | 90 |
1 files changed, 32 insertions, 58 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index b878149..b33bf6a 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -73,28 +73,13 @@ static const unsigned char totalBytes[256] = { #else 1,1,1,1,1,1,1,1, #endif -#if TCL_UTF_MAX > 4 - 5,5,5,5, -#else - 1,1,1,1, -#endif -#if TCL_UTF_MAX > 5 - 6,6,6,6 -#else - 1,1,1,1 -#endif + 1,1,1,1,1,1,1,1 }; - -/* - * Functions used only in this module. - */ - -static int UtfCount(int ch); /* *--------------------------------------------------------------------------- * - * UtfCount -- + * TclUtfCount -- * * Find the number of bytes in the Utf character "ch". * @@ -107,18 +92,18 @@ static int UtfCount(int ch); *--------------------------------------------------------------------------- */ -INLINE static int -UtfCount( +int +TclUtfCount( int ch) /* The Tcl_UniChar whose size is returned. */ { - if ((ch > 0) && (ch < UNICODE_SELF)) { + if ((unsigned)(ch - 1) < (UNICODE_SELF - 1)) { return 1; } if (ch <= 0x7FF) { return 2; } #if TCL_UTF_MAX > 3 - if ((ch > 0xFFFF) && (ch <= 0x10FFFF)) { + if (((unsigned)(ch - 0x10000) <= 0xfffff)) { return 4; } #endif @@ -143,7 +128,7 @@ UtfCount( *--------------------------------------------------------------------------- */ -INLINE int +int Tcl_UniCharToUtf( int ch, /* The Tcl_UniChar to be stored in the * buffer. */ @@ -152,7 +137,7 @@ Tcl_UniCharToUtf( * large enough to hold the UTF-8 character * (at most TCL_UTF_MAX bytes). */ { - if ((ch > 0) && (ch < UNICODE_SELF)) { + if ((unsigned)(ch - 1) < (UNICODE_SELF - 1)) { buf[0] = (char) ch; return 1; } @@ -180,11 +165,7 @@ Tcl_UniCharToUtf( } } #endif - three: - buf[2] = (char) ((ch | 0x80) & 0xBF); - buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF); - buf[0] = (char) ((ch >> 12) | 0xE0); - return 3; + goto three; } #if TCL_UTF_MAX > 3 @@ -199,7 +180,11 @@ Tcl_UniCharToUtf( } ch = 0xFFFD; - goto three; +three: + buf[2] = (char) ((ch | 0x80) & 0xBF); + buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF); + buf[0] = (char) ((ch >> 12) | 0xE0); + return 3; } /* @@ -314,9 +299,6 @@ Tcl_UtfToUniChar( * A two-byte-character lead-byte not followed by trail-byte * represents itself. */ - - *chPtr = (Tcl_UniChar) byte; - return 1; } else if (byte < 0xF0) { if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80)) { /* @@ -332,31 +314,23 @@ Tcl_UtfToUniChar( * A three-byte-character lead-byte not followed by two trail-bytes * represents itself. */ - - *chPtr = (Tcl_UniChar) byte; - return 1; } #if TCL_UTF_MAX > 3 - { - int ch, total, trail; - - total = totalBytes[byte]; - trail = total - 1; - if (trail > 0) { - ch = byte & (0x3F >> trail); - do { - src++; - if ((*src & 0xC0) != 0x80) { - *chPtr = byte; - return 1; - } - ch <<= 6; - ch |= (*src & 0x3F); - trail--; - } while (trail > 0); - *chPtr = ch; - return total; + else if (byte < 0xF8) { + if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) { + /* + * Four-byte-character lead byte followed by three trail bytes. + */ + + *chPtr = (Tcl_UniChar) (((byte & 0x0E) << 18) | ((src[1] & 0x3F) << 12) + | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)); + return 4; } + + /* + * A three-byte-character lead-byte not followed by two trail-bytes + * represents itself. + */ } #endif @@ -829,7 +803,7 @@ Tcl_UtfToUpper( * char to dst if its size is <= the original char. */ - if (bytes < UtfCount(upChar)) { + if (bytes < TclUtfCount(upChar)) { memcpy(dst, src, (size_t) bytes); dst += bytes; } else { @@ -882,7 +856,7 @@ Tcl_UtfToLower( * char to dst if its size is <= the original char. */ - if (bytes < UtfCount(lowChar)) { + if (bytes < TclUtfCount(lowChar)) { memcpy(dst, src, (size_t) bytes); dst += bytes; } else { @@ -932,7 +906,7 @@ Tcl_UtfToTitle( bytes = TclUtfToUniChar(src, &ch); titleChar = Tcl_UniCharToTitle(ch); - if (bytes < UtfCount(titleChar)) { + if (bytes < TclUtfCount(titleChar)) { memcpy(dst, src, (size_t) bytes); dst += bytes; } else { @@ -944,7 +918,7 @@ Tcl_UtfToTitle( bytes = TclUtfToUniChar(src, &ch); lowChar = Tcl_UniCharToLower(ch); - if (bytes < UtfCount(lowChar)) { + if (bytes < TclUtfCount(lowChar)) { memcpy(dst, src, (size_t) bytes); dst += bytes; } else { |