summaryrefslogtreecommitdiffstats
path: root/generic/tclUtf.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclUtf.c')
-rw-r--r--generic/tclUtf.c90
1 files changed, 32 insertions, 58 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index b878149..b33bf6a 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -73,28 +73,13 @@ static const unsigned char totalBytes[256] = {
#else
1,1,1,1,1,1,1,1,
#endif
-#if TCL_UTF_MAX > 4
- 5,5,5,5,
-#else
- 1,1,1,1,
-#endif
-#if TCL_UTF_MAX > 5
- 6,6,6,6
-#else
- 1,1,1,1
-#endif
+ 1,1,1,1,1,1,1,1
};
-
-/*
- * Functions used only in this module.
- */
-
-static int UtfCount(int ch);
/*
*---------------------------------------------------------------------------
*
- * UtfCount --
+ * TclUtfCount --
*
* Find the number of bytes in the Utf character "ch".
*
@@ -107,18 +92,18 @@ static int UtfCount(int ch);
*---------------------------------------------------------------------------
*/
-INLINE static int
-UtfCount(
+int
+TclUtfCount(
int ch) /* The Tcl_UniChar whose size is returned. */
{
- if ((ch > 0) && (ch < UNICODE_SELF)) {
+ if ((unsigned)(ch - 1) < (UNICODE_SELF - 1)) {
return 1;
}
if (ch <= 0x7FF) {
return 2;
}
#if TCL_UTF_MAX > 3
- if ((ch > 0xFFFF) && (ch <= 0x10FFFF)) {
+ if (((unsigned)(ch - 0x10000) <= 0xfffff)) {
return 4;
}
#endif
@@ -143,7 +128,7 @@ UtfCount(
*---------------------------------------------------------------------------
*/
-INLINE int
+int
Tcl_UniCharToUtf(
int ch, /* The Tcl_UniChar to be stored in the
* buffer. */
@@ -152,7 +137,7 @@ Tcl_UniCharToUtf(
* large enough to hold the UTF-8 character
* (at most TCL_UTF_MAX bytes). */
{
- if ((ch > 0) && (ch < UNICODE_SELF)) {
+ if ((unsigned)(ch - 1) < (UNICODE_SELF - 1)) {
buf[0] = (char) ch;
return 1;
}
@@ -180,11 +165,7 @@ Tcl_UniCharToUtf(
}
}
#endif
- three:
- buf[2] = (char) ((ch | 0x80) & 0xBF);
- buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF);
- buf[0] = (char) ((ch >> 12) | 0xE0);
- return 3;
+ goto three;
}
#if TCL_UTF_MAX > 3
@@ -199,7 +180,11 @@ Tcl_UniCharToUtf(
}
ch = 0xFFFD;
- goto three;
+three:
+ buf[2] = (char) ((ch | 0x80) & 0xBF);
+ buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF);
+ buf[0] = (char) ((ch >> 12) | 0xE0);
+ return 3;
}
/*
@@ -314,9 +299,6 @@ Tcl_UtfToUniChar(
* A two-byte-character lead-byte not followed by trail-byte
* represents itself.
*/
-
- *chPtr = (Tcl_UniChar) byte;
- return 1;
} else if (byte < 0xF0) {
if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80)) {
/*
@@ -332,31 +314,23 @@ Tcl_UtfToUniChar(
* A three-byte-character lead-byte not followed by two trail-bytes
* represents itself.
*/
-
- *chPtr = (Tcl_UniChar) byte;
- return 1;
}
#if TCL_UTF_MAX > 3
- {
- int ch, total, trail;
-
- total = totalBytes[byte];
- trail = total - 1;
- if (trail > 0) {
- ch = byte & (0x3F >> trail);
- do {
- src++;
- if ((*src & 0xC0) != 0x80) {
- *chPtr = byte;
- return 1;
- }
- ch <<= 6;
- ch |= (*src & 0x3F);
- trail--;
- } while (trail > 0);
- *chPtr = ch;
- return total;
+ else if (byte < 0xF8) {
+ if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) {
+ /*
+ * Four-byte-character lead byte followed by three trail bytes.
+ */
+
+ *chPtr = (Tcl_UniChar) (((byte & 0x0E) << 18) | ((src[1] & 0x3F) << 12)
+ | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F));
+ return 4;
}
+
+ /*
+ * A three-byte-character lead-byte not followed by two trail-bytes
+ * represents itself.
+ */
}
#endif
@@ -829,7 +803,7 @@ Tcl_UtfToUpper(
* char to dst if its size is <= the original char.
*/
- if (bytes < UtfCount(upChar)) {
+ if (bytes < TclUtfCount(upChar)) {
memcpy(dst, src, (size_t) bytes);
dst += bytes;
} else {
@@ -882,7 +856,7 @@ Tcl_UtfToLower(
* char to dst if its size is <= the original char.
*/
- if (bytes < UtfCount(lowChar)) {
+ if (bytes < TclUtfCount(lowChar)) {
memcpy(dst, src, (size_t) bytes);
dst += bytes;
} else {
@@ -932,7 +906,7 @@ Tcl_UtfToTitle(
bytes = TclUtfToUniChar(src, &ch);
titleChar = Tcl_UniCharToTitle(ch);
- if (bytes < UtfCount(titleChar)) {
+ if (bytes < TclUtfCount(titleChar)) {
memcpy(dst, src, (size_t) bytes);
dst += bytes;
} else {
@@ -944,7 +918,7 @@ Tcl_UtfToTitle(
bytes = TclUtfToUniChar(src, &ch);
lowChar = Tcl_UniCharToLower(ch);
- if (bytes < UtfCount(lowChar)) {
+ if (bytes < TclUtfCount(lowChar)) {
memcpy(dst, src, (size_t) bytes);
dst += bytes;
} else {