diff options
Diffstat (limited to 'generic/tclUtf.c')
-rw-r--r-- | generic/tclUtf.c | 22 |
1 files changed, 21 insertions, 1 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 017688b..28f725a 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -956,6 +956,10 @@ Tcl_UtfNext( const char *next; if (((*src) & 0xC0) == 0x80) { + /* Continuation byte, so we start 'inside' a (possible valid) UTF-8 + * sequence. Since we are not allowed to access src[-1], we cannot + * check if the sequence is actually valid, the best we can do is + * just assume it is valid and locate the end. */ if ((((*++src) & 0xC0) == 0x80) && (((*++src) & 0xC0) == 0x80)) { ++src; } @@ -1819,7 +1823,7 @@ Tcl_UniCharNcmp( const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */ unsigned long numChars) /* Number of unichars to compare. */ { -#ifdef WORDS_BIGENDIAN +#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3) /* * We are definitely on a big-endian machine; memcmp() is safe */ @@ -1833,6 +1837,14 @@ Tcl_UniCharNcmp( for ( ; numChars != 0; ucs++, uct++, numChars--) { if (*ucs != *uct) { +#if TCL_UTF_MAX < 4 + /* special case for handling upper surrogates */ + if (((*ucs & 0xFC00) == 0xD800) && ((*uct & 0xFC00) != 0xD800)) { + return 1; + } else if (((*uct & 0xFC00) == 0xD800)) { + return -1; + } +#endif return (*ucs - *uct); } } @@ -1870,6 +1882,14 @@ Tcl_UniCharNcasecmp( Tcl_UniChar lct = Tcl_UniCharToLower(*uct); if (lcs != lct) { +#if TCL_UTF_MAX < 4 + /* special case for handling upper surrogates */ + if (((lcs & 0xFC00) == 0xD800) && ((lct & 0xFC00) != 0xD800)) { + return 1; + } else if (((lct & 0xFC00) == 0xD800)) { + return -1; + } +#endif return (lcs - lct); } } |