summaryrefslogtreecommitdiffstats
path: root/generic/tclUtf.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclUtf.c')
-rw-r--r--generic/tclUtf.c22
1 files changed, 21 insertions, 1 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 017688b..28f725a 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -956,6 +956,10 @@ Tcl_UtfNext(
const char *next;
if (((*src) & 0xC0) == 0x80) {
+ /* Continuation byte, so we start 'inside' a (possible valid) UTF-8
+ * sequence. Since we are not allowed to access src[-1], we cannot
+ * check if the sequence is actually valid, the best we can do is
+ * just assume it is valid and locate the end. */
if ((((*++src) & 0xC0) == 0x80) && (((*++src) & 0xC0) == 0x80)) {
++src;
}
@@ -1819,7 +1823,7 @@ Tcl_UniCharNcmp(
const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */
unsigned long numChars) /* Number of unichars to compare. */
{
-#ifdef WORDS_BIGENDIAN
+#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3)
/*
* We are definitely on a big-endian machine; memcmp() is safe
*/
@@ -1833,6 +1837,14 @@ Tcl_UniCharNcmp(
for ( ; numChars != 0; ucs++, uct++, numChars--) {
if (*ucs != *uct) {
+#if TCL_UTF_MAX < 4
+ /* special case for handling upper surrogates */
+ if (((*ucs & 0xFC00) == 0xD800) && ((*uct & 0xFC00) != 0xD800)) {
+ return 1;
+ } else if (((*uct & 0xFC00) == 0xD800)) {
+ return -1;
+ }
+#endif
return (*ucs - *uct);
}
}
@@ -1870,6 +1882,14 @@ Tcl_UniCharNcasecmp(
Tcl_UniChar lct = Tcl_UniCharToLower(*uct);
if (lcs != lct) {
+#if TCL_UTF_MAX < 4
+ /* special case for handling upper surrogates */
+ if (((lcs & 0xFC00) == 0xD800) && ((lct & 0xFC00) != 0xD800)) {
+ return 1;
+ } else if (((lct & 0xFC00) == 0xD800)) {
+ return -1;
+ }
+#endif
return (lcs - lct);
}
}