From 0834bb45471baf76fe11345ac6893304cd971420 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 10 Mar 2021 15:39:15 +0000 Subject: Fix [4c591fa487]: [string compare] EIAS violation --- generic/tclCmdMZ.c | 2 +- generic/tclInt.h | 2 +- generic/tclUtf.c | 17 ++++++++--------- tests/utf.test | 20 +------------------- 4 files changed, 11 insertions(+), 30 deletions(-) diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index ee30a7a..c895039 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -2755,7 +2755,7 @@ TclStringCmp( s1 = (char *) Tcl_GetUnicode(value1Ptr); s2 = (char *) Tcl_GetUnicode(value2Ptr); if ( -#ifdef WORDS_BIGENDIAN +#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX != 4) 1 #else checkEq diff --git a/generic/tclInt.h b/generic/tclInt.h index 5da21b0..9fec41e 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -4534,7 +4534,7 @@ MODULE_SCOPE void TclDbInitNewObj(Tcl_Obj *objPtr, const char *file, *---------------------------------------------------------------- */ -#ifdef WORDS_BIGENDIAN +#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX != 4) # define TclUniCharNcmp(cs,ct,n) memcmp((cs),(ct),(n)*sizeof(Tcl_UniChar)) #else /* !WORDS_BIGENDIAN */ # define TclUniCharNcmp Tcl_UniCharNcmp diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 57e58c1..6e6d386 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -1592,25 +1592,24 @@ Tcl_UniCharNcmp( const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */ unsigned long numChars) /* Number of unichars to compare. */ { -#ifdef WORDS_BIGENDIAN - /* - * We are definitely on a big-endian machine; memcmp() is safe - */ - - return memcmp(ucs, uct, numChars*sizeof(Tcl_UniChar)); - -#else /* !WORDS_BIGENDIAN */ /* * We can't simply call memcmp() because that is not lexically correct. */ for ( ; numChars != 0; ucs++, uct++, numChars--) { if (*ucs != *uct) { +#if TCL_UTF_MAX == 4 + /* special case for handling upper surrogates */ + if (((*ucs & 0xFC00) == 0xD800) && ((*uct & 0xFC00) != 0xD800)) { + return 1; + } else if (((*uct & 0xFC00) == 0xD800)) { + return -1; + } +#endif return (*ucs - *uct); } } return 0; -#endif /* WORDS_BIGENDIAN */ } /* diff --git a/tests/utf.test b/tests/utf.test index 8985313..f4d9134 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -1332,25 +1332,7 @@ test utf-19.1 {TclUniCharLen} -body { test utf-20.1 {TclUniCharNcmp} ucs4 { string compare [string range [format %c 0xFFFF] 0 0] [string range [format %c 0x10000] 0 0] } -1 -test utf-20.2 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} ucs2 { - set one [format %c 0xFFFF] - set two [format %c 0x10000] - set first [string compare $one $two] - string range $one 0 0 - string range $two 0 0 - set second [string compare $one $two] - expr {($first == $second) ? "agree" : "disagree"} -} agree -test utf-20.2.1 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} {utf16 knownBug} { - set one [format %c 0xFFFF] - set two [format %c 0x10000] - set first [string compare $one $two] - string range $one 0 0 - string range $two 0 0 - set second [string compare $one $two] - expr {($first == $second) ? "agree" : "disagree"} -} agree -test utf-20.2.2 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} ucs4 { +test utf-20.2 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} { set one [format %c 0xFFFF] set two [format %c 0x10000] set first [string compare $one $two] -- cgit v0.12