From b06b03b94b73d1fcb9e5c6b283a4f3dfb39377fb Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 10 Jan 2024 21:01:29 +0000 Subject: Fix [4e38c347a4] Changed contract for Tcl_UtfN(case)cmp in Tcl 8.7 --- generic/tcl.decls | 7 ++++ generic/tclDecls.h | 21 ++++++++---- generic/tclStubInit.c | 4 +-- generic/tclUtf.c | 93 +++++++++++++++++++++++++++++++++++++++++++++------ 4 files changed, 106 insertions(+), 19 deletions(-) diff --git a/generic/tcl.decls b/generic/tcl.decls index 0097eea..ec135a5 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -2502,6 +2502,13 @@ declare 685 { Tcl_Obj *Tcl_DStringToObj(Tcl_DString *dsPtr) } +declare 686 { + int TclUtfNcmp(const char *s1, const char *s2, size_t n) +} +declare 687 { + int TclUtfNcasecmp(const char *s1, const char *s2, size_t n) +} + # ----- BASELINE -- FOR -- 8.7.0 / 9.0.0 ----- # declare 688 { diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 5768233..447bd9a 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -2009,8 +2009,11 @@ EXTERN int Tcl_GetWideUIntFromObj(Tcl_Interp *interp, Tcl_Obj *objPtr, Tcl_WideUInt *uwidePtr); /* 685 */ EXTERN Tcl_Obj * Tcl_DStringToObj(Tcl_DString *dsPtr); -/* Slot 686 is reserved */ -/* Slot 687 is reserved */ +/* 686 */ +EXTERN int TclUtfNcmp(const char *s1, const char *s2, size_t n); +/* 687 */ +EXTERN int TclUtfNcasecmp(const char *s1, const char *s2, + size_t n); /* 688 */ EXTERN void TclUnusedStubEntry(void); @@ -2734,8 +2737,8 @@ typedef struct TclStubs { Tcl_Size (*tcl_GetEncodingNulLength) (Tcl_Encoding encoding); /* 683 */ int (*tcl_GetWideUIntFromObj) (Tcl_Interp *interp, Tcl_Obj *objPtr, Tcl_WideUInt *uwidePtr); /* 684 */ Tcl_Obj * (*tcl_DStringToObj) (Tcl_DString *dsPtr); /* 685 */ - void (*reserved686)(void); - void (*reserved687)(void); + int (*tclUtfNcmp) (const char *s1, const char *s2, size_t n); /* 686 */ + int (*tclUtfNcasecmp) (const char *s1, const char *s2, size_t n); /* 687 */ void (*tclUnusedStubEntry) (void); /* 688 */ } TclStubs; @@ -4124,8 +4127,10 @@ extern const TclStubs *tclStubsPtr; (tclStubsPtr->tcl_GetWideUIntFromObj) /* 684 */ #define Tcl_DStringToObj \ (tclStubsPtr->tcl_DStringToObj) /* 685 */ -/* Slot 686 is reserved */ -/* Slot 687 is reserved */ +#define TclUtfNcmp \ + (tclStubsPtr->tclUtfNcmp) /* 686 */ +#define TclUtfNcasecmp \ + (tclStubsPtr->tclUtfNcasecmp) /* 687 */ #define TclUnusedStubEntry \ (tclStubsPtr->tclUnusedStubEntry) /* 688 */ @@ -4392,6 +4397,10 @@ extern const TclStubs *tclStubsPtr; # define Tcl_GetRange TclGetRange # undef Tcl_GetUniChar # define Tcl_GetUniChar TclGetUniChar +# undef Tcl_UtfNcmp +# define Tcl_UtfNcmp TclUtfNcmp +# undef Tcl_UtfNcasecmp +# define Tcl_UtfNcasecmp TclUtfNcasecmp #endif #if defined(USE_TCL_STUBS) # define Tcl_WCharToUtfDString (sizeof(wchar_t) != sizeof(short) \ diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index e45efc8..dfcc1fb 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -1970,8 +1970,8 @@ const TclStubs tclStubs = { Tcl_GetEncodingNulLength, /* 683 */ Tcl_GetWideUIntFromObj, /* 684 */ Tcl_DStringToObj, /* 685 */ - 0, /* 686 */ - 0, /* 687 */ + TclUtfNcmp, /* 686 */ + TclUtfNcasecmp, /* 687 */ TclUnusedStubEntry, /* 688 */ }; diff --git a/generic/tclUtf.c b/generic/tclUtf.c index a502f69..d495402 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -1229,7 +1229,7 @@ TclUtfAtIndex( const char *src, /* The UTF-8 string. */ Tcl_Size index) /* The position of the desired character. */ { - Tcl_UniChar ch = 0; + Tcl_UniChar ch = 0; while (index-- > 0) { src += TclUtfToUniChar(src, &ch); @@ -1552,7 +1552,45 @@ int Tcl_UtfNcmp( const char *cs, /* UTF string to compare to ct. */ const char *ct, /* UTF string cs is compared to. */ - unsigned long numChars) /* Number of UTF chars to compare. */ + unsigned long numChars) /* Number of UTF-16 chars to compare. */ +{ + unsigned short ch1 = 0, ch2 = 0; + + /* + * Cannot use 'memcmp(cs, ct, n);' as byte representation of \u0000 (the + * pair of bytes 0xC0,0x80) is larger than byte representation of \u0001 + * (the byte 0x01.) + */ + + while (numChars-- > 0) { + /* + * n must be interpreted as chars, not bytes. This should be called + * only when both strings are of at least n UTF-16 chars long (no need for \0 + * check) + */ + + cs += Tcl_UtfToChar16(cs, &ch1); + ct += Tcl_UtfToChar16(ct, &ch2); + if (ch1 != ch2) { + /* Surrogates always report higher than non-surrogates */ + if (((ch1 & 0xFC00) == 0xD800)) { + if ((ch2 & 0xFC00) != 0xD800) { + return ch1; + } + } else if ((ch2 & 0xFC00) == 0xD800) { + return -ch2; + } + return (ch1 - ch2); + } + } + return 0; +} + +int +TclUtfNcmp( + const char *cs, /* UTF string to compare to ct. */ + const char *ct, /* UTF string cs is compared to. */ + size_t numChars) /* Number of UTF chars to compare. */ { Tcl_UniChar ch1 = 0, ch2 = 0; @@ -1632,7 +1670,42 @@ int Tcl_UtfNcasecmp( const char *cs, /* UTF string to compare to ct. */ const char *ct, /* UTF string cs is compared to. */ - unsigned long numChars) /* Number of UTF chars to compare. */ + unsigned long numChars) /* Number of UTF-16 chars to compare. */ +{ + unsigned short ch1 = 0, ch2 = 0; + + while (numChars-- > 0) { + /* + * n must be interpreted as UTF-16 chars, not bytes. + * This should be called only when both strings are of + * at least n UTF-16 chars long (no need for \0 check) + */ + cs += Tcl_UtfToChar16(cs, &ch1); + ct += Tcl_UtfToChar16(ct, &ch2); + if (ch1 != ch2) { + /* Surrogates always report higher than non-surrogates */ + if (((ch1 & 0xFC00) == 0xD800)) { + if ((ch2 & 0xFC00) != 0xD800) { + return ch1; + } + } else if ((ch2 & 0xFC00) == 0xD800) { + return -ch2; + } + ch1 = Tcl_UniCharToLower(ch1); + ch2 = Tcl_UniCharToLower(ch2); + if (ch1 != ch2) { + return (ch1 - ch2); + } + } + } + return 0; +} + +int +TclUtfNcasecmp( + const char *cs, /* UTF string to compare to ct. */ + const char *ct, /* UTF string cs is compared to. */ + size_t numChars) /* Number of UTF chars to compare. */ { Tcl_UniChar ch1 = 0, ch2 = 0; @@ -2057,8 +2130,8 @@ TclUniCharNcasecmp( { for ( ; numChars != 0; numChars--, ucs++, uct++) { if (*ucs != *uct) { - int lcs = Tcl_UniCharToLower(*ucs); - int lct = Tcl_UniCharToLower(*uct); + Tcl_UniChar lcs = Tcl_UniCharToLower(*ucs); + Tcl_UniChar lct = Tcl_UniCharToLower(*uct); if (lcs != lct) { return (lcs - lct); @@ -2078,8 +2151,8 @@ TclUniCharNcasememcmp( const Tcl_UniChar *uct = (const Tcl_UniChar *)uctPtr; for ( ; numChars != 0; numChars--, ucs++, uct++) { if (*ucs != *uct) { - int lcs = Tcl_UniCharToLower(*ucs); - int lct = Tcl_UniCharToLower(*uct); + Tcl_UniChar lcs = Tcl_UniCharToLower(*ucs); + Tcl_UniChar lct = Tcl_UniCharToLower(*uct); if (lcs != lct) { return (lcs - lct); @@ -2115,7 +2188,6 @@ Tcl_UniCharNcasecmp( return 0; } #endif - /* *---------------------------------------------------------------------- @@ -2486,7 +2558,7 @@ TclUniCharCaseMatch( * characters. */ int nocase) /* 0 for case sensitive, 1 for insensitive */ { - int ch1 = 0, p; + Tcl_UniChar ch1 = 0, p; while (1) { p = *uniPattern; @@ -2574,7 +2646,7 @@ TclUniCharCaseMatch( */ if (p == '[') { - int startChar, endChar; + Tcl_UniChar startChar, endChar; uniPattern++; ch1 = (nocase ? Tcl_UniCharToLower(*uniStr) : *uniStr); @@ -2814,7 +2886,6 @@ Tcl_UniCharCaseMatch( } #endif - /* *---------------------------------------------------------------------- * -- cgit v0.12