diff options
-rw-r--r-- | doc/Utf.3 | 49 | ||||
-rw-r--r-- | generic/regc_locale.c | 2 | ||||
-rw-r--r-- | generic/tcl.decls | 34 | ||||
-rw-r--r-- | generic/tcl.h | 10 | ||||
-rw-r--r-- | generic/tclBinary.c | 4 | ||||
-rw-r--r-- | generic/tclDecls.h | 88 | ||||
-rw-r--r-- | generic/tclEncoding.c | 102 | ||||
-rw-r--r-- | generic/tclScan.c | 2 | ||||
-rw-r--r-- | generic/tclStubInit.c | 65 | ||||
-rw-r--r-- | generic/tclUtf.c | 22 |
10 files changed, 228 insertions, 150 deletions
@@ -8,7 +8,7 @@ .so man.macros .BS .SH NAME -Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_UniCharLen, Tcl_UniCharNcmp, Tcl_UniCharNcasecmp, Tcl_UniCharCaseMatch, Tcl_UtfNcmp, Tcl_UtfNcasecmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings +Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UtfToUtf16, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_Utf16ToUtfDString, Tcl_UtfToUtf16DString, Tcl_UniCharLen, Tcl_Utf16Len, Tcl_UniCharNcmp, Tcl_UniCharNcasecmp, Tcl_Utf16Ncmp, Tcl_Utf16Ncasecmp, Tcl_UniCharCaseMatch, Tcl_Utf16CaseMatch, Tcl_UtfNcmp, Tcl_UtfNcasecmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings .SH SYNOPSIS .nf \fB#include <tcl.h>\fR @@ -21,25 +21,46 @@ int int \fBTcl_UtfToUniChar\fR(\fIsrc, chPtr\fR) .sp +int +\fBTcl_UtfToUtf16\fR(\fIsrc, utf16Ptr\fR) +.sp char * \fBTcl_UniCharToUtfDString\fR(\fIuniStr, uniLength, dsPtr\fR) .sp +char * +\fBTcl_Utf16ToUtfDString\fR(\fIutf16Str, uniLength, dsPtr\fR) +.sp Tcl_UniChar * \fBTcl_UtfToUniCharDString\fR(\fIsrc, length, dsPtr\fR) .sp +unsigned short * +\fBTcl_UtfToUtf16DString\fR(\fIsrc, length, dsPtr\fR) +.sp int \fBTcl_UniCharLen\fR(\fIuniStr\fR) .sp int +\fBTcl_Utf16Len\fR(\fIutf16Str\fR) +.sp +int \fBTcl_UniCharNcmp\fR(\fIucs, uct, numChars\fR) .sp int \fBTcl_UniCharNcasecmp\fR(\fIucs, uct, numChars\fR) .sp int +\fBTcl_Utf16Ncmp\fR(\fIutf16s, tf16t, numChars\fR) +.sp +int +\fBTcl_Utf16Ncasecmp\fR(\fIutf16s, utf16t, numChars\fR) +.sp +int \fBTcl_UniCharCaseMatch\fR(\fIuniStr, uniPattern, nocase\fR) .sp int +\fBTcl_Utf16CaseMatch\fR(\fIutf16Str, utf16Pattern, nocase\fR) +.sp +int \fBTcl_UtfNcmp\fR(\fIcs, ct, numChars\fR) .sp int @@ -80,6 +101,8 @@ Buffer in which the UTF-8 representation of the Tcl_UniChar is stored. At most The Unicode character to be converted or examined. .AP Tcl_UniChar *chPtr out Filled with the Tcl_UniChar represented by the head of the UTF-8 string. +.AP unsigned short *utf16Ptr out +Filled with the utf-16 represented by the head of the UTF-8 string. .AP "const char" *src in Pointer to a UTF-8 string. .AP "const char" *cs in @@ -94,6 +117,14 @@ A null-terminated Unicode string. A null-terminated Unicode string. .AP "const Tcl_UniChar" *uniPattern in A null-terminated Unicode string. +.AP "const unsigned short" *utf16Str in +A null-terminated utf-16 string. +.AP "const unsigned short" *utf16s in +A null-terminated utf-16 string. +.AP "const unsigned short" *utf16t in +A null-terminated utf-16 string. +.AP "const unsigned short" *utf16Pattern in +A null-terminated utf-16 string. .AP int length in The length of the UTF-8 string in bytes (not UTF-8 characters). If negative, all bytes up to the first null byte are used. @@ -121,8 +152,8 @@ case-insensitive (1). .SH DESCRIPTION .PP -These routines convert between UTF-8 strings and Unicode characters. An -Unicode character represented as an unsigned, fixed-size +These routines convert between UTF-8 strings and Unicode/Utf-16 characters. +An Unicode character represented as an unsigned, fixed-size quantity. A UTF-8 character is a Unicode character represented as a varying-length sequence of up to \fBTCL_UTF_MAX\fR bytes. A multibyte UTF-8 sequence consists of a lead byte followed by some number of trail bytes. @@ -133,9 +164,10 @@ represent one Unicode character in the UTF-8 representation. \fBTcl_UniCharToUtf\fR stores the character \fIch\fR as a UTF-8 string in starting at \fIbuf\fR. The return value is the number of bytes stored in \fIbuf\fR. If ch is a high surrogate (range U+D800 - U+DBFF), then -the return value will be 0 and nothing will be stored. If you still -want to produce UTF-8 output for it (even though knowing it's an illegal -code-point on its own), just call \fBTcl_UniCharToUtf\fR again using ch = -1. +the return value will be 1 and a single byte in the range 0xF0 - 0xF4 +will be stored. If you still want to produce UTF-8 output for it (even +though knowing it's an illegal code-point on its own), just call +\fBTcl_UniCharToUtf\fR again specifying ch = -1. .PP \fBTcl_UtfToUniChar\fR reads one UTF-8 character starting at \fIsrc\fR and stores it as a Tcl_UniChar in \fI*chPtr\fR. The return value is the @@ -187,6 +219,11 @@ is the Unicode case insensitive version. a Unicode pattern, and a boolean value specifying whether the match should be case sensitive and returns whether the string matches the pattern. .PP +\fBTcl_Utf16CaseMatch\fR is the utf-16 equivalent to +\fBTcl_StringCaseMatch\fR. It accepts a null-terminated utf-16 string, +a utf-16 pattern, and a boolean value specifying whether the match should +be case sensitive and returns whether the string matches the pattern. +.PP \fBTcl_UtfNcmp\fR corresponds to \fBstrncmp\fR for UTF-8 strings. It accepts two null-terminated UTF-8 strings and the number of characters to compare. (Both strings are assumed to be at least \fInumChars\fR diff --git a/generic/regc_locale.c b/generic/regc_locale.c index 3fa9b04..afe6298 100644 --- a/generic/regc_locale.c +++ b/generic/regc_locale.c @@ -833,7 +833,7 @@ element( */ Tcl_DStringInit(&ds); - np = Tcl_UniCharToUtfDString(startp, (int)len, &ds); + np = Tcl_UniCharToUtfDString(startp, len, &ds); for (cn=cnames; cn->name!=NULL; cn++) { if (strlen(cn->name)==len && strncmp(cn->name, np, len)==0) { break; /* NOTE BREAK OUT */ diff --git a/generic/tcl.decls b/generic/tcl.decls index 2c21b91..ca47f11 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -2381,44 +2381,44 @@ declare 643 { # TIP #??? declare 644 { - int *Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr) + Tcl_Obj *Tcl_NewUnicodeObj(const int *unicode, int numChars) } declare 645 { - Tcl_Obj *Tcl_NewUnicodeObj(const int *unicode, int numChars) + void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, const int *unicode, + int numChars) } declare 646 { - int Tcl_UtfToUniChar(const char *src, int *chPtr) + int *Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr) } declare 647 { - int Tcl_UniCharLen(const int *uniStr) + void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const int *unicode, + int length) } declare 648 { - int Tcl_UniCharNcmp(const int *ucs, const int *uct, - unsigned long numChars) + int Tcl_UtfToUniChar(const char *src, int *chPtr) } declare 649 { - int Tcl_UniCharNcasecmp(const int *ucs, const int *uct, - unsigned long numChars) -} -declare 650 { char *Tcl_UniCharToUtfDString(const int *uniStr, int uniLength, Tcl_DString *dsPtr) } -declare 651 { +declare 650 { int *Tcl_UtfToUniCharDString(const char *src, int length, Tcl_DString *dsPtr) } +declare 651 { + int Tcl_UniCharLen(const int *uniStr) +} declare 652 { - int Tcl_UniCharCaseMatch(const int *uniStr, - const int *uniPattern, int nocase) + int Tcl_UniCharNcmp(const int *ucs, const int *uct, + unsigned long numChars) } declare 653 { - void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const int *unicode, - int length) + int Tcl_UniCharNcasecmp(const int *ucs, const int *uct, + unsigned long numChars) } declare 654 { - void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, const int *unicode, - int numChars) + int Tcl_UniCharCaseMatch(const int *uniStr, + const int *uniPattern, int nocase) } diff --git a/generic/tcl.h b/generic/tcl.h index 63d845d..e168c60 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2152,13 +2152,9 @@ typedef struct Tcl_EncodingType { #if TCL_UTF_MAX > 3 /* - * unsigned int isn't 100% accurate as it should be a strict 4-byte value - * (perhaps wchar_t). 64-bit systems may have troubles. The size of this - * value must be reflected correctly in regcustom.h and - * in tclEncoding.c. - * XXX: Tcl is currently UCS-2 and planning UTF-16 for the Unicode - * XXX: string rep that Tcl_UniChar represents. Changing the size - * XXX: of Tcl_UniChar is /not/ supported. + * int isn't 100% accurate as it should be a strict 4-byte value + * (perhaps wchar_t). ILP64 systems may have troubles. The size of this + * value must be reflected correctly in regcustom.h. */ typedef int Tcl_UniChar; #else diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 8600b3f..1f78d18 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -1354,7 +1354,7 @@ BinaryFormatCmd( badField: { Tcl_UniChar ch = 0; - char buf[5] = ""; + char buf[TCL_UTF_MAX + 1] = ""; TclUtfToUniChar(errorString, &ch); buf[Tcl_UniCharToUtf(ch, buf)] = '\0'; @@ -1724,7 +1724,7 @@ BinaryScanCmd( badField: { Tcl_UniChar ch = 0; - char buf[5] = ""; + char buf[TCL_UTF_MAX + 1] = ""; TclUtfToUniChar(errorString, &ch); buf[Tcl_UniCharToUtf(ch, buf)] = '\0'; diff --git a/generic/tclDecls.h b/generic/tclDecls.h index f0d0b4c..7a8ad87 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1897,35 +1897,35 @@ EXTERN void Tcl_DecrRefCount(Tcl_Obj *objPtr); /* 643 */ EXTERN int Tcl_IsShared(Tcl_Obj *objPtr); /* 644 */ -EXTERN int * Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, - int *lengthPtr); -/* 645 */ EXTERN Tcl_Obj * Tcl_NewUnicodeObj(const int *unicode, int numChars); +/* 645 */ +EXTERN void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, + const int *unicode, int numChars); /* 646 */ -EXTERN int Tcl_UtfToUniChar(const char *src, int *chPtr); +EXTERN int * Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, + int *lengthPtr); /* 647 */ -EXTERN int Tcl_UniCharLen(const int *uniStr); +EXTERN void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, + const int *unicode, int length); /* 648 */ -EXTERN int Tcl_UniCharNcmp(const int *ucs, const int *uct, - unsigned long numChars); +EXTERN int Tcl_UtfToUniChar(const char *src, int *chPtr); /* 649 */ -EXTERN int Tcl_UniCharNcasecmp(const int *ucs, const int *uct, - unsigned long numChars); -/* 650 */ EXTERN char * Tcl_UniCharToUtfDString(const int *uniStr, int uniLength, Tcl_DString *dsPtr); -/* 651 */ +/* 650 */ EXTERN int * Tcl_UtfToUniCharDString(const char *src, int length, Tcl_DString *dsPtr); +/* 651 */ +EXTERN int Tcl_UniCharLen(const int *uniStr); /* 652 */ -EXTERN int Tcl_UniCharCaseMatch(const int *uniStr, - const int *uniPattern, int nocase); +EXTERN int Tcl_UniCharNcmp(const int *ucs, const int *uct, + unsigned long numChars); /* 653 */ -EXTERN void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, - const int *unicode, int length); +EXTERN int Tcl_UniCharNcasecmp(const int *ucs, const int *uct, + unsigned long numChars); /* 654 */ -EXTERN void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, - const int *unicode, int numChars); +EXTERN int Tcl_UniCharCaseMatch(const int *uniStr, + const int *uniPattern, int nocase); typedef struct { const struct TclPlatStubs *tclPlatStubs; @@ -2605,17 +2605,17 @@ typedef struct TclStubs { void (*tcl_IncrRefCount) (Tcl_Obj *objPtr); /* 641 */ void (*tcl_DecrRefCount) (Tcl_Obj *objPtr); /* 642 */ int (*tcl_IsShared) (Tcl_Obj *objPtr); /* 643 */ - int * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int *lengthPtr); /* 644 */ - Tcl_Obj * (*tcl_NewUnicodeObj) (const int *unicode, int numChars); /* 645 */ - int (*tcl_UtfToUniChar) (const char *src, int *chPtr); /* 646 */ - int (*tcl_UniCharLen) (const int *uniStr); /* 647 */ - int (*tcl_UniCharNcmp) (const int *ucs, const int *uct, unsigned long numChars); /* 648 */ - int (*tcl_UniCharNcasecmp) (const int *ucs, const int *uct, unsigned long numChars); /* 649 */ - char * (*tcl_UniCharToUtfDString) (const int *uniStr, int uniLength, Tcl_DString *dsPtr); /* 650 */ - int * (*tcl_UtfToUniCharDString) (const char *src, int length, Tcl_DString *dsPtr); /* 651 */ - int (*tcl_UniCharCaseMatch) (const int *uniStr, const int *uniPattern, int nocase); /* 652 */ - void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const int *unicode, int length); /* 653 */ - void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const int *unicode, int numChars); /* 654 */ + Tcl_Obj * (*tcl_NewUnicodeObj) (const int *unicode, int numChars); /* 644 */ + void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const int *unicode, int numChars); /* 645 */ + int * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int *lengthPtr); /* 646 */ + void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const int *unicode, int length); /* 647 */ + int (*tcl_UtfToUniChar) (const char *src, int *chPtr); /* 648 */ + char * (*tcl_UniCharToUtfDString) (const int *uniStr, int uniLength, Tcl_DString *dsPtr); /* 649 */ + int * (*tcl_UtfToUniCharDString) (const char *src, int length, Tcl_DString *dsPtr); /* 650 */ + int (*tcl_UniCharLen) (const int *uniStr); /* 651 */ + int (*tcl_UniCharNcmp) (const int *ucs, const int *uct, unsigned long numChars); /* 652 */ + int (*tcl_UniCharNcasecmp) (const int *ucs, const int *uct, unsigned long numChars); /* 653 */ + int (*tcl_UniCharCaseMatch) (const int *uniStr, const int *uniPattern, int nocase); /* 654 */ } TclStubs; extern const TclStubs *tclStubsPtr; @@ -3934,28 +3934,28 @@ extern const TclStubs *tclStubsPtr; (tclStubsPtr->tcl_DecrRefCount) /* 642 */ #define Tcl_IsShared \ (tclStubsPtr->tcl_IsShared) /* 643 */ -#define Tcl_GetUnicodeFromObj \ - (tclStubsPtr->tcl_GetUnicodeFromObj) /* 644 */ #define Tcl_NewUnicodeObj \ - (tclStubsPtr->tcl_NewUnicodeObj) /* 645 */ + (tclStubsPtr->tcl_NewUnicodeObj) /* 644 */ +#define Tcl_SetUnicodeObj \ + (tclStubsPtr->tcl_SetUnicodeObj) /* 645 */ +#define Tcl_GetUnicodeFromObj \ + (tclStubsPtr->tcl_GetUnicodeFromObj) /* 646 */ +#define Tcl_AppendUnicodeToObj \ + (tclStubsPtr->tcl_AppendUnicodeToObj) /* 647 */ #define Tcl_UtfToUniChar \ - (tclStubsPtr->tcl_UtfToUniChar) /* 646 */ + (tclStubsPtr->tcl_UtfToUniChar) /* 648 */ +#define Tcl_UniCharToUtfDString \ + (tclStubsPtr->tcl_UniCharToUtfDString) /* 649 */ +#define Tcl_UtfToUniCharDString \ + (tclStubsPtr->tcl_UtfToUniCharDString) /* 650 */ #define Tcl_UniCharLen \ - (tclStubsPtr->tcl_UniCharLen) /* 647 */ + (tclStubsPtr->tcl_UniCharLen) /* 651 */ #define Tcl_UniCharNcmp \ - (tclStubsPtr->tcl_UniCharNcmp) /* 648 */ + (tclStubsPtr->tcl_UniCharNcmp) /* 652 */ #define Tcl_UniCharNcasecmp \ - (tclStubsPtr->tcl_UniCharNcasecmp) /* 649 */ -#define Tcl_UniCharToUtfDString \ - (tclStubsPtr->tcl_UniCharToUtfDString) /* 650 */ -#define Tcl_UtfToUniCharDString \ - (tclStubsPtr->tcl_UtfToUniCharDString) /* 651 */ + (tclStubsPtr->tcl_UniCharNcasecmp) /* 653 */ #define Tcl_UniCharCaseMatch \ - (tclStubsPtr->tcl_UniCharCaseMatch) /* 652 */ -#define Tcl_AppendUnicodeToObj \ - (tclStubsPtr->tcl_AppendUnicodeToObj) /* 653 */ -#define Tcl_SetUnicodeObj \ - (tclStubsPtr->tcl_SetUnicodeObj) /* 654 */ + (tclStubsPtr->tcl_UniCharCaseMatch) /* 654 */ #endif /* defined(USE_TCL_STUBS) */ diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 3c73c68..34fd551 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -569,11 +569,16 @@ TclInitEncodingSubsystem(void) TableEncodingData *dataPtr; unsigned size; unsigned short i; + union { + char c; + short s; + } isLe; if (encodingsInitialized) { return; } + isLe.s = 1; Tcl_MutexLock(&encodingMutex); Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS); Tcl_MutexUnlock(&encodingMutex); @@ -600,20 +605,32 @@ TclInitEncodingSubsystem(void) type.clientData = NULL; Tcl_CreateEncoding(&type); - type.encodingName = "ucs-2"; type.toUtfProc = Utf16ToUtfProc; type.fromUtfProc = UtfToUcs2Proc; type.freeProc = NULL; type.nullSize = 2; - type.clientData = NULL; + type.encodingName = "ucs-2le"; + type.clientData = INT2PTR(1); + Tcl_CreateEncoding(&type); + type.encodingName = "ucs-2be"; + type.clientData = INT2PTR(0); + Tcl_CreateEncoding(&type); + type.encodingName = "ucs-2"; + type.clientData = INT2PTR(isLe.c); Tcl_CreateEncoding(&type); - type.encodingName = "utf-16"; type.toUtfProc = Utf16ToUtfProc; type.fromUtfProc = UtfToUtf16Proc; type.freeProc = NULL; type.nullSize = 2; - type.clientData = NULL; + type.encodingName = "utf-16le"; + type.clientData = INT2PTR(1);; + Tcl_CreateEncoding(&type); + type.encodingName = "utf-16be"; + type.clientData = INT2PTR(0); + Tcl_CreateEncoding(&type); + type.encodingName = "utf-16"; + type.clientData = INT2PTR(isLe.c);; Tcl_CreateEncoding(&type); #ifndef TCL_NO_DEPRECATED @@ -2434,7 +2451,7 @@ UtfToUtfProc( static int Utf16ToUtfProc( - ClientData clientData, /* Not used. */ + ClientData clientData, /* != NULL means LE, == NUL means BE */ const char *src, /* Source string in Unicode. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ @@ -2486,12 +2503,15 @@ Utf16ToUtfProc( break; } + if (clientData) { + ch = (src[1] & 0xFF) << 8 | (src[0] & 0xFF); + } else { + ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF); + } /* * Special case for 1-byte utf chars for speed. Make sure we work with * unsigned short-size data. */ - - ch = *(unsigned short *)src; if (ch && ch < 0x80) { *dst++ = (ch & 0xFF); } else { @@ -2524,8 +2544,7 @@ Utf16ToUtfProc( static int UtfToUtf16Proc( - ClientData clientData, /* TableEncodingData that specifies - * encoding. */ + ClientData clientData, /* != NULL means LE, == NUL means BE */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ @@ -2589,37 +2608,37 @@ UtfToUtf16Proc( * casting dst to a Tcl_UniChar. [Bug 1122671] */ -#ifdef WORDS_BIGENDIAN + if (clientData) { #if TCL_UTF_MAX > 3 - if (*chPtr <= 0xFFFF) { - *dst++ = (*chPtr >> 8); - *dst++ = (*chPtr & 0xFF); - } else { - *dst++ = ((*chPtr & 0x3) >> 8) | 0xDC; - *dst++ = (*chPtr & 0xFF); - *dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8; - *dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF); - } -#else - *dst++ = (*chPtr >> 8); - *dst++ = (*chPtr & 0xFF); -#endif + if (*chPtr <= 0xFFFF) { + *dst++ = (*chPtr & 0xFF); + *dst++ = (*chPtr >> 8); + } else { + *dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF); + *dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8; + *dst++ = (*chPtr & 0xFF); + *dst++ = ((*chPtr & 0x3) >> 8) | 0xDC; + } #else -#if TCL_UTF_MAX > 3 - if (*chPtr <= 0xFFFF) { *dst++ = (*chPtr & 0xFF); *dst++ = (*chPtr >> 8); +#endif } else { - *dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF); - *dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8; - *dst++ = (*chPtr & 0xFF); - *dst++ = ((*chPtr & 0x3) >> 8) | 0xDC; - } +#if TCL_UTF_MAX > 3 + if (*chPtr <= 0xFFFF) { + *dst++ = (*chPtr >> 8); + *dst++ = (*chPtr & 0xFF); + } else { + *dst++ = ((*chPtr & 0x3) >> 8) | 0xDC; + *dst++ = (*chPtr & 0xFF); + *dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8; + *dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF); + } #else - *dst++ = (*chPtr & 0xFF); - *dst++ = (*chPtr >> 8); -#endif + *dst++ = (*chPtr >> 8); + *dst++ = (*chPtr & 0xFF); #endif + } } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; @@ -2645,8 +2664,7 @@ UtfToUtf16Proc( static int UtfToUcs2Proc( - ClientData clientData, /* TableEncodingData that specifies - * encoding. */ + ClientData clientData, /* != NULL means LE, == NUL means BE */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ @@ -2721,13 +2739,13 @@ UtfToUcs2Proc( * casting dst to a Tcl_UniChar. [Bug 1122671] */ -#ifdef WORDS_BIGENDIAN - *dst++ = (ch >> 8); - *dst++ = (ch & 0xFF); -#else - *dst++ = (ch & 0xFF); - *dst++ = (ch >> 8); -#endif + if (clientData) { + *dst++ = (ch & 0xFF); + *dst++ = (ch >> 8); + } else { + *dst++ = (ch >> 8); + *dst++ = (ch & 0xFF); + } } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; diff --git a/generic/tclScan.c b/generic/tclScan.c index 068450c..b03664f 100644 --- a/generic/tclScan.c +++ b/generic/tclScan.c @@ -261,7 +261,7 @@ ValidateFormat( Tcl_UniChar ch = 0; int objIndex, xpgSize, nspace = numVars; int *nassign = TclStackAlloc(interp, nspace * sizeof(int)); - char buf[5] = ""; + char buf[TCL_UTF_MAX + 1] = ""; Tcl_Obj *errorMsg; /* Place to build an error messages. Note that * these are messy operations because we do * not want to use the formatting engine; diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index ca26e8d..34619c2 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -80,15 +80,21 @@ static void uniCodePanic() { } #if TCL_UTF_MAX == 3 -#ifdef TCL_NO_DEPRECATED -# define Tcl_GetUnicode 0 -#endif # define Tcl_GetUnicodeFromObj (int *(*)(Tcl_Obj *, int *)) uniCodePanic # define Tcl_NewUnicodeObj (Tcl_Obj *(*)(const int *, int)) uniCodePanic -# define Tcl_SetUnicodeObj (void(*)(Tcl_Obj *,const int *, int)) uniCodePanic -# define Tcl_AppendUnicodeToObj (void(*)(Tcl_Obj *, const int *, int)) uniCodePanic +# define Tcl_SetUnicodeObj (void (*)(Tcl_Obj *,const int *, int)) uniCodePanic +# define Tcl_AppendUnicodeToObj (void (*)(Tcl_Obj *, const int *, int)) uniCodePanic +# define Tcl_UtfToUniChar (int (*)(const char *, int *)) uniCodePanic +# define Tcl_UniCharToUtfDString (char *(*)(const int *, int, Tcl_DString *)) uniCodePanic +# define Tcl_UtfToUniCharDString (int *(*)(const char *, int, Tcl_DString *)) uniCodePanic +# define Tcl_UniCharCaseMatch (int (*)(const int *, const int *, int)) uniCodePanic +# define Tcl_UniCharLen (int (*)(const int *)) uniCodePanic +# define Tcl_UniCharNcmp (int (*)(const int *, const int *, unsigned long)) uniCodePanic +# define Tcl_UniCharNcasecmp (int (*)(const int *, const int *, unsigned long)) uniCodePanic #else -# define Tcl_GetUnicode (unsigned short *(*)(Tcl_Obj *)) uniCodePanic +#if !defined(TCL_NO_DEPRECATED) && TCL_MAJOR_VERSION < 9 +# define Tcl_GetUnicode (unsigned short *(*)(Tcl_Obj *)) uniCodePanic +# endif # define Tcl_GetUtf16FromObj (unsigned short *(*)(Tcl_Obj *, int *)) uniCodePanic # define Tcl_NewUtf16Obj (Tcl_Obj *(*)(const unsigned short *, int)) uniCodePanic # define Tcl_SetUtf16Obj (void(*)(Tcl_Obj *, const unsigned short *, int)) uniCodePanic @@ -137,6 +143,8 @@ static int TclSockMinimumBuffersOld(int sock, int size) # define Tcl_NewLongObj 0 # define Tcl_DbNewLongObj 0 # define Tcl_BackgroundError 0 +# define Tcl_GetUnicode 0 + #else #define TclBNInitBignumFromLong initBignumFromLong static void TclBNInitBignumFromLong(mp_int *a, long b) @@ -341,10 +349,6 @@ static int exprIntObj(Tcl_Interp *interp, Tcl_Obj*expr, int *ptr){ return result; } #define Tcl_ExprLongObj (int(*)(Tcl_Interp*,Tcl_Obj*,long*))exprIntObj -static int uniCharNcmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned int n){ - return Tcl_UniCharNcmp(ucs, uct, (unsigned long)n); -} -#define Tcl_UniCharNcmp (int(*)(const Tcl_UniChar*,const Tcl_UniChar*,unsigned long))uniCharNcmp static int utfNcmp(const char *s1, const char *s2, unsigned int n){ return Tcl_UtfNcmp(s1, s2, (unsigned long)n); } @@ -353,10 +357,25 @@ static int utfNcasecmp(const char *s1, const char *s2, unsigned int n){ return Tcl_UtfNcasecmp(s1, s2, (unsigned long)n); } #define Tcl_UtfNcasecmp (int(*)(const char*,const char*,unsigned long))utfNcasecmp -static int uniCharNcasecmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned int n){ +#if TCL_UTF_MAX > 3 +static int uniCharNcmp(const int *ucs, const int *uct, unsigned int n){ + return Tcl_UniCharNcmp(ucs, uct, (unsigned long)n); +} +#define Tcl_UniCharNcmp (int(*)(const int*,const int*,unsigned long))uniCharNcmp +static int uniCharNcasecmp(const int *ucs, const int *uct, unsigned int n){ return Tcl_UniCharNcasecmp(ucs, uct, (unsigned long)n); } -#define Tcl_UniCharNcasecmp (int(*)(const Tcl_UniChar*,const Tcl_UniChar*,unsigned long))uniCharNcasecmp +#define Tcl_UniCharNcasecmp (int(*)(const int*,const int*,unsigned long))uniCharNcasecmp +#else +static int utf16Ncmp(const unsigned short *ucs, const unsigned short *uct, unsigned int n){ + return Tcl_Utf16Ncmp(ucs, uct, (unsigned long)n); +} +#define Tcl_Utf16Ncmp (int(*)(const unsigned short*,const unsigned short*,unsigned long))utf16Ncmp +static int utf16Ncasecmp(const unsigned short *ucs, const unsigned short *uct, unsigned int n){ + return Tcl_Utf16Ncasecmp(ucs, uct, (unsigned long)n); +} +#define Tcl_Utf16Ncasecmp (int(*)(const unsigned short*,const unsigned short*,unsigned long))utf16Ncasecmp +#endif #endif /* TCL_WIDE_INT_IS_LONG */ @@ -1659,17 +1678,17 @@ const TclStubs tclStubs = { Tcl_IncrRefCount, /* 641 */ Tcl_DecrRefCount, /* 642 */ Tcl_IsShared, /* 643 */ - Tcl_GetUnicodeFromObj, /* 644 */ - Tcl_NewUnicodeObj, /* 645 */ - Tcl_UtfToUniChar, /* 646 */ - Tcl_UniCharLen, /* 647 */ - Tcl_UniCharNcmp, /* 648 */ - Tcl_UniCharNcasecmp, /* 649 */ - Tcl_UniCharToUtfDString, /* 650 */ - Tcl_UtfToUniCharDString, /* 651 */ - Tcl_UniCharCaseMatch, /* 652 */ - Tcl_AppendUnicodeToObj, /* 653 */ - Tcl_SetUnicodeObj, /* 654 */ + Tcl_NewUnicodeObj, /* 644 */ + Tcl_SetUnicodeObj, /* 645 */ + Tcl_GetUnicodeFromObj, /* 646 */ + Tcl_AppendUnicodeToObj, /* 647 */ + Tcl_UtfToUniChar, /* 648 */ + Tcl_UniCharToUtfDString, /* 649 */ + Tcl_UtfToUniCharDString, /* 650 */ + Tcl_UniCharLen, /* 651 */ + Tcl_UniCharNcmp, /* 652 */ + Tcl_UniCharNcasecmp, /* 653 */ + Tcl_UniCharCaseMatch, /* 654 */ }; /* !END!: Do not edit above this line. */ diff --git a/generic/tclUtf.c b/generic/tclUtf.c index c5a2ca5..7866afd 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -223,7 +223,7 @@ three: *--------------------------------------------------------------------------- */ -#undef Tcl_UniCharToUtfDString +#if TCL_UTF_MAX > 3 char * Tcl_UniCharToUtfDString( const int *uniStr, /* Unicode string to convert to UTF-8. */ @@ -253,6 +253,7 @@ Tcl_UniCharToUtfDString( return string; } +#endif /* TCL_UTF_MAX > 3 */ char * Tcl_Utf16ToUtfDString( @@ -337,7 +338,7 @@ static const unsigned short cp1252[32] = { 0x2DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x9D, 0x017E, 0x0178 }; -#undef Tcl_UtfToUniChar +#if TCL_UTF_MAX > 3 int Tcl_UtfToUniChar( const char *src, /* The UTF-8 string. */ @@ -421,6 +422,7 @@ Tcl_UtfToUniChar( *chPtr = byte; return 1; } +#endif /* TCL_UTF_MAX > 3 */ int Tcl_UtfToUtf16( @@ -540,7 +542,7 @@ Tcl_UtfToUtf16( *--------------------------------------------------------------------------- */ -#undef Tcl_UtfToUniCharDString +#if TCL_UTF_MAX > 3 int * Tcl_UtfToUniCharDString( const char *src, /* UTF-8 string to convert to Unicode. */ @@ -593,6 +595,7 @@ Tcl_UtfToUniCharDString( return wString; } +#endif /* TCL_UTF_MAX > 3 */ unsigned short * Tcl_UtfToUtf16DString( @@ -1636,7 +1639,7 @@ Tcl_UniCharToTitle( *---------------------------------------------------------------------- */ -#undef Tcl_UniCharLen +#if TCL_UTF_MAX > 3 int Tcl_UniCharLen( const int *uniStr) /* Unicode string to find length of. */ @@ -1649,6 +1652,7 @@ Tcl_UniCharLen( } return len; } +#endif /* TCL_UTF_MAX > 3 */ int Tcl_Utf16Len( @@ -1680,7 +1684,7 @@ Tcl_Utf16Len( *---------------------------------------------------------------------- */ -#undef Tcl_UniCharNcmp +#if TCL_UTF_MAX > 3 int Tcl_UniCharNcmp( const int *ucs, /* Unicode string to compare to uct. */ @@ -1707,6 +1711,7 @@ Tcl_UniCharNcmp( return 0; #endif /* WORDS_BIGENDIAN */ } +#endif /* TCL_UTF_MAX > 3 */ int Tcl_Utf16Ncmp( @@ -1753,7 +1758,7 @@ Tcl_Utf16Ncmp( *---------------------------------------------------------------------- */ -#undef Tcl_UniCharNcasecmp +#if TCL_UTF_MAX > 3 int Tcl_UniCharNcasecmp( const int *ucs, /* Unicode string to compare to uct. */ @@ -1772,6 +1777,8 @@ Tcl_UniCharNcasecmp( } return 0; } +#endif /* TCL_UTF_MAX > 3 */ + int Tcl_Utf16Ncasecmp( const unsigned short *ucs, /* Utf16 string to compare to uct. */ @@ -2123,7 +2130,7 @@ Tcl_UniCharIsWordChar( *---------------------------------------------------------------------- */ -#undef Tcl_UniCharCaseMatch +#if TCL_UTF_MAX > 3 int Tcl_UniCharCaseMatch( const int *uniStr, /* Unicode String. */ @@ -2290,6 +2297,7 @@ Tcl_UniCharCaseMatch( uniPattern++; } } +#endif /* TCL_UTF_MAX > 3 */ int Tcl_Utf16CaseMatch( |