diff options
| -rw-r--r-- | .travis.yml | 37 | ||||
| -rw-r--r-- | doc/Utf.3 | 14 | ||||
| -rw-r--r-- | generic/regcustom.h | 2 | ||||
| -rw-r--r-- | generic/tcl.decls | 16 | ||||
| -rw-r--r-- | generic/tcl.h | 25 | ||||
| -rw-r--r-- | generic/tclBinary.c | 4 | ||||
| -rw-r--r-- | generic/tclCmdMZ.c | 4 | ||||
| -rw-r--r-- | generic/tclDecls.h | 42 | ||||
| -rw-r--r-- | generic/tclDisassemble.c | 2 | ||||
| -rw-r--r-- | generic/tclEncoding.c | 14 | ||||
| -rw-r--r-- | generic/tclIO.c | 4 | ||||
| -rw-r--r-- | generic/tclInt.h | 2 | ||||
| -rw-r--r-- | generic/tclParse.c | 2 | ||||
| -rw-r--r-- | generic/tclScan.c | 4 | ||||
| -rw-r--r-- | generic/tclStringObj.c | 6 | ||||
| -rw-r--r-- | generic/tclStubInit.c | 40 | ||||
| -rw-r--r-- | generic/tclUtf.c | 32 | ||||
| -rw-r--r-- | tests/stringObj.test | 10 | ||||
| -rw-r--r-- | win/rules.vc | 6 |
19 files changed, 144 insertions, 122 deletions
diff --git a/.travis.yml b/.travis.yml index ec263cb..4c08c26 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,20 +10,13 @@ matrix: compiler: gcc env: - BUILD_DIR=unix - - name: "Linux/GCC/Shared: UTF_MAX=6" + - name: "Linux/GCC/Shared: UTF_MAX=4" os: linux dist: xenial compiler: gcc env: - BUILD_DIR=unix - - CFGOPT=CFLAGS=-DTCL_UTF_MAX=6 - - name: "Linux/GCC/Shared: UTF_MAX=3" - os: linux - dist: xenial - compiler: gcc - env: - - BUILD_DIR=unix - - CFGOPT=CFLAGS=-DTCL_UTF_MAX=3 + - CFGOPT=CFLAGS=-DTCL_UTF_MAX=4 - name: "Linux/GCC/Shared: NO_DEPRECATED" os: linux dist: xenial @@ -228,7 +221,7 @@ matrix: script: - cmd.exe /C 'vcvarsall.bat x64 && nmake -f makefile.vc all tcltest' - cmd.exe /C 'vcvarsall.bat x64 && nmake -f makefile.vc test' - - name: "Windows/MSVC/Shared: UTF_MAX=6" + - name: "Windows/MSVC/Shared: UTF_MAX=4" os: windows compiler: cl env: *vcenv @@ -274,7 +267,7 @@ matrix: script: - cmd.exe /C 'vcvarsall.bat x86 && nmake -f makefile.vc all tcltest' - cmd.exe /C 'vcvarsall.bat x86 && nmake -f makefile.vc test' - - name: "Windows/MSVC-x86/Shared: UTF_MAX=6" + - name: "Windows/MSVC-x86/Shared: UTF_MAX=4" os: windows compiler: cl env: *vcenv @@ -320,19 +313,12 @@ matrix: before_install: &makepreinst - choco install -y make zip - cd ${BUILD_DIR} - - name: "Windows/GCC/Shared: UTF_MAX=6" + - name: "Windows/GCC/Shared: UTF_MAX=4" os: windows compiler: gcc env: - BUILD_DIR=win - - CFGOPT="--enable-64bit CFLAGS=-DTCL_UTF_MAX=6" - before_install: *makepreinst - - name: "Windows/GCC/Shared: UTF_MAX=3" - os: windows - compiler: gcc - env: - - BUILD_DIR=win - - CFGOPT="--enable-64bit CFLAGS=-DTCL_UTF_MAX=3" + - CFGOPT="--enable-64bit CFLAGS=-DTCL_UTF_MAX=4" before_install: *makepreinst - name: "Windows/GCC/Shared: NO_DEPRECATED" os: windows @@ -362,19 +348,12 @@ matrix: env: - BUILD_DIR=win before_install: *makepreinst - - name: "Windows/GCC-x86/Shared: UTF_MAX=6" - os: windows - compiler: gcc - env: - - BUILD_DIR=win - - CFGOPT="CFLAGS=-DTCL_UTF_MAX=6" - 
before_install: *makepreinst - - name: "Windows/GCC-x86/Shared: UTF_MAX=3" + - name: "Windows/GCC-x86/Shared: UTF_MAX=4" os: windows compiler: gcc env: - BUILD_DIR=win - - CFGOPT="CFLAGS=-DTCL_UTF_MAX=3" + - CFGOPT="CFLAGS=-DTCL_UTF_MAX=4" before_install: *makepreinst - name: "Windows/GCC-x86/Shared: NO_DEPRECATED" os: windows @@ -8,7 +8,7 @@ .so man.macros .BS .SH NAME -Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UtfToChar16, Tcl_UtfToWChar, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_WCharToUtfDString, Tcl_UtfToWCharDString, Tcl_Char16ToUtfDString, Tcl_UtfToChar16DString, Tcl_UniCharLen, Tcl_UniCharNcmp, Tcl_UniCharNcasecmp, Tcl_UniCharCaseMatch, Tcl_UtfNcmp, Tcl_UtfNcasecmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings +Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UtfToChar16, Tcl_UtfToWChar, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_Char16ToUtfDString, Tcl_UtfToWCharDString, Tcl_UtfToChar16DString, Tcl_UniCharLen, Tcl_UniCharNcmp, Tcl_UniCharNcasecmp, Tcl_UniCharCaseMatch, Tcl_UtfNcmp, Tcl_UtfNcasecmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings .SH SYNOPSIS .nf \fB#include <tcl.h>\fR @@ -120,6 +120,12 @@ A null-terminated Unicode string. A null-terminated UTF-16 string. .AP "const wchar_t" *wStr in A null-terminated wchar_t string. +.AP "const unsigned short" *utf16s in +A null-terminated utf-16 string. +.AP "const unsigned short" *utf16t in +A null-terminated utf-16 string. +.AP "const unsigned short" *utf16Pattern in +A null-terminated utf-16 string. .AP int length in The length of the UTF-8 string in bytes (not UTF-8 characters). If negative, all bytes up to the first null byte are used. 
@@ -148,11 +154,11 @@ case-insensitive (1). .PP These routines convert between UTF-8 strings and Unicode/Utf-16 characters. A UTF-8 character is a Unicode character represented as a varying-length -sequence of up to \fBTCL_UTF_MAX\fR bytes. A multibyte UTF-8 sequence +sequence of up to \fB4\fR bytes. A multibyte UTF-8 sequence consists of a lead byte followed by some number of trail bytes. .PP -\fBTCL_UTF_MAX\fR is the maximum number of bytes that it takes to -represent one Unicode character in the UTF-8 representation. +\fBTCL_UTF_MAX\fR is the maximum number of bytes that \fBTcl_UtfToUniChar\fR +can consume in a single call. .PP \fBTcl_UniCharToUtf\fR stores the character \fIch\fR as a UTF-8 string in starting at \fIbuf\fR. The return value is the number of bytes stored diff --git a/generic/regcustom.h b/generic/regcustom.h index 4396399..a6c19a3 100644 --- a/generic/regcustom.h +++ b/generic/regcustom.h @@ -88,7 +88,7 @@ typedef int celt; /* Type to hold chr, or NOCELT */ #define NOCELT (-1) /* Celt value which is not valid chr */ #define CHR(c) (UCHAR(c)) /* Turn char literal into chr literal */ #define DIGITVAL(c) ((c)-'0') /* Turn chr digit into its value */ -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 #define CHRBITS 32 /* Bits in a chr; must not use sizeof */ #define CHR_MIN 0x00000000 /* Smallest and largest chr; the value */ #define CHR_MAX 0x10ffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */ diff --git a/generic/tcl.decls b/generic/tcl.decls index 528938d..0587088 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -1244,10 +1244,10 @@ declare 350 { declare 351 { int Tcl_UniCharIsWordChar(int ch) } -declare 352 { +declare 352 {deprecated {Use Tcl_GetCharLength}} { int Tcl_UniCharLen(const Tcl_UniChar *uniStr) } -declare 353 { +declare 353 {deprecated {Use Tcl_UtfNcmp}} { int Tcl_UniCharNcmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars) } @@ -1337,10 +1337,10 @@ declare 376 { declare 377 { void Tcl_RegExpGetInfo(Tcl_RegExp 
regexp, Tcl_RegExpInfo *infoPtr) } -declare 378 { +declare 378 {deprecated {Use Tcl_UniCharToUtfDString}} { Tcl_Obj *Tcl_NewUnicodeObj(const Tcl_UniChar *unicode, int numChars) } -declare 379 { +declare 379 {deprecated {Use Tcl_UniCharToUtfDString}} { void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars) } @@ -1356,7 +1356,7 @@ declare 382 {deprecated {No longer in use, changed to macro}} { declare 383 { Tcl_Obj *Tcl_GetRange(Tcl_Obj *objPtr, int first, int last) } -declare 384 { +declare 384 {deprecated {Use Tcl_AppendStringsToObj}} { void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int length) } @@ -1482,11 +1482,11 @@ declare 417 { declare 418 { int Tcl_IsChannelExisting(const char *channelName) } -declare 419 { +declare 419 {deprecated {Use Tcl_UtfNcasecmp}} { int Tcl_UniCharNcasecmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars) } -declare 420 { +declare 420 {deprecated {Use Tcl_StringCaseMatch}} { int Tcl_UniCharCaseMatch(const Tcl_UniChar *uniStr, const Tcl_UniChar *uniPattern, int nocase) } @@ -1540,7 +1540,7 @@ declare 433 { } # introduced in 8.4a3 -declare 434 { +declare 434 {deprecated {Use Tcl_UtfToUniCharDString}} { Tcl_UniChar *Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr) } diff --git a/generic/tcl.h b/generic/tcl.h index 8a81d9e..de8ca43 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2106,16 +2106,15 @@ typedef struct Tcl_EncodingType { /* * The maximum number of bytes that are necessary to represent a single - * Unicode character in UTF-8. The valid values are 4 and 6 - * (or perhaps 1 if we want to support a non-unicode enabled core). If 4, - * then Tcl_UniChar must be 2-bytes in size (UCS-2) (the default). If 6, + * Unicode character in UTF-8. The valid values are 3 and 4 + * (or perhaps 1 if we want to support a non-unicode enabled core). If 3, + * then Tcl_UniChar must be 2-bytes in size (UCS-2) (the default). 
If > 3, * then Tcl_UniChar must be 4-bytes in size (UCS-4). At this time UCS-2 mode - * is the default and recommended mode. UCS-4 is experimental and not - * recommended. It works for the core, but most extensions expect UCS-2. + * is the default and recommended mode. */ #ifndef TCL_UTF_MAX -#define TCL_UTF_MAX 4 +#define TCL_UTF_MAX 3 #endif /* @@ -2123,15 +2122,11 @@ typedef struct Tcl_EncodingType { * reflected in regcustom.h. */ -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 /* * int isn't 100% accurate as it should be a strict 4-byte value - * (perhaps wchar_t). 64-bit systems may have troubles. The size of this - * value must be reflected correctly in regcustom.h and - * in tclEncoding.c. - * XXX: Tcl is currently UCS-2 and planning UTF-16 for the Unicode - * XXX: string rep that Tcl_UniChar represents. Changing the size - * XXX: of Tcl_UniChar is /not/ supported. + * (perhaps wchar_t). ILP64/SILP64 systems may have troubles. The + * size of this value must be reflected correctly in regcustom.h. */ typedef int Tcl_UniChar; #else @@ -2311,10 +2306,10 @@ typedef int (Tcl_NRPostProc) (ClientData data[], Tcl_Interp *interp, /* *---------------------------------------------------------------------------- * The following constant is used to test for older versions of Tcl in the - * stubs tables. If TCL_UTF_MAX>4 use a different value. + * stubs tables. 
*/ -#define TCL_STUB_MAGIC ((int) 0xFCA3BACF + (TCL_UTF_MAX>4)) +#define TCL_STUB_MAGIC ((int) 0xFCA3BACF) /* * The following function is required to be defined in all stubs aware diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 8ba0fab..eacac35 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -1363,7 +1363,7 @@ BinaryFormatCmd( badField: { Tcl_UniChar ch = 0; - char buf[TCL_UTF_MAX + 1] = ""; + char buf[5] = ""; TclUtfToUniChar(errorString, &ch); buf[Tcl_UniCharToUtf(ch, buf)] = '\0'; @@ -1733,7 +1733,7 @@ BinaryScanCmd( badField: { Tcl_UniChar ch = 0; - char buf[TCL_UTF_MAX + 1] = ""; + char buf[5] = ""; TclUtfToUniChar(errorString, &ch); buf[Tcl_UniCharToUtf(ch, buf)] = '\0'; diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index ecb13b1..b5d5c89 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -1221,7 +1221,7 @@ Tcl_SplitObjCmd( len = TclUtfToUniChar(stringPtr, &ch); fullchar = ch; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((ch >= 0xD800) && (len < 3)) { len += TclUtfToUniChar(stringPtr + len, &ch); fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; @@ -1911,7 +1911,7 @@ StringIsCmd( int fullchar; length2 = TclUtfToUniChar(string1, &ch); fullchar = ch; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((ch >= 0xD800) && (length2 < 3)) { length2 += TclUtfToUniChar(string1 + length2, &ch); fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; diff --git a/generic/tclDecls.h b/generic/tclDecls.h index ed1da85..75b9c01 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1063,9 +1063,11 @@ EXTERN int Tcl_UniCharIsUpper(int ch); /* 351 */ EXTERN int Tcl_UniCharIsWordChar(int ch); /* 352 */ -EXTERN int Tcl_UniCharLen(const Tcl_UniChar *uniStr); +TCL_DEPRECATED("Use Tcl_GetCharLength") +int Tcl_UniCharLen(const Tcl_UniChar *uniStr); /* 353 */ -EXTERN int Tcl_UniCharNcmp(const Tcl_UniChar *ucs, +TCL_DEPRECATED("Use Tcl_UtfNcmp") +int Tcl_UniCharNcmp(const Tcl_UniChar *ucs, const Tcl_UniChar 
*uct, unsigned long numChars); /* 354 */ @@ -1141,10 +1143,12 @@ EXTERN int Tcl_RegExpExecObj(Tcl_Interp *interp, EXTERN void Tcl_RegExpGetInfo(Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr); /* 378 */ -EXTERN Tcl_Obj * Tcl_NewUnicodeObj(const Tcl_UniChar *unicode, +TCL_DEPRECATED("Use Tcl_UniCharToUtfDString") +Tcl_Obj * Tcl_NewUnicodeObj(const Tcl_UniChar *unicode, int numChars); /* 379 */ -EXTERN void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, +TCL_DEPRECATED("Use Tcl_UniCharToUtfDString") +void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars); /* 380 */ EXTERN int Tcl_GetCharLength(Tcl_Obj *objPtr); @@ -1156,7 +1160,8 @@ Tcl_UniChar * Tcl_GetUnicode(Tcl_Obj *objPtr); /* 383 */ EXTERN Tcl_Obj * Tcl_GetRange(Tcl_Obj *objPtr, int first, int last); /* 384 */ -EXTERN void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, +TCL_DEPRECATED("Use Tcl_AppendStringsToObj") +void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int length); /* 385 */ EXTERN int Tcl_RegExpMatchObj(Tcl_Interp *interp, @@ -1250,11 +1255,13 @@ EXTERN void Tcl_ClearChannelHandlers(Tcl_Channel channel); /* 418 */ EXTERN int Tcl_IsChannelExisting(const char *channelName); /* 419 */ -EXTERN int Tcl_UniCharNcasecmp(const Tcl_UniChar *ucs, +TCL_DEPRECATED("Use Tcl_UtfNcasecmp") +int Tcl_UniCharNcasecmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 420 */ -EXTERN int Tcl_UniCharCaseMatch(const Tcl_UniChar *uniStr, +TCL_DEPRECATED("Use Tcl_StringCaseMatch") +int Tcl_UniCharCaseMatch(const Tcl_UniChar *uniStr, const Tcl_UniChar *uniPattern, int nocase); /* 421 */ EXTERN Tcl_HashEntry * Tcl_FindHashEntry(Tcl_HashTable *tablePtr, @@ -1297,7 +1304,8 @@ EXTERN int Tcl_AttemptSetObjLength(Tcl_Obj *objPtr, int length); /* 433 */ EXTERN Tcl_ThreadId Tcl_GetChannelThread(Tcl_Channel channel); /* 434 */ -EXTERN Tcl_UniChar * Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, +TCL_DEPRECATED("Use Tcl_UtfToUniCharDString") +Tcl_UniChar * Tcl_GetUnicodeFromObj(Tcl_Obj 
*objPtr, int *lengthPtr); /* 435 */ TCL_DEPRECATED("") @@ -2300,8 +2308,8 @@ typedef struct TclStubs { int (*tcl_UniCharIsSpace) (int ch); /* 349 */ int (*tcl_UniCharIsUpper) (int ch); /* 350 */ int (*tcl_UniCharIsWordChar) (int ch); /* 351 */ - int (*tcl_UniCharLen) (const Tcl_UniChar *uniStr); /* 352 */ - int (*tcl_UniCharNcmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 353 */ + TCL_DEPRECATED_API("Use Tcl_GetCharLength") int (*tcl_UniCharLen) (const Tcl_UniChar *uniStr); /* 352 */ + TCL_DEPRECATED_API("Use Tcl_UtfNcmp") int (*tcl_UniCharNcmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 353 */ char * (*tcl_Char16ToUtfDString) (const unsigned short *uniStr, int uniLength, Tcl_DString *dsPtr); /* 354 */ unsigned short * (*tcl_UtfToChar16DString) (const char *src, int length, Tcl_DString *dsPtr); /* 355 */ Tcl_RegExp (*tcl_GetRegExpFromObj) (Tcl_Interp *interp, Tcl_Obj *patObj, int flags); /* 356 */ @@ -2326,13 +2334,13 @@ typedef struct TclStubs { int (*tcl_UniCharIsPunct) (int ch); /* 375 */ int (*tcl_RegExpExecObj) (Tcl_Interp *interp, Tcl_RegExp regexp, Tcl_Obj *textObj, int offset, int nmatches, int flags); /* 376 */ void (*tcl_RegExpGetInfo) (Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr); /* 377 */ - Tcl_Obj * (*tcl_NewUnicodeObj) (const Tcl_UniChar *unicode, int numChars); /* 378 */ - void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars); /* 379 */ + TCL_DEPRECATED_API("Use Tcl_UniCharToUtfDString") Tcl_Obj * (*tcl_NewUnicodeObj) (const Tcl_UniChar *unicode, int numChars); /* 378 */ + TCL_DEPRECATED_API("Use Tcl_UniCharToUtfDString") void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars); /* 379 */ int (*tcl_GetCharLength) (Tcl_Obj *objPtr); /* 380 */ int (*tcl_GetUniChar) (Tcl_Obj *objPtr, int index); /* 381 */ TCL_DEPRECATED_API("No longer in use, changed to macro") Tcl_UniChar * (*tcl_GetUnicode) (Tcl_Obj *objPtr); /* 382 */ 
Tcl_Obj * (*tcl_GetRange) (Tcl_Obj *objPtr, int first, int last); /* 383 */ - void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int length); /* 384 */ + TCL_DEPRECATED_API("Use Tcl_AppendStringsToObj") void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int length); /* 384 */ int (*tcl_RegExpMatchObj) (Tcl_Interp *interp, Tcl_Obj *textObj, Tcl_Obj *patternObj); /* 385 */ void (*tcl_SetNotifier) (Tcl_NotifierProcs *notifierProcPtr); /* 386 */ Tcl_Mutex * (*tcl_GetAllocMutex) (void); /* 387 */ @@ -2367,8 +2375,8 @@ typedef struct TclStubs { void (*tcl_SpliceChannel) (Tcl_Channel channel); /* 416 */ void (*tcl_ClearChannelHandlers) (Tcl_Channel channel); /* 417 */ int (*tcl_IsChannelExisting) (const char *channelName); /* 418 */ - int (*tcl_UniCharNcasecmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 419 */ - int (*tcl_UniCharCaseMatch) (const Tcl_UniChar *uniStr, const Tcl_UniChar *uniPattern, int nocase); /* 420 */ + TCL_DEPRECATED_API("Use Tcl_UtfNcasecmp") int (*tcl_UniCharNcasecmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 419 */ + TCL_DEPRECATED_API("Use Tcl_StringCaseMatch") int (*tcl_UniCharCaseMatch) (const Tcl_UniChar *uniStr, const Tcl_UniChar *uniPattern, int nocase); /* 420 */ Tcl_HashEntry * (*tcl_FindHashEntry) (Tcl_HashTable *tablePtr, const void *key); /* 421 */ Tcl_HashEntry * (*tcl_CreateHashEntry) (Tcl_HashTable *tablePtr, const void *key, int *newPtr); /* 422 */ void (*tcl_InitCustomHashTable) (Tcl_HashTable *tablePtr, int keyType, const Tcl_HashKeyType *typePtr); /* 423 */ @@ -2382,7 +2390,7 @@ typedef struct TclStubs { char * (*tcl_AttemptDbCkrealloc) (char *ptr, unsigned int size, const char *file, int line); /* 431 */ int (*tcl_AttemptSetObjLength) (Tcl_Obj *objPtr, int length); /* 432 */ Tcl_ThreadId (*tcl_GetChannelThread) (Tcl_Channel channel); /* 433 */ - Tcl_UniChar * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int 
*lengthPtr); /* 434 */ + TCL_DEPRECATED_API("Use Tcl_UtfToUniCharDString") Tcl_UniChar * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int *lengthPtr); /* 434 */ TCL_DEPRECATED_API("") int (*tcl_GetMathFuncInfo) (Tcl_Interp *interp, const char *name, int *numArgsPtr, Tcl_ValueType **argTypesPtr, Tcl_MathProc **procPtr, ClientData *clientDataPtr); /* 435 */ TCL_DEPRECATED_API("") Tcl_Obj * (*tcl_ListMathFuncs) (Tcl_Interp *interp, const char *pattern); /* 436 */ Tcl_Obj * (*tcl_SubstObj) (Tcl_Interp *interp, Tcl_Obj *objPtr, int flags); /* 437 */ @@ -4109,7 +4117,7 @@ extern const TclStubs *tclStubsPtr; #undef Tcl_StringMatch #define Tcl_StringMatch(str, pattern) Tcl_StringCaseMatch((str), (pattern), 0) -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 # undef Tcl_UniCharToUtfDString # define Tcl_UniCharToUtfDString Tcl_Char16ToUtfDString # undef Tcl_UtfToUniCharDString diff --git a/generic/tclDisassemble.c b/generic/tclDisassemble.c index 3204619..a7ab8db 100644 --- a/generic/tclDisassemble.c +++ b/generic/tclDisassemble.c @@ -903,7 +903,7 @@ PrintSourceToObj( i += 2; continue; default: -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 if (ch > 0xffff) { Tcl_AppendPrintfToObj(appendObj, "\\U%08x", ch); i += 10; diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 6740565..375e519 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2615,7 +2615,7 @@ UtfToUtf16Proc( */ if (clientData) { -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 if (*chPtr <= 0xFFFF) { *dst++ = (*chPtr & 0xFF); *dst++ = (*chPtr >> 8); @@ -2630,7 +2630,7 @@ UtfToUtf16Proc( *dst++ = (*chPtr >> 8); #endif } else { -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 if (*chPtr <= 0xFFFF) { *dst++ = (*chPtr >> 8); *dst++ = (*chPtr & 0xFF); @@ -2697,7 +2697,7 @@ UtfToUcs2Proc( { const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; int result, numChars; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 int len; #endif Tcl_UniChar ch = 0; @@ -2727,7 +2727,7 @@ UtfToUcs2Proc( result = TCL_CONVERT_NOSPACE; 
break; } -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 src += (len = TclUtfToUniChar(src, &ch)); if ((ch >= 0xD800) && (len < 3)) { src += TclUtfToUniChar(src, &ch); @@ -2951,7 +2951,7 @@ TableFromUtfProc( } len = TclUtfToUniChar(src, &ch); -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 /* * This prevents a crash condition. More evaluation is required for * full support of int Tcl_UniChar. [Bug 1004065] @@ -3162,7 +3162,7 @@ Iso88591FromUtfProc( */ if (ch > 0xff -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 || ((ch >= 0xD800) && (len < 3)) #endif ) { @@ -3170,7 +3170,7 @@ Iso88591FromUtfProc( result = TCL_CONVERT_UNKNOWN; break; } -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((ch >= 0xD800) && (len < 3)) len = 4; #endif /* diff --git a/generic/tclIO.c b/generic/tclIO.c index 3ba577d..643b8f1 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -4719,7 +4719,7 @@ Tcl_GetsObj( Tcl_ExternalToUtf(NULL, gs.encoding, RemovePoint(bufPtr), gs.rawRead, statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE, &gs.state, tmp, - TCL_UTF_MAX, &rawRead, NULL, NULL); + sizeof(tmp), &rawRead, NULL, NULL); bufPtr->nextRemoved += rawRead; gs.rawRead -= rawRead; gs.bytesWrote--; @@ -6282,7 +6282,7 @@ ReadChars( Tcl_ExternalToUtf(NULL, encoding, src, srcLen, (statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE), - &statePtr->inputEncodingState, buffer, TCL_UTF_MAX + 1, + &statePtr->inputEncodingState, buffer, sizeof(buffer), &read, &decoded, &count); if (count == 2) { diff --git a/generic/tclInt.h b/generic/tclInt.h index 8b150db..491c66c0 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -4615,7 +4615,7 @@ MODULE_SCOPE void TclDbInitNewObj(Tcl_Obj *objPtr, const char *file, *---------------------------------------------------------------- */ -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 #define TclUtfToUniChar(str, chPtr) \ ((((unsigned char) *(str)) < 0x80) ? 
\ ((*(chPtr) = (unsigned char) *(str)), 1) \ diff --git a/generic/tclParse.c b/generic/tclParse.c index 6143cb7..3cc44b1 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -936,7 +936,7 @@ TclParseBackslash( if (Tcl_UtfCharComplete(p, numBytes - 1)) { count = TclUtfToUniChar(p, &unichar) + 1; /* +1 for '\' */ } else { - char utfBytes[TCL_UTF_MAX]; + char utfBytes[4]; memcpy(utfBytes, p, numBytes - 1); utfBytes[numBytes - 1] = '\0'; diff --git a/generic/tclScan.c b/generic/tclScan.c index 0d869b7..aa48c09 100644 --- a/generic/tclScan.c +++ b/generic/tclScan.c @@ -265,7 +265,7 @@ ValidateFormat( * these are messy operations because we do * not want to use the formatting engine; * we're inside there! */ - char buf[TCL_UTF_MAX + 1] = ""; + char buf[5] = ""; /* * Initialize an array that records the number of times a variable is @@ -877,7 +877,7 @@ Tcl_ScanObjCmd( offset = TclUtfToUniChar(string, &sch); i = (int)sch; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((sch >= 0xD800) && (offset < 3)) { offset += TclUtfToUniChar(string+offset, &sch); i = (((i<<10) & 0x0FFC00) + 0x10000) + (sch & 0x3FF); diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index eed49b1..3bee1de 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -568,7 +568,7 @@ Tcl_GetUniChar( return -1; } ch = stringPtr->unicode[index]; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 /* See: bug [11ae2be95dac9417] */ if ((ch & 0xF800) == 0xD800) { if (ch & 0x400) { @@ -752,7 +752,7 @@ Tcl_GetRange( if (last < first) { return Tcl_NewObj(); } -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 /* See: bug [11ae2be95dac9417] */ if ((first > 0) && ((stringPtr->unicode[first] & 0xFC00) == 0xDC00) && ((stringPtr->unicode[first-1] & 0xFC00) == 0xD800)) { @@ -2613,7 +2613,7 @@ AppendPrintfToObjVA( end = q; } - q = bytes + TCL_UTF_MAX; + q = bytes + 4; while ((bytes < end) && (bytes < q) && ((*bytes & 0xC0) == 0x80)) { bytes++; diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index 
6983113..65c6e1a 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -40,6 +40,14 @@ #undef Tcl_NewObj #undef Tcl_NewStringObj #undef Tcl_GetUnicode +#undef Tcl_GetUnicodeFromObj +#undef Tcl_AppendUnicodeToObj +#undef Tcl_NewUnicodeObj +#undef Tcl_SetUnicodeObj +#undef Tcl_UniCharNcasecmp +#undef Tcl_UniCharCaseMatch +#undef Tcl_UniCharLen +#undef Tcl_UniCharNcmp #undef Tcl_DumpActiveMemory #undef Tcl_ValidateAllMemory #undef Tcl_FindHashEntry @@ -64,6 +72,21 @@ #undef Tcl_UtfToUniCharDString #undef Tcl_UtfToUniChar +#if TCL_UTF_MAX > 3 +static void uniCodePanic() { + Tcl_Panic("This extension uses a deprecated function, not available now: Tcl is compiled with -DTCL_UTF_MAX==%d", TCL_UTF_MAX); +} +# define Tcl_GetUnicode (int *(*)(Tcl_Obj *)) uniCodePanic +# define Tcl_GetUnicodeFromObj (int *(*)(Tcl_Obj *, Tcl_UniChar *)) uniCodePanic +# define Tcl_NewUnicodeObj (Tcl_Obj *(*)(const int *, Tcl_UniChar)) uniCodePanic +# define Tcl_SetUnicodeObj (void(*)(Tcl_Obj *, const Tcl_UniChar *, int)) uniCodePanic +# define Tcl_AppendUnicodeToObj (void(*)(Tcl_Obj *, const Tcl_UniChar *, int)) uniCodePanic +# define Tcl_UniCharNcasecmp (int(*)(const Tcl_UniChar *, const Tcl_UniChar *, unsigned long)) uniCodePanic +# define Tcl_UniCharCaseMatch (int(*)(const Tcl_UniChar *, const Tcl_UniChar *, int)) uniCodePanic +# define Tcl_UniCharLen (int(*)(const Tcl_UniChar *)) uniCodePanic +# define Tcl_UniCharNcmp (int(*)(const Tcl_UniChar *, const Tcl_UniChar *, unsigned long)) uniCodePanic +#endif + #define TclBN_mp_add mp_add #define TclBN_mp_and mp_and #define TclBN_mp_clamp mp_clamp @@ -480,10 +503,16 @@ static int exprIntObj(Tcl_Interp *interp, Tcl_Obj*expr, int *ptr){ return result; } #define Tcl_ExprLongObj (int(*)(Tcl_Interp*,Tcl_Obj*,long*))exprIntObj +#if TCL_UTF_MAX < 4 static int uniCharNcmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned int n){ return Tcl_UniCharNcmp(ucs, uct, (unsigned long)n); } #define Tcl_UniCharNcmp (int(*)(const 
Tcl_UniChar*,const Tcl_UniChar*,unsigned long))uniCharNcmp +static int uniCharNcasecmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned int n){ + return Tcl_UniCharNcasecmp(ucs, uct, (unsigned long)n); +} +#define Tcl_UniCharNcasecmp (int(*)(const Tcl_UniChar*,const Tcl_UniChar*,unsigned long))uniCharNcasecmp +#endif static int utfNcmp(const char *s1, const char *s2, unsigned int n){ return Tcl_UtfNcmp(s1, s2, (unsigned long)n); } @@ -492,10 +521,6 @@ static int utfNcasecmp(const char *s1, const char *s2, unsigned int n){ return Tcl_UtfNcasecmp(s1, s2, (unsigned long)n); } #define Tcl_UtfNcasecmp (int(*)(const char*,const char*,unsigned long))utfNcasecmp -static int uniCharNcasecmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned int n){ - return Tcl_UniCharNcasecmp(ucs, uct, (unsigned long)n); -} -#define Tcl_UniCharNcasecmp (int(*)(const Tcl_UniChar*,const Tcl_UniChar*,unsigned long))uniCharNcasecmp #endif /* TCL_WIDE_INT_IS_LONG */ @@ -591,6 +616,13 @@ static int uniCharNcasecmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsig # define Tcl_SetPanicProc 0 # define Tcl_FindExecutable 0 # define Tcl_GetUnicode 0 +#if TCL_UTF_MAX < 4 +# define Tcl_AppendUnicodeToObj 0 +# define Tcl_UniCharCaseMatch 0 +# define Tcl_UniCharLen 0 +# define Tcl_UniCharNcasecmp 0 +# define Tcl_UniCharNcmp 0 +#endif # undef Tcl_StringMatch # define Tcl_StringMatch 0 # define TclBN_reverse 0 diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 8bc4d49..9522f11 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -787,7 +787,7 @@ Tcl_UtfFindFirst( while (1) { len = TclUtfToUniChar(src, &find); fullchar = find; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((fullchar != ch) && (find >= 0xD800) && (len < 3)) { len += TclUtfToUniChar(src + len, &find); fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000; @@ -835,7 +835,7 @@ Tcl_UtfFindLast( while (1) { len = TclUtfToUniChar(src, &find); fullchar = find; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if 
((fullchar != ch) && (find >= 0xD800) && (len < 3)) { len += TclUtfToUniChar(src + len, &find); fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000; @@ -878,7 +878,7 @@ Tcl_UtfNext( Tcl_UniChar ch = 0; int len = TclUtfToUniChar(src, &ch); -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((ch >= 0xD800) && (len < 3)) { len += TclUtfToUniChar(src + len, &ch); } @@ -960,19 +960,19 @@ Tcl_UniCharAtIndex( { Tcl_UniChar ch = 0; int fullchar = 0; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 int len = 0; #endif while (index-- >= 0) { -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 src += (len = TclUtfToUniChar(src, &ch)); #else src += TclUtfToUniChar(src, &ch); #endif } fullchar = ch; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((ch >= 0xD800) && (len < 3)) { /* If last Tcl_UniChar was a high surrogate, combine with low surrogate */ (void)TclUtfToUniChar(src, &ch); @@ -988,7 +988,7 @@ Tcl_UniCharAtIndex( * Tcl_UtfAtIndex -- * * Returns a pointer to the specified character (not byte) position in - * the UTF-8 string. If TCL_UTF_MAX <= 4, characters > U+FFFF count as + * the UTF-8 string. If TCL_UTF_MAX <= 3, characters > U+FFFF count as * 2 positions, but then the pointer should never be placed between * the two positions. 
* @@ -1013,7 +1013,7 @@ Tcl_UtfAtIndex( len = TclUtfToUniChar(src, &ch); src += len; } -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((ch >= 0xD800) && (len < 3)) { /* Index points at character following high Surrogate */ src += TclUtfToUniChar(src, &ch); @@ -1110,7 +1110,7 @@ Tcl_UtfToUpper( while (*src) { len = TclUtfToUniChar(src, &ch); upChar = ch; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((ch >= 0xD800) && (len < 3)) { len += TclUtfToUniChar(src + len, &ch); /* Combine surrogates */ @@ -1172,7 +1172,7 @@ Tcl_UtfToLower( while (*src) { len = TclUtfToUniChar(src, &ch); lowChar = ch; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((ch >= 0xD800) && (len < 3)) { len += TclUtfToUniChar(src + len, &ch); /* Combine surrogates */ @@ -1237,7 +1237,7 @@ Tcl_UtfToTitle( if (*src) { len = TclUtfToUniChar(src, &ch); titleChar = ch; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((ch >= 0xD800) && (len < 3)) { len += TclUtfToUniChar(src + len, &ch); /* Combine surrogates */ @@ -1257,7 +1257,7 @@ Tcl_UtfToTitle( while (*src) { len = TclUtfToUniChar(src, &ch); lowChar = ch; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((ch >= 0xD800) && (len < 3)) { len += TclUtfToUniChar(src + len, &ch); /* Combine surrogates */ @@ -1369,7 +1369,7 @@ Tcl_UtfNcmp( cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); if (ch1 != ch2) { -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 /* Surrogates always report higher than non-surrogates */ if (((ch1 & 0xFC00) == 0xD800)) { if ((ch2 & 0xFC00) != 0xD800) { @@ -1420,7 +1420,7 @@ Tcl_UtfNcasecmp( cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); if (ch1 != ch2) { -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 /* Surrogates always report higher than non-surrogates */ if (((ch1 & 0xFC00) == 0xD800)) { if ((ch2 & 0xFC00) != 0xD800) { @@ -1469,7 +1469,7 @@ TclUtfCmp( cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); if (ch1 != ch2) { -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 /* Surrogates always report 
higher than non-surrogates */ if (((ch1 & 0xFC00) == 0xD800)) { if ((ch2 & 0xFC00) != 0xD800) { @@ -1515,7 +1515,7 @@ TclUtfCasecmp( cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); if (ch1 != ch2) { -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 /* Surrogates always report higher than non-surrogates */ if (((ch1 & 0xFC00) == 0xD800)) { if ((ch2 & 0xFC00) != 0xD800) { diff --git a/tests/stringObj.test b/tests/stringObj.test index bda7285..3779bca 100644 --- a/tests/stringObj.test +++ b/tests/stringObj.test @@ -23,6 +23,8 @@ catch [list package require -exact Tcltest [info patchlevel]] testConstraint testobj [llength [info commands testobj]] testConstraint testbytestring [llength [info commands testbytestring]] testConstraint testdstring [llength [info commands testdstring]] +testConstraint tip389 [expr {[string length \U010000] == 2}] +testConstraint nodep [info exists tcl_precision] test stringObj-1.1 {string type registration} testobj { set t [testobj types] @@ -464,19 +466,19 @@ test stringObj-15.4 {Tcl_Append*ToObj: self appends} testobj { teststringobj set 1 foo teststringobj appendself 1 3 } foo -test stringObj-15.5 {Tcl_Append*ToObj: self appends} testobj { +test stringObj-15.5 {Tcl_Append*ToObj: self appends} {testobj tip389 nodep} { teststringobj set 1 foo teststringobj appendself2 1 0 } foofoo -test stringObj-15.6 {Tcl_Append*ToObj: self appends} testobj { +test stringObj-15.6 {Tcl_Append*ToObj: self appends} {testobj tip389 nodep} { teststringobj set 1 foo teststringobj appendself2 1 1 } foooo -test stringObj-15.7 {Tcl_Append*ToObj: self appends} testobj { +test stringObj-15.7 {Tcl_Append*ToObj: self appends} {testobj tip389 nodep} { teststringobj set 1 foo teststringobj appendself2 1 2 } fooo -test stringObj-15.8 {Tcl_Append*ToObj: self appends} testobj { +test stringObj-15.8 {Tcl_Append*ToObj: self appends} {testobj tip389 nodep} { teststringobj set 1 foo teststringobj appendself2 1 3 } foo diff --git a/win/rules.vc b/win/rules.vc index 
064823b..2dccee0 100644 --- a/win/rules.vc +++ b/win/rules.vc @@ -686,7 +686,7 @@ LINKERFLAGS = $(LINKERFLAGS) -ltcg # configuration (ignored for Tcl itself)
# _USE_64BIT_TIME_T - forces a build using 64-bit time_t for 32-bit build
# (CRT library should support this)
-# TCL_UTF_MAX=6 - forces a build using 32-bit Tcl_UniChar in stead of 16-bit.
+# TCL_UTF_MAX=4 - forces a build using 32-bit Tcl_UniChar instead of 16-bit.
# Further, LINKERFLAGS are modified based on above.
# Default values for all the above
@@ -762,7 +762,7 @@ _USE_64BIT_TIME_T = 1 !if [nmakehlp -f $(OPTS) "utfmax"]
!message *** Force 32-bit Tcl_UniChar
-TCL_UTF_MAX = 6
+TCL_UTF_MAX = 4
!endif
# Yes, it's weird that the "symbols" option controls DEBUG and
@@ -1364,7 +1364,7 @@ OPTDEFINES = $(OPTDEFINES) /DNO_STRTOI64=1 OPTDEFINES = $(OPTDEFINES) /D_USE_64BIT_TIME_T=1
!endif
-!if "$(TCL_UTF_MAX)" == "6"
-OPTDEFINES = $(OPTDEFINES) /DTCL_UTF_MAX=6
+!if "$(TCL_UTF_MAX)" == "4"
+OPTDEFINES = $(OPTDEFINES) /DTCL_UTF_MAX=4
!endif
# _ATL_XP_TARGETING - Newer SDK's need this to build for XP
|
