diff options
-rw-r--r-- | .travis.yml | 37 | ||||
-rw-r--r-- | doc/Utf.3 | 15 | ||||
-rw-r--r-- | generic/regcustom.h | 2 | ||||
-rw-r--r-- | generic/tcl.decls | 16 | ||||
-rw-r--r-- | generic/tcl.h | 8 | ||||
-rw-r--r-- | generic/tclCmdMZ.c | 4 | ||||
-rw-r--r-- | generic/tclDecls.h | 42 | ||||
-rw-r--r-- | generic/tclDisassemble.c | 2 | ||||
-rw-r--r-- | generic/tclEncoding.c | 16 | ||||
-rw-r--r-- | generic/tclInt.h | 2 | ||||
-rw-r--r-- | generic/tclMain.c | 10 | ||||
-rw-r--r-- | generic/tclScan.c | 2 | ||||
-rw-r--r-- | generic/tclStringObj.c | 4 | ||||
-rw-r--r-- | generic/tclStubInit.c | 16 | ||||
-rw-r--r-- | generic/tclTestObj.c | 42 | ||||
-rw-r--r-- | generic/tclUtf.c | 32 | ||||
-rw-r--r-- | tests/stringObj.test | 20 | ||||
-rw-r--r-- | tests/utf.test | 16 | ||||
-rw-r--r-- | win/rules.vc | 8 |
19 files changed, 118 insertions, 176 deletions
diff --git a/.travis.yml b/.travis.yml index 73e3fc2..538006d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,20 +17,13 @@ matrix: env: - CFGOPT=--disable-shared - BUILD_DIR=unix - - name: "Linux/GCC/Shared: UTF_MAX=6" + - name: "Linux/GCC/Shared: UTF_MAX=4" os: linux dist: xenial compiler: gcc env: - BUILD_DIR=unix - - CFGOPT=CFLAGS=-DTCL_UTF_MAX=6 - - name: "Linux/GCC/Shared: UTF_MAX=3" - os: linux - dist: xenial - compiler: gcc - env: - - BUILD_DIR=unix - - CFGOPT=CFLAGS=-DTCL_UTF_MAX=3 + - CFGOPT=CFLAGS=-DTCL_UTF_MAX=4 - name: "Linux/GCC/Shared: NO_DEPRECATED" os: linux dist: xenial @@ -190,23 +183,14 @@ matrix: - BUILD_DIR=win - CFGOPT="--host=i686-w64-mingw32 --disable-shared" script: *crosstest - - name: "Linux-cross-Windows-32/GCC/Shared/no test: UTF_MAX=6" + - name: "Linux-cross-Windows-32/GCC/Shared/no test: UTF_MAX=4" os: linux dist: xenial compiler: i686-w64-mingw32-gcc addons: *mingw32 env: - BUILD_DIR=win - - CFGOPT="--host=i686-w64-mingw32 CFLAGS=-DTCL_UTF_MAX=6" - script: *crosstest - - name: "Linux-cross-Windows-32/GCC/Shared/no test: UTF_MAX=3" - os: linux - dist: xenial - compiler: i686-w64-mingw32-gcc - addons: *mingw32 - env: - - BUILD_DIR=win - - CFGOPT="--host=i686-w64-mingw32 CFLAGS=-DTCL_UTF_MAX=3" + - CFGOPT="--host=i686-w64-mingw32 CFLAGS=-DTCL_UTF_MAX=4" script: *crosstest - name: "Linux-cross-Windows-32/GCC/Shared/no test: NO_DEPRECATED" os: linux @@ -253,23 +237,14 @@ matrix: - BUILD_DIR=win - CFGOPT="--host=x86_64-w64-mingw32 --enable-64bit --disable-shared" script: *crosstest - - name: "Linux-cross-Windows-64/GCC/Shared/no test: UTF_MAX=6" - os: linux - dist: xenial - compiler: x86_64-w64-mingw32-gcc - addons: *mingw64 - env: - - BUILD_DIR=win - - CFGOPT="--host=x86_64-w64-mingw32 --enable-64bit CFLAGS=-DTCL_UTF_MAX=6" - script: *crosstest - - name: "Linux-cross-Windows-64/GCC/Shared/no test: UTF_MAX=3" + - name: "Linux-cross-Windows-64/GCC/Shared/no test: UTF_MAX=4" os: linux dist: xenial compiler: x86_64-w64-mingw32-gcc addons: 
*mingw64 env: - BUILD_DIR=win - - CFGOPT="--host=x86_64-w64-mingw32 --enable-64bit CFLAGS=-DTCL_UTF_MAX=3" + - CFGOPT="--host=x86_64-w64-mingw32 --enable-64bit CFLAGS=-DTCL_UTF_MAX=4" script: *crosstest - name: "Linux-cross-Windows-64/GCC/Shared/no test: NO_DEPRECATED" os: linux @@ -8,7 +8,7 @@ .so man.macros .BS .SH NAME -Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UtfToChar16, Tcl_UtfToWChar, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_WCharToUtfDString, Tcl_UtfToWCharDString, Tcl_Char16ToUtfDString, Tcl_UtfToChar16DString, Tcl_UniCharLen, Tcl_UniCharNcmp, Tcl_UniCharNcasecmp, Tcl_UniCharCaseMatch, Tcl_UtfNcmp, Tcl_UtfNcasecmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings +Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UtfToChar16, Tcl_UtfToWChar, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_Char16ToUtfDString, Tcl_UtfToWCharDString, Tcl_UtfToChar16DString, Tcl_UniCharLen, Tcl_UniCharNcmp, Tcl_UniCharNcasecmp, Tcl_UniCharCaseMatch, Tcl_UtfNcmp, Tcl_UtfNcasecmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings .SH SYNOPSIS .nf \fB#include <tcl.h>\fR @@ -120,6 +120,12 @@ A null-terminated Unicode string. A null-terminated UTF-16 string. .AP "const wchar_t" *wStr in A null-terminated wchar_t string. +.AP "const unsigned short" *utf16s in +A null-terminated utf-16 string. +.AP "const unsigned short" *utf16t in +A null-terminated utf-16 string. +.AP "const unsigned short" *utf16Pattern in +A null-terminated utf-16 string. .AP int length in The length of the UTF-8 string in bytes (not UTF-8 characters). If negative, all bytes up to the first null byte are used. @@ -147,9 +153,10 @@ case-insensitive (1). 
.SH DESCRIPTION .PP These routines convert between UTF-8 strings and Unicode/Utf-16 characters. -A UTF-8 character is a Unicode character represented as a varying-length -sequence of up to \fBTCL_UTF_MAX\fR bytes. A multibyte UTF-8 sequence -consists of a lead byte followed by some number of trail bytes. +An Unicode character represented as an unsigned, fixed-size +quantity. A UTF-8 character is a Unicode character represented as +a varying-length sequence of up to \fBTCL_UTF_MAX\fR bytes. A multibyte UTF-8 +sequence consists of a lead byte followed by some number of trail bytes. .PP \fBTCL_UTF_MAX\fR is the maximum number of bytes that it takes to represent one Unicode character in the UTF-8 representation. diff --git a/generic/regcustom.h b/generic/regcustom.h index 4396399..a6c19a3 100644 --- a/generic/regcustom.h +++ b/generic/regcustom.h @@ -88,7 +88,7 @@ typedef int celt; /* Type to hold chr, or NOCELT */ #define NOCELT (-1) /* Celt value which is not valid chr */ #define CHR(c) (UCHAR(c)) /* Turn char literal into chr literal */ #define DIGITVAL(c) ((c)-'0') /* Turn chr digit into its value */ -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 #define CHRBITS 32 /* Bits in a chr; must not use sizeof */ #define CHR_MIN 0x00000000 /* Smallest and largest chr; the value */ #define CHR_MAX 0x10ffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */ diff --git a/generic/tcl.decls b/generic/tcl.decls index 26a58dc..68efba2 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -1245,10 +1245,10 @@ declare 350 { declare 351 { int Tcl_UniCharIsWordChar(int ch) } -declare 352 { +declare 352 {deprecated {Use Tcl_GetCharLength}} { int Tcl_UniCharLen(const Tcl_UniChar *uniStr) } -declare 353 { +declare 353 {deprecated {Use Tcl_UtfNcmp}} { int Tcl_UniCharNcmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars) } @@ -1338,10 +1338,10 @@ declare 376 { declare 377 { void Tcl_RegExpGetInfo(Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr) } -declare 378 { +declare 378 
{deprecated {Use Tcl_UniCharToUtfDString}} { Tcl_Obj *Tcl_NewUnicodeObj(const Tcl_UniChar *unicode, int numChars) } -declare 379 { +declare 379 {deprecated {Use Tcl_UniCharToUtfDString}} { void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars) } @@ -1357,7 +1357,7 @@ declare 382 {deprecated {No longer in use, changed to macro}} { declare 383 { Tcl_Obj *Tcl_GetRange(Tcl_Obj *objPtr, int first, int last) } -declare 384 { +declare 384 {deprecated {Use Tcl_AppendStringsToObj}} { void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int length) } @@ -1483,11 +1483,11 @@ declare 417 { declare 418 { int Tcl_IsChannelExisting(const char *channelName) } -declare 419 { +declare 419 {deprecated {Use Tcl_UtfNcasecmp}} { int Tcl_UniCharNcasecmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars) } -declare 420 { +declare 420 {deprecated {Use Tcl_StringCaseMatch}} { int Tcl_UniCharCaseMatch(const Tcl_UniChar *uniStr, const Tcl_UniChar *uniPattern, int nocase) } @@ -1541,7 +1541,7 @@ declare 433 { } # introduced in 8.4a3 -declare 434 { +declare 434 {deprecated {Use Tcl_UtfToUniCharDString}} { Tcl_UniChar *Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr) } diff --git a/generic/tcl.h b/generic/tcl.h index 3df2c2d..1c3115d 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2144,7 +2144,7 @@ typedef struct Tcl_EncodingType { */ #ifndef TCL_UTF_MAX -#define TCL_UTF_MAX 4 +#define TCL_UTF_MAX 3 #endif /* @@ -2152,7 +2152,7 @@ typedef struct Tcl_EncodingType { * reflected in regcustom.h. */ -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 /* * int isn't 100% accurate as it should be a strict 4-byte value * (perhaps wchar_t). 64-bit systems may have troubles. 
The size of this @@ -2342,10 +2342,10 @@ typedef int (Tcl_NRPostProc) (ClientData data[], Tcl_Interp *interp, /* *---------------------------------------------------------------------------- * The following constant is used to test for older versions of Tcl in the - * stubs tables. If TCL_UTF_MAX>4 use a different value. + * stubs tables. */ -#define TCL_STUB_MAGIC ((int) 0xFCA3BACF + (TCL_UTF_MAX>4)) +#define TCL_STUB_MAGIC ((int) 0xFCA3BACF) /* * The following function is required to be defined in all stubs aware diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index a754a09..8706fb6 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -1221,7 +1221,7 @@ Tcl_SplitObjCmd( len = TclUtfToUniChar(stringPtr, &ch); fullchar = ch; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((ch >= 0xD800) && (len < 3)) { len += TclUtfToUniChar(stringPtr + len, &ch); fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; @@ -1911,7 +1911,7 @@ StringIsCmd( int fullchar; length2 = TclUtfToUniChar(string1, &ch); fullchar = ch; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((ch >= 0xD800) && (length2 < 3)) { length2 += TclUtfToUniChar(string1 + length2, &ch); fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; diff --git a/generic/tclDecls.h b/generic/tclDecls.h index eddd385..c993602 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1063,9 +1063,11 @@ EXTERN int Tcl_UniCharIsUpper(int ch); /* 351 */ EXTERN int Tcl_UniCharIsWordChar(int ch); /* 352 */ -EXTERN int Tcl_UniCharLen(const Tcl_UniChar *uniStr); +TCL_DEPRECATED("Use Tcl_GetCharLength") +int Tcl_UniCharLen(const Tcl_UniChar *uniStr); /* 353 */ -EXTERN int Tcl_UniCharNcmp(const Tcl_UniChar *ucs, +TCL_DEPRECATED("Use Tcl_UtfNcmp") +int Tcl_UniCharNcmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 354 */ @@ -1141,10 +1143,12 @@ EXTERN int Tcl_RegExpExecObj(Tcl_Interp *interp, EXTERN void Tcl_RegExpGetInfo(Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr); /* 
378 */ -EXTERN Tcl_Obj * Tcl_NewUnicodeObj(const Tcl_UniChar *unicode, +TCL_DEPRECATED("Use Tcl_UniCharToUtfDString") +Tcl_Obj * Tcl_NewUnicodeObj(const Tcl_UniChar *unicode, int numChars); /* 379 */ -EXTERN void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, +TCL_DEPRECATED("Use Tcl_UniCharToUtfDString") +void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars); /* 380 */ EXTERN int Tcl_GetCharLength(Tcl_Obj *objPtr); @@ -1156,7 +1160,8 @@ Tcl_UniChar * Tcl_GetUnicode(Tcl_Obj *objPtr); /* 383 */ EXTERN Tcl_Obj * Tcl_GetRange(Tcl_Obj *objPtr, int first, int last); /* 384 */ -EXTERN void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, +TCL_DEPRECATED("Use Tcl_AppendStringsToObj") +void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int length); /* 385 */ EXTERN int Tcl_RegExpMatchObj(Tcl_Interp *interp, @@ -1250,11 +1255,13 @@ EXTERN void Tcl_ClearChannelHandlers(Tcl_Channel channel); /* 418 */ EXTERN int Tcl_IsChannelExisting(const char *channelName); /* 419 */ -EXTERN int Tcl_UniCharNcasecmp(const Tcl_UniChar *ucs, +TCL_DEPRECATED("Use Tcl_UtfNcasecmp") +int Tcl_UniCharNcasecmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 420 */ -EXTERN int Tcl_UniCharCaseMatch(const Tcl_UniChar *uniStr, +TCL_DEPRECATED("Use Tcl_StringCaseMatch") +int Tcl_UniCharCaseMatch(const Tcl_UniChar *uniStr, const Tcl_UniChar *uniPattern, int nocase); /* 421 */ EXTERN Tcl_HashEntry * Tcl_FindHashEntry(Tcl_HashTable *tablePtr, @@ -1297,7 +1304,8 @@ EXTERN int Tcl_AttemptSetObjLength(Tcl_Obj *objPtr, int length); /* 433 */ EXTERN Tcl_ThreadId Tcl_GetChannelThread(Tcl_Channel channel); /* 434 */ -EXTERN Tcl_UniChar * Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, +TCL_DEPRECATED("Use Tcl_UtfToUniCharDString") +Tcl_UniChar * Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr); /* 435 */ TCL_DEPRECATED("") @@ -2300,8 +2308,8 @@ typedef struct TclStubs { int (*tcl_UniCharIsSpace) (int ch); /* 349 */ int (*tcl_UniCharIsUpper) (int ch); /* 350 */ 
int (*tcl_UniCharIsWordChar) (int ch); /* 351 */ - int (*tcl_UniCharLen) (const Tcl_UniChar *uniStr); /* 352 */ - int (*tcl_UniCharNcmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 353 */ + TCL_DEPRECATED_API("Use Tcl_GetCharLength") int (*tcl_UniCharLen) (const Tcl_UniChar *uniStr); /* 352 */ + TCL_DEPRECATED_API("Use Tcl_UtfNcmp") int (*tcl_UniCharNcmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 353 */ char * (*tcl_Char16ToUtfDString) (const unsigned short *uniStr, int uniLength, Tcl_DString *dsPtr); /* 354 */ unsigned short * (*tcl_UtfToChar16DString) (const char *src, int length, Tcl_DString *dsPtr); /* 355 */ Tcl_RegExp (*tcl_GetRegExpFromObj) (Tcl_Interp *interp, Tcl_Obj *patObj, int flags); /* 356 */ @@ -2326,13 +2334,13 @@ typedef struct TclStubs { int (*tcl_UniCharIsPunct) (int ch); /* 375 */ int (*tcl_RegExpExecObj) (Tcl_Interp *interp, Tcl_RegExp regexp, Tcl_Obj *textObj, int offset, int nmatches, int flags); /* 376 */ void (*tcl_RegExpGetInfo) (Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr); /* 377 */ - Tcl_Obj * (*tcl_NewUnicodeObj) (const Tcl_UniChar *unicode, int numChars); /* 378 */ - void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars); /* 379 */ + TCL_DEPRECATED_API("Use Tcl_UniCharToUtfDString") Tcl_Obj * (*tcl_NewUnicodeObj) (const Tcl_UniChar *unicode, int numChars); /* 378 */ + TCL_DEPRECATED_API("Use Tcl_UniCharToUtfDString") void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars); /* 379 */ int (*tcl_GetCharLength) (Tcl_Obj *objPtr); /* 380 */ int (*tcl_GetUniChar) (Tcl_Obj *objPtr, int index); /* 381 */ TCL_DEPRECATED_API("No longer in use, changed to macro") Tcl_UniChar * (*tcl_GetUnicode) (Tcl_Obj *objPtr); /* 382 */ Tcl_Obj * (*tcl_GetRange) (Tcl_Obj *objPtr, int first, int last); /* 383 */ - void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int length); /* 384 */ + 
TCL_DEPRECATED_API("Use Tcl_AppendStringsToObj") void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int length); /* 384 */ int (*tcl_RegExpMatchObj) (Tcl_Interp *interp, Tcl_Obj *textObj, Tcl_Obj *patternObj); /* 385 */ void (*tcl_SetNotifier) (Tcl_NotifierProcs *notifierProcPtr); /* 386 */ Tcl_Mutex * (*tcl_GetAllocMutex) (void); /* 387 */ @@ -2367,8 +2375,8 @@ typedef struct TclStubs { void (*tcl_SpliceChannel) (Tcl_Channel channel); /* 416 */ void (*tcl_ClearChannelHandlers) (Tcl_Channel channel); /* 417 */ int (*tcl_IsChannelExisting) (const char *channelName); /* 418 */ - int (*tcl_UniCharNcasecmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 419 */ - int (*tcl_UniCharCaseMatch) (const Tcl_UniChar *uniStr, const Tcl_UniChar *uniPattern, int nocase); /* 420 */ + TCL_DEPRECATED_API("Use Tcl_UtfNcasecmp") int (*tcl_UniCharNcasecmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 419 */ + TCL_DEPRECATED_API("Use Tcl_StringCaseMatch") int (*tcl_UniCharCaseMatch) (const Tcl_UniChar *uniStr, const Tcl_UniChar *uniPattern, int nocase); /* 420 */ Tcl_HashEntry * (*tcl_FindHashEntry) (Tcl_HashTable *tablePtr, const void *key); /* 421 */ Tcl_HashEntry * (*tcl_CreateHashEntry) (Tcl_HashTable *tablePtr, const void *key, int *newPtr); /* 422 */ void (*tcl_InitCustomHashTable) (Tcl_HashTable *tablePtr, int keyType, const Tcl_HashKeyType *typePtr); /* 423 */ @@ -2382,7 +2390,7 @@ typedef struct TclStubs { char * (*tcl_AttemptDbCkrealloc) (char *ptr, unsigned int size, const char *file, int line); /* 431 */ int (*tcl_AttemptSetObjLength) (Tcl_Obj *objPtr, int length); /* 432 */ Tcl_ThreadId (*tcl_GetChannelThread) (Tcl_Channel channel); /* 433 */ - Tcl_UniChar * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int *lengthPtr); /* 434 */ + TCL_DEPRECATED_API("Use Tcl_UtfToUniCharDString") Tcl_UniChar * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int *lengthPtr); /* 434 */ TCL_DEPRECATED_API("") int 
(*tcl_GetMathFuncInfo) (Tcl_Interp *interp, const char *name, int *numArgsPtr, Tcl_ValueType **argTypesPtr, Tcl_MathProc **procPtr, ClientData *clientDataPtr); /* 435 */ TCL_DEPRECATED_API("") Tcl_Obj * (*tcl_ListMathFuncs) (Tcl_Interp *interp, const char *pattern); /* 436 */ Tcl_Obj * (*tcl_SubstObj) (Tcl_Interp *interp, Tcl_Obj *objPtr, int flags); /* 437 */ @@ -4110,7 +4118,7 @@ extern const TclStubs *tclStubsPtr; #undef Tcl_StringMatch #define Tcl_StringMatch(str, pattern) Tcl_StringCaseMatch((str), (pattern), 0) -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 # undef Tcl_UniCharToUtfDString # define Tcl_UniCharToUtfDString Tcl_Char16ToUtfDString # undef Tcl_UtfToUniCharDString diff --git a/generic/tclDisassemble.c b/generic/tclDisassemble.c index 76a4d46..bbe1a97 100644 --- a/generic/tclDisassemble.c +++ b/generic/tclDisassemble.c @@ -903,7 +903,7 @@ PrintSourceToObj( i += 2; continue; default: -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 if (ch > 0xffff) { Tcl_AppendPrintfToObj(appendObj, "\\U%08x", ch); i += 10; diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index a88c1a7..e7cc344 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2418,7 +2418,7 @@ UtfToUtfProc( int len = TclUtfToUniChar(src, chPtr); src += len; dst += Tcl_UniCharToUtf(*chPtr, dst); -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((*chPtr >= 0xD800) && (len < 3)) { src += TclUtfToUniChar(src + len, chPtr); dst += Tcl_UniCharToUtf(*chPtr, dst); @@ -2609,7 +2609,7 @@ UtfToUtf16Proc( */ if (clientData) { -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 if (*chPtr <= 0xFFFF) { *dst++ = (*chPtr & 0xFF); *dst++ = (*chPtr >> 8); @@ -2624,7 +2624,7 @@ UtfToUtf16Proc( *dst++ = (*chPtr >> 8); #endif } else { -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 if (*chPtr <= 0xFFFF) { *dst++ = (*chPtr >> 8); *dst++ = (*chPtr & 0xFF); @@ -2691,7 +2691,7 @@ UtfToUcs2Proc( { const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; int result, numChars; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 
int len; #endif Tcl_UniChar ch = 0; @@ -2721,7 +2721,7 @@ UtfToUcs2Proc( result = TCL_CONVERT_NOSPACE; break; } -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 src += (len = TclUtfToUniChar(src, &ch)); if ((ch >= 0xD800) && (len < 3)) { src += TclUtfToUniChar(src, &ch); @@ -2945,7 +2945,7 @@ TableFromUtfProc( } len = TclUtfToUniChar(src, &ch); -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 /* * This prevents a crash condition. More evaluation is required for * full support of int Tcl_UniChar. [Bug 1004065] @@ -3156,7 +3156,7 @@ Iso88591FromUtfProc( */ if (ch > 0xff -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 || ((ch >= 0xD800) && (len < 3)) #endif ) { @@ -3164,7 +3164,7 @@ Iso88591FromUtfProc( result = TCL_CONVERT_UNKNOWN; break; } -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((ch >= 0xD800) && (len < 3)) len = 4; #endif /* diff --git a/generic/tclInt.h b/generic/tclInt.h index 4d73469..26f1840 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -4620,7 +4620,7 @@ MODULE_SCOPE void TclDbInitNewObj(Tcl_Obj *objPtr, const char *file, *---------------------------------------------------------------- */ -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 #define TclUtfToUniChar(str, chPtr) \ ((((unsigned char) *(str)) < 0x80) ? \ ((*(chPtr) = (unsigned char) *(str)), 1) \ diff --git a/generic/tclMain.c b/generic/tclMain.c index 4a66793..b7d740a 100644 --- a/generic/tclMain.c +++ b/generic/tclMain.c @@ -53,15 +53,6 @@ # define _tcscmp strcmp #endif -/* - * Further on, in UNICODE mode we just use Tcl_NewUnicodeObj, otherwise - * NewNativeObj is needed (which provides proper conversion from native - * encoding to UTF-8). 
- */ - -#if defined(UNICODE) && (TCL_UTF_MAX <= 4) -# define NewNativeObj Tcl_NewUnicodeObj -#else /* !UNICODE || (TCL_UTF_MAX > 4) */ static inline Tcl_Obj * NewNativeObj( TCHAR *string, @@ -77,7 +68,6 @@ NewNativeObj( #endif return TclDStringToObj(&ds); } -#endif /* !UNICODE || (TCL_UTF_MAX > 4) */ /* * Declarations for various library functions and variables (don't want to diff --git a/generic/tclScan.c b/generic/tclScan.c index 74ec2da..916809f 100644 --- a/generic/tclScan.c +++ b/generic/tclScan.c @@ -881,7 +881,7 @@ Tcl_ScanObjCmd( offset = TclUtfToUniChar(string, &sch); i = (int)sch; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((sch >= 0xD800) && (offset < 3)) { offset += TclUtfToUniChar(string+offset, &sch); i = (((i<<10) & 0x0FFC00) + 0x10000) + (sch & 0x3FF); diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index ce687c6..1b4f225 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -568,7 +568,7 @@ Tcl_GetUniChar( return -1; } ch = stringPtr->unicode[index]; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 /* See: bug [11ae2be95dac9417] */ if ((ch & 0xF800) == 0xD800) { if (ch & 0x400) { @@ -752,7 +752,7 @@ Tcl_GetRange( if (last < first) { return Tcl_NewObj(); } -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 /* See: bug [11ae2be95dac9417] */ if ((first > 0) && ((stringPtr->unicode[first] & 0xFC00) == 0xDC00) && ((stringPtr->unicode[first-1] & 0xFC00) == 0xD800)) { diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index 1c3f094..01b0303 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -39,6 +39,14 @@ #undef Tcl_NewObj #undef Tcl_NewStringObj #undef Tcl_GetUnicode +#undef Tcl_GetUnicodeFromObj +#undef Tcl_AppendUnicodeToObj +#undef Tcl_NewUnicodeObj +#undef Tcl_SetUnicodeObj +#undef Tcl_UniCharNcasecmp +#undef Tcl_UniCharCaseMatch +#undef Tcl_UniCharLen +#undef Tcl_UniCharNcmp #undef Tcl_DumpActiveMemory #undef Tcl_ValidateAllMemory #undef Tcl_FindHashEntry @@ -419,7 +427,15 @@ static int 
uniCharNcasecmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsig # define Tcl_SetExitProc 0 # define Tcl_SetPanicProc 0 # define Tcl_FindExecutable 0 +# define Tcl_GetUnicodeFromObj 0 # define Tcl_GetUnicode 0 +# define Tcl_AppendUnicodeToObj 0 +# define Tcl_NewUnicodeObj 0 +# define Tcl_SetUnicodeObj 0 +# define Tcl_UniCharNcasecmp 0 +# define Tcl_UniCharCaseMatch 0 +# define Tcl_UniCharLen 0 +# define Tcl_UniCharNcmp 0 # define TclOldFreeObj 0 # undef Tcl_StringMatch # define Tcl_StringMatch 0 diff --git a/generic/tclTestObj.c b/generic/tclTestObj.c index c9e4a6f..699c503 100644 --- a/generic/tclTestObj.c +++ b/generic/tclTestObj.c @@ -1178,8 +1178,7 @@ TeststringobjCmd( Tcl_Obj **varPtr; static const char *const options[] = { "append", "appendstrings", "get", "get2", "length", "length2", - "set", "set2", "setlength", "maxchars", "getunicode", - "appendself", "appendself2", NULL + "set", "set2", "setlength", "maxchars", "appendself", NULL }; if (objc < 3) { @@ -1344,13 +1343,7 @@ TeststringobjCmd( } Tcl_SetIntObj(Tcl_GetObjResult(interp), length); break; - case 10: /* getunicode */ - if (objc != 3) { - goto wrongNumArgs; - } - Tcl_GetUnicode(varPtr[varIndex]); - break; - case 11: /* appendself */ + case 10: /* appendself */ if (objc != 4) { goto wrongNumArgs; } @@ -1381,37 +1374,6 @@ TeststringobjCmd( Tcl_AppendToObj(varPtr[varIndex], string + i, length - i); Tcl_SetObjResult(interp, varPtr[varIndex]); break; - case 12: /* appendself2 */ - if (objc != 4) { - goto wrongNumArgs; - } - if (varPtr[varIndex] == NULL) { - SetVarToObj(varPtr, varIndex, Tcl_NewObj()); - } - - /* - * If the object bound to variable "varIndex" is shared, we must - * "copy on write" and append to a copy of the object. 
- */ - - if (Tcl_IsShared(varPtr[varIndex])) { - SetVarToObj(varPtr, varIndex, Tcl_DuplicateObj(varPtr[varIndex])); - } - - unicode = Tcl_GetUnicodeFromObj(varPtr[varIndex], &length); - - if (Tcl_GetIntFromObj(interp, objv[3], &i) != TCL_OK) { - return TCL_ERROR; - } - if ((i < 0) || (i > length)) { - Tcl_SetObjResult(interp, Tcl_NewStringObj( - "index value out of range", -1)); - return TCL_ERROR; - } - - Tcl_AppendUnicodeToObj(varPtr[varIndex], unicode + i, length - i); - Tcl_SetObjResult(interp, varPtr[varIndex]); - break; } return TCL_OK; diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 56b6d4d..2870c44 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -787,7 +787,7 @@ Tcl_UtfFindFirst( while (1) { len = TclUtfToUniChar(src, &find); fullchar = find; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((ch >= 0xD800) && (len < 3)) { len += TclUtfToUniChar(src + len, &find); fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000; @@ -835,7 +835,7 @@ Tcl_UtfFindLast( while (1) { len = TclUtfToUniChar(src, &find); fullchar = find; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((ch >= 0xD800) && (len < 3)) { len += TclUtfToUniChar(src + len, &find); fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000; @@ -878,7 +878,7 @@ Tcl_UtfNext( Tcl_UniChar ch = 0; int len = TclUtfToUniChar(src, &ch); -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((ch >= 0xD800) && (len < 3)) { len += TclUtfToUniChar(src + len, &ch); } @@ -960,19 +960,19 @@ Tcl_UniCharAtIndex( { Tcl_UniChar ch = 0; int fullchar = 0; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 int len = 0; #endif while (index-- >= 0) { -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 src += (len = TclUtfToUniChar(src, &ch)); #else src += TclUtfToUniChar(src, &ch); #endif } fullchar = ch; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((ch >= 0xD800) && (len < 3)) { /* If last Tcl_UniChar was a high surrogate, combine with low surrogate */ (void)TclUtfToUniChar(src, &ch); @@ -988,7 +988,7 @@ 
Tcl_UniCharAtIndex( * Tcl_UtfAtIndex -- * * Returns a pointer to the specified character (not byte) position in - * the UTF-8 string. If TCL_UTF_MAX <= 4, characters > U+FFFF count as + * the UTF-8 string. If TCL_UTF_MAX <= 3, characters > U+FFFF count as * 2 positions, but then the pointer should never be placed between * the two positions. * @@ -1013,7 +1013,7 @@ Tcl_UtfAtIndex( len = TclUtfToUniChar(src, &ch); src += len; } -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((ch >= 0xD800) && (len < 3)) { /* Index points at character following high Surrogate */ src += TclUtfToUniChar(src, &ch); @@ -1110,7 +1110,7 @@ Tcl_UtfToUpper( while (*src) { len = TclUtfToUniChar(src, &ch); upChar = ch; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((ch >= 0xD800) && (len < 3)) { len += TclUtfToUniChar(src + len, &ch); /* Combine surrogates */ @@ -1172,7 +1172,7 @@ Tcl_UtfToLower( while (*src) { len = TclUtfToUniChar(src, &ch); lowChar = ch; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((ch >= 0xD800) && (len < 3)) { len += TclUtfToUniChar(src + len, &ch); /* Combine surrogates */ @@ -1237,7 +1237,7 @@ Tcl_UtfToTitle( if (*src) { len = TclUtfToUniChar(src, &ch); titleChar = ch; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((ch >= 0xD800) && (len < 3)) { len += TclUtfToUniChar(src + len, &ch); /* Combine surrogates */ @@ -1257,7 +1257,7 @@ Tcl_UtfToTitle( while (*src) { len = TclUtfToUniChar(src, &ch); lowChar = ch; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 if ((ch >= 0xD800) && (len < 3)) { len += TclUtfToUniChar(src + len, &ch); /* Combine surrogates */ @@ -1369,7 +1369,7 @@ Tcl_UtfNcmp( cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); if (ch1 != ch2) { -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 /* Surrogates always report higher than non-surrogates */ if (((ch1 & 0xFC00) == 0xD800)) { if ((ch2 & 0xFC00) != 0xD800) { @@ -1420,7 +1420,7 @@ Tcl_UtfNcasecmp( cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); if (ch1 != ch2) { -#if 
TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 /* Surrogates always report higher than non-surrogates */ if (((ch1 & 0xFC00) == 0xD800)) { if ((ch2 & 0xFC00) != 0xD800) { @@ -1469,7 +1469,7 @@ TclUtfCmp( cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); if (ch1 != ch2) { -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 /* Surrogates always report higher than non-surrogates */ if (((ch1 & 0xFC00) == 0xD800)) { if ((ch2 & 0xFC00) != 0xD800) { @@ -1515,7 +1515,7 @@ TclUtfCasecmp( cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); if (ch1 != ch2) { -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX <= 3 /* Surrogates always report higher than non-surrogates */ if (((ch1 & 0xFC00) == 0xD800)) { if ((ch2 & 0xFC00) != 0xD800) { diff --git a/tests/stringObj.test b/tests/stringObj.test index a78b5f8..cc9d123 100644 --- a/tests/stringObj.test +++ b/tests/stringObj.test @@ -439,9 +439,9 @@ test stringObj-13.8 {Tcl_GetCharLength with identity nulls} {testobj testbytestr test stringObj-14.1 {Tcl_SetObjLength on pure unicode object} testobj { teststringobj set 1 foo - teststringobj getunicode 1 + teststringobj maxchars 1 teststringobj append 1 bar -1 - teststringobj getunicode 1 + teststringobj maxchars 1 teststringobj append 1 bar -1 teststringobj setlength 1 0 teststringobj append 1 bar -1 @@ -464,22 +464,6 @@ test stringObj-15.4 {Tcl_Append*ToObj: self appends} testobj { teststringobj set 1 foo teststringobj appendself 1 3 } foo -test stringObj-15.5 {Tcl_Append*ToObj: self appends} testobj { - teststringobj set 1 foo - teststringobj appendself2 1 0 -} foofoo -test stringObj-15.6 {Tcl_Append*ToObj: self appends} testobj { - teststringobj set 1 foo - teststringobj appendself2 1 1 -} foooo -test stringObj-15.7 {Tcl_Append*ToObj: self appends} testobj { - teststringobj set 1 foo - teststringobj appendself2 1 2 -} fooo -test stringObj-15.8 {Tcl_Append*ToObj: self appends} testobj { - teststringobj set 1 foo - teststringobj appendself2 1 3 -} foo if {[testConstraint testobj]} 
{ testobj freeallvars diff --git a/tests/utf.test b/tests/utf.test index dc1a435..f75d19e 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -471,8 +471,8 @@ test utf-25.1 {Tcl_UniCharNcasecmp} -constraints teststringobj \ -body { teststringobj set 1 a teststringobj set 2 b - teststringobj getunicode 1 - teststringobj getunicode 2 + teststringobj maxchars 1 + teststringobj maxchars 2 string compare -nocase [teststringobj get 1] [teststringobj get 2] } \ -cleanup { @@ -486,8 +486,8 @@ test utf-25.2 {Tcl_UniCharNcasecmp} -constraints teststringobj \ -body { teststringobj set 1 b teststringobj set 2 a - teststringobj getunicode 1 - teststringobj getunicode 2 + teststringobj maxchars 1 + teststringobj maxchars 2 string compare -nocase [teststringobj get 1] [teststringobj get 2] } \ -cleanup { @@ -501,8 +501,8 @@ test utf-25.3 {Tcl_UniCharNcasecmp} -constraints teststringobj \ -body { teststringobj set 1 B teststringobj set 2 a - teststringobj getunicode 1 - teststringobj getunicode 2 + teststringobj maxchars 1 + teststringobj maxchars 2 string compare -nocase [teststringobj get 1] [teststringobj get 2] } \ -cleanup { @@ -517,8 +517,8 @@ test utf-25.4 {Tcl_UniCharNcasecmp} -constraints teststringobj \ -body { teststringobj set 1 aBcB teststringobj set 2 abca - teststringobj getunicode 1 - teststringobj getunicode 2 + teststringobj maxchars 1 + teststringobj maxchars 2 string compare -nocase [teststringobj get 1] [teststringobj get 2] } \ -cleanup { diff --git a/win/rules.vc b/win/rules.vc index 3fa0704..34ac230 100644 --- a/win/rules.vc +++ b/win/rules.vc @@ -688,7 +688,7 @@ LINKERFLAGS = $(LINKERFLAGS) -ltcg # configuration (ignored for Tcl itself)
# _USE_64BIT_TIME_T - forces a build using 64-bit time_t for 32-bit build
# (CRT library should support this)
-# TCL_UTF_MAX=6 - forces a build using 32-bit Tcl_UniChar in stead of 16-bit.
+# TCL_UTF_MAX=4 - forces a build using 32-bit Tcl_UniChar instead of 16-bit.
# Further, LINKERFLAGS are modified based on above.
# Default values for all the above
@@ -755,7 +755,7 @@ _USE_64BIT_TIME_T = 1 !if [nmakehlp -f $(OPTS) "utfmax"]
!message *** Force 32-bit Tcl_UniChar
-TCL_UTF_MAX = 6
+TCL_UTF_MAX = 4
!endif
# Yes, it's weird that the "symbols" option controls DEBUG and
@@ -1321,8 +1321,8 @@ OPTDEFINES = $(OPTDEFINES) -DNO_STRTOI64 !if "$(_USE_64BIT_TIME_T)" == "1"
OPTDEFINES = $(OPTDEFINES) -D_USE_64BIT_TIME_T
!endif
-!if "$(TCL_UTF_MAX)" == "6"
-OPTDEFINES = $(OPTDEFINES) -DTCL_UTF_MAX=6
+!if "$(TCL_UTF_MAX)" == "4"
+OPTDEFINES = $(OPTDEFINES) -DTCL_UTF_MAX=4
!endif
# _ATL_XP_TARGETING - Newer SDK's need this to build for XP
|