summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.travis.yml37
-rw-r--r--doc/Utf.315
-rw-r--r--generic/regcustom.h2
-rw-r--r--generic/tcl.decls16
-rw-r--r--generic/tcl.h8
-rw-r--r--generic/tclCmdMZ.c4
-rw-r--r--generic/tclDecls.h42
-rw-r--r--generic/tclDisassemble.c2
-rw-r--r--generic/tclEncoding.c16
-rw-r--r--generic/tclInt.h2
-rw-r--r--generic/tclMain.c10
-rw-r--r--generic/tclScan.c2
-rw-r--r--generic/tclStringObj.c4
-rw-r--r--generic/tclStubInit.c16
-rw-r--r--generic/tclTestObj.c42
-rw-r--r--generic/tclUtf.c32
-rw-r--r--tests/stringObj.test20
-rw-r--r--tests/utf.test16
-rw-r--r--win/rules.vc8
19 files changed, 118 insertions, 176 deletions
diff --git a/.travis.yml b/.travis.yml
index 73e3fc2..538006d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -17,20 +17,13 @@ matrix:
env:
- CFGOPT=--disable-shared
- BUILD_DIR=unix
- - name: "Linux/GCC/Shared: UTF_MAX=6"
+ - name: "Linux/GCC/Shared: UTF_MAX=4"
os: linux
dist: xenial
compiler: gcc
env:
- BUILD_DIR=unix
- - CFGOPT=CFLAGS=-DTCL_UTF_MAX=6
- - name: "Linux/GCC/Shared: UTF_MAX=3"
- os: linux
- dist: xenial
- compiler: gcc
- env:
- - BUILD_DIR=unix
- - CFGOPT=CFLAGS=-DTCL_UTF_MAX=3
+ - CFGOPT=CFLAGS=-DTCL_UTF_MAX=4
- name: "Linux/GCC/Shared: NO_DEPRECATED"
os: linux
dist: xenial
@@ -190,23 +183,14 @@ matrix:
- BUILD_DIR=win
- CFGOPT="--host=i686-w64-mingw32 --disable-shared"
script: *crosstest
- - name: "Linux-cross-Windows-32/GCC/Shared/no test: UTF_MAX=6"
+ - name: "Linux-cross-Windows-32/GCC/Shared/no test: UTF_MAX=4"
os: linux
dist: xenial
compiler: i686-w64-mingw32-gcc
addons: *mingw32
env:
- BUILD_DIR=win
- - CFGOPT="--host=i686-w64-mingw32 CFLAGS=-DTCL_UTF_MAX=6"
- script: *crosstest
- - name: "Linux-cross-Windows-32/GCC/Shared/no test: UTF_MAX=3"
- os: linux
- dist: xenial
- compiler: i686-w64-mingw32-gcc
- addons: *mingw32
- env:
- - BUILD_DIR=win
- - CFGOPT="--host=i686-w64-mingw32 CFLAGS=-DTCL_UTF_MAX=3"
+ - CFGOPT="--host=i686-w64-mingw32 CFLAGS=-DTCL_UTF_MAX=4"
script: *crosstest
- name: "Linux-cross-Windows-32/GCC/Shared/no test: NO_DEPRECATED"
os: linux
@@ -253,23 +237,14 @@ matrix:
- BUILD_DIR=win
- CFGOPT="--host=x86_64-w64-mingw32 --enable-64bit --disable-shared"
script: *crosstest
- - name: "Linux-cross-Windows-64/GCC/Shared/no test: UTF_MAX=6"
- os: linux
- dist: xenial
- compiler: x86_64-w64-mingw32-gcc
- addons: *mingw64
- env:
- - BUILD_DIR=win
- - CFGOPT="--host=x86_64-w64-mingw32 --enable-64bit CFLAGS=-DTCL_UTF_MAX=6"
- script: *crosstest
- - name: "Linux-cross-Windows-64/GCC/Shared/no test: UTF_MAX=3"
+ - name: "Linux-cross-Windows-64/GCC/Shared/no test: UTF_MAX=4"
os: linux
dist: xenial
compiler: x86_64-w64-mingw32-gcc
addons: *mingw64
env:
- BUILD_DIR=win
- - CFGOPT="--host=x86_64-w64-mingw32 --enable-64bit CFLAGS=-DTCL_UTF_MAX=3"
+ - CFGOPT="--host=x86_64-w64-mingw32 --enable-64bit CFLAGS=-DTCL_UTF_MAX=4"
script: *crosstest
- name: "Linux-cross-Windows-64/GCC/Shared/no test: NO_DEPRECATED"
os: linux
diff --git a/doc/Utf.3 b/doc/Utf.3
index 9b0ec3c..b2a8389 100644
--- a/doc/Utf.3
+++ b/doc/Utf.3
@@ -8,7 +8,7 @@
.so man.macros
.BS
.SH NAME
-Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UtfToChar16, Tcl_UtfToWChar, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_WCharToUtfDString, Tcl_UtfToWCharDString, Tcl_Char16ToUtfDString, Tcl_UtfToChar16DString, Tcl_UniCharLen, Tcl_UniCharNcmp, Tcl_UniCharNcasecmp, Tcl_UniCharCaseMatch, Tcl_UtfNcmp, Tcl_UtfNcasecmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings
+Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UtfToChar16, Tcl_UtfToWChar, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_Char16ToUtfDString, Tcl_UtfToWCharDString, Tcl_UtfToChar16DString, Tcl_UniCharLen, Tcl_UniCharNcmp, Tcl_UniCharNcasecmp, Tcl_UniCharCaseMatch, Tcl_UtfNcmp, Tcl_UtfNcasecmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings
.SH SYNOPSIS
.nf
\fB#include <tcl.h>\fR
@@ -120,6 +120,12 @@ A null-terminated Unicode string.
A null-terminated UTF-16 string.
.AP "const wchar_t" *wStr in
A null-terminated wchar_t string.
+.AP "const unsigned short" *utf16s in
+A null-terminated utf-16 string.
+.AP "const unsigned short" *utf16t in
+A null-terminated utf-16 string.
+.AP "const unsigned short" *utf16Pattern in
+A null-terminated utf-16 string.
.AP int length in
The length of the UTF-8 string in bytes (not UTF-8 characters). If
negative, all bytes up to the first null byte are used.
@@ -147,9 +153,10 @@ case-insensitive (1).
.SH DESCRIPTION
.PP
These routines convert between UTF-8 strings and Unicode/Utf-16 characters.
-A UTF-8 character is a Unicode character represented as a varying-length
-sequence of up to \fBTCL_UTF_MAX\fR bytes. A multibyte UTF-8 sequence
-consists of a lead byte followed by some number of trail bytes.
+An Unicode character represented as an unsigned, fixed-size
+quantity. A UTF-8 character is a Unicode character represented as
+a varying-length sequence of up to \fBTCL_UTF_MAX\fR bytes. A multibyte UTF-8
+sequence consists of a lead byte followed by some number of trail bytes.
.PP
\fBTCL_UTF_MAX\fR is the maximum number of bytes that it takes to
represent one Unicode character in the UTF-8 representation.
diff --git a/generic/regcustom.h b/generic/regcustom.h
index 4396399..a6c19a3 100644
--- a/generic/regcustom.h
+++ b/generic/regcustom.h
@@ -88,7 +88,7 @@ typedef int celt; /* Type to hold chr, or NOCELT */
#define NOCELT (-1) /* Celt value which is not valid chr */
#define CHR(c) (UCHAR(c)) /* Turn char literal into chr literal */
#define DIGITVAL(c) ((c)-'0') /* Turn chr digit into its value */
-#if TCL_UTF_MAX > 4
+#if TCL_UTF_MAX > 3
#define CHRBITS 32 /* Bits in a chr; must not use sizeof */
#define CHR_MIN 0x00000000 /* Smallest and largest chr; the value */
#define CHR_MAX 0x10ffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */
diff --git a/generic/tcl.decls b/generic/tcl.decls
index 26a58dc..68efba2 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -1245,10 +1245,10 @@ declare 350 {
declare 351 {
int Tcl_UniCharIsWordChar(int ch)
}
-declare 352 {
+declare 352 {deprecated {Use Tcl_GetCharLength}} {
int Tcl_UniCharLen(const Tcl_UniChar *uniStr)
}
-declare 353 {
+declare 353 {deprecated {Use Tcl_UtfNcmp}} {
int Tcl_UniCharNcmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct,
unsigned long numChars)
}
@@ -1338,10 +1338,10 @@ declare 376 {
declare 377 {
void Tcl_RegExpGetInfo(Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr)
}
-declare 378 {
+declare 378 {deprecated {Use Tcl_UniCharToUtfDString}} {
Tcl_Obj *Tcl_NewUnicodeObj(const Tcl_UniChar *unicode, int numChars)
}
-declare 379 {
+declare 379 {deprecated {Use Tcl_UniCharToUtfDString}} {
void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode,
int numChars)
}
@@ -1357,7 +1357,7 @@ declare 382 {deprecated {No longer in use, changed to macro}} {
declare 383 {
Tcl_Obj *Tcl_GetRange(Tcl_Obj *objPtr, int first, int last)
}
-declare 384 {
+declare 384 {deprecated {Use Tcl_AppendStringsToObj}} {
void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode,
int length)
}
@@ -1483,11 +1483,11 @@ declare 417 {
declare 418 {
int Tcl_IsChannelExisting(const char *channelName)
}
-declare 419 {
+declare 419 {deprecated {Use Tcl_UtfNcasecmp}} {
int Tcl_UniCharNcasecmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct,
unsigned long numChars)
}
-declare 420 {
+declare 420 {deprecated {Use Tcl_StringCaseMatch}} {
int Tcl_UniCharCaseMatch(const Tcl_UniChar *uniStr,
const Tcl_UniChar *uniPattern, int nocase)
}
@@ -1541,7 +1541,7 @@ declare 433 {
}
# introduced in 8.4a3
-declare 434 {
+declare 434 {deprecated {Use Tcl_UtfToUniCharDString}} {
Tcl_UniChar *Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr)
}
diff --git a/generic/tcl.h b/generic/tcl.h
index 3df2c2d..1c3115d 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -2144,7 +2144,7 @@ typedef struct Tcl_EncodingType {
*/
#ifndef TCL_UTF_MAX
-#define TCL_UTF_MAX 4
+#define TCL_UTF_MAX 3
#endif
/*
@@ -2152,7 +2152,7 @@ typedef struct Tcl_EncodingType {
* reflected in regcustom.h.
*/
-#if TCL_UTF_MAX > 4
+#if TCL_UTF_MAX > 3
/*
* int isn't 100% accurate as it should be a strict 4-byte value
* (perhaps wchar_t). 64-bit systems may have troubles. The size of this
@@ -2342,10 +2342,10 @@ typedef int (Tcl_NRPostProc) (ClientData data[], Tcl_Interp *interp,
/*
*----------------------------------------------------------------------------
* The following constant is used to test for older versions of Tcl in the
- * stubs tables. If TCL_UTF_MAX>4 use a different value.
+ * stubs tables.
*/
-#define TCL_STUB_MAGIC ((int) 0xFCA3BACF + (TCL_UTF_MAX>4))
+#define TCL_STUB_MAGIC ((int) 0xFCA3BACF)
/*
* The following function is required to be defined in all stubs aware
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index a754a09..8706fb6 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -1221,7 +1221,7 @@ Tcl_SplitObjCmd(
len = TclUtfToUniChar(stringPtr, &ch);
fullchar = ch;
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
if ((ch >= 0xD800) && (len < 3)) {
len += TclUtfToUniChar(stringPtr + len, &ch);
fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
@@ -1911,7 +1911,7 @@ StringIsCmd(
int fullchar;
length2 = TclUtfToUniChar(string1, &ch);
fullchar = ch;
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
if ((ch >= 0xD800) && (length2 < 3)) {
length2 += TclUtfToUniChar(string1 + length2, &ch);
fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index eddd385..c993602 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -1063,9 +1063,11 @@ EXTERN int Tcl_UniCharIsUpper(int ch);
/* 351 */
EXTERN int Tcl_UniCharIsWordChar(int ch);
/* 352 */
-EXTERN int Tcl_UniCharLen(const Tcl_UniChar *uniStr);
+TCL_DEPRECATED("Use Tcl_GetCharLength")
+int Tcl_UniCharLen(const Tcl_UniChar *uniStr);
/* 353 */
-EXTERN int Tcl_UniCharNcmp(const Tcl_UniChar *ucs,
+TCL_DEPRECATED("Use Tcl_UtfNcmp")
+int Tcl_UniCharNcmp(const Tcl_UniChar *ucs,
const Tcl_UniChar *uct,
unsigned long numChars);
/* 354 */
@@ -1141,10 +1143,12 @@ EXTERN int Tcl_RegExpExecObj(Tcl_Interp *interp,
EXTERN void Tcl_RegExpGetInfo(Tcl_RegExp regexp,
Tcl_RegExpInfo *infoPtr);
/* 378 */
-EXTERN Tcl_Obj * Tcl_NewUnicodeObj(const Tcl_UniChar *unicode,
+TCL_DEPRECATED("Use Tcl_UniCharToUtfDString")
+Tcl_Obj * Tcl_NewUnicodeObj(const Tcl_UniChar *unicode,
int numChars);
/* 379 */
-EXTERN void Tcl_SetUnicodeObj(Tcl_Obj *objPtr,
+TCL_DEPRECATED("Use Tcl_UniCharToUtfDString")
+void Tcl_SetUnicodeObj(Tcl_Obj *objPtr,
const Tcl_UniChar *unicode, int numChars);
/* 380 */
EXTERN int Tcl_GetCharLength(Tcl_Obj *objPtr);
@@ -1156,7 +1160,8 @@ Tcl_UniChar * Tcl_GetUnicode(Tcl_Obj *objPtr);
/* 383 */
EXTERN Tcl_Obj * Tcl_GetRange(Tcl_Obj *objPtr, int first, int last);
/* 384 */
-EXTERN void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr,
+TCL_DEPRECATED("Use Tcl_AppendStringsToObj")
+void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr,
const Tcl_UniChar *unicode, int length);
/* 385 */
EXTERN int Tcl_RegExpMatchObj(Tcl_Interp *interp,
@@ -1250,11 +1255,13 @@ EXTERN void Tcl_ClearChannelHandlers(Tcl_Channel channel);
/* 418 */
EXTERN int Tcl_IsChannelExisting(const char *channelName);
/* 419 */
-EXTERN int Tcl_UniCharNcasecmp(const Tcl_UniChar *ucs,
+TCL_DEPRECATED("Use Tcl_UtfNcasecmp")
+int Tcl_UniCharNcasecmp(const Tcl_UniChar *ucs,
const Tcl_UniChar *uct,
unsigned long numChars);
/* 420 */
-EXTERN int Tcl_UniCharCaseMatch(const Tcl_UniChar *uniStr,
+TCL_DEPRECATED("Use Tcl_StringCaseMatch")
+int Tcl_UniCharCaseMatch(const Tcl_UniChar *uniStr,
const Tcl_UniChar *uniPattern, int nocase);
/* 421 */
EXTERN Tcl_HashEntry * Tcl_FindHashEntry(Tcl_HashTable *tablePtr,
@@ -1297,7 +1304,8 @@ EXTERN int Tcl_AttemptSetObjLength(Tcl_Obj *objPtr, int length);
/* 433 */
EXTERN Tcl_ThreadId Tcl_GetChannelThread(Tcl_Channel channel);
/* 434 */
-EXTERN Tcl_UniChar * Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr,
+TCL_DEPRECATED("Use Tcl_UtfToUniCharDString")
+Tcl_UniChar * Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr,
int *lengthPtr);
/* 435 */
TCL_DEPRECATED("")
@@ -2300,8 +2308,8 @@ typedef struct TclStubs {
int (*tcl_UniCharIsSpace) (int ch); /* 349 */
int (*tcl_UniCharIsUpper) (int ch); /* 350 */
int (*tcl_UniCharIsWordChar) (int ch); /* 351 */
- int (*tcl_UniCharLen) (const Tcl_UniChar *uniStr); /* 352 */
- int (*tcl_UniCharNcmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 353 */
+ TCL_DEPRECATED_API("Use Tcl_GetCharLength") int (*tcl_UniCharLen) (const Tcl_UniChar *uniStr); /* 352 */
+ TCL_DEPRECATED_API("Use Tcl_UtfNcmp") int (*tcl_UniCharNcmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 353 */
char * (*tcl_Char16ToUtfDString) (const unsigned short *uniStr, int uniLength, Tcl_DString *dsPtr); /* 354 */
unsigned short * (*tcl_UtfToChar16DString) (const char *src, int length, Tcl_DString *dsPtr); /* 355 */
Tcl_RegExp (*tcl_GetRegExpFromObj) (Tcl_Interp *interp, Tcl_Obj *patObj, int flags); /* 356 */
@@ -2326,13 +2334,13 @@ typedef struct TclStubs {
int (*tcl_UniCharIsPunct) (int ch); /* 375 */
int (*tcl_RegExpExecObj) (Tcl_Interp *interp, Tcl_RegExp regexp, Tcl_Obj *textObj, int offset, int nmatches, int flags); /* 376 */
void (*tcl_RegExpGetInfo) (Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr); /* 377 */
- Tcl_Obj * (*tcl_NewUnicodeObj) (const Tcl_UniChar *unicode, int numChars); /* 378 */
- void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars); /* 379 */
+ TCL_DEPRECATED_API("Use Tcl_UniCharToUtfDString") Tcl_Obj * (*tcl_NewUnicodeObj) (const Tcl_UniChar *unicode, int numChars); /* 378 */
+ TCL_DEPRECATED_API("Use Tcl_UniCharToUtfDString") void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars); /* 379 */
int (*tcl_GetCharLength) (Tcl_Obj *objPtr); /* 380 */
int (*tcl_GetUniChar) (Tcl_Obj *objPtr, int index); /* 381 */
TCL_DEPRECATED_API("No longer in use, changed to macro") Tcl_UniChar * (*tcl_GetUnicode) (Tcl_Obj *objPtr); /* 382 */
Tcl_Obj * (*tcl_GetRange) (Tcl_Obj *objPtr, int first, int last); /* 383 */
- void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int length); /* 384 */
+ TCL_DEPRECATED_API("Use Tcl_AppendStringsToObj") void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int length); /* 384 */
int (*tcl_RegExpMatchObj) (Tcl_Interp *interp, Tcl_Obj *textObj, Tcl_Obj *patternObj); /* 385 */
void (*tcl_SetNotifier) (Tcl_NotifierProcs *notifierProcPtr); /* 386 */
Tcl_Mutex * (*tcl_GetAllocMutex) (void); /* 387 */
@@ -2367,8 +2375,8 @@ typedef struct TclStubs {
void (*tcl_SpliceChannel) (Tcl_Channel channel); /* 416 */
void (*tcl_ClearChannelHandlers) (Tcl_Channel channel); /* 417 */
int (*tcl_IsChannelExisting) (const char *channelName); /* 418 */
- int (*tcl_UniCharNcasecmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 419 */
- int (*tcl_UniCharCaseMatch) (const Tcl_UniChar *uniStr, const Tcl_UniChar *uniPattern, int nocase); /* 420 */
+ TCL_DEPRECATED_API("Use Tcl_UtfNcasecmp") int (*tcl_UniCharNcasecmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 419 */
+ TCL_DEPRECATED_API("Use Tcl_StringCaseMatch") int (*tcl_UniCharCaseMatch) (const Tcl_UniChar *uniStr, const Tcl_UniChar *uniPattern, int nocase); /* 420 */
Tcl_HashEntry * (*tcl_FindHashEntry) (Tcl_HashTable *tablePtr, const void *key); /* 421 */
Tcl_HashEntry * (*tcl_CreateHashEntry) (Tcl_HashTable *tablePtr, const void *key, int *newPtr); /* 422 */
void (*tcl_InitCustomHashTable) (Tcl_HashTable *tablePtr, int keyType, const Tcl_HashKeyType *typePtr); /* 423 */
@@ -2382,7 +2390,7 @@ typedef struct TclStubs {
char * (*tcl_AttemptDbCkrealloc) (char *ptr, unsigned int size, const char *file, int line); /* 431 */
int (*tcl_AttemptSetObjLength) (Tcl_Obj *objPtr, int length); /* 432 */
Tcl_ThreadId (*tcl_GetChannelThread) (Tcl_Channel channel); /* 433 */
- Tcl_UniChar * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int *lengthPtr); /* 434 */
+ TCL_DEPRECATED_API("Use Tcl_UtfToUniCharDString") Tcl_UniChar * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int *lengthPtr); /* 434 */
TCL_DEPRECATED_API("") int (*tcl_GetMathFuncInfo) (Tcl_Interp *interp, const char *name, int *numArgsPtr, Tcl_ValueType **argTypesPtr, Tcl_MathProc **procPtr, ClientData *clientDataPtr); /* 435 */
TCL_DEPRECATED_API("") Tcl_Obj * (*tcl_ListMathFuncs) (Tcl_Interp *interp, const char *pattern); /* 436 */
Tcl_Obj * (*tcl_SubstObj) (Tcl_Interp *interp, Tcl_Obj *objPtr, int flags); /* 437 */
@@ -4110,7 +4118,7 @@ extern const TclStubs *tclStubsPtr;
#undef Tcl_StringMatch
#define Tcl_StringMatch(str, pattern) Tcl_StringCaseMatch((str), (pattern), 0)
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
# undef Tcl_UniCharToUtfDString
# define Tcl_UniCharToUtfDString Tcl_Char16ToUtfDString
# undef Tcl_UtfToUniCharDString
diff --git a/generic/tclDisassemble.c b/generic/tclDisassemble.c
index 76a4d46..bbe1a97 100644
--- a/generic/tclDisassemble.c
+++ b/generic/tclDisassemble.c
@@ -903,7 +903,7 @@ PrintSourceToObj(
i += 2;
continue;
default:
-#if TCL_UTF_MAX > 4
+#if TCL_UTF_MAX > 3
if (ch > 0xffff) {
Tcl_AppendPrintfToObj(appendObj, "\\U%08x", ch);
i += 10;
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index a88c1a7..e7cc344 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2418,7 +2418,7 @@ UtfToUtfProc(
int len = TclUtfToUniChar(src, chPtr);
src += len;
dst += Tcl_UniCharToUtf(*chPtr, dst);
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
if ((*chPtr >= 0xD800) && (len < 3)) {
src += TclUtfToUniChar(src + len, chPtr);
dst += Tcl_UniCharToUtf(*chPtr, dst);
@@ -2609,7 +2609,7 @@ UtfToUtf16Proc(
*/
if (clientData) {
-#if TCL_UTF_MAX > 4
+#if TCL_UTF_MAX > 3
if (*chPtr <= 0xFFFF) {
*dst++ = (*chPtr & 0xFF);
*dst++ = (*chPtr >> 8);
@@ -2624,7 +2624,7 @@ UtfToUtf16Proc(
*dst++ = (*chPtr >> 8);
#endif
} else {
-#if TCL_UTF_MAX > 4
+#if TCL_UTF_MAX > 3
if (*chPtr <= 0xFFFF) {
*dst++ = (*chPtr >> 8);
*dst++ = (*chPtr & 0xFF);
@@ -2691,7 +2691,7 @@ UtfToUcs2Proc(
{
const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
int result, numChars;
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
int len;
#endif
Tcl_UniChar ch = 0;
@@ -2721,7 +2721,7 @@ UtfToUcs2Proc(
result = TCL_CONVERT_NOSPACE;
break;
}
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
src += (len = TclUtfToUniChar(src, &ch));
if ((ch >= 0xD800) && (len < 3)) {
src += TclUtfToUniChar(src, &ch);
@@ -2945,7 +2945,7 @@ TableFromUtfProc(
}
len = TclUtfToUniChar(src, &ch);
-#if TCL_UTF_MAX > 4
+#if TCL_UTF_MAX > 3
/*
* This prevents a crash condition. More evaluation is required for
* full support of int Tcl_UniChar. [Bug 1004065]
@@ -3156,7 +3156,7 @@ Iso88591FromUtfProc(
*/
if (ch > 0xff
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
|| ((ch >= 0xD800) && (len < 3))
#endif
) {
@@ -3164,7 +3164,7 @@ Iso88591FromUtfProc(
result = TCL_CONVERT_UNKNOWN;
break;
}
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
if ((ch >= 0xD800) && (len < 3)) len = 4;
#endif
/*
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 4d73469..26f1840 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -4620,7 +4620,7 @@ MODULE_SCOPE void TclDbInitNewObj(Tcl_Obj *objPtr, const char *file,
*----------------------------------------------------------------
*/
-#if TCL_UTF_MAX > 4
+#if TCL_UTF_MAX > 3
#define TclUtfToUniChar(str, chPtr) \
((((unsigned char) *(str)) < 0x80) ? \
((*(chPtr) = (unsigned char) *(str)), 1) \
diff --git a/generic/tclMain.c b/generic/tclMain.c
index 4a66793..b7d740a 100644
--- a/generic/tclMain.c
+++ b/generic/tclMain.c
@@ -53,15 +53,6 @@
# define _tcscmp strcmp
#endif
-/*
- * Further on, in UNICODE mode we just use Tcl_NewUnicodeObj, otherwise
- * NewNativeObj is needed (which provides proper conversion from native
- * encoding to UTF-8).
- */
-
-#if defined(UNICODE) && (TCL_UTF_MAX <= 4)
-# define NewNativeObj Tcl_NewUnicodeObj
-#else /* !UNICODE || (TCL_UTF_MAX > 4) */
static inline Tcl_Obj *
NewNativeObj(
TCHAR *string,
@@ -77,7 +68,6 @@ NewNativeObj(
#endif
return TclDStringToObj(&ds);
}
-#endif /* !UNICODE || (TCL_UTF_MAX > 4) */
/*
* Declarations for various library functions and variables (don't want to
diff --git a/generic/tclScan.c b/generic/tclScan.c
index 74ec2da..916809f 100644
--- a/generic/tclScan.c
+++ b/generic/tclScan.c
@@ -881,7 +881,7 @@ Tcl_ScanObjCmd(
offset = TclUtfToUniChar(string, &sch);
i = (int)sch;
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
if ((sch >= 0xD800) && (offset < 3)) {
offset += TclUtfToUniChar(string+offset, &sch);
i = (((i<<10) & 0x0FFC00) + 0x10000) + (sch & 0x3FF);
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index ce687c6..1b4f225 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -568,7 +568,7 @@ Tcl_GetUniChar(
return -1;
}
ch = stringPtr->unicode[index];
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
/* See: bug [11ae2be95dac9417] */
if ((ch & 0xF800) == 0xD800) {
if (ch & 0x400) {
@@ -752,7 +752,7 @@ Tcl_GetRange(
if (last < first) {
return Tcl_NewObj();
}
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
/* See: bug [11ae2be95dac9417] */
if ((first > 0) && ((stringPtr->unicode[first] & 0xFC00) == 0xDC00)
&& ((stringPtr->unicode[first-1] & 0xFC00) == 0xD800)) {
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index 1c3f094..01b0303 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -39,6 +39,14 @@
#undef Tcl_NewObj
#undef Tcl_NewStringObj
#undef Tcl_GetUnicode
+#undef Tcl_GetUnicodeFromObj
+#undef Tcl_AppendUnicodeToObj
+#undef Tcl_NewUnicodeObj
+#undef Tcl_SetUnicodeObj
+#undef Tcl_UniCharNcasecmp
+#undef Tcl_UniCharCaseMatch
+#undef Tcl_UniCharLen
+#undef Tcl_UniCharNcmp
#undef Tcl_DumpActiveMemory
#undef Tcl_ValidateAllMemory
#undef Tcl_FindHashEntry
@@ -419,7 +427,15 @@ static int uniCharNcasecmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsig
# define Tcl_SetExitProc 0
# define Tcl_SetPanicProc 0
# define Tcl_FindExecutable 0
+# define Tcl_GetUnicodeFromObj 0
# define Tcl_GetUnicode 0
+# define Tcl_AppendUnicodeToObj 0
+# define Tcl_NewUnicodeObj 0
+# define Tcl_SetUnicodeObj 0
+# define Tcl_UniCharNcasecmp 0
+# define Tcl_UniCharCaseMatch 0
+# define Tcl_UniCharLen 0
+# define Tcl_UniCharNcmp 0
# define TclOldFreeObj 0
# undef Tcl_StringMatch
# define Tcl_StringMatch 0
diff --git a/generic/tclTestObj.c b/generic/tclTestObj.c
index c9e4a6f..699c503 100644
--- a/generic/tclTestObj.c
+++ b/generic/tclTestObj.c
@@ -1178,8 +1178,7 @@ TeststringobjCmd(
Tcl_Obj **varPtr;
static const char *const options[] = {
"append", "appendstrings", "get", "get2", "length", "length2",
- "set", "set2", "setlength", "maxchars", "getunicode",
- "appendself", "appendself2", NULL
+ "set", "set2", "setlength", "maxchars", "appendself", NULL
};
if (objc < 3) {
@@ -1344,13 +1343,7 @@ TeststringobjCmd(
}
Tcl_SetIntObj(Tcl_GetObjResult(interp), length);
break;
- case 10: /* getunicode */
- if (objc != 3) {
- goto wrongNumArgs;
- }
- Tcl_GetUnicode(varPtr[varIndex]);
- break;
- case 11: /* appendself */
+ case 10: /* appendself */
if (objc != 4) {
goto wrongNumArgs;
}
@@ -1381,37 +1374,6 @@ TeststringobjCmd(
Tcl_AppendToObj(varPtr[varIndex], string + i, length - i);
Tcl_SetObjResult(interp, varPtr[varIndex]);
break;
- case 12: /* appendself2 */
- if (objc != 4) {
- goto wrongNumArgs;
- }
- if (varPtr[varIndex] == NULL) {
- SetVarToObj(varPtr, varIndex, Tcl_NewObj());
- }
-
- /*
- * If the object bound to variable "varIndex" is shared, we must
- * "copy on write" and append to a copy of the object.
- */
-
- if (Tcl_IsShared(varPtr[varIndex])) {
- SetVarToObj(varPtr, varIndex, Tcl_DuplicateObj(varPtr[varIndex]));
- }
-
- unicode = Tcl_GetUnicodeFromObj(varPtr[varIndex], &length);
-
- if (Tcl_GetIntFromObj(interp, objv[3], &i) != TCL_OK) {
- return TCL_ERROR;
- }
- if ((i < 0) || (i > length)) {
- Tcl_SetObjResult(interp, Tcl_NewStringObj(
- "index value out of range", -1));
- return TCL_ERROR;
- }
-
- Tcl_AppendUnicodeToObj(varPtr[varIndex], unicode + i, length - i);
- Tcl_SetObjResult(interp, varPtr[varIndex]);
- break;
}
return TCL_OK;
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 56b6d4d..2870c44 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -787,7 +787,7 @@ Tcl_UtfFindFirst(
while (1) {
len = TclUtfToUniChar(src, &find);
fullchar = find;
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
if ((ch >= 0xD800) && (len < 3)) {
len += TclUtfToUniChar(src + len, &find);
fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000;
@@ -835,7 +835,7 @@ Tcl_UtfFindLast(
while (1) {
len = TclUtfToUniChar(src, &find);
fullchar = find;
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
if ((ch >= 0xD800) && (len < 3)) {
len += TclUtfToUniChar(src + len, &find);
fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000;
@@ -878,7 +878,7 @@ Tcl_UtfNext(
Tcl_UniChar ch = 0;
int len = TclUtfToUniChar(src, &ch);
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
if ((ch >= 0xD800) && (len < 3)) {
len += TclUtfToUniChar(src + len, &ch);
}
@@ -960,19 +960,19 @@ Tcl_UniCharAtIndex(
{
Tcl_UniChar ch = 0;
int fullchar = 0;
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
int len = 0;
#endif
while (index-- >= 0) {
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
src += (len = TclUtfToUniChar(src, &ch));
#else
src += TclUtfToUniChar(src, &ch);
#endif
}
fullchar = ch;
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
if ((ch >= 0xD800) && (len < 3)) {
/* If last Tcl_UniChar was a high surrogate, combine with low surrogate */
(void)TclUtfToUniChar(src, &ch);
@@ -988,7 +988,7 @@ Tcl_UniCharAtIndex(
* Tcl_UtfAtIndex --
*
* Returns a pointer to the specified character (not byte) position in
- * the UTF-8 string. If TCL_UTF_MAX <= 4, characters > U+FFFF count as
+ * the UTF-8 string. If TCL_UTF_MAX <= 3, characters > U+FFFF count as
* 2 positions, but then the pointer should never be placed between
* the two positions.
*
@@ -1013,7 +1013,7 @@ Tcl_UtfAtIndex(
len = TclUtfToUniChar(src, &ch);
src += len;
}
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
if ((ch >= 0xD800) && (len < 3)) {
/* Index points at character following high Surrogate */
src += TclUtfToUniChar(src, &ch);
@@ -1110,7 +1110,7 @@ Tcl_UtfToUpper(
while (*src) {
len = TclUtfToUniChar(src, &ch);
upChar = ch;
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
if ((ch >= 0xD800) && (len < 3)) {
len += TclUtfToUniChar(src + len, &ch);
/* Combine surrogates */
@@ -1172,7 +1172,7 @@ Tcl_UtfToLower(
while (*src) {
len = TclUtfToUniChar(src, &ch);
lowChar = ch;
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
if ((ch >= 0xD800) && (len < 3)) {
len += TclUtfToUniChar(src + len, &ch);
/* Combine surrogates */
@@ -1237,7 +1237,7 @@ Tcl_UtfToTitle(
if (*src) {
len = TclUtfToUniChar(src, &ch);
titleChar = ch;
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
if ((ch >= 0xD800) && (len < 3)) {
len += TclUtfToUniChar(src + len, &ch);
/* Combine surrogates */
@@ -1257,7 +1257,7 @@ Tcl_UtfToTitle(
while (*src) {
len = TclUtfToUniChar(src, &ch);
lowChar = ch;
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
if ((ch >= 0xD800) && (len < 3)) {
len += TclUtfToUniChar(src + len, &ch);
/* Combine surrogates */
@@ -1369,7 +1369,7 @@ Tcl_UtfNcmp(
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
if (ch1 != ch2) {
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
/* Surrogates always report higher than non-surrogates */
if (((ch1 & 0xFC00) == 0xD800)) {
if ((ch2 & 0xFC00) != 0xD800) {
@@ -1420,7 +1420,7 @@ Tcl_UtfNcasecmp(
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
if (ch1 != ch2) {
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
/* Surrogates always report higher than non-surrogates */
if (((ch1 & 0xFC00) == 0xD800)) {
if ((ch2 & 0xFC00) != 0xD800) {
@@ -1469,7 +1469,7 @@ TclUtfCmp(
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
if (ch1 != ch2) {
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
/* Surrogates always report higher than non-surrogates */
if (((ch1 & 0xFC00) == 0xD800)) {
if ((ch2 & 0xFC00) != 0xD800) {
@@ -1515,7 +1515,7 @@ TclUtfCasecmp(
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
if (ch1 != ch2) {
-#if TCL_UTF_MAX <= 4
+#if TCL_UTF_MAX <= 3
/* Surrogates always report higher than non-surrogates */
if (((ch1 & 0xFC00) == 0xD800)) {
if ((ch2 & 0xFC00) != 0xD800) {
diff --git a/tests/stringObj.test b/tests/stringObj.test
index a78b5f8..cc9d123 100644
--- a/tests/stringObj.test
+++ b/tests/stringObj.test
@@ -439,9 +439,9 @@ test stringObj-13.8 {Tcl_GetCharLength with identity nulls} {testobj testbytestr
test stringObj-14.1 {Tcl_SetObjLength on pure unicode object} testobj {
teststringobj set 1 foo
- teststringobj getunicode 1
+ teststringobj maxchars 1
teststringobj append 1 bar -1
- teststringobj getunicode 1
+ teststringobj maxchars 1
teststringobj append 1 bar -1
teststringobj setlength 1 0
teststringobj append 1 bar -1
@@ -464,22 +464,6 @@ test stringObj-15.4 {Tcl_Append*ToObj: self appends} testobj {
teststringobj set 1 foo
teststringobj appendself 1 3
} foo
-test stringObj-15.5 {Tcl_Append*ToObj: self appends} testobj {
- teststringobj set 1 foo
- teststringobj appendself2 1 0
-} foofoo
-test stringObj-15.6 {Tcl_Append*ToObj: self appends} testobj {
- teststringobj set 1 foo
- teststringobj appendself2 1 1
-} foooo
-test stringObj-15.7 {Tcl_Append*ToObj: self appends} testobj {
- teststringobj set 1 foo
- teststringobj appendself2 1 2
-} fooo
-test stringObj-15.8 {Tcl_Append*ToObj: self appends} testobj {
- teststringobj set 1 foo
- teststringobj appendself2 1 3
-} foo
if {[testConstraint testobj]} {
testobj freeallvars
diff --git a/tests/utf.test b/tests/utf.test
index dc1a435..f75d19e 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -471,8 +471,8 @@ test utf-25.1 {Tcl_UniCharNcasecmp} -constraints teststringobj \
-body {
teststringobj set 1 a
teststringobj set 2 b
- teststringobj getunicode 1
- teststringobj getunicode 2
+ teststringobj maxchars 1
+ teststringobj maxchars 2
string compare -nocase [teststringobj get 1] [teststringobj get 2]
} \
-cleanup {
@@ -486,8 +486,8 @@ test utf-25.2 {Tcl_UniCharNcasecmp} -constraints teststringobj \
-body {
teststringobj set 1 b
teststringobj set 2 a
- teststringobj getunicode 1
- teststringobj getunicode 2
+ teststringobj maxchars 1
+ teststringobj maxchars 2
string compare -nocase [teststringobj get 1] [teststringobj get 2]
} \
-cleanup {
@@ -501,8 +501,8 @@ test utf-25.3 {Tcl_UniCharNcasecmp} -constraints teststringobj \
-body {
teststringobj set 1 B
teststringobj set 2 a
- teststringobj getunicode 1
- teststringobj getunicode 2
+ teststringobj maxchars 1
+ teststringobj maxchars 2
string compare -nocase [teststringobj get 1] [teststringobj get 2]
} \
-cleanup {
@@ -517,8 +517,8 @@ test utf-25.4 {Tcl_UniCharNcasecmp} -constraints teststringobj \
-body {
teststringobj set 1 aBcB
teststringobj set 2 abca
- teststringobj getunicode 1
- teststringobj getunicode 2
+ teststringobj maxchars 1
+ teststringobj maxchars 2
string compare -nocase [teststringobj get 1] [teststringobj get 2]
} \
-cleanup {
diff --git a/win/rules.vc b/win/rules.vc
index 3fa0704..34ac230 100644
--- a/win/rules.vc
+++ b/win/rules.vc
@@ -688,7 +688,7 @@ LINKERFLAGS = $(LINKERFLAGS) -ltcg
# configuration (ignored for Tcl itself)
# _USE_64BIT_TIME_T - forces a build using 64-bit time_t for 32-bit build
# (CRT library should support this)
-# TCL_UTF_MAX=6 - forces a build using 32-bit Tcl_UniChar in stead of 16-bit.
+# TCL_UTF_MAX=4 - forces a build using 32-bit Tcl_UniChar in stead of 16-bit.
# Further, LINKERFLAGS are modified based on above.
# Default values for all the above
@@ -755,7 +755,7 @@ _USE_64BIT_TIME_T = 1
!if [nmakehlp -f $(OPTS) "utfmax"]
!message *** Force 32-bit Tcl_UniChar
-TCL_UTF_MAX = 6
+TCL_UTF_MAX = 4
!endif
# Yes, it's weird that the "symbols" option controls DEBUG and
@@ -1321,8 +1321,8 @@ OPTDEFINES = $(OPTDEFINES) -DNO_STRTOI64
!if "$(_USE_64BIT_TIME_T)" == "1"
OPTDEFINES = $(OPTDEFINES) -D_USE_64BIT_TIME_T
!endif
-!if "$(TCL_UTF_MAX)" == "6"
-OPTDEFINES = $(OPTDEFINES) -DTCL_UTF_MAX=6
+!if "$(TCL_UTF_MAX)" == "4"
+OPTDEFINES = $(OPTDEFINES) -DTCL_UTF_MAX=4
!endif
# _ATL_XP_TARGETING - Newer SDK's need this to build for XP