summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.github/workflows/linux-build.yml1
-rw-r--r--.github/workflows/win-build.yml2
-rw-r--r--.travis.yml39
-rw-r--r--generic/regcustom.h6
-rw-r--r--generic/tcl.h6
-rw-r--r--generic/tclBinary.c8
-rw-r--r--generic/tclCmdIL.c4
-rw-r--r--generic/tclCmdMZ.c47
-rw-r--r--generic/tclCompExpr.c4
-rw-r--r--generic/tclDisassemble.c6
-rw-r--r--generic/tclEncoding.c43
-rw-r--r--generic/tclEvent.c3
-rw-r--r--generic/tclExecute.c14
-rw-r--r--generic/tclInt.h59
-rw-r--r--generic/tclObj.c4
-rw-r--r--generic/tclParse.c4
-rw-r--r--generic/tclRegexp.c2
-rw-r--r--generic/tclScan.c2
-rw-r--r--generic/tclStringObj.c168
-rw-r--r--generic/tclStubInit.c26
-rw-r--r--generic/tclTest.c3
-rw-r--r--generic/tclUtf.c156
-rw-r--r--generic/tclUtil.c34
-rw-r--r--tests/utf.test366
-rw-r--r--win/makefile.vc3
-rw-r--r--win/rules.vc9
26 files changed, 221 insertions, 798 deletions
diff --git a/.github/workflows/linux-build.yml b/.github/workflows/linux-build.yml
index e3ad637..96ee7d9 100644
--- a/.github/workflows/linux-build.yml
+++ b/.github/workflows/linux-build.yml
@@ -16,7 +16,6 @@ jobs:
matrix:
cfgopt:
- ""
- - "CFLAGS=-DTCL_UTF_MAX=3"
- "CFLAGS=-DTCL_NO_DEPRECATED=1"
- "--disable-shared"
- "--enable-symbols"
diff --git a/.github/workflows/win-build.yml b/.github/workflows/win-build.yml
index 13a1316..2b772f9 100644
--- a/.github/workflows/win-build.yml
+++ b/.github/workflows/win-build.yml
@@ -22,7 +22,6 @@ jobs:
matrix:
cfgopt:
- ""
- - "OPTS=utf16"
- "CHECKS=nodep"
- "OPTS=static"
- "OPTS=symbols"
@@ -61,7 +60,6 @@ jobs:
matrix:
cfgopt:
- ""
- - "CFLAGS=-DTCL_UTF_MAX=3"
- "CFLAGS=-DTCL_NO_DEPRECATED=1"
- "--disable-shared"
- "--enable-symbols"
diff --git a/.travis.yml b/.travis.yml
index 02fd9a3..b63be12 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -20,13 +20,6 @@ jobs:
compiler: gcc
env:
- BUILD_DIR=unix
- - name: "Linux/GCC/Shared: UTF_MAX=4"
- os: linux
- dist: focal
- compiler: gcc
- env:
- - BUILD_DIR=unix
- - CFGOPT=CFLAGS=-DTCL_UTF_MAX=4
- name: "Linux/GCC/Shared: NO_DEPRECATED"
os: linux
dist: focal
@@ -215,15 +208,6 @@ jobs:
script:
- cmd.exe //C vcvarsall.bat x64 '&&' nmake '-f' makefile.vc all tcltest
- cmd.exe //C vcvarsall.bat x64 '&&' nmake '-f' makefile.vc test
- - name: "Windows/MSVC/Shared: UTF_MAX=4"
- os: windows
- compiler: cl
- env: *vcenv
- before_install: *vcpreinst
- install: []
- script:
- - cmd.exe //C vcvarsall.bat x64 '&&' nmake 'OPTS=utf16' '-f' makefile.vc all tcltest
- - cmd.exe //C vcvarsall.bat x64 '&&' nmake 'OPTS=utf16' '-f' makefile.vc test
- name: "Windows/MSVC/Shared: NO_DEPRECATED"
os: windows
compiler: cl
@@ -270,15 +254,6 @@ jobs:
script:
- cmd.exe //C vcvarsall.bat x86 '&&' nmake '-f' makefile.vc all tcltest
- cmd.exe //C vcvarsall.bat x86 '&&' nmake '-f' makefile.vc test
- - name: "Windows/MSVC-x86/Shared: UTF_MAX=4"
- os: windows
- compiler: cl
- env: *vcenv
- before_install: *vcpreinst
- install: []
- script:
- - cmd.exe //C vcvarsall.bat x86 '&&' nmake 'OPTS=utf16' '-f' makefile.vc all tcltest
- - cmd.exe //C vcvarsall.bat x86 '&&' nmake 'OPTS=utf16' '-f' makefile.vc test
- name: "Windows/MSVC-x86/Shared: NO_DEPRECATED"
os: windows
compiler: cl
@@ -326,13 +301,6 @@ jobs:
- touch generic/tclStubInit.c generic/tclOOStubInit.c generic/tclOOScript.h
- choco install -y make zip
- cd ${BUILD_DIR}
- - name: "Windows/GCC/Shared: UTF_MAX=4"
- os: windows
- compiler: gcc
- env:
- - BUILD_DIR=win
- - CFGOPT="--enable-64bit CFLAGS=-DTCL_UTF_MAX=4"
- before_install: *makepreinst
- name: "Windows/GCC/Shared: NO_DEPRECATED"
os: windows
compiler: gcc
@@ -368,13 +336,6 @@ jobs:
env:
- BUILD_DIR=win
before_install: *makepreinst
- - name: "Windows/GCC-x86/Shared: UTF_MAX=4"
- os: windows
- compiler: gcc
- env:
- - BUILD_DIR=win
- - CFGOPT="CFLAGS=-DTCL_UTF_MAX=4"
- before_install: *makepreinst
- name: "Windows/GCC-x86/Shared: NO_DEPRECATED"
os: windows
compiler: gcc
diff --git a/generic/regcustom.h b/generic/regcustom.h
index 5bda852..56bf571 100644
--- a/generic/regcustom.h
+++ b/generic/regcustom.h
@@ -88,15 +88,9 @@ typedef int celt; /* Type to hold chr, or NOCELT */
#define NOCELT (-1) /* Celt value which is not valid chr */
#define CHR(c) (UCHAR(c)) /* Turn char literal into chr literal */
#define DIGITVAL(c) ((c)-'0') /* Turn chr digit into its value */
-#if TCL_UTF_MAX > 3
#define CHRBITS 32 /* Bits in a chr; must not use sizeof */
#define CHR_MIN 0x00000000 /* Smallest and largest chr; the value */
#define CHR_MAX 0x10FFFF /* CHR_MAX-CHR_MIN+1 should fit in uchr */
-#else
-#define CHRBITS 16 /* Bits in a chr; must not use sizeof */
-#define CHR_MIN 0x0000 /* Smallest and largest chr; the value */
-#define CHR_MAX 0xFFFF /* CHR_MAX-CHR_MIN+1 should fit in uchr */
-#endif
/*
* Functions operating on chr.
diff --git a/generic/tcl.h b/generic/tcl.h
index d96b8aa..ff86949 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -2238,15 +2238,17 @@ typedef struct Tcl_EncodingType {
* reflected in regcustom.h.
*/
-#if TCL_UTF_MAX > 3
+#if TCL_UTF_MAX == 4
/*
* int isn't 100% accurate as it should be a strict 4-byte value
* (perhaps int32_t). ILP64/SILP64 systems may have troubles. The
* size of this value must be reflected correctly in regcustom.h.
*/
typedef int Tcl_UniChar;
-#else
+#elif TCL_UTF_MAX == 3 && !defined(BUILD_tcl)
typedef unsigned short Tcl_UniChar;
+#else
+# error "This TCL_UTF_MAX value is not supported"
#endif
/*
diff --git a/generic/tclBinary.c b/generic/tclBinary.c
index 6fde660..3112f02 100644
--- a/generic/tclBinary.c
+++ b/generic/tclBinary.c
@@ -435,7 +435,7 @@ Tcl_GetBytesFromObj(
irPtr = TclFetchInternalRep(objPtr, &tclByteArrayType);
baPtr = GET_BYTEARRAY(irPtr);
nonbyte = TclUtfAtIndex(Tcl_GetString(objPtr), baPtr->bad);
- TclUtfToUCS4(nonbyte, &ucs4);
+ Tcl_UtfToUniChar(nonbyte, &ucs4);
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
"expected byte sequence but character %d "
@@ -2653,7 +2653,7 @@ BinaryDecodeHex(
if (pure) {
ucs4 = c;
} else {
- TclUtfToUCS4((const char *)(data - 1), &ucs4);
+ Tcl_UtfToUniChar((const char *)(data - 1), &ucs4);
}
TclDecrRefCount(resultObj);
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
@@ -3110,7 +3110,7 @@ BinaryDecodeUu(
if (pure) {
ucs4 = c;
} else {
- TclUtfToUCS4((const char *)(data - 1), &ucs4);
+ Tcl_UtfToUniChar((const char *)(data - 1), &ucs4);
}
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
"invalid uuencode character \"%c\" (U+%06X) at position %d",
@@ -3284,7 +3284,7 @@ BinaryDecode64(
* of a multi-byte character. */
/* Safe because we know data is NUL-terminated */
- TclUtfToUCS4((const char *)(data - 1), &ucs4);
+ Tcl_UtfToUniChar((const char *)(data - 1), &ucs4);
}
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c
index f57a54a..0723b40 100644
--- a/generic/tclCmdIL.c
+++ b/generic/tclCmdIL.c
@@ -5431,8 +5431,8 @@ DictionaryCompare(
*/
if ((*left != '\0') && (*right != '\0')) {
- left += TclUtfToUCS4(left, &uniLeft);
- right += TclUtfToUCS4(right, &uniRight);
+ left += Tcl_UtfToUniChar(left, &uniLeft);
+ right += Tcl_UtfToUniChar(right, &uniRight);
/*
* Convert both chars to lower for the comparison, because
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index cd34364..0b0f68a 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -606,9 +606,9 @@ Tcl_RegsubObjCmd(
nocase = (cflags & TCL_REG_NOCASE);
strCmpFn = nocase ? TclUniCharNcasecmp : TclUniCharNcmp;
- wsrc = TclGetUnicodeFromObj_(objv[0], &slen);
- wstring = TclGetUnicodeFromObj_(objv[1], &wlen);
- wsubspec = TclGetUnicodeFromObj_(objv[2], &wsublen);
+ wsrc = TclGetUnicodeFromObj(objv[0], &slen);
+ wstring = TclGetUnicodeFromObj(objv[1], &wlen);
+ wsubspec = TclGetUnicodeFromObj(objv[2], &wsublen);
wend = wstring + wlen - (slen ? slen - 1 : 0);
result = TCL_OK;
@@ -699,14 +699,14 @@ Tcl_RegsubObjCmd(
} else {
objPtr = objv[1];
}
- wstring = TclGetUnicodeFromObj_(objPtr, &wlen);
+ wstring = TclGetUnicodeFromObj(objPtr, &wlen);
if (objv[2] == objv[0]) {
subPtr = Tcl_DuplicateObj(objv[2]);
} else {
subPtr = objv[2];
}
if (!command) {
- wsubspec = TclGetUnicodeFromObj_(subPtr, &wsublen);
+ wsubspec = TclGetUnicodeFromObj(subPtr, &wsublen);
}
result = TCL_OK;
@@ -826,7 +826,7 @@ Tcl_RegsubObjCmd(
* the user code.
*/
- wstring = TclGetUnicodeFromObj_(objPtr, &wlen);
+ wstring = TclGetUnicodeFromObj(objPtr, &wlen);
offset += end;
if (end == 0 || start == end) {
@@ -1217,7 +1217,7 @@ Tcl_SplitObjCmd(
Tcl_InitHashTable(&charReuseTable, TCL_ONE_WORD_KEYS);
for ( ; stringPtr < end; stringPtr += len) {
- len = TclUtfToUCS4(stringPtr, &ch);
+ len = Tcl_UtfToUniChar(stringPtr, &ch);
hPtr = Tcl_CreateHashEntry(&charReuseTable, INT2PTR(ch), &isNew);
if (isNew) {
TclNewStringObj(objPtr, stringPtr, len);
@@ -1263,9 +1263,9 @@ Tcl_SplitObjCmd(
splitEnd = splitChars + splitCharLen;
for (element = stringPtr; stringPtr < end; stringPtr += len) {
- len = TclUtfToUCS4(stringPtr, &ch);
+ len = Tcl_UtfToUniChar(stringPtr, &ch);
for (p = splitChars; p < splitEnd; p += splitLen) {
- splitLen = TclUtfToUCS4(p, &splitChar);
+ splitLen = Tcl_UtfToUniChar(p, &splitChar);
if (ch == splitChar) {
TclNewStringObj(objPtr, element, stringPtr - element);
Tcl_ListObjAppendElement(NULL, listPtr, objPtr);
@@ -1895,7 +1895,7 @@ StringIsCmd(
for (; string1 < end; string1 += length2, failat++) {
int ucs4;
- length2 = TclUtfToUCS4(string1, &ucs4);
+ length2 = Tcl_UtfToUniChar(string1, &ucs4);
if (!chcomp(ucs4)) {
result = 0;
break;
@@ -2060,7 +2060,7 @@ StringMapCmd(
} else {
sourceObj = objv[objc-1];
}
- ustring1 = TclGetUnicodeFromObj_(sourceObj, &length1);
+ ustring1 = TclGetUnicodeFromObj(sourceObj, &length1);
if (length1 == 0) {
/*
* Empty input string, just stop now.
@@ -2089,7 +2089,7 @@ StringMapCmd(
int mapLen, u2lc;
Tcl_UniChar *mapString;
- ustring2 = TclGetUnicodeFromObj_(mapElemv[0], &length2);
+ ustring2 = TclGetUnicodeFromObj(mapElemv[0], &length2);
p = ustring1;
if ((length2 > length1) || (length2 == 0)) {
/*
@@ -2098,7 +2098,7 @@ StringMapCmd(
ustring1 = end;
} else {
- mapString = TclGetUnicodeFromObj_(mapElemv[1], &mapLen);
+ mapString = TclGetUnicodeFromObj(mapElemv[1], &mapLen);
u2lc = (nocase ? Tcl_UniCharToLower(*ustring2) : 0);
for (; ustring1 < end; ustring1++) {
if (((*ustring1 == *ustring2) ||
@@ -2134,7 +2134,7 @@ StringMapCmd(
u2lc = (int *)TclStackAlloc(interp, mapElemc * sizeof(int));
}
for (index = 0; index < mapElemc; index++) {
- mapStrings[index] = TclGetUnicodeFromObj_(mapElemv[index],
+ mapStrings[index] = TclGetUnicodeFromObj(mapElemv[index],
mapLens+index);
if (nocase && ((index % 2) == 0)) {
u2lc[index/2] = Tcl_UniCharToLower(*mapStrings[index]);
@@ -2506,7 +2506,7 @@ StringStartCmd(
return TCL_ERROR;
}
- string = TclGetUnicodeFromObj_(objv[1], &length);
+ string = TclGetUnicodeFromObj(objv[1], &length);
if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) {
return TCL_ERROR;
}
@@ -2517,7 +2517,7 @@ StringStartCmd(
if (index > 0) {
p = &string[index];
- (void)TclUniCharToUCS4(p, &ch);
+ ch = *p;
for (cur = index; cur >= 0; cur--) {
int delta = 0;
const Tcl_UniChar *next;
@@ -2526,10 +2526,11 @@ StringStartCmd(
break;
}
- next = TclUCS4Prev(p, string);
+ next = (p > string) ? p - 1 : p;
do {
next += delta;
- delta = TclUniCharToUCS4(next, &ch);
+ ch = *next;
+ delta = 1;
} while (next + delta < p);
p = next;
}
@@ -2576,7 +2577,7 @@ StringEndCmd(
return TCL_ERROR;
}
- string = TclGetUnicodeFromObj_(objv[1], &length);
+ string = TclGetUnicodeFromObj(objv[1], &length);
if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) {
return TCL_ERROR;
}
@@ -2587,7 +2588,7 @@ StringEndCmd(
p = &string[index];
end = string+length;
for (cur = index; p < end; cur++) {
- p += TclUniCharToUCS4(p, &ch);
+ ch = *p++;
if (!Tcl_UniCharIsWordChar(ch)) {
break;
}
@@ -2931,7 +2932,7 @@ StringLowerCmd(
const char *start, *end;
Tcl_Obj *resultPtr;
- length1 = Tcl_NumUtfChars(string1, length1) - 1;
+ length1 = TclNumUtfChars(string1, length1) - 1;
if (TclGetIntForIndexM(interp,objv[2],length1, &first) != TCL_OK) {
return TCL_ERROR;
}
@@ -3016,7 +3017,7 @@ StringUpperCmd(
const char *start, *end;
Tcl_Obj *resultPtr;
- length1 = Tcl_NumUtfChars(string1, length1) - 1;
+ length1 = TclNumUtfChars(string1, length1) - 1;
if (TclGetIntForIndexM(interp,objv[2],length1, &first) != TCL_OK) {
return TCL_ERROR;
}
@@ -3101,7 +3102,7 @@ StringTitleCmd(
const char *start, *end;
Tcl_Obj *resultPtr;
- length1 = Tcl_NumUtfChars(string1, length1) - 1;
+ length1 = TclNumUtfChars(string1, length1) - 1;
if (TclGetIntForIndexM(interp,objv[2],length1, &first) != TCL_OK) {
return TCL_ERROR;
}
diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c
index a295e41..3a05621 100644
--- a/generic/tclCompExpr.c
+++ b/generic/tclCompExpr.c
@@ -2146,13 +2146,13 @@ ParseLexeme(
if (!TclIsBareword(*start) || *start == '_') {
if (Tcl_UtfCharComplete(start, numBytes)) {
- scanned = TclUtfToUCS4(start, &ch);
+ scanned = Tcl_UtfToUniChar(start, &ch);
} else {
char utfBytes[8];
memcpy(utfBytes, start, numBytes);
utfBytes[numBytes] = '\0';
- scanned = TclUtfToUCS4(utfBytes, &ch);
+ scanned = Tcl_UtfToUniChar(utfBytes, &ch);
}
*lexemePtr = INVALID;
Tcl_DecrRefCount(literal);
diff --git a/generic/tclDisassemble.c b/generic/tclDisassemble.c
index 2bbfc40..08f7888 100644
--- a/generic/tclDisassemble.c
+++ b/generic/tclDisassemble.c
@@ -875,7 +875,7 @@ PrintSourceToObj(
for (; (*p != '\0') && (i < maxChars); p+=len) {
int ucs4;
- len = TclUtfToUCS4(p, &ucs4);
+ len = Tcl_UtfToUniChar(p, &ucs4);
switch (ucs4) {
case '"':
Tcl_AppendToObj(appendObj, "\\\"", -1);
@@ -1199,10 +1199,10 @@ DisassembleByteCodeAsDicts(
*/
Tcl_DictObjPut(NULL, cmd, Tcl_NewStringObj("scriptfrom", -1),
- Tcl_NewWideIntObj(Tcl_NumUtfChars(codePtr->source,
+ Tcl_NewWideIntObj(TclNumUtfChars(codePtr->source,
sourceOffset)));
Tcl_DictObjPut(NULL, cmd, Tcl_NewStringObj("scriptto", -1),
- Tcl_NewWideIntObj(Tcl_NumUtfChars(codePtr->source,
+ Tcl_NewWideIntObj(TclNumUtfChars(codePtr->source,
sourceOffset + sourceLength - 1)));
Tcl_DictObjPut(NULL, cmd, Tcl_NewStringObj("script", -1),
Tcl_NewStringObj(codePtr->source+sourceOffset, sourceLength));
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 9311833..80a37d2 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -1638,10 +1638,10 @@ Tcl_UtfToExternalDStringEx(
} else {
/* Caller wants error message on failure */
if (result != TCL_OK && interp != NULL) {
- Tcl_Size pos = Tcl_NumUtfChars(srcStart, nBytesProcessed);
+ Tcl_Size pos = TclNumUtfChars(srcStart, nBytesProcessed);
int ucs4;
char buf[TCL_INTEGER_SPACE];
- TclUtfToUCS4(&srcStart[nBytesProcessed], &ucs4);
+ Tcl_UtfToUniChar(&srcStart[nBytesProcessed], &ucs4);
snprintf(buf, sizeof(buf), "%" TCL_SIZE_MODIFIER "u", nBytesProcessed);
Tcl_SetObjResult(
interp,
@@ -2587,7 +2587,7 @@ UtfToUtfProc(
} else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
/*
* Incomplete byte sequence.
- * Always check before using TclUtfToUCS4. Not doing can so
+ * Always check before using Tcl_UtfToUniChar. Not doing so can
* cause it run beyond the end of the buffer! If we happen such an
* incomplete char its bytes are made to represent themselves
* unless the user has explicitly asked to be told.
@@ -2609,13 +2609,13 @@ UtfToUtfProc(
/* TCL_ENCODING_PROFILE_TCL8 */
char chbuf[2];
chbuf[0] = UCHAR(*src++); chbuf[1] = 0;
- TclUtfToUCS4(chbuf, &ch);
+ Tcl_UtfToUniChar(chbuf, &ch);
}
dst += Tcl_UniCharToUtf(ch, dst);
} else {
int low;
int isInvalid = 0;
- size_t len = TclUtfToUCS4(src, &ch);
+ size_t len = Tcl_UtfToUniChar(src, &ch);
if (flags & ENCODING_INPUT) {
if ((len < 2) && (ch != 0)) {
isInvalid = 1;
@@ -2657,7 +2657,7 @@ UtfToUtfProc(
ch = UNICODE_REPLACE_CHAR;
} else {
low = ch;
- len = (src <= srcEnd - 3) ? TclUtfToUCS4(src, &low) : 0;
+ len = (src <= srcEnd - 3) ? Tcl_UtfToUniChar(src, &low) : 0;
if ((!LOW_SURROGATE(low)) || (ch & 0x400)) {
@@ -2924,7 +2924,7 @@ UtfToUtf32Proc(
result = TCL_CONVERT_NOSPACE;
break;
}
- len = TclUtfToUCS4(src, &ch);
+ len = Tcl_UtfToUniChar(src, &ch);
if (SURROGATE(ch)) {
if (PROFILE_STRICT(flags)) {
result = TCL_CONVERT_UNKNOWN;
@@ -3183,7 +3183,7 @@ UtfToUtf16Proc(
result = TCL_CONVERT_NOSPACE;
break;
}
- len = TclUtfToUCS4(src, &ch);
+ len = Tcl_UtfToUniChar(src, &ch);
if (SURROGATE(ch)) {
if (PROFILE_STRICT(flags)) {
result = TCL_CONVERT_UNKNOWN;
@@ -3292,18 +3292,6 @@ UtfToUcs2Proc(
result = TCL_CONVERT_NOSPACE;
break;
}
-#if TCL_UTF_MAX < 4
- len = TclUtfToUniChar(src, &ch);
- if ((ch >= 0xD800) && (len < 3)) {
- if (PROFILE_STRICT(flags)) {
- result = TCL_CONVERT_UNKNOWN;
- break;
- }
- src += len;
- src += TclUtfToUniChar(src, &ch);
- ch = UNICODE_REPLACE_CHAR;
- }
-#else
len = TclUtfToUniChar(src, &ch);
if (ch > 0xFFFF) {
if (PROFILE_STRICT(flags)) {
@@ -3312,7 +3300,6 @@ UtfToUcs2Proc(
}
ch = UNICODE_REPLACE_CHAR;
}
-#endif
if (PROFILE_STRICT(flags) && SURROGATE(ch)) {
result = TCL_CONVERT_SYNTAX;
break;
@@ -3541,16 +3528,10 @@ TableFromUtfProc(
}
len = TclUtfToUniChar(src, &ch);
-#if TCL_UTF_MAX > 3
/* Unicode chars > +U0FFFF cannot be represented in any table encoding */
if (ch & 0xFFFF0000) {
word = 0;
} else
-#else
- if (!len) {
- word = 0;
- } else
-#endif
word = fromUnicode[(ch >> 8)][ch & 0xFF];
if ((word == 0) && (ch != 0)) {
@@ -3740,19 +3721,11 @@ Iso88591FromUtfProc(
*/
if (ch > 0xFF
-#if TCL_UTF_MAX < 4
- || ((ch >= 0xD800) && (len < 3))
-#endif
) {
if (PROFILE_STRICT(flags)) {
result = TCL_CONVERT_UNKNOWN;
break;
}
-#if TCL_UTF_MAX < 4
- if ((ch >= 0xD800) && (len < 3)) {
- len = 4;
- }
-#endif
/*
* Plunge on, using '?' as a fallback character.
*/
diff --git a/generic/tclEvent.c b/generic/tclEvent.c
index 5501721..541e708 100644
--- a/generic/tclEvent.c
+++ b/generic/tclEvent.c
@@ -1119,9 +1119,6 @@ static const struct {
#ifdef STATIC_BUILD
".static"
#endif
-#if TCL_UTF_MAX < 4
- ".utf-16"
-#endif
}};
const char *
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index d76c287..19f0980 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -5648,12 +5648,12 @@ TEBCresume(
objResultPtr = value3Ptr;
goto doneStringMap;
}
- ustring1 = TclGetUnicodeFromObj_(valuePtr, &length);
+ ustring1 = TclGetUnicodeFromObj(valuePtr, &length);
if (length == 0) {
objResultPtr = valuePtr;
goto doneStringMap;
}
- ustring2 = TclGetUnicodeFromObj_(value2Ptr, &length2);
+ ustring2 = TclGetUnicodeFromObj(value2Ptr, &length2);
if (length2 > length || length2 == 0) {
objResultPtr = valuePtr;
goto doneStringMap;
@@ -5665,7 +5665,7 @@ TEBCresume(
}
goto doneStringMap;
}
- ustring3 = TclGetUnicodeFromObj_(value3Ptr, &length3);
+ ustring3 = TclGetUnicodeFromObj(value3Ptr, &length3);
objResultPtr = TclNewUnicodeObj(ustring1, 0);
p = ustring1;
@@ -5718,13 +5718,13 @@ TEBCresume(
valuePtr = OBJ_AT_TOS;
TRACE(("%s \"%.30s\" => ", tclStringClassTable[opnd].name,
O2S(valuePtr)));
- ustring1 = TclGetUnicodeFromObj_(valuePtr, &length);
+ ustring1 = TclGetUnicodeFromObj(valuePtr, &length);
match = 1;
if (length > 0) {
int ch;
end = ustring1 + length;
for (p=ustring1 ; p<end ; ) {
- p += TclUniCharToUCS4(p, &ch);
+ ch = *p++;
if (!tclStringClassTable[opnd].comparator(ch)) {
match = 0;
break;
@@ -5749,8 +5749,8 @@ TEBCresume(
|| TclHasInternalRep(value2Ptr, &tclUniCharStringType)) {
Tcl_UniChar *ustring1, *ustring2;
- ustring1 = TclGetUnicodeFromObj_(valuePtr, &length);
- ustring2 = TclGetUnicodeFromObj_(value2Ptr, &length2);
+ ustring1 = TclGetUnicodeFromObj(valuePtr, &length);
+ ustring2 = TclGetUnicodeFromObj(value2Ptr, &length2);
match = TclUniCharMatch(ustring1, length, ustring2, length2,
nocase);
} else if (TclIsPureByteArray(valuePtr) && !nocase) {
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 73896e7..fa9938b 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3449,15 +3449,6 @@ MODULE_SCOPE void TclRegisterCommandTypeName(
MODULE_SCOPE int TclUtfCmp(const char *cs, const char *ct);
MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct);
MODULE_SCOPE int TclUtfCount(int ch);
-#if TCL_UTF_MAX > 3
-# define TclUtfToUCS4 Tcl_UtfToUniChar
-# define TclUniCharToUCS4(src, ptr) (*ptr = *(src),1)
-# define TclUCS4Prev(src, ptr) (((src) > (ptr)) ? ((src) - 1) : (src))
-#else
- MODULE_SCOPE int TclUtfToUCS4(const char *, int *);
- MODULE_SCOPE int TclUniCharToUCS4(const Tcl_UniChar *, int *);
- MODULE_SCOPE const Tcl_UniChar *TclUCS4Prev(const Tcl_UniChar *, const Tcl_UniChar *);
-#endif
MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(void *clientData);
MODULE_SCOPE Tcl_Obj * TclpFilesystemPathType(Tcl_Obj *pathPtr);
MODULE_SCOPE int TclpDlopen(Tcl_Interp *interp, Tcl_Obj *pathPtr,
@@ -3500,43 +3491,12 @@ MODULE_SCOPE void TclErrorStackResetIf(Tcl_Interp *interp,
MODULE_SCOPE int TclZipfs_Init(Tcl_Interp *interp);
-#if TCL_UTF_MAX > 3
- MODULE_SCOPE int *TclGetUnicodeFromObj_(Tcl_Obj *, int *);
- MODULE_SCOPE Tcl_Obj *TclNewUnicodeObj(const int *, int);
- MODULE_SCOPE void TclAppendUnicodeToObj(Tcl_Obj *, const int *, int);
- MODULE_SCOPE int TclUniCharNcasecmp(const int *, const int *, unsigned long);
- MODULE_SCOPE int TclUniCharCaseMatch(const int *, const int *, int);
- MODULE_SCOPE int TclUniCharNcmp(const int *, const int *, unsigned long);
-# undef Tcl_NumUtfChars
-# define Tcl_NumUtfChars TclNumUtfChars
-# undef Tcl_GetCharLength
-# define Tcl_GetCharLength TclGetCharLength
-# undef Tcl_UtfAtIndex
-# define Tcl_UtfAtIndex TclUtfAtIndex
-# undef Tcl_GetRange
-# define Tcl_GetRange TclGetRange
-# undef Tcl_GetUniChar
-# define Tcl_GetUniChar TclGetUniChar
-#else
-# define tclUniCharStringType tclStringType
-# define TclGetUnicodeFromObj_ Tcl_GetUnicodeFromObj
-# define TclNewUnicodeObj Tcl_NewUnicodeObj
-# define TclAppendUnicodeToObj Tcl_AppendUnicodeToObj
-# define TclUniCharNcasecmp Tcl_UniCharNcasecmp
-# define TclUniCharCaseMatch Tcl_UniCharCaseMatch
-# define TclUniCharNcmp Tcl_UniCharNcmp
-# undef TclNumUtfChars
-# define TclNumUtfChars Tcl_NumUtfChars
-# undef TclGetCharLength
-# define TclGetCharLength Tcl_GetCharLength
-# undef TclUtfAtIndex
-# define TclUtfAtIndex Tcl_UtfAtIndex
-# undef TclGetRange
-# define TclGetRange Tcl_GetRange
-# undef TclGetUniChar
-# define TclGetUniChar Tcl_GetUniChar
-#endif
-
+MODULE_SCOPE int *TclGetUnicodeFromObj(Tcl_Obj *, int *);
+MODULE_SCOPE Tcl_Obj *TclNewUnicodeObj(const int *, int);
+MODULE_SCOPE void TclAppendUnicodeToObj(Tcl_Obj *, const int *, int);
+MODULE_SCOPE int TclUniCharNcasecmp(const int *, const int *, unsigned long);
+MODULE_SCOPE int TclUniCharCaseMatch(const int *, const int *, int);
+MODULE_SCOPE int TclUniCharNcmp(const int *, const int *, unsigned long);
/*
* Many parsing tasks need a common definition of whitespace.
@@ -4734,17 +4694,10 @@ MODULE_SCOPE const TclFileAttrProcs tclpFileAttrProcs[];
*----------------------------------------------------------------
*/
-#if TCL_UTF_MAX > 3
#define TclUtfToUniChar(str, chPtr) \
(((UCHAR(*(str))) < 0x80) ? \
((*(chPtr) = UCHAR(*(str))), 1) \
: Tcl_UtfToUniChar(str, chPtr))
-#else
-#define TclUtfToUniChar(str, chPtr) \
- (((UCHAR(*(str))) < 0x80) ? \
- ((*(chPtr) = UCHAR(*(str))), 1) \
- : Tcl_UtfToChar16(str, chPtr))
-#endif
/*
*----------------------------------------------------------------
diff --git a/generic/tclObj.c b/generic/tclObj.c
index 08465c2f..b999540 100644
--- a/generic/tclObj.c
+++ b/generic/tclObj.c
@@ -387,10 +387,8 @@ TclInitObjSubsystem(void)
Tcl_RegisterObjType(&tclByteArrayType);
Tcl_RegisterObjType(&tclDoubleType);
-#if (TCL_UTF_MAX < 4) || !defined(TCL_NO_DEPRECATED)
+#if !defined(TCL_NO_DEPRECATED)
Tcl_RegisterObjType(&tclStringType);
-#endif
-#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED)
/* Only registered for 8.7, not for 9.0 any more.
* See [https://core.tcl-lang.org/tk/tktview/6b49149b4e] */
Tcl_RegisterObjType(&tclUniCharStringType);
diff --git a/generic/tclParse.c b/generic/tclParse.c
index 5ee1a9f..aab69f3 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -936,13 +936,13 @@ TclParseBackslash(
*/
if (Tcl_UtfCharComplete(p, numBytes - 1)) {
- count = TclUtfToUCS4(p, &unichar) + 1; /* +1 for '\' */
+ count = Tcl_UtfToUniChar(p, &unichar) + 1; /* +1 for '\' */
} else {
char utfBytes[8];
memcpy(utfBytes, p, numBytes - 1);
utfBytes[numBytes - 1] = '\0';
- count = TclUtfToUCS4(utfBytes, &unichar) + 1;
+ count = Tcl_UtfToUniChar(utfBytes, &unichar) + 1;
}
result = unichar;
break;
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c
index 8e0681c..558355f 100644
--- a/generic/tclRegexp.c
+++ b/generic/tclRegexp.c
@@ -482,7 +482,7 @@ Tcl_RegExpExecObj(
regexpPtr->string = NULL;
regexpPtr->objPtr = textObj;
- udata = TclGetUnicodeFromObj_(textObj, &length);
+ udata = TclGetUnicodeFromObj(textObj, &length);
if (offset > length) {
offset = length;
diff --git a/generic/tclScan.c b/generic/tclScan.c
index 3749c12..b8c29fa 100644
--- a/generic/tclScan.c
+++ b/generic/tclScan.c
@@ -879,7 +879,7 @@ Tcl_ScanObjCmd(
* Scan a single Unicode character.
*/
- offset = TclUtfToUCS4(string, &i);
+ offset = Tcl_UtfToUniChar(string, &i);
string += offset;
if (!(flags & SCAN_SUPPRESS)) {
TclNewIntObj(objPtr, i);
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index b0a5e09..7cf04fa 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -69,7 +69,7 @@ static Tcl_Size UnicodeLength(const Tcl_UniChar *unicode);
static int UTF16Length(const unsigned short *unicode);
#endif
static void UpdateStringOfString(Tcl_Obj *objPtr);
-#if (TCL_UTF_MAX) > 3 && !defined(TCL_NO_DEPRECATED)
+#if !defined(TCL_NO_DEPRECATED)
static void DupUTF16StringInternalRep(Tcl_Obj *objPtr,
Tcl_Obj *copyPtr);
static int SetUTF16StringFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr);
@@ -89,30 +89,6 @@ static void UpdateStringOfUTF16String(Tcl_Obj *objPtr);
* functions that can be invoked by generic object code.
*/
-#if TCL_UTF_MAX < 4
-
-#define tclUniCharStringType tclStringType
-#define GET_UNICHAR_STRING GET_STRING
-#define UniCharString String
-#define UNICHAR_STRING_MAXCHARS STRING_MAXCHARS
-#define uniCharStringAlloc stringAlloc
-#define uniCharStringRealloc stringRealloc
-#define uniCharStringAttemptAlloc stringAttemptAlloc
-#define uniCharStringAttemptRealloc stringAttemptRealloc
-#define uniCharStringCheckLimits stringCheckLimits
-#define SET_UNICHAR_STRING SET_STRING
-#define UNICHAR_STRING_SIZE STRING_SIZE
-
-const Tcl_ObjType tclStringType = {
- "string", /* name */
- FreeStringInternalRep, /* freeIntRepPro */
- DupStringInternalRep, /* dupIntRepProc */
- UpdateStringOfString, /* updateStringProc */
- SetStringFromAny /* setFromAnyProc */
-};
-
-#else
-
#ifndef TCL_NO_DEPRECATED
const Tcl_ObjType tclStringType = {
"string", /* name */
@@ -250,8 +226,6 @@ UpdateStringOfUTF16String(
Tcl_DStringFree(&ds);
}
#endif
-
-#endif
/*
* TCL STRING GROWTH ALGORITHM
@@ -552,7 +526,7 @@ TclNewUnicodeObj(
return objPtr;
}
-#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED)
+#if !defined(TCL_NO_DEPRECATED)
Tcl_Obj *
Tcl_NewUnicodeObj(
const unsigned short *unicode, /* The unicode string used to initialize the
@@ -654,7 +628,7 @@ TclGetCharLength(
return numChars;
}
-#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED)
+#if !defined(TCL_NO_DEPRECATED)
#undef Tcl_GetCharLength
int
Tcl_GetCharLength(
@@ -687,7 +661,7 @@ Tcl_GetCharLength(
(void) Tcl_GetByteArrayFromObj(objPtr, &numChars);
} else {
Tcl_GetString(objPtr);
- numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length);
+ numChars = TclNumUtfChars(objPtr->bytes, objPtr->length);
}
return numChars;
@@ -722,7 +696,7 @@ TclCheckEmptyString(
}
if (TclIsPureByteArray(objPtr)
- && Tcl_GetCharLength(objPtr) == 0) {
+ && TclGetCharLength(objPtr) == 0) {
return TCL_EMPTYSTRING_YES;
}
@@ -760,7 +734,7 @@ TclCheckEmptyString(
*----------------------------------------------------------------------
*/
-#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED)
+#if !defined(TCL_NO_DEPRECATED)
#undef Tcl_GetUniChar
int
Tcl_GetUniChar(
@@ -876,22 +850,6 @@ TclGetUniChar(
return -1;
}
ch = stringPtr->unicode[index];
-#if TCL_UTF_MAX < 4
- /* See: bug [11ae2be95dac9417] */
- if (SURROGATE(ch)) {
- if (ch & 0x400) {
- if ((index > 0)
- && HIGH_SURROGATE(stringPtr->unicode[index-1])) {
- ch = -1; /* low surrogate preceded by high surrogate */
- }
- } else if ((++index < stringPtr->numChars)
- && LOW_SURROGATE(stringPtr->unicode[index])) {
- /* high surrogate followed by low surrogate */
- ch = (((ch & 0x3FF) << 10) |
- (stringPtr->unicode[index] & 0x3FF)) + 0x10000;
- }
- }
-#endif
return ch;
}
@@ -945,7 +903,7 @@ Tcl_GetUnicode(
*/
Tcl_UniChar *
-TclGetUnicodeFromObj_(
+TclGetUnicodeFromObj(
Tcl_Obj *objPtr, /* The object to find the Unicode string
* for. */
int *lengthPtr) /* If non-NULL, the location where the string
@@ -968,7 +926,7 @@ TclGetUnicodeFromObj_(
return stringPtr->unicode;
}
-#if TCL_UTF_MAX > 3 && !defined(TCL_NO_DEPRECATED)
+#if !defined(TCL_NO_DEPRECATED)
unsigned short *
Tcl_GetUnicodeFromObj(
Tcl_Obj *objPtr, /* The object to find the Unicode string
@@ -1009,7 +967,7 @@ Tcl_GetUnicodeFromObj(
*----------------------------------------------------------------------
*/
-#if TCL_UTF_MAX > 3 && !defined(TCL_NO_DEPRECATED)
+#if !defined(TCL_NO_DEPRECATED)
#undef Tcl_GetRange
Tcl_Obj *
Tcl_GetRange(
@@ -1042,7 +1000,7 @@ Tcl_GetRange(
return Tcl_NewByteArrayObj(bytes + first, last - first + 1);
}
- int numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length);
+ int numChars = TclNumUtfChars(objPtr->bytes, objPtr->length);
if (last < 0 || last >= numChars) {
last = numChars - 1;
@@ -1133,18 +1091,6 @@ TclGetRange(
TclNewObj(newObjPtr);
return newObjPtr;
}
-#if TCL_UTF_MAX < 4
- /* See: bug [11ae2be95dac9417] */
- if ((first > 0) && LOW_SURROGATE(stringPtr->unicode[first])
- && HIGH_SURROGATE(stringPtr->unicode[first-1])) {
- ++first;
- }
- if ((last + 1 < stringPtr->numChars)
- && LOW_SURROGATE(stringPtr->unicode[last+1])
- && HIGH_SURROGATE(stringPtr->unicode[last])) {
- ++last;
- }
-#endif
return TclNewUnicodeObj(stringPtr->unicode + first, last - first + 1);
}
@@ -1602,7 +1548,7 @@ Tcl_AppendLimitedToObj(
/* If appended string starts with a continuation byte or a lower surrogate,
* force objPtr to unicode representation. See [7f1162a867] */
if (bytes && ISCONTINUATION(bytes)) {
- TclGetUnicodeFromObj_(objPtr, NULL);
+ TclGetUnicodeFromObj(objPtr, NULL);
stringPtr = GET_UNICHAR_STRING(objPtr);
}
if (stringPtr->hasUnicode && stringPtr->numChars > 0) {
@@ -1703,7 +1649,7 @@ TclAppendUnicodeToObj(
}
}
-#if TCL_UTF_MAX > 3 && !defined(TCL_NO_DEPRECATED)
+#if !defined(TCL_NO_DEPRECATED)
void
Tcl_AppendUnicodeToObj(
Tcl_Obj *objPtr, /* Points to the object to append to. */
@@ -1837,7 +1783,7 @@ Tcl_AppendObjToObj(
* force objPtr to unicode representation. See [7f1162a867]
* This fixes append-3.4, append-3.7 and utf-1.18 testcases. */
if (ISCONTINUATION(TclGetString(appendObjPtr))) {
- TclGetUnicodeFromObj_(objPtr, NULL);
+ TclGetUnicodeFromObj(objPtr, NULL);
stringPtr = GET_UNICHAR_STRING(objPtr);
}
/*
@@ -1852,7 +1798,7 @@ Tcl_AppendObjToObj(
if (TclHasInternalRep(appendObjPtr, &tclUniCharStringType)) {
Tcl_UniChar *unicode =
- TclGetUnicodeFromObj_(appendObjPtr, &numChars);
+ TclGetUnicodeFromObj(appendObjPtr, &numChars);
AppendUnicodeToUnicodeRep(objPtr, unicode, numChars);
} else {
@@ -3368,7 +3314,7 @@ TclStringRepeat(
Tcl_GetByteArrayFromObj(objPtr, &length);
} else if (unichar) {
/* Result will be pure Tcl_UniChar array. Pre-size it. */
- TclGetUnicodeFromObj_(objPtr, &length);
+ TclGetUnicodeFromObj(objPtr, &length);
} else {
/* Result will be concat of string reps. Pre-size it. */
TclGetStringFromObj(objPtr, &length);
@@ -3408,7 +3354,7 @@ TclStringRepeat(
*/
if (!inPlace || Tcl_IsShared(objPtr)) {
- objResultPtr = TclNewUnicodeObj(TclGetUnicodeFromObj_(objPtr, NULL), length);
+ objResultPtr = TclNewUnicodeObj(TclGetUnicodeFromObj(objPtr, NULL), length);
} else {
TclInvalidateStringRep(objPtr);
objResultPtr = objPtr;
@@ -3429,7 +3375,7 @@ TclStringRepeat(
Tcl_AppendObjToObj(objResultPtr, objResultPtr);
done *= 2;
}
- TclAppendUnicodeToObj(objResultPtr, TclGetUnicodeFromObj_(objResultPtr, NULL),
+ TclAppendUnicodeToObj(objResultPtr, TclGetUnicodeFromObj(objResultPtr, NULL),
(count - done) * length);
} else {
/*
@@ -3596,7 +3542,7 @@ TclStringCat(
if ((objPtr->bytes == NULL) || (objPtr->length)) {
Tcl_Size numChars;
- TclGetUnicodeFromObj_(objPtr, &numChars); /* PANIC? */
+ TclGetUnicodeFromObj(objPtr, &numChars); /* PANIC? */
if (numChars) {
last = objc - oc;
if (length == 0) {
@@ -3746,7 +3692,7 @@ TclStringCat(
objResultPtr = *objv++; objc--;
/* Ugly interface! Force resize of the unicode array. */
- TclGetUnicodeFromObj_(objResultPtr, &start);
+ TclGetUnicodeFromObj(objResultPtr, &start);
Tcl_InvalidateStringRep(objResultPtr);
if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) {
if (interp) {
@@ -3758,7 +3704,7 @@ TclStringCat(
}
return NULL;
}
- dst = TclGetUnicodeFromObj_(objResultPtr, NULL) + start;
+ dst = TclGetUnicodeFromObj(objResultPtr, NULL) + start;
} else {
Tcl_UniChar ch = 0;
@@ -3775,14 +3721,14 @@ TclStringCat(
}
return NULL;
}
- dst = TclGetUnicodeFromObj_(objResultPtr, NULL);
+ dst = TclGetUnicodeFromObj(objResultPtr, NULL);
}
while (objc--) {
Tcl_Obj *objPtr = *objv++;
if ((objPtr->bytes == NULL) || (objPtr->length)) {
Tcl_Size more;
- Tcl_UniChar *src = TclGetUnicodeFromObj_(objPtr, &more);
+ Tcl_UniChar *src = TclGetUnicodeFromObj(objPtr, &more);
memcpy(dst, src, more * sizeof(Tcl_UniChar));
dst += more;
}
@@ -3908,8 +3854,8 @@ TclStringCmp(
*/
if (nocase) {
- s1 = (char *) TclGetUnicodeFromObj_(value1Ptr, &s1len);
- s2 = (char *) TclGetUnicodeFromObj_(value2Ptr, &s2len);
+ s1 = (char *) TclGetUnicodeFromObj(value1Ptr, &s1len);
+ s2 = (char *) TclGetUnicodeFromObj(value2Ptr, &s2len);
memCmpFn = (memCmpFn_t)(void *)TclUniCharNcasecmp;
} else {
s1len = TclGetCharLength(value1Ptr);
@@ -3925,10 +3871,10 @@ TclStringCmp(
s2 = value2Ptr->bytes;
memCmpFn = memcmp;
} else {
- s1 = (char *) TclGetUnicodeFromObj_(value1Ptr, NULL);
- s2 = (char *) TclGetUnicodeFromObj_(value2Ptr, NULL);
+ s1 = (char *) TclGetUnicodeFromObj(value1Ptr, NULL);
+ s2 = (char *) TclGetUnicodeFromObj(value2Ptr, NULL);
if (
-#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3)
+#if defined(WORDS_BIGENDIAN)
1
#else
checkEq
@@ -4001,8 +3947,8 @@ TclStringCmp(
if ((reqlength < 0) && !nocase) {
memCmpFn = (memCmpFn_t)(void *)TclpUtfNcmp2;
} else {
- s1len = Tcl_NumUtfChars(s1, s1len);
- s2len = Tcl_NumUtfChars(s2, s2len);
+ s1len = TclNumUtfChars(s1, s1len);
+ s2len = TclNumUtfChars(s2, s2len);
memCmpFn = (memCmpFn_t)(void *)
(nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp);
}
@@ -4131,8 +4077,8 @@ TclStringFirst(
* do only the well-defined Tcl_UniChar array search.
*/
- un = TclGetUnicodeFromObj_(needle, &ln);
- uh = TclGetUnicodeFromObj_(haystack, &lh);
+ un = TclGetUnicodeFromObj(needle, &ln);
+ uh = TclGetUnicodeFromObj(haystack, &lh);
if ((lh < ln) || (start > lh - ln)) {
/* Don't start the loop if there cannot be a valid answer */
goto firstEnd;
@@ -4214,8 +4160,8 @@ TclStringLast(
goto lastEnd;
}
- uh = TclGetUnicodeFromObj_(haystack, &lh);
- un = TclGetUnicodeFromObj_(needle, &ln);
+ uh = TclGetUnicodeFromObj(haystack, &lh);
+ un = TclGetUnicodeFromObj(needle, &ln);
if (last >= lh) {
last = lh - 1;
@@ -4288,9 +4234,6 @@ TclStringReverse(
UniCharString *stringPtr;
Tcl_UniChar ch = 0;
int inPlace = flags & TCL_STRING_IN_PLACE;
-#if TCL_UTF_MAX < 4
- int needFlip = 0;
-#endif
if (TclIsPureByteArray(objPtr)) {
Tcl_Size numBytes;
@@ -4307,7 +4250,7 @@ TclStringReverse(
stringPtr = GET_UNICHAR_STRING(objPtr);
if (stringPtr->hasUnicode) {
- Tcl_UniChar *from = TclGetUnicodeFromObj_(objPtr, NULL);
+ Tcl_UniChar *from = TclGetUnicodeFromObj(objPtr, NULL);
stringPtr = GET_UNICHAR_STRING(objPtr);
Tcl_UniChar *src = from + stringPtr->numChars;
Tcl_UniChar *to;
@@ -4320,57 +4263,22 @@ TclStringReverse(
objPtr = TclNewUnicodeObj(&ch, 1);
Tcl_SetObjLength(objPtr, stringPtr->numChars);
- to = TclGetUnicodeFromObj_(objPtr, NULL);
+ to = TclGetUnicodeFromObj(objPtr, NULL);
stringPtr = GET_UNICHAR_STRING(objPtr);
while (--src >= from) {
-#if TCL_UTF_MAX < 4
- ch = *src;
- if (SURROGATE(ch)) {
- needFlip = 1;
- }
- *to++ = ch;
-#else
*to++ = *src;
-#endif
}
} else {
/*
* Reversing in place.
*/
-#if TCL_UTF_MAX < 4
- to = src;
-#endif
while (--src > from) {
ch = *src;
-#if TCL_UTF_MAX < 4
- if (SURROGATE(ch)) {
- needFlip = 1;
- }
-#endif
*src = *from;
*from++ = ch;
}
}
-#if TCL_UTF_MAX < 4
- if (needFlip) {
- /*
- * Flip back surrogate pairs.
- */
-
- from = to - stringPtr->numChars;
- while (--to >= from) {
- ch = *to;
- if (HIGH_SURROGATE(ch)) {
- if ((to-1 >= from) && LOW_SURROGATE(to[-1])) {
- to[0] = to[-1];
- to[-1] = ch;
- --to;
- }
- }
- }
- }
-#endif
}
if (objPtr->bytes) {
@@ -4404,7 +4312,7 @@ TclStringReverse(
* skip calling Tcl_UtfCharComplete() here.
*/
- int bytesInChar = TclUtfToUCS4(from, &chw);
+ int bytesInChar = Tcl_UtfToUniChar(from, &chw);
ReverseBytes((unsigned char *)to, (unsigned char *)from,
bytesInChar);
@@ -4549,7 +4457,7 @@ TclStringReplace(
/* The traditional implementation... */
{
Tcl_Size numChars;
- Tcl_UniChar *ustring = TclGetUnicodeFromObj_(objPtr, &numChars);
+ Tcl_UniChar *ustring = TclGetUnicodeFromObj(objPtr, &numChars);
/* TODO: Is there an in-place option worth pursuing here? */
@@ -4628,14 +4536,12 @@ ExtendUnicodeRepWithString(
dst = stringPtr->unicode + numOrigChars;
if (numAppendChars-- > 0) {
bytes += TclUtfToUniChar(bytes, &unichar);
-#if TCL_UTF_MAX > 3
/* join upper/lower surrogate */
if (bytes && (stringPtr->unicode[numOrigChars - 1] | 0x3FF) == 0xDBFF && (unichar | 0x3FF) == 0xDFFF) {
stringPtr->numChars--;
unichar = ((stringPtr->unicode[numOrigChars - 1] & 0x3FF) << 10) + (unichar & 0x3FF) + 0x10000;
dst--;
}
-#endif
*dst++ = unichar;
while (numAppendChars-- > 0) {
bytes += TclUtfToUniChar(bytes, &unichar);
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index 470290c..059ae76 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -50,8 +50,6 @@
#undef Tcl_UniCharCaseMatch
#undef Tcl_UniCharLen
#undef Tcl_UniCharNcmp
-#undef Tcl_GetRange
-#undef Tcl_GetUniChar
#undef Tcl_DumpActiveMemory
#undef Tcl_ValidateAllMemory
#undef Tcl_FindHashEntry
@@ -80,11 +78,6 @@
#undef Tcl_MacOSXOpenBundleResources
#undef TclWinConvertWSAError
#undef TclWinConvertError
-#undef Tcl_NumUtfChars
-#undef Tcl_GetCharLength
-#undef Tcl_UtfAtIndex
-#undef Tcl_GetRange
-#undef Tcl_GetUniChar
#undef TclObjInterpProc
#if defined(_WIN32) || defined(__CYGWIN__)
@@ -93,7 +86,7 @@
#endif
-#if TCL_UTF_MAX > 3 && defined(TCL_NO_DEPRECATED)
+#if defined(TCL_NO_DEPRECATED)
static void uniCodePanic(void) {
Tcl_Panic("Tcl is compiled without the the UTF16 compatibility layer (-DTCL_NO_DEPRECATED)");
}
@@ -592,16 +585,6 @@ static int exprIntObj(Tcl_Interp *interp, Tcl_Obj*expr, int *ptr){
return result;
}
#define Tcl_ExprLongObj (int(*)(Tcl_Interp*,Tcl_Obj*,long*))exprIntObj
-#if TCL_UTF_MAX < 4 && !defined(TCL_NO_DEPRECATED)
-static int uniCharNcmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned int n){
- return Tcl_UniCharNcmp(ucs, uct, (unsigned long)n);
-}
-#define Tcl_UniCharNcmp (int(*)(const Tcl_UniChar*,const Tcl_UniChar*,unsigned long))(void *)uniCharNcmp
-static int uniCharNcasecmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned int n){
- return Tcl_UniCharNcasecmp(ucs, uct, (unsigned long)n);
-}
-#define Tcl_UniCharNcasecmp (int(*)(const Tcl_UniChar*,const Tcl_UniChar*,unsigned long))(void *)uniCharNcasecmp
-#endif
static int utfNcmp(const char *s1, const char *s2, unsigned int n){
return Tcl_UtfNcmp(s1, s2, (unsigned long)n);
}
@@ -702,13 +685,6 @@ static int utfNcasecmp(const char *s1, const char *s2, unsigned int n){
# define Tcl_SetExitProc 0
# define Tcl_SetPanicProc 0
# define Tcl_FindExecutable 0
-#if TCL_UTF_MAX < 4
-# define Tcl_GetUnicode 0
-# define Tcl_AppendUnicodeToObj 0
-# define Tcl_UniCharCaseMatch 0
-# define Tcl_UniCharNcasecmp 0
-# define Tcl_UniCharNcmp 0
-#endif
# undef Tcl_StringMatch
# define Tcl_StringMatch 0
# define TclBN_reverse 0
diff --git a/generic/tclTest.c b/generic/tclTest.c
index c57dfa0..47b59c3 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -542,9 +542,6 @@ static const char version[] = TCL_PATCH_LEVEL "+" STRINGIFY(TCL_VERSION_UUID)
#ifdef STATIC_BUILD
".static"
#endif
-#if TCL_UTF_MAX < 4
- ".utf-16"
-#endif
;
int
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 6e14689..1ac7475 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -408,7 +408,7 @@ Tcl_Char16ToUtfDString(
* Tcl_UtfCharComplete() before calling this routine to ensure that
* enough bytes remain in the string.
*
- * If TCL_UTF_MAX <= 4, special handling of Surrogate pairs is done:
+ * Special handling of Surrogate pairs is done:
* For any UTF-8 string containing a character outside of the BMP, the
* first call to this function will fill *chPtr with the high surrogate
* and generate a return value of 1. Calling Tcl_UtfToUniChar again
@@ -676,11 +676,11 @@ Tcl_UtfToUniCharDString(
endPtr = src + length;
optPtr = endPtr - 4;
while (p <= optPtr) {
- p += TclUtfToUCS4(p, &ch);
+ p += Tcl_UtfToUniChar(p, &ch);
*w++ = ch;
}
while ((p < endPtr) && Tcl_UtfCharComplete(p, endPtr-p)) {
- p += TclUtfToUCS4(p, &ch);
+ p += Tcl_UtfToUniChar(p, &ch);
*w++ = ch;
}
while (p < endPtr) {
@@ -849,8 +849,7 @@ TclNumUtfChars(
return i;
}
-#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED)
-#undef Tcl_NumUtfChars
+#if !defined(TCL_NO_DEPRECATED)
int
Tcl_NumUtfChars(
const char *src, /* The UTF-8 string to measure. */
@@ -929,7 +928,7 @@ Tcl_UtfFindFirst(
int ch) /* The Unicode character to search for. */
{
while (1) {
- int find, len = TclUtfToUCS4(src, &find);
+ int find, len = Tcl_UtfToUniChar(src, &find);
if (find == ch) {
return src;
@@ -968,7 +967,7 @@ Tcl_UtfFindLast(
const char *last = NULL;
while (1) {
- int find, len = TclUtfToUCS4(src, &find);
+ int find, len = Tcl_UtfToUniChar(src, &find);
if (find == ch) {
last = src;
@@ -1195,7 +1194,7 @@ Tcl_UniCharAtIndex(
/* Index points at character following high Surrogate */
return -1;
}
- TclUtfToUCS4(src, &i);
+ Tcl_UtfToUniChar(src, &i);
return i;
}
@@ -1205,9 +1204,7 @@ Tcl_UniCharAtIndex(
* Tcl_UtfAtIndex --
*
* Returns a pointer to the specified character (not byte) position in
- * the UTF-8 string. If TCL_UTF_MAX < 4, characters > U+FFFF count as
- * 2 positions, but then the pointer should never be placed between
- * the two positions.
+ * the UTF-8 string.
*
* Results:
* As above.
@@ -1218,11 +1215,6 @@ Tcl_UniCharAtIndex(
*---------------------------------------------------------------------------
*/
-#if TCL_UTF_MAX < 4
-# undef Tcl_UtfToUniChar
-# define Tcl_UtfToUniChar Tcl_UtfToChar16
-#endif
-
const char *
TclUtfAtIndex(
const char *src, /* The UTF-8 string. */
@@ -1235,17 +1227,10 @@ TclUtfAtIndex(
len = (Tcl_UtfToUniChar)(src, &ch);
src += len;
}
-#if TCL_UTF_MAX < 4
- if ((ch >= 0xD800) && (len < 3)) {
- /* Index points at character following high Surrogate */
- src += (Tcl_UtfToUniChar)(src, &ch);
- }
-#endif
return src;
}
-#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED)
-#undef Tcl_UtfAtIndex
+#if !defined(TCL_NO_DEPRECATED)
const char *
Tcl_UtfAtIndex(
const char *src, /* The UTF-8 string. */
@@ -1353,7 +1338,7 @@ Tcl_UtfToUpper(
src = dst = str;
while (*src) {
- len = TclUtfToUCS4(src, &ch);
+ len = Tcl_UtfToUniChar(src, &ch);
upChar = Tcl_UniCharToUpper(ch);
/*
@@ -1406,7 +1391,7 @@ Tcl_UtfToLower(
src = dst = str;
while (*src) {
- len = TclUtfToUCS4(src, &ch);
+ len = Tcl_UtfToUniChar(src, &ch);
lowChar = Tcl_UniCharToLower(ch);
/*
@@ -1462,7 +1447,7 @@ Tcl_UtfToTitle(
src = dst = str;
if (*src) {
- len = TclUtfToUCS4(src, &ch);
+ len = Tcl_UtfToUniChar(src, &ch);
titleChar = Tcl_UniCharToTitle(ch);
if ((len < TclUtfCount(titleChar)) || ((titleChar & ~0x7FF) == 0xD800)) {
@@ -1474,7 +1459,7 @@ Tcl_UtfToTitle(
src += len;
}
while (*src) {
- len = TclUtfToUCS4(src, &ch);
+ len = Tcl_UtfToUniChar(src, &ch);
lowChar = ch;
/* Special exception for Georgian Asomtavruli chars, no titlecase. */
if ((unsigned)(lowChar - 0x1C90) >= 0x30) {
@@ -1581,16 +1566,6 @@ Tcl_UtfNcmp(
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
if (ch1 != ch2) {
-#if TCL_UTF_MAX < 4
- /* Surrogates always report higher than non-surrogates */
- if (((ch1 & 0xFC00) == 0xD800)) {
- if ((ch2 & 0xFC00) != 0xD800) {
- return ch1;
- }
- } else if ((ch2 & 0xFC00) == 0xD800) {
- return -ch2;
- }
-#endif
return (ch1 - ch2);
}
}
@@ -1632,16 +1607,6 @@ Tcl_UtfNcasecmp(
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
if (ch1 != ch2) {
-#if TCL_UTF_MAX < 4
- /* Surrogates always report higher than non-surrogates */
- if (((ch1 & 0xFC00) == 0xD800)) {
- if ((ch2 & 0xFC00) != 0xD800) {
- return ch1;
- }
- } else if ((ch2 & 0xFC00) == 0xD800) {
- return -ch2;
- }
-#endif
ch1 = Tcl_UniCharToLower(ch1);
ch2 = Tcl_UniCharToLower(ch2);
if (ch1 != ch2) {
@@ -1681,16 +1646,6 @@ TclUtfCmp(
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
if (ch1 != ch2) {
-#if TCL_UTF_MAX < 4
- /* Surrogates always report higher than non-surrogates */
- if (((ch1 & 0xFC00) == 0xD800)) {
- if ((ch2 & 0xFC00) != 0xD800) {
- return ch1;
- }
- } else if ((ch2 & 0xFC00) == 0xD800) {
- return -ch2;
- }
-#endif
return ch1 - ch2;
}
}
@@ -1727,16 +1682,6 @@ TclUtfCasecmp(
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
if (ch1 != ch2) {
-#if TCL_UTF_MAX < 4
- /* Surrogates always report higher than non-surrogates */
- if (((ch1 & 0xFC00) == 0xD800)) {
- if ((ch2 & 0xFC00) != 0xD800) {
- return ch1;
- }
- } else if ((ch2 & 0xFC00) == 0xD800) {
- return -ch2;
- }
-#endif
ch1 = Tcl_UniCharToLower(ch1);
ch2 = Tcl_UniCharToLower(ch2);
if (ch1 != ch2) {
@@ -1935,7 +1880,7 @@ TclUniCharNcmp(
const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */
unsigned long numChars) /* Number of unichars to compare. */
{
-#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3)
+#if defined(WORDS_BIGENDIAN)
/*
* We are definitely on a big-endian machine; memcmp() is safe
*/
@@ -1956,14 +1901,14 @@ TclUniCharNcmp(
#endif /* WORDS_BIGENDIAN */
}
-#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED)
+#if !defined(TCL_NO_DEPRECATED)
int
Tcl_UniCharNcmp(
const unsigned short *ucs, /* Unicode string to compare to uct. */
const unsigned short *uct, /* Unicode string ucs is compared to. */
unsigned long numChars) /* Number of unichars to compare. */
{
-#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3)
+#if defined(WORDS_BIGENDIAN)
/*
* We are definitely on a big-endian machine; memcmp() is safe
*/
@@ -2027,7 +1972,7 @@ TclUniCharNcasecmp(
return 0;
}
-#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED)
+#if !defined(TCL_NO_DEPRECATED)
int
Tcl_UniCharNcasecmp(
const unsigned short *ucs, /* Unicode string to compare to uct. */
@@ -2583,7 +2528,7 @@ TclUniCharCaseMatch(
}
}
-#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED)
+#if !defined(TCL_NO_DEPRECATED)
int
Tcl_UniCharCaseMatch(
const unsigned short *uniStr, /* Unicode String. */
@@ -2945,71 +2890,6 @@ TclUniCharMatch(
}
/*
- *---------------------------------------------------------------------------
- *
- * TclUtfToUCS4 --
- *
- * Extracts the 4-byte codepoint from the leading bytes of the
- * Modified UTF-8 string "src". This is a utility routine to
- * contain the surrogate gymnastics in one place.
- *
- * The caller must ensure that the source buffer is long enough that this
- * routine does not run off the end and dereference non-existent memory
- * looking for trail bytes. If the source buffer is known to be '\0'
- * terminated, this cannot happen. Otherwise, the caller should call
- * Tcl_UtfCharComplete() before calling this routine to ensure that
- * enough bytes remain in the string.
- *
- * Results:
- * Fills *usc4Ptr with the UCS4 code point and returns the number of bytes
- * consumed from the source string.
- *
- * Side effects:
- * None.
- *
- *---------------------------------------------------------------------------
- */
-
-#if TCL_UTF_MAX < 4
-int
-TclUtfToUCS4(
- const char *src, /* The UTF-8 string. */
- int *ucs4Ptr) /* Filled with the UCS4 codepoint represented
- * by the UTF-8 string. */
-{
-# undef Tcl_UtfToUniChar
- return Tcl_UtfToUniChar(src, ucs4Ptr);
-}
-
-int
-TclUniCharToUCS4(
- const Tcl_UniChar *src, /* The Tcl_UniChar string. */
- int *ucs4Ptr) /* Filled with the UCS4 codepoint represented
- * by the Tcl_UniChar string. */
-{
- if (((src[0] & 0xFC00) == 0xD800) && ((src[1] & 0xFC00) == 0xDC00)) {
- *ucs4Ptr = (((src[0] & 0x3FF) << 10) | (src[1] & 0x3FF)) + 0x10000;
- return 2;
- }
- *ucs4Ptr = src[0];
- return 1;
-}
-
-const Tcl_UniChar *TclUCS4Prev(const Tcl_UniChar *src, const Tcl_UniChar *ptr) {
- if (src <= ptr + 1) {
- return ptr;
- }
- if (((src[-1] & 0xFC00) == 0xDC00) && ((src[-2] & 0xFC00) == 0xD800)) {
- return src - 2;
- }
- return src - 1;
-}
-
-
-
-#endif
-
-/*
* Local Variables:
* mode: c
* c-basic-offset: 4
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index f9f6ae0..3ab741a 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -1717,7 +1717,7 @@ TclTrimRight(
pp = Tcl_UtfPrev(p, bytes);
do {
pp += pInc;
- pInc = TclUtfToUCS4(pp, &ch1);
+ pInc = Tcl_UtfToUniChar(pp, &ch1);
} while (pp + pInc < p);
/*
@@ -1725,7 +1725,7 @@ TclTrimRight(
*/
do {
- pInc = TclUtfToUCS4(q, &ch2);
+ pInc = Tcl_UtfToUniChar(q, &ch2);
if (ch1 == ch2) {
break;
@@ -1790,7 +1790,7 @@ TclTrimLeft(
*/
do {
- Tcl_Size pInc = TclUtfToUCS4(p, &ch1);
+ Tcl_Size pInc = Tcl_UtfToUniChar(p, &ch1);
const char *q = trim;
Tcl_Size bytesLeft = numTrim;
@@ -1799,7 +1799,7 @@ TclTrimLeft(
*/
do {
- Tcl_Size qInc = TclUtfToUCS4(q, &ch2);
+ Tcl_Size qInc = Tcl_UtfToUniChar(q, &ch2);
if (ch1 == ch2) {
break;
@@ -1866,7 +1866,7 @@ TclTrim(
if (numBytes > 0) {
int ch;
const char *first = bytes + trimLeft;
- bytes += TclUtfToUCS4(first, &ch);
+ bytes += Tcl_UtfToUniChar(first, &ch);
numBytes -= (bytes - first);
if (numBytes > 0) {
@@ -2221,7 +2221,7 @@ Tcl_StringCaseMatch(
ch2 = (int)
(nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern));
} else {
- TclUtfToUCS4(pattern, &ch2);
+ Tcl_UtfToUniChar(pattern, &ch2);
if (nocase) {
ch2 = Tcl_UniCharToLower(ch2);
}
@@ -2237,7 +2237,7 @@ Tcl_StringCaseMatch(
if ((p != '[') && (p != '?') && (p != '\\')) {
if (nocase) {
while (*str) {
- charLen = TclUtfToUCS4(str, &ch1);
+ charLen = Tcl_UtfToUniChar(str, &ch1);
if (ch2==ch1 || ch2==Tcl_UniCharToLower(ch1)) {
break;
}
@@ -2251,7 +2251,7 @@ Tcl_StringCaseMatch(
*/
while (*str) {
- charLen = TclUtfToUCS4(str, &ch1);
+ charLen = Tcl_UtfToUniChar(str, &ch1);
if (ch2 == ch1) {
break;
}
@@ -2265,7 +2265,7 @@ Tcl_StringCaseMatch(
if (*str == '\0') {
return 0;
}
- str += TclUtfToUCS4(str, &ch1);
+ str += Tcl_UtfToUniChar(str, &ch1);
}
}
@@ -2276,7 +2276,7 @@ Tcl_StringCaseMatch(
if (p == '?') {
pattern++;
- str += TclUtfToUCS4(str, &ch1);
+ str += Tcl_UtfToUniChar(str, &ch1);
continue;
}
@@ -2295,7 +2295,7 @@ Tcl_StringCaseMatch(
(nocase ? tolower(UCHAR(*str)) : UCHAR(*str));
str++;
} else {
- str += TclUtfToUCS4(str, &ch1);
+ str += Tcl_UtfToUniChar(str, &ch1);
if (nocase) {
ch1 = Tcl_UniCharToLower(ch1);
}
@@ -2309,7 +2309,7 @@ Tcl_StringCaseMatch(
? tolower(UCHAR(*pattern)) : UCHAR(*pattern));
pattern++;
} else {
- pattern += TclUtfToUCS4(pattern, &startChar);
+ pattern += Tcl_UtfToUniChar(pattern, &startChar);
if (nocase) {
startChar = Tcl_UniCharToLower(startChar);
}
@@ -2324,7 +2324,7 @@ Tcl_StringCaseMatch(
? tolower(UCHAR(*pattern)) : UCHAR(*pattern));
pattern++;
} else {
- pattern += TclUtfToUCS4(pattern, &endChar);
+ pattern += Tcl_UtfToUniChar(pattern, &endChar);
if (nocase) {
endChar = Tcl_UniCharToLower(endChar);
}
@@ -2372,8 +2372,8 @@ Tcl_StringCaseMatch(
* each string match.
*/
- str += TclUtfToUCS4(str, &ch1);
- pattern += TclUtfToUCS4(pattern, &ch2);
+ str += Tcl_UtfToUniChar(str, &ch1);
+ pattern += Tcl_UtfToUniChar(pattern, &ch2);
if (nocase) {
if (Tcl_UniCharToLower(ch1) != Tcl_UniCharToLower(ch2)) {
return 0;
@@ -2601,8 +2601,8 @@ TclStringMatchObj(
if (TclHasInternalRep(strObj, &tclUniCharStringType) || (strObj->typePtr == NULL)) {
Tcl_UniChar *udata, *uptn;
- udata = TclGetUnicodeFromObj_(strObj, &length);
- uptn = TclGetUnicodeFromObj_(ptnObj, &plen);
+ udata = TclGetUnicodeFromObj(strObj, &length);
+ uptn = TclGetUnicodeFromObj(ptnObj, &plen);
match = TclUniCharMatch(udata, length, uptn, plen, flags);
} else if (TclIsPureByteArray(strObj) && TclIsPureByteArray(ptnObj)
&& !flags) {
diff --git a/tests/utf.test b/tests/utf.test
index edd0c79..0b639d8 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -18,17 +18,6 @@ catch [list package require -exact tcl::test [info patchlevel]]
source [file join [file dirname [info script]] tcltests.tcl]
-testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}]
-testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}]
-testConstraint utf16 [expr {[string length [format %c 0x10000]] == 2}]
-testConstraint utf32 [expr {[testConstraint fullutf]
- && [string length [format %c 0x10000]] == 1}]
-
-testConstraint Uesc [expr {"\U0041" eq "A"}]
-testConstraint pre388 [expr {"\x741" eq "A"}]
-testConstraint pairsTo4bytes [expr {[llength [info commands teststringbytes]]
- && [string length [teststringbytes \uD83D\uDCA9]] == 4}]
-
testConstraint testbytestring [llength [info commands testbytestring]]
testConstraint testfindfirst [llength [info commands testfindfirst]]
testConstraint testfindlast [llength [info commands testfindlast]]
@@ -60,12 +49,9 @@ test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} testbytestring {
test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring {
expr {[format %c -1] eq [testbytestring \xEF\xBF\xBD]}
} 1
-test utf-1.7.0 {Tcl_UniCharToUtf: 4 byte sequences} {fullutf testbytestring} {
+test utf-1.7.0 {Tcl_UniCharToUtf: 4 byte sequences} {testbytestring} {
expr {"\U014E4E" eq [testbytestring \xF0\x94\xB9\x8E]}
} 1
-test utf-1.7.1 {Tcl_UniCharToUtf: 4 byte sequences} {Uesc ucs2 testbytestring} {
- expr {"\U014E4E" eq [testbytestring \xF0\x94\xB9\x8E]}
-} 0
test utf-1.8 {Tcl_UniCharToUtf: 3 byte sequence, high surrogate} testbytestring {
expr {"\uD842" eq [testbytestring \xED\xA1\x82]}
} 1
@@ -78,13 +64,10 @@ test utf-1.10 {Tcl_UniCharToUtf: 3 byte sequence, high surrogate} testbytestring
test utf-1.11 {Tcl_UniCharToUtf: 3 byte sequence, low surrogate} testbytestring {
expr {[format %c 0xDC42] eq [testbytestring \xED\xB1\x82]}
} 1
-test utf-1.12 {Tcl_UniCharToUtf: 4 byte sequence, high/low surrogate} {pairsTo4bytes testbytestring} {
+test utf-1.12 {Tcl_UniCharToUtf: 4 byte sequence, high/low surrogate} {testbytestring} {
expr {"\uD842\uDC42" eq [testbytestring \xF0\xA0\xA1\x82]}
} 1
-test utf-1.13.0 {Tcl_UniCharToUtf: Invalid surrogate} {Uesc ucs2} {
- expr {"\UD842" eq "\uD842"}
-} 1
-test utf-1.13.1 {Tcl_UniCharToUtf: Invalid surrogate} {fullutf testbytestring} {
+test utf-1.13 {Tcl_UniCharToUtf: Invalid surrogate} {testbytestring} {
expr {"\UD842" eq [testbytestring \xEF\xBF\xBD]}
} 1
test utf-1.14 {Tcl_UniCharToUtf: surrogate pairs from concat} {
@@ -128,22 +111,10 @@ test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} testbytestrin
test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring {
string length [testbytestring \xE4\xB9\x8E]
} 1
-test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {ucs2 testbytestring} {
- string length [testbytestring \xF0\x90\x80\x80]
-} 2
-test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf16 {
- string length 𐀀
-} 2
-test utf-2.8.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf32 {
+test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {
string length 𐀀
} 1
-test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {ucs2 testbytestring} {
- string length [testbytestring \xF4\x8F\xBF\xBF]
-} 2
-test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf16 {
- string length \U10FFFF
-} 2
-test utf-2.9.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf32 {
+test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {
string length \U10FFFF
} 1
test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring {
@@ -243,10 +214,7 @@ test utf-6.9 {Tcl_UtfNext} {testutfnext testbytestring} {
test utf-6.10 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xA0]G
} 1
-test utf-6.11.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xA0\xA0\x00]
-} 1
-test utf-6.11.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
+test utf-6.11 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xA0\xA0\x00]
} 2
test utf-6.12 {Tcl_UtfNext} {testutfnext testbytestring} {
@@ -303,19 +271,13 @@ test utf-6.28 {Tcl_UtfNext} {testutfnext testbytestring} {
test utf-6.29 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xE8\xF8]
} 1
-test utf-6.30.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xF2]
-} 1
-test utf-6.30.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
+test utf-6.30 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\x00]
} 1
test utf-6.31 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2]G
} 1
-test utf-6.32.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xF2\xA0]
-} 1
-test utf-6.32.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
+test utf-6.32 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\x00]
} 1
test utf-6.33 {Tcl_UtfNext} {testutfnext testbytestring} {
@@ -426,10 +388,7 @@ test utf-6.67 {Tcl_UtfNext} {testutfnext testbytestring} {
test utf-6.68 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\xA0]G
} 1
-test utf-6.69.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xF2\xA0\xA0\xA0]
-} 1
-test utf-6.69.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
+test utf-6.69 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0]
} 4
test utf-6.70 {Tcl_UtfNext} {testutfnext testbytestring} {
@@ -444,40 +403,22 @@ test utf-6.72 {Tcl_UtfNext} {testutfnext testbytestring} {
test utf-6.73 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\xA0\xF8]
} 1
-test utf-6.74.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xF2\xA0\xA0\xA0]G
-} 1
-test utf-6.74.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
+test utf-6.74 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0]G
} 4
-test utf-6.75.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0]
-} 1
-test utf-6.75.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
+test utf-6.75 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0]
} 4
-test utf-6.76.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xF2\xA0\xA0\xA0\xD0]
-} 1
-test utf-6.76.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
+test utf-6.76 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xD0]
} 4
-test utf-6.77.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xF2\xA0\xA0\xA0\xE8]
-} 1
-test utf-6.77.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
+test utf-6.77 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xE8]
} 4
-test utf-6.78.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xF2\xA0\xA0\xA0\xF2]
-} 1
-test utf-6.78.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
+test utf-6.78 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xF2]
} 4
-test utf-6.79.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xF2\xA0\xA0\xA0G\xF8]
-} 1
-test utf-6.79.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
+test utf-6.79 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0G\xF8]
} 4
test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext {
@@ -501,55 +442,31 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} {
test utf-6.86 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} {
testutfnext [testbytestring \xF0\x80\x80\x80]
} 1
-test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xF0\x90\x80\x80]
-} 1
-test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring fullutf} {
+test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} {
testutfnext [testbytestring \xF0\x90\x80\x80]
} 4
-test utf-6.88.0 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xA0\xA0\x00]
-} 1
-test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring fullutf} {
+test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring} {
testutfnext [testbytestring \xA0\xA0\x00]
} 2
-test utf-6.89.0 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \x80\x80\x00]
-} 1
-test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring fullutf} {
+test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring} {
testutfnext [testbytestring \x80\x80\x00]
} 2
-test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xF4\x8F\xBF\xBF]
-} 1
-test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring fullutf} {
+test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring} {
testutfnext [testbytestring \xF4\x8F\xBF\xBF]
} 4
test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring} {
testutfnext [testbytestring \xF4\x90\x80\x80]
} 1
-test utf-6.92.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xA0\xA0\xA0]
-} 1
-test utf-6.92.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring fullutf} {
+test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring} {
testutfnext [testbytestring \xA0\xA0\xA0]
} 3
-test utf-6.93.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \x80\x80\x80]
-} 1
-test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring fullutf} {
+test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring} {
testutfnext [testbytestring \x80\x80\x80]
} 3
-test utf-6.94.0 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xA0\xA0\xA0\xA0]
-} 1
-test utf-6.94.1 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring fullutf} {
+test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring} {
testutfnext [testbytestring \xA0\xA0\xA0\xA0]
} 3
-test utf-6.95.0 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \x80\x80\x80\x80]
-} 1
-test utf-6.95.1 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring fullutf} {
+test utf-6.95 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring} {
testutfnext [testbytestring \x80\x80\x80\x80]
} 3
@@ -616,22 +533,13 @@ test utf-7.9.1 {Tcl_UtfPrev} {testutfprev testbytestring} {
test utf-7.9.2 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF8\xA0\xF8\xA0] 3
} 2
-test utf-7.10.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} {
- testutfprev A[testbytestring \xF2\xA0]
-} 2
-test utf-7.10.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} {
+test utf-7.10.1 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF2\xA0]
} 1
-test utf-7.10.2 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} {
- testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 3
-} 2
-test utf-7.10.3 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} {
+test utf-7.10.2 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 3
} 1
-test utf-7.10.4 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} {
- testutfprev A[testbytestring \xF2\xA0\xF8\xA0] 3
-} 2
-test utf-7.10.5 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} {
+test utf-7.10.3 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF2\xA0\xF8\xA0] 3
} 1
test utf-7.11 {Tcl_UtfPrev} {testutfprev testbytestring} {
@@ -673,22 +581,13 @@ test utf-7.14.1 {Tcl_UtfPrev} {testutfprev testbytestring} {
test utf-7.14.2 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF8\xA0\xA0\xF8] 4
} 3
-test utf-7.15.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} {
- testutfprev A[testbytestring \xF2\xA0\xA0]
-} 3
-test utf-7.15.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} {
+test utf-7.15.1 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF2\xA0\xA0]
} 1
-test utf-7.15.2 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} {
- testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 4
-} 3
-test utf-7.15.3 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} {
+test utf-7.15.3 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 4
} 1
-test utf-7.15.4 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} {
- testutfprev A[testbytestring \xF2\xA0\xA0\xF8] 4
-} 3
-test utf-7.15.5 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} {
+test utf-7.15.5 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF2\xA0\xA0\xF8] 4
} 1
test utf-7.16 {Tcl_UtfPrev} testutfprev {
@@ -721,10 +620,7 @@ test utf-7.18.2 {Tcl_UtfPrev} {testutfprev testbytestring} {
test utf-7.19 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev [testbytestring A\xF8\xA0\xA0\xA0]
} 4
-test utf-7.20.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} {
- testutfprev [testbytestring A\xF2\xA0\xA0\xA0]
-} 4
-test utf-7.20.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} {
+test utf-7.20 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev [testbytestring A\xF2\xA0\xA0\xA0]
} 1
test utf-7.21 {Tcl_UtfPrev} {testutfprev testbytestring} {
@@ -787,22 +683,13 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
test utf-7.38 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
testutfprev A[testbytestring \xE0\xA0\x80] 2
} 1
-test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} {
- testutfprev A[testbytestring \xF0\x90\x80\x80]
-} 4
-test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring fullutf} {
+test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF0\x90\x80\x80]
} 1
-test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} {
- testutfprev A[testbytestring \xF0\x90\x80\x80] 4
-} 3
-test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring fullutf} {
+test utf-7.40 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF0\x90\x80\x80] 4
} 1
-test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} {
- testutfprev A[testbytestring \xF0\x90\x80\x80] 3
-} 2
-test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring fullutf} {
+test utf-7.41 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF0\x90\x80\x80] 3
} 1
test utf-7.42 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
@@ -829,28 +716,19 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} tes
test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev testbytestring} {
testutfprev [testbytestring \xE8\xA0\x00] 2
} 0
-test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} {
- testutfprev A[testbytestring \xF4\x8F\xBF\xBF]
-} 4
-test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} {
+test utf-7.48 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF4\x8F\xBF\xBF]
} 1
-test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} {
- testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 4
-} 3
-test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} {
+test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 4
} 1
-test utf-7.48.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} {
- testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 3
-} 2
-test utf-7.48.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} {
+test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 3
} 1
-test utf-7.48.6 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} {
+test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 2
} 1
-test utf-7.49.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} {
+test utf-7.49 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF4\x90\x80\x80]
} 4
test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} {
@@ -875,72 +753,30 @@ test utf-8.3 {Tcl_UniCharAtIndex: index > 0} {
test utf-8.4 {Tcl_UniCharAtIndex: index > 0} {
string index 乎ɚÿՃ 2
} ÿ
-test utf-8.5.0 {Tcl_UniCharAtIndex: high surrogate} ucs2 {
- string index \uD842 0
-} \uD842
-test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} utf32 {
- string index \uD842 0
-} \uD842
-test utf-8.5.2 {Tcl_UniCharAtIndex: high surrogate} utf16 {
+test utf-8.5 {Tcl_UniCharAtIndex: high surrogate} {
string index \uD842 0
} \uD842
test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} {
string index \uDC42 0
} \uDC42
-test utf-8.7.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
- string index \uD83D\uDE00G 0
-} \uD83D
-test utf-8.7.1 {Tcl_UniCharAtIndex: Emoji} utf32 {
- string index 😀G 0
-} 😀
-test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
+test utf-8.7 {Tcl_UniCharAtIndex: Emoji} {
string index 😀G 0
} 😀
-test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
- string index \uD83D\uDE00G 1
-} \uDE00
-test utf-8.8.1 {Tcl_UniCharAtIndex: Emoji} utf32 {
+test utf-8.8 {Tcl_UniCharAtIndex: Emoji} {
string index 😀G 1
} G
-test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
- string index 😀G 1
-} {}
-test utf-8.9.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
- string index \uD83D\uDE00G 2
-} G
-test utf-8.9.1 {Tcl_UniCharAtIndex: Emoji} utf32 {
+test utf-8.9 {Tcl_UniCharAtIndex: Emoji} {
string index 😀G 2
} {}
-test utf-8.9.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
- string index 😀G 2
-} G
-test utf-8.10.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
- string index 😀G 0
-} \uFFFD
-test utf-8.10.1 {Tcl_UniCharAtIndex: Emoji} utf32 {
+test utf-8.10 {Tcl_UniCharAtIndex: Emoji} {
string index 😀G 0
} 😀
-test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
- string index 😀G 0
-} 😀
-test utf-8.11.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
- string index 😀G 1
-} G
-test utf-8.11.1 {Tcl_UniCharAtIndex: Emoji} utf32 {
+test utf-8.11 {Tcl_UniCharAtIndex: Emoji} {
string index 😀G 1
} G
-test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
- string index 😀G 1
-} {}
-test utf-8.12.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
- string index 😀G 2
-} {}
-test utf-8.12.1 {Tcl_UniCharAtIndex: Emoji} utf32 {
+test utf-8.12 {Tcl_UniCharAtIndex: Emoji} {
string index 😀G 2
} {}
-test utf-8.12.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
- string index 😀G 2
-} G
test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
string range abcd 0 2
@@ -948,60 +784,24 @@ test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
test utf-9.2 {Tcl_UtfAtIndex: index > 0} {
string range 乎ɚÿՃklmnop 1 5
} ɚÿՃkl
-test utf-9.3.0 {Tcl_UtfAtIndex: index = 0, Emoji} ucs2 {
- string range \uD83D\uDE00G 0 0
-} \uD83D
-test utf-9.3.1 {Tcl_UtfAtIndex: index = 0, Emoji} utf32 {
- string range 😀G 0 0
-} 😀
-test utf-9.3.2 {Tcl_UtfAtIndex: index = 0, Emoji} utf16 {
+test utf-9.3 {Tcl_UtfAtIndex: index = 0, Emoji} {
string range 😀G 0 0
} 😀
-test utf-9.4.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 {
- string range \uD83D\uDE00G 1 1
-} \uDE00
-test utf-9.4.1 {Tcl_UtfAtIndex: index > 0, Emoji} utf32 {
+test utf-9.4 {Tcl_UtfAtIndex: index > 0, Emoji} {
string range 😀G 1 1
} G
-test utf-9.4.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 {
- string range 😀G 1 1
-} {}
-test utf-9.5.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 {
- string range \uD83D\uDE00G 2 2
-} G
-test utf-9.5.1 {Tcl_UtfAtIndex: index > 0, Emoji} utf32 {
+test utf-9.5 {Tcl_UtfAtIndex: index > 0, Emoji} {
string range 😀G 2 2
} {}
-test utf-9.5.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 {
- string range 😀G 2 2
-} G
-test utf-9.6.0 {Tcl_UtfAtIndex: index = 0, Emoji} ucs2 {
- string range 😀G 0 0
-} \uFFFD
-test utf-9.6.1 {Tcl_UtfAtIndex: index = 0, Emoji} utf32 {
+test utf-9.6 {Tcl_UtfAtIndex: index = 0, Emoji} {
string range 😀G 0 0
} 😀
-test utf-9.6.2 {Tcl_UtfAtIndex: index = 0, Emoji} utf16 {
- string range 😀G 0 0
-} 😀
-test utf-9.7.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 {
- string range 😀G 1 1
-} G
-test utf-9.7.1 {Tcl_UtfAtIndex: index > 0, Emoji} utf32 {
+test utf-9.7 {Tcl_UtfAtIndex: index > 0, Emoji} {
string range 😀G 1 1
} G
-test utf-9.7.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 {
- string range 😀G 1 1
-} {}
-test utf-9.8.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 {
- string range 😀G 2 2
-} {}
-test utf-9.8.1 {Tcl_UtfAtIndex: index > 0, Emoji} utf32 {
+test utf-9.8 {Tcl_UtfAtIndex: index > 0, Emoji} {
string range 😀G 2 2
} {}
-test utf-9.8.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 {
- string range 😀G 2 2
-} G
test utf-10.1 {Tcl_UtfBackslash: dst == NULL} {
set x \n
@@ -1019,10 +819,10 @@ test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} testbytestring {
test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} testbytestring {
expr {"\u4E216" eq "[testbytestring \xE4\xB8\xA1]6"}
} 1
-test utf-10.6 {Tcl_UtfBackslash: stops after 5 hex chars} {fullutf testbytestring} {
+test utf-10.6 {Tcl_UtfBackslash: stops after 5 hex chars} {testbytestring} {
expr {"\U1E2165" eq "[testbytestring \xF0\x9E\x88\x96]5"}
} 1
-test utf-10.7 {Tcl_UtfBackslash: stops after 6 hex chars} {fullutf testbytestring} {
+test utf-10.7 {Tcl_UtfBackslash: stops after 6 hex chars} {testbytestring} {
expr {"\U10E2165" eq "[testbytestring \xF4\x8E\x88\x96]5"}
} 1
@@ -1063,8 +863,7 @@ bsCheck \x 120
bsCheck \xa 10
bsCheck \xA 10
bsCheck \x41 65
-bsCheck \x541 65 pre388 ;# == \x41
-bsCheck \x541 84 !pre388 ;# == \x54 1
+bsCheck \x541 84
bsCheck \u 117
bsCheck \uk 117
bsCheck \u41 65
@@ -1073,25 +872,24 @@ bsCheck \uA 10
bsCheck \340 224
bsCheck \uA1 161
bsCheck \u4E21 20001
-bsCheck \741 225 pre388 ;# == \341
-bsCheck \741 60 !pre388 ;# == \74 1
+bsCheck \741 60
bsCheck \U 85
bsCheck \Uk 85
-bsCheck \U41 65 Uesc
-bsCheck \Ua 10 Uesc
-bsCheck \UA 10 Uesc
-bsCheck \UA1 161 Uesc
-bsCheck \U4E21 20001 Uesc
-bsCheck \U004E21 20001 Uesc
-bsCheck \U00004E21 20001 Uesc
-bsCheck \U0000004E21 78 Uesc
-bsCheck \U00110000 69632 fullutf
-bsCheck \U01100000 69632 fullutf
-bsCheck \U11000000 69632 fullutf
-bsCheck \U0010FFFF 1114111 fullutf
-bsCheck \U010FFFF0 1114111 fullutf
-bsCheck \U10FFFF00 1114111 fullutf
-bsCheck \UFFFFFFFF 1048575 fullutf
+bsCheck \U41 65
+bsCheck \Ua 10
+bsCheck \UA 10
+bsCheck \UA1 161
+bsCheck \U4E21 20001
+bsCheck \U004E21 20001
+bsCheck \U00004E21 20001
+bsCheck \U0000004E21 78
+bsCheck \U00110000 69632
+bsCheck \U01100000 69632
+bsCheck \U11000000 69632
+bsCheck \U0010FFFF 1114111
+bsCheck \U010FFFF0 1114111
+bsCheck \U10FFFF00 1114111
+bsCheck \UFFFFFFFF 1048575
test utf-11.1 {Tcl_UtfToUpper} {
string toupper {}
@@ -1108,10 +906,10 @@ test utf-11.4 {Tcl_UtfToUpper} {
test utf-11.5 {Tcl_UtfToUpper Georgian (new in Unicode 11)} {
string toupper აᲐ
} ᲐᲐ
-test utf-11.6 {Tcl_UtfToUpper beyond U+FFFF} fullutf {
+test utf-11.6 {Tcl_UtfToUpper beyond U+FFFF} {
string toupper 𐐨
} 𐐀
-test utf-11.7 {Tcl_UtfToUpper beyond U+FFFF} fullutf {
+test utf-11.7 {Tcl_UtfToUpper beyond U+FFFF} {
string toupper 𐐨
} 𐐀
test utf-11.8 {Tcl_UtfToUpper low/high surrogate)} {
@@ -1136,10 +934,10 @@ test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} {
test utf-12.6 {Tcl_UtfToLower low/high surrogate)} {
string tolower \uDC24\uD824
} \uDC24\uD824
-test utf-12.7 {Tcl_UtfToLower beyond U+FFFF} fullutf {
+test utf-12.7 {Tcl_UtfToLower beyond U+FFFF} {
string tolower 𐐀
} 𐐨
-test utf-12.8 {Tcl_UtfToLower beyond U+FFFF} fullutf {
+test utf-12.8 {Tcl_UtfToLower beyond U+FFFF} {
string tolower 𐐀
} 𐐨
@@ -1164,10 +962,10 @@ test utf-13.6 {Tcl_UtfToTitle Georgian (new in Unicode 11)} {
test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} {
string totitle \uDC24\uD824
} \uDC24\uD824
-test utf-13.8 {Tcl_UtfToTitle beyond U+FFFF} fullutf {
+test utf-13.8 {Tcl_UtfToTitle beyond U+FFFF} {
string totitle 𐐨𐐀
} 𐐀𐐨
-test utf-13.9 {Tcl_UtfToTitle beyond U+FFFF} fullutf {
+test utf-13.9 {Tcl_UtfToTitle beyond U+FFFF} {
string totitle 𐐨𐐀
} 𐐀𐐨
@@ -1227,10 +1025,10 @@ test utf-19.1 {TclUniCharLen} -body {
unset -nocomplain foo
} -result {1 4}
-test utf-20.1 {TclUniCharNcmp} utf32 {
+test utf-20.1 {TclUniCharNcmp} {
string compare [string range [format %c 0xFFFF] 0 0] [string range [format %c 0x10000] 0 0]
} -1
-test utf-20.2 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} utf32 {
+test utf-20.2 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} {
set one [format %c 0xFFFF]
set two [format %c 0x10000]
set first [string compare $one $two]
@@ -1357,10 +1155,10 @@ UniCharCaseCmpTest < a b
UniCharCaseCmpTest > b a
UniCharCaseCmpTest > B a
UniCharCaseCmpTest > aBcB abca
-UniCharCaseCmpTest < \uFFFF [format %c 0x10000] utf32
-UniCharCaseCmpTest < \uFFFF \U10000 utf32
-UniCharCaseCmpTest > [format %c 0x10000] \uFFFF utf32
-UniCharCaseCmpTest > \U10000 \uFFFF utf32
+UniCharCaseCmpTest < \uFFFF [format %c 0x10000]
+UniCharCaseCmpTest < \uFFFF \U10000
+UniCharCaseCmpTest > [format %c 0x10000] \uFFFF
+UniCharCaseCmpTest > \U10000 \uFFFF
test utf-26.1 {Tcl_UniCharDString} -setup {
diff --git a/win/makefile.vc b/win/makefile.vc
index 27ee064..1fdfeb0 100644
--- a/win/makefile.vc
+++ b/win/makefile.vc
@@ -52,7 +52,7 @@
# turn on the 64-bit compiler, if your SDK has it.
#
# Basic macros and options usable on the commandline (see rules.vc for more info):
-# OPTS=msvcrt,noembed,nothreads,pdbs,profile,static,symbols,thrdalloc,time64bit,unchecked,utf16,none
+# OPTS=msvcrt,noembed,nothreads,pdbs,profile,static,symbols,thrdalloc,time64bit,unchecked,none
# Sets special options for the core. The default is for none.
# Any combination of the above may be used (comma separated).
# 'none' will over-ride everything to nothing.
@@ -80,7 +80,6 @@
# unchecked = Allows a symbols build to not use the debug
# enabled runtime (msvcrt.dll not msvcrtd.dll
# or libcmt.lib not libcmtd.lib).
-# utf16 = Forces a build using UTF-16 representation internally.
#
# STATS=compdbg,memdbg,none
# Sets optional memory and bytecode compiler debugging code added
diff --git a/win/rules.vc b/win/rules.vc
index d8b3b12..3a95aab 100644
--- a/win/rules.vc
+++ b/win/rules.vc
@@ -816,7 +816,6 @@ DOTSEPARATED=$(DOTSEPARATED:b=.)
# configuration (ignored for Tcl itself)
# _USE_64BIT_TIME_T - forces a build using 64-bit time_t for 32-bit build
# (CRT library should support this, not needed for Tcl 9.x)
-# TCL_UTF_MAX=3 - forces a build using UTF-16 internally (not recommended).
# Further, LINKERFLAGS are modified based on above.
# Default values for all the above
@@ -887,11 +886,6 @@ TCL_BUILD_FOR = 8
!message *** Force 64-bit time_t
_USE_64BIT_TIME_T = 1
!endif
-
-!if [nmakehlp -f $(OPTS) "utf16"]
-!message *** Force UTF-16 internally
-TCL_UTF_MAX = 3
-!endif
!endif
# Yes, it's weird that the "symbols" option controls DEBUG and
@@ -1451,9 +1445,6 @@ OPTDEFINES = $(OPTDEFINES) /D_USE_64BIT_TIME_T=1
# _ATL_XP_TARGETING - Newer SDK's need this to build for XP
COMPILERFLAGS = /D_ATL_XP_TARGETING
!endif
-!if "$(TCL_UTF_MAX)" == "3"
-OPTDEFINES = $(OPTDEFINES) /DTCL_UTF_MAX=3
-!endif
!if "$(TCL_BUILD_FOR)" == "8"
OPTDEFINES = $(OPTDEFINES) /DTCL_MAJOR_VERSION=8
!endif