summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2023-09-12 10:11:55 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2023-09-12 10:11:55 (GMT)
commit3b3d524d3ab6cd4d2bb66f30dae043c5c622cf23 (patch)
treea7d3be3be5996d60c7e526bd007ff12be6c8e575
parentf03ee8f9815a46ca1520e2b40825a81218c1b4d1 (diff)
parentb9bb8fd5b282207069eb191367f3a67d1daf50d5 (diff)
downloadtcl-3b3d524d3ab6cd4d2bb66f30dae043c5c622cf23.zip
tcl-3b3d524d3ab6cd4d2bb66f30dae043c5c622cf23.tar.gz
tcl-3b3d524d3ab6cd4d2bb66f30dae043c5c622cf23.tar.bz2
Merge 8.7
-rw-r--r--.github/workflows/linux-build.yml1
-rw-r--r--.github/workflows/win-build.yml2
-rw-r--r--.travis.yml21
-rw-r--r--generic/regcustom.h6
-rw-r--r--generic/tcl.h6
-rw-r--r--generic/tclBinary.c8
-rw-r--r--generic/tclCmdIL.c4
-rw-r--r--generic/tclCmdMZ.c22
-rw-r--r--generic/tclCompExpr.c4
-rw-r--r--generic/tclDisassemble.c2
-rw-r--r--generic/tclEncoding.c173
-rw-r--r--generic/tclEvent.c3
-rw-r--r--generic/tclExecute.c7
-rw-r--r--generic/tclInt.h9
-rw-r--r--generic/tclObj.c9
-rw-r--r--generic/tclParse.c22
-rw-r--r--generic/tclScan.c2
-rw-r--r--generic/tclStringObj.c93
-rw-r--r--generic/tclStringRep.h2
-rw-r--r--generic/tclStubInit.c18
-rw-r--r--generic/tclUtf.c182
-rw-r--r--generic/tclUtil.c30
-rw-r--r--tests/utf.test373
-rw-r--r--win/makefile.vc3
-rw-r--r--win/rules.vc9
25 files changed, 151 insertions, 860 deletions
diff --git a/.github/workflows/linux-build.yml b/.github/workflows/linux-build.yml
index 2819b92..7c015df 100644
--- a/.github/workflows/linux-build.yml
+++ b/.github/workflows/linux-build.yml
@@ -16,7 +16,6 @@ jobs:
matrix:
cfgopt:
- ""
- - "CFLAGS=-DTCL_UTF_MAX=3"
- "CFLAGS=-DTCL_NO_DEPRECATED=1"
- "--disable-shared"
- "--enable-symbols"
diff --git a/.github/workflows/win-build.yml b/.github/workflows/win-build.yml
index b718ed3..8af5ac7 100644
--- a/.github/workflows/win-build.yml
+++ b/.github/workflows/win-build.yml
@@ -22,7 +22,6 @@ jobs:
matrix:
cfgopt:
- ""
- - "OPTS=utf16"
- "CHECKS=nodep"
- "OPTS=static"
- "OPTS=symbols"
@@ -66,7 +65,6 @@ jobs:
matrix:
cfgopt:
- ""
- - "CFLAGS=-DTCL_UTF_MAX=3"
- "CFLAGS=-DTCL_NO_DEPRECATED=1"
- "--disable-shared"
- "--enable-symbols"
diff --git a/.travis.yml b/.travis.yml
index 99b692d..295ba77 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -20,13 +20,6 @@ jobs:
compiler: gcc
env:
- BUILD_DIR=unix
- - name: "Linux/GCC/Shared: UTF_MAX=3"
- os: linux
- dist: focal
- compiler: gcc
- env:
- - BUILD_DIR=unix
- - CFGOPT="CFLAGS=-DTCL_UTF_MAX=3"
- name: "Linux/GCC/Shared: NO_DEPRECATED"
os: linux
dist: focal
@@ -301,13 +294,6 @@ jobs:
- touch generic/tclStubInit.c generic/tclOOStubInit.c generic/tclOOScript.h
- choco install -y make zip
- cd ${BUILD_DIR}
- - name: "Windows/GCC/Shared: UTF_MAX=3"
- os: windows
- compiler: gcc
- env:
- - BUILD_DIR=win
- - CFGOPT="--enable-64bit CFLAGS=-DTCL_UTF_MAX=3"
- before_install: *makepreinst
- name: "Windows/GCC/Shared: NO_DEPRECATED"
os: windows
compiler: gcc
@@ -343,13 +329,6 @@ jobs:
env:
- BUILD_DIR=win
before_install: *makepreinst
- - name: "Windows/GCC-x86/Shared: UTF_MAX=3"
- os: windows
- compiler: gcc
- env:
- - BUILD_DIR=win
- - CFGOPT="CFLAGS=-DTCL_UTF_MAX=3"
- before_install: *makepreinst
- name: "Windows/GCC-x86/Shared: NO_DEPRECATED"
os: windows
compiler: gcc
diff --git a/generic/regcustom.h b/generic/regcustom.h
index 1d55671..e5d7f12 100644
--- a/generic/regcustom.h
+++ b/generic/regcustom.h
@@ -84,15 +84,9 @@ typedef int celt; /* Type to hold chr, or NOCELT */
#define NOCELT (-1) /* Celt value which is not valid chr */
#define CHR(c) (UCHAR(c)) /* Turn char literal into chr literal */
#define DIGITVAL(c) ((c)-'0') /* Turn chr digit into its value */
-#if TCL_UTF_MAX > 3
#define CHRBITS 32 /* Bits in a chr; must not use sizeof */
#define CHR_MIN 0x00000000 /* Smallest and largest chr; the value */
#define CHR_MAX 0x10FFFF /* CHR_MAX-CHR_MIN+1 should fit in uchr */
-#else
-#define CHRBITS 16 /* Bits in a chr; must not use sizeof */
-#define CHR_MIN 0x0000 /* Smallest and largest chr; the value */
-#define CHR_MAX 0xFFFF /* CHR_MAX-CHR_MIN+1 should fit in uchr */
-#endif
/*
* Functions operating on chr.
diff --git a/generic/tcl.h b/generic/tcl.h
index a41f809..1f8c32b 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -2095,15 +2095,17 @@ typedef struct Tcl_EncodingType {
* reflected in regcustom.h.
*/
-#if TCL_UTF_MAX > 3
+#if TCL_UTF_MAX == 4
/*
* int isn't 100% accurate as it should be a strict 4-byte value
* (perhaps int32_t). ILP64/SILP64 systems may have troubles. The
* size of this value must be reflected correctly in regcustom.h.
*/
typedef int Tcl_UniChar;
-#else
+#elif TCL_UTF_MAX == 3 && !defined(BUILD_tcl)
typedef unsigned short Tcl_UniChar;
+#else
+# error "This TCL_UTF_MAX value is not supported"
#endif
/*
diff --git a/generic/tclBinary.c b/generic/tclBinary.c
index 6278d27..9c32cd7 100644
--- a/generic/tclBinary.c
+++ b/generic/tclBinary.c
@@ -509,7 +509,7 @@ MakeByteArray(
for (; src < srcEnd && dst < dstEnd; ) {
int ch;
- int count = TclUtfToUCS4(src, &ch);
+ int count = Tcl_UtfToUniChar(src, &ch);
if (ch > 255) {
proper = 0;
@@ -2561,7 +2561,7 @@ BinaryDecodeHex(
if (pure) {
ucs4 = c;
} else {
- TclUtfToUCS4((const char *)(data - 1), &ucs4);
+ Tcl_UtfToUniChar((const char *)(data - 1), &ucs4);
}
TclDecrRefCount(resultObj);
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
@@ -3026,7 +3026,7 @@ BinaryDecodeUu(
if (pure) {
ucs4 = c;
} else {
- TclUtfToUCS4((const char *)(data - 1), &ucs4);
+ Tcl_UtfToUniChar((const char *)(data - 1), &ucs4);
}
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
"invalid uuencode character \"%c\" (U+%06X) at position %"
@@ -3201,7 +3201,7 @@ BinaryDecode64(
* of a multi-byte character. */
/* Safe because we know data is NUL-terminated */
- TclUtfToUCS4((const char *)(data - 1), &ucs4);
+ Tcl_UtfToUniChar((const char *)(data - 1), &ucs4);
}
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c
index aed9a85..8e07040 100644
--- a/generic/tclCmdIL.c
+++ b/generic/tclCmdIL.c
@@ -5453,8 +5453,8 @@ DictionaryCompare(
*/
if ((*left != '\0') && (*right != '\0')) {
- left += TclUtfToUCS4(left, &uniLeft);
- right += TclUtfToUCS4(right, &uniRight);
+ left += Tcl_UtfToUniChar(left, &uniLeft);
+ right += Tcl_UtfToUniChar(right, &uniRight);
/*
* Convert both chars to lower for the comparison, because
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index b27d3a9..116d453 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -1217,7 +1217,7 @@ Tcl_SplitObjCmd(
Tcl_InitHashTable(&charReuseTable, TCL_ONE_WORD_KEYS);
for ( ; stringPtr < end; stringPtr += len) {
- len = TclUtfToUCS4(stringPtr, &ch);
+ len = Tcl_UtfToUniChar(stringPtr, &ch);
hPtr = Tcl_CreateHashEntry(&charReuseTable, INT2PTR(ch), &isNew);
if (isNew) {
TclNewStringObj(objPtr, stringPtr, len);
@@ -1263,9 +1263,9 @@ Tcl_SplitObjCmd(
splitEnd = splitChars + splitCharLen;
for (element = stringPtr; stringPtr < end; stringPtr += len) {
- len = TclUtfToUCS4(stringPtr, &ch);
+ len = Tcl_UtfToUniChar(stringPtr, &ch);
for (p = splitChars; p < splitEnd; p += splitLen) {
- splitLen = TclUtfToUCS4(p, &splitChar);
+ splitLen = Tcl_UtfToUniChar(p, &splitChar);
if (ch == splitChar) {
TclNewStringObj(objPtr, element, stringPtr - element);
Tcl_ListObjAppendElement(NULL, listPtr, objPtr);
@@ -1431,11 +1431,6 @@ StringIndexCmd(
char buf[4] = "";
end = Tcl_UniCharToUtf(ch, buf);
-#if TCL_UTF_MAX < 4
- if ((ch >= 0xD800) && (end < 3)) {
- end += Tcl_UniCharToUtf(-1, buf + end);
- }
-#endif
Tcl_SetObjResult(interp, Tcl_NewStringObj(buf, end));
}
}
@@ -1896,7 +1891,7 @@ StringIsCmd(
for (; string1 < end; string1 += length2, failat++) {
int ucs4;
- length2 = TclUtfToUCS4(string1, &ucs4);
+ length2 = Tcl_UtfToUniChar(string1, &ucs4);
if (!chcomp(ucs4)) {
result = 0;
break;
@@ -2524,7 +2519,7 @@ StringStartCmd(
if (index > 0) {
p = &string[index];
- (void)TclUniCharToUCS4(p, &ch);
+ ch = *p;
for (cur = index; cur != TCL_INDEX_NONE; cur--) {
int delta = 0;
const Tcl_UniChar *next;
@@ -2533,10 +2528,11 @@ StringStartCmd(
break;
}
- next = TclUCS4Prev(p, string);
+ next = ((p > string) ? (p - 1) : p);
do {
next += delta;
- delta = TclUniCharToUCS4(next, &ch);
+ ch = *next;
+ delta = 1;
} while (next + delta < p);
p = next;
}
@@ -2594,7 +2590,7 @@ StringEndCmd(
p = &string[index];
end = string+length;
for (cur = index; p < end; cur++) {
- p += TclUniCharToUCS4(p, &ch);
+ ch = *p++;
if (!Tcl_UniCharIsWordChar(ch)) {
break;
}
diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c
index ae365c3..fcabd37 100644
--- a/generic/tclCompExpr.c
+++ b/generic/tclCompExpr.c
@@ -2149,13 +2149,13 @@ ParseLexeme(
if (!TclIsBareword(*start) || *start == '_') {
Tcl_Size scanned;
if (Tcl_UtfCharComplete(start, numBytes)) {
- scanned = TclUtfToUCS4(start, &ch);
+ scanned = Tcl_UtfToUniChar(start, &ch);
} else {
char utfBytes[8];
memcpy(utfBytes, start, numBytes);
utfBytes[numBytes] = '\0';
- scanned = TclUtfToUCS4(utfBytes, &ch);
+ scanned = Tcl_UtfToUniChar(utfBytes, &ch);
}
*lexemePtr = INVALID;
Tcl_DecrRefCount(literal);
diff --git a/generic/tclDisassemble.c b/generic/tclDisassemble.c
index f3eee37..e8a620c 100644
--- a/generic/tclDisassemble.c
+++ b/generic/tclDisassemble.c
@@ -876,7 +876,7 @@ PrintSourceToObj(
for (; (*p != '\0') && (i < maxChars); p+=len) {
int ucs4;
- len = TclUtfToUCS4(p, &ucs4);
+ len = Tcl_UtfToUniChar(p, &ucs4);
switch (ucs4) {
case '"':
Tcl_AppendToObj(appendObj, "\\\"", -1);
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index b6c6571..c72122c 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -1637,7 +1637,7 @@ Tcl_UtfToExternalDStringEx(
Tcl_Size pos = Tcl_NumUtfChars(srcStart, nBytesProcessed);
int ucs4;
char buf[TCL_INTEGER_SPACE];
- TclUtfToUCS4(&srcStart[nBytesProcessed], &ucs4);
+ Tcl_UtfToUniChar(&srcStart[nBytesProcessed], &ucs4);
snprintf(buf, sizeof(buf), "%" TCL_SIZE_MODIFIER "u", nBytesProcessed);
Tcl_SetObjResult(
interp,
@@ -2544,15 +2544,6 @@ UtfToUtfProc(
dstEnd = dst + dstLen - ((flags & ENCODING_UTF) ? TCL_UTF_MAX : 6);
-#if TCL_UTF_MAX < 4
- /* Initialize the buffer so that some random data doesn't trick
- * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs.
- * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its
- * prior non-stateful nature, this call to memset can also be removed.
- */
- memset(dst, 0xff, dstLen);
-#endif
-
profile = ENCODING_PROFILE_GET(flags);
for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
@@ -2600,7 +2591,7 @@ UtfToUtfProc(
} else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
/*
* Incomplete byte sequence.
- * Always check before using TclUtfToUCS4. Not doing so can cause it
+ * Always check before using Tcl_UtfToUniChar. Not doing so can cause it
* run beyond the end of the buffer! If we happen on such an incomplete
* char its bytes are made to represent themselves unless the user has
* explicitly asked to be told.
@@ -2622,12 +2613,12 @@ UtfToUtfProc(
/* TCL_ENCODING_PROFILE_TCL8 */
char chbuf[2];
chbuf[0] = UCHAR(*src++); chbuf[1] = 0;
- TclUtfToUCS4(chbuf, &ch);
+ Tcl_UtfToUniChar(chbuf, &ch);
}
dst += Tcl_UniCharToUtf(ch, dst);
} else {
int isInvalid = 0;
- size_t len = TclUtfToUCS4(src, &ch);
+ size_t len = Tcl_UtfToUniChar(src, &ch);
if (flags & ENCODING_INPUT) {
if ((len < 2) && (ch != 0)) {
isInvalid = 1;
@@ -2655,44 +2646,10 @@ UtfToUtfProc(
*dst++ = (char) (((ch >> 10) & 0x3F) | 0x80);
ch = (ch & 0x0CFF) | 0xDC00;
}
-#if TCL_UTF_MAX < 4
- cesu8:
-#endif
*dst++ = (char) (((ch >> 12) | 0xE0) & 0xEF);
*dst++ = (char) (((ch >> 6) | 0x80) & 0xBF);
*dst++ = (char) ((ch | 0x80) & 0xBF);
continue;
-#if TCL_UTF_MAX < 4
- } else if (SURROGATE(ch)) {
- /*
- * A surrogate character is detected, handle especially.
- */
- if (PROFILE_STRICT(profile) && (flags & ENCODING_UTF)) {
- result = TCL_CONVERT_UNKNOWN;
- src = saveSrc;
- break;
- }
- if (PROFILE_REPLACE(profile)) {
- /* TODO - is this right for cesu8 or should we fall through below? */
- ch = UNICODE_REPLACE_CHAR;
- } else {
- int low = ch;
- len = (src <= srcEnd - 3) ? TclUtfToUCS4(src, &low) : 0;
-
- if ((!LOW_SURROGATE(low)) || (ch & 0x400)) {
-
- if (PROFILE_STRICT(profile)) {
- result = TCL_CONVERT_UNKNOWN;
- src = saveSrc;
- break;
- }
- goto cesu8;
- }
- src += len;
- dst += Tcl_UniCharToUtf(ch, dst);
- ch = low;
- }
-#endif
} else if (PROFILE_STRICT(profile) &&
(!(flags & ENCODING_INPUT)) &&
SURROGATE(ch)) {
@@ -2767,15 +2724,6 @@ Utf32ToUtfProc(
}
result = TCL_OK;
-#if TCL_UTF_MAX < 4
- /* Initialize the buffer so that some random data doesn't trick
- * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs.
- * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its
- * prior non-stateful nature, this call to memset can also be removed.
- */
- memset(dst, 0xff, dstLen);
-#endif
-
/*
* Check alignment with utf-32 (4 == sizeof(UTF-32))
*/
@@ -2785,21 +2733,6 @@ Utf32ToUtfProc(
srcLen -= bytesLeft;
}
-#if TCL_UTF_MAX < 4
- /*
- * If last code point is a high surrogate, we cannot handle that yet,
- * unless we are at the end.
- */
-
- if (!(flags & TCL_ENCODING_END) && (srcLen >= 4) &&
- ((src[srcLen - ((flags & TCL_ENCODING_LE)?3:2)] & 0xFC) == 0xD8) &&
- ((src[srcLen - ((flags & TCL_ENCODING_LE)?2:3)]) == 0) &&
- ((src[srcLen - ((flags & TCL_ENCODING_LE)?1:4)]) == 0)) {
- result = TCL_CONVERT_MULTIBYTE;
- srcLen-= 4;
- }
-#endif
-
srcStart = src;
srcEnd = src + srcLen;
@@ -2812,21 +2745,11 @@ Utf32ToUtfProc(
break;
}
-#if TCL_UTF_MAX < 4
- int prev = ch;
-#endif
if (flags & TCL_ENCODING_LE) {
ch = (unsigned int)(src[3] & 0xFF) << 24 | (src[2] & 0xFF) << 16 | (src[1] & 0xFF) << 8 | (src[0] & 0xFF);
} else {
ch = (unsigned int)(src[0] & 0xFF) << 24 | (src[1] & 0xFF) << 16 | (src[2] & 0xFF) << 8 | (src[3] & 0xFF);
}
-#if TCL_UTF_MAX < 4
- if (HIGH_SURROGATE(prev) && !LOW_SURROGATE(ch)) {
- /* Bug [10c2c17c32]. If Hi surrogate not followed by Lo surrogate, finish 3-byte UTF-8 */
- dst += Tcl_UniCharToUtf(-1, dst);
- }
-#endif
-
if ((unsigned)ch > 0x10FFFF) {
ch = UNICODE_REPLACE_CHAR;
if (PROFILE_STRICT(flags)) {
@@ -2835,9 +2758,6 @@ Utf32ToUtfProc(
}
} else if (PROFILE_STRICT(flags) && SURROGATE(ch)) {
result = TCL_CONVERT_SYNTAX;
-#if TCL_UTF_MAX < 4
- ch = 0;
-#endif
break;
} else if (PROFILE_REPLACE(flags) && SURROGATE(ch)) {
ch = UNICODE_REPLACE_CHAR;
@@ -2851,11 +2771,6 @@ Utf32ToUtfProc(
if ((unsigned)ch - 1 < 0x7F) {
*dst++ = (ch & 0xFF);
} else {
-#if TCL_UTF_MAX < 4
- if (!HIGH_SURROGATE(prev) && LOW_SURROGATE(ch)) {
- *dst = 0; /* In case of lower surrogate, don't try to combine */
- }
-#endif
dst += Tcl_UniCharToUtf(ch, dst);
}
src += 4;
@@ -2865,13 +2780,6 @@ Utf32ToUtfProc(
* If we had a truncated code unit at the end AND this is the last
* fragment AND profile is not "strict", stick FFFD in its place.
*/
-#if TCL_UTF_MAX < 4
- if (HIGH_SURROGATE(ch)) {
- /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */
- dst += Tcl_UniCharToUtf(-1, dst);
- }
-#endif
-
if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) {
if (dst > dstEnd) {
result = TCL_CONVERT_NOSPACE;
@@ -2964,7 +2872,7 @@ UtfToUtf32Proc(
result = TCL_CONVERT_NOSPACE;
break;
}
- len = TclUtfToUCS4(src, &ch);
+ len = Tcl_UtfToUniChar(src, &ch);
if (SURROGATE(ch)) {
if (PROFILE_STRICT(flags)) {
result = TCL_CONVERT_UNKNOWN;
@@ -3045,15 +2953,6 @@ Utf16ToUtfProc(
}
result = TCL_OK;
-#if TCL_UTF_MAX < 4
- /* Initialize the buffer so that some random data doesn't trick
- * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs.
- * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its
- * prior non-stateful nature, this call to memset can also be removed.
- */
- memset(dst, 0xff, dstLen);
-#endif
-
/*
* Check alignment with utf-16 (2 == sizeof(UTF-16))
*/
@@ -3252,7 +3151,7 @@ UtfToUtf16Proc(
result = TCL_CONVERT_NOSPACE;
break;
}
- len = TclUtfToUCS4(src, &ch);
+ len = Tcl_UtfToUniChar(src, &ch);
if (SURROGATE(ch)) {
if (PROFILE_STRICT(flags)) {
result = TCL_CONVERT_UNKNOWN;
@@ -3361,18 +3260,6 @@ UtfToUcs2Proc(
result = TCL_CONVERT_NOSPACE;
break;
}
-#if TCL_UTF_MAX < 4
- len = TclUtfToUniChar(src, &ch);
- if ((ch >= 0xD800) && (len < 3)) {
- if (PROFILE_STRICT(flags)) {
- result = TCL_CONVERT_UNKNOWN;
- break;
- }
- src += len;
- src += TclUtfToUniChar(src, &ch);
- ch = UNICODE_REPLACE_CHAR;
- }
-#else
len = TclUtfToUniChar(src, &ch);
if (ch > 0xFFFF) {
if (PROFILE_STRICT(flags)) {
@@ -3381,7 +3268,6 @@ UtfToUcs2Proc(
}
ch = UNICODE_REPLACE_CHAR;
}
-#endif
if (PROFILE_STRICT(flags) && SURROGATE(ch)) {
result = TCL_CONVERT_SYNTAX;
break;
@@ -3467,15 +3353,6 @@ TableToUtfProc(
dstStart = dst;
dstEnd = dst + dstLen - TCL_UTF_MAX;
-#if TCL_UTF_MAX < 4
- /* Initialize the buffer so that some random data doesn't trick
- * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs.
- * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its
- * prior non-stateful nature, this call to memset can also be removed.
- */
- memset(dst, 0xff, dstLen);
-#endif
-
toUnicode = (const unsigned short *const *) dataPtr->toUnicode;
prefixBytes = dataPtr->prefixBytes;
pageZero = toUnicode[0];
@@ -3619,17 +3496,12 @@ TableFromUtfProc(
}
len = TclUtfToUniChar(src, &ch);
-#if TCL_UTF_MAX > 3
/* Unicode chars > +U0FFFF cannot be represented in any table encoding */
if (ch & 0xFFFF0000) {
word = 0;
- } else
-#else
- if (!len) {
- word = 0;
- } else
-#endif
+ } else {
word = fromUnicode[(ch >> 8)][ch & 0xFF];
+ }
if ((word == 0) && (ch != 0)) {
if (PROFILE_STRICT(flags)) {
@@ -3716,15 +3588,6 @@ Iso88591ToUtfProc(
dstStart = dst;
dstEnd = dst + dstLen - TCL_UTF_MAX;
-#if TCL_UTF_MAX < 4
- /* Initialize the buffer so that some random data doesn't trick
- * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs.
- * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its
- * prior non-stateful nature, this call to memset can also be removed.
- */
- memset(dst, 0xff, dstLen);
-#endif
-
result = TCL_OK;
for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
Tcl_UniChar ch = 0;
@@ -3826,20 +3689,11 @@ Iso88591FromUtfProc(
* Check for illegal characters.
*/
- if (ch > 0xFF
-#if TCL_UTF_MAX < 4
- || ((ch >= 0xD800) && (len < 3))
-#endif
- ) {
+ if (ch > 0xFF) {
if (PROFILE_STRICT(flags)) {
result = TCL_CONVERT_UNKNOWN;
break;
}
-#if TCL_UTF_MAX < 4
- if ((ch >= 0xD800) && (len < 3)) {
- len = 4;
- }
-#endif
/*
* Plunge on, using '?' as a fallback character.
*/
@@ -3964,15 +3818,6 @@ EscapeToUtfProc(
dstStart = dst;
dstEnd = dst + dstLen - TCL_UTF_MAX;
-#if TCL_UTF_MAX < 4
- /* Initialize the buffer so that some random data doesn't trick
- * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs.
- * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its
- * prior non-stateful nature, this call to memset can also be removed.
- */
- memset(dst, 0xff, dstLen);
-#endif
-
state = PTR2INT(*statePtr);
if (flags & TCL_ENCODING_START) {
state = 0;
diff --git a/generic/tclEvent.c b/generic/tclEvent.c
index 8fb309d..196b615 100644
--- a/generic/tclEvent.c
+++ b/generic/tclEvent.c
@@ -1108,9 +1108,6 @@ static const struct {
#ifdef STATIC_BUILD
".static"
#endif
-#if TCL_UTF_MAX < 4
- ".utf-16"
-#endif
}};
const char *
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index f17ff75..19bce64 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -5353,11 +5353,6 @@ TEBCresume(
TclNewObj(objResultPtr);
} else {
slength = Tcl_UniCharToUtf(ch, buf);
-#if TCL_UTF_MAX < 4
- if ((ch >= 0xD800) && (slength < 3)) {
- slength += Tcl_UniCharToUtf(-1, buf + slength);
- }
-#endif
objResultPtr = Tcl_NewStringObj(buf, slength);
}
}
@@ -5569,7 +5564,7 @@ TEBCresume(
int ch;
end = ustring1 + slength;
for (p=ustring1 ; p<end ; ) {
- p += TclUniCharToUCS4(p, &ch);
+ ch = *p++;
if (!tclStringClassTable[opnd].comparator(ch)) {
match = 0;
break;
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 3ee3199..8d0d577 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3572,15 +3572,6 @@ MODULE_SCOPE void TclRegisterCommandTypeName(
MODULE_SCOPE int TclUtfCmp(const char *cs, const char *ct);
MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct);
MODULE_SCOPE int TclUtfCount(int ch);
-#if TCL_UTF_MAX > 3
-# define TclUtfToUCS4 Tcl_UtfToUniChar
-# define TclUniCharToUCS4(src, ptr) (*ptr = *(src),1)
-# define TclUCS4Prev(src, ptr) (((src) > (ptr)) ? ((src) - 1) : (src))
-#else
- MODULE_SCOPE int TclUtfToUCS4(const char *, int *);
- MODULE_SCOPE int TclUniCharToUCS4(const Tcl_UniChar *, int *);
- MODULE_SCOPE const Tcl_UniChar *TclUCS4Prev(const Tcl_UniChar *, const Tcl_UniChar *);
-#endif
MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(void *clientData);
MODULE_SCOPE Tcl_Obj * TclpFilesystemPathType(Tcl_Obj *pathPtr);
MODULE_SCOPE int TclpDlopen(Tcl_Interp *interp, Tcl_Obj *pathPtr,
diff --git a/generic/tclObj.c b/generic/tclObj.c
index 9ddb397..8d3ddd5 100644
--- a/generic/tclObj.c
+++ b/generic/tclObj.c
@@ -386,15 +386,6 @@ TclInitObjSubsystem(void)
Tcl_RegisterObjType(&tclRegexpType);
Tcl_RegisterObjType(&tclProcBodyType);
- /* For backward compatibility only ... */
-#if !defined(TCL_NO_DEPRECATED) && TCL_MAJOR_VERSION < 9
- Tcl_RegisterObjType(&tclIntType);
-#if !defined(TCL_WIDE_INT_IS_LONG)
- Tcl_RegisterObjType(&oldIntType);
-#endif
- Tcl_RegisterObjType(&oldBooleanType);
-#endif
-
#ifdef TCL_COMPILE_STATS
Tcl_MutexLock(&tclObjMutex);
tclObjsAlloced = 0;
diff --git a/generic/tclParse.c b/generic/tclParse.c
index 55fd63d..6417514 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -871,18 +871,6 @@ TclParseBackslash(
* No hexdigits -> This is just "u".
*/
result = 'u';
-#if TCL_UTF_MAX < 4
- } else if (((result & 0xFC00) == 0xD800) && (count == 6)
- && (p[5] == '\\') && (p[6] == 'u') && (numBytes >= 10)) {
- /* If high surrogate is immediately followed by a low surrogate
- * escape, combine them into one character. */
- int low;
- int count2 = ParseHex(p+7, 4, &low);
- if ((count2 == 4) && ((low & 0xFC00) == 0xDC00)) {
- result = ((result & 0x3FF)<<10 | (low & 0x3FF)) + 0x10000;
- count += count2 + 2;
- }
-#endif
}
break;
case 'U':
@@ -938,13 +926,13 @@ TclParseBackslash(
*/
if (Tcl_UtfCharComplete(p, numBytes - 1)) {
- count = TclUtfToUCS4(p, &unichar) + 1; /* +1 for '\' */
+ count = Tcl_UtfToUniChar(p, &unichar) + 1; /* +1 for '\' */
} else {
char utfBytes[8];
memcpy(utfBytes, p, numBytes - 1);
utfBytes[numBytes - 1] = '\0';
- count = TclUtfToUCS4(utfBytes, &unichar) + 1;
+ count = Tcl_UtfToUniChar(utfBytes, &unichar) + 1;
}
result = unichar;
break;
@@ -955,12 +943,6 @@ TclParseBackslash(
*readPtr = count;
}
count = Tcl_UniCharToUtf(result, dst);
-#if TCL_UTF_MAX < 4
- if ((result >= 0xD800) && (count < 3)) {
- /* Special case for handling high surrogates. */
- count += Tcl_UniCharToUtf(-1, dst + count);
- }
-#endif
return count;
}
diff --git a/generic/tclScan.c b/generic/tclScan.c
index 42838ac..d3a8036 100644
--- a/generic/tclScan.c
+++ b/generic/tclScan.c
@@ -903,7 +903,7 @@ Tcl_ScanObjCmd(
* Scan a single Unicode character.
*/
- offset = TclUtfToUCS4(string, &i);
+ offset = Tcl_UtfToUniChar(string, &i);
string += offset;
if (!(flags & SCAN_SUPPRESS)) {
TclNewIntObj(objPtr, i);
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 3e1df0b..850bf45 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -8,7 +8,7 @@
*
* Conceptually, a string is a sequence of Unicode code points. Internally
* it may be stored in an encoding form such as a modified version of
- * UTF-8 or UTF-16 (when TCL_UTF_MAX=3) or UTF-32.
+ * UTF-8 or UTF-32.
*
* The String object is optimized for the case where each UTF char
* in a string is only one byte. In this case, we store the value of
@@ -67,15 +67,8 @@ static void SetUnicodeObj(Tcl_Obj *objPtr,
static Tcl_Size UnicodeLength(const Tcl_UniChar *unicode);
static void UpdateStringOfString(Tcl_Obj *objPtr);
-#if TCL_UTF_MAX > 3
#define ISCONTINUATION(bytes) (\
((bytes)[0] & 0xC0) == 0x80)
-#else
-#define ISCONTINUATION(bytes) (\
- ((((bytes)[0] & 0xC0) == 0x80) || (((bytes)[0] == '\xED') \
- && (((bytes)[1] & 0xF0) == 0xB0) && (((bytes)[2] & 0xC0) == 0x80))))
-#endif
-
/*
* The structure below defines the string Tcl object type by means of
@@ -592,22 +585,6 @@ Tcl_GetUniChar(
return -1;
}
ch = stringPtr->unicode[index];
-#if TCL_UTF_MAX < 4
- /* See: bug [11ae2be95dac9417] */
- if ((ch & 0xF800) == 0xD800) {
- if (ch & 0x400) {
- if ((index > 0)
- && ((stringPtr->unicode[index-1] & 0xFC00) == 0xD800)) {
- ch = -1; /* low surrogate preceded by high surrogate */
- }
- } else if ((++index < stringPtr->numChars)
- && ((stringPtr->unicode[index] & 0xFC00) == 0xDC00)) {
- /* high surrogate followed by low surrogate */
- ch = (((ch & 0x3FF) << 10) |
- (stringPtr->unicode[index] & 0x3FF)) + 0x10000;
- }
- }
-#endif
return ch;
}
@@ -819,18 +796,6 @@ Tcl_GetRange(
TclNewObj(newObjPtr);
return newObjPtr;
}
-#if TCL_UTF_MAX < 4
- /* See: bug [11ae2be95dac9417] */
- if ((first > 0) && ((stringPtr->unicode[first] & 0xFC00) == 0xDC00)
- && ((stringPtr->unicode[first-1] & 0xFC00) == 0xD800)) {
- ++first;
- }
- if ((last + 1 < stringPtr->numChars)
- && ((stringPtr->unicode[last+1] & 0xFC00) == 0xDC00)
- && ((stringPtr->unicode[last] & 0xFC00) == 0xD800)) {
- ++last;
- }
-#endif
return Tcl_NewUnicodeObj(stringPtr->unicode + first, last - first + 1);
}
@@ -2160,12 +2125,6 @@ Tcl_AppendFormatToObj(
code = 0xFFFD;
}
length = Tcl_UniCharToUtf(code, buf);
-#if TCL_UTF_MAX < 4
- if ((code >= 0xD800) && (length < 3)) {
- /* Special case for handling high surrogates. */
- length += Tcl_UniCharToUtf(-1, buf + length);
- }
-#endif
segment = Tcl_NewStringObj(buf, length);
Tcl_IncrRefCount(segment);
allocSegment = 1;
@@ -3570,7 +3529,7 @@ TclStringCmp(
s1 = (char *) Tcl_GetUnicode(value1Ptr);
s2 = (char *) Tcl_GetUnicode(value2Ptr);
if (
-#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3)
+#if defined(WORDS_BIGENDIAN)
1
#else
checkEq
@@ -3930,9 +3889,6 @@ TclStringReverse(
String *stringPtr;
Tcl_UniChar ch = 0;
int inPlace = flags & TCL_STRING_IN_PLACE;
-#if TCL_UTF_MAX < 4
- int needFlip = 0;
-#endif
if (TclIsPureByteArray(objPtr)) {
Tcl_Size numBytes = 0;
@@ -3965,54 +3921,19 @@ TclStringReverse(
to = Tcl_GetUnicode(objPtr);
stringPtr = GET_STRING(objPtr);
while (--src >= from) {
-#if TCL_UTF_MAX < 4
- ch = *src;
- if ((ch & 0xF800) == 0xD800) {
- needFlip = 1;
- }
- *to++ = ch;
-#else
*to++ = *src;
-#endif
}
} else {
/*
* Reversing in place.
*/
-#if TCL_UTF_MAX < 4
- to = src;
-#endif
while (--src > from) {
ch = *src;
-#if TCL_UTF_MAX < 4
- if ((ch & 0xF800) == 0xD800) {
- needFlip = 1;
- }
-#endif
*src = *from;
*from++ = ch;
}
}
-#if TCL_UTF_MAX < 4
- if (needFlip) {
- /*
- * Flip back surrogate pairs.
- */
-
- from = to - stringPtr->numChars;
- while (--to >= from) {
- ch = *to;
- if ((ch & 0xFC00) == 0xD800) {
- if ((to-1 >= from) && ((to[-1] & 0xFC00) == 0xDC00)) {
- to[0] = to[-1];
- to[-1] = ch;
- --to;
- }
- }
- }
- }
-#endif
}
if (objPtr->bytes) {
@@ -4046,7 +3967,7 @@ TclStringReverse(
* skip calling Tcl_UtfCharComplete() here.
*/
- int bytesInChar = TclUtfToUCS4(from, &chw);
+ int bytesInChar = Tcl_UtfToUniChar(from, &chw);
ReverseBytes((unsigned char *)to, (unsigned char *)from,
bytesInChar);
@@ -4495,14 +4416,6 @@ ExtendStringRepWithUnicode(
copyBytes:
dst = objPtr->bytes + origLength;
-#if TCL_UTF_MAX < 4
- /* Initialize the buffer so that some random data doesn't trick
- * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs.
- * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its
- * prior non-stateful nature, this call to memset can also be removed.
- */
- memset(dst, 0xff, stringPtr->allocated - origLength);
-#endif
for (i = 0; i < numChars; i++) {
dst += Tcl_UniCharToUtf(unicode[i], dst);
}
diff --git a/generic/tclStringRep.h b/generic/tclStringRep.h
index 6f3c2f1..4e38a64 100644
--- a/generic/tclStringRep.h
+++ b/generic/tclStringRep.h
@@ -6,7 +6,7 @@
*
* Conceptually, a string is a sequence of Unicode code points. Internally
* it may be stored in an encoding form such as a modified version of UTF-8
- * or UTF-16 (when TCL_UTF_MAX=3) or UTF-32.
+ * or UTF-32.
*
* Copyright (c) 1995-1997 Sun Microsystems, Inc.
* Copyright (c) 1999 by Scriptics Corporation.
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index 518e323..83133f9 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -85,29 +85,13 @@
# undef TclGetUnicodeFromObj
# define TclGetStringFromObj 0
# define TclGetBytesFromObj 0
-# if TCL_UTF_MAX > 3
-# define TclGetUnicodeFromObj 0
-# endif
+# define TclGetUnicodeFromObj 0
#endif
#undef Tcl_Close
#define Tcl_Close 0
#undef Tcl_GetByteArrayFromObj
#define Tcl_GetByteArrayFromObj 0
#define TclUnusedStubEntry 0
-
-
-#if TCL_UTF_MAX < 4
-static void uniCodePanic() {
- Tcl_Panic("This extension uses a deprecated function, not available now: Tcl is compiled with -DTCL_UTF_MAX==%d", TCL_UTF_MAX);
-}
-
-# define Tcl_GetUnicodeFromObj (Tcl_UniChar *(*)(Tcl_Obj *, Tcl_Size *))(void *)uniCodePanic
-# define TclGetUnicodeFromObj (Tcl_UniChar *(*)(Tcl_Obj *, void *))(void *)uniCodePanic
-# define Tcl_NewUnicodeObj (Tcl_Obj *(*)(const Tcl_UniChar *, Tcl_Size))(void *)uniCodePanic
-# define Tcl_SetUnicodeObj (void(*)(Tcl_Obj *, const Tcl_UniChar *, Tcl_Size))(void *)uniCodePanic
-# define Tcl_AppendUnicodeToObj (void(*)(Tcl_Obj *, const Tcl_UniChar *, Tcl_Size))(void *)uniCodePanic
-#endif
-
#define TclUtfCharComplete Tcl_UtfCharComplete
#define TclUtfNext Tcl_UtfNext
#define TclUtfPrev Tcl_UtfPrev
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 68112c5..ba5948f 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -215,9 +215,7 @@ Tcl_UniCharToUtf(
* character (at most 4 bytes).
*/
{
-#if TCL_UTF_MAX > 3
int flags = ch;
-#endif
if (ch >= TCL_COMBINE) {
ch &= (TCL_COMBINE - 1);
@@ -234,9 +232,7 @@ Tcl_UniCharToUtf(
}
if (ch <= 0xFFFF) {
if (
-#if TCL_UTF_MAX > 3
(flags & TCL_COMBINE) &&
-#endif
((ch & 0xF800) == 0xD800)) {
if (ch & 0x0400) {
/* Low surrogate */
@@ -349,15 +345,6 @@ Tcl_UniCharToUtfDString(
p = string;
wEnd = uniStr + uniLength;
-#if TCL_UTF_MAX < 4
- /* Initialize the buffer so that some random data doesn't trick
- * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs.
- * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its
- * prior non-stateful nature, this call to memset can also be removed.
- */
- memset(p, 0xff, Tcl_DStringLength(dsPtr) - oldLength);
-#endif
-
for (w = uniStr; w < wEnd; ) {
p += Tcl_UniCharToUtf(*w, p);
w++;
@@ -402,15 +389,6 @@ Tcl_Char16ToUtfDString(
p = string;
wEnd = uniStr + uniLength;
-#if TCL_UTF_MAX < 4
- /* Initialize the buffer so that some random data doesn't trick
- * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs.
- * Because TCL_COMBINE is used here, memset() is required even when
- * TCL_UTF_MAX == 4.
- */
- memset(p, 0xff, Tcl_DStringLength(dsPtr) - oldLength);
-#endif
-
for (w = uniStr; w < wEnd; ) {
if (!len && ((*w & 0xFC00) != 0xDC00)) {
/* Special case for handling high surrogates. */
@@ -447,15 +425,6 @@ Tcl_Char16ToUtfDString(
* Tcl_UtfCharComplete() before calling this routine to ensure that
* enough bytes remain in the string.
*
- * If TCL_UTF_MAX < 4, special handling of Surrogate pairs is done:
- * For any UTF-8 string containing a character outside of the BMP, the
- * first call to this function will fill *chPtr with the high surrogate
- * and generate a return value of 1. Calling Tcl_UtfToUniChar again
- * will produce the low surrogate and a return value of 3. Because *chPtr
- * is used to remember whether the high surrogate is already produced, it
- * is recommended to initialize the variable it points to as 0 before
- * the first call to Tcl_UtfToUniChar is done.
- *
* Results:
* *chPtr is filled with the Tcl_UniChar, and the return value is the
* number of bytes from the UTF-8 string that were consumed.
@@ -715,11 +684,11 @@ Tcl_UtfToUniCharDString(
endPtr = src + length;
optPtr = endPtr - 4;
while (p <= optPtr) {
- p += TclUtfToUCS4(p, &ch);
+ p += Tcl_UtfToUniChar(p, &ch);
*w++ = ch;
}
while ((p < endPtr) && Tcl_UtfCharComplete(p, endPtr-p)) {
- p += TclUtfToUCS4(p, &ch);
+ p += Tcl_UtfToUniChar(p, &ch);
*w++ = ch;
}
while (p < endPtr) {
@@ -965,7 +934,7 @@ Tcl_UtfFindFirst(
int ch) /* The Unicode character to search for. */
{
while (1) {
- int find, len = TclUtfToUCS4(src, &find);
+ int find, len = Tcl_UtfToUniChar(src, &find);
if (find == ch) {
return src;
@@ -1004,7 +973,7 @@ Tcl_UtfFindLast(
const char *last = NULL;
while (1) {
- int find, len = TclUtfToUCS4(src, &find);
+ int find, len = Tcl_UtfToUniChar(src, &find);
if (find == ch) {
last = src;
@@ -1227,13 +1196,7 @@ Tcl_UniCharAtIndex(
i = TclUtfToUniChar(src, &ch);
src += i;
}
-#if TCL_UTF_MAX < 4
- if ((ch >= 0xD800) && (i < 3)) {
- /* Index points at character following high Surrogate */
- return -1;
- }
-#endif
- TclUtfToUCS4(src, &i);
+ Tcl_UtfToUniChar(src, &i);
return i;
}
@@ -1243,9 +1206,7 @@ Tcl_UniCharAtIndex(
* Tcl_UtfAtIndex --
*
* Returns a pointer to the specified character (not byte) position in
- * the UTF-8 string. If TCL_UTF_MAX < 4, characters > U+FFFF count as
- * 2 positions, but then the pointer should never be placed between
- * the two positions.
+ * the UTF-8 string.
*
* Results:
* As above.
@@ -1377,7 +1338,7 @@ Tcl_UtfToUpper(
src = dst = str;
while (*src) {
- len = TclUtfToUCS4(src, &ch);
+ len = Tcl_UtfToUniChar(src, &ch);
upChar = Tcl_UniCharToUpper(ch);
/*
@@ -1430,7 +1391,7 @@ Tcl_UtfToLower(
src = dst = str;
while (*src) {
- len = TclUtfToUCS4(src, &ch);
+ len = Tcl_UtfToUniChar(src, &ch);
lowChar = Tcl_UniCharToLower(ch);
/*
@@ -1486,7 +1447,7 @@ Tcl_UtfToTitle(
src = dst = str;
if (*src) {
- len = TclUtfToUCS4(src, &ch);
+ len = Tcl_UtfToUniChar(src, &ch);
titleChar = Tcl_UniCharToTitle(ch);
if (len < TclUtfCount(titleChar)) {
@@ -1498,7 +1459,7 @@ Tcl_UtfToTitle(
src += len;
}
while (*src) {
- len = TclUtfToUCS4(src, &ch);
+ len = Tcl_UtfToUniChar(src, &ch);
lowChar = ch;
/* Special exception for Georgian Asomtavruli chars, no titlecase. */
if ((unsigned)(lowChar - 0x1C90) >= 0x30) {
@@ -1605,16 +1566,6 @@ Tcl_UtfNcmp(
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
if (ch1 != ch2) {
-#if TCL_UTF_MAX < 4
- /* Surrogates always report higher than non-surrogates */
- if (((ch1 & 0xFC00) == 0xD800)) {
- if ((ch2 & 0xFC00) != 0xD800) {
- return ch1;
- }
- } else if ((ch2 & 0xFC00) == 0xD800) {
- return -ch2;
- }
-#endif
return (ch1 - ch2);
}
}
@@ -1656,16 +1607,6 @@ Tcl_UtfNcasecmp(
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
if (ch1 != ch2) {
-#if TCL_UTF_MAX < 4
- /* Surrogates always report higher than non-surrogates */
- if (((ch1 & 0xFC00) == 0xD800)) {
- if ((ch2 & 0xFC00) != 0xD800) {
- return ch1;
- }
- } else if ((ch2 & 0xFC00) == 0xD800) {
- return -ch2;
- }
-#endif
ch1 = Tcl_UniCharToLower(ch1);
ch2 = Tcl_UniCharToLower(ch2);
if (ch1 != ch2) {
@@ -1705,16 +1646,6 @@ TclUtfCmp(
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
if (ch1 != ch2) {
-#if TCL_UTF_MAX < 4
- /* Surrogates always report higher than non-surrogates */
- if (((ch1 & 0xFC00) == 0xD800)) {
- if ((ch2 & 0xFC00) != 0xD800) {
- return ch1;
- }
- } else if ((ch2 & 0xFC00) == 0xD800) {
- return -ch2;
- }
-#endif
return ch1 - ch2;
}
}
@@ -1751,16 +1682,6 @@ TclUtfCasecmp(
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
if (ch1 != ch2) {
-#if TCL_UTF_MAX < 4
- /* Surrogates always report higher than non-surrogates */
- if (((ch1 & 0xFC00) == 0xD800)) {
- if ((ch2 & 0xFC00) != 0xD800) {
- return ch1;
- }
- } else if ((ch2 & 0xFC00) == 0xD800) {
- return -ch2;
- }
-#endif
ch1 = Tcl_UniCharToLower(ch1);
ch2 = Tcl_UniCharToLower(ch2);
if (ch1 != ch2) {
@@ -1959,7 +1880,7 @@ TclUniCharNcmp(
const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */
size_t numChars) /* Number of unichars to compare. */
{
-#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3)
+#if defined(WORDS_BIGENDIAN)
/*
* We are definitely on a big-endian machine; memcmp() is safe
*/
@@ -1973,14 +1894,6 @@ TclUniCharNcmp(
for ( ; numChars != 0; ucs++, uct++, numChars--) {
if (*ucs != *uct) {
-#if TCL_UTF_MAX < 4
- /* special case for handling upper surrogates */
- if (((*ucs & 0xFC00) == 0xD800) && ((*uct & 0xFC00) != 0xD800)) {
- return 1;
- } else if (((*uct & 0xFC00) == 0xD800)) {
- return -1;
- }
-#endif
return (*ucs - *uct);
}
}
@@ -2018,14 +1931,6 @@ TclUniCharNcasecmp(
Tcl_UniChar lct = Tcl_UniCharToLower(*uct);
if (lcs != lct) {
-#if TCL_UTF_MAX < 4
- /* special case for handling upper surrogates */
- if (((lcs & 0xFC00) == 0xD800) && ((lct & 0xFC00) != 0xD800)) {
- return 1;
- } else if (((lct & 0xFC00) == 0xD800)) {
- return -1;
- }
-#endif
return (lcs - lct);
}
}
@@ -2753,71 +2658,6 @@ TclUniCharMatch(
}
/*
- *---------------------------------------------------------------------------
- *
- * TclUtfToUCS4 --
- *
- * Extracts the 4-byte codepoint from the leading bytes of the
- * Modified UTF-8 string "src". This is a utility routine to
- * contain the surrogate gymnastics in one place.
- *
- * The caller must ensure that the source buffer is long enough that this
- * routine does not run off the end and dereference non-existent memory
- * looking for trail bytes. If the source buffer is known to be '\0'
- * terminated, this cannot happen. Otherwise, the caller should call
- * Tcl_UtfCharComplete() before calling this routine to ensure that
- * enough bytes remain in the string.
- *
- * Results:
- * Fills *usc4Ptr with the UCS4 code point and returns the number of bytes
- * consumed from the source string.
- *
- * Side effects:
- * None.
- *
- *---------------------------------------------------------------------------
- */
-
-#if TCL_UTF_MAX < 4
-int
-TclUtfToUCS4(
- const char *src, /* The UTF-8 string. */
- int *ucs4Ptr) /* Filled with the UCS4 codepoint represented
- * by the UTF-8 string. */
-{
- /* Make use of the #undef Tcl_UtfToUniChar above, which already handles UCS4. */
- return Tcl_UtfToUniChar(src, ucs4Ptr);
-}
-
-int
-TclUniCharToUCS4(
- const Tcl_UniChar *src, /* The Tcl_UniChar string. */
- int *ucs4Ptr) /* Filled with the UCS4 codepoint represented
- * by the Tcl_UniChar string. */
-{
- if (((src[0] & 0xFC00) == 0xD800) && ((src[1] & 0xFC00) == 0xDC00)) {
- *ucs4Ptr = (((src[0] & 0x3FF) << 10) | (src[1] & 0x3FF)) + 0x10000;
- return 2;
- }
- *ucs4Ptr = src[0];
- return 1;
-}
-
-const Tcl_UniChar *TclUCS4Prev(const Tcl_UniChar *src, const Tcl_UniChar *ptr) {
- if (src <= ptr + 1) {
- return ptr;
- }
- if (((src[-1] & 0xFC00) == 0xDC00) && ((src[-2] & 0xFC00) == 0xD800)) {
- return src - 2;
- }
- return src - 1;
-}
-
-
-
-#endif
-
-/*
* Local Variables:
* mode: c
* c-basic-offset: 4
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 7bafdf7..485e65b 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -1672,7 +1672,7 @@ TclTrimRight(
pp = Tcl_UtfPrev(p, bytes);
do {
pp += pInc;
- pInc = TclUtfToUCS4(pp, &ch1);
+ pInc = Tcl_UtfToUniChar(pp, &ch1);
} while (pp + pInc < p);
/*
@@ -1680,7 +1680,7 @@ TclTrimRight(
*/
do {
- pInc = TclUtfToUCS4(q, &ch2);
+ pInc = Tcl_UtfToUniChar(q, &ch2);
if (ch1 == ch2) {
break;
@@ -1745,7 +1745,7 @@ TclTrimLeft(
*/
do {
- Tcl_Size pInc = TclUtfToUCS4(p, &ch1);
+ Tcl_Size pInc = Tcl_UtfToUniChar(p, &ch1);
const char *q = trim;
Tcl_Size bytesLeft = numTrim;
@@ -1754,7 +1754,7 @@ TclTrimLeft(
*/
do {
- Tcl_Size qInc = TclUtfToUCS4(q, &ch2);
+ Tcl_Size qInc = Tcl_UtfToUniChar(q, &ch2);
if (ch1 == ch2) {
break;
@@ -1821,7 +1821,7 @@ TclTrim(
if (numBytes > 0) {
int ch;
const char *first = bytes + trimLeft;
- bytes += TclUtfToUCS4(first, &ch);
+ bytes += Tcl_UtfToUniChar(first, &ch);
numBytes -= (bytes - first);
if (numBytes > 0) {
@@ -2151,7 +2151,7 @@ Tcl_StringCaseMatch(
ch2 = (int)
(nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern));
} else {
- TclUtfToUCS4(pattern, &ch2);
+ Tcl_UtfToUniChar(pattern, &ch2);
if (nocase) {
ch2 = Tcl_UniCharToLower(ch2);
}
@@ -2167,7 +2167,7 @@ Tcl_StringCaseMatch(
if ((p != '[') && (p != '?') && (p != '\\')) {
if (nocase) {
while (*str) {
- charLen = TclUtfToUCS4(str, &ch1);
+ charLen = Tcl_UtfToUniChar(str, &ch1);
if (ch2==ch1 || ch2==Tcl_UniCharToLower(ch1)) {
break;
}
@@ -2181,7 +2181,7 @@ Tcl_StringCaseMatch(
*/
while (*str) {
- charLen = TclUtfToUCS4(str, &ch1);
+ charLen = Tcl_UtfToUniChar(str, &ch1);
if (ch2 == ch1) {
break;
}
@@ -2195,7 +2195,7 @@ Tcl_StringCaseMatch(
if (*str == '\0') {
return 0;
}
- str += TclUtfToUCS4(str, &ch1);
+ str += Tcl_UtfToUniChar(str, &ch1);
}
}
@@ -2206,7 +2206,7 @@ Tcl_StringCaseMatch(
if (p == '?') {
pattern++;
- str += TclUtfToUCS4(str, &ch1);
+ str += Tcl_UtfToUniChar(str, &ch1);
continue;
}
@@ -2225,7 +2225,7 @@ Tcl_StringCaseMatch(
(nocase ? tolower(UCHAR(*str)) : UCHAR(*str));
str++;
} else {
- str += TclUtfToUCS4(str, &ch1);
+ str += Tcl_UtfToUniChar(str, &ch1);
if (nocase) {
ch1 = Tcl_UniCharToLower(ch1);
}
@@ -2239,7 +2239,7 @@ Tcl_StringCaseMatch(
? tolower(UCHAR(*pattern)) : UCHAR(*pattern));
pattern++;
} else {
- pattern += TclUtfToUCS4(pattern, &startChar);
+ pattern += Tcl_UtfToUniChar(pattern, &startChar);
if (nocase) {
startChar = Tcl_UniCharToLower(startChar);
}
@@ -2254,7 +2254,7 @@ Tcl_StringCaseMatch(
? tolower(UCHAR(*pattern)) : UCHAR(*pattern));
pattern++;
} else {
- pattern += TclUtfToUCS4(pattern, &endChar);
+ pattern += Tcl_UtfToUniChar(pattern, &endChar);
if (nocase) {
endChar = Tcl_UniCharToLower(endChar);
}
@@ -2302,8 +2302,8 @@ Tcl_StringCaseMatch(
* each string match.
*/
- str += TclUtfToUCS4(str, &ch1);
- pattern += TclUtfToUCS4(pattern, &ch2);
+ str += Tcl_UtfToUniChar(str, &ch1);
+ pattern += Tcl_UtfToUniChar(pattern, &ch2);
if (nocase) {
if (Tcl_UniCharToLower(ch1) != Tcl_UniCharToLower(ch2)) {
return 0;
diff --git a/tests/utf.test b/tests/utf.test
index fec0ba4..b2e34c8 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -16,17 +16,6 @@ if {"::tcltest" ni [namespace children]} {
::tcltest::loadTestedCommands
catch [list package require -exact tcl::test [info patchlevel]]
-testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}]
-testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}]
-testConstraint utf16 [expr {[string length [format %c 0x10000]] == 2}]
-testConstraint utf32 [expr {[testConstraint fullutf]
- && [string length [format %c 0x10000]] == 1}]
-
-testConstraint Uesc [expr {"\U0041" eq "A"}]
-testConstraint pre388 [expr {"\x741" eq "A"}]
-testConstraint pairsTo4bytes [expr {[llength [info commands teststringbytes]]
- && [string length [teststringbytes \uD83D\uDCA9]] == 4}]
-
testConstraint testbytestring [llength [info commands testbytestring]]
testConstraint testfindfirst [llength [info commands testfindfirst]]
testConstraint testfindlast [llength [info commands testfindlast]]
@@ -58,12 +47,9 @@ test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} testbytestring {
test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring {
expr {[format %c -1] eq [testbytestring \xEF\xBF\xBD]}
} 1
-test utf-1.7.0 {Tcl_UniCharToUtf: 4 byte sequences} {fullutf testbytestring} {
+test utf-1.7 {Tcl_UniCharToUtf: 4 byte sequences} testbytestring {
expr {"\U014E4E" eq [testbytestring \xF0\x94\xB9\x8E]}
} 1
-test utf-1.7.1 {Tcl_UniCharToUtf: 4 byte sequences} {Uesc ucs2 testbytestring} {
- expr {"\U014E4E" eq [testbytestring \xF0\x94\xB9\x8E]}
-} 0
test utf-1.8 {Tcl_UniCharToUtf: 3 byte sequence, high surrogate} testbytestring {
expr {"\uD842" eq [testbytestring \xED\xA1\x82]}
} 1
@@ -76,13 +62,10 @@ test utf-1.10 {Tcl_UniCharToUtf: 3 byte sequence, high surrogate} testbytestring
test utf-1.11 {Tcl_UniCharToUtf: 3 byte sequence, low surrogate} testbytestring {
expr {[format %c 0xDC42] eq [testbytestring \xED\xB1\x82]}
} 1
-test utf-1.12 {Tcl_UniCharToUtf: 4 byte sequence, high/low surrogate} {pairsTo4bytes testbytestring} {
- expr {"\uD842\uDC42" eq [testbytestring \xF0\xA0\xA1\x82]}
-} 1
-test utf-1.13.0 {Tcl_UniCharToUtf: Invalid surrogate} Uesc {
+test utf-1.12 {Tcl_UniCharToUtf: Invalid surrogate} {
expr {"\UD842" eq "\uD842"}
} 1
-test utf-1.13.1 {Tcl_UniCharToUtf: Invalid surrogate} {fullutf testbytestring} {
+test utf-1.13 {Tcl_UniCharToUtf: Invalid surrogate} testbytestring {
expr {"\UD842" eq [testbytestring \xED\xA1\x82]}
} 1
test utf-1.14 {Tcl_UniCharToUtf: surrogate pairs from concat} {
@@ -126,22 +109,10 @@ test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} testbytestrin
test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring {
string length [testbytestring \xE4\xB9\x8E]
} 1
-test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {ucs2 testbytestring} {
- string length [testbytestring \xF0\x90\x80\x80]
-} 2
-test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf16 {
- string length 𐀀
-} 2
-test utf-2.8.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf32 {
+test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {
string length 𐀀
} 1
-test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {ucs2 testbytestring} {
- string length [testbytestring \xF4\x8F\xBF\xBF]
-} 2
-test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf16 {
- string length \U10FFFF
-} 2
-test utf-2.9.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf32 {
+test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {
string length \U10FFFF
} 1
test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring {
@@ -192,10 +163,7 @@ test utf-4.10 {Tcl_NumUtfChars: #x00, calc len, overcomplete} {testnumutfchars t
test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} {
testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end-1
} 3
-test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs2} {
- testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end
-} 2
-test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring utf32} {
+test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring} {
testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end
} 1
test utf-4.13 {Tcl_NumUtfChars: end of string} {testnumutfchars testbytestring} {
@@ -244,10 +212,7 @@ test utf-6.9 {Tcl_UtfNext} {testutfnext testbytestring} {
test utf-6.10 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xA0]G
} 1
-test utf-6.11.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xA0\xA0\x00]
-} 1
-test utf-6.11.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
+test utf-6.11.1 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xA0\xA0\x00]
} 2
test utf-6.12 {Tcl_UtfNext} {testutfnext testbytestring} {
@@ -304,19 +269,13 @@ test utf-6.28 {Tcl_UtfNext} {testutfnext testbytestring} {
test utf-6.29 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xE8\xF8]
} 1
-test utf-6.30.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xF2]
-} 1
-test utf-6.30.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
+test utf-6.30 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\x00]
} 1
test utf-6.31 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2]G
} 1
-test utf-6.32.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xF2\xA0]
-} 1
-test utf-6.32.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
+test utf-6.32 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\x00]
} 1
test utf-6.33 {Tcl_UtfNext} {testutfnext testbytestring} {
@@ -427,10 +386,7 @@ test utf-6.67 {Tcl_UtfNext} {testutfnext testbytestring} {
test utf-6.68 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\xA0]G
} 1
-test utf-6.69.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xF2\xA0\xA0\xA0]
-} 1
-test utf-6.69.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
+test utf-6.69 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0]
} 4
test utf-6.70 {Tcl_UtfNext} {testutfnext testbytestring} {
@@ -445,40 +401,22 @@ test utf-6.72 {Tcl_UtfNext} {testutfnext testbytestring} {
test utf-6.73 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\xA0\xF8]
} 1
-test utf-6.74.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xF2\xA0\xA0\xA0]G
-} 1
-test utf-6.74.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
+test utf-6.74 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0]G
} 4
-test utf-6.75.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0]
-} 1
-test utf-6.75.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
+test utf-6.75 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0]
} 4
-test utf-6.76.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xF2\xA0\xA0\xA0\xD0]
-} 1
-test utf-6.76.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
+test utf-6.76 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xD0]
} 4
-test utf-6.77.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xF2\xA0\xA0\xA0\xE8]
-} 1
-test utf-6.77.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
+test utf-6.77 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xE8]
} 4
-test utf-6.78.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xF2\xA0\xA0\xA0\xF2]
-} 1
-test utf-6.78.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
+test utf-6.78 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xF2]
} 4
-test utf-6.79.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xF2\xA0\xA0\xA0G\xF8]
-} 1
-test utf-6.79.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
+test utf-6.79 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0G\xF8]
} 4
test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext {
@@ -502,55 +440,31 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} {
test utf-6.86 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} {
testutfnext [testbytestring \xF0\x80\x80\x80]
} 1
-test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xF0\x90\x80\x80]
-} 1
-test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring fullutf} {
+test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} {
testutfnext [testbytestring \xF0\x90\x80\x80]
} 4
-test utf-6.88.0 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xA0\xA0\x00]
-} 1
-test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring fullutf} {
+test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring} {
testutfnext [testbytestring \xA0\xA0\x00]
} 2
-test utf-6.89.0 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \x80\x80\x00]
-} 1
-test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring fullutf} {
+test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring} {
testutfnext [testbytestring \x80\x80\x00]
} 2
-test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xF4\x8F\xBF\xBF]
-} 1
-test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring fullutf} {
+test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring} {
testutfnext [testbytestring \xF4\x8F\xBF\xBF]
} 4
test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring} {
testutfnext [testbytestring \xF4\x90\x80\x80]
} 1
-test utf-6.92.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xA0\xA0\xA0]
-} 1
-test utf-6.92.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring fullutf} {
+test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring} {
testutfnext [testbytestring \xA0\xA0\xA0]
} 3
-test utf-6.93.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \x80\x80\x80]
-} 1
-test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring fullutf} {
+test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring} {
testutfnext [testbytestring \x80\x80\x80]
} 3
-test utf-6.94.0 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \xA0\xA0\xA0\xA0]
-} 1
-test utf-6.94.1 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring fullutf} {
+test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring} {
testutfnext [testbytestring \xA0\xA0\xA0\xA0]
} 3
-test utf-6.95.0 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring ucs2} {
- testutfnext [testbytestring \x80\x80\x80\x80]
-} 1
-test utf-6.95.1 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring fullutf} {
+test utf-6.95 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring} {
testutfnext [testbytestring \x80\x80\x80\x80]
} 3
@@ -617,22 +531,13 @@ test utf-7.9.1 {Tcl_UtfPrev} {testutfprev testbytestring} {
test utf-7.9.2 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF8\xA0\xF8\xA0] 3
} 2
-test utf-7.10.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} {
- testutfprev A[testbytestring \xF2\xA0]
-} 2
-test utf-7.10.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} {
+test utf-7.10.0 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF2\xA0]
} 1
-test utf-7.10.2 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} {
- testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 3
-} 2
-test utf-7.10.3 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} {
+test utf-7.10.1 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 3
} 1
-test utf-7.10.4 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} {
- testutfprev A[testbytestring \xF2\xA0\xF8\xA0] 3
-} 2
-test utf-7.10.5 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} {
+test utf-7.10.2 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF2\xA0\xF8\xA0] 3
} 1
test utf-7.11 {Tcl_UtfPrev} {testutfprev testbytestring} {
@@ -674,22 +579,13 @@ test utf-7.14.1 {Tcl_UtfPrev} {testutfprev testbytestring} {
test utf-7.14.2 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF8\xA0\xA0\xF8] 4
} 3
-test utf-7.15.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} {
- testutfprev A[testbytestring \xF2\xA0\xA0]
-} 3
-test utf-7.15.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} {
+test utf-7.15.0 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF2\xA0\xA0]
} 1
-test utf-7.15.2 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} {
- testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 4
-} 3
-test utf-7.15.3 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} {
+test utf-7.15.1 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 4
} 1
-test utf-7.15.4 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} {
- testutfprev A[testbytestring \xF2\xA0\xA0\xF8] 4
-} 3
-test utf-7.15.5 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} {
+test utf-7.15.2 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF2\xA0\xA0\xF8] 4
} 1
test utf-7.16 {Tcl_UtfPrev} testutfprev {
@@ -722,10 +618,7 @@ test utf-7.18.2 {Tcl_UtfPrev} {testutfprev testbytestring} {
test utf-7.19 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev [testbytestring A\xF8\xA0\xA0\xA0]
} 4
-test utf-7.20.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} {
- testutfprev [testbytestring A\xF2\xA0\xA0\xA0]
-} 4
-test utf-7.20.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} {
+test utf-7.20 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev [testbytestring A\xF2\xA0\xA0\xA0]
} 1
test utf-7.21 {Tcl_UtfPrev} {testutfprev testbytestring} {
@@ -788,22 +681,13 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
test utf-7.38 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
testutfprev A[testbytestring \xE0\xA0\x80] 2
} 1
-test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} {
- testutfprev A[testbytestring \xF0\x90\x80\x80]
-} 4
-test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring fullutf} {
+test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF0\x90\x80\x80]
} 1
-test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} {
- testutfprev A[testbytestring \xF0\x90\x80\x80] 4
-} 3
-test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring fullutf} {
+test utf-7.40 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF0\x90\x80\x80] 4
} 1
-test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} {
- testutfprev A[testbytestring \xF0\x90\x80\x80] 3
-} 2
-test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring fullutf} {
+test utf-7.41 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF0\x90\x80\x80] 3
} 1
test utf-7.42 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
@@ -830,25 +714,16 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} tes
test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev testbytestring} {
testutfprev [testbytestring \xE8\xA0\x00] 2
} 0
-test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} {
- testutfprev A[testbytestring \xF4\x8F\xBF\xBF]
-} 4
-test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} {
+test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring } {
testutfprev A[testbytestring \xF4\x8F\xBF\xBF]
} 1
-test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} {
- testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 4
-} 3
-test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} {
+test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 4
} 1
-test utf-7.48.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} {
- testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 3
-} 2
-test utf-7.48.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} {
+test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 3
} 1
-test utf-7.48.6 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} {
+test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 2
} 1
test utf-7.49.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} {
@@ -876,72 +751,30 @@ test utf-8.3 {Tcl_UniCharAtIndex: index > 0} {
test utf-8.4 {Tcl_UniCharAtIndex: index > 0} {
string index 乎ɚÿՃ 2
} ÿ
-test utf-8.5.0 {Tcl_UniCharAtIndex: high surrogate} ucs2 {
- string index \uD842 0
-} \uD842
-test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} utf32 {
- string index \uD842 0
-} \uD842
-test utf-8.5.2 {Tcl_UniCharAtIndex: high surrogate} utf16 {
+test utf-8.5 {Tcl_UniCharAtIndex: high surrogate} {
string index \uD842 0
} \uD842
test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} {
string index \uDC42 0
} \uDC42
-test utf-8.7.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
- string index \uD83D\uDE00G 0
-} \uD83D
-test utf-8.7.1 {Tcl_UniCharAtIndex: Emoji} utf32 {
- string index 😀G 0
-} 😀
-test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
+test utf-8.7 {Tcl_UniCharAtIndex: Emoji} {
string index 😀G 0
} 😀
-test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
- string index \uD83D\uDE00G 1
-} \uDE00
-test utf-8.8.1 {Tcl_UniCharAtIndex: Emoji} utf32 {
- string index 😀G 1
-} G
-test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
+test utf-8.8 {Tcl_UniCharAtIndex: Emoji} {
string index 😀G 1
-} {}
-test utf-8.9.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
- string index \uD83D\uDE00G 2
} G
-test utf-8.9.1 {Tcl_UniCharAtIndex: Emoji} utf32 {
+test utf-8.9 {Tcl_UniCharAtIndex: Emoji} {
string index 😀G 2
} {}
-test utf-8.9.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
- string index 😀G 2
-} G
-test utf-8.10.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
- string index 😀G 0
-} \uFFFD
-test utf-8.10.1 {Tcl_UniCharAtIndex: Emoji} utf32 {
+test utf-8.10 {Tcl_UniCharAtIndex: Emoji} {
string index 😀G 0
} 😀
-test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
- string index 😀G 0
-} 😀
-test utf-8.11.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
- string index 😀G 1
-} G
-test utf-8.11.1 {Tcl_UniCharAtIndex: Emoji} utf32 {
+test utf-8.11 {Tcl_UniCharAtIndex: Emoji} {
string index 😀G 1
} G
-test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
- string index 😀G 1
-} {}
-test utf-8.12.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
- string index 😀G 2
-} {}
-test utf-8.12.1 {Tcl_UniCharAtIndex: Emoji} utf32 {
+test utf-8.12 {Tcl_UniCharAtIndex: Emoji} {
string index 😀G 2
} {}
-test utf-8.12.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
- string index 😀G 2
-} G
test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
string range abcd 0 2
@@ -949,60 +782,24 @@ test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
test utf-9.2 {Tcl_UtfAtIndex: index > 0} {
string range 乎ɚÿՃklmnop 1 5
} ɚÿՃkl
-test utf-9.3.0 {Tcl_UtfAtIndex: index = 0, Emoji} ucs2 {
- string range \uD83D\uDE00G 0 0
-} \uD83D
-test utf-9.3.1 {Tcl_UtfAtIndex: index = 0, Emoji} utf32 {
+test utf-9.3 {Tcl_UtfAtIndex: index = 0, Emoji} {
string range 😀G 0 0
} 😀
-test utf-9.3.2 {Tcl_UtfAtIndex: index = 0, Emoji} utf16 {
- string range 😀G 0 0
-} 😀
-test utf-9.4.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 {
- string range \uD83D\uDE00G 1 1
-} \uDE00
-test utf-9.4.1 {Tcl_UtfAtIndex: index > 0, Emoji} utf32 {
- string range 😀G 1 1
-} G
-test utf-9.4.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 {
+test utf-9.4 {Tcl_UtfAtIndex: index > 0, Emoji} {
string range 😀G 1 1
-} {}
-test utf-9.5.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 {
- string range \uD83D\uDE00G 2 2
} G
-test utf-9.5.1 {Tcl_UtfAtIndex: index > 0, Emoji} utf32 {
+test utf-9.5 {Tcl_UtfAtIndex: index > 0, Emoji} {
string range 😀G 2 2
} {}
-test utf-9.5.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 {
- string range 😀G 2 2
-} G
-test utf-9.6.0 {Tcl_UtfAtIndex: index = 0, Emoji} ucs2 {
- string range 😀G 0 0
-} \uFFFD
-test utf-9.6.1 {Tcl_UtfAtIndex: index = 0, Emoji} utf32 {
+test utf-9.6 {Tcl_UtfAtIndex: index = 0, Emoji} {
string range 😀G 0 0
} 😀
-test utf-9.6.2 {Tcl_UtfAtIndex: index = 0, Emoji} utf16 {
- string range 😀G 0 0
-} 😀
-test utf-9.7.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 {
- string range 😀G 1 1
-} G
-test utf-9.7.1 {Tcl_UtfAtIndex: index > 0, Emoji} utf32 {
+test utf-9.7 {Tcl_UtfAtIndex: index > 0, Emoji} {
string range 😀G 1 1
} G
-test utf-9.7.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 {
- string range 😀G 1 1
-} {}
-test utf-9.8.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 {
+test utf-9.8 {Tcl_UtfAtIndex: index > 0, Emoji} {
string range 😀G 2 2
} {}
-test utf-9.8.1 {Tcl_UtfAtIndex: index > 0, Emoji} utf32 {
- string range 😀G 2 2
-} {}
-test utf-9.8.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 {
- string range 😀G 2 2
-} G
test utf-10.1 {Tcl_UtfBackslash: dst == NULL} {
set x \n
@@ -1020,10 +817,10 @@ test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} testbytestring {
test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} testbytestring {
expr {"\u4E216" eq "[testbytestring \xE4\xB8\xA1]6"}
} 1
-test utf-10.6 {Tcl_UtfBackslash: stops after 5 hex chars} {fullutf testbytestring} {
+test utf-10.6 {Tcl_UtfBackslash: stops after 5 hex chars} testbytestring {
expr {"\U1E2165" eq "[testbytestring \xF0\x9E\x88\x96]5"}
} 1
-test utf-10.7 {Tcl_UtfBackslash: stops after 6 hex chars} {fullutf testbytestring} {
+test utf-10.7 {Tcl_UtfBackslash: stops after 6 hex chars} testbytestring {
expr {"\U10E2165" eq "[testbytestring \xF4\x8E\x88\x96]5"}
} 1
@@ -1064,8 +861,7 @@ bsCheck \x 120
bsCheck \xa 10
bsCheck \xA 10
bsCheck \x41 65
-bsCheck \x541 65 pre388 ;# == \x41
-bsCheck \x541 84 !pre388 ;# == \x54 1
+bsCheck \x541 84
bsCheck \u 117
bsCheck \uk 117
bsCheck \u41 65
@@ -1074,25 +870,24 @@ bsCheck \uA 10
bsCheck \340 224
bsCheck \uA1 161
bsCheck \u4E21 20001
-bsCheck \741 225 pre388 ;# == \341
-bsCheck \741 60 !pre388 ;# == \74 1
+bsCheck \741 60
bsCheck \U 85
bsCheck \Uk 85
-bsCheck \U41 65 Uesc
-bsCheck \Ua 10 Uesc
-bsCheck \UA 10 Uesc
-bsCheck \UA1 161 Uesc
-bsCheck \U4E21 20001 Uesc
-bsCheck \U004E21 20001 Uesc
-bsCheck \U00004E21 20001 Uesc
-bsCheck \U0000004E21 78 Uesc
-bsCheck \U00110000 69632 fullutf
-bsCheck \U01100000 69632 fullutf
-bsCheck \U11000000 69632 fullutf
-bsCheck \U0010FFFF 1114111 fullutf
-bsCheck \U010FFFF0 1114111 fullutf
-bsCheck \U10FFFF00 1114111 fullutf
-bsCheck \UFFFFFFFF 1048575 fullutf
+bsCheck \U41 65
+bsCheck \Ua 10
+bsCheck \UA 10
+bsCheck \UA1 161
+bsCheck \U4E21 20001
+bsCheck \U004E21 20001
+bsCheck \U00004E21 20001
+bsCheck \U0000004E21 78
+bsCheck \U00110000 69632
+bsCheck \U01100000 69632
+bsCheck \U11000000 69632
+bsCheck \U0010FFFF 1114111
+bsCheck \U010FFFF0 1114111
+bsCheck \U10FFFF00 1114111
+bsCheck \UFFFFFFFF 1048575
test utf-11.1 {Tcl_UtfToUpper} {
string toupper {}
@@ -1109,13 +904,13 @@ test utf-11.4 {Tcl_UtfToUpper} {
test utf-11.5 {Tcl_UtfToUpper Georgian (new in Unicode 11)} {
string toupper აᲐ
} ᲐᲐ
-test utf-11.6 {Tcl_UtfToUpper beyond U+FFFF} fullutf {
+test utf-11.6 {Tcl_UtfToUpper beyond U+FFFF} {
string toupper 𐐨
} 𐐀
-test utf-11.7 {Tcl_UtfToUpper beyond U+FFFF} fullutf {
+test utf-11.7 {Tcl_UtfToUpper beyond U+FFFF} {
string toupper 𐐨
} 𐐀
-test utf-11.8 {Tcl_UtfToUpper low/high surrogate)} utf32 {
+test utf-11.8 {Tcl_UtfToUpper low/high surrogate)} {
string toupper \uDC24\uD824
} \uDC24\uD824
@@ -1134,13 +929,13 @@ test utf-12.4 {Tcl_UtfToLower} {
test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} {
string tolower აᲐ
} აა
-test utf-12.6 {Tcl_UtfToLower low/high surrogate)} utf32 {
+test utf-12.6 {Tcl_UtfToLower low/high surrogate)} {
string tolower \uDC24\uD824
} \uDC24\uD824
-test utf-12.7 {Tcl_UtfToLower beyond U+FFFF} fullutf {
+test utf-12.7 {Tcl_UtfToLower beyond U+FFFF} {
string tolower 𐐀
} 𐐨
-test utf-12.8 {Tcl_UtfToLower beyond U+FFFF} fullutf {
+test utf-12.8 {Tcl_UtfToLower beyond U+FFFF} {
string tolower 𐐀
} 𐐨
@@ -1162,13 +957,13 @@ test utf-13.5 {Tcl_UtfToTitle Georgian (new in Unicode 11)} {
test utf-13.6 {Tcl_UtfToTitle Georgian (new in Unicode 11)} {
string totitle Აა
} Აა
-test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} utf32 {
+test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} {
string totitle \uDC24\uD824
} \uDC24\uD824
-test utf-13.8 {Tcl_UtfToTitle beyond U+FFFF} fullutf {
+test utf-13.8 {Tcl_UtfToTitle beyond U+FFFF} {
string totitle 𐐨𐐀
} 𐐀𐐨
-test utf-13.9 {Tcl_UtfToTitle beyond U+FFFF} fullutf {
+test utf-13.9 {Tcl_UtfToTitle beyond U+FFFF} {
string totitle 𐐨𐐀
} 𐐀𐐨
@@ -1228,7 +1023,7 @@ test utf-19.1 {TclUniCharLen} -body {
unset -nocomplain foo
} -result {1 4}
-test utf-20.1 {TclUniCharNcmp} utf32 {
+test utf-20.1 {TclUniCharNcmp} {
string compare [string range [format %c 0xFFFF] 0 0] [string range [format %c 0x10000] 0 0]
} -1
test utf-20.2 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} {
@@ -1358,10 +1153,10 @@ UniCharCaseCmpTest < a b
UniCharCaseCmpTest > b a
UniCharCaseCmpTest > B a
UniCharCaseCmpTest > aBcB abca
-UniCharCaseCmpTest < \uFFFF [format %c 0x10000] utf32
-UniCharCaseCmpTest < \uFFFF \U10000 utf32
-UniCharCaseCmpTest > [format %c 0x10000] \uFFFF utf32
-UniCharCaseCmpTest > \U10000 \uFFFF utf32
+UniCharCaseCmpTest < \uFFFF [format %c 0x10000]
+UniCharCaseCmpTest < \uFFFF \U10000
+UniCharCaseCmpTest > [format %c 0x10000] \uFFFF
+UniCharCaseCmpTest > \U10000 \uFFFF
test utf-26.1 {Tcl_UniCharDString} -setup {
diff --git a/win/makefile.vc b/win/makefile.vc
index 7440bd5..1b587e0 100644
--- a/win/makefile.vc
+++ b/win/makefile.vc
@@ -52,7 +52,7 @@
# turn on the 64-bit compiler, if your SDK has it.
#
# Basic macros and options usable on the commandline (see rules.vc for more info):
-# OPTS=nomsvcrt,noembed,nothreads,pdbs,profile,static,symbols,thrdalloc,unchecked,utf16,none
+# OPTS=nomsvcrt,noembed,nothreads,pdbs,profile,static,symbols,thrdalloc,unchecked,none
# Sets special options for the core. The default is for none.
# Any combination of the above may be used (comma separated).
# 'none' will over-ride everything to nothing.
@@ -78,7 +78,6 @@
# unchecked = Allows a symbols build to not use the debug
# enabled runtime (msvcrt.dll not msvcrtd.dll
# or libcmt.lib not libcmtd.lib).
-# utf16 = Forces a build using UTF-16 representation internally.
#
# STATS=compdbg,memdbg,none
# Sets optional memory and bytecode compiler debugging code added
diff --git a/win/rules.vc b/win/rules.vc
index 87b6fa5..3a95aab 100644
--- a/win/rules.vc
+++ b/win/rules.vc
@@ -816,7 +816,6 @@ DOTSEPARATED=$(DOTSEPARATED:b=.)
# configuration (ignored for Tcl itself)
# _USE_64BIT_TIME_T - forces a build using 64-bit time_t for 32-bit build
# (CRT library should support this, not needed for Tcl 9.x)
-# TCL_UTF_MAX=3 - forces a build using UTF-16 internally (not recommended).
# Further, LINKERFLAGS are modified based on above.
# Default values for all the above
@@ -889,11 +888,6 @@ _USE_64BIT_TIME_T = 1
!endif
!endif
-!if [nmakehlp -f $(OPTS) "utf16"]
-!message *** Force UTF-16 internally
-TCL_UTF_MAX = 3
-!endif
-
# Yes, it's weird that the "symbols" option controls DEBUG and
# the "pdbs" option controls SYMBOLS. That's historical.
!if [nmakehlp -f $(OPTS) "symbols"]
@@ -1451,9 +1445,6 @@ OPTDEFINES = $(OPTDEFINES) /D_USE_64BIT_TIME_T=1
# _ATL_XP_TARGETING - Newer SDK's need this to build for XP
COMPILERFLAGS = /D_ATL_XP_TARGETING
!endif
-!if "$(TCL_UTF_MAX)" == "3"
-OPTDEFINES = $(OPTDEFINES) /DTCL_UTF_MAX=3
-!endif
!if "$(TCL_BUILD_FOR)" == "8"
OPTDEFINES = $(OPTDEFINES) /DTCL_MAJOR_VERSION=8
!endif