summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.github/workflows/linux-build.yml2
-rw-r--r--.github/workflows/win-build.yml4
-rw-r--r--generic/tcl.decls37
-rw-r--r--generic/tcl.h6
-rw-r--r--generic/tclBinary.c4
-rw-r--r--generic/tclCmdMZ.c120
-rw-r--r--generic/tclCompCmdsSZ.c2
-rw-r--r--generic/tclDecls.h70
-rw-r--r--generic/tclEncoding.c2
-rw-r--r--generic/tclExecute.c40
-rw-r--r--generic/tclIO.c4
-rw-r--r--generic/tclInt.h65
-rw-r--r--generic/tclObj.c2
-rw-r--r--generic/tclProc.c2
-rw-r--r--generic/tclRegexp.c6
-rw-r--r--generic/tclStringObj.c715
-rw-r--r--generic/tclStringRep.h11
-rw-r--r--generic/tclStubInit.c36
-rw-r--r--generic/tclTest.c7
-rw-r--r--generic/tclTestObj.c31
-rw-r--r--generic/tclUtf.c342
-rw-r--r--generic/tclUtil.c6
-rw-r--r--tests/obj.test4
-rw-r--r--tests/string.test2
-rw-r--r--tests/stringObj.test43
-rw-r--r--tests/utf.test46
-rw-r--r--win/makefile.vc4
-rw-r--r--win/rules.vc15
28 files changed, 1210 insertions, 418 deletions
diff --git a/.github/workflows/linux-build.yml b/.github/workflows/linux-build.yml
index 1549b5f..c365faa 100644
--- a/.github/workflows/linux-build.yml
+++ b/.github/workflows/linux-build.yml
@@ -7,7 +7,7 @@ jobs:
matrix:
cfgopt:
- ""
- - "CFLAGS=-DTCL_UTF_MAX=4"
+ - "CFLAGS=-DTCL_UTF_MAX=3"
- "CFLAGS=-DTCL_NO_DEPRECATED=1"
- "--disable-shared"
- "--enable-symbols"
diff --git a/.github/workflows/win-build.yml b/.github/workflows/win-build.yml
index dce303b..5b1b144 100644
--- a/.github/workflows/win-build.yml
+++ b/.github/workflows/win-build.yml
@@ -13,7 +13,7 @@ jobs:
matrix:
cfgopt:
- ""
- - "OPTS=utfmax"
+ - "OPTS=utf16"
- "CHECKS=nodep"
- "OPTS=static"
- "OPTS=symbols"
@@ -52,7 +52,7 @@ jobs:
matrix:
cfgopt:
- ""
- - "CFLAGS=-DTCL_UTF_MAX=4"
+ - "CFLAGS=-DTCL_UTF_MAX=3"
- "CFLAGS=-DTCL_NO_DEPRECATED=1"
- "--disable-shared"
- "--enable-symbols"
diff --git a/generic/tcl.decls b/generic/tcl.decls
index 3cf794e..f0718c1 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -1248,7 +1248,7 @@ declare 352 {
int Tcl_Char16Len(const unsigned short *uniStr)
}
declare 353 {deprecated {Use Tcl_UtfNcmp}} {
- int Tcl_UniCharNcmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct,
+ int Tcl_UniCharNcmp(const unsigned short *ucs, const unsigned short *uct,
unsigned long numChars)
}
declare 354 {
@@ -1338,10 +1338,10 @@ declare 377 {
void Tcl_RegExpGetInfo(Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr)
}
declare 378 {
- Tcl_Obj *Tcl_NewUnicodeObj(const Tcl_UniChar *unicode, int numChars)
+ Tcl_Obj *Tcl_NewUnicodeObj(const unsigned short *unicode, int numChars)
}
declare 379 {
- void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode,
+ void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, const unsigned short *unicode,
int numChars)
}
declare 380 {
@@ -1351,13 +1351,13 @@ declare 381 {
int Tcl_GetUniChar(Tcl_Obj *objPtr, int index)
}
declare 382 {deprecated {No longer in use, changed to macro}} {
- Tcl_UniChar *Tcl_GetUnicode(Tcl_Obj *objPtr)
+ unsigned short *Tcl_GetUnicode(Tcl_Obj *objPtr)
}
declare 383 {
Tcl_Obj *Tcl_GetRange(Tcl_Obj *objPtr, int first, int last)
}
-declare 384 {deprecated {Use Tcl_AppendStringsToObj}} {
- void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode,
+declare 384 {
+ void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const unsigned short *unicode,
int length)
}
declare 385 {
@@ -1483,12 +1483,12 @@ declare 418 {
int Tcl_IsChannelExisting(const char *channelName)
}
declare 419 {deprecated {Use Tcl_UtfNcasecmp}} {
- int Tcl_UniCharNcasecmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct,
+ int Tcl_UniCharNcasecmp(const unsigned short *ucs, const unsigned short *uct,
unsigned long numChars)
}
declare 420 {deprecated {Use Tcl_StringCaseMatch}} {
- int Tcl_UniCharCaseMatch(const Tcl_UniChar *uniStr,
- const Tcl_UniChar *uniPattern, int nocase)
+ int Tcl_UniCharCaseMatch(const unsigned short *uniStr,
+ const unsigned short *uniPattern, int nocase)
}
declare 421 {
Tcl_HashEntry *Tcl_FindHashEntry(Tcl_HashTable *tablePtr, const void *key)
@@ -1541,7 +1541,7 @@ declare 433 {
# introduced in 8.4a3
declare 434 {
- Tcl_UniChar *Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr)
+ unsigned short *Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr)
}
# TIP#15 (math function introspection) dkf
@@ -2417,7 +2417,7 @@ declare 651 {
char *TclGetStringFromObj(Tcl_Obj *objPtr, size_t *lengthPtr)
}
declare 652 {
- Tcl_UniChar *TclGetUnicodeFromObj(Tcl_Obj *objPtr, size_t *lengthPtr)
+ unsigned short *TclGetUnicodeFromObj(Tcl_Obj *objPtr, size_t *lengthPtr)
}
declare 653 {
unsigned char *TclGetByteArrayFromObj(Tcl_Obj *objPtr, size_t *numBytesPtr)
@@ -2454,6 +2454,21 @@ declare 660 {
declare 668 {
int Tcl_UniCharLen(const int *uniStr)
}
+declare 669 {
+ int TclNumUtfChars(const char *src, int length)
+}
+declare 670 {
+ int TclGetCharLength(Tcl_Obj *objPtr)
+}
+declare 671 {
+ const char *TclUtfAtIndex(const char *src, int index)
+}
+declare 672 {
+ Tcl_Obj *TclGetRange(Tcl_Obj *objPtr, int first, int last)
+}
+declare 673 {
+ int TclGetUniChar(Tcl_Obj *objPtr, int index)
+}
# ----- BASELINE -- FOR -- 8.7.0 ----- #
diff --git a/generic/tcl.h b/generic/tcl.h
index eff58b3..e6cc4be 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -2142,7 +2142,11 @@ typedef struct Tcl_EncodingType {
*/
#ifndef TCL_UTF_MAX
-#define TCL_UTF_MAX 3
+# ifdef BUILD_tcl
+# define TCL_UTF_MAX 4
+# else
+# define TCL_UTF_MAX 3
+# endif
#endif
/*
diff --git a/generic/tclBinary.c b/generic/tclBinary.c
index 4717b05..bc17232 100644
--- a/generic/tclBinary.c
+++ b/generic/tclBinary.c
@@ -434,7 +434,7 @@ TclGetBytesFromObj(
irPtr = TclFetchInternalRep(objPtr, &tclByteArrayType);
baPtr = GET_BYTEARRAY(irPtr);
- nonbyte = Tcl_UtfAtIndex(Tcl_GetString(objPtr), baPtr->bad);
+ nonbyte = TclUtfAtIndex(Tcl_GetString(objPtr), baPtr->bad);
TclUtfToUCS4(nonbyte, &ucs4);
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
@@ -473,7 +473,7 @@ Tcl_GetBytesFromObj(
irPtr = TclFetchInternalRep(objPtr, &tclByteArrayType);
baPtr = GET_BYTEARRAY(irPtr);
- nonbyte = Tcl_UtfAtIndex(Tcl_GetString(objPtr), baPtr->bad);
+ nonbyte = TclUtfAtIndex(Tcl_GetString(objPtr), baPtr->bad);
TclUtfToUCS4(nonbyte, &ucs4);
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index f394035..29a73cf 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -256,7 +256,7 @@ Tcl_RegexpObjCmd(
*/
objPtr = objv[1];
- stringLength = Tcl_GetCharLength(objPtr);
+ stringLength = TclGetCharLength(objPtr);
if (startIndex) {
TclGetIntForIndexM(interp, startIndex, stringLength, &offset);
@@ -310,7 +310,7 @@ Tcl_RegexpObjCmd(
eflags = 0;
} else if (offset > stringLength) {
eflags = TCL_REG_NOTBOL;
- } else if (Tcl_GetUniChar(objPtr, offset-1) == '\n') {
+ } else if (TclGetUniChar(objPtr, offset-1) == '\n') {
eflags = 0;
} else {
eflags = TCL_REG_NOTBOL;
@@ -395,7 +395,7 @@ Tcl_RegexpObjCmd(
newPtr = Tcl_NewListObj(2, objs);
} else {
if ((i <= info.nsubs) && (info.matches[i].end > 0)) {
- newPtr = Tcl_GetRange(objPtr,
+ newPtr = TclGetRange(objPtr,
offset + info.matches[i].start,
offset + info.matches[i].end - 1);
} else {
@@ -581,7 +581,7 @@ Tcl_RegsubObjCmd(
objv += idx;
if (startIndex) {
- int stringLength = Tcl_GetCharLength(objv[1]);
+ int stringLength = TclGetCharLength(objv[1]);
TclGetIntForIndexM(interp, startIndex, stringLength, &offset);
Tcl_DecrRefCount(startIndex);
@@ -604,11 +604,11 @@ Tcl_RegsubObjCmd(
numMatches = 0;
nocase = (cflags & TCL_REG_NOCASE);
- strCmpFn = nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp;
+ strCmpFn = nocase ? TclUniCharNcasecmp : TclUniCharNcmp;
- wsrc = Tcl_GetUnicodeFromObj(objv[0], &slen);
- wstring = Tcl_GetUnicodeFromObj(objv[1], &wlen);
- wsubspec = Tcl_GetUnicodeFromObj(objv[2], &wsublen);
+ wsrc = TclGetUnicodeFromObj_(objv[0], &slen);
+ wstring = TclGetUnicodeFromObj_(objv[1], &wlen);
+ wsubspec = TclGetUnicodeFromObj_(objv[2], &wsublen);
wend = wstring + wlen - (slen ? slen - 1 : 0);
result = TCL_OK;
@@ -619,11 +619,11 @@ Tcl_RegsubObjCmd(
*/
if (wstring < wend) {
- resultPtr = Tcl_NewUnicodeObj(wstring, 0);
+ resultPtr = TclNewUnicodeObj(wstring, 0);
Tcl_IncrRefCount(resultPtr);
for (; wstring < wend; wstring++) {
- Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen);
- Tcl_AppendUnicodeToObj(resultPtr, wstring, 1);
+ TclAppendUnicodeToObj(resultPtr, wsubspec, wsublen);
+ TclAppendUnicodeToObj(resultPtr, wstring, 1);
numMatches++;
}
wlen = 0;
@@ -636,18 +636,18 @@ Tcl_RegsubObjCmd(
(slen==1 || (strCmpFn(wstring, wsrc,
(unsigned long) slen) == 0))) {
if (numMatches == 0) {
- resultPtr = Tcl_NewUnicodeObj(wstring, 0);
+ resultPtr = TclNewUnicodeObj(wstring, 0);
Tcl_IncrRefCount(resultPtr);
}
if (p != wstring) {
- Tcl_AppendUnicodeToObj(resultPtr, p, wstring - p);
+ TclAppendUnicodeToObj(resultPtr, p, wstring - p);
p = wstring + slen;
} else {
p += slen;
}
wstring = p - 1;
- Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen);
+ TclAppendUnicodeToObj(resultPtr, wsubspec, wsublen);
numMatches++;
}
}
@@ -699,14 +699,14 @@ Tcl_RegsubObjCmd(
} else {
objPtr = objv[1];
}
- wstring = Tcl_GetUnicodeFromObj(objPtr, &wlen);
+ wstring = TclGetUnicodeFromObj_(objPtr, &wlen);
if (objv[2] == objv[0]) {
subPtr = Tcl_DuplicateObj(objv[2]);
} else {
subPtr = objv[2];
}
if (!command) {
- wsubspec = Tcl_GetUnicodeFromObj(subPtr, &wsublen);
+ wsubspec = TclGetUnicodeFromObj_(subPtr, &wsublen);
}
result = TCL_OK;
@@ -742,7 +742,7 @@ Tcl_RegsubObjCmd(
break;
}
if (numMatches == 0) {
- resultPtr = Tcl_NewUnicodeObj(wstring, 0);
+ resultPtr = TclNewUnicodeObj(wstring, 0);
Tcl_IncrRefCount(resultPtr);
if (offset > 0) {
/*
@@ -750,7 +750,7 @@ Tcl_RegsubObjCmd(
* specified.
*/
- Tcl_AppendUnicodeToObj(resultPtr, wstring, offset);
+ TclAppendUnicodeToObj(resultPtr, wstring, offset);
}
}
numMatches++;
@@ -763,7 +763,7 @@ Tcl_RegsubObjCmd(
Tcl_RegExpGetInfo(regExpr, &info);
start = info.matches[0].start;
end = info.matches[0].end;
- Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, start);
+ TclAppendUnicodeToObj(resultPtr, wstring + offset, start);
/*
* In command-prefix mode, the substitutions are added as quoted
@@ -785,7 +785,7 @@ Tcl_RegsubObjCmd(
subStart = info.matches[idx].start;
subEnd = info.matches[idx].end;
if ((subStart >= 0) && (subEnd >= 0)) {
- args[idx + numParts] = Tcl_NewUnicodeObj(
+ args[idx + numParts] = TclNewUnicodeObj(
wstring + offset + subStart, subEnd - subStart);
} else {
TclNewObj(args[idx + numParts]);
@@ -826,7 +826,7 @@ Tcl_RegsubObjCmd(
* the user code.
*/
- wstring = Tcl_GetUnicodeFromObj(objPtr, &wlen);
+ wstring = TclGetUnicodeFromObj_(objPtr, &wlen);
offset += end;
if (end == 0 || start == end) {
@@ -838,7 +838,7 @@ Tcl_RegsubObjCmd(
*/
if (offset < wlen) {
- Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1);
+ TclAppendUnicodeToObj(resultPtr, wstring + offset, 1);
}
offset++;
}
@@ -867,7 +867,7 @@ Tcl_RegsubObjCmd(
idx = ch - '0';
} else if ((ch == '\\') || (ch == '&')) {
*wsrc = ch;
- Tcl_AppendUnicodeToObj(resultPtr, wfirstChar,
+ TclAppendUnicodeToObj(resultPtr, wfirstChar,
wsrc - wfirstChar + 1);
*wsrc = '\\';
wfirstChar = wsrc + 2;
@@ -881,7 +881,7 @@ Tcl_RegsubObjCmd(
}
if (wfirstChar != wsrc) {
- Tcl_AppendUnicodeToObj(resultPtr, wfirstChar,
+ TclAppendUnicodeToObj(resultPtr, wfirstChar,
wsrc - wfirstChar);
}
@@ -889,7 +889,7 @@ Tcl_RegsubObjCmd(
subStart = info.matches[idx].start;
subEnd = info.matches[idx].end;
if ((subStart >= 0) && (subEnd >= 0)) {
- Tcl_AppendUnicodeToObj(resultPtr,
+ TclAppendUnicodeToObj(resultPtr,
wstring + offset + subStart, subEnd - subStart);
}
}
@@ -901,7 +901,7 @@ Tcl_RegsubObjCmd(
}
if (wfirstChar != wsrc) {
- Tcl_AppendUnicodeToObj(resultPtr, wfirstChar, wsrc - wfirstChar);
+ TclAppendUnicodeToObj(resultPtr, wfirstChar, wsrc - wfirstChar);
}
if (end == 0) {
@@ -911,7 +911,7 @@ Tcl_RegsubObjCmd(
*/
if (offset < wlen) {
- Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1);
+ TclAppendUnicodeToObj(resultPtr, wstring + offset, 1);
}
offset++;
} else {
@@ -923,7 +923,7 @@ Tcl_RegsubObjCmd(
*/
if (offset < wlen) {
- Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1);
+ TclAppendUnicodeToObj(resultPtr, wstring + offset, 1);
}
offset++;
}
@@ -948,7 +948,7 @@ Tcl_RegsubObjCmd(
resultPtr = objv[1];
Tcl_IncrRefCount(resultPtr);
} else if (offset < wlen) {
- Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset);
+ TclAppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset);
}
if (objc == 4) {
if (Tcl_ObjSetVar2(interp, objv[3], NULL, resultPtr,
@@ -1316,7 +1316,7 @@ StringFirstCmd(
}
if (objc == 4) {
- int size = Tcl_GetCharLength(objv[2]);
+ int size = TclGetCharLength(objv[2]);
if (TCL_OK != TclGetIntForIndexM(interp, objv[3], size - 1, &start)) {
return TCL_ERROR;
@@ -1360,7 +1360,7 @@ StringLastCmd(
}
if (objc == 4) {
- int size = Tcl_GetCharLength(objv[2]);
+ int size = TclGetCharLength(objv[2]);
if (TCL_OK != TclGetIntForIndexM(interp, objv[3], size - 1, &last)) {
return TCL_ERROR;
@@ -1406,13 +1406,13 @@ StringIndexCmd(
* Get the char length to calculate what 'end' means.
*/
- length = Tcl_GetCharLength(objv[1]);
+ length = TclGetCharLength(objv[1]);
if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) {
return TCL_ERROR;
}
if ((index >= 0) && (index < length)) {
- int ch = Tcl_GetUniChar(objv[1], index);
+ int ch = TclGetUniChar(objv[1], index);
if (ch == -1) {
return TCL_OK;
@@ -1474,7 +1474,7 @@ StringInsertCmd(
return TCL_ERROR;
}
- length = Tcl_GetCharLength(objv[1]);
+ length = TclGetCharLength(objv[1]);
if (TclGetIntForIndexM(interp, objv[2], length, &index) != TCL_OK) {
return TCL_ERROR;
}
@@ -1669,7 +1669,7 @@ StringIsCmd(
p++;
}
TclNewStringObj(tmpStr, string1, p-string1);
- failat = Tcl_GetCharLength(tmpStr);
+ failat = TclGetCharLength(tmpStr);
TclDecrRefCount(tmpStr);
break;
}
@@ -1849,7 +1849,7 @@ StringIsCmd(
p++;
}
TclNewStringObj(tmpStr, string1, p-string1);
- failat = Tcl_GetCharLength(tmpStr);
+ failat = TclGetCharLength(tmpStr);
TclDecrRefCount(tmpStr);
break;
}
@@ -2060,7 +2060,7 @@ StringMapCmd(
} else {
sourceObj = objv[objc-1];
}
- ustring1 = Tcl_GetUnicodeFromObj(sourceObj, &length1);
+ ustring1 = TclGetUnicodeFromObj_(sourceObj, &length1);
if (length1 == 0) {
/*
* Empty input string, just stop now.
@@ -2070,13 +2070,13 @@ StringMapCmd(
}
end = ustring1 + length1;
- strCmpFn = (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp);
+ strCmpFn = (nocase ? TclUniCharNcasecmp : TclUniCharNcmp);
/*
* Force result to be Unicode
*/
- resultPtr = Tcl_NewUnicodeObj(ustring1, 0);
+ resultPtr = TclNewUnicodeObj(ustring1, 0);
if (mapElemc == 2) {
/*
@@ -2089,7 +2089,7 @@ StringMapCmd(
int mapLen, u2lc;
Tcl_UniChar *mapString;
- ustring2 = Tcl_GetUnicodeFromObj(mapElemv[0], &length2);
+ ustring2 = TclGetUnicodeFromObj_(mapElemv[0], &length2);
p = ustring1;
if ((length2 > length1) || (length2 == 0)) {
/*
@@ -2098,7 +2098,7 @@ StringMapCmd(
ustring1 = end;
} else {
- mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen);
+ mapString = TclGetUnicodeFromObj_(mapElemv[1], &mapLen);
u2lc = (nocase ? Tcl_UniCharToLower(*ustring2) : 0);
for (; ustring1 < end; ustring1++) {
if (((*ustring1 == *ustring2) ||
@@ -2106,14 +2106,14 @@ StringMapCmd(
(length2==1 || strCmpFn(ustring1, ustring2,
(unsigned long) length2) == 0)) {
if (p != ustring1) {
- Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p);
+ TclAppendUnicodeToObj(resultPtr, p, ustring1-p);
p = ustring1 + length2;
} else {
p += length2;
}
ustring1 = p - 1;
- Tcl_AppendUnicodeToObj(resultPtr, mapString, mapLen);
+ TclAppendUnicodeToObj(resultPtr, mapString, mapLen);
}
}
}
@@ -2134,7 +2134,7 @@ StringMapCmd(
u2lc = (int *)TclStackAlloc(interp, mapElemc * sizeof(int));
}
for (index = 0; index < mapElemc; index++) {
- mapStrings[index] = Tcl_GetUnicodeFromObj(mapElemv[index],
+ mapStrings[index] = TclGetUnicodeFromObj_(mapElemv[index],
mapLens+index);
if (nocase && ((index % 2) == 0)) {
u2lc[index/2] = Tcl_UniCharToLower(*mapStrings[index]);
@@ -2158,7 +2158,7 @@ StringMapCmd(
* Put the skipped chars onto the result first.
*/
- Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p);
+ TclAppendUnicodeToObj(resultPtr, p, ustring1-p);
p = ustring1 + length2;
} else {
p += length2;
@@ -2174,7 +2174,7 @@ StringMapCmd(
* Append the map value to the unicode string.
*/
- Tcl_AppendUnicodeToObj(resultPtr,
+ TclAppendUnicodeToObj(resultPtr,
mapStrings[index+1], mapLens[index+1]);
break;
}
@@ -2191,7 +2191,7 @@ StringMapCmd(
* Put the rest of the unmapped chars onto result.
*/
- Tcl_AppendUnicodeToObj(resultPtr, p, ustring1 - p);
+ TclAppendUnicodeToObj(resultPtr, p, ustring1 - p);
}
Tcl_SetObjResult(interp, resultPtr);
done:
@@ -2293,7 +2293,7 @@ StringRangeCmd(
* 'end' refers to the last character, not one past it.
*/
- length = Tcl_GetCharLength(objv[1]) - 1;
+ length = TclGetCharLength(objv[1]) - 1;
if (TclGetIntForIndexM(interp, objv[2], length, &first) != TCL_OK ||
TclGetIntForIndexM(interp, objv[3], length, &last) != TCL_OK) {
@@ -2301,7 +2301,7 @@ StringRangeCmd(
}
if (last >= 0) {
- Tcl_SetObjResult(interp, Tcl_GetRange(objv[1], first, last));
+ Tcl_SetObjResult(interp, TclGetRange(objv[1], first, last));
}
return TCL_OK;
}
@@ -2394,7 +2394,7 @@ StringRplcCmd(
return TCL_ERROR;
}
- length = Tcl_GetCharLength(objv[1]);
+ length = TclGetCharLength(objv[1]);
end = length - 1;
if (TclGetIntForIndexM(interp, objv[2], end, &first) != TCL_OK ||
@@ -2506,7 +2506,7 @@ StringStartCmd(
return TCL_ERROR;
}
- string = Tcl_GetUnicodeFromObj(objv[1], &length);
+ string = TclGetUnicodeFromObj_(objv[1], &length);
if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) {
return TCL_ERROR;
}
@@ -2576,7 +2576,7 @@ StringEndCmd(
return TCL_ERROR;
}
- string = Tcl_GetUnicodeFromObj(objv[1], &length);
+ string = TclGetUnicodeFromObj_(objv[1], &length);
if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) {
return TCL_ERROR;
}
@@ -2880,7 +2880,7 @@ StringLenCmd(
return TCL_ERROR;
}
- Tcl_SetObjResult(interp, Tcl_NewWideIntObj(Tcl_GetCharLength(objv[1])));
+ Tcl_SetObjResult(interp, Tcl_NewWideIntObj(TclGetCharLength(objv[1])));
return TCL_OK;
}
@@ -2954,8 +2954,8 @@ StringLowerCmd(
}
string1 = TclGetStringFromObj(objv[1], &length1);
- start = Tcl_UtfAtIndex(string1, first);
- end = Tcl_UtfAtIndex(start, last - first + 1);
+ start = TclUtfAtIndex(string1, first);
+ end = TclUtfAtIndex(start, last - first + 1);
resultPtr = Tcl_NewStringObj(string1, end - string1);
string2 = TclGetString(resultPtr) + (start - string1);
@@ -3039,8 +3039,8 @@ StringUpperCmd(
}
string1 = TclGetStringFromObj(objv[1], &length1);
- start = Tcl_UtfAtIndex(string1, first);
- end = Tcl_UtfAtIndex(start, last - first + 1);
+ start = TclUtfAtIndex(string1, first);
+ end = TclUtfAtIndex(start, last - first + 1);
resultPtr = Tcl_NewStringObj(string1, end - string1);
string2 = TclGetString(resultPtr) + (start - string1);
@@ -3124,8 +3124,8 @@ StringTitleCmd(
}
string1 = TclGetStringFromObj(objv[1], &length1);
- start = Tcl_UtfAtIndex(string1, first);
- end = Tcl_UtfAtIndex(start, last - first + 1);
+ start = TclUtfAtIndex(string1, first);
+ end = TclUtfAtIndex(start, last - first + 1);
resultPtr = Tcl_NewStringObj(string1, end - string1);
string2 = TclGetString(resultPtr) + (start - string1);
@@ -3790,7 +3790,7 @@ TclNRSwitchObjCmd(
if (matchVarObj != NULL) {
Tcl_Obj *substringObj;
- substringObj = Tcl_GetRange(stringObj,
+ substringObj = TclGetRange(stringObj,
info.matches[j].start, info.matches[j].end-1);
/*
diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c
index aa2d13e..62909eb 100644
--- a/generic/tclCompCmdsSZ.c
+++ b/generic/tclCompCmdsSZ.c
@@ -890,7 +890,7 @@ TclCompileStringLenCmd(
*/
char buf[TCL_INTEGER_SPACE];
- int len = Tcl_GetCharLength(objPtr);
+ int len = TclGetCharLength(objPtr);
len = sprintf(buf, "%d", len);
PushLiteral(envPtr, buf, len);
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index 790cddb..609a1f2 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -1066,8 +1066,8 @@ EXTERN int Tcl_UniCharIsWordChar(int ch);
EXTERN int Tcl_Char16Len(const unsigned short *uniStr);
/* 353 */
TCL_DEPRECATED("Use Tcl_UtfNcmp")
-int Tcl_UniCharNcmp(const Tcl_UniChar *ucs,
- const Tcl_UniChar *uct,
+int Tcl_UniCharNcmp(const unsigned short *ucs,
+ const unsigned short *uct,
unsigned long numChars);
/* 354 */
EXTERN char * Tcl_Char16ToUtfDString(const unsigned short *uniStr,
@@ -1142,24 +1142,23 @@ EXTERN int Tcl_RegExpExecObj(Tcl_Interp *interp,
EXTERN void Tcl_RegExpGetInfo(Tcl_RegExp regexp,
Tcl_RegExpInfo *infoPtr);
/* 378 */
-EXTERN Tcl_Obj * Tcl_NewUnicodeObj(const Tcl_UniChar *unicode,
+EXTERN Tcl_Obj * Tcl_NewUnicodeObj(const unsigned short *unicode,
int numChars);
/* 379 */
EXTERN void Tcl_SetUnicodeObj(Tcl_Obj *objPtr,
- const Tcl_UniChar *unicode, int numChars);
+ const unsigned short *unicode, int numChars);
/* 380 */
EXTERN int Tcl_GetCharLength(Tcl_Obj *objPtr);
/* 381 */
EXTERN int Tcl_GetUniChar(Tcl_Obj *objPtr, int index);
/* 382 */
TCL_DEPRECATED("No longer in use, changed to macro")
-Tcl_UniChar * Tcl_GetUnicode(Tcl_Obj *objPtr);
+unsigned short * Tcl_GetUnicode(Tcl_Obj *objPtr);
/* 383 */
EXTERN Tcl_Obj * Tcl_GetRange(Tcl_Obj *objPtr, int first, int last);
/* 384 */
-TCL_DEPRECATED("Use Tcl_AppendStringsToObj")
-void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr,
- const Tcl_UniChar *unicode, int length);
+EXTERN void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr,
+ const unsigned short *unicode, int length);
/* 385 */
EXTERN int Tcl_RegExpMatchObj(Tcl_Interp *interp,
Tcl_Obj *textObj, Tcl_Obj *patternObj);
@@ -1256,13 +1255,13 @@ EXTERN void Tcl_ClearChannelHandlers(Tcl_Channel channel);
EXTERN int Tcl_IsChannelExisting(const char *channelName);
/* 419 */
TCL_DEPRECATED("Use Tcl_UtfNcasecmp")
-int Tcl_UniCharNcasecmp(const Tcl_UniChar *ucs,
- const Tcl_UniChar *uct,
+int Tcl_UniCharNcasecmp(const unsigned short *ucs,
+ const unsigned short *uct,
unsigned long numChars);
/* 420 */
TCL_DEPRECATED("Use Tcl_StringCaseMatch")
-int Tcl_UniCharCaseMatch(const Tcl_UniChar *uniStr,
- const Tcl_UniChar *uniPattern, int nocase);
+int Tcl_UniCharCaseMatch(const unsigned short *uniStr,
+ const unsigned short *uniPattern, int nocase);
/* 421 */
EXTERN Tcl_HashEntry * Tcl_FindHashEntry(Tcl_HashTable *tablePtr,
const void *key);
@@ -1304,7 +1303,7 @@ EXTERN int Tcl_AttemptSetObjLength(Tcl_Obj *objPtr, int length);
/* 433 */
EXTERN Tcl_ThreadId Tcl_GetChannelThread(Tcl_Channel channel);
/* 434 */
-EXTERN Tcl_UniChar * Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr,
+EXTERN unsigned short * Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr,
int *lengthPtr);
/* 435 */
TCL_DEPRECATED("")
@@ -1930,7 +1929,7 @@ EXTERN unsigned char * Tcl_GetBytesFromObj(Tcl_Interp *interp,
EXTERN char * TclGetStringFromObj(Tcl_Obj *objPtr,
size_t *lengthPtr);
/* 652 */
-EXTERN Tcl_UniChar * TclGetUnicodeFromObj(Tcl_Obj *objPtr,
+EXTERN unsigned short * TclGetUnicodeFromObj(Tcl_Obj *objPtr,
size_t *lengthPtr);
/* 653 */
EXTERN unsigned char * TclGetByteArrayFromObj(Tcl_Obj *objPtr,
@@ -1963,6 +1962,16 @@ EXTERN int Tcl_AsyncMarkFromSignal(Tcl_AsyncHandler async,
/* Slot 667 is reserved */
/* 668 */
EXTERN int Tcl_UniCharLen(const int *uniStr);
+/* 669 */
+EXTERN int TclNumUtfChars(const char *src, int length);
+/* 670 */
+EXTERN int TclGetCharLength(Tcl_Obj *objPtr);
+/* 671 */
+EXTERN const char * TclUtfAtIndex(const char *src, int index);
+/* 672 */
+EXTERN Tcl_Obj * TclGetRange(Tcl_Obj *objPtr, int first, int last);
+/* 673 */
+EXTERN int TclGetUniChar(Tcl_Obj *objPtr, int index);
typedef struct {
const struct TclPlatStubs *tclPlatStubs;
@@ -2351,7 +2360,7 @@ typedef struct TclStubs {
int (*tcl_UniCharIsUpper) (int ch); /* 350 */
int (*tcl_UniCharIsWordChar) (int ch); /* 351 */
int (*tcl_Char16Len) (const unsigned short *uniStr); /* 352 */
- TCL_DEPRECATED_API("Use Tcl_UtfNcmp") int (*tcl_UniCharNcmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 353 */
+ TCL_DEPRECATED_API("Use Tcl_UtfNcmp") int (*tcl_UniCharNcmp) (const unsigned short *ucs, const unsigned short *uct, unsigned long numChars); /* 353 */
char * (*tcl_Char16ToUtfDString) (const unsigned short *uniStr, int uniLength, Tcl_DString *dsPtr); /* 354 */
unsigned short * (*tcl_UtfToChar16DString) (const char *src, int length, Tcl_DString *dsPtr); /* 355 */
Tcl_RegExp (*tcl_GetRegExpFromObj) (Tcl_Interp *interp, Tcl_Obj *patObj, int flags); /* 356 */
@@ -2376,13 +2385,13 @@ typedef struct TclStubs {
int (*tcl_UniCharIsPunct) (int ch); /* 375 */
int (*tcl_RegExpExecObj) (Tcl_Interp *interp, Tcl_RegExp regexp, Tcl_Obj *textObj, int offset, int nmatches, int flags); /* 376 */
void (*tcl_RegExpGetInfo) (Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr); /* 377 */
- Tcl_Obj * (*tcl_NewUnicodeObj) (const Tcl_UniChar *unicode, int numChars); /* 378 */
- void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars); /* 379 */
+ Tcl_Obj * (*tcl_NewUnicodeObj) (const unsigned short *unicode, int numChars); /* 378 */
+ void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const unsigned short *unicode, int numChars); /* 379 */
int (*tcl_GetCharLength) (Tcl_Obj *objPtr); /* 380 */
int (*tcl_GetUniChar) (Tcl_Obj *objPtr, int index); /* 381 */
- TCL_DEPRECATED_API("No longer in use, changed to macro") Tcl_UniChar * (*tcl_GetUnicode) (Tcl_Obj *objPtr); /* 382 */
+ TCL_DEPRECATED_API("No longer in use, changed to macro") unsigned short * (*tcl_GetUnicode) (Tcl_Obj *objPtr); /* 382 */
Tcl_Obj * (*tcl_GetRange) (Tcl_Obj *objPtr, int first, int last); /* 383 */
- TCL_DEPRECATED_API("Use Tcl_AppendStringsToObj") void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int length); /* 384 */
+ void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const unsigned short *unicode, int length); /* 384 */
int (*tcl_RegExpMatchObj) (Tcl_Interp *interp, Tcl_Obj *textObj, Tcl_Obj *patternObj); /* 385 */
void (*tcl_SetNotifier) (const Tcl_NotifierProcs *notifierProcPtr); /* 386 */
Tcl_Mutex * (*tcl_GetAllocMutex) (void); /* 387 */
@@ -2417,8 +2426,8 @@ typedef struct TclStubs {
void (*tcl_SpliceChannel) (Tcl_Channel channel); /* 416 */
void (*tcl_ClearChannelHandlers) (Tcl_Channel channel); /* 417 */
int (*tcl_IsChannelExisting) (const char *channelName); /* 418 */
- TCL_DEPRECATED_API("Use Tcl_UtfNcasecmp") int (*tcl_UniCharNcasecmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 419 */
- TCL_DEPRECATED_API("Use Tcl_StringCaseMatch") int (*tcl_UniCharCaseMatch) (const Tcl_UniChar *uniStr, const Tcl_UniChar *uniPattern, int nocase); /* 420 */
+ TCL_DEPRECATED_API("Use Tcl_UtfNcasecmp") int (*tcl_UniCharNcasecmp) (const unsigned short *ucs, const unsigned short *uct, unsigned long numChars); /* 419 */
+ TCL_DEPRECATED_API("Use Tcl_StringCaseMatch") int (*tcl_UniCharCaseMatch) (const unsigned short *uniStr, const unsigned short *uniPattern, int nocase); /* 420 */
Tcl_HashEntry * (*tcl_FindHashEntry) (Tcl_HashTable *tablePtr, const void *key); /* 421 */
Tcl_HashEntry * (*tcl_CreateHashEntry) (Tcl_HashTable *tablePtr, const void *key, int *newPtr); /* 422 */
void (*tcl_InitCustomHashTable) (Tcl_HashTable *tablePtr, int keyType, const Tcl_HashKeyType *typePtr); /* 423 */
@@ -2432,7 +2441,7 @@ typedef struct TclStubs {
char * (*tcl_AttemptDbCkrealloc) (char *ptr, unsigned int size, const char *file, int line); /* 431 */
int (*tcl_AttemptSetObjLength) (Tcl_Obj *objPtr, int length); /* 432 */
Tcl_ThreadId (*tcl_GetChannelThread) (Tcl_Channel channel); /* 433 */
- Tcl_UniChar * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int *lengthPtr); /* 434 */
+ unsigned short * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int *lengthPtr); /* 434 */
TCL_DEPRECATED_API("") int (*tcl_GetMathFuncInfo) (Tcl_Interp *interp, const char *name, int *numArgsPtr, Tcl_ValueType **argTypesPtr, Tcl_MathProc **procPtr, ClientData *clientDataPtr); /* 435 */
TCL_DEPRECATED_API("") Tcl_Obj * (*tcl_ListMathFuncs) (Tcl_Interp *interp, const char *pattern); /* 436 */
Tcl_Obj * (*tcl_SubstObj) (Tcl_Interp *interp, Tcl_Obj *objPtr, int flags); /* 437 */
@@ -2650,7 +2659,7 @@ typedef struct TclStubs {
unsigned char * (*tclGetBytesFromObj) (Tcl_Interp *interp, Tcl_Obj *objPtr, int *numBytesPtr); /* 649 */
unsigned char * (*tcl_GetBytesFromObj) (Tcl_Interp *interp, Tcl_Obj *objPtr, size_t *numBytesPtr); /* 650 */
char * (*tclGetStringFromObj) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 651 */
- Tcl_UniChar * (*tclGetUnicodeFromObj) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 652 */
+ unsigned short * (*tclGetUnicodeFromObj) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 652 */
unsigned char * (*tclGetByteArrayFromObj) (Tcl_Obj *objPtr, size_t *numBytesPtr); /* 653 */
int (*tcl_UtfCharComplete) (const char *src, int length); /* 654 */
const char * (*tcl_UtfNext) (const char *src); /* 655 */
@@ -2667,6 +2676,11 @@ typedef struct TclStubs {
void (*reserved666)(void);
void (*reserved667)(void);
int (*tcl_UniCharLen) (const int *uniStr); /* 668 */
+ int (*tclNumUtfChars) (const char *src, int length); /* 669 */
+ int (*tclGetCharLength) (Tcl_Obj *objPtr); /* 670 */
+ const char * (*tclUtfAtIndex) (const char *src, int index); /* 671 */
+ Tcl_Obj * (*tclGetRange) (Tcl_Obj *objPtr, int first, int last); /* 672 */
+ int (*tclGetUniChar) (Tcl_Obj *objPtr, int index); /* 673 */
} TclStubs;
extern const TclStubs *tclStubsPtr;
@@ -4028,6 +4042,16 @@ extern const TclStubs *tclStubsPtr;
/* Slot 667 is reserved */
#define Tcl_UniCharLen \
(tclStubsPtr->tcl_UniCharLen) /* 668 */
+#define TclNumUtfChars \
+ (tclStubsPtr->tclNumUtfChars) /* 669 */
+#define TclGetCharLength \
+ (tclStubsPtr->tclGetCharLength) /* 670 */
+#define TclUtfAtIndex \
+ (tclStubsPtr->tclUtfAtIndex) /* 671 */
+#define TclGetRange \
+ (tclStubsPtr->tclGetRange) /* 672 */
+#define TclGetUniChar \
+ (tclStubsPtr->tclGetUniChar) /* 673 */
#endif /* defined(USE_TCL_STUBS) */
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index fd0386c..2765f2a 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -1338,7 +1338,7 @@ Tcl_ExternalToUtf(
if (*dstCharsPtr <= maxChars) {
break;
}
- dstLen = Tcl_UtfAtIndex(dst, maxChars) - dst + (TCL_UTF_MAX - 1);
+ dstLen = TclUtfAtIndex(dst, maxChars) - dst + (TCL_UTF_MAX - 1);
*statePtr = savedState;
} while (1);
if (!noTerminate) {
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index 0ec2404..1f72d63 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -5244,7 +5244,7 @@ TEBCresume(
case INST_STR_LEN:
valuePtr = OBJ_AT_TOS;
- length = Tcl_GetCharLength(valuePtr);
+ length = TclGetCharLength(valuePtr);
TclNewIntObj(objResultPtr, length);
TRACE(("\"%.20s\" => %d\n", O2S(valuePtr), length));
NEXT_INST_F(1, 1, 1);
@@ -5310,7 +5310,7 @@ TEBCresume(
* Get char length to calulate what 'end' means.
*/
- length = Tcl_GetCharLength(valuePtr);
+ length = TclGetCharLength(valuePtr);
DECACHE_STACK_INFO();
if (TclGetIntForIndexM(interp, value2Ptr, length-1, &index)!=TCL_OK) {
CACHE_STACK_INFO();
@@ -5329,7 +5329,7 @@ TEBCresume(
valuePtr->bytes+index, 1);
} else {
char buf[4] = "";
- int ch = Tcl_GetUniChar(valuePtr, index);
+ int ch = TclGetUniChar(valuePtr, index);
/*
* This could be: Tcl_NewUnicodeObj((const Tcl_UniChar *)&ch, 1)
@@ -5353,7 +5353,7 @@ TEBCresume(
case INST_STR_RANGE:
TRACE(("\"%.20s\" %.20s %.20s =>",
O2S(OBJ_AT_DEPTH(2)), O2S(OBJ_UNDER_TOS), O2S(OBJ_AT_TOS)));
- length = Tcl_GetCharLength(OBJ_AT_DEPTH(2)) - 1;
+ length = TclGetCharLength(OBJ_AT_DEPTH(2)) - 1;
DECACHE_STACK_INFO();
if (TclGetIntForIndexM(interp, OBJ_UNDER_TOS, length,
@@ -5373,7 +5373,7 @@ TEBCresume(
if (toIdx < 0) {
TclNewObj(objResultPtr);
} else {
- objResultPtr = Tcl_GetRange(OBJ_AT_DEPTH(2), fromIdx, toIdx);
+ objResultPtr = TclGetRange(OBJ_AT_DEPTH(2), fromIdx, toIdx);
}
TRACE_APPEND(("\"%.30s\"\n", O2S(objResultPtr)));
NEXT_INST_V(1, 3, 1);
@@ -5382,7 +5382,7 @@ TEBCresume(
valuePtr = OBJ_AT_TOS;
fromIdx = TclGetInt4AtPtr(pc+1);
toIdx = TclGetInt4AtPtr(pc+5);
- length = Tcl_GetCharLength(valuePtr);
+ length = TclGetCharLength(valuePtr);
TRACE(("\"%.20s\" %d %d => ", O2S(valuePtr), fromIdx, toIdx));
/* Every range of an empty value is an empty value */
@@ -5414,7 +5414,7 @@ TEBCresume(
if (toIdx < 0) {
TclNewObj(objResultPtr);
} else {
- objResultPtr = Tcl_GetRange(valuePtr, fromIdx, toIdx);
+ objResultPtr = TclGetRange(valuePtr, fromIdx, toIdx);
}
}
TRACE_APPEND(("%.30s\n", O2S(objResultPtr)));
@@ -5428,7 +5428,7 @@ TEBCresume(
case INST_STR_REPLACE:
value3Ptr = POP_OBJECT();
valuePtr = OBJ_AT_DEPTH(2);
- endIdx = Tcl_GetCharLength(valuePtr) - 1;
+ endIdx = TclGetCharLength(valuePtr) - 1;
TRACE(("\"%.20s\" %s %s \"%.20s\" => ", O2S(valuePtr),
O2S(OBJ_UNDER_TOS), O2S(OBJ_AT_TOS), O2S(value3Ptr)));
DECACHE_STACK_INFO();
@@ -5495,12 +5495,12 @@ TEBCresume(
objResultPtr = value3Ptr;
goto doneStringMap;
}
- ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length);
+ ustring1 = TclGetUnicodeFromObj_(valuePtr, &length);
if (length == 0) {
objResultPtr = valuePtr;
goto doneStringMap;
}
- ustring2 = Tcl_GetUnicodeFromObj(value2Ptr, &length2);
+ ustring2 = TclGetUnicodeFromObj_(value2Ptr, &length2);
if (length2 > length || length2 == 0) {
objResultPtr = valuePtr;
goto doneStringMap;
@@ -5512,9 +5512,9 @@ TEBCresume(
}
goto doneStringMap;
}
- ustring3 = Tcl_GetUnicodeFromObj(value3Ptr, &length3);
+ ustring3 = TclGetUnicodeFromObj_(value3Ptr, &length3);
- objResultPtr = Tcl_NewUnicodeObj(ustring1, 0);
+ objResultPtr = TclNewUnicodeObj(ustring1, 0);
p = ustring1;
end = ustring1 + length;
for (; ustring1 < end; ustring1++) {
@@ -5524,14 +5524,14 @@ TEBCresume(
memcmp(ustring1, ustring2, sizeof(Tcl_UniChar) * length2)
== 0)) {
if (p != ustring1) {
- Tcl_AppendUnicodeToObj(objResultPtr, p, ustring1-p);
+ TclAppendUnicodeToObj(objResultPtr, p, ustring1-p);
p = ustring1 + length2;
} else {
p += length2;
}
ustring1 = p - 1;
- Tcl_AppendUnicodeToObj(objResultPtr, ustring3, length3);
+ TclAppendUnicodeToObj(objResultPtr, ustring3, length3);
}
}
if (p != ustring1) {
@@ -5539,7 +5539,7 @@ TEBCresume(
* Put the rest of the unmapped chars onto result.
*/
- Tcl_AppendUnicodeToObj(objResultPtr, p, ustring1 - p);
+ TclAppendUnicodeToObj(objResultPtr, p, ustring1 - p);
}
doneStringMap:
TRACE_WITH_OBJ(("%.20s %.20s %.20s => ",
@@ -5565,7 +5565,7 @@ TEBCresume(
valuePtr = OBJ_AT_TOS;
TRACE(("%s \"%.30s\" => ", tclStringClassTable[opnd].name,
O2S(valuePtr)));
- ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length);
+ ustring1 = TclGetUnicodeFromObj_(valuePtr, &length);
match = 1;
if (length > 0) {
int ch;
@@ -5592,12 +5592,12 @@ TEBCresume(
* both.
*/
- if (TclHasInternalRep(valuePtr, &tclStringType)
- || TclHasInternalRep(value2Ptr, &tclStringType)) {
+ if (TclHasInternalRep(valuePtr, &tclUniCharStringType)
+ || TclHasInternalRep(value2Ptr, &tclUniCharStringType)) {
Tcl_UniChar *ustring1, *ustring2;
- ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length);
- ustring2 = Tcl_GetUnicodeFromObj(value2Ptr, &length2);
+ ustring1 = TclGetUnicodeFromObj_(valuePtr, &length);
+ ustring2 = TclGetUnicodeFromObj_(value2Ptr, &length2);
match = TclUniCharMatch(ustring1, length, ustring2, length2,
nocase);
} else if (TclIsPureByteArray(valuePtr) && !nocase) {
diff --git a/generic/tclIO.c b/generic/tclIO.c
index 92bd91b..af14071 100644
--- a/generic/tclIO.c
+++ b/generic/tclIO.c
@@ -3556,7 +3556,7 @@ Tcl_Close(
result = flushcode;
}
if ((result != 0) && (result != TCL_ERROR) && (interp != NULL)
- && 0 == Tcl_GetCharLength(Tcl_GetObjResult(interp))) {
+ && 0 == TclGetCharLength(Tcl_GetObjResult(interp))) {
Tcl_SetErrno(result);
Tcl_SetObjResult(interp,
Tcl_NewStringObj(Tcl_PosixError(interp), -1));
@@ -6379,7 +6379,7 @@ ReadChars(
* bytes demanded by the Tcl_ExternalToUtf() call!
*/
- dstLimit = Tcl_UtfAtIndex(dst, charsToRead) - dst + (TCL_UTF_MAX - 1);
+ dstLimit = TclUtfAtIndex(dst, charsToRead) - dst + (TCL_UTF_MAX - 1);
statePtr->flags = savedFlags;
statePtr->inputEncodingFlags = savedIEFlags;
statePtr->inputEncodingState = savedState;
diff --git a/generic/tclInt.h b/generic/tclInt.h
index af839fc..eff8eab 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -2771,6 +2771,7 @@ MODULE_SCOPE const Tcl_ObjType tclListType;
MODULE_SCOPE const Tcl_ObjType tclDictType;
MODULE_SCOPE const Tcl_ObjType tclProcBodyType;
MODULE_SCOPE const Tcl_ObjType tclStringType;
+MODULE_SCOPE const Tcl_ObjType tclUniCharStringType;
MODULE_SCOPE const Tcl_ObjType tclEnsembleCmdType;
MODULE_SCOPE const Tcl_ObjType tclRegexpType;
MODULE_SCOPE Tcl_ObjType tclCmdNameType;
@@ -3322,6 +3323,44 @@ MODULE_SCOPE void TclErrorStackResetIf(Tcl_Interp *interp,
MODULE_SCOPE int TclZipfs_Init(Tcl_Interp *interp);
+#if TCL_UTF_MAX > 3
+ MODULE_SCOPE int *TclGetUnicodeFromObj_(Tcl_Obj *, int *);
+ MODULE_SCOPE Tcl_Obj *TclNewUnicodeObj(const int *, int);
+ MODULE_SCOPE void TclAppendUnicodeToObj(Tcl_Obj *, const int *, int);
+ MODULE_SCOPE int TclUniCharNcasecmp(const int *, const int *, unsigned long);
+ MODULE_SCOPE int TclUniCharCaseMatch(const int *, const int *, int);
+ MODULE_SCOPE int TclUniCharNcmp(const int *, const int *, unsigned long);
+# undef Tcl_NumUtfChars
+# define Tcl_NumUtfChars TclNumUtfChars
+# undef Tcl_GetCharLength
+# define Tcl_GetCharLength TclGetCharLength
+# undef Tcl_UtfAtIndex
+# define Tcl_UtfAtIndex TclUtfAtIndex
+# undef Tcl_GetRange
+# define Tcl_GetRange TclGetRange
+# undef Tcl_GetUniChar
+# define Tcl_GetUniChar TclGetUniChar
+#else
+# define tclUniCharStringType tclStringType
+# define TclGetUnicodeFromObj_ Tcl_GetUnicodeFromObj
+# define TclNewUnicodeObj Tcl_NewUnicodeObj
+# define TclAppendUnicodeToObj Tcl_AppendUnicodeToObj
+# define TclUniCharNcasecmp Tcl_UniCharNcasecmp
+# define TclUniCharCaseMatch Tcl_UniCharCaseMatch
+# define TclUniCharNcmp Tcl_UniCharNcmp
+# undef TclNumUtfChars
+# define TclNumUtfChars Tcl_NumUtfChars
+# undef TclGetCharLength
+# define TclGetCharLength Tcl_GetCharLength
+# undef TclUtfAtIndex
+# define TclUtfAtIndex Tcl_UtfAtIndex
+# undef TclGetRange
+# define TclGetRange Tcl_GetRange
+# undef TclGetUniChar
+# define TclGetUniChar Tcl_GetUniChar
+#endif
+
+
/*
* Many parsing tasks need a common definition of whitespace.
* Use this routine and macro to achieve that and place
@@ -4725,8 +4764,8 @@ MODULE_SCOPE const TclFileAttrProcs tclpFileAttrProcs[];
: Tcl_UtfToUniChar(str, chPtr))
#else
#define TclUtfToUniChar(str, chPtr) \
- ((((unsigned char) *(str)) < 0x80) ? \
- ((*(chPtr) = (unsigned char) *(str)), 1) \
+ (((UCHAR(*(str))) < 0x80) ? \
+ ((*(chPtr) = UCHAR(*(str))), 1) \
: Tcl_UtfToChar16(str, chPtr))
#endif
@@ -4742,14 +4781,14 @@ MODULE_SCOPE const TclFileAttrProcs tclpFileAttrProcs[];
*----------------------------------------------------------------
*/
-#define TclNumUtfChars(numChars, bytes, numBytes) \
+#define TclNumUtfCharsM(numChars, bytes, numBytes) \
do { \
int _count, _i = (numBytes); \
unsigned char *_str = (unsigned char *) (bytes); \
while (_i && (*_str < 0xC0)) { _i--; _str++; } \
_count = (numBytes) - _i; \
if (_i) { \
- _count += Tcl_NumUtfChars((bytes) + _count, _i); \
+ _count += TclNumUtfChars((bytes) + _count, _i); \
} \
(numChars) = _count; \
} while (0);
@@ -4780,24 +4819,6 @@ MODULE_SCOPE int TclIsPureByteArray(Tcl_Obj *objPtr);
/*
*----------------------------------------------------------------
- * Macro used by the Tcl core to compare Unicode strings. On big-endian
- * systems we can use the more efficient memcmp, but this would not be
- * lexically correct on little-endian systems. The ANSI C "prototype" for
- * this macro is:
- *
- * MODULE_SCOPE int TclUniCharNcmp(const Tcl_UniChar *cs,
- * const Tcl_UniChar *ct, unsigned long n);
- *----------------------------------------------------------------
- */
-
-#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3)
-# define TclUniCharNcmp(cs,ct,n) memcmp((cs),(ct),(n)*sizeof(Tcl_UniChar))
-#else /* !WORDS_BIGENDIAN */
-# define TclUniCharNcmp Tcl_UniCharNcmp
-#endif /* WORDS_BIGENDIAN */
-
-/*
- *----------------------------------------------------------------
* Macro used by the Tcl core to increment a namespace's export epoch
* counter. The ANSI C "prototype" for this macro is:
*
diff --git a/generic/tclObj.c b/generic/tclObj.c
index a06b8fd..15fe98f 100644
--- a/generic/tclObj.c
+++ b/generic/tclObj.c
@@ -387,7 +387,9 @@ TclInitObjSubsystem(void)
Tcl_RegisterObjType(&tclByteArrayType);
Tcl_RegisterObjType(&tclDoubleType);
+#if (TCL_UTF_MAX < 4) || !defined(TCL_NO_DEPRECATED)
Tcl_RegisterObjType(&tclStringType);
+#endif
Tcl_RegisterObjType(&tclListType);
Tcl_RegisterObjType(&tclDictType);
Tcl_RegisterObjType(&tclByteCodeType);
diff --git a/generic/tclProc.c b/generic/tclProc.c
index 45d1afd..75687f0 100644
--- a/generic/tclProc.c
+++ b/generic/tclProc.c
@@ -529,7 +529,7 @@ TclCreateProc(
"FORMALARGUMENTFORMAT", NULL);
goto procError;
}
- if ((fieldCount == 0) || (Tcl_GetCharLength(fieldValues[0]) == 0)) {
+ if ((fieldCount == 0) || (TclGetCharLength(fieldValues[0]) == 0)) {
Tcl_SetObjResult(interp, Tcl_NewStringObj(
"argument with no name", -1));
Tcl_SetErrorCode(interp, "TCL", "OPERATION", "PROC",
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c
index 8e588ac..ff7c72c 100644
--- a/generic/tclRegexp.c
+++ b/generic/tclRegexp.c
@@ -271,8 +271,8 @@ Tcl_RegExpRange(
} else {
string = regexpPtr->string;
}
- *startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so);
- *endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo);
+ *startPtr = TclUtfAtIndex(string, regexpPtr->matches[index].rm_so);
+ *endPtr = TclUtfAtIndex(string, regexpPtr->matches[index].rm_eo);
}
}
@@ -482,7 +482,7 @@ Tcl_RegExpExecObj(
regexpPtr->string = NULL;
regexpPtr->objPtr = textObj;
- udata = Tcl_GetUnicodeFromObj(textObj, &length);
+ udata = TclGetUnicodeFromObj_(textObj, &length);
if (offset > length) {
offset = length;
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 114e8a6..33f84bd 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -69,6 +69,12 @@ static void SetUnicodeObj(Tcl_Obj *objPtr,
const Tcl_UniChar *unicode, int numChars);
static int UnicodeLength(const Tcl_UniChar *unicode);
static void UpdateStringOfString(Tcl_Obj *objPtr);
+#if (TCL_UTF_MAX) > 3 && !defined(TCL_NO_DEPRECATED)
+static void DupUTF16StringInternalRep(Tcl_Obj *objPtr,
+ Tcl_Obj *copyPtr);
+static int SetUTF16StringFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr);
+static void UpdateStringOfUTF16String(Tcl_Obj *objPtr);
+#endif
#define ISCONTINUATION(bytes) (\
((((bytes)[0] & 0xC0) == 0x80) || (((bytes)[0] == '\xED') \
@@ -80,6 +86,20 @@ static void UpdateStringOfString(Tcl_Obj *objPtr);
* functions that can be invoked by generic object code.
*/
+#if TCL_UTF_MAX < 4
+
+#define tclUniCharStringType tclStringType
+#define GET_UNICHAR_STRING GET_STRING
+#define UniCharString String
+#define UNICHAR_STRING_MAXCHARS STRING_MAXCHARS
+#define uniCharStringAlloc stringAlloc
+#define uniCharStringRealloc stringRealloc
+#define uniCharStringAttemptAlloc stringAttemptAlloc
+#define uniCharStringAttemptRealloc stringAttemptRealloc
+#define uniCharStringCheckLimits stringCheckLimits
+#define SET_UNICHAR_STRING SET_STRING
+#define UNICHAR_STRING_SIZE STRING_SIZE
+
const Tcl_ObjType tclStringType = {
"string", /* name */
FreeStringInternalRep, /* freeIntRepPro */
@@ -87,7 +107,149 @@ const Tcl_ObjType tclStringType = {
UpdateStringOfString, /* updateStringProc */
SetStringFromAny /* setFromAnyProc */
};
-
+
+#else
+
+#ifndef TCL_NO_DEPRECATED
+const Tcl_ObjType tclStringType = {
+ "string", /* name */
+ FreeStringInternalRep, /* freeIntRepPro */
+ DupUTF16StringInternalRep, /* dupIntRepProc */
+ UpdateStringOfUTF16String, /* updateStringProc */
+ SetUTF16StringFromAny /* setFromAnyProc */
+};
+#endif
+
+const Tcl_ObjType tclUniCharStringType = {
+ "utf32string", /* name */
+ FreeStringInternalRep, /* freeIntRepPro */
+ DupStringInternalRep, /* dupIntRepProc */
+ UpdateStringOfString, /* updateStringProc */
+ SetStringFromAny /* setFromAnyProc */
+};
+
+typedef struct {
+ int numChars; /* The number of chars in the string. -1 means
+ * this value has not been calculated. >= 0
+ * means that there is a valid Unicode rep, or
+ * that the number of UTF bytes == the number
+ * of chars. */
+ int allocated; /* The amount of space actually allocated for
+ * the UTF string (minus 1 byte for the
+ * termination char). */
+ int maxChars; /* Max number of chars that can fit in the
+ * space allocated for the unicode array. */
+ int hasUnicode; /* Boolean determining whether the string has
+ * a Unicode representation. */
+ Tcl_UniChar unicode[TCLFLEXARRAY]; /* The array of Unicode chars. The actual size
+ * of this field depends on the 'maxChars'
+ * field above. */
+} UniCharString;
+
+#define UNICHAR_STRING_MAXCHARS \
+ (int)(((size_t)UINT_MAX - offsetof(UniCharString, unicode))/sizeof(Tcl_UniChar) - 1)
+#define UNICHAR_STRING_SIZE(numChars) \
+ (offsetof(UniCharString, unicode) + sizeof(Tcl_UniChar) + ((numChars) * sizeof(Tcl_UniChar)))
+#define uniCharStringCheckLimits(numChars) \
+ do { \
+ if ((numChars) < 0 || (numChars) > UNICHAR_STRING_MAXCHARS) { \
+ Tcl_Panic("max length for a Tcl unicode value (%d chars) exceeded", \
+ UNICHAR_STRING_MAXCHARS); \
+ } \
+ } while (0)
+#define uniCharStringAttemptAlloc(numChars) \
+ (UniCharString *) attemptckalloc(UNICHAR_STRING_SIZE(numChars))
+#define uniCharStringAlloc(numChars) \
+ (UniCharString *) ckalloc(UNICHAR_STRING_SIZE(numChars))
+#define uniCharStringRealloc(ptr, numChars) \
+ (UniCharString *) ckrealloc((ptr), UNICHAR_STRING_SIZE(numChars))
+#define uniCharStringAttemptRealloc(ptr, numChars) \
+ (UniCharString *) attemptckrealloc((ptr), UNICHAR_STRING_SIZE(numChars))
+#define GET_UNICHAR_STRING(objPtr) \
+ ((UniCharString *) (objPtr)->internalRep.twoPtrValue.ptr1)
+#define SET_UNICHAR_STRING(objPtr, stringPtr) \
+ ((objPtr)->internalRep.twoPtrValue.ptr2 = NULL), \
+ ((objPtr)->internalRep.twoPtrValue.ptr1 = (void *) (stringPtr))
+
+
+#ifndef TCL_NO_DEPRECATED
+static void
+DupUTF16StringInternalRep(
+ Tcl_Obj *srcPtr, /* Object with internal rep to copy. Must have
+ * an internal rep of type "String". */
+ Tcl_Obj *copyPtr) /* Object with internal rep to set. Must not
+ * currently have an internal rep.*/
+{
+ String *srcStringPtr = GET_STRING(srcPtr);
+ size_t size = offsetof(String, unicode) + (((srcStringPtr->allocated) + 1U) * sizeof(unsigned short));
+ String *copyStringPtr = (String *)ckalloc(size);
+ memcpy(copyStringPtr, srcStringPtr, size);
+
+ SET_STRING(copyPtr, copyStringPtr);
+ copyPtr->typePtr = &tclStringType;
+}
+
+static int
+SetUTF16StringFromAny(
+ TCL_UNUSED(Tcl_Interp *),
+ Tcl_Obj *objPtr) /* The object to convert. */
+{
+ if (!TclHasInternalRep(objPtr, &tclStringType)) {
+ Tcl_DString ds;
+
+ /*
+ * Convert whatever we have into an untyped value. Just A String.
+ */
+
+ (void) TclGetString(objPtr);
+ TclFreeInternalRep(objPtr);
+
+ /*
+ * Create a basic String internalrep that just points to the UTF-8 string
+ * already in place at objPtr->bytes.
+ */
+
+ Tcl_DStringInit(&ds);
+ unsigned short *utf16string = Tcl_UtfToChar16DString(objPtr->bytes, objPtr->length, &ds);
+ int size = Tcl_DStringLength(&ds);
+ String *stringPtr = (String *)ckalloc((offsetof(String, unicode) + sizeof(unsigned short)) + size);
+
+ memcpy(stringPtr->unicode, utf16string, size);
+ Tcl_DStringFree(&ds);
+ size /= sizeof(unsigned short);
+ stringPtr->unicode[size] = 0;
+
+ stringPtr->numChars = size;
+ stringPtr->allocated = size;
+ stringPtr->maxChars = size;
+ stringPtr->hasUnicode = 1;
+ SET_STRING(objPtr, stringPtr);
+ objPtr->typePtr = &tclStringType;
+ }
+ return TCL_OK;
+}
+
+static void
+UpdateStringOfUTF16String(
+ Tcl_Obj *objPtr) /* Object with string rep to update. */
+{
+ Tcl_DString ds;
+ String *stringPtr = GET_STRING(objPtr);
+
+ Tcl_DStringInit(&ds);
+ const char *string = Tcl_Char16ToUtfDString(stringPtr->unicode, stringPtr->numChars, &ds);
+
+ char *bytes = (char *)ckalloc(Tcl_DStringLength(&ds) + 1U);
+ memcpy(bytes, string, Tcl_DStringLength(&ds));
+ bytes[Tcl_DStringLength(&ds)] = 0;
+ objPtr->bytes = bytes;
+ objPtr->length = Tcl_DStringLength(&ds);
+ Tcl_DStringFree(&ds);
+}
+#endif
+
+#endif
+
/*
* TCL STRING GROWTH ALGORITHM
*
@@ -138,7 +300,7 @@ GrowStringBuffer(
* flag || objPtr->bytes != NULL
*/
- String *stringPtr = GET_STRING(objPtr);
+ UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr);
char *ptr = NULL;
int attempt;
@@ -185,10 +347,10 @@ GrowUnicodeBuffer(
* Pre-conditions:
* objPtr->typePtr == &tclStringType
* needed > stringPtr->maxChars
- * needed < STRING_MAXCHARS
+ * needed < UNICHAR_STRING_MAXCHARS
*/
- String *ptr = NULL, *stringPtr = GET_STRING(objPtr);
+ UniCharString *ptr = NULL, *stringPtr = GET_UNICHAR_STRING(objPtr);
int attempt;
if (stringPtr->maxChars > 0) {
@@ -196,9 +358,9 @@ GrowUnicodeBuffer(
* Subsequent appends - apply the growth algorithm.
*/
- if (needed <= STRING_MAXCHARS / 2) {
+ if (needed <= UNICHAR_STRING_MAXCHARS / 2) {
attempt = 2 * needed;
- ptr = stringAttemptRealloc(stringPtr, attempt);
+ ptr = uniCharStringAttemptRealloc(stringPtr, attempt);
}
if (ptr == NULL) {
/*
@@ -206,13 +368,13 @@ GrowUnicodeBuffer(
* overflow into invalid argument values for attempt.
*/
- unsigned int limit = STRING_MAXCHARS - needed;
+ unsigned int limit = UNICHAR_STRING_MAXCHARS - needed;
unsigned int extra = needed - stringPtr->numChars
+ TCL_MIN_UNICHAR_GROWTH;
int growth = (int) ((extra > limit) ? limit : extra);
attempt = needed + growth;
- ptr = stringAttemptRealloc(stringPtr, attempt);
+ ptr = uniCharStringAttemptRealloc(stringPtr, attempt);
}
}
if (ptr == NULL) {
@@ -221,11 +383,11 @@ GrowUnicodeBuffer(
*/
attempt = needed;
- ptr = stringRealloc(stringPtr, attempt);
+ ptr = uniCharStringRealloc(stringPtr, attempt);
}
stringPtr = ptr;
stringPtr->maxChars = attempt;
- SET_STRING(objPtr, stringPtr);
+ SET_UNICHAR_STRING(objPtr, stringPtr);
}
/*
@@ -374,7 +536,7 @@ Tcl_DbNewStringObj(
*/
Tcl_Obj *
-Tcl_NewUnicodeObj(
+TclNewUnicodeObj(
const Tcl_UniChar *unicode, /* The unicode string used to initialize the
* new object. */
int numChars) /* Number of characters in the unicode
@@ -387,6 +549,35 @@ Tcl_NewUnicodeObj(
return objPtr;
}
+#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED)
+Tcl_Obj *
+Tcl_NewUnicodeObj(
+ const unsigned short *unicode, /* The unicode string used to initialize the
+ * new object. */
+ int numChars) /* Number of characters in the unicode
+ * string. */
+{
+ Tcl_Obj *objPtr;
+
+ TclNewObj(objPtr);
+ TclInvalidateStringRep(objPtr);
+
+ String *stringPtr = (String *)ckalloc((offsetof(String, unicode)
+ + sizeof(unsigned short)) + numChars * sizeof(unsigned short));
+ memcpy(stringPtr->unicode, unicode, numChars);
+ stringPtr->unicode[numChars] = 0;
+
+ stringPtr->numChars = numChars;
+ stringPtr->allocated = numChars;
+ stringPtr->maxChars = numChars;
+ stringPtr->hasUnicode = 1;
+ SET_STRING(objPtr, stringPtr);
+ objPtr->typePtr = &tclStringType;
+
+ return objPtr;
+}
+#endif
+
/*
*----------------------------------------------------------------------
*
@@ -405,11 +596,11 @@ Tcl_NewUnicodeObj(
*/
int
-Tcl_GetCharLength(
+TclGetCharLength(
Tcl_Obj *objPtr) /* The String object to get the num chars
* of. */
{
- String *stringPtr;
+ UniCharString *stringPtr;
int numChars;
/*
@@ -444,7 +635,7 @@ Tcl_GetCharLength(
*/
SetStringFromAny(NULL, objPtr);
- stringPtr = GET_STRING(objPtr);
+ stringPtr = GET_UNICHAR_STRING(objPtr);
numChars = stringPtr->numChars;
/*
@@ -452,12 +643,52 @@ Tcl_GetCharLength(
*/
if (numChars == -1) {
- TclNumUtfChars(numChars, objPtr->bytes, objPtr->length);
+ TclNumUtfCharsM(numChars, objPtr->bytes, objPtr->length);
stringPtr->numChars = numChars;
}
return numChars;
}
+#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED)
+#undef Tcl_GetCharLength
+int
+Tcl_GetCharLength(
+ Tcl_Obj *objPtr) /* The String object to get the num chars
+ * of. */
+{
+ int numChars;
+
+ /*
+ * Quick, no-shimmer return for short string reps.
+ */
+
+ if ((objPtr->bytes) && (objPtr->length < 2)) {
+ /* 0 bytes -> 0 chars; 1 byte -> 1 char */
+ return objPtr->length;
+ }
+
+ /*
+ * Optimize the case where we're really dealing with a bytearray object;
+ * we don't need to convert to a string to perform the get-length operation.
+ *
+ * Starting in Tcl 8.7, we check for a "pure" bytearray, because the
+ * machinery behind that test is using a proper bytearray ObjType. We
+ * could also compute length of an improper bytearray without shimmering
+ * but there's no value in that. We *want* to shimmer an improper bytearray
+ * because improper bytearrays have worthless internal reps.
+ */
+
+ if (TclIsPureByteArray(objPtr)) {
+
+ (void) Tcl_GetByteArrayFromObj(objPtr, &numChars);
+ } else {
+ Tcl_GetString(objPtr);
+ numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length);
+ }
+ return numChars;
+}
+#endif
+
/*
*----------------------------------------------------------------------
*
@@ -518,6 +749,8 @@ TclCheckEmptyString(
*----------------------------------------------------------------------
*/
+#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED)
+#undef Tcl_GetUniChar
int
Tcl_GetUniChar(
Tcl_Obj *objPtr, /* The object to get the Unicode charater
@@ -549,22 +782,78 @@ Tcl_GetUniChar(
* OK, need to work with the object as a string.
*/
- SetStringFromAny(NULL, objPtr);
+ SetUTF16StringFromAny(NULL, objPtr);
stringPtr = GET_STRING(objPtr);
+ if (index >= stringPtr->numChars) {
+ return -1;
+ }
+ ch = stringPtr->unicode[index];
+ /* See: bug [11ae2be95dac9417] */
+ if ((ch & 0xF800) == 0xD800) {
+ if (ch & 0x400) {
+ if ((index > 0)
+ && ((stringPtr->unicode[index-1] & 0xFC00) == 0xD800)) {
+ ch = -1; /* low surrogate preceded by high surrogate */
+ }
+ } else if ((++index < stringPtr->numChars)
+ && ((stringPtr->unicode[index] & 0xFC00) == 0xDC00)) {
+ /* high surrogate followed by low surrogate */
+ ch = (((ch & 0x3FF) << 10) |
+ (stringPtr->unicode[index] & 0x3FF)) + 0x10000;
+ }
+ }
+ return ch;
+}
+#endif
+
+int
+TclGetUniChar(
+ Tcl_Obj *objPtr, /* The object to get the Unicode charater
+ * from. */
+ int index) /* Get the index'th Unicode character. */
+{
+ UniCharString *stringPtr;
+ int ch, length;
+
+ if (index < 0) {
+ return -1;
+ }
+
+ /*
+ * Optimize the case where we're really dealing with a bytearray object
+ * we don't need to convert to a string to perform the indexing operation.
+ */
+
+ if (TclIsPureByteArray(objPtr)) {
+ unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length);
+ if (index >= length) {
+ return -1;
+ }
+
+ return (int) bytes[index];
+ }
+
+ /*
+ * OK, need to work with the object as a string.
+ */
+
+ SetStringFromAny(NULL, objPtr);
+ stringPtr = GET_UNICHAR_STRING(objPtr);
+
if (stringPtr->hasUnicode == 0) {
/*
* If numChars is unknown, compute it.
*/
if (stringPtr->numChars == -1) {
- TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length);
+ TclNumUtfCharsM(stringPtr->numChars, objPtr->bytes, objPtr->length);
}
if (stringPtr->numChars == objPtr->length) {
return (unsigned char) objPtr->bytes[index];
}
FillUnicodeRep(objPtr);
- stringPtr = GET_STRING(objPtr);
+ stringPtr = GET_UNICHAR_STRING(objPtr);
}
if (index >= stringPtr->numChars) {
@@ -612,12 +901,12 @@ Tcl_GetUniChar(
#undef Tcl_GetUnicodeFromObj
#ifndef TCL_NO_DEPRECATED
#undef Tcl_GetUnicode
-Tcl_UniChar *
+unsigned short *
Tcl_GetUnicode(
Tcl_Obj *objPtr) /* The object to find the unicode string
* for. */
{
- return Tcl_GetUnicodeFromObj(objPtr, (int *)NULL);
+ return TclGetUnicodeFromObj(objPtr, NULL);
}
#endif /* TCL_NO_DEPRECATED */
@@ -641,21 +930,21 @@ Tcl_GetUnicode(
*/
Tcl_UniChar *
-Tcl_GetUnicodeFromObj(
+TclGetUnicodeFromObj_(
Tcl_Obj *objPtr, /* The object to find the unicode string
* for. */
int *lengthPtr) /* If non-NULL, the location where the string
* rep's unichar length should be stored. If
* NULL, no length is stored. */
{
- String *stringPtr;
+ UniCharString *stringPtr;
SetStringFromAny(NULL, objPtr);
- stringPtr = GET_STRING(objPtr);
+ stringPtr = GET_UNICHAR_STRING(objPtr);
if (stringPtr->hasUnicode == 0) {
FillUnicodeRep(objPtr);
- stringPtr = GET_STRING(objPtr);
+ stringPtr = GET_UNICHAR_STRING(objPtr);
}
if (lengthPtr != NULL) {
@@ -663,7 +952,29 @@ Tcl_GetUnicodeFromObj(
}
return stringPtr->unicode;
}
-Tcl_UniChar *
+
+#if TCL_UTF_MAX > 3 && !defined(TCL_NO_DEPRECATED)
+unsigned short *
+Tcl_GetUnicodeFromObj(
+ Tcl_Obj *objPtr, /* The object to find the unicode string
+ * for. */
+ int *lengthPtr) /* If non-NULL, the location where the string
+ * rep's unichar length should be stored. If
+ * NULL, no length is stored. */
+{
+ String *stringPtr;
+
+ SetUTF16StringFromAny(NULL, objPtr);
+ stringPtr = GET_STRING(objPtr);
+
+ if (lengthPtr != NULL) {
+ *lengthPtr = stringPtr->numChars;
+ }
+ return stringPtr->unicode;
+}
+#endif
+
+unsigned short *
TclGetUnicodeFromObj(
Tcl_Obj *objPtr, /* The object to find the unicode string
* for. */
@@ -676,17 +987,8 @@ TclGetUnicodeFromObj(
SetStringFromAny(NULL, objPtr);
stringPtr = GET_STRING(objPtr);
- if (stringPtr->hasUnicode == 0) {
- FillUnicodeRep(objPtr);
- stringPtr = GET_STRING(objPtr);
- }
-
if (lengthPtr != NULL) {
-#if TCL_MAJOR_VERSION > 8
*lengthPtr = stringPtr->numChars;
-#else
- *lengthPtr = ((size_t)(unsigned)(stringPtr->numChars + 1)) - 1;
-#endif
}
return stringPtr->unicode;
}
@@ -709,6 +1011,8 @@ TclGetUnicodeFromObj(
*----------------------------------------------------------------------
*/
+#if TCL_UTF_MAX > 3 && !defined(TCL_NO_DEPRECATED)
+#undef Tcl_GetRange
Tcl_Obj *
Tcl_GetRange(
Tcl_Obj *objPtr, /* The Tcl object to find the range of. */
@@ -716,7 +1020,53 @@ Tcl_GetRange(
int last) /* Last index of the range. */
{
Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */
- String *stringPtr;
+ int length;
+
+ if (first < 0) {
+ first = 0;
+ }
+
+ /*
+ * Optimize the case where we're really dealing with a bytearray object
+ * we don't need to convert to a string to perform the substring operation.
+ */
+
+ if (TclIsPureByteArray(objPtr)) {
+ unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length);
+
+ if (last < 0 || last >= length) {
+ last = length - 1;
+ }
+ if (last < first) {
+ TclNewObj(newObjPtr);
+ return newObjPtr;
+ }
+ return Tcl_NewByteArrayObj(bytes + first, last - first + 1);
+ }
+
+ int numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length);
+
+ if (last >= numChars) {
+ last = numChars - 1;
+ }
+ if (last < first) {
+ TclNewObj(newObjPtr);
+ return newObjPtr;
+ }
+ const char *begin = Tcl_UtfAtIndex(objPtr->bytes, first);
+ const char *end = Tcl_UtfAtIndex(objPtr->bytes, last + 1);
+ return Tcl_NewStringObj(begin, end - begin);
+}
+#endif
+
+Tcl_Obj *
+TclGetRange(
+ Tcl_Obj *objPtr, /* The Tcl object to find the range of. */
+ int first, /* First index of the range. */
+ int last) /* Last index of the range. */
+{
+ Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */
+ UniCharString *stringPtr;
int length;
if (first < 0) {
@@ -746,7 +1096,7 @@ Tcl_GetRange(
*/
SetStringFromAny(NULL, objPtr);
- stringPtr = GET_STRING(objPtr);
+ stringPtr = GET_UNICHAR_STRING(objPtr);
if (stringPtr->hasUnicode == 0) {
/*
@@ -754,7 +1104,7 @@ Tcl_GetRange(
*/
if (stringPtr->numChars == -1) {
- TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length);
+ TclNumUtfCharsM(stringPtr->numChars, objPtr->bytes, objPtr->length);
}
if (stringPtr->numChars == objPtr->length) {
if (last < 0 || last >= stringPtr->numChars) {
@@ -771,12 +1121,12 @@ Tcl_GetRange(
*/
SetStringFromAny(NULL, newObjPtr);
- stringPtr = GET_STRING(newObjPtr);
+ stringPtr = GET_UNICHAR_STRING(newObjPtr);
stringPtr->numChars = newObjPtr->length;
return newObjPtr;
}
FillUnicodeRep(objPtr);
- stringPtr = GET_STRING(objPtr);
+ stringPtr = GET_UNICHAR_STRING(objPtr);
}
if (last < 0 || last >= stringPtr->numChars) {
last = stringPtr->numChars - 1;
@@ -797,7 +1147,7 @@ Tcl_GetRange(
++last;
}
#endif
- return Tcl_NewUnicodeObj(stringPtr->unicode + first, last - first + 1);
+ return TclNewUnicodeObj(stringPtr->unicode + first, last - first + 1);
}
/*
@@ -883,7 +1233,7 @@ Tcl_SetObjLength(
* representation of object, not including
* terminating null byte. */
{
- String *stringPtr;
+ UniCharString *stringPtr;
if (length < 0) {
/*
@@ -903,7 +1253,7 @@ Tcl_SetObjLength(
}
SetStringFromAny(NULL, objPtr);
- stringPtr = GET_STRING(objPtr);
+ stringPtr = GET_UNICHAR_STRING(objPtr);
if (objPtr->bytes != NULL) {
/*
@@ -935,10 +1285,10 @@ Tcl_SetObjLength(
* Changing length of pure unicode string.
*/
- stringCheckLimits(length);
+ uniCharStringCheckLimits(length);
if (length > stringPtr->maxChars) {
- stringPtr = stringRealloc(stringPtr, length);
- SET_STRING(objPtr, stringPtr);
+ stringPtr = uniCharStringRealloc(stringPtr, length);
+ SET_UNICHAR_STRING(objPtr, stringPtr);
stringPtr->maxChars = length;
}
@@ -988,7 +1338,7 @@ Tcl_AttemptSetObjLength(
* representation of object, not including
* terminating null byte. */
{
- String *stringPtr;
+ UniCharString *stringPtr;
if (length < 0) {
/*
@@ -1006,7 +1356,7 @@ Tcl_AttemptSetObjLength(
}
SetStringFromAny(NULL, objPtr);
- stringPtr = GET_STRING(objPtr);
+ stringPtr = GET_UNICHAR_STRING(objPtr);
if (objPtr->bytes != NULL) {
/*
@@ -1045,15 +1395,15 @@ Tcl_AttemptSetObjLength(
* Changing length of pure unicode string.
*/
- if (length > STRING_MAXCHARS) {
+ if (length > UNICHAR_STRING_MAXCHARS) {
return 0;
}
if (length > stringPtr->maxChars) {
- stringPtr = stringAttemptRealloc(stringPtr, length);
+ stringPtr = uniCharStringAttemptRealloc(stringPtr, length);
if (stringPtr == NULL) {
return 0;
}
- SET_STRING(objPtr, stringPtr);
+ SET_UNICHAR_STRING(objPtr, stringPtr);
stringPtr->maxChars = length;
}
@@ -1089,20 +1439,47 @@ Tcl_AttemptSetObjLength(
*---------------------------------------------------------------------------
*/
+#if !defined(TCL_NO_DEPRECATED)
void
Tcl_SetUnicodeObj(
Tcl_Obj *objPtr, /* The object to set the string of. */
- const Tcl_UniChar *unicode, /* The unicode string used to initialize the
+ const unsigned short *unicode, /* The unicode string used to initialize the
* object. */
int numChars) /* Number of characters in the unicode
* string. */
{
- if (Tcl_IsShared(objPtr)) {
- Tcl_Panic("%s called with shared object", "Tcl_SetUnicodeObj");
+ String *stringPtr;
+
+ if (numChars < 0) {
+ numChars = 0;
+
+ if (unicode) {
+ while (numChars >= 0 && unicode[numChars] != 0) {
+ numChars++;
+ }
+ }
+ stringCheckLimits(numChars);
}
- TclFreeInternalRep(objPtr);
- SetUnicodeObj(objPtr, unicode, numChars);
+
+ /*
+ * Allocate enough space for the String structure + Unicode string.
+ */
+
+ stringCheckLimits(numChars);
+ stringPtr = stringAlloc(numChars);
+ SET_STRING(objPtr, stringPtr);
+ objPtr->typePtr = &tclStringType;
+
+ stringPtr->maxChars = numChars;
+ memcpy(stringPtr->unicode, unicode, numChars * sizeof(unsigned char));
+ stringPtr->unicode[numChars] = 0;
+ stringPtr->numChars = numChars;
+ stringPtr->hasUnicode = 1;
+
+ TclInvalidateStringRep(objPtr);
+ stringPtr->allocated = numChars;
}
+#endif
static int
UnicodeLength(
@@ -1115,7 +1492,7 @@ UnicodeLength(
numChars++;
}
}
- stringCheckLimits(numChars);
+ uniCharStringCheckLimits(numChars);
return numChars;
}
@@ -1127,7 +1504,7 @@ SetUnicodeObj(
int numChars) /* Number of characters in the unicode
* string. */
{
- String *stringPtr;
+ UniCharString *stringPtr;
if (numChars < 0) {
numChars = UnicodeLength(unicode);
@@ -1137,10 +1514,10 @@ SetUnicodeObj(
* Allocate enough space for the String structure + Unicode string.
*/
- stringCheckLimits(numChars);
- stringPtr = stringAlloc(numChars);
- SET_STRING(objPtr, stringPtr);
- objPtr->typePtr = &tclStringType;
+ uniCharStringCheckLimits(numChars);
+ stringPtr = uniCharStringAlloc(numChars);
+ SET_UNICHAR_STRING(objPtr, stringPtr);
+ objPtr->typePtr = &tclUniCharStringType;
stringPtr->maxChars = numChars;
memcpy(stringPtr->unicode, unicode, numChars * sizeof(Tcl_UniChar));
@@ -1184,7 +1561,7 @@ Tcl_AppendLimitedToObj(
* object to indicate not all available bytes
* at "bytes" were appended. */
{
- String *stringPtr;
+ UniCharString *stringPtr;
int toCopy = 0;
int eLen = 0;
@@ -1223,13 +1600,13 @@ Tcl_AppendLimitedToObj(
}
SetStringFromAny(NULL, objPtr);
- stringPtr = GET_STRING(objPtr);
+ stringPtr = GET_UNICHAR_STRING(objPtr);
/* If appended string starts with a continuation byte or a lower surrogate,
* force objPtr to unicode representation. See [7f1162a867] */
if (bytes && ISCONTINUATION(bytes)) {
- Tcl_GetUnicode(objPtr);
- stringPtr = GET_STRING(objPtr);
+ TclGetUnicodeFromObj_(objPtr, NULL);
+ stringPtr = GET_UNICHAR_STRING(objPtr);
}
if (stringPtr->hasUnicode && stringPtr->numChars > 0) {
AppendUtfToUnicodeRep(objPtr, bytes, toCopy);
@@ -1241,7 +1618,7 @@ Tcl_AppendLimitedToObj(
return;
}
- stringPtr = GET_STRING(objPtr);
+ stringPtr = GET_UNICHAR_STRING(objPtr);
if (stringPtr->hasUnicode && stringPtr->numChars > 0) {
AppendUtfToUnicodeRep(objPtr, ellipsis, eLen);
} else {
@@ -1296,13 +1673,13 @@ Tcl_AppendToObj(
*/
void
-Tcl_AppendUnicodeToObj(
+TclAppendUnicodeToObj(
Tcl_Obj *objPtr, /* Points to the object to append to. */
const Tcl_UniChar *unicode, /* The unicode string to append to the
* object. */
int length) /* Number of chars in "unicode". */
{
- String *stringPtr;
+ UniCharString *stringPtr;
if (Tcl_IsShared(objPtr)) {
Tcl_Panic("%s called with shared object", "Tcl_AppendUnicodeToObj");
@@ -1313,7 +1690,7 @@ Tcl_AppendUnicodeToObj(
}
SetStringFromAny(NULL, objPtr);
- stringPtr = GET_STRING(objPtr);
+ stringPtr = GET_UNICHAR_STRING(objPtr);
/*
* If objPtr has a valid Unicode rep, then append the "unicode" to the
@@ -1328,6 +1705,34 @@ Tcl_AppendUnicodeToObj(
}
}
+#if TCL_UTF_MAX > 3 && !defined(TCL_NO_DEPRECATED)
+void
+Tcl_AppendUnicodeToObj(
+ Tcl_Obj *objPtr, /* Points to the object to append to. */
+ const unsigned short *unicode, /* The unicode string to append to the
+ * object. */
+ int length) /* Number of chars in "unicode". */
+{
+ String *stringPtr;
+
+ if (Tcl_IsShared(objPtr)) {
+ Tcl_Panic("%s called with shared object", "Tcl_AppendUnicodeToObj");
+ }
+
+ if (length == 0) {
+ return;
+ }
+
+ SetStringFromAny(NULL, objPtr);
+ stringPtr = GET_STRING(objPtr);
+ stringPtr = stringAttemptRealloc(stringPtr, stringPtr->numChars + length);
+ memcpy(&stringPtr->unicode[stringPtr->numChars], unicode, length);
+ stringPtr->maxChars = stringPtr->allocated = stringPtr->numChars += length;
+ stringPtr->unicode[stringPtr->numChars] = 0;
+ SET_STRING(objPtr, stringPtr);
+}
+#endif
+
/*
*----------------------------------------------------------------------
*
@@ -1353,7 +1758,7 @@ Tcl_AppendObjToObj(
Tcl_Obj *objPtr, /* Points to the object to append to. */
Tcl_Obj *appendObjPtr) /* Object to append. */
{
- String *stringPtr;
+ UniCharString *stringPtr;
int length, numChars, appendNumChars = -1;
const char *bytes;
@@ -1426,14 +1831,14 @@ Tcl_AppendObjToObj(
*/
SetStringFromAny(NULL, objPtr);
- stringPtr = GET_STRING(objPtr);
+ stringPtr = GET_UNICHAR_STRING(objPtr);
/* If appended string starts with a continuation byte or a lower surrogate,
* force objPtr to unicode representation. See [7f1162a867]
* This fixes append-3.4, append-3.7 and utf-1.18 testcases. */
if (ISCONTINUATION(TclGetString(appendObjPtr))) {
- Tcl_GetUnicode(objPtr);
- stringPtr = GET_STRING(objPtr);
+ TclGetUnicodeFromObj_(objPtr, NULL);
+ stringPtr = GET_UNICHAR_STRING(objPtr);
}
/*
* If objPtr has a valid Unicode rep, then get a Unicode string from
@@ -1445,9 +1850,9 @@ Tcl_AppendObjToObj(
* If appendObjPtr is not of the "String" type, don't convert it.
*/
- if (TclHasInternalRep(appendObjPtr, &tclStringType)) {
+ if (TclHasInternalRep(appendObjPtr, &tclUniCharStringType)) {
Tcl_UniChar *unicode =
- Tcl_GetUnicodeFromObj(appendObjPtr, &numChars);
+ TclGetUnicodeFromObj_(appendObjPtr, &numChars);
AppendUnicodeToUnicodeRep(objPtr, unicode, numChars);
} else {
@@ -1466,8 +1871,8 @@ Tcl_AppendObjToObj(
bytes = TclGetStringFromObj(appendObjPtr, &length);
numChars = stringPtr->numChars;
- if ((numChars >= 0) && TclHasInternalRep(appendObjPtr, &tclStringType)) {
- String *appendStringPtr = GET_STRING(appendObjPtr);
+ if ((numChars >= 0) && TclHasInternalRep(appendObjPtr, &tclUniCharStringType)) {
+ UniCharString *appendStringPtr = GET_UNICHAR_STRING(appendObjPtr);
appendNumChars = appendStringPtr->numChars;
}
@@ -1502,7 +1907,7 @@ AppendUnicodeToUnicodeRep(
const Tcl_UniChar *unicode, /* String to append. */
int appendNumChars) /* Number of chars of "unicode" to append. */
{
- String *stringPtr;
+ UniCharString *stringPtr;
int numChars;
if (appendNumChars < 0) {
@@ -1513,7 +1918,7 @@ AppendUnicodeToUnicodeRep(
}
SetStringFromAny(NULL, objPtr);
- stringPtr = GET_STRING(objPtr);
+ stringPtr = GET_UNICHAR_STRING(objPtr);
/*
* If not enough space has been allocated for the unicode rep, reallocate
@@ -1524,7 +1929,7 @@ AppendUnicodeToUnicodeRep(
*/
numChars = stringPtr->numChars + appendNumChars;
- stringCheckLimits(numChars);
+ uniCharStringCheckLimits(numChars);
if (numChars > stringPtr->maxChars) {
int offset = -1;
@@ -1541,7 +1946,7 @@ AppendUnicodeToUnicodeRep(
}
GrowUnicodeBuffer(objPtr, numChars);
- stringPtr = GET_STRING(objPtr);
+ stringPtr = GET_UNICHAR_STRING(objPtr);
/*
* Relocate unicode if needed; see above.
@@ -1591,7 +1996,7 @@ AppendUnicodeToUtfRep(
const Tcl_UniChar *unicode, /* String to convert to UTF. */
int numChars) /* Number of chars of "unicode" to convert. */
{
- String *stringPtr = GET_STRING(objPtr);
+ UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr);
numChars = ExtendStringRepWithUnicode(objPtr, unicode, numChars);
@@ -1624,7 +2029,7 @@ AppendUtfToUnicodeRep(
const char *bytes, /* String to convert to Unicode. */
int numBytes) /* Number of bytes of "bytes" to convert. */
{
- String *stringPtr;
+ UniCharString *stringPtr;
if (numBytes == 0) {
return;
@@ -1632,7 +2037,7 @@ AppendUtfToUnicodeRep(
ExtendUnicodeRepWithString(objPtr, bytes, numBytes, -1);
TclInvalidateStringRep(objPtr);
- stringPtr = GET_STRING(objPtr);
+ stringPtr = GET_UNICHAR_STRING(objPtr);
stringPtr->allocated = 0;
}
@@ -1660,7 +2065,7 @@ AppendUtfToUtfRep(
const char *bytes, /* String to append. */
int numBytes) /* Number of bytes of "bytes" to append. */
{
- String *stringPtr;
+ UniCharString *stringPtr;
int newLength, oldLength;
if (numBytes == 0) {
@@ -1681,7 +2086,7 @@ AppendUtfToUtfRep(
}
newLength = numBytes + oldLength;
- stringPtr = GET_STRING(objPtr);
+ stringPtr = GET_UNICHAR_STRING(objPtr);
if (newLength > stringPtr->allocated) {
int offset = -1;
@@ -2085,12 +2490,12 @@ Tcl_AppendFormatToObj(
goto errorMsg;
case 's':
if (gotPrecision) {
- numChars = Tcl_GetCharLength(segment);
+ numChars = TclGetCharLength(segment);
if (precision < numChars) {
if (precision < 1) {
TclNewObj(segment);
} else {
- segment = Tcl_GetRange(segment, 0, precision - 1);
+ segment = TclGetRange(segment, 0, precision - 1);
}
numChars = precision;
Tcl_IncrRefCount(segment);
@@ -2270,7 +2675,7 @@ Tcl_AppendFormatToObj(
gotZero = 0;
}
if (gotZero) {
- length += Tcl_GetCharLength(segment);
+ length += TclGetCharLength(segment);
if (length < width) {
segmentLimit -= width - length;
}
@@ -2401,7 +2806,7 @@ Tcl_AppendFormatToObj(
gotZero = 0;
}
if (gotZero) {
- length += Tcl_GetCharLength(segment);
+ length += TclGetCharLength(segment);
if (length < width) {
segmentLimit -= width - length;
}
@@ -2512,7 +2917,7 @@ Tcl_AppendFormatToObj(
}
if (width>0 && numChars<0) {
- numChars = Tcl_GetCharLength(segment);
+ numChars = TclGetCharLength(segment);
}
if (!gotMinus && width>0) {
if (numChars < width) {
@@ -2873,13 +3278,13 @@ TclGetStringStorage(
Tcl_Obj *objPtr,
unsigned int *sizePtr)
{
- String *stringPtr;
+ UniCharString *stringPtr;
- if (!TclHasInternalRep(objPtr, &tclStringType) || objPtr->bytes == NULL) {
+ if (!TclHasInternalRep(objPtr, &tclUniCharStringType) || objPtr->bytes == NULL) {
return TclGetStringFromObj(objPtr, (int *)sizePtr);
}
- stringPtr = GET_STRING(objPtr);
+ stringPtr = GET_UNICHAR_STRING(objPtr);
*sizePtr = stringPtr->allocated;
return objPtr->bytes;
}
@@ -2923,8 +3328,8 @@ TclStringRepeat(
*/
if (!binary) {
- if (TclHasInternalRep(objPtr, &tclStringType)) {
- String *stringPtr = GET_STRING(objPtr);
+ if (TclHasInternalRep(objPtr, &tclUniCharStringType)) {
+ UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr);
if (stringPtr->hasUnicode) {
unichar = 1;
}
@@ -2936,7 +3341,7 @@ TclStringRepeat(
Tcl_GetByteArrayFromObj(objPtr, &length);
} else if (unichar) {
/* Result will be pure Tcl_UniChar array. Pre-size it. */
- Tcl_GetUnicodeFromObj(objPtr, &length);
+ TclGetUnicodeFromObj_(objPtr, &length);
} else {
/* Result will be concat of string reps. Pre-size it. */
Tcl_GetStringFromObj(objPtr, &length);
@@ -2976,7 +3381,7 @@ TclStringRepeat(
*/
if (!inPlace || Tcl_IsShared(objPtr)) {
- objResultPtr = Tcl_NewUnicodeObj(Tcl_GetUnicode(objPtr), length);
+ objResultPtr = TclNewUnicodeObj(TclGetUnicodeFromObj_(objPtr, NULL), length);
} else {
TclInvalidateStringRep(objPtr);
objResultPtr = objPtr;
@@ -2987,7 +3392,7 @@ TclStringRepeat(
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
"string size overflow: unable to alloc %"
TCL_Z_MODIFIER "u bytes",
- STRING_SIZE(count*length)));
+ UNICHAR_STRING_SIZE(count*length)));
Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
}
return NULL;
@@ -2997,7 +3402,7 @@ TclStringRepeat(
Tcl_AppendObjToObj(objResultPtr, objResultPtr);
done *= 2;
}
- Tcl_AppendUnicodeToObj(objResultPtr, Tcl_GetUnicode(objResultPtr),
+ TclAppendUnicodeToObj(objResultPtr, TclGetUnicodeFromObj_(objResultPtr, NULL),
(count - done) * length);
} else {
/*
@@ -3094,7 +3499,7 @@ TclStringCat(
binary = 0;
if (ov > objv+1 && ISCONTINUATION(TclGetString(objPtr))) {
forceUniChar = 1;
- } else if ((objPtr->typePtr) && (objPtr->typePtr != &tclStringType)) {
+ } else if ((objPtr->typePtr) && (objPtr->typePtr != &tclUniCharStringType)) {
/* Prevent shimmer of non-string types. */
allowUniChar = 0;
}
@@ -3102,7 +3507,7 @@ TclStringCat(
} else {
/* assert (objPtr->typePtr != NULL) -- stork! */
binary = 0;
- if (TclHasInternalRep(objPtr, &tclStringType)) {
+ if (TclHasInternalRep(objPtr, &tclUniCharStringType)) {
/* Have a pure Unicode value; ask to preserve it */
requestUniChar = 1;
} else {
@@ -3156,7 +3561,7 @@ TclStringCat(
if ((objPtr->bytes == NULL) || (objPtr->length)) {
int numChars;
- Tcl_GetUnicodeFromObj(objPtr, &numChars); /* PANIC? */
+ TclGetUnicodeFromObj_(objPtr, &numChars); /* PANIC? */
if (numChars) {
last = objc - oc;
if (length == 0) {
@@ -3306,43 +3711,43 @@ TclStringCat(
objResultPtr = *objv++; objc--;
/* Ugly interface! Force resize of the unicode array. */
- Tcl_GetUnicodeFromObj(objResultPtr, &start);
+ TclGetUnicodeFromObj_(objResultPtr, &start);
Tcl_InvalidateStringRep(objResultPtr);
if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) {
if (interp) {
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
"concatenation failed: unable to alloc %"
TCL_Z_MODIFIER "u bytes",
- STRING_SIZE(length)));
+ UNICHAR_STRING_SIZE(length)));
Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
}
return NULL;
}
- dst = Tcl_GetUnicode(objResultPtr) + start;
+ dst = TclGetUnicodeFromObj_(objResultPtr, NULL) + start;
} else {
Tcl_UniChar ch = 0;
/* Ugly interface! No scheme to init array size. */
- objResultPtr = Tcl_NewUnicodeObj(&ch, 0); /* PANIC? */
+ objResultPtr = TclNewUnicodeObj(&ch, 0); /* PANIC? */
if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) {
Tcl_DecrRefCount(objResultPtr);
if (interp) {
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
"concatenation failed: unable to alloc %"
TCL_Z_MODIFIER "u bytes",
- STRING_SIZE(length)));
+ UNICHAR_STRING_SIZE(length)));
Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
}
return NULL;
}
- dst = Tcl_GetUnicode(objResultPtr);
+ dst = TclGetUnicodeFromObj_(objResultPtr, NULL);
}
while (objc--) {
Tcl_Obj *objPtr = *objv++;
if ((objPtr->bytes == NULL) || (objPtr->length)) {
int more;
- Tcl_UniChar *src = Tcl_GetUnicodeFromObj(objPtr, &more);
+ Tcl_UniChar *src = TclGetUnicodeFromObj_(objPtr, &more);
memcpy(dst, src, more * sizeof(Tcl_UniChar));
dst += more;
}
@@ -3455,8 +3860,8 @@ TclStringCmp(
s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len);
s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len);
memCmpFn = memcmp;
- } else if (TclHasInternalRep(value1Ptr, &tclStringType)
- && TclHasInternalRep(value2Ptr, &tclStringType)) {
+ } else if (TclHasInternalRep(value1Ptr, &tclUniCharStringType)
+ && TclHasInternalRep(value2Ptr, &tclUniCharStringType)) {
/*
* Do a unicode-specific comparison if both of the args are of
* String type. If the char length == byte length, we can do a
@@ -3465,12 +3870,12 @@ TclStringCmp(
*/
if (nocase) {
- s1 = (char *) Tcl_GetUnicodeFromObj(value1Ptr, &s1len);
- s2 = (char *) Tcl_GetUnicodeFromObj(value2Ptr, &s2len);
- memCmpFn = (memCmpFn_t)(void *)Tcl_UniCharNcasecmp;
+ s1 = (char *) TclGetUnicodeFromObj_(value1Ptr, &s1len);
+ s2 = (char *) TclGetUnicodeFromObj_(value2Ptr, &s2len);
+ memCmpFn = (memCmpFn_t)(void *)TclUniCharNcasecmp;
} else {
- s1len = Tcl_GetCharLength(value1Ptr);
- s2len = Tcl_GetCharLength(value2Ptr);
+ s1len = TclGetCharLength(value1Ptr);
+ s2len = TclGetCharLength(value2Ptr);
if ((s1len == value1Ptr->length)
&& (value1Ptr->bytes != NULL)
&& (s2len == value2Ptr->length)
@@ -3479,8 +3884,8 @@ TclStringCmp(
s2 = value2Ptr->bytes;
memCmpFn = memcmp;
} else {
- s1 = (char *) Tcl_GetUnicode(value1Ptr);
- s2 = (char *) Tcl_GetUnicode(value2Ptr);
+ s1 = (char *) TclGetUnicodeFromObj_(value1Ptr, NULL);
+ s2 = (char *) TclGetUnicodeFromObj_(value2Ptr, NULL);
if (
#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3)
1
@@ -3492,7 +3897,7 @@ TclStringCmp(
s1len *= sizeof(Tcl_UniChar);
s2len *= sizeof(Tcl_UniChar);
} else {
- memCmpFn = (memCmpFn_t)(void *)Tcl_UniCharNcmp;
+ memCmpFn = (memCmpFn_t)(void *)TclUniCharNcmp;
}
}
}
@@ -3615,7 +4020,7 @@ TclStringFirst(
Tcl_Obj *haystack,
int start)
{
- int lh, ln = Tcl_GetCharLength(needle);
+ int lh, ln = TclGetCharLength(needle);
Tcl_Obj *result;
int value = -1;
Tcl_UniChar *checkStr, *endStr, *uh, *un;
@@ -3678,8 +4083,8 @@ TclStringFirst(
* do only the well-defined Tcl_UniChar array search.
*/
- un = Tcl_GetUnicodeFromObj(needle, &ln);
- uh = Tcl_GetUnicodeFromObj(haystack, &lh);
+ un = TclGetUnicodeFromObj_(needle, &ln);
+ uh = TclGetUnicodeFromObj_(haystack, &lh);
if ((lh < ln) || (start > lh - ln)) {
/* Don't start the loop if there cannot be a valid answer */
goto firstEnd;
@@ -3722,7 +4127,7 @@ TclStringLast(
Tcl_Obj *haystack,
int last)
{
- int lh, ln = Tcl_GetCharLength(needle);
+ int lh, ln = TclGetCharLength(needle);
Tcl_Obj *result;
int value = -1;
Tcl_UniChar *checkStr, *uh, *un;
@@ -3761,8 +4166,8 @@ TclStringLast(
goto lastEnd;
}
- uh = Tcl_GetUnicodeFromObj(haystack, &lh);
- un = Tcl_GetUnicodeFromObj(needle, &ln);
+ uh = TclGetUnicodeFromObj_(haystack, &lh);
+ un = TclGetUnicodeFromObj_(needle, &ln);
if (last >= lh) {
last = lh - 1;
@@ -3832,7 +4237,7 @@ TclStringReverse(
Tcl_Obj *objPtr,
int flags)
{
- String *stringPtr;
+ UniCharString *stringPtr;
Tcl_UniChar ch = 0;
int inPlace = flags & TCL_STRING_IN_PLACE;
#if TCL_UTF_MAX < 4
@@ -3851,11 +4256,11 @@ TclStringReverse(
}
SetStringFromAny(NULL, objPtr);
- stringPtr = GET_STRING(objPtr);
+ stringPtr = GET_UNICHAR_STRING(objPtr);
if (stringPtr->hasUnicode) {
- Tcl_UniChar *from = Tcl_GetUnicode(objPtr);
- stringPtr = GET_STRING(objPtr);
+ Tcl_UniChar *from = TclGetUnicodeFromObj_(objPtr, NULL);
+ stringPtr = GET_UNICHAR_STRING(objPtr);
Tcl_UniChar *src = from + stringPtr->numChars;
Tcl_UniChar *to;
@@ -3865,10 +4270,10 @@ TclStringReverse(
* Tcl_SetObjLength into growing the unicode rep buffer.
*/
- objPtr = Tcl_NewUnicodeObj(&ch, 1);
+ objPtr = TclNewUnicodeObj(&ch, 1);
Tcl_SetObjLength(objPtr, stringPtr->numChars);
- to = Tcl_GetUnicode(objPtr);
- stringPtr = GET_STRING(objPtr);
+ to = TclGetUnicodeFromObj_(objPtr, NULL);
+ stringPtr = GET_UNICHAR_STRING(objPtr);
while (--src >= from) {
#if TCL_UTF_MAX < 4
ch = *src;
@@ -4097,16 +4502,16 @@ TclStringReplace(
/* The traditional implementation... */
{
int numChars;
- Tcl_UniChar *ustring = Tcl_GetUnicodeFromObj(objPtr, &numChars);
+ Tcl_UniChar *ustring = TclGetUnicodeFromObj_(objPtr, &numChars);
/* TODO: Is there an in-place option worth pursuing here? */
- result = Tcl_NewUnicodeObj(ustring, first);
+ result = TclNewUnicodeObj(ustring, first);
if (insertPtr) {
Tcl_AppendObjToObj(result, insertPtr);
}
if (first + count < numChars) {
- Tcl_AppendUnicodeToObj(result, ustring + first + count,
+ TclAppendUnicodeToObj(result, ustring + first + count,
numChars - first - count);
}
@@ -4136,7 +4541,7 @@ FillUnicodeRep(
Tcl_Obj *objPtr) /* The object in which to fill the unicode
* rep. */
{
- String *stringPtr = GET_STRING(objPtr);
+ UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr);
ExtendUnicodeRepWithString(objPtr, objPtr->bytes, objPtr->length,
stringPtr->numChars);
@@ -4149,7 +4554,7 @@ ExtendUnicodeRepWithString(
int numBytes,
int numAppendChars)
{
- String *stringPtr = GET_STRING(objPtr);
+ UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr);
int needed, numOrigChars = 0;
Tcl_UniChar *dst, unichar = 0;
@@ -4157,14 +4562,14 @@ ExtendUnicodeRepWithString(
numOrigChars = stringPtr->numChars;
}
if (numAppendChars == -1) {
- TclNumUtfChars(numAppendChars, bytes, numBytes);
+ TclNumUtfCharsM(numAppendChars, bytes, numBytes);
}
needed = numOrigChars + numAppendChars;
- stringCheckLimits(needed);
+ uniCharStringCheckLimits(needed);
if (needed > stringPtr->maxChars) {
GrowUnicodeBuffer(objPtr, needed);
- stringPtr = GET_STRING(objPtr);
+ stringPtr = GET_UNICHAR_STRING(objPtr);
}
stringPtr->hasUnicode = 1;
@@ -4218,8 +4623,8 @@ DupStringInternalRep(
Tcl_Obj *copyPtr) /* Object with internal rep to set. Must not
* currently have an internal rep.*/
{
- String *srcStringPtr = GET_STRING(srcPtr);
- String *copyStringPtr = NULL;
+ UniCharString *srcStringPtr = GET_UNICHAR_STRING(srcPtr);
+ UniCharString *copyStringPtr = NULL;
if (srcStringPtr->numChars == -1) {
/*
@@ -4239,17 +4644,17 @@ DupStringInternalRep(
} else {
copyMaxChars = srcStringPtr->maxChars;
}
- copyStringPtr = stringAttemptAlloc(copyMaxChars);
+ copyStringPtr = uniCharStringAttemptAlloc(copyMaxChars);
if (copyStringPtr == NULL) {
copyMaxChars = srcStringPtr->numChars;
- copyStringPtr = stringAlloc(copyMaxChars);
+ copyStringPtr = uniCharStringAlloc(copyMaxChars);
}
copyStringPtr->maxChars = copyMaxChars;
memcpy(copyStringPtr->unicode, srcStringPtr->unicode,
srcStringPtr->numChars * sizeof(Tcl_UniChar));
copyStringPtr->unicode[srcStringPtr->numChars] = 0;
} else {
- copyStringPtr = stringAlloc(0);
+ copyStringPtr = uniCharStringAlloc(0);
copyStringPtr->maxChars = 0;
copyStringPtr->unicode[0] = 0;
}
@@ -4264,8 +4669,8 @@ DupStringInternalRep(
copyStringPtr->allocated = copyPtr->bytes ? copyPtr->length : 0;
- SET_STRING(copyPtr, copyStringPtr);
- copyPtr->typePtr = &tclStringType;
+ SET_UNICHAR_STRING(copyPtr, copyStringPtr);
+ copyPtr->typePtr = &tclUniCharStringType;
}
/*
@@ -4290,8 +4695,8 @@ SetStringFromAny(
TCL_UNUSED(Tcl_Interp *),
Tcl_Obj *objPtr) /* The object to convert. */
{
- if (!TclHasInternalRep(objPtr, &tclStringType)) {
- String *stringPtr = stringAlloc(0);
+ if (!TclHasInternalRep(objPtr, &tclUniCharStringType)) {
+ UniCharString *stringPtr = uniCharStringAlloc(0);
/*
* Convert whatever we have into an untyped value. Just A String.
@@ -4309,8 +4714,8 @@ SetStringFromAny(
stringPtr->allocated = objPtr->length;
stringPtr->maxChars = 0;
stringPtr->hasUnicode = 0;
- SET_STRING(objPtr, stringPtr);
- objPtr->typePtr = &tclStringType;
+ SET_UNICHAR_STRING(objPtr, stringPtr);
+ objPtr->typePtr = &tclUniCharStringType;
}
return TCL_OK;
}
@@ -4337,7 +4742,7 @@ static void
UpdateStringOfString(
Tcl_Obj *objPtr) /* Object with string rep to update. */
{
- String *stringPtr = GET_STRING(objPtr);
+ UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr);
/*
* This routine is only called when we need to generate the
@@ -4369,7 +4774,7 @@ ExtendStringRepWithUnicode(
int i, origLength, size = 0;
char *dst;
- String *stringPtr = GET_STRING(objPtr);
+ UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr);
if (numChars < 0) {
numChars = UnicodeLength(unicode);
@@ -4423,7 +4828,7 @@ ExtendStringRepWithUnicode(
*
* FreeStringInternalRep --
*
- * Deallocate the storage associated with a String data object's internal
+ * Deallocate the storage associated with a (UniChar)String data object's internal
* representation.
*
* Results:
diff --git a/generic/tclStringRep.h b/generic/tclStringRep.h
index 27c3fc2..faa2c2c 100644
--- a/generic/tclStringRep.h
+++ b/generic/tclStringRep.h
@@ -39,11 +39,6 @@
* Unicode reps of the String object with fewer mallocs. To optimize string
* length and indexing operations, this structure also stores the number of
* characters (same of UTF and Unicode!) once that value has been computed.
- *
- * Under normal configurations, what Tcl calls "Unicode" is actually UTF-16
- * restricted to the Basic Multilingual Plane (i.e. U+00000 to U+0FFFF). This
- * can be officially modified by altering the definition of Tcl_UniChar in
- * tcl.h, but do not do that unless you are sure what you're doing!
*/
typedef struct {
@@ -59,15 +54,15 @@ typedef struct {
* space allocated for the unicode array. */
int hasUnicode; /* Boolean determining whether the string has
* a Unicode representation. */
- Tcl_UniChar unicode[TCLFLEXARRAY]; /* The array of Unicode chars. The actual size
+ unsigned short unicode[TCLFLEXARRAY]; /* The array of Unicode chars. The actual size
* of this field depends on the 'maxChars'
* field above. */
} String;
#define STRING_MAXCHARS \
- (int)(((size_t)UINT_MAX - offsetof(String, unicode))/sizeof(Tcl_UniChar) - 1)
+ (int)(((size_t)UINT_MAX - offsetof(String, unicode))/sizeof(unsigned short) - 1)
#define STRING_SIZE(numChars) \
- (offsetof(String, unicode) + sizeof(Tcl_UniChar) + ((numChars) * sizeof(Tcl_UniChar)))
+ (offsetof(String, unicode) + sizeof(unsigned short) + ((numChars) * sizeof(unsigned short)))
#define stringCheckLimits(numChars) \
do { \
if ((numChars) < 0 || (numChars) > STRING_MAXCHARS) { \
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index 257c3ce..7d04481 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -48,6 +48,8 @@
#undef Tcl_UniCharCaseMatch
#undef Tcl_UniCharLen
#undef Tcl_UniCharNcmp
+#undef Tcl_GetRange
+#undef Tcl_GetUniChar
#undef Tcl_DumpActiveMemory
#undef Tcl_ValidateAllMemory
#undef Tcl_FindHashEntry
@@ -76,24 +78,31 @@
#undef Tcl_MacOSXOpenBundleResources
#undef TclWinConvertWSAError
#undef TclWinConvertError
+#undef Tcl_GetCharLength
+#undef Tcl_UtfAtIndex
+
#if defined(_WIN32) || defined(__CYGWIN__)
#define TclWinConvertWSAError (void (*)(DWORD))(void *)Tcl_WinConvertError
#define TclWinConvertError (void (*)(DWORD))(void *)Tcl_WinConvertError
#endif
-#if TCL_UTF_MAX > 3
+#if TCL_UTF_MAX > 3 && defined(TCL_NO_DEPRECATED)
static void uniCodePanic(void) {
- Tcl_Panic("This extension uses a deprecated function, not available now: Tcl is compiled with -DTCL_UTF_MAX==%d", TCL_UTF_MAX);
+ Tcl_Panic("Tcl is compiled without the the UTF16 compatibility layer (-DTCL_NO_DEPRECATED)");
}
-# define Tcl_GetUnicode (Tcl_UniChar *(*)(Tcl_Obj *))(void *)uniCodePanic
-# define Tcl_GetUnicodeFromObj (Tcl_UniChar *(*)(Tcl_Obj *, int *))(void *)uniCodePanic
-# define Tcl_NewUnicodeObj (Tcl_Obj *(*)(const int *, int))(void *)uniCodePanic
-# define Tcl_SetUnicodeObj (void(*)(Tcl_Obj *, const Tcl_UniChar *, int))(void *)uniCodePanic
-# define Tcl_AppendUnicodeToObj (void(*)(Tcl_Obj *, const Tcl_UniChar *, int))(void *)uniCodePanic
-# define Tcl_UniCharNcasecmp (int(*)(const Tcl_UniChar *, const Tcl_UniChar *, unsigned long))(void *)uniCodePanic
-# define Tcl_UniCharCaseMatch (int(*)(const Tcl_UniChar *, const Tcl_UniChar *, int))(void *)uniCodePanic
-# define Tcl_UniCharNcmp (int(*)(const Tcl_UniChar *, const Tcl_UniChar *, unsigned long))(void *)uniCodePanic
+# define Tcl_GetUnicode (unsigned short *(*)(Tcl_Obj *))(void *)uniCodePanic
+# define Tcl_GetUnicodeFromObj (unsigned short *(*)(Tcl_Obj *, int *))(void *)uniCodePanic
+# define Tcl_NewUnicodeObj (Tcl_Obj *(*)(const unsigned short *, int))(void *)uniCodePanic
+# define Tcl_SetUnicodeObj (void(*)(Tcl_Obj *, const unsigned short *, int))(void *)uniCodePanic
+# define Tcl_AppendUnicodeToObj (void(*)(Tcl_Obj *, const unsigned short *, int))(void *)uniCodePanic
+# define Tcl_UtfAtIndex (const char *(*)(const char *, int))(void *)uniCodePanic
+# define Tcl_GetCharLength (int(*)(Tcl_Obj *))(void *)uniCodePanic
+# define Tcl_UniCharNcmp (int(*)(const unsigned short *, const unsigned short *, unsigned long))(void *)uniCodePanic
+# define Tcl_UniCharNcasecmp (int(*)(const unsigned short *, const unsigned short *, unsigned long))(void *)uniCodePanic
+# define Tcl_UniCharCaseMatch (int(*)(const unsigned short *, const unsigned short *, int))(void *)uniCodePanic
+# define Tcl_GetRange (Tcl_Obj *(*)(Tcl_Obj *, int, int))(void *)uniCodePanic
+# define Tcl_GetUniChar (int(*)(Tcl_Obj *, int))(void *)uniCodePanic
#endif
#define TclUtfCharComplete UtfCharComplete
@@ -683,8 +692,8 @@ static int utfNcasecmp(const char *s1, const char *s2, unsigned int n){
# define Tcl_SetExitProc 0
# define Tcl_SetPanicProc 0
# define Tcl_FindExecutable 0
-# define Tcl_GetUnicode 0
#if TCL_UTF_MAX < 4
+# define Tcl_GetUnicode 0
# define Tcl_AppendUnicodeToObj 0
# define Tcl_UniCharCaseMatch 0
# define Tcl_UniCharNcasecmp 0
@@ -1950,6 +1959,11 @@ const TclStubs tclStubs = {
0, /* 666 */
0, /* 667 */
Tcl_UniCharLen, /* 668 */
+ TclNumUtfChars, /* 669 */
+ TclGetCharLength, /* 670 */
+ TclUtfAtIndex, /* 671 */
+ TclGetRange, /* 672 */
+ TclGetUniChar, /* 673 */
};
/* !END!: Do not edit above this line. */
diff --git a/generic/tclTest.c b/generic/tclTest.c
index 0ce5e83..cbd1f70 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -16,10 +16,13 @@
*/
#undef STATIC_BUILD
+#undef BUILD_tcl
#ifndef USE_TCL_STUBS
# define USE_TCL_STUBS
#endif
-#ifndef TCL_NO_DEPRECATED
+#ifdef TCL_NO_DEPRECATED
+# define TCL_UTF_MAX 4
+#else
# define TCL_NO_DEPRECATED
#endif
#include "tclInt.h"
@@ -6969,7 +6972,7 @@ TestUtfNextCmd(
int objc,
Tcl_Obj *const objv[])
{
- size_t numBytes;
+ int numBytes;
char *bytes;
const char *result, *first;
char buffer[32];
diff --git a/generic/tclTestObj.c b/generic/tclTestObj.c
index a235002..223eb98 100644
--- a/generic/tclTestObj.c
+++ b/generic/tclTestObj.c
@@ -13,7 +13,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*/
-
+#undef BUILD_tcl
#ifndef USE_TCL_STUBS
# define USE_TCL_STUBS
#endif
@@ -1073,8 +1073,9 @@ TestobjCmd(
Tcl_SetObjResult(interp, Tcl_NewStringObj("none", -1));
} else {
typeName = objv[2]->typePtr->name;
+ if (!strcmp(typeName, "utf32string")) typeName = "string";
#ifndef TCL_WIDE_INT_IS_LONG
- if (!strcmp(typeName, "wideInt")) typeName = "int";
+ else if (!strcmp(typeName, "wideInt")) typeName = "int";
#endif
Tcl_SetObjResult(interp, Tcl_NewStringObj(typeName, -1));
}
@@ -1153,7 +1154,7 @@ TeststringobjCmd(
int objc, /* Number of arguments. */
Tcl_Obj *const objv[]) /* Argument objects. */
{
- Tcl_UniChar *unicode;
+ unsigned short *unicode;
size_t varIndex;
int size, option, i;
Tcl_WideInt length;
@@ -1263,10 +1264,14 @@ TeststringobjCmd(
goto wrongNumArgs;
}
if (varPtr[varIndex] != NULL) {
- Tcl_ConvertToType(NULL, varPtr[varIndex],
- Tcl_GetObjType("string"));
- strPtr = (String *)varPtr[varIndex]->internalRep.twoPtrValue.ptr1;
- length = (int) strPtr->allocated;
+ const Tcl_ObjType *objType = Tcl_GetObjType("string");
+ if (objType != NULL) {
+ Tcl_ConvertToType(NULL, varPtr[varIndex], objType);
+ strPtr = (String *)varPtr[varIndex]->internalRep.twoPtrValue.ptr1;
+ length = (int) strPtr->allocated;
+ } else {
+ length = -1;
+ }
} else {
length = -1;
}
@@ -1317,10 +1322,14 @@ TeststringobjCmd(
goto wrongNumArgs;
}
if (varPtr[varIndex] != NULL) {
- Tcl_ConvertToType(NULL, varPtr[varIndex],
- Tcl_GetObjType("string"));
- strPtr = (String *)varPtr[varIndex]->internalRep.twoPtrValue.ptr1;
- length = strPtr->maxChars;
+ const Tcl_ObjType *objType = Tcl_GetObjType("string");
+ if (objType != NULL) {
+ Tcl_ConvertToType(NULL, varPtr[varIndex],objType);
+ strPtr = (String *)varPtr[varIndex]->internalRep.twoPtrValue.ptr1;
+ length = strPtr->maxChars;
+ } else {
+ length = -1;
+ }
} else {
length = -1;
}
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 169f240..82adf65 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -799,7 +799,7 @@ Tcl_UtfCharComplete(
*/
int
-Tcl_NumUtfChars(
+TclNumUtfChars(
const char *src, /* The UTF-8 string to measure. */
int length) /* The length of the string in bytes, or -1
* for strlen(string). */
@@ -850,6 +850,61 @@ Tcl_NumUtfChars(
return i;
}
+#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED)
+#undef Tcl_NumUtfChars
+int
+Tcl_NumUtfChars(
+ const char *src, /* The UTF-8 string to measure. */
+ int length) /* The length of the string in bytes, or -1
+ * for strlen(string). */
+{
+ unsigned short ch = 0;
+ int i = 0;
+
+ if (length < 0) {
+ /* string is NUL-terminated, so TclUtfToUniChar calls are safe. */
+ while ((*src != '\0') && (i < INT_MAX)) {
+ src += Tcl_UtfToChar16(src, &ch);
+ i++;
+ }
+ } else {
+ /* Will return value between 0 and length. No overflow checks. */
+
+ /* Pointer to the end of string. Never read endPtr[0] */
+ const char *endPtr = src + length;
+ /* Pointer to last byte where optimization still can be used */
+ const char *optPtr = endPtr - 4;
+
+ /*
+ * Optimize away the call in this loop. Justified because...
+ * when (src <= optPtr), (endPtr - src) >= (endPtr - optPtr)
+ * By initialization above (endPtr - optPtr) = TCL_UTF_MAX
+ * So (endPtr - src) >= TCL_UTF_MAX, and passing that to
+ * Tcl_UtfCharComplete we know will cause return of 1.
+ */
+ while (src <= optPtr
+ /* && Tcl_UtfCharComplete(src, endPtr - src) */ ) {
+ src += Tcl_UtfToChar16(src, &ch);
+ i++;
+ }
+ /* Loop over the remaining string where call must happen */
+ while (src < endPtr) {
+ if (Tcl_UtfCharComplete(src, endPtr - src)) {
+ src += Tcl_UtfToChar16(src, &ch);
+ } else {
+ /*
+ * src points to incomplete UTF-8 sequence
+ * Treat first byte as character and count it
+ */
+ src++;
+ }
+ i++;
+ }
+ }
+ return i;
+}
+#endif
+
/*
*---------------------------------------------------------------------------
*
@@ -1127,22 +1182,20 @@ Tcl_UniCharAtIndex(
const char *src, /* The UTF-8 string to dereference. */
int index) /* The position of the desired character. */
{
- Tcl_UniChar ch = 0;
+ unsigned short ch = 0;
int i = 0;
if (index < 0) {
return -1;
}
while (index-- > 0) {
- i = TclUtfToUniChar(src, &ch);
+ i = Tcl_UtfToChar16(src, &ch);
src += i;
}
-#if TCL_UTF_MAX < 4
if ((ch >= 0xD800) && (i < 3)) {
/* Index points at character following high Surrogate */
return -1;
}
-#endif
TclUtfToUCS4(src, &i);
return i;
}
@@ -1166,27 +1219,56 @@ Tcl_UniCharAtIndex(
*---------------------------------------------------------------------------
*/
+#if TCL_UTF_MAX < 4
+# undef Tcl_UtfToUniChar
+# define Tcl_UtfToUniChar Tcl_UtfToChar16
+#endif
+
const char *
-Tcl_UtfAtIndex(
+TclUtfAtIndex(
const char *src, /* The UTF-8 string. */
int index) /* The position of the desired character. */
{
- Tcl_UniChar ch = 0;
+ Tcl_UniChar ch = 0;
int len = 0;
while (index-- > 0) {
- len = TclUtfToUniChar(src, &ch);
+ len = (Tcl_UtfToUniChar)(src, &ch);
src += len;
}
#if TCL_UTF_MAX < 4
if ((ch >= 0xD800) && (len < 3)) {
/* Index points at character following high Surrogate */
- src += TclUtfToUniChar(src, &ch);
+ src += (Tcl_UtfToUniChar)(src, &ch);
}
#endif
return src;
}
+#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED)
+#undef Tcl_UtfAtIndex
+const char *
+Tcl_UtfAtIndex(
+ const char *src, /* The UTF-8 string. */
+ int index) /* The position of the desired character. */
+{
+ unsigned short ch = 0;
+ int len = 0;
+
+ while (index-- > 0) {
+ len = Tcl_UtfToChar16(src, &ch);
+ src += len;
+ }
+ if ((ch >= 0xD800) && (len < 3)) {
+ /* Index points at character following high Surrogate */
+ src += Tcl_UtfToChar16(src, &ch);
+ }
+ return src;
+}
+
+
+#endif
+
/*
*---------------------------------------------------------------------------
*
@@ -1849,7 +1931,7 @@ Tcl_UniCharLen(
*/
int
-Tcl_UniCharNcmp(
+TclUniCharNcmp(
const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */
const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */
unsigned long numChars) /* Number of unichars to compare. */
@@ -1868,21 +1950,47 @@ Tcl_UniCharNcmp(
for ( ; numChars != 0; ucs++, uct++, numChars--) {
if (*ucs != *uct) {
-#if TCL_UTF_MAX < 4
+ return (*ucs - *uct);
+ }
+ }
+ return 0;
+#endif /* WORDS_BIGENDIAN */
+}
+
+#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED)
+int
+Tcl_UniCharNcmp(
+ const unsigned short *ucs, /* Unicode string to compare to uct. */
+ const unsigned short *uct, /* Unicode string ucs is compared to. */
+ unsigned long numChars) /* Number of unichars to compare. */
+{
+#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3)
+ /*
+ * We are definitely on a big-endian machine; memcmp() is safe
+ */
+
+ return memcmp(ucs, uct, numChars*sizeof(Tcl_UniChar));
+
+#else /* !WORDS_BIGENDIAN */
+ /*
+ * We can't simply call memcmp() because that is not lexically correct.
+ */
+
+ for ( ; numChars != 0; ucs++, uct++, numChars--) {
+ if (*ucs != *uct) {
/* special case for handling upper surrogates */
if (((*ucs & 0xFC00) == 0xD800) && ((*uct & 0xFC00) != 0xD800)) {
return 1;
} else if (((*uct & 0xFC00) == 0xD800)) {
return -1;
}
-#endif
return (*ucs - *uct);
}
}
return 0;
#endif /* WORDS_BIGENDIAN */
}
-
+#endif
/*
*----------------------------------------------------------------------
*
@@ -1902,31 +2010,51 @@ Tcl_UniCharNcmp(
*/
int
-Tcl_UniCharNcasecmp(
+TclUniCharNcasecmp(
const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */
const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */
unsigned long numChars) /* Number of unichars to compare. */
{
for ( ; numChars != 0; numChars--, ucs++, uct++) {
if (*ucs != *uct) {
- Tcl_UniChar lcs = Tcl_UniCharToLower(*ucs);
- Tcl_UniChar lct = Tcl_UniCharToLower(*uct);
+ int lcs = Tcl_UniCharToLower(*ucs);
+ int lct = Tcl_UniCharToLower(*uct);
+
+ if (lcs != lct) {
+ return (lcs - lct);
+ }
+ }
+ }
+ return 0;
+}
+
+#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED)
+int
+Tcl_UniCharNcasecmp(
+ const unsigned short *ucs, /* Unicode string to compare to uct. */
+ const unsigned short *uct, /* Unicode string ucs is compared to. */
+ unsigned long numChars) /* Number of unichars to compare. */
+{
+ for ( ; numChars != 0; numChars--, ucs++, uct++) {
+ if (*ucs != *uct) {
+ unsigned short lcs = Tcl_UniCharToLower(*ucs);
+ unsigned short lct = Tcl_UniCharToLower(*uct);
if (lcs != lct) {
-#if TCL_UTF_MAX < 4
/* special case for handling upper surrogates */
if (((lcs & 0xFC00) == 0xD800) && ((lct & 0xFC00) != 0xD800)) {
return 1;
} else if (((lct & 0xFC00) == 0xD800)) {
return -1;
}
-#endif
return (lcs - lct);
}
}
}
return 0;
}
+#endif
+
/*
*----------------------------------------------------------------------
@@ -2290,14 +2418,182 @@ Tcl_UniCharIsWordChar(
*/
int
-Tcl_UniCharCaseMatch(
+TclUniCharCaseMatch(
const Tcl_UniChar *uniStr, /* Unicode String. */
const Tcl_UniChar *uniPattern,
/* Pattern, which may contain special
* characters. */
int nocase) /* 0 for case sensitive, 1 for insensitive */
{
- Tcl_UniChar ch1 = 0, p;
+ int ch1 = 0, p;
+
+ while (1) {
+ p = *uniPattern;
+
+ /*
+ * See if we're at the end of both the pattern and the string. If so,
+ * we succeeded. If we're at the end of the pattern but not at the end
+ * of the string, we failed.
+ */
+
+ if (p == 0) {
+ return (*uniStr == 0);
+ }
+ if ((*uniStr == 0) && (p != '*')) {
+ return 0;
+ }
+
+ /*
+ * Check for a "*" as the next pattern character. It matches any
+ * substring. We handle this by skipping all the characters up to the
+ * next matching one in the pattern, and then calling ourselves
+ * recursively for each postfix of string, until either we match or we
+ * reach the end of the string.
+ */
+
+ if (p == '*') {
+ /*
+ * Skip all successive *'s in the pattern
+ */
+
+ while (*(++uniPattern) == '*') {
+ /* empty body */
+ }
+ p = *uniPattern;
+ if (p == 0) {
+ return 1;
+ }
+ if (nocase) {
+ p = Tcl_UniCharToLower(p);
+ }
+ while (1) {
+ /*
+ * Optimization for matching - cruise through the string
+ * quickly if the next char in the pattern isn't a special
+ * character
+ */
+
+ if ((p != '[') && (p != '?') && (p != '\\')) {
+ if (nocase) {
+ while (*uniStr && (p != *uniStr)
+ && (p != Tcl_UniCharToLower(*uniStr))) {
+ uniStr++;
+ }
+ } else {
+ while (*uniStr && (p != *uniStr)) {
+ uniStr++;
+ }
+ }
+ }
+ if (TclUniCharCaseMatch(uniStr, uniPattern, nocase)) {
+ return 1;
+ }
+ if (*uniStr == 0) {
+ return 0;
+ }
+ uniStr++;
+ }
+ }
+
+ /*
+ * Check for a "?" as the next pattern character. It matches any
+ * single character.
+ */
+
+ if (p == '?') {
+ uniPattern++;
+ uniStr++;
+ continue;
+ }
+
+ /*
+ * Check for a "[" as the next pattern character. It is followed by a
+ * list of characters that are acceptable, or by a range (two
+ * characters separated by "-").
+ */
+
+ if (p == '[') {
+ int startChar, endChar;
+
+ uniPattern++;
+ ch1 = (nocase ? Tcl_UniCharToLower(*uniStr) : *uniStr);
+ uniStr++;
+ while (1) {
+ if ((*uniPattern == ']') || (*uniPattern == 0)) {
+ return 0;
+ }
+ startChar = (nocase ? Tcl_UniCharToLower(*uniPattern)
+ : *uniPattern);
+ uniPattern++;
+ if (*uniPattern == '-') {
+ uniPattern++;
+ if (*uniPattern == 0) {
+ return 0;
+ }
+ endChar = (nocase ? Tcl_UniCharToLower(*uniPattern)
+ : *uniPattern);
+ uniPattern++;
+ if (((startChar <= ch1) && (ch1 <= endChar))
+ || ((endChar <= ch1) && (ch1 <= startChar))) {
+ /*
+ * Matches ranges of form [a-z] or [z-a].
+ */
+ break;
+ }
+ } else if (startChar == ch1) {
+ break;
+ }
+ }
+ while (*uniPattern != ']') {
+ if (*uniPattern == 0) {
+ uniPattern--;
+ break;
+ }
+ uniPattern++;
+ }
+ uniPattern++;
+ continue;
+ }
+
+ /*
+ * If the next pattern character is '\', just strip off the '\' so we
+ * do exact matching on the character that follows.
+ */
+
+ if (p == '\\') {
+ if (*(++uniPattern) == '\0') {
+ return 0;
+ }
+ }
+
+ /*
+ * There's no special character. Just make sure that the next bytes of
+ * each string match.
+ */
+
+ if (nocase) {
+ if (Tcl_UniCharToLower(*uniStr) !=
+ Tcl_UniCharToLower(*uniPattern)) {
+ return 0;
+ }
+ } else if (*uniStr != *uniPattern) {
+ return 0;
+ }
+ uniStr++;
+ uniPattern++;
+ }
+}
+
+#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED)
+int
+Tcl_UniCharCaseMatch(
+ const unsigned short *uniStr, /* Unicode String. */
+ const unsigned short *uniPattern,
+ /* Pattern, which may contain special
+ * characters. */
+ int nocase) /* 0 for case sensitive, 1 for insensitive */
+{
+ unsigned short ch1 = 0, p;
while (1) {
p = *uniPattern;
@@ -2385,7 +2681,7 @@ Tcl_UniCharCaseMatch(
*/
if (p == '[') {
- Tcl_UniChar startChar, endChar;
+ unsigned short startChar, endChar;
uniPattern++;
ch1 = (nocase ? Tcl_UniCharToLower(*uniStr) : *uniStr);
@@ -2455,7 +2751,9 @@ Tcl_UniCharCaseMatch(
uniPattern++;
}
}
+#endif
+
/*
*----------------------------------------------------------------------
*
@@ -2680,7 +2978,7 @@ TclUtfToUCS4(
int *ucs4Ptr) /* Filled with the UCS4 codepoint represented
* by the UTF-8 string. */
{
- /* Make use of the #undef Tcl_UtfToUniChar above, which already handles UCS4. */
+# undef Tcl_UtfToUniChar
return Tcl_UtfToUniChar(src, ucs4Ptr);
}
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 66d1009..3537ecc 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -2591,11 +2591,11 @@ TclStringMatchObj(
trivial = nocase ? 0 : TclMatchIsTrivial(TclGetString(ptnObj));
*/
- if (TclHasInternalRep(strObj, &tclStringType) || (strObj->typePtr == NULL)) {
+ if (TclHasInternalRep(strObj, &tclUniCharStringType) || (strObj->typePtr == NULL)) {
Tcl_UniChar *udata, *uptn;
- udata = Tcl_GetUnicodeFromObj(strObj, &length);
- uptn = Tcl_GetUnicodeFromObj(ptnObj, &plen);
+ udata = TclGetUnicodeFromObj_(strObj, &length);
+ uptn = TclGetUnicodeFromObj_(ptnObj, &plen);
match = TclUniCharMatch(udata, length, uptn, plen, flags);
} else if (TclIsPureByteArray(strObj) && TclIsPureByteArray(ptnObj)
&& !flags) {
diff --git a/tests/obj.test b/tests/obj.test
index 4fa8d3a..7563422 100644
--- a/tests/obj.test
+++ b/tests/obj.test
@@ -19,11 +19,13 @@ if {"::tcltest" ni [namespace children]} {
::tcltest::loadTestedCommands
catch [list package require -exact tcl::test [info patchlevel]]
+package require tcltests
+
testConstraint testobj [llength [info commands testobj]]
testConstraint longIs32bit [expr {$tcl_platform(wordSize) == 4}]
testConstraint wideIs64bit [expr {wide(0x8000000000000000) < 0}]
-test obj-1.1 {Tcl_AppendAllObjTypes, and InitTypeTable, Tcl_RegisterObjType} testobj {
+test obj-1.1 {Tcl_AppendAllObjTypes, and InitTypeTable, Tcl_RegisterObjType} {testobj deprecated} {
set r 1
foreach {t} {
bytearray
diff --git a/tests/string.test b/tests/string.test
index 203d0c6..6863c23 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -422,7 +422,7 @@ test string-4.16.$noComp {string first, normal string vs pure unicode string} -b
# Representation checks are canaries
run {list [representationpoke $s] [representationpoke $m] \
[string first $m $s]}
-} -result {{string 1} {string 0} 2}
+} -match glob -result {{*string 1} {*string 0} 2}
test string-4.17.$noComp {string first, corner case} -body {
run {string first a aaa 4294967295}
} -result {-1}
diff --git a/tests/stringObj.test b/tests/stringObj.test
index abe02b2..0aa9a47 100644
--- a/tests/stringObj.test
+++ b/tests/stringObj.test
@@ -25,8 +25,9 @@ testConstraint testobj [llength [info commands testobj]]
testConstraint testbytestring [llength [info commands testbytestring]]
testConstraint testdstring [llength [info commands testdstring]]
testConstraint tip389 [expr {[string length \U010000] == 2}]
-
-test stringObj-1.1 {string type registration} testobj {
+testConstraint utf32 [expr {[string length [format %c 0x10000]] == 1}]
+
+test stringObj-1.1 {string type registration} {testobj deprecated} {
set t [testobj types]
set first [string first "string" $t]
set result [expr {$first >= 0}]
@@ -57,27 +58,27 @@ test stringObj-3.2 {Tcl_SetStringObj, existing non-"empty string" object} testob
lappend result [testobj refcount 1]
} {{} 512 foo string 2}
-test stringObj-4.1 {Tcl_SetObjLength procedure, string gets shorter} testobj {
+test stringObj-4.1 {Tcl_SetObjLength procedure, string gets shorter} {testobj utf32 deprecated} {
testobj freeallvars
teststringobj set 1 test
teststringobj setlength 1 3
list [teststringobj length 1] [teststringobj length2 1] \
[teststringobj get 1]
-} {3 4 tes}
-test stringObj-4.2 {Tcl_SetObjLength procedure, string gets longer} testobj {
+} {3 3 tes}
+test stringObj-4.2 {Tcl_SetObjLength procedure, string gets longer} {testobj deprecated} {
testobj freeallvars
teststringobj set 1 abcdef
teststringobj setlength 1 10
list [teststringobj length 1] [teststringobj length2 1]
} {10 10}
-test stringObj-4.3 {Tcl_SetObjLength procedure, string gets longer} testobj {
+test stringObj-4.3 {Tcl_SetObjLength procedure, string gets longer} {testobj utf32 deprecated} {
testobj freeallvars
teststringobj set 1 abcdef
teststringobj append 1 xyzq -1
list [teststringobj length 1] [teststringobj length2 1] \
[teststringobj get 1]
-} {10 20 abcdefxyzq}
-test stringObj-4.4 {Tcl_SetObjLength procedure, "expty string", length 0} testobj {
+} {10 10 abcdefxyzq}
+test stringObj-4.4 {Tcl_SetObjLength procedure, "expty string", length 0} {testobj deprecated} {
testobj freeallvars
testobj newobj 1
teststringobj setlength 1 0
@@ -97,7 +98,7 @@ test stringObj-5.2 {Tcl_AppendToObj procedure, length calculation} testobj {
teststringobj append 1 123 -1
teststringobj get 1
} {x y bbCC123}
-test stringObj-5.3 {Tcl_AppendToObj procedure, reallocating space} testobj {
+test stringObj-5.3 {Tcl_AppendToObj procedure, reallocating space} {testobj utf32 deprecated} {
testobj freeallvars
teststringobj set 1 xyz
teststringobj setlength 1 15
@@ -109,7 +110,7 @@ test stringObj-5.3 {Tcl_AppendToObj procedure, reallocating space} testobj {
teststringobj append 1 abcdef -1
lappend result [teststringobj length 1] [teststringobj length2 1] \
[teststringobj get 1]
-} {15 15 16 32 xy12345678abcdef}
+} {15 15 16 16 xy12345678abcdef}
test stringObj-6.1 {Tcl_AppendStringsToObj procedure, type conversion} testobj {
testobj freeallvars
@@ -135,13 +136,13 @@ test stringObj-6.4 {Tcl_AppendStringsToObj procedure, counting space} testobj {
teststringobj appendstrings 1 { 123 } abcdefg
list [teststringobj length 1] [teststringobj get 1]
} {15 {abc 123 abcdefg}}
-test stringObj-6.5 {Tcl_AppendStringsToObj procedure, don't double space if initial string empty} testobj {
+test stringObj-6.5 {Tcl_AppendStringsToObj procedure, don't double space if initial string empty} {testobj utf32 deprecated} {
testobj freeallvars
testobj newobj 1
teststringobj appendstrings 1 123 abcdefg
list [teststringobj length 1] [teststringobj length2 1] [teststringobj get 1]
-} {10 20 123abcdefg}
-test stringObj-6.6 {Tcl_AppendStringsToObj procedure, space reallocation} testobj {
+} {10 10 123abcdefg}
+test stringObj-6.6 {Tcl_AppendStringsToObj procedure, space reallocation} {testobj deprecated} {
testobj freeallvars
teststringobj set 1 abc
teststringobj setlength 1 10
@@ -150,7 +151,7 @@ test stringObj-6.6 {Tcl_AppendStringsToObj procedure, space reallocation} testob
list [teststringobj length 1] [teststringobj length2 1] \
[teststringobj get 1]
} {10 10 ab34567890}
-test stringObj-6.7 {Tcl_AppendStringsToObj procedure, space reallocation} testobj {
+test stringObj-6.7 {Tcl_AppendStringsToObj procedure, space reallocation} {testobj utf32 deprecated} {
testobj freeallvars
teststringobj set 1 abc
teststringobj setlength 1 10
@@ -158,8 +159,8 @@ test stringObj-6.7 {Tcl_AppendStringsToObj procedure, space reallocation} testob
teststringobj appendstrings 1 34567890x
list [teststringobj length 1] [teststringobj length2 1] \
[teststringobj get 1]
-} {11 22 ab34567890x}
-test stringObj-6.8 {Tcl_AppendStringsToObj procedure, object totally empty} testobj {
+} {11 11 ab34567890x}
+test stringObj-6.8 {Tcl_AppendStringsToObj procedure, object totally empty} {testobj deprecated} {
testobj freeallvars
testobj newobj 1
teststringobj appendstrings 1 {}
@@ -172,14 +173,14 @@ test stringObj-6.9 {Tcl_AppendStringToObj, pure unicode} testobj {
teststringobj get 1
} adcfoobarsoom
-test stringObj-7.1 {SetStringFromAny procedure} testobj {
+test stringObj-7.1 {SetStringFromAny procedure} {testobj utf32 deprecated} {
testobj freeallvars
teststringobj set2 1 [list a b]
teststringobj append 1 x -1
list [teststringobj length 1] [teststringobj length2 1] \
[teststringobj get 1]
-} {4 8 {a bx}}
-test stringObj-7.2 {SetStringFromAny procedure, null object} testobj {
+} {4 4 {a bx}}
+test stringObj-7.2 {SetStringFromAny procedure, null object} {testobj deprecated} {
testobj freeallvars
testobj newobj 1
teststringobj appendstrings 1 {}
@@ -197,7 +198,7 @@ test stringObj-7.4 {SetStringFromAny called with string obj} testobj {
[string length $x] [testobj objtype $x]
} {6 string 6 string}
-test stringObj-8.1 {DupStringInternalRep procedure} testobj {
+test stringObj-8.1 {DupStringInternalRep procedure} {testobj utf32 deprecated} {
testobj freeallvars
teststringobj set 1 {}
teststringobj append 1 abcde -1
@@ -206,7 +207,7 @@ test stringObj-8.1 {DupStringInternalRep procedure} testobj {
[teststringobj maxchars 1] [teststringobj get 1] \
[teststringobj length 2] [teststringobj length2 2] \
[teststringobj maxchars 2] [teststringobj get 2]
-} {5 10 0 abcde 5 5 0 abcde}
+} {5 5 5 abcde 5 5 5 abcde}
test stringObj-8.2 {DupUnicodeInternalRep, mixed width chars} testobj {
set x abc\xEF\xBF\xAEghi
string length $x
diff --git a/tests/utf.test b/tests/utf.test
index 477216c..c0d64e2 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -19,7 +19,7 @@ catch [list package require -exact tcl::test [info patchlevel]]
testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}]
testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}]
testConstraint utf16 [expr {[string length [format %c 0x10000]] == 2}]
-testConstraint ucs4 [expr {[testConstraint fullutf]
+testConstraint utf32 [expr {[testConstraint fullutf]
&& [string length [format %c 0x10000]] == 1}]
testConstraint Uesc [expr {"\U0041" eq "A"}]
@@ -131,7 +131,7 @@ test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {ucs2 testb
test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf16 {
string length 𐀀
} 2
-test utf-2.8.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} ucs4 {
+test utf-2.8.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf32 {
string length 𐀀
} 1
test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {ucs2 testbytestring} {
@@ -140,7 +140,7 @@ test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {ucs2 testb
test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf16 {
string length \U10FFFF
} 2
-test utf-2.9.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} ucs4 {
+test utf-2.9.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf32 {
string length \U10FFFF
} 1
test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring {
@@ -194,7 +194,7 @@ test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfc
test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs2} {
testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end
} 2
-test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs4} {
+test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring utf32} {
testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end
} 1
test utf-4.13 {Tcl_NumUtfChars: end of string} {testnumutfchars testbytestring} {
@@ -878,7 +878,7 @@ test utf-8.4 {Tcl_UniCharAtIndex: index > 0} {
test utf-8.5.0 {Tcl_UniCharAtIndex: high surrogate} ucs2 {
string index \uD842 0
} \uD842
-test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} ucs4 {
+test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} utf32 {
string index \uD842 0
} \uD842
test utf-8.5.2 {Tcl_UniCharAtIndex: high surrogate} utf16 {
@@ -890,7 +890,7 @@ test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} {
test utf-8.7.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
string index \uD83D\uDE00G 0
} \uD83D
-test utf-8.7.1 {Tcl_UniCharAtIndex: Emoji} ucs4 {
+test utf-8.7.1 {Tcl_UniCharAtIndex: Emoji} utf32 {
string index 😀G 0
} 😀
test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
@@ -899,7 +899,7 @@ test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
string index \uD83D\uDE00G 1
} \uDE00
-test utf-8.8.1 {Tcl_UniCharAtIndex: Emoji} ucs4 {
+test utf-8.8.1 {Tcl_UniCharAtIndex: Emoji} utf32 {
string index 😀G 1
} G
test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
@@ -908,7 +908,7 @@ test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
test utf-8.9.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
string index \uD83D\uDE00G 2
} G
-test utf-8.9.1 {Tcl_UniCharAtIndex: Emoji} ucs4 {
+test utf-8.9.1 {Tcl_UniCharAtIndex: Emoji} utf32 {
string index 😀G 2
} {}
test utf-8.9.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
@@ -917,7 +917,7 @@ test utf-8.9.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
test utf-8.10.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
string index 😀G 0
} \uFFFD
-test utf-8.10.1 {Tcl_UniCharAtIndex: Emoji} ucs4 {
+test utf-8.10.1 {Tcl_UniCharAtIndex: Emoji} utf32 {
string index 😀G 0
} 😀
test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
@@ -926,7 +926,7 @@ test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
test utf-8.11.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
string index 😀G 1
} G
-test utf-8.11.1 {Tcl_UniCharAtIndex: Emoji} ucs4 {
+test utf-8.11.1 {Tcl_UniCharAtIndex: Emoji} utf32 {
string index 😀G 1
} G
test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
@@ -935,7 +935,7 @@ test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
test utf-8.12.0 {Tcl_UniCharAtIndex: Emoji} ucs2 {
string index 😀G 2
} {}
-test utf-8.12.1 {Tcl_UniCharAtIndex: Emoji} ucs4 {
+test utf-8.12.1 {Tcl_UniCharAtIndex: Emoji} utf32 {
string index 😀G 2
} {}
test utf-8.12.2 {Tcl_UniCharAtIndex: Emoji} utf16 {
@@ -951,7 +951,7 @@ test utf-9.2 {Tcl_UtfAtIndex: index > 0} {
test utf-9.3.0 {Tcl_UtfAtIndex: index = 0, Emoji} ucs2 {
string range \uD83D\uDE00G 0 0
} \uD83D
-test utf-9.3.1 {Tcl_UtfAtIndex: index = 0, Emoji} ucs4 {
+test utf-9.3.1 {Tcl_UtfAtIndex: index = 0, Emoji} utf32 {
string range 😀G 0 0
} 😀
test utf-9.3.2 {Tcl_UtfAtIndex: index = 0, Emoji} utf16 {
@@ -960,7 +960,7 @@ test utf-9.3.2 {Tcl_UtfAtIndex: index = 0, Emoji} utf16 {
test utf-9.4.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 {
string range \uD83D\uDE00G 1 1
} \uDE00
-test utf-9.4.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 {
+test utf-9.4.1 {Tcl_UtfAtIndex: index > 0, Emoji} utf32 {
string range 😀G 1 1
} G
test utf-9.4.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 {
@@ -969,7 +969,7 @@ test utf-9.4.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 {
test utf-9.5.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 {
string range \uD83D\uDE00G 2 2
} G
-test utf-9.5.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 {
+test utf-9.5.1 {Tcl_UtfAtIndex: index > 0, Emoji} utf32 {
string range 😀G 2 2
} {}
test utf-9.5.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 {
@@ -978,7 +978,7 @@ test utf-9.5.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 {
test utf-9.6.0 {Tcl_UtfAtIndex: index = 0, Emoji} ucs2 {
string range 😀G 0 0
} \uFFFD
-test utf-9.6.1 {Tcl_UtfAtIndex: index = 0, Emoji} ucs4 {
+test utf-9.6.1 {Tcl_UtfAtIndex: index = 0, Emoji} utf32 {
string range 😀G 0 0
} 😀
test utf-9.6.2 {Tcl_UtfAtIndex: index = 0, Emoji} utf16 {
@@ -987,7 +987,7 @@ test utf-9.6.2 {Tcl_UtfAtIndex: index = 0, Emoji} utf16 {
test utf-9.7.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 {
string range 😀G 1 1
} G
-test utf-9.7.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 {
+test utf-9.7.1 {Tcl_UtfAtIndex: index > 0, Emoji} utf32 {
string range 😀G 1 1
} G
test utf-9.7.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 {
@@ -996,7 +996,7 @@ test utf-9.7.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 {
test utf-9.8.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 {
string range 😀G 2 2
} {}
-test utf-9.8.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 {
+test utf-9.8.1 {Tcl_UtfAtIndex: index > 0, Emoji} utf32 {
string range 😀G 2 2
} {}
test utf-9.8.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 {
@@ -1227,10 +1227,10 @@ test utf-19.1 {TclUniCharLen} -body {
unset -nocomplain foo
} -result {1 4}
-test utf-20.1 {TclUniCharNcmp} ucs4 {
+test utf-20.1 {TclUniCharNcmp} utf32 {
string compare [string range [format %c 0xFFFF] 0 0] [string range [format %c 0x10000] 0 0]
} -1
-test utf-20.2 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} {
+test utf-20.2 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} utf32 {
set one [format %c 0xFFFF]
set two [format %c 0x10000]
set first [string compare $one $two]
@@ -1357,10 +1357,10 @@ UniCharCaseCmpTest < a b
UniCharCaseCmpTest > b a
UniCharCaseCmpTest > B a
UniCharCaseCmpTest > aBcB abca
-UniCharCaseCmpTest < \uFFFF [format %c 0x10000] ucs4
-UniCharCaseCmpTest < \uFFFF \U10000 ucs4
-UniCharCaseCmpTest > [format %c 0x10000] \uFFFF ucs4
-UniCharCaseCmpTest > \U10000 \uFFFF ucs4
+UniCharCaseCmpTest < \uFFFF [format %c 0x10000] utf32
+UniCharCaseCmpTest < \uFFFF \U10000 utf32
+UniCharCaseCmpTest > [format %c 0x10000] \uFFFF utf32
+UniCharCaseCmpTest > \U10000 \uFFFF utf32
test utf-26.1 {Tcl_UniCharDString} -setup {
diff --git a/win/makefile.vc b/win/makefile.vc
index 1ef64f2..6ff6118 100644
--- a/win/makefile.vc
+++ b/win/makefile.vc
@@ -52,7 +52,7 @@
# turn on the 64-bit compiler, if your SDK has it.
#
# Basic macros and options usable on the commandline (see rules.vc for more info):
-# OPTS=msvcrt,noembed,nothreads,pdbs,profile,static,symbols,thrdalloc,time64bit,unchecked,utfmax,none
+# OPTS=msvcrt,noembed,nothreads,pdbs,profile,static,symbols,thrdalloc,time64bit,unchecked,utf16,none
# Sets special options for the core. The default is for none.
# Any combination of the above may be used (comma separated).
# 'none' will over-ride everything to nothing.
@@ -80,7 +80,7 @@
# unchecked = Allows a symbols build to not use the debug
# enabled runtime (msvcrt.dll not msvcrtd.dll
# or libcmt.lib not libcmtd.lib).
-# utfmax = Forces a build using UTF-32 representation internally.
+# utf16 = Forces a build using UTF-16 representation internally.
#
# STATS=compdbg,memdbg,none
# Sets optional memory and bytecode compiler debugging code added
diff --git a/win/rules.vc b/win/rules.vc
index a571899..3107756 100644
--- a/win/rules.vc
+++ b/win/rules.vc
@@ -816,8 +816,7 @@ DOTSEPARATED=$(DOTSEPARATED:b=.)
# configuration (ignored for Tcl itself)
# _USE_64BIT_TIME_T - forces a build using 64-bit time_t for 32-bit build
# (CRT library should support this, not needed for Tcl 9.x)
-# TCL_UTF_MAX=4 - forces a build allowing 4-byte UTF-8 sequences internally.
-# (Not needed for Tcl 9.x)
+# TCL_UTF_MAX=3 - forces a build using UTF-16 internally (not recommended).
# Further, LINKERFLAGS are modified based on above.
# Default values for all the above
@@ -884,9 +883,9 @@ USE_THREAD_ALLOC= 0
_USE_64BIT_TIME_T = 1
!endif
-!if [nmakehlp -f $(OPTS) "utfmax"]
-!message *** Force allowing 4-byte UTF-8 sequences internally
-TCL_UTF_MAX = 4
+!if [nmakehlp -f $(OPTS) "utf16"]
+!message *** Force UTF-16 internally
+TCL_UTF_MAX = 3
!endif
!endif
@@ -1423,13 +1422,13 @@ OPTDEFINES = $(OPTDEFINES) /DNO_STRTOI64=1
!if "$(_USE_64BIT_TIME_T)" == "1"
OPTDEFINES = $(OPTDEFINES) /D_USE_64BIT_TIME_T=1
!endif
-!if "$(TCL_UTF_MAX)" == "4"
-OPTDEFINES = $(OPTDEFINES) /DTCL_UTF_MAX=4
-!endif
# _ATL_XP_TARGETING - Newer SDK's need this to build for XP
COMPILERFLAGS = /D_ATL_XP_TARGETING
!endif
+!if "$(TCL_UTF_MAX)" == "3"
+OPTDEFINES = $(OPTDEFINES) /DTCL_UTF_MAX=3
+!endif
# Like the TEA system only set this non empty for non-Tk extensions
# Note: some extensions use PACKAGE_NAME and others use PACKAGE_TCLNAME