summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2022-03-12 23:21:12 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2022-03-12 23:21:12 (GMT)
commitd05ac0b98bbc039639b7784e313620b8d3757ddf (patch)
tree1cf8c03e114c6f7dc85fc732630a63128be5deaa
parente709f7c5392a8506414be6727a3f8b6bdd7fbae4 (diff)
downloadtcl-d05ac0b98bbc039639b7784e313620b8d3757ddf.zip
tcl-d05ac0b98bbc039639b7784e313620b8d3757ddf.tar.gz
tcl-d05ac0b98bbc039639b7784e313620b8d3757ddf.tar.bz2
Start defining "utf32string" type
-rw-r--r--generic/tcl.decls4
-rw-r--r--generic/tclCmdMZ.c62
-rw-r--r--generic/tclDecls.h8
-rw-r--r--generic/tclExecute.c18
-rw-r--r--generic/tclRegexp.c2
-rw-r--r--generic/tclStringObj.c139
-rw-r--r--generic/tclTestObj.c1
-rw-r--r--generic/tclUtil.c4
-rw-r--r--tests/string.test2
9 files changed, 149 insertions, 91 deletions
diff --git a/generic/tcl.decls b/generic/tcl.decls
index 9c83e81..6dbb457 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -1357,7 +1357,7 @@ declare 383 {
Tcl_Obj *Tcl_GetRange(Tcl_Obj *objPtr, int first, int last)
}
declare 384 {deprecated {Use Tcl_AppendStringsToObj}} {
- void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode,
+ void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const unsigned short *unicode,
int length)
}
declare 385 {
@@ -1541,7 +1541,7 @@ declare 433 {
# introduced in 8.4a3
declare 434 {
- Tcl_UniChar *Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr)
+ unsigned short *Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr)
}
# TIP#15 (math function introspection) dkf
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 11b383f..db4002a 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -606,9 +606,9 @@ Tcl_RegsubObjCmd(
nocase = (cflags & TCL_REG_NOCASE);
strCmpFn = nocase ? TclUniCharNcasecmp : TclUniCharNcmp;
- wsrc = Tcl_GetUnicodeFromObj(objv[0], &slen);
- wstring = Tcl_GetUnicodeFromObj(objv[1], &wlen);
- wsubspec = Tcl_GetUnicodeFromObj(objv[2], &wsublen);
+ wsrc = TclGetUnicodeFromObj_(objv[0], &slen);
+ wstring = TclGetUnicodeFromObj_(objv[1], &wlen);
+ wsubspec = TclGetUnicodeFromObj_(objv[2], &wsublen);
wend = wstring + wlen - (slen ? slen - 1 : 0);
result = TCL_OK;
@@ -622,8 +622,8 @@ Tcl_RegsubObjCmd(
resultPtr = TclNewUnicodeObj(wstring, 0);
Tcl_IncrRefCount(resultPtr);
for (; wstring < wend; wstring++) {
- Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen);
- Tcl_AppendUnicodeToObj(resultPtr, wstring, 1);
+ TclAppendUnicodeToObj(resultPtr, wsubspec, wsublen);
+ TclAppendUnicodeToObj(resultPtr, wstring, 1);
numMatches++;
}
wlen = 0;
@@ -640,14 +640,14 @@ Tcl_RegsubObjCmd(
Tcl_IncrRefCount(resultPtr);
}
if (p != wstring) {
- Tcl_AppendUnicodeToObj(resultPtr, p, wstring - p);
+ TclAppendUnicodeToObj(resultPtr, p, wstring - p);
p = wstring + slen;
} else {
p += slen;
}
wstring = p - 1;
- Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen);
+ TclAppendUnicodeToObj(resultPtr, wsubspec, wsublen);
numMatches++;
}
}
@@ -699,14 +699,14 @@ Tcl_RegsubObjCmd(
} else {
objPtr = objv[1];
}
- wstring = Tcl_GetUnicodeFromObj(objPtr, &wlen);
+ wstring = TclGetUnicodeFromObj_(objPtr, &wlen);
if (objv[2] == objv[0]) {
subPtr = Tcl_DuplicateObj(objv[2]);
} else {
subPtr = objv[2];
}
if (!command) {
- wsubspec = Tcl_GetUnicodeFromObj(subPtr, &wsublen);
+ wsubspec = TclGetUnicodeFromObj_(subPtr, &wsublen);
}
result = TCL_OK;
@@ -750,7 +750,7 @@ Tcl_RegsubObjCmd(
* specified.
*/
- Tcl_AppendUnicodeToObj(resultPtr, wstring, offset);
+ TclAppendUnicodeToObj(resultPtr, wstring, offset);
}
}
numMatches++;
@@ -763,7 +763,7 @@ Tcl_RegsubObjCmd(
Tcl_RegExpGetInfo(regExpr, &info);
start = info.matches[0].start;
end = info.matches[0].end;
- Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, start);
+ TclAppendUnicodeToObj(resultPtr, wstring + offset, start);
/*
* In command-prefix mode, the substitutions are added as quoted
@@ -826,7 +826,7 @@ Tcl_RegsubObjCmd(
* the user code.
*/
- wstring = Tcl_GetUnicodeFromObj(objPtr, &wlen);
+ wstring = TclGetUnicodeFromObj_(objPtr, &wlen);
offset += end;
if (end == 0 || start == end) {
@@ -838,7 +838,7 @@ Tcl_RegsubObjCmd(
*/
if (offset < wlen) {
- Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1);
+ TclAppendUnicodeToObj(resultPtr, wstring + offset, 1);
}
offset++;
}
@@ -867,7 +867,7 @@ Tcl_RegsubObjCmd(
idx = ch - '0';
} else if ((ch == '\\') || (ch == '&')) {
*wsrc = ch;
- Tcl_AppendUnicodeToObj(resultPtr, wfirstChar,
+ TclAppendUnicodeToObj(resultPtr, wfirstChar,
wsrc - wfirstChar + 1);
*wsrc = '\\';
wfirstChar = wsrc + 2;
@@ -881,7 +881,7 @@ Tcl_RegsubObjCmd(
}
if (wfirstChar != wsrc) {
- Tcl_AppendUnicodeToObj(resultPtr, wfirstChar,
+ TclAppendUnicodeToObj(resultPtr, wfirstChar,
wsrc - wfirstChar);
}
@@ -889,7 +889,7 @@ Tcl_RegsubObjCmd(
subStart = info.matches[idx].start;
subEnd = info.matches[idx].end;
if ((subStart >= 0) && (subEnd >= 0)) {
- Tcl_AppendUnicodeToObj(resultPtr,
+ TclAppendUnicodeToObj(resultPtr,
wstring + offset + subStart, subEnd - subStart);
}
}
@@ -901,7 +901,7 @@ Tcl_RegsubObjCmd(
}
if (wfirstChar != wsrc) {
- Tcl_AppendUnicodeToObj(resultPtr, wfirstChar, wsrc - wfirstChar);
+ TclAppendUnicodeToObj(resultPtr, wfirstChar, wsrc - wfirstChar);
}
if (end == 0) {
@@ -911,7 +911,7 @@ Tcl_RegsubObjCmd(
*/
if (offset < wlen) {
- Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1);
+ TclAppendUnicodeToObj(resultPtr, wstring + offset, 1);
}
offset++;
} else {
@@ -923,7 +923,7 @@ Tcl_RegsubObjCmd(
*/
if (offset < wlen) {
- Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1);
+ TclAppendUnicodeToObj(resultPtr, wstring + offset, 1);
}
offset++;
}
@@ -948,7 +948,7 @@ Tcl_RegsubObjCmd(
resultPtr = objv[1];
Tcl_IncrRefCount(resultPtr);
} else if (offset < wlen) {
- Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset);
+ TclAppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset);
}
if (objc == 4) {
if (Tcl_ObjSetVar2(interp, objv[3], NULL, resultPtr,
@@ -2060,7 +2060,7 @@ StringMapCmd(
} else {
sourceObj = objv[objc-1];
}
- ustring1 = Tcl_GetUnicodeFromObj(sourceObj, &length1);
+ ustring1 = TclGetUnicodeFromObj_(sourceObj, &length1);
if (length1 == 0) {
/*
* Empty input string, just stop now.
@@ -2089,7 +2089,7 @@ StringMapCmd(
int mapLen, u2lc;
Tcl_UniChar *mapString;
- ustring2 = Tcl_GetUnicodeFromObj(mapElemv[0], &length2);
+ ustring2 = TclGetUnicodeFromObj_(mapElemv[0], &length2);
p = ustring1;
if ((length2 > length1) || (length2 == 0)) {
/*
@@ -2098,7 +2098,7 @@ StringMapCmd(
ustring1 = end;
} else {
- mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen);
+ mapString = TclGetUnicodeFromObj_(mapElemv[1], &mapLen);
u2lc = (nocase ? Tcl_UniCharToLower(*ustring2) : 0);
for (; ustring1 < end; ustring1++) {
if (((*ustring1 == *ustring2) ||
@@ -2106,14 +2106,14 @@ StringMapCmd(
(length2==1 || strCmpFn(ustring1, ustring2,
(unsigned long) length2) == 0)) {
if (p != ustring1) {
- Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p);
+ TclAppendUnicodeToObj(resultPtr, p, ustring1-p);
p = ustring1 + length2;
} else {
p += length2;
}
ustring1 = p - 1;
- Tcl_AppendUnicodeToObj(resultPtr, mapString, mapLen);
+ TclAppendUnicodeToObj(resultPtr, mapString, mapLen);
}
}
}
@@ -2134,7 +2134,7 @@ StringMapCmd(
u2lc = (int *)TclStackAlloc(interp, mapElemc * sizeof(int));
}
for (index = 0; index < mapElemc; index++) {
- mapStrings[index] = Tcl_GetUnicodeFromObj(mapElemv[index],
+ mapStrings[index] = TclGetUnicodeFromObj_(mapElemv[index],
mapLens+index);
if (nocase && ((index % 2) == 0)) {
u2lc[index/2] = Tcl_UniCharToLower(*mapStrings[index]);
@@ -2158,7 +2158,7 @@ StringMapCmd(
* Put the skipped chars onto the result first.
*/
- Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p);
+ TclAppendUnicodeToObj(resultPtr, p, ustring1-p);
p = ustring1 + length2;
} else {
p += length2;
@@ -2174,7 +2174,7 @@ StringMapCmd(
* Append the map value to the unicode string.
*/
- Tcl_AppendUnicodeToObj(resultPtr,
+ TclAppendUnicodeToObj(resultPtr,
mapStrings[index+1], mapLens[index+1]);
break;
}
@@ -2191,7 +2191,7 @@ StringMapCmd(
* Put the rest of the unmapped chars onto result.
*/
- Tcl_AppendUnicodeToObj(resultPtr, p, ustring1 - p);
+ TclAppendUnicodeToObj(resultPtr, p, ustring1 - p);
}
Tcl_SetObjResult(interp, resultPtr);
done:
@@ -2506,7 +2506,7 @@ StringStartCmd(
return TCL_ERROR;
}
- string = Tcl_GetUnicodeFromObj(objv[1], &length);
+ string = TclGetUnicodeFromObj_(objv[1], &length);
if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) {
return TCL_ERROR;
}
@@ -2576,7 +2576,7 @@ StringEndCmd(
return TCL_ERROR;
}
- string = Tcl_GetUnicodeFromObj(objv[1], &length);
+ string = TclGetUnicodeFromObj_(objv[1], &length);
if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) {
return TCL_ERROR;
}
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index 4217e9c..bf15862 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -1159,7 +1159,7 @@ EXTERN Tcl_Obj * Tcl_GetRange(Tcl_Obj *objPtr, int first, int last);
/* 384 */
TCL_DEPRECATED("Use Tcl_AppendStringsToObj")
void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr,
- const Tcl_UniChar *unicode, int length);
+ const unsigned short *unicode, int length);
/* 385 */
EXTERN int Tcl_RegExpMatchObj(Tcl_Interp *interp,
Tcl_Obj *textObj, Tcl_Obj *patternObj);
@@ -1304,7 +1304,7 @@ EXTERN int Tcl_AttemptSetObjLength(Tcl_Obj *objPtr, int length);
/* 433 */
EXTERN Tcl_ThreadId Tcl_GetChannelThread(Tcl_Channel channel);
/* 434 */
-EXTERN Tcl_UniChar * Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr,
+EXTERN unsigned short * Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr,
int *lengthPtr);
/* 435 */
TCL_DEPRECATED("")
@@ -2376,7 +2376,7 @@ typedef struct TclStubs {
int (*tcl_GetUniChar) (Tcl_Obj *objPtr, int index); /* 381 */
TCL_DEPRECATED_API("No longer in use, changed to macro") unsigned short * (*tcl_GetUnicode) (Tcl_Obj *objPtr); /* 382 */
Tcl_Obj * (*tcl_GetRange) (Tcl_Obj *objPtr, int first, int last); /* 383 */
- TCL_DEPRECATED_API("Use Tcl_AppendStringsToObj") void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int length); /* 384 */
+ TCL_DEPRECATED_API("Use Tcl_AppendStringsToObj") void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const unsigned short *unicode, int length); /* 384 */
int (*tcl_RegExpMatchObj) (Tcl_Interp *interp, Tcl_Obj *textObj, Tcl_Obj *patternObj); /* 385 */
void (*tcl_SetNotifier) (const Tcl_NotifierProcs *notifierProcPtr); /* 386 */
Tcl_Mutex * (*tcl_GetAllocMutex) (void); /* 387 */
@@ -2426,7 +2426,7 @@ typedef struct TclStubs {
char * (*tcl_AttemptDbCkrealloc) (char *ptr, unsigned int size, const char *file, int line); /* 431 */
int (*tcl_AttemptSetObjLength) (Tcl_Obj *objPtr, int length); /* 432 */
Tcl_ThreadId (*tcl_GetChannelThread) (Tcl_Channel channel); /* 433 */
- Tcl_UniChar * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int *lengthPtr); /* 434 */
+ unsigned short * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int *lengthPtr); /* 434 */
TCL_DEPRECATED_API("") int (*tcl_GetMathFuncInfo) (Tcl_Interp *interp, const char *name, int *numArgsPtr, Tcl_ValueType **argTypesPtr, Tcl_MathProc **procPtr, ClientData *clientDataPtr); /* 435 */
TCL_DEPRECATED_API("") Tcl_Obj * (*tcl_ListMathFuncs) (Tcl_Interp *interp, const char *pattern); /* 436 */
Tcl_Obj * (*tcl_SubstObj) (Tcl_Interp *interp, Tcl_Obj *objPtr, int flags); /* 437 */
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index 7cc002f..7a1025d 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -5495,12 +5495,12 @@ TEBCresume(
objResultPtr = value3Ptr;
goto doneStringMap;
}
- ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length);
+ ustring1 = TclGetUnicodeFromObj_(valuePtr, &length);
if (length == 0) {
objResultPtr = valuePtr;
goto doneStringMap;
}
- ustring2 = Tcl_GetUnicodeFromObj(value2Ptr, &length2);
+ ustring2 = TclGetUnicodeFromObj_(value2Ptr, &length2);
if (length2 > length || length2 == 0) {
objResultPtr = valuePtr;
goto doneStringMap;
@@ -5512,7 +5512,7 @@ TEBCresume(
}
goto doneStringMap;
}
- ustring3 = Tcl_GetUnicodeFromObj(value3Ptr, &length3);
+ ustring3 = TclGetUnicodeFromObj_(value3Ptr, &length3);
objResultPtr = TclNewUnicodeObj(ustring1, 0);
p = ustring1;
@@ -5524,14 +5524,14 @@ TEBCresume(
memcmp(ustring1, ustring2, sizeof(Tcl_UniChar) * length2)
== 0)) {
if (p != ustring1) {
- Tcl_AppendUnicodeToObj(objResultPtr, p, ustring1-p);
+ TclAppendUnicodeToObj(objResultPtr, p, ustring1-p);
p = ustring1 + length2;
} else {
p += length2;
}
ustring1 = p - 1;
- Tcl_AppendUnicodeToObj(objResultPtr, ustring3, length3);
+ TclAppendUnicodeToObj(objResultPtr, ustring3, length3);
}
}
if (p != ustring1) {
@@ -5539,7 +5539,7 @@ TEBCresume(
* Put the rest of the unmapped chars onto result.
*/
- Tcl_AppendUnicodeToObj(objResultPtr, p, ustring1 - p);
+ TclAppendUnicodeToObj(objResultPtr, p, ustring1 - p);
}
doneStringMap:
TRACE_WITH_OBJ(("%.20s %.20s %.20s => ",
@@ -5565,7 +5565,7 @@ TEBCresume(
valuePtr = OBJ_AT_TOS;
TRACE(("%s \"%.30s\" => ", tclStringClassTable[opnd].name,
O2S(valuePtr)));
- ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length);
+ ustring1 = TclGetUnicodeFromObj_(valuePtr, &length);
match = 1;
if (length > 0) {
int ch;
@@ -5596,8 +5596,8 @@ TEBCresume(
|| TclHasInternalRep(value2Ptr, &tclStringType)) {
Tcl_UniChar *ustring1, *ustring2;
- ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length);
- ustring2 = Tcl_GetUnicodeFromObj(value2Ptr, &length2);
+ ustring1 = TclGetUnicodeFromObj_(valuePtr, &length);
+ ustring2 = TclGetUnicodeFromObj_(value2Ptr, &length2);
match = TclUniCharMatch(ustring1, length, ustring2, length2,
nocase);
} else if (TclIsPureByteArray(valuePtr) && !nocase) {
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c
index 8e588ac..bb8a6ad 100644
--- a/generic/tclRegexp.c
+++ b/generic/tclRegexp.c
@@ -482,7 +482,7 @@ Tcl_RegExpExecObj(
regexpPtr->string = NULL;
regexpPtr->objPtr = textObj;
- udata = Tcl_GetUnicodeFromObj(textObj, &length);
+ udata = TclGetUnicodeFromObj_(textObj, &length);
if (offset > length) {
offset = length;
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 972eef7..a723586 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -80,14 +80,41 @@ static void UpdateStringOfString(Tcl_Obj *objPtr);
* functions that can be invoked by generic object code.
*/
-const Tcl_ObjType tclStringType = {
- "string", /* name */
+static const Tcl_ObjType utf32StringType = {
+ "utf32string", /* name */
FreeStringInternalRep, /* freeIntRepPro */
DupStringInternalRep, /* dupIntRepProc */
UpdateStringOfString, /* updateStringProc */
SetStringFromAny /* setFromAnyProc */
};
+typedef struct {
+ int numChars; /* The number of chars in the string. */
+ int allocated; /* The amount of space actually allocated for
+ * the UTF-16 string (minus 1 byte for the
+ * termination char). */
+ int maxChars; /* Max number of chars that can fit in the
+ * space allocated for the UTF-16 array. */
+ int hasUnicode; /* Boolean determining whether the string has
+ * a UTF-16 representation. Always 1 */
+ unsigned short unicode[TCLFLEXARRAY]; /* The array of Unicode chars. The actual size
+ * of this field depends on the 'maxChars'
+ * field above. */
+} UTF16String;
+
+const Tcl_ObjType tclStringType = {
+ "string", /* name */
+ FreeStringInternalRep, /* freeIntRepPro */
+#if 0
+ /* TODO JN */
+ DupUTF16StringInternalRep, /* dupIntRepProc */
+ UpdateUTF16StringOfString, /* updateStringProc */
+ SetUTF16StringFromAny /* setFromAnyProc */
+#endif
+ NULL, NULL, NULL
+};
+
+
/*
* TCL STRING GROWTH ALGORITHM
*
@@ -656,8 +683,8 @@ Tcl_GetUnicode(
*----------------------------------------------------------------------
*/
-Tcl_UniChar *
-Tcl_GetUnicodeFromObj(
+int *
+TclGetUnicodeFromObj_(
Tcl_Obj *objPtr, /* The object to find the unicode string
* for. */
int *lengthPtr) /* If non-NULL, the location where the string
@@ -679,6 +706,22 @@ Tcl_GetUnicodeFromObj(
}
return stringPtr->unicode;
}
+
+unsigned short *
+Tcl_GetUnicodeFromObj(
+ Tcl_Obj *objPtr, /* The object to find the unicode string
+ * for. */
+ int *lengthPtr) /* If non-NULL, the location where the string
+ * rep's unichar length should be stored. If
+ * NULL, no length is stored. */
+{
+ (void)objPtr;
+ (void)lengthPtr;
+
+ /* TODO JN */
+ return NULL;
+}
+
unsigned short *
TclGetUnicodeFromObj(
Tcl_Obj *objPtr, /* The object to find the unicode string
@@ -1142,7 +1185,7 @@ SetUnicodeObj(
stringCheckLimits(numChars);
stringPtr = stringAlloc(numChars);
SET_STRING(objPtr, stringPtr);
- objPtr->typePtr = &tclStringType;
+ objPtr->typePtr = &utf32StringType;
stringPtr->maxChars = numChars;
memcpy(stringPtr->unicode, unicode, numChars * sizeof(Tcl_UniChar));
@@ -1230,7 +1273,7 @@ Tcl_AppendLimitedToObj(
/* If appended string starts with a continuation byte or a lower surrogate,
* force objPtr to unicode representation. See [7f1162a867] */
if (bytes && ISCONTINUATION(bytes)) {
- Tcl_GetUnicodeFromObj(objPtr, NULL);
+ TclGetUnicodeFromObj_(objPtr, NULL);
stringPtr = GET_STRING(objPtr);
}
if (stringPtr->hasUnicode && stringPtr->numChars > 0) {
@@ -1298,9 +1341,9 @@ Tcl_AppendToObj(
*/
void
-Tcl_AppendUnicodeToObj(
+TclAppendUnicodeToObj(
Tcl_Obj *objPtr, /* Points to the object to append to. */
- const Tcl_UniChar *unicode, /* The unicode string to append to the
+ const int *unicode, /* The unicode string to append to the
* object. */
int length) /* Number of chars in "unicode". */
{
@@ -1330,6 +1373,20 @@ Tcl_AppendUnicodeToObj(
}
}
+void
+Tcl_AppendUnicodeToObj(
+ Tcl_Obj *objPtr, /* Points to the object to append to. */
+ const unsigned short *unicode, /* The unicode string to append to the
+ * object. */
+ int length) /* Number of chars in "unicode". */
+{
+ (void)objPtr;
+ (void)unicode;
+ (void)length;
+
+ /* TODO JN */
+}
+
/*
*----------------------------------------------------------------------
*
@@ -1434,7 +1491,7 @@ Tcl_AppendObjToObj(
* force objPtr to unicode representation. See [7f1162a867]
* This fixes append-3.4, append-3.7 and utf-1.18 testcases. */
if (ISCONTINUATION(TclGetString(appendObjPtr))) {
- Tcl_GetUnicodeFromObj(objPtr, NULL);
+ TclGetUnicodeFromObj_(objPtr, NULL);
stringPtr = GET_STRING(objPtr);
}
/*
@@ -1447,9 +1504,9 @@ Tcl_AppendObjToObj(
* If appendObjPtr is not of the "String" type, don't convert it.
*/
- if (TclHasInternalRep(appendObjPtr, &tclStringType)) {
+ if (TclHasInternalRep(appendObjPtr, &utf32StringType)) {
Tcl_UniChar *unicode =
- Tcl_GetUnicodeFromObj(appendObjPtr, &numChars);
+ TclGetUnicodeFromObj_(appendObjPtr, &numChars);
AppendUnicodeToUnicodeRep(objPtr, unicode, numChars);
} else {
@@ -1468,7 +1525,7 @@ Tcl_AppendObjToObj(
bytes = TclGetStringFromObj(appendObjPtr, &length);
numChars = stringPtr->numChars;
- if ((numChars >= 0) && TclHasInternalRep(appendObjPtr, &tclStringType)) {
+ if ((numChars >= 0) && TclHasInternalRep(appendObjPtr, &utf32StringType)) {
String *appendStringPtr = GET_STRING(appendObjPtr);
appendNumChars = appendStringPtr->numChars;
@@ -2877,7 +2934,7 @@ TclGetStringStorage(
{
String *stringPtr;
- if (!TclHasInternalRep(objPtr, &tclStringType) || objPtr->bytes == NULL) {
+ if (!TclHasInternalRep(objPtr, &utf32StringType) || objPtr->bytes == NULL) {
return TclGetStringFromObj(objPtr, (int *)sizePtr);
}
@@ -2925,7 +2982,7 @@ TclStringRepeat(
*/
if (!binary) {
- if (TclHasInternalRep(objPtr, &tclStringType)) {
+ if (TclHasInternalRep(objPtr, &utf32StringType)) {
String *stringPtr = GET_STRING(objPtr);
if (stringPtr->hasUnicode) {
unichar = 1;
@@ -2938,7 +2995,7 @@ TclStringRepeat(
Tcl_GetByteArrayFromObj(objPtr, &length);
} else if (unichar) {
/* Result will be pure Tcl_UniChar array. Pre-size it. */
- Tcl_GetUnicodeFromObj(objPtr, &length);
+ TclGetUnicodeFromObj_(objPtr, &length);
} else {
/* Result will be concat of string reps. Pre-size it. */
Tcl_GetStringFromObj(objPtr, &length);
@@ -2978,7 +3035,7 @@ TclStringRepeat(
*/
if (!inPlace || Tcl_IsShared(objPtr)) {
- objResultPtr = TclNewUnicodeObj(Tcl_GetUnicodeFromObj(objPtr, NULL), length);
+ objResultPtr = TclNewUnicodeObj(TclGetUnicodeFromObj_(objPtr, NULL), length);
} else {
TclInvalidateStringRep(objPtr);
objResultPtr = objPtr;
@@ -2999,7 +3056,7 @@ TclStringRepeat(
Tcl_AppendObjToObj(objResultPtr, objResultPtr);
done *= 2;
}
- Tcl_AppendUnicodeToObj(objResultPtr, Tcl_GetUnicodeFromObj(objResultPtr, NULL),
+ TclAppendUnicodeToObj(objResultPtr, TclGetUnicodeFromObj_(objResultPtr, NULL),
(count - done) * length);
} else {
/*
@@ -3096,7 +3153,7 @@ TclStringCat(
binary = 0;
if (ov > objv+1 && ISCONTINUATION(TclGetString(objPtr))) {
forceUniChar = 1;
- } else if ((objPtr->typePtr) && (objPtr->typePtr != &tclStringType)) {
+ } else if ((objPtr->typePtr) && (objPtr->typePtr != &utf32StringType)) {
/* Prevent shimmer of non-string types. */
allowUniChar = 0;
}
@@ -3104,7 +3161,7 @@ TclStringCat(
} else {
/* assert (objPtr->typePtr != NULL) -- stork! */
binary = 0;
- if (TclHasInternalRep(objPtr, &tclStringType)) {
+ if (TclHasInternalRep(objPtr, &utf32StringType)) {
/* Have a pure Unicode value; ask to preserve it */
requestUniChar = 1;
} else {
@@ -3158,7 +3215,7 @@ TclStringCat(
if ((objPtr->bytes == NULL) || (objPtr->length)) {
int numChars;
- Tcl_GetUnicodeFromObj(objPtr, &numChars); /* PANIC? */
+ TclGetUnicodeFromObj_(objPtr, &numChars); /* PANIC? */
if (numChars) {
last = objc - oc;
if (length == 0) {
@@ -3308,7 +3365,7 @@ TclStringCat(
objResultPtr = *objv++; objc--;
/* Ugly interface! Force resize of the unicode array. */
- Tcl_GetUnicodeFromObj(objResultPtr, &start);
+ TclGetUnicodeFromObj_(objResultPtr, &start);
Tcl_InvalidateStringRep(objResultPtr);
if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) {
if (interp) {
@@ -3320,7 +3377,7 @@ TclStringCat(
}
return NULL;
}
- dst = Tcl_GetUnicodeFromObj(objResultPtr, NULL) + start;
+ dst = TclGetUnicodeFromObj_(objResultPtr, NULL) + start;
} else {
Tcl_UniChar ch = 0;
@@ -3337,14 +3394,14 @@ TclStringCat(
}
return NULL;
}
- dst = Tcl_GetUnicodeFromObj(objResultPtr, NULL);
+ dst = TclGetUnicodeFromObj_(objResultPtr, NULL);
}
while (objc--) {
Tcl_Obj *objPtr = *objv++;
if ((objPtr->bytes == NULL) || (objPtr->length)) {
int more;
- Tcl_UniChar *src = Tcl_GetUnicodeFromObj(objPtr, &more);
+ Tcl_UniChar *src = TclGetUnicodeFromObj_(objPtr, &more);
memcpy(dst, src, more * sizeof(Tcl_UniChar));
dst += more;
}
@@ -3457,8 +3514,8 @@ TclStringCmp(
s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len);
s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len);
memCmpFn = memcmp;
- } else if (TclHasInternalRep(value1Ptr, &tclStringType)
- && TclHasInternalRep(value2Ptr, &tclStringType)) {
+ } else if (TclHasInternalRep(value1Ptr, &utf32StringType)
+ && TclHasInternalRep(value2Ptr, &utf32StringType)) {
/*
* Do a unicode-specific comparison if both of the args are of
* String type. If the char length == byte length, we can do a
@@ -3467,8 +3524,8 @@ TclStringCmp(
*/
if (nocase) {
- s1 = (char *) Tcl_GetUnicodeFromObj(value1Ptr, &s1len);
- s2 = (char *) Tcl_GetUnicodeFromObj(value2Ptr, &s2len);
+ s1 = (char *) TclGetUnicodeFromObj_(value1Ptr, &s1len);
+ s2 = (char *) TclGetUnicodeFromObj_(value2Ptr, &s2len);
memCmpFn = (memCmpFn_t)(void *)TclUniCharNcasecmp;
} else {
s1len = Tcl_GetCharLength(value1Ptr);
@@ -3481,8 +3538,8 @@ TclStringCmp(
s2 = value2Ptr->bytes;
memCmpFn = memcmp;
} else {
- s1 = (char *) Tcl_GetUnicodeFromObj(value1Ptr, NULL);
- s2 = (char *) Tcl_GetUnicodeFromObj(value2Ptr, NULL);
+ s1 = (char *) TclGetUnicodeFromObj_(value1Ptr, NULL);
+ s2 = (char *) TclGetUnicodeFromObj_(value2Ptr, NULL);
if (
#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3)
1
@@ -3680,8 +3737,8 @@ TclStringFirst(
* do only the well-defined Tcl_UniChar array search.
*/
- un = Tcl_GetUnicodeFromObj(needle, &ln);
- uh = Tcl_GetUnicodeFromObj(haystack, &lh);
+ un = TclGetUnicodeFromObj_(needle, &ln);
+ uh = TclGetUnicodeFromObj_(haystack, &lh);
if ((lh < ln) || (start > lh - ln)) {
/* Don't start the loop if there cannot be a valid answer */
goto firstEnd;
@@ -3763,8 +3820,8 @@ TclStringLast(
goto lastEnd;
}
- uh = Tcl_GetUnicodeFromObj(haystack, &lh);
- un = Tcl_GetUnicodeFromObj(needle, &ln);
+ uh = TclGetUnicodeFromObj_(haystack, &lh);
+ un = TclGetUnicodeFromObj_(needle, &ln);
if (last >= lh) {
last = lh - 1;
@@ -3856,7 +3913,7 @@ TclStringReverse(
stringPtr = GET_STRING(objPtr);
if (stringPtr->hasUnicode) {
- Tcl_UniChar *from = Tcl_GetUnicodeFromObj(objPtr, NULL);
+ Tcl_UniChar *from = TclGetUnicodeFromObj_(objPtr, NULL);
stringPtr = GET_STRING(objPtr);
Tcl_UniChar *src = from + stringPtr->numChars;
Tcl_UniChar *to;
@@ -3869,7 +3926,7 @@ TclStringReverse(
objPtr = TclNewUnicodeObj(&ch, 1);
Tcl_SetObjLength(objPtr, stringPtr->numChars);
- to = Tcl_GetUnicodeFromObj(objPtr, NULL);
+ to = TclGetUnicodeFromObj_(objPtr, NULL);
stringPtr = GET_STRING(objPtr);
while (--src >= from) {
#if TCL_UTF_MAX < 4
@@ -4099,7 +4156,7 @@ TclStringReplace(
/* The traditional implementation... */
{
int numChars;
- Tcl_UniChar *ustring = Tcl_GetUnicodeFromObj(objPtr, &numChars);
+ Tcl_UniChar *ustring = TclGetUnicodeFromObj_(objPtr, &numChars);
/* TODO: Is there an in-place option worth pursuing here? */
@@ -4108,7 +4165,7 @@ TclStringReplace(
Tcl_AppendObjToObj(result, insertPtr);
}
if (first + count < numChars) {
- Tcl_AppendUnicodeToObj(result, ustring + first + count,
+ TclAppendUnicodeToObj(result, ustring + first + count,
numChars - first - count);
}
@@ -4267,7 +4324,7 @@ DupStringInternalRep(
copyStringPtr->allocated = copyPtr->bytes ? copyPtr->length : 0;
SET_STRING(copyPtr, copyStringPtr);
- copyPtr->typePtr = &tclStringType;
+ copyPtr->typePtr = &utf32StringType;
}
/*
@@ -4292,7 +4349,7 @@ SetStringFromAny(
TCL_UNUSED(Tcl_Interp *),
Tcl_Obj *objPtr) /* The object to convert. */
{
- if (!TclHasInternalRep(objPtr, &tclStringType)) {
+ if (!TclHasInternalRep(objPtr, &utf32StringType)) {
String *stringPtr = stringAlloc(0);
/*
@@ -4312,7 +4369,7 @@ SetStringFromAny(
stringPtr->maxChars = 0;
stringPtr->hasUnicode = 0;
SET_STRING(objPtr, stringPtr);
- objPtr->typePtr = &tclStringType;
+ objPtr->typePtr = &utf32StringType;
}
return TCL_OK;
}
diff --git a/generic/tclTestObj.c b/generic/tclTestObj.c
index 9814cfe..e99d4c1 100644
--- a/generic/tclTestObj.c
+++ b/generic/tclTestObj.c
@@ -1076,6 +1076,7 @@ TestobjCmd(
#ifndef TCL_WIDE_INT_IS_LONG
if (!strcmp(typeName, "wideInt")) typeName = "int";
#endif
+ if (!strcmp(typeName, "utf32string")) typeName = "string";
Tcl_SetObjResult(interp, Tcl_NewStringObj(typeName, -1));
}
} else if (strcmp(subCmd, "refcount") == 0) {
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 66d1009..9cc82cb 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -2594,8 +2594,8 @@ TclStringMatchObj(
if (TclHasInternalRep(strObj, &tclStringType) || (strObj->typePtr == NULL)) {
Tcl_UniChar *udata, *uptn;
- udata = Tcl_GetUnicodeFromObj(strObj, &length);
- uptn = Tcl_GetUnicodeFromObj(ptnObj, &plen);
+ udata = TclGetUnicodeFromObj_(strObj, &length);
+ uptn = TclGetUnicodeFromObj_(ptnObj, &plen);
match = TclUniCharMatch(udata, length, uptn, plen, flags);
} else if (TclIsPureByteArray(strObj) && TclIsPureByteArray(ptnObj)
&& !flags) {
diff --git a/tests/string.test b/tests/string.test
index 203d0c6..9cac73d 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -422,7 +422,7 @@ test string-4.16.$noComp {string first, normal string vs pure unicode string} -b
# Representation checks are canaries
run {list [representationpoke $s] [representationpoke $m] \
[string first $m $s]}
-} -result {{string 1} {string 0} 2}
+} -result {{utf32string 1} {utf32string 0} 2}
test string-4.17.$noComp {string first, corner case} -body {
run {string first a aaa 4294967295}
} -result {-1}