diff options
| author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2022-03-12 23:21:12 (GMT) |
|---|---|---|
| committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2022-03-12 23:21:12 (GMT) |
| commit | d05ac0b98bbc039639b7784e313620b8d3757ddf (patch) | |
| tree | 1cf8c03e114c6f7dc85fc732630a63128be5deaa | |
| parent | e709f7c5392a8506414be6727a3f8b6bdd7fbae4 (diff) | |
| download | tcl-d05ac0b98bbc039639b7784e313620b8d3757ddf.zip tcl-d05ac0b98bbc039639b7784e313620b8d3757ddf.tar.gz tcl-d05ac0b98bbc039639b7784e313620b8d3757ddf.tar.bz2 | |
Start defining "utf32string" type
| -rw-r--r-- | generic/tcl.decls | 4 | ||||
| -rw-r--r-- | generic/tclCmdMZ.c | 62 | ||||
| -rw-r--r-- | generic/tclDecls.h | 8 | ||||
| -rw-r--r-- | generic/tclExecute.c | 18 | ||||
| -rw-r--r-- | generic/tclRegexp.c | 2 | ||||
| -rw-r--r-- | generic/tclStringObj.c | 139 | ||||
| -rw-r--r-- | generic/tclTestObj.c | 1 | ||||
| -rw-r--r-- | generic/tclUtil.c | 4 | ||||
| -rw-r--r-- | tests/string.test | 2 |
9 files changed, 149 insertions, 91 deletions
diff --git a/generic/tcl.decls b/generic/tcl.decls index 9c83e81..6dbb457 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -1357,7 +1357,7 @@ declare 383 { Tcl_Obj *Tcl_GetRange(Tcl_Obj *objPtr, int first, int last) } declare 384 {deprecated {Use Tcl_AppendStringsToObj}} { - void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode, + void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const unsigned short *unicode, int length) } declare 385 { @@ -1541,7 +1541,7 @@ declare 433 { # introduced in 8.4a3 declare 434 { - Tcl_UniChar *Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr) + unsigned short *Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr) } # TIP#15 (math function introspection) dkf diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 11b383f..db4002a 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -606,9 +606,9 @@ Tcl_RegsubObjCmd( nocase = (cflags & TCL_REG_NOCASE); strCmpFn = nocase ? TclUniCharNcasecmp : TclUniCharNcmp; - wsrc = Tcl_GetUnicodeFromObj(objv[0], &slen); - wstring = Tcl_GetUnicodeFromObj(objv[1], &wlen); - wsubspec = Tcl_GetUnicodeFromObj(objv[2], &wsublen); + wsrc = TclGetUnicodeFromObj_(objv[0], &slen); + wstring = TclGetUnicodeFromObj_(objv[1], &wlen); + wsubspec = TclGetUnicodeFromObj_(objv[2], &wsublen); wend = wstring + wlen - (slen ? slen - 1 : 0); result = TCL_OK; @@ -622,8 +622,8 @@ Tcl_RegsubObjCmd( resultPtr = TclNewUnicodeObj(wstring, 0); Tcl_IncrRefCount(resultPtr); for (; wstring < wend; wstring++) { - Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen); - Tcl_AppendUnicodeToObj(resultPtr, wstring, 1); + TclAppendUnicodeToObj(resultPtr, wsubspec, wsublen); + TclAppendUnicodeToObj(resultPtr, wstring, 1); numMatches++; } wlen = 0; @@ -640,14 +640,14 @@ Tcl_RegsubObjCmd( Tcl_IncrRefCount(resultPtr); } if (p != wstring) { - Tcl_AppendUnicodeToObj(resultPtr, p, wstring - p); + TclAppendUnicodeToObj(resultPtr, p, wstring - p); p = wstring + slen; } else { p += slen; } wstring = p - 1; - Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen); + TclAppendUnicodeToObj(resultPtr, wsubspec, wsublen); numMatches++; } } @@ -699,14 +699,14 @@ Tcl_RegsubObjCmd( } else { objPtr = objv[1]; } - wstring = Tcl_GetUnicodeFromObj(objPtr, &wlen); + wstring = TclGetUnicodeFromObj_(objPtr, &wlen); if (objv[2] == objv[0]) { subPtr = Tcl_DuplicateObj(objv[2]); } else { subPtr = objv[2]; } if (!command) { - wsubspec = Tcl_GetUnicodeFromObj(subPtr, &wsublen); + wsubspec = TclGetUnicodeFromObj_(subPtr, &wsublen); } result = TCL_OK; @@ -750,7 +750,7 @@ Tcl_RegsubObjCmd( * specified. */ - Tcl_AppendUnicodeToObj(resultPtr, wstring, offset); + TclAppendUnicodeToObj(resultPtr, wstring, offset); } } numMatches++; @@ -763,7 +763,7 @@ Tcl_RegsubObjCmd( Tcl_RegExpGetInfo(regExpr, &info); start = info.matches[0].start; end = info.matches[0].end; - Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, start); + TclAppendUnicodeToObj(resultPtr, wstring + offset, start); /* * In command-prefix mode, the substitutions are added as quoted @@ -826,7 +826,7 @@ Tcl_RegsubObjCmd( * the user code. */ - wstring = Tcl_GetUnicodeFromObj(objPtr, &wlen); + wstring = TclGetUnicodeFromObj_(objPtr, &wlen); offset += end; if (end == 0 || start == end) { @@ -838,7 +838,7 @@ Tcl_RegsubObjCmd( */ if (offset < wlen) { - Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1); + TclAppendUnicodeToObj(resultPtr, wstring + offset, 1); } offset++; } @@ -867,7 +867,7 @@ Tcl_RegsubObjCmd( idx = ch - '0'; } else if ((ch == '\\') || (ch == '&')) { *wsrc = ch; - Tcl_AppendUnicodeToObj(resultPtr, wfirstChar, + TclAppendUnicodeToObj(resultPtr, wfirstChar, wsrc - wfirstChar + 1); *wsrc = '\\'; wfirstChar = wsrc + 2; @@ -881,7 +881,7 @@ Tcl_RegsubObjCmd( } if (wfirstChar != wsrc) { - Tcl_AppendUnicodeToObj(resultPtr, wfirstChar, + TclAppendUnicodeToObj(resultPtr, wfirstChar, wsrc - wfirstChar); } @@ -889,7 +889,7 @@ Tcl_RegsubObjCmd( subStart = info.matches[idx].start; subEnd = info.matches[idx].end; if ((subStart >= 0) && (subEnd >= 0)) { - Tcl_AppendUnicodeToObj(resultPtr, + TclAppendUnicodeToObj(resultPtr, wstring + offset + subStart, subEnd - subStart); } } @@ -901,7 +901,7 @@ Tcl_RegsubObjCmd( } if (wfirstChar != wsrc) { - Tcl_AppendUnicodeToObj(resultPtr, wfirstChar, wsrc - wfirstChar); + TclAppendUnicodeToObj(resultPtr, wfirstChar, wsrc - wfirstChar); } if (end == 0) { @@ -911,7 +911,7 @@ Tcl_RegsubObjCmd( */ if (offset < wlen) { - Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1); + TclAppendUnicodeToObj(resultPtr, wstring + offset, 1); } offset++; } else { @@ -923,7 +923,7 @@ Tcl_RegsubObjCmd( */ if (offset < wlen) { - Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1); + TclAppendUnicodeToObj(resultPtr, wstring + offset, 1); } offset++; } @@ -948,7 +948,7 @@ Tcl_RegsubObjCmd( resultPtr = objv[1]; Tcl_IncrRefCount(resultPtr); } else if (offset < wlen) { - Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset); + TclAppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset); } if (objc == 4) { if (Tcl_ObjSetVar2(interp, objv[3], NULL, resultPtr, @@ -2060,7 +2060,7 @@ StringMapCmd( } else { sourceObj = objv[objc-1]; } - ustring1 = Tcl_GetUnicodeFromObj(sourceObj, &length1); + ustring1 = TclGetUnicodeFromObj_(sourceObj, &length1); if (length1 == 0) { /* * Empty input string, just stop now. @@ -2089,7 +2089,7 @@ StringMapCmd( int mapLen, u2lc; Tcl_UniChar *mapString; - ustring2 = Tcl_GetUnicodeFromObj(mapElemv[0], &length2); + ustring2 = TclGetUnicodeFromObj_(mapElemv[0], &length2); p = ustring1; if ((length2 > length1) || (length2 == 0)) { /* @@ -2098,7 +2098,7 @@ StringMapCmd( ustring1 = end; } else { - mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen); + mapString = TclGetUnicodeFromObj_(mapElemv[1], &mapLen); u2lc = (nocase ? Tcl_UniCharToLower(*ustring2) : 0); for (; ustring1 < end; ustring1++) { if (((*ustring1 == *ustring2) || @@ -2106,14 +2106,14 @@ StringMapCmd( (length2==1 || strCmpFn(ustring1, ustring2, (unsigned long) length2) == 0)) { if (p != ustring1) { - Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p); + TclAppendUnicodeToObj(resultPtr, p, ustring1-p); p = ustring1 + length2; } else { p += length2; } ustring1 = p - 1; - Tcl_AppendUnicodeToObj(resultPtr, mapString, mapLen); + TclAppendUnicodeToObj(resultPtr, mapString, mapLen); } } } @@ -2134,7 +2134,7 @@ StringMapCmd( u2lc = (int *)TclStackAlloc(interp, mapElemc * sizeof(int)); } for (index = 0; index < mapElemc; index++) { - mapStrings[index] = Tcl_GetUnicodeFromObj(mapElemv[index], + mapStrings[index] = TclGetUnicodeFromObj_(mapElemv[index], mapLens+index); if (nocase && ((index % 2) == 0)) { u2lc[index/2] = Tcl_UniCharToLower(*mapStrings[index]); @@ -2158,7 +2158,7 @@ StringMapCmd( * Put the skipped chars onto the result first. */ - Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p); + TclAppendUnicodeToObj(resultPtr, p, ustring1-p); p = ustring1 + length2; } else { p += length2; @@ -2174,7 +2174,7 @@ StringMapCmd( * Append the map value to the unicode string. */ - Tcl_AppendUnicodeToObj(resultPtr, + TclAppendUnicodeToObj(resultPtr, mapStrings[index+1], mapLens[index+1]); break; } @@ -2191,7 +2191,7 @@ StringMapCmd( * Put the rest of the unmapped chars onto result. */ - Tcl_AppendUnicodeToObj(resultPtr, p, ustring1 - p); + TclAppendUnicodeToObj(resultPtr, p, ustring1 - p); } Tcl_SetObjResult(interp, resultPtr); done: @@ -2506,7 +2506,7 @@ StringStartCmd( return TCL_ERROR; } - string = Tcl_GetUnicodeFromObj(objv[1], &length); + string = TclGetUnicodeFromObj_(objv[1], &length); if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) { return TCL_ERROR; } @@ -2576,7 +2576,7 @@ StringEndCmd( return TCL_ERROR; } - string = Tcl_GetUnicodeFromObj(objv[1], &length); + string = TclGetUnicodeFromObj_(objv[1], &length); if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) { return TCL_ERROR; } diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 4217e9c..bf15862 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1159,7 +1159,7 @@ EXTERN Tcl_Obj * Tcl_GetRange(Tcl_Obj *objPtr, int first, int last); /* 384 */ TCL_DEPRECATED("Use Tcl_AppendStringsToObj") void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, - const Tcl_UniChar *unicode, int length); + const unsigned short *unicode, int length); /* 385 */ EXTERN int Tcl_RegExpMatchObj(Tcl_Interp *interp, Tcl_Obj *textObj, Tcl_Obj *patternObj); @@ -1304,7 +1304,7 @@ EXTERN int Tcl_AttemptSetObjLength(Tcl_Obj *objPtr, int length); /* 433 */ EXTERN Tcl_ThreadId Tcl_GetChannelThread(Tcl_Channel channel); /* 434 */ -EXTERN Tcl_UniChar * Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, +EXTERN unsigned short * Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr); /* 435 */ TCL_DEPRECATED("") @@ -2376,7 +2376,7 @@ typedef struct TclStubs { int (*tcl_GetUniChar) (Tcl_Obj *objPtr, int index); /* 381 */ TCL_DEPRECATED_API("No longer in use, changed to macro") unsigned short * (*tcl_GetUnicode) (Tcl_Obj *objPtr); /* 382 */ Tcl_Obj * (*tcl_GetRange) (Tcl_Obj *objPtr, int first, int last); /* 383 */ - TCL_DEPRECATED_API("Use Tcl_AppendStringsToObj") void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int length); /* 384 */ + TCL_DEPRECATED_API("Use Tcl_AppendStringsToObj") void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const unsigned short *unicode, int length); /* 384 */ int (*tcl_RegExpMatchObj) (Tcl_Interp *interp, Tcl_Obj *textObj, Tcl_Obj *patternObj); /* 385 */ void (*tcl_SetNotifier) (const Tcl_NotifierProcs *notifierProcPtr); /* 386 */ Tcl_Mutex * (*tcl_GetAllocMutex) (void); /* 387 */ @@ -2426,7 +2426,7 @@ typedef struct TclStubs { char * (*tcl_AttemptDbCkrealloc) (char *ptr, unsigned int size, const char *file, int line); /* 431 */ int (*tcl_AttemptSetObjLength) (Tcl_Obj *objPtr, int length); /* 432 */ Tcl_ThreadId (*tcl_GetChannelThread) (Tcl_Channel channel); /* 433 */ - Tcl_UniChar * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int *lengthPtr); /* 434 */ + unsigned short * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int *lengthPtr); /* 434 */ TCL_DEPRECATED_API("") int (*tcl_GetMathFuncInfo) (Tcl_Interp *interp, const char *name, int *numArgsPtr, Tcl_ValueType **argTypesPtr, Tcl_MathProc **procPtr, ClientData *clientDataPtr); /* 435 */ TCL_DEPRECATED_API("") Tcl_Obj * (*tcl_ListMathFuncs) (Tcl_Interp *interp, const char *pattern); /* 436 */ Tcl_Obj * (*tcl_SubstObj) (Tcl_Interp *interp, Tcl_Obj *objPtr, int flags); /* 437 */ diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 7cc002f..7a1025d 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5495,12 +5495,12 @@ TEBCresume( objResultPtr = value3Ptr; goto doneStringMap; } - ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length); + ustring1 = TclGetUnicodeFromObj_(valuePtr, &length); if (length == 0) { objResultPtr = valuePtr; goto doneStringMap; } - ustring2 = Tcl_GetUnicodeFromObj(value2Ptr, &length2); + ustring2 = TclGetUnicodeFromObj_(value2Ptr, &length2); if (length2 > length || length2 == 0) { objResultPtr = valuePtr; goto doneStringMap; @@ -5512,7 +5512,7 @@ TEBCresume( } goto doneStringMap; } - ustring3 = Tcl_GetUnicodeFromObj(value3Ptr, &length3); + ustring3 = TclGetUnicodeFromObj_(value3Ptr, &length3); objResultPtr = TclNewUnicodeObj(ustring1, 0); p = ustring1; @@ -5524,14 +5524,14 @@ TEBCresume( memcmp(ustring1, ustring2, sizeof(Tcl_UniChar) * length2) == 0)) { if (p != ustring1) { - Tcl_AppendUnicodeToObj(objResultPtr, p, ustring1-p); + TclAppendUnicodeToObj(objResultPtr, p, ustring1-p); p = ustring1 + length2; } else { p += length2; } ustring1 = p - 1; - Tcl_AppendUnicodeToObj(objResultPtr, ustring3, length3); + TclAppendUnicodeToObj(objResultPtr, ustring3, length3); } } if (p != ustring1) { @@ -5539,7 +5539,7 @@ TEBCresume( * Put the rest of the unmapped chars onto result. */ - Tcl_AppendUnicodeToObj(objResultPtr, p, ustring1 - p); + TclAppendUnicodeToObj(objResultPtr, p, ustring1 - p); } doneStringMap: TRACE_WITH_OBJ(("%.20s %.20s %.20s => ", @@ -5565,7 +5565,7 @@ TEBCresume( valuePtr = OBJ_AT_TOS; TRACE(("%s \"%.30s\" => ", tclStringClassTable[opnd].name, O2S(valuePtr))); - ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length); + ustring1 = TclGetUnicodeFromObj_(valuePtr, &length); match = 1; if (length > 0) { int ch; @@ -5596,8 +5596,8 @@ TEBCresume( || TclHasInternalRep(value2Ptr, &tclStringType)) { Tcl_UniChar *ustring1, *ustring2; - ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length); - ustring2 = Tcl_GetUnicodeFromObj(value2Ptr, &length2); + ustring1 = TclGetUnicodeFromObj_(valuePtr, &length); + ustring2 = TclGetUnicodeFromObj_(value2Ptr, &length2); match = TclUniCharMatch(ustring1, length, ustring2, length2, nocase); } else if (TclIsPureByteArray(valuePtr) && !nocase) { diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c index 8e588ac..bb8a6ad 100644 --- a/generic/tclRegexp.c +++ b/generic/tclRegexp.c @@ -482,7 +482,7 @@ Tcl_RegExpExecObj( regexpPtr->string = NULL; regexpPtr->objPtr = textObj; - udata = Tcl_GetUnicodeFromObj(textObj, &length); + udata = TclGetUnicodeFromObj_(textObj, &length); if (offset > length) { offset = length; diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 972eef7..a723586 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -80,14 +80,41 @@ static void UpdateStringOfString(Tcl_Obj *objPtr); * functions that can be invoked by generic object code. */ -const Tcl_ObjType tclStringType = { - "string", /* name */ +static const Tcl_ObjType utf32StringType = { + "utf32string", /* name */ FreeStringInternalRep, /* freeIntRepPro */ DupStringInternalRep, /* dupIntRepProc */ UpdateStringOfString, /* updateStringProc */ SetStringFromAny /* setFromAnyProc */ }; +typedef struct { + int numChars; /* The number of chars in the string. */ + int allocated; /* The amount of space actually allocated for + * the UTF-16 string (minus 1 byte for the + * termination char). */ + int maxChars; /* Max number of chars that can fit in the + * space allocated for the UTF-16 array. */ + int hasUnicode; /* Boolean determining whether the string has + * a UTF-16 representation. Always 1 */ + unsigned short unicode[TCLFLEXARRAY]; /* The array of Unicode chars. The actual size + * of this field depends on the 'maxChars' + * field above. */ +} UTF16String; + +const Tcl_ObjType tclStringType = { + "string", /* name */ + FreeStringInternalRep, /* freeIntRepPro */ +#if 0 + /* TODO JN */ + DupUTF16StringInternalRep, /* dupIntRepProc */ + UpdateUTF16StringOfString, /* updateStringProc */ + SetUTF16StringFromAny /* setFromAnyProc */ +#endif + NULL, NULL, NULL +}; + + /* * TCL STRING GROWTH ALGORITHM * @@ -656,8 +683,8 @@ Tcl_GetUnicode( *---------------------------------------------------------------------- */ -Tcl_UniChar * -Tcl_GetUnicodeFromObj( +int * +TclGetUnicodeFromObj_( Tcl_Obj *objPtr, /* The object to find the unicode string * for. */ int *lengthPtr) /* If non-NULL, the location where the string @@ -679,6 +706,22 @@ Tcl_GetUnicodeFromObj( } return stringPtr->unicode; } + +unsigned short * +Tcl_GetUnicodeFromObj( + Tcl_Obj *objPtr, /* The object to find the unicode string + * for. */ + int *lengthPtr) /* If non-NULL, the location where the string + * rep's unichar length should be stored. If + * NULL, no length is stored. */ +{ + (void)objPtr; + (void)lengthPtr; + + /* TODO JN */ + return NULL; +} + unsigned short * TclGetUnicodeFromObj( Tcl_Obj *objPtr, /* The object to find the unicode string @@ -1142,7 +1185,7 @@ SetUnicodeObj( stringCheckLimits(numChars); stringPtr = stringAlloc(numChars); SET_STRING(objPtr, stringPtr); - objPtr->typePtr = &tclStringType; + objPtr->typePtr = &utf32StringType; stringPtr->maxChars = numChars; memcpy(stringPtr->unicode, unicode, numChars * sizeof(Tcl_UniChar)); @@ -1230,7 +1273,7 @@ Tcl_AppendLimitedToObj( /* If appended string starts with a continuation byte or a lower surrogate, * force objPtr to unicode representation. See [7f1162a867] */ if (bytes && ISCONTINUATION(bytes)) { - Tcl_GetUnicodeFromObj(objPtr, NULL); + TclGetUnicodeFromObj_(objPtr, NULL); stringPtr = GET_STRING(objPtr); } if (stringPtr->hasUnicode && stringPtr->numChars > 0) { @@ -1298,9 +1341,9 @@ Tcl_AppendToObj( */ void -Tcl_AppendUnicodeToObj( +TclAppendUnicodeToObj( Tcl_Obj *objPtr, /* Points to the object to append to. */ - const Tcl_UniChar *unicode, /* The unicode string to append to the + const int *unicode, /* The unicode string to append to the * object. */ int length) /* Number of chars in "unicode". */ { @@ -1330,6 +1373,20 @@ Tcl_AppendUnicodeToObj( } } +void +Tcl_AppendUnicodeToObj( + Tcl_Obj *objPtr, /* Points to the object to append to. */ + const unsigned short *unicode, /* The unicode string to append to the + * object. */ + int length) /* Number of chars in "unicode". */ +{ + (void)objPtr; + (void)unicode; + (void)length; + + /* TODO JN */ +} + /* *---------------------------------------------------------------------- * @@ -1434,7 +1491,7 @@ Tcl_AppendObjToObj( * force objPtr to unicode representation. See [7f1162a867] * This fixes append-3.4, append-3.7 and utf-1.18 testcases. */ if (ISCONTINUATION(TclGetString(appendObjPtr))) { - Tcl_GetUnicodeFromObj(objPtr, NULL); + TclGetUnicodeFromObj_(objPtr, NULL); stringPtr = GET_STRING(objPtr); } /* @@ -1447,9 +1504,9 @@ Tcl_AppendObjToObj( * If appendObjPtr is not of the "String" type, don't convert it. */ - if (TclHasInternalRep(appendObjPtr, &tclStringType)) { + if (TclHasInternalRep(appendObjPtr, &utf32StringType)) { Tcl_UniChar *unicode = - Tcl_GetUnicodeFromObj(appendObjPtr, &numChars); + TclGetUnicodeFromObj_(appendObjPtr, &numChars); AppendUnicodeToUnicodeRep(objPtr, unicode, numChars); } else { @@ -1468,7 +1525,7 @@ Tcl_AppendObjToObj( bytes = TclGetStringFromObj(appendObjPtr, &length); numChars = stringPtr->numChars; - if ((numChars >= 0) && TclHasInternalRep(appendObjPtr, &tclStringType)) { + if ((numChars >= 0) && TclHasInternalRep(appendObjPtr, &utf32StringType)) { String *appendStringPtr = GET_STRING(appendObjPtr); appendNumChars = appendStringPtr->numChars; @@ -2877,7 +2934,7 @@ TclGetStringStorage( { String *stringPtr; - if (!TclHasInternalRep(objPtr, &tclStringType) || objPtr->bytes == NULL) { + if (!TclHasInternalRep(objPtr, &utf32StringType) || objPtr->bytes == NULL) { return TclGetStringFromObj(objPtr, (int *)sizePtr); } @@ -2925,7 +2982,7 @@ TclStringRepeat( */ if (!binary) { - if (TclHasInternalRep(objPtr, &tclStringType)) { + if (TclHasInternalRep(objPtr, &utf32StringType)) { String *stringPtr = GET_STRING(objPtr); if (stringPtr->hasUnicode) { unichar = 1; @@ -2938,7 +2995,7 @@ TclStringRepeat( Tcl_GetByteArrayFromObj(objPtr, &length); } else if (unichar) { /* Result will be pure Tcl_UniChar array. Pre-size it. */ - Tcl_GetUnicodeFromObj(objPtr, &length); + TclGetUnicodeFromObj_(objPtr, &length); } else { /* Result will be concat of string reps. Pre-size it. */ Tcl_GetStringFromObj(objPtr, &length); @@ -2978,7 +3035,7 @@ TclStringRepeat( */ if (!inPlace || Tcl_IsShared(objPtr)) { - objResultPtr = TclNewUnicodeObj(Tcl_GetUnicodeFromObj(objPtr, NULL), length); + objResultPtr = TclNewUnicodeObj(TclGetUnicodeFromObj_(objPtr, NULL), length); } else { TclInvalidateStringRep(objPtr); objResultPtr = objPtr; @@ -2999,7 +3056,7 @@ TclStringRepeat( Tcl_AppendObjToObj(objResultPtr, objResultPtr); done *= 2; } - Tcl_AppendUnicodeToObj(objResultPtr, Tcl_GetUnicodeFromObj(objResultPtr, NULL), + TclAppendUnicodeToObj(objResultPtr, TclGetUnicodeFromObj_(objResultPtr, NULL), (count - done) * length); } else { /* @@ -3096,7 +3153,7 @@ TclStringCat( binary = 0; if (ov > objv+1 && ISCONTINUATION(TclGetString(objPtr))) { forceUniChar = 1; - } else if ((objPtr->typePtr) && (objPtr->typePtr != &tclStringType)) { + } else if ((objPtr->typePtr) && (objPtr->typePtr != &utf32StringType)) { /* Prevent shimmer of non-string types. */ allowUniChar = 0; } @@ -3104,7 +3161,7 @@ TclStringCat( } else { /* assert (objPtr->typePtr != NULL) -- stork! */ binary = 0; - if (TclHasInternalRep(objPtr, &tclStringType)) { + if (TclHasInternalRep(objPtr, &utf32StringType)) { /* Have a pure Unicode value; ask to preserve it */ requestUniChar = 1; } else { @@ -3158,7 +3215,7 @@ TclStringCat( if ((objPtr->bytes == NULL) || (objPtr->length)) { int numChars; - Tcl_GetUnicodeFromObj(objPtr, &numChars); /* PANIC? */ + TclGetUnicodeFromObj_(objPtr, &numChars); /* PANIC? */ if (numChars) { last = objc - oc; if (length == 0) { @@ -3308,7 +3365,7 @@ TclStringCat( objResultPtr = *objv++; objc--; /* Ugly interface! Force resize of the unicode array. */ - Tcl_GetUnicodeFromObj(objResultPtr, &start); + TclGetUnicodeFromObj_(objResultPtr, &start); Tcl_InvalidateStringRep(objResultPtr); if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) { if (interp) { @@ -3320,7 +3377,7 @@ TclStringCat( } return NULL; } - dst = Tcl_GetUnicodeFromObj(objResultPtr, NULL) + start; + dst = TclGetUnicodeFromObj_(objResultPtr, NULL) + start; } else { Tcl_UniChar ch = 0; @@ -3337,14 +3394,14 @@ TclStringCat( } return NULL; } - dst = Tcl_GetUnicodeFromObj(objResultPtr, NULL); + dst = TclGetUnicodeFromObj_(objResultPtr, NULL); } while (objc--) { Tcl_Obj *objPtr = *objv++; if ((objPtr->bytes == NULL) || (objPtr->length)) { int more; - Tcl_UniChar *src = Tcl_GetUnicodeFromObj(objPtr, &more); + Tcl_UniChar *src = TclGetUnicodeFromObj_(objPtr, &more); memcpy(dst, src, more * sizeof(Tcl_UniChar)); dst += more; } @@ -3457,8 +3514,8 @@ TclStringCmp( s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len); s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len); memCmpFn = memcmp; - } else if (TclHasInternalRep(value1Ptr, &tclStringType) - && TclHasInternalRep(value2Ptr, &tclStringType)) { + } else if (TclHasInternalRep(value1Ptr, &utf32StringType) + && TclHasInternalRep(value2Ptr, &utf32StringType)) { /* * Do a unicode-specific comparison if both of the args are of * String type. If the char length == byte length, we can do a @@ -3467,8 +3524,8 @@ TclStringCmp( */ if (nocase) { - s1 = (char *) Tcl_GetUnicodeFromObj(value1Ptr, &s1len); - s2 = (char *) Tcl_GetUnicodeFromObj(value2Ptr, &s2len); + s1 = (char *) TclGetUnicodeFromObj_(value1Ptr, &s1len); + s2 = (char *) TclGetUnicodeFromObj_(value2Ptr, &s2len); memCmpFn = (memCmpFn_t)(void *)TclUniCharNcasecmp; } else { s1len = Tcl_GetCharLength(value1Ptr); @@ -3481,8 +3538,8 @@ TclStringCmp( s2 = value2Ptr->bytes; memCmpFn = memcmp; } else { - s1 = (char *) Tcl_GetUnicodeFromObj(value1Ptr, NULL); - s2 = (char *) Tcl_GetUnicodeFromObj(value2Ptr, NULL); + s1 = (char *) TclGetUnicodeFromObj_(value1Ptr, NULL); + s2 = (char *) TclGetUnicodeFromObj_(value2Ptr, NULL); if ( #if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3) 1 @@ -3680,8 +3737,8 @@ TclStringFirst( * do only the well-defined Tcl_UniChar array search. */ - un = Tcl_GetUnicodeFromObj(needle, &ln); - uh = Tcl_GetUnicodeFromObj(haystack, &lh); + un = TclGetUnicodeFromObj_(needle, &ln); + uh = TclGetUnicodeFromObj_(haystack, &lh); if ((lh < ln) || (start > lh - ln)) { /* Don't start the loop if there cannot be a valid answer */ goto firstEnd; @@ -3763,8 +3820,8 @@ TclStringLast( goto lastEnd; } - uh = Tcl_GetUnicodeFromObj(haystack, &lh); - un = Tcl_GetUnicodeFromObj(needle, &ln); + uh = TclGetUnicodeFromObj_(haystack, &lh); + un = TclGetUnicodeFromObj_(needle, &ln); if (last >= lh) { last = lh - 1; @@ -3856,7 +3913,7 @@ TclStringReverse( stringPtr = GET_STRING(objPtr); if (stringPtr->hasUnicode) { - Tcl_UniChar *from = Tcl_GetUnicodeFromObj(objPtr, NULL); + Tcl_UniChar *from = TclGetUnicodeFromObj_(objPtr, NULL); stringPtr = GET_STRING(objPtr); Tcl_UniChar *src = from + stringPtr->numChars; Tcl_UniChar *to; @@ -3869,7 +3926,7 @@ TclStringReverse( objPtr = TclNewUnicodeObj(&ch, 1); Tcl_SetObjLength(objPtr, stringPtr->numChars); - to = Tcl_GetUnicodeFromObj(objPtr, NULL); + to = TclGetUnicodeFromObj_(objPtr, NULL); stringPtr = GET_STRING(objPtr); while (--src >= from) { #if TCL_UTF_MAX < 4 @@ -4099,7 +4156,7 @@ TclStringReplace( /* The traditional implementation... */ { int numChars; - Tcl_UniChar *ustring = Tcl_GetUnicodeFromObj(objPtr, &numChars); + Tcl_UniChar *ustring = TclGetUnicodeFromObj_(objPtr, &numChars); /* TODO: Is there an in-place option worth pursuing here? */ @@ -4108,7 +4165,7 @@ TclStringReplace( Tcl_AppendObjToObj(result, insertPtr); } if (first + count < numChars) { - Tcl_AppendUnicodeToObj(result, ustring + first + count, + TclAppendUnicodeToObj(result, ustring + first + count, numChars - first - count); } @@ -4267,7 +4324,7 @@ DupStringInternalRep( copyStringPtr->allocated = copyPtr->bytes ? copyPtr->length : 0; SET_STRING(copyPtr, copyStringPtr); - copyPtr->typePtr = &tclStringType; + copyPtr->typePtr = &utf32StringType; } /* @@ -4292,7 +4349,7 @@ SetStringFromAny( TCL_UNUSED(Tcl_Interp *), Tcl_Obj *objPtr) /* The object to convert. */ { - if (!TclHasInternalRep(objPtr, &tclStringType)) { + if (!TclHasInternalRep(objPtr, &utf32StringType)) { String *stringPtr = stringAlloc(0); /* @@ -4312,7 +4369,7 @@ SetStringFromAny( stringPtr->maxChars = 0; stringPtr->hasUnicode = 0; SET_STRING(objPtr, stringPtr); - objPtr->typePtr = &tclStringType; + objPtr->typePtr = &utf32StringType; } return TCL_OK; } diff --git a/generic/tclTestObj.c b/generic/tclTestObj.c index 9814cfe..e99d4c1 100644 --- a/generic/tclTestObj.c +++ b/generic/tclTestObj.c @@ -1076,6 +1076,7 @@ TestobjCmd( #ifndef TCL_WIDE_INT_IS_LONG if (!strcmp(typeName, "wideInt")) typeName = "int"; #endif + if (!strcmp(typeName, "utf32string")) typeName = "string"; Tcl_SetObjResult(interp, Tcl_NewStringObj(typeName, -1)); } } else if (strcmp(subCmd, "refcount") == 0) { diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 66d1009..9cc82cb 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -2594,8 +2594,8 @@ TclStringMatchObj( if (TclHasInternalRep(strObj, &tclStringType) || (strObj->typePtr == NULL)) { Tcl_UniChar *udata, *uptn; - udata = Tcl_GetUnicodeFromObj(strObj, &length); - uptn = Tcl_GetUnicodeFromObj(ptnObj, &plen); + udata = TclGetUnicodeFromObj_(strObj, &length); + uptn = TclGetUnicodeFromObj_(ptnObj, &plen); match = TclUniCharMatch(udata, length, uptn, plen, flags); } else if (TclIsPureByteArray(strObj) && TclIsPureByteArray(ptnObj) && !flags) { diff --git a/tests/string.test b/tests/string.test index 203d0c6..9cac73d 100644 --- a/tests/string.test +++ b/tests/string.test @@ -422,7 +422,7 @@ test string-4.16.$noComp {string first, normal string vs pure unicode string} -b # Representation checks are canaries run {list [representationpoke $s] [representationpoke $m] \ [string first $m $s]} -} -result {{string 1} {string 0} 2} +} -result {{utf32string 1} {utf32string 0} 2} test string-4.17.$noComp {string first, corner case} -body { run {string first a aaa 4294967295} } -result {-1} |
