diff options
-rw-r--r-- | .github/workflows/linux-build.yml | 2 | ||||
-rw-r--r-- | .github/workflows/win-build.yml | 4 | ||||
-rw-r--r-- | generic/tcl.decls | 37 | ||||
-rw-r--r-- | generic/tcl.h | 6 | ||||
-rw-r--r-- | generic/tclBinary.c | 4 | ||||
-rw-r--r-- | generic/tclCmdMZ.c | 120 | ||||
-rw-r--r-- | generic/tclCompCmdsSZ.c | 2 | ||||
-rw-r--r-- | generic/tclDecls.h | 70 | ||||
-rw-r--r-- | generic/tclEncoding.c | 2 | ||||
-rw-r--r-- | generic/tclExecute.c | 40 | ||||
-rw-r--r-- | generic/tclIO.c | 4 | ||||
-rw-r--r-- | generic/tclInt.h | 65 | ||||
-rw-r--r-- | generic/tclObj.c | 2 | ||||
-rw-r--r-- | generic/tclProc.c | 2 | ||||
-rw-r--r-- | generic/tclRegexp.c | 6 | ||||
-rw-r--r-- | generic/tclStringObj.c | 715 | ||||
-rw-r--r-- | generic/tclStringRep.h | 11 | ||||
-rw-r--r-- | generic/tclStubInit.c | 36 | ||||
-rw-r--r-- | generic/tclTest.c | 7 | ||||
-rw-r--r-- | generic/tclTestObj.c | 31 | ||||
-rw-r--r-- | generic/tclUtf.c | 342 | ||||
-rw-r--r-- | generic/tclUtil.c | 6 | ||||
-rw-r--r-- | tests/obj.test | 4 | ||||
-rw-r--r-- | tests/string.test | 2 | ||||
-rw-r--r-- | tests/stringObj.test | 43 | ||||
-rw-r--r-- | tests/utf.test | 46 | ||||
-rw-r--r-- | win/makefile.vc | 4 | ||||
-rw-r--r-- | win/rules.vc | 15 |
28 files changed, 1210 insertions, 418 deletions
diff --git a/.github/workflows/linux-build.yml b/.github/workflows/linux-build.yml index 1549b5f..c365faa 100644 --- a/.github/workflows/linux-build.yml +++ b/.github/workflows/linux-build.yml @@ -7,7 +7,7 @@ jobs: matrix: cfgopt: - "" - - "CFLAGS=-DTCL_UTF_MAX=4" + - "CFLAGS=-DTCL_UTF_MAX=3" - "CFLAGS=-DTCL_NO_DEPRECATED=1" - "--disable-shared" - "--enable-symbols" diff --git a/.github/workflows/win-build.yml b/.github/workflows/win-build.yml index dce303b..5b1b144 100644 --- a/.github/workflows/win-build.yml +++ b/.github/workflows/win-build.yml @@ -13,7 +13,7 @@ jobs: matrix: cfgopt: - "" - - "OPTS=utfmax" + - "OPTS=utf16" - "CHECKS=nodep" - "OPTS=static" - "OPTS=symbols" @@ -52,7 +52,7 @@ jobs: matrix: cfgopt: - "" - - "CFLAGS=-DTCL_UTF_MAX=4" + - "CFLAGS=-DTCL_UTF_MAX=3" - "CFLAGS=-DTCL_NO_DEPRECATED=1" - "--disable-shared" - "--enable-symbols" diff --git a/generic/tcl.decls b/generic/tcl.decls index 3cf794e..f0718c1 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -1248,7 +1248,7 @@ declare 352 { int Tcl_Char16Len(const unsigned short *uniStr) } declare 353 {deprecated {Use Tcl_UtfNcmp}} { - int Tcl_UniCharNcmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, + int Tcl_UniCharNcmp(const unsigned short *ucs, const unsigned short *uct, unsigned long numChars) } declare 354 { @@ -1338,10 +1338,10 @@ declare 377 { void Tcl_RegExpGetInfo(Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr) } declare 378 { - Tcl_Obj *Tcl_NewUnicodeObj(const Tcl_UniChar *unicode, int numChars) + Tcl_Obj *Tcl_NewUnicodeObj(const unsigned short *unicode, int numChars) } declare 379 { - void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode, + void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, const unsigned short *unicode, int numChars) } declare 380 { @@ -1351,13 +1351,13 @@ declare 381 { int Tcl_GetUniChar(Tcl_Obj *objPtr, int index) } declare 382 {deprecated {No longer in use, changed to macro}} { - Tcl_UniChar *Tcl_GetUnicode(Tcl_Obj *objPtr) + unsigned short *Tcl_GetUnicode(Tcl_Obj *objPtr) } declare 383 { Tcl_Obj *Tcl_GetRange(Tcl_Obj *objPtr, int first, int last) } -declare 384 {deprecated {Use Tcl_AppendStringsToObj}} { - void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode, +declare 384 { + void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const unsigned short *unicode, int length) } declare 385 { @@ -1483,12 +1483,12 @@ declare 418 { int Tcl_IsChannelExisting(const char *channelName) } declare 419 {deprecated {Use Tcl_UtfNcasecmp}} { - int Tcl_UniCharNcasecmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, + int Tcl_UniCharNcasecmp(const unsigned short *ucs, const unsigned short *uct, unsigned long numChars) } declare 420 {deprecated {Use Tcl_StringCaseMatch}} { - int Tcl_UniCharCaseMatch(const Tcl_UniChar *uniStr, - const Tcl_UniChar *uniPattern, int nocase) + int Tcl_UniCharCaseMatch(const unsigned short *uniStr, + const unsigned short *uniPattern, int nocase) } declare 421 { Tcl_HashEntry *Tcl_FindHashEntry(Tcl_HashTable *tablePtr, const void *key) @@ -1541,7 +1541,7 @@ declare 433 { # introduced in 8.4a3 declare 434 { - Tcl_UniChar *Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr) + unsigned short *Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr) } # TIP#15 (math function introspection) dkf @@ -2417,7 +2417,7 @@ declare 651 { char *TclGetStringFromObj(Tcl_Obj *objPtr, size_t *lengthPtr) } declare 652 { - Tcl_UniChar *TclGetUnicodeFromObj(Tcl_Obj *objPtr, size_t *lengthPtr) + unsigned short *TclGetUnicodeFromObj(Tcl_Obj *objPtr, size_t *lengthPtr) } declare 653 { unsigned char *TclGetByteArrayFromObj(Tcl_Obj *objPtr, size_t *numBytesPtr) @@ -2454,6 +2454,21 @@ declare 660 { declare 668 { int Tcl_UniCharLen(const int *uniStr) } +declare 669 { + int TclNumUtfChars(const char *src, int length) +} +declare 670 { + int TclGetCharLength(Tcl_Obj *objPtr) +} +declare 671 { + const char *TclUtfAtIndex(const char *src, int index) +} +declare 672 { + Tcl_Obj *TclGetRange(Tcl_Obj *objPtr, int first, int last) +} +declare 673 { + int TclGetUniChar(Tcl_Obj *objPtr, int index) +} # ----- BASELINE -- FOR -- 8.7.0 ----- # diff --git a/generic/tcl.h b/generic/tcl.h index eff58b3..e6cc4be 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2142,7 +2142,11 @@ typedef struct Tcl_EncodingType { */ #ifndef TCL_UTF_MAX -#define TCL_UTF_MAX 3 +# ifdef BUILD_tcl +# define TCL_UTF_MAX 4 +# else +# define TCL_UTF_MAX 3 +# endif #endif /* diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 4717b05..bc17232 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -434,7 +434,7 @@ TclGetBytesFromObj( irPtr = TclFetchInternalRep(objPtr, &tclByteArrayType); baPtr = GET_BYTEARRAY(irPtr); - nonbyte = Tcl_UtfAtIndex(Tcl_GetString(objPtr), baPtr->bad); + nonbyte = TclUtfAtIndex(Tcl_GetString(objPtr), baPtr->bad); TclUtfToUCS4(nonbyte, &ucs4); Tcl_SetObjResult(interp, Tcl_ObjPrintf( @@ -473,7 +473,7 @@ Tcl_GetBytesFromObj( irPtr = TclFetchInternalRep(objPtr, &tclByteArrayType); baPtr = GET_BYTEARRAY(irPtr); - nonbyte = Tcl_UtfAtIndex(Tcl_GetString(objPtr), baPtr->bad); + nonbyte = TclUtfAtIndex(Tcl_GetString(objPtr), baPtr->bad); TclUtfToUCS4(nonbyte, &ucs4); Tcl_SetObjResult(interp, Tcl_ObjPrintf( diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index f394035..29a73cf 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -256,7 +256,7 @@ Tcl_RegexpObjCmd( */ objPtr = objv[1]; - stringLength = Tcl_GetCharLength(objPtr); + stringLength = TclGetCharLength(objPtr); if (startIndex) { TclGetIntForIndexM(interp, startIndex, stringLength, &offset); @@ -310,7 +310,7 @@ Tcl_RegexpObjCmd( eflags = 0; } else if (offset > stringLength) { eflags = TCL_REG_NOTBOL; - } else if (Tcl_GetUniChar(objPtr, offset-1) == '\n') { + } else if (TclGetUniChar(objPtr, offset-1) == '\n') { eflags = 0; } else { eflags = TCL_REG_NOTBOL; @@ -395,7 +395,7 @@ Tcl_RegexpObjCmd( newPtr = Tcl_NewListObj(2, objs); } else { if ((i <= info.nsubs) && (info.matches[i].end > 0)) { - newPtr = Tcl_GetRange(objPtr, + newPtr = TclGetRange(objPtr, offset + info.matches[i].start, offset + info.matches[i].end - 1); } else { @@ -581,7 +581,7 @@ Tcl_RegsubObjCmd( objv += idx; if (startIndex) { - int stringLength = Tcl_GetCharLength(objv[1]); + int stringLength = TclGetCharLength(objv[1]); TclGetIntForIndexM(interp, startIndex, stringLength, &offset); Tcl_DecrRefCount(startIndex); @@ -604,11 +604,11 @@ Tcl_RegsubObjCmd( numMatches = 0; nocase = (cflags & TCL_REG_NOCASE); - strCmpFn = nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp; + strCmpFn = nocase ? TclUniCharNcasecmp : TclUniCharNcmp; - wsrc = Tcl_GetUnicodeFromObj(objv[0], &slen); - wstring = Tcl_GetUnicodeFromObj(objv[1], &wlen); - wsubspec = Tcl_GetUnicodeFromObj(objv[2], &wsublen); + wsrc = TclGetUnicodeFromObj_(objv[0], &slen); + wstring = TclGetUnicodeFromObj_(objv[1], &wlen); + wsubspec = TclGetUnicodeFromObj_(objv[2], &wsublen); wend = wstring + wlen - (slen ? slen - 1 : 0); result = TCL_OK; @@ -619,11 +619,11 @@ Tcl_RegsubObjCmd( */ if (wstring < wend) { - resultPtr = Tcl_NewUnicodeObj(wstring, 0); + resultPtr = TclNewUnicodeObj(wstring, 0); Tcl_IncrRefCount(resultPtr); for (; wstring < wend; wstring++) { - Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen); - Tcl_AppendUnicodeToObj(resultPtr, wstring, 1); + TclAppendUnicodeToObj(resultPtr, wsubspec, wsublen); + TclAppendUnicodeToObj(resultPtr, wstring, 1); numMatches++; } wlen = 0; @@ -636,18 +636,18 @@ Tcl_RegsubObjCmd( (slen==1 || (strCmpFn(wstring, wsrc, (unsigned long) slen) == 0))) { if (numMatches == 0) { - resultPtr = Tcl_NewUnicodeObj(wstring, 0); + resultPtr = TclNewUnicodeObj(wstring, 0); Tcl_IncrRefCount(resultPtr); } if (p != wstring) { - Tcl_AppendUnicodeToObj(resultPtr, p, wstring - p); + TclAppendUnicodeToObj(resultPtr, p, wstring - p); p = wstring + slen; } else { p += slen; } wstring = p - 1; - Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen); + TclAppendUnicodeToObj(resultPtr, wsubspec, wsublen); numMatches++; } } @@ -699,14 +699,14 @@ Tcl_RegsubObjCmd( } else { objPtr = objv[1]; } - wstring = Tcl_GetUnicodeFromObj(objPtr, &wlen); + wstring = TclGetUnicodeFromObj_(objPtr, &wlen); if (objv[2] == objv[0]) { subPtr = Tcl_DuplicateObj(objv[2]); } else { subPtr = objv[2]; } if (!command) { - wsubspec = Tcl_GetUnicodeFromObj(subPtr, &wsublen); + wsubspec = TclGetUnicodeFromObj_(subPtr, &wsublen); } result = TCL_OK; @@ -742,7 +742,7 @@ Tcl_RegsubObjCmd( break; } if (numMatches == 0) { - resultPtr = Tcl_NewUnicodeObj(wstring, 0); + resultPtr = TclNewUnicodeObj(wstring, 0); Tcl_IncrRefCount(resultPtr); if (offset > 0) { /* @@ -750,7 +750,7 @@ Tcl_RegsubObjCmd( * specified. */ - Tcl_AppendUnicodeToObj(resultPtr, wstring, offset); + TclAppendUnicodeToObj(resultPtr, wstring, offset); } } numMatches++; @@ -763,7 +763,7 @@ Tcl_RegsubObjCmd( Tcl_RegExpGetInfo(regExpr, &info); start = info.matches[0].start; end = info.matches[0].end; - Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, start); + TclAppendUnicodeToObj(resultPtr, wstring + offset, start); /* * In command-prefix mode, the substitutions are added as quoted @@ -785,7 +785,7 @@ Tcl_RegsubObjCmd( subStart = info.matches[idx].start; subEnd = info.matches[idx].end; if ((subStart >= 0) && (subEnd >= 0)) { - args[idx + numParts] = Tcl_NewUnicodeObj( + args[idx + numParts] = TclNewUnicodeObj( wstring + offset + subStart, subEnd - subStart); } else { TclNewObj(args[idx + numParts]); @@ -826,7 +826,7 @@ Tcl_RegsubObjCmd( * the user code. */ - wstring = Tcl_GetUnicodeFromObj(objPtr, &wlen); + wstring = TclGetUnicodeFromObj_(objPtr, &wlen); offset += end; if (end == 0 || start == end) { @@ -838,7 +838,7 @@ Tcl_RegsubObjCmd( */ if (offset < wlen) { - Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1); + TclAppendUnicodeToObj(resultPtr, wstring + offset, 1); } offset++; } @@ -867,7 +867,7 @@ Tcl_RegsubObjCmd( idx = ch - '0'; } else if ((ch == '\\') || (ch == '&')) { *wsrc = ch; - Tcl_AppendUnicodeToObj(resultPtr, wfirstChar, + TclAppendUnicodeToObj(resultPtr, wfirstChar, wsrc - wfirstChar + 1); *wsrc = '\\'; wfirstChar = wsrc + 2; @@ -881,7 +881,7 @@ Tcl_RegsubObjCmd( } if (wfirstChar != wsrc) { - Tcl_AppendUnicodeToObj(resultPtr, wfirstChar, + TclAppendUnicodeToObj(resultPtr, wfirstChar, wsrc - wfirstChar); } @@ -889,7 +889,7 @@ Tcl_RegsubObjCmd( subStart = info.matches[idx].start; subEnd = info.matches[idx].end; if ((subStart >= 0) && (subEnd >= 0)) { - Tcl_AppendUnicodeToObj(resultPtr, + TclAppendUnicodeToObj(resultPtr, wstring + offset + subStart, subEnd - subStart); } } @@ -901,7 +901,7 @@ Tcl_RegsubObjCmd( } if (wfirstChar != wsrc) { - Tcl_AppendUnicodeToObj(resultPtr, wfirstChar, wsrc - wfirstChar); + TclAppendUnicodeToObj(resultPtr, wfirstChar, wsrc - wfirstChar); } if (end == 0) { @@ -911,7 +911,7 @@ Tcl_RegsubObjCmd( */ if (offset < wlen) { - Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1); + TclAppendUnicodeToObj(resultPtr, wstring + offset, 1); } offset++; } else { @@ -923,7 +923,7 @@ Tcl_RegsubObjCmd( */ if (offset < wlen) { - Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1); + TclAppendUnicodeToObj(resultPtr, wstring + offset, 1); } offset++; } @@ -948,7 +948,7 @@ Tcl_RegsubObjCmd( resultPtr = objv[1]; Tcl_IncrRefCount(resultPtr); } else if (offset < wlen) { - Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset); + TclAppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset); } if (objc == 4) { if (Tcl_ObjSetVar2(interp, objv[3], NULL, resultPtr, @@ -1316,7 +1316,7 @@ StringFirstCmd( } if (objc == 4) { - int size = Tcl_GetCharLength(objv[2]); + int size = TclGetCharLength(objv[2]); if (TCL_OK != TclGetIntForIndexM(interp, objv[3], size - 1, &start)) { return TCL_ERROR; @@ -1360,7 +1360,7 @@ StringLastCmd( } if (objc == 4) { - int size = Tcl_GetCharLength(objv[2]); + int size = TclGetCharLength(objv[2]); if (TCL_OK != TclGetIntForIndexM(interp, objv[3], size - 1, &last)) { return TCL_ERROR; @@ -1406,13 +1406,13 @@ StringIndexCmd( * Get the char length to calculate what 'end' means. */ - length = Tcl_GetCharLength(objv[1]); + length = TclGetCharLength(objv[1]); if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) { return TCL_ERROR; } if ((index >= 0) && (index < length)) { - int ch = Tcl_GetUniChar(objv[1], index); + int ch = TclGetUniChar(objv[1], index); if (ch == -1) { return TCL_OK; @@ -1474,7 +1474,7 @@ StringInsertCmd( return TCL_ERROR; } - length = Tcl_GetCharLength(objv[1]); + length = TclGetCharLength(objv[1]); if (TclGetIntForIndexM(interp, objv[2], length, &index) != TCL_OK) { return TCL_ERROR; } @@ -1669,7 +1669,7 @@ StringIsCmd( p++; } TclNewStringObj(tmpStr, string1, p-string1); - failat = Tcl_GetCharLength(tmpStr); + failat = TclGetCharLength(tmpStr); TclDecrRefCount(tmpStr); break; } @@ -1849,7 +1849,7 @@ StringIsCmd( p++; } TclNewStringObj(tmpStr, string1, p-string1); - failat = Tcl_GetCharLength(tmpStr); + failat = TclGetCharLength(tmpStr); TclDecrRefCount(tmpStr); break; } @@ -2060,7 +2060,7 @@ StringMapCmd( } else { sourceObj = objv[objc-1]; } - ustring1 = Tcl_GetUnicodeFromObj(sourceObj, &length1); + ustring1 = TclGetUnicodeFromObj_(sourceObj, &length1); if (length1 == 0) { /* * Empty input string, just stop now. @@ -2070,13 +2070,13 @@ StringMapCmd( } end = ustring1 + length1; - strCmpFn = (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp); + strCmpFn = (nocase ? TclUniCharNcasecmp : TclUniCharNcmp); /* * Force result to be Unicode */ - resultPtr = Tcl_NewUnicodeObj(ustring1, 0); + resultPtr = TclNewUnicodeObj(ustring1, 0); if (mapElemc == 2) { /* @@ -2089,7 +2089,7 @@ StringMapCmd( int mapLen, u2lc; Tcl_UniChar *mapString; - ustring2 = Tcl_GetUnicodeFromObj(mapElemv[0], &length2); + ustring2 = TclGetUnicodeFromObj_(mapElemv[0], &length2); p = ustring1; if ((length2 > length1) || (length2 == 0)) { /* @@ -2098,7 +2098,7 @@ StringMapCmd( ustring1 = end; } else { - mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen); + mapString = TclGetUnicodeFromObj_(mapElemv[1], &mapLen); u2lc = (nocase ? Tcl_UniCharToLower(*ustring2) : 0); for (; ustring1 < end; ustring1++) { if (((*ustring1 == *ustring2) || @@ -2106,14 +2106,14 @@ StringMapCmd( (length2==1 || strCmpFn(ustring1, ustring2, (unsigned long) length2) == 0)) { if (p != ustring1) { - Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p); + TclAppendUnicodeToObj(resultPtr, p, ustring1-p); p = ustring1 + length2; } else { p += length2; } ustring1 = p - 1; - Tcl_AppendUnicodeToObj(resultPtr, mapString, mapLen); + TclAppendUnicodeToObj(resultPtr, mapString, mapLen); } } } @@ -2134,7 +2134,7 @@ StringMapCmd( u2lc = (int *)TclStackAlloc(interp, mapElemc * sizeof(int)); } for (index = 0; index < mapElemc; index++) { - mapStrings[index] = Tcl_GetUnicodeFromObj(mapElemv[index], + mapStrings[index] = TclGetUnicodeFromObj_(mapElemv[index], mapLens+index); if (nocase && ((index % 2) == 0)) { u2lc[index/2] = Tcl_UniCharToLower(*mapStrings[index]); @@ -2158,7 +2158,7 @@ StringMapCmd( * Put the skipped chars onto the result first. */ - Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p); + TclAppendUnicodeToObj(resultPtr, p, ustring1-p); p = ustring1 + length2; } else { p += length2; @@ -2174,7 +2174,7 @@ StringMapCmd( * Append the map value to the unicode string. */ - Tcl_AppendUnicodeToObj(resultPtr, + TclAppendUnicodeToObj(resultPtr, mapStrings[index+1], mapLens[index+1]); break; } @@ -2191,7 +2191,7 @@ StringMapCmd( * Put the rest of the unmapped chars onto result. */ - Tcl_AppendUnicodeToObj(resultPtr, p, ustring1 - p); + TclAppendUnicodeToObj(resultPtr, p, ustring1 - p); } Tcl_SetObjResult(interp, resultPtr); done: @@ -2293,7 +2293,7 @@ StringRangeCmd( * 'end' refers to the last character, not one past it. */ - length = Tcl_GetCharLength(objv[1]) - 1; + length = TclGetCharLength(objv[1]) - 1; if (TclGetIntForIndexM(interp, objv[2], length, &first) != TCL_OK || TclGetIntForIndexM(interp, objv[3], length, &last) != TCL_OK) { @@ -2301,7 +2301,7 @@ StringRangeCmd( } if (last >= 0) { - Tcl_SetObjResult(interp, Tcl_GetRange(objv[1], first, last)); + Tcl_SetObjResult(interp, TclGetRange(objv[1], first, last)); } return TCL_OK; } @@ -2394,7 +2394,7 @@ StringRplcCmd( return TCL_ERROR; } - length = Tcl_GetCharLength(objv[1]); + length = TclGetCharLength(objv[1]); end = length - 1; if (TclGetIntForIndexM(interp, objv[2], end, &first) != TCL_OK || @@ -2506,7 +2506,7 @@ StringStartCmd( return TCL_ERROR; } - string = Tcl_GetUnicodeFromObj(objv[1], &length); + string = TclGetUnicodeFromObj_(objv[1], &length); if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) { return TCL_ERROR; } @@ -2576,7 +2576,7 @@ StringEndCmd( return TCL_ERROR; } - string = Tcl_GetUnicodeFromObj(objv[1], &length); + string = TclGetUnicodeFromObj_(objv[1], &length); if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) { return TCL_ERROR; } @@ -2880,7 +2880,7 @@ StringLenCmd( return TCL_ERROR; } - Tcl_SetObjResult(interp, Tcl_NewWideIntObj(Tcl_GetCharLength(objv[1]))); + Tcl_SetObjResult(interp, Tcl_NewWideIntObj(TclGetCharLength(objv[1]))); return TCL_OK; } @@ -2954,8 +2954,8 @@ StringLowerCmd( } string1 = TclGetStringFromObj(objv[1], &length1); - start = Tcl_UtfAtIndex(string1, first); - end = Tcl_UtfAtIndex(start, last - first + 1); + start = TclUtfAtIndex(string1, first); + end = TclUtfAtIndex(start, last - first + 1); resultPtr = Tcl_NewStringObj(string1, end - string1); string2 = TclGetString(resultPtr) + (start - string1); @@ -3039,8 +3039,8 @@ StringUpperCmd( } string1 = TclGetStringFromObj(objv[1], &length1); - start = Tcl_UtfAtIndex(string1, first); - end = Tcl_UtfAtIndex(start, last - first + 1); + start = TclUtfAtIndex(string1, first); + end = TclUtfAtIndex(start, last - first + 1); resultPtr = Tcl_NewStringObj(string1, end - string1); string2 = TclGetString(resultPtr) + (start - string1); @@ -3124,8 +3124,8 @@ StringTitleCmd( } string1 = TclGetStringFromObj(objv[1], &length1); - start = Tcl_UtfAtIndex(string1, first); - end = Tcl_UtfAtIndex(start, last - first + 1); + start = TclUtfAtIndex(string1, first); + end = TclUtfAtIndex(start, last - first + 1); resultPtr = Tcl_NewStringObj(string1, end - string1); string2 = TclGetString(resultPtr) + (start - string1); @@ -3790,7 +3790,7 @@ TclNRSwitchObjCmd( if (matchVarObj != NULL) { Tcl_Obj *substringObj; - substringObj = Tcl_GetRange(stringObj, + substringObj = TclGetRange(stringObj, info.matches[j].start, info.matches[j].end-1); /* diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c index aa2d13e..62909eb 100644 --- a/generic/tclCompCmdsSZ.c +++ b/generic/tclCompCmdsSZ.c @@ -890,7 +890,7 @@ TclCompileStringLenCmd( */ char buf[TCL_INTEGER_SPACE]; - int len = Tcl_GetCharLength(objPtr); + int len = TclGetCharLength(objPtr); len = sprintf(buf, "%d", len); PushLiteral(envPtr, buf, len); diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 790cddb..609a1f2 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1066,8 +1066,8 @@ EXTERN int Tcl_UniCharIsWordChar(int ch); EXTERN int Tcl_Char16Len(const unsigned short *uniStr); /* 353 */ TCL_DEPRECATED("Use Tcl_UtfNcmp") -int Tcl_UniCharNcmp(const Tcl_UniChar *ucs, - const Tcl_UniChar *uct, +int Tcl_UniCharNcmp(const unsigned short *ucs, + const unsigned short *uct, unsigned long numChars); /* 354 */ EXTERN char * Tcl_Char16ToUtfDString(const unsigned short *uniStr, @@ -1142,24 +1142,23 @@ EXTERN int Tcl_RegExpExecObj(Tcl_Interp *interp, EXTERN void Tcl_RegExpGetInfo(Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr); /* 378 */ -EXTERN Tcl_Obj * Tcl_NewUnicodeObj(const Tcl_UniChar *unicode, +EXTERN Tcl_Obj * Tcl_NewUnicodeObj(const unsigned short *unicode, int numChars); /* 379 */ EXTERN void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, - const Tcl_UniChar *unicode, int numChars); + const unsigned short *unicode, int numChars); /* 380 */ EXTERN int Tcl_GetCharLength(Tcl_Obj *objPtr); /* 381 */ EXTERN int Tcl_GetUniChar(Tcl_Obj *objPtr, int index); /* 382 */ TCL_DEPRECATED("No longer in use, changed to macro") -Tcl_UniChar * Tcl_GetUnicode(Tcl_Obj *objPtr); +unsigned short * Tcl_GetUnicode(Tcl_Obj *objPtr); /* 383 */ EXTERN Tcl_Obj * Tcl_GetRange(Tcl_Obj *objPtr, int first, int last); /* 384 */ -TCL_DEPRECATED("Use Tcl_AppendStringsToObj") -void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, - const Tcl_UniChar *unicode, int length); +EXTERN void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, + const unsigned short *unicode, int length); /* 385 */ EXTERN int Tcl_RegExpMatchObj(Tcl_Interp *interp, Tcl_Obj *textObj, Tcl_Obj *patternObj); @@ -1256,13 +1255,13 @@ EXTERN void Tcl_ClearChannelHandlers(Tcl_Channel channel); EXTERN int Tcl_IsChannelExisting(const char *channelName); /* 419 */ TCL_DEPRECATED("Use Tcl_UtfNcasecmp") -int Tcl_UniCharNcasecmp(const Tcl_UniChar *ucs, - const Tcl_UniChar *uct, +int Tcl_UniCharNcasecmp(const unsigned short *ucs, + const unsigned short *uct, unsigned long numChars); /* 420 */ TCL_DEPRECATED("Use Tcl_StringCaseMatch") -int Tcl_UniCharCaseMatch(const Tcl_UniChar *uniStr, - const Tcl_UniChar *uniPattern, int nocase); +int Tcl_UniCharCaseMatch(const unsigned short *uniStr, + const unsigned short *uniPattern, int nocase); /* 421 */ EXTERN Tcl_HashEntry * Tcl_FindHashEntry(Tcl_HashTable *tablePtr, const void *key); @@ -1304,7 +1303,7 @@ EXTERN int Tcl_AttemptSetObjLength(Tcl_Obj *objPtr, int length); /* 433 */ EXTERN Tcl_ThreadId Tcl_GetChannelThread(Tcl_Channel channel); /* 434 */ -EXTERN Tcl_UniChar * Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, +EXTERN unsigned short * Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr); /* 435 */ TCL_DEPRECATED("") @@ -1930,7 +1929,7 @@ EXTERN unsigned char * Tcl_GetBytesFromObj(Tcl_Interp *interp, EXTERN char * TclGetStringFromObj(Tcl_Obj *objPtr, size_t *lengthPtr); /* 652 */ -EXTERN Tcl_UniChar * TclGetUnicodeFromObj(Tcl_Obj *objPtr, +EXTERN unsigned short * TclGetUnicodeFromObj(Tcl_Obj *objPtr, size_t *lengthPtr); /* 653 */ EXTERN unsigned char * TclGetByteArrayFromObj(Tcl_Obj *objPtr, @@ -1963,6 +1962,16 @@ EXTERN int Tcl_AsyncMarkFromSignal(Tcl_AsyncHandler async, /* Slot 667 is reserved */ /* 668 */ EXTERN int Tcl_UniCharLen(const int *uniStr); +/* 669 */ +EXTERN int TclNumUtfChars(const char *src, int length); +/* 670 */ +EXTERN int TclGetCharLength(Tcl_Obj *objPtr); +/* 671 */ +EXTERN const char * TclUtfAtIndex(const char *src, int index); +/* 672 */ +EXTERN Tcl_Obj * TclGetRange(Tcl_Obj *objPtr, int first, int last); +/* 673 */ +EXTERN int TclGetUniChar(Tcl_Obj *objPtr, int index); typedef struct { const struct TclPlatStubs *tclPlatStubs; @@ -2351,7 +2360,7 @@ typedef struct TclStubs { int (*tcl_UniCharIsUpper) (int ch); /* 350 */ int (*tcl_UniCharIsWordChar) (int ch); /* 351 */ int (*tcl_Char16Len) (const unsigned short *uniStr); /* 352 */ - TCL_DEPRECATED_API("Use Tcl_UtfNcmp") int (*tcl_UniCharNcmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 353 */ + TCL_DEPRECATED_API("Use Tcl_UtfNcmp") int (*tcl_UniCharNcmp) (const unsigned short *ucs, const unsigned short *uct, unsigned long numChars); /* 353 */ char * (*tcl_Char16ToUtfDString) (const unsigned short *uniStr, int uniLength, Tcl_DString *dsPtr); /* 354 */ unsigned short * (*tcl_UtfToChar16DString) (const char *src, int length, Tcl_DString *dsPtr); /* 355 */ Tcl_RegExp (*tcl_GetRegExpFromObj) (Tcl_Interp *interp, Tcl_Obj *patObj, int flags); /* 356 */ @@ -2376,13 +2385,13 @@ typedef struct TclStubs { int (*tcl_UniCharIsPunct) (int ch); /* 375 */ int (*tcl_RegExpExecObj) (Tcl_Interp *interp, Tcl_RegExp regexp, Tcl_Obj *textObj, int offset, int nmatches, int flags); /* 376 */ void (*tcl_RegExpGetInfo) (Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr); /* 377 */ - Tcl_Obj * (*tcl_NewUnicodeObj) (const Tcl_UniChar *unicode, int numChars); /* 378 */ - void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars); /* 379 */ + Tcl_Obj * (*tcl_NewUnicodeObj) (const unsigned short *unicode, int numChars); /* 378 */ + void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const unsigned short *unicode, int numChars); /* 379 */ int (*tcl_GetCharLength) (Tcl_Obj *objPtr); /* 380 */ int (*tcl_GetUniChar) (Tcl_Obj *objPtr, int index); /* 381 */ - TCL_DEPRECATED_API("No longer in use, changed to macro") Tcl_UniChar * (*tcl_GetUnicode) (Tcl_Obj *objPtr); /* 382 */ + TCL_DEPRECATED_API("No longer in use, changed to macro") unsigned short * (*tcl_GetUnicode) (Tcl_Obj *objPtr); /* 382 */ Tcl_Obj * (*tcl_GetRange) (Tcl_Obj *objPtr, int first, int last); /* 383 */ - TCL_DEPRECATED_API("Use Tcl_AppendStringsToObj") void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int length); /* 384 */ + void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const unsigned short *unicode, int length); /* 384 */ int (*tcl_RegExpMatchObj) (Tcl_Interp *interp, Tcl_Obj *textObj, Tcl_Obj *patternObj); /* 385 */ void (*tcl_SetNotifier) (const Tcl_NotifierProcs *notifierProcPtr); /* 386 */ Tcl_Mutex * (*tcl_GetAllocMutex) (void); /* 387 */ @@ -2417,8 +2426,8 @@ typedef struct TclStubs { void (*tcl_SpliceChannel) (Tcl_Channel channel); /* 416 */ void (*tcl_ClearChannelHandlers) (Tcl_Channel channel); /* 417 */ int (*tcl_IsChannelExisting) (const char *channelName); /* 418 */ - TCL_DEPRECATED_API("Use Tcl_UtfNcasecmp") int (*tcl_UniCharNcasecmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 419 */ - TCL_DEPRECATED_API("Use Tcl_StringCaseMatch") int (*tcl_UniCharCaseMatch) (const Tcl_UniChar *uniStr, const Tcl_UniChar *uniPattern, int nocase); /* 420 */ + TCL_DEPRECATED_API("Use Tcl_UtfNcasecmp") int (*tcl_UniCharNcasecmp) (const unsigned short *ucs, const unsigned short *uct, unsigned long numChars); /* 419 */ + TCL_DEPRECATED_API("Use Tcl_StringCaseMatch") int (*tcl_UniCharCaseMatch) (const unsigned short *uniStr, const unsigned short *uniPattern, int nocase); /* 420 */ Tcl_HashEntry * (*tcl_FindHashEntry) (Tcl_HashTable *tablePtr, const void *key); /* 421 */ Tcl_HashEntry * (*tcl_CreateHashEntry) (Tcl_HashTable *tablePtr, const void *key, int *newPtr); /* 422 */ void (*tcl_InitCustomHashTable) (Tcl_HashTable *tablePtr, int keyType, const Tcl_HashKeyType *typePtr); /* 423 */ @@ -2432,7 +2441,7 @@ typedef struct TclStubs { char * (*tcl_AttemptDbCkrealloc) (char *ptr, unsigned int size, const char *file, int line); /* 431 */ int (*tcl_AttemptSetObjLength) (Tcl_Obj *objPtr, int length); /* 432 */ Tcl_ThreadId (*tcl_GetChannelThread) (Tcl_Channel channel); /* 433 */ - Tcl_UniChar * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int *lengthPtr); /* 434 */ + unsigned short * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int *lengthPtr); /* 434 */ TCL_DEPRECATED_API("") int (*tcl_GetMathFuncInfo) (Tcl_Interp *interp, const char *name, int *numArgsPtr, Tcl_ValueType **argTypesPtr, Tcl_MathProc **procPtr, ClientData *clientDataPtr); /* 435 */ TCL_DEPRECATED_API("") Tcl_Obj * (*tcl_ListMathFuncs) (Tcl_Interp *interp, const char *pattern); /* 436 */ Tcl_Obj * (*tcl_SubstObj) (Tcl_Interp *interp, Tcl_Obj *objPtr, int flags); /* 437 */ @@ -2650,7 +2659,7 @@ typedef struct TclStubs { unsigned char * (*tclGetBytesFromObj) (Tcl_Interp *interp, Tcl_Obj *objPtr, int *numBytesPtr); /* 649 */ unsigned char * (*tcl_GetBytesFromObj) (Tcl_Interp *interp, Tcl_Obj *objPtr, size_t *numBytesPtr); /* 650 */ char * (*tclGetStringFromObj) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 651 */ - Tcl_UniChar * (*tclGetUnicodeFromObj) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 652 */ + unsigned short * (*tclGetUnicodeFromObj) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 652 */ unsigned char * (*tclGetByteArrayFromObj) (Tcl_Obj *objPtr, size_t *numBytesPtr); /* 653 */ int (*tcl_UtfCharComplete) (const char *src, int length); /* 654 */ const char * (*tcl_UtfNext) (const char *src); /* 655 */ @@ -2667,6 +2676,11 @@ typedef struct TclStubs { void (*reserved666)(void); void (*reserved667)(void); int (*tcl_UniCharLen) (const int *uniStr); /* 668 */ + int (*tclNumUtfChars) (const char *src, int length); /* 669 */ + int (*tclGetCharLength) (Tcl_Obj *objPtr); /* 670 */ + const char * (*tclUtfAtIndex) (const char *src, int index); /* 671 */ + Tcl_Obj * (*tclGetRange) (Tcl_Obj *objPtr, int first, int last); /* 672 */ + int (*tclGetUniChar) (Tcl_Obj *objPtr, int index); /* 673 */ } TclStubs; extern const TclStubs *tclStubsPtr; @@ -4028,6 +4042,16 @@ extern const TclStubs *tclStubsPtr; /* Slot 667 is reserved */ #define Tcl_UniCharLen \ (tclStubsPtr->tcl_UniCharLen) /* 668 */ +#define TclNumUtfChars \ + (tclStubsPtr->tclNumUtfChars) /* 669 */ +#define TclGetCharLength \ + (tclStubsPtr->tclGetCharLength) /* 670 */ +#define TclUtfAtIndex \ + (tclStubsPtr->tclUtfAtIndex) /* 671 */ +#define TclGetRange \ + (tclStubsPtr->tclGetRange) /* 672 */ +#define TclGetUniChar \ + (tclStubsPtr->tclGetUniChar) /* 673 */ #endif /* defined(USE_TCL_STUBS) */ diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index fd0386c..2765f2a 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1338,7 +1338,7 @@ Tcl_ExternalToUtf( if (*dstCharsPtr <= maxChars) { break; } - dstLen = Tcl_UtfAtIndex(dst, maxChars) - dst + (TCL_UTF_MAX - 1); + dstLen = TclUtfAtIndex(dst, maxChars) - dst + (TCL_UTF_MAX - 1); *statePtr = savedState; } while (1); if (!noTerminate) { diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 0ec2404..1f72d63 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5244,7 +5244,7 @@ TEBCresume( case INST_STR_LEN: valuePtr = OBJ_AT_TOS; - length = Tcl_GetCharLength(valuePtr); + length = TclGetCharLength(valuePtr); TclNewIntObj(objResultPtr, length); TRACE(("\"%.20s\" => %d\n", O2S(valuePtr), length)); NEXT_INST_F(1, 1, 1); @@ -5310,7 +5310,7 @@ TEBCresume( * Get char length to calulate what 'end' means. */ - length = Tcl_GetCharLength(valuePtr); + length = TclGetCharLength(valuePtr); DECACHE_STACK_INFO(); if (TclGetIntForIndexM(interp, value2Ptr, length-1, &index)!=TCL_OK) { CACHE_STACK_INFO(); @@ -5329,7 +5329,7 @@ TEBCresume( valuePtr->bytes+index, 1); } else { char buf[4] = ""; - int ch = Tcl_GetUniChar(valuePtr, index); + int ch = TclGetUniChar(valuePtr, index); /* * This could be: Tcl_NewUnicodeObj((const Tcl_UniChar *)&ch, 1) @@ -5353,7 +5353,7 @@ TEBCresume( case INST_STR_RANGE: TRACE(("\"%.20s\" %.20s %.20s =>", O2S(OBJ_AT_DEPTH(2)), O2S(OBJ_UNDER_TOS), O2S(OBJ_AT_TOS))); - length = Tcl_GetCharLength(OBJ_AT_DEPTH(2)) - 1; + length = TclGetCharLength(OBJ_AT_DEPTH(2)) - 1; DECACHE_STACK_INFO(); if (TclGetIntForIndexM(interp, OBJ_UNDER_TOS, length, @@ -5373,7 +5373,7 @@ TEBCresume( if (toIdx < 0) { TclNewObj(objResultPtr); } else { - objResultPtr = Tcl_GetRange(OBJ_AT_DEPTH(2), fromIdx, toIdx); + objResultPtr = TclGetRange(OBJ_AT_DEPTH(2), fromIdx, toIdx); } TRACE_APPEND(("\"%.30s\"\n", O2S(objResultPtr))); NEXT_INST_V(1, 3, 1); @@ -5382,7 +5382,7 @@ TEBCresume( valuePtr = OBJ_AT_TOS; fromIdx = TclGetInt4AtPtr(pc+1); toIdx = TclGetInt4AtPtr(pc+5); - length = Tcl_GetCharLength(valuePtr); + length = TclGetCharLength(valuePtr); TRACE(("\"%.20s\" %d %d => ", O2S(valuePtr), fromIdx, toIdx)); /* Every range of an empty value is an empty value */ @@ -5414,7 +5414,7 @@ TEBCresume( if (toIdx < 0) { TclNewObj(objResultPtr); } else { - objResultPtr = Tcl_GetRange(valuePtr, fromIdx, toIdx); + objResultPtr = TclGetRange(valuePtr, fromIdx, toIdx); } } TRACE_APPEND(("%.30s\n", O2S(objResultPtr))); @@ -5428,7 +5428,7 @@ TEBCresume( case INST_STR_REPLACE: value3Ptr = POP_OBJECT(); valuePtr = OBJ_AT_DEPTH(2); - endIdx = Tcl_GetCharLength(valuePtr) - 1; + endIdx = TclGetCharLength(valuePtr) - 1; TRACE(("\"%.20s\" %s %s \"%.20s\" => ", O2S(valuePtr), O2S(OBJ_UNDER_TOS), O2S(OBJ_AT_TOS), O2S(value3Ptr))); DECACHE_STACK_INFO(); @@ -5495,12 +5495,12 @@ TEBCresume( objResultPtr = value3Ptr; goto doneStringMap; } - ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length); + ustring1 = TclGetUnicodeFromObj_(valuePtr, &length); if (length == 0) { objResultPtr = valuePtr; goto doneStringMap; } - ustring2 = Tcl_GetUnicodeFromObj(value2Ptr, &length2); + ustring2 = TclGetUnicodeFromObj_(value2Ptr, &length2); if (length2 > length || length2 == 0) { objResultPtr = valuePtr; goto doneStringMap; @@ -5512,9 +5512,9 @@ TEBCresume( } goto doneStringMap; } - ustring3 = Tcl_GetUnicodeFromObj(value3Ptr, &length3); + ustring3 = TclGetUnicodeFromObj_(value3Ptr, &length3); - objResultPtr = Tcl_NewUnicodeObj(ustring1, 0); + objResultPtr = TclNewUnicodeObj(ustring1, 0); p = ustring1; end = ustring1 + length; for (; ustring1 < end; ustring1++) { @@ -5524,14 +5524,14 @@ TEBCresume( memcmp(ustring1, ustring2, sizeof(Tcl_UniChar) * length2) == 0)) { if (p != ustring1) { - Tcl_AppendUnicodeToObj(objResultPtr, p, ustring1-p); + TclAppendUnicodeToObj(objResultPtr, p, ustring1-p); p = ustring1 + length2; } else { p += length2; } ustring1 = p - 1; - Tcl_AppendUnicodeToObj(objResultPtr, ustring3, length3); + TclAppendUnicodeToObj(objResultPtr, ustring3, length3); } } if (p != ustring1) { @@ -5539,7 +5539,7 @@ TEBCresume( * Put the rest of the unmapped chars onto result. */ - Tcl_AppendUnicodeToObj(objResultPtr, p, ustring1 - p); + TclAppendUnicodeToObj(objResultPtr, p, ustring1 - p); } doneStringMap: TRACE_WITH_OBJ(("%.20s %.20s %.20s => ", @@ -5565,7 +5565,7 @@ TEBCresume( valuePtr = OBJ_AT_TOS; TRACE(("%s \"%.30s\" => ", tclStringClassTable[opnd].name, O2S(valuePtr))); - ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length); + ustring1 = TclGetUnicodeFromObj_(valuePtr, &length); match = 1; if (length > 0) { int ch; @@ -5592,12 +5592,12 @@ TEBCresume( * both. */ - if (TclHasInternalRep(valuePtr, &tclStringType) - || TclHasInternalRep(value2Ptr, &tclStringType)) { + if (TclHasInternalRep(valuePtr, &tclUniCharStringType) + || TclHasInternalRep(value2Ptr, &tclUniCharStringType)) { Tcl_UniChar *ustring1, *ustring2; - ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length); - ustring2 = Tcl_GetUnicodeFromObj(value2Ptr, &length2); + ustring1 = TclGetUnicodeFromObj_(valuePtr, &length); + ustring2 = TclGetUnicodeFromObj_(value2Ptr, &length2); match = TclUniCharMatch(ustring1, length, ustring2, length2, nocase); } else if (TclIsPureByteArray(valuePtr) && !nocase) { diff --git a/generic/tclIO.c b/generic/tclIO.c index 92bd91b..af14071 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -3556,7 +3556,7 @@ Tcl_Close( result = flushcode; } if ((result != 0) && (result != TCL_ERROR) && (interp != NULL) - && 0 == Tcl_GetCharLength(Tcl_GetObjResult(interp))) { + && 0 == TclGetCharLength(Tcl_GetObjResult(interp))) { Tcl_SetErrno(result); Tcl_SetObjResult(interp, Tcl_NewStringObj(Tcl_PosixError(interp), -1)); @@ -6379,7 +6379,7 @@ ReadChars( * bytes demanded by the Tcl_ExternalToUtf() call! */ - dstLimit = Tcl_UtfAtIndex(dst, charsToRead) - dst + (TCL_UTF_MAX - 1); + dstLimit = TclUtfAtIndex(dst, charsToRead) - dst + (TCL_UTF_MAX - 1); statePtr->flags = savedFlags; statePtr->inputEncodingFlags = savedIEFlags; statePtr->inputEncodingState = savedState; diff --git a/generic/tclInt.h b/generic/tclInt.h index af839fc..eff8eab 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2771,6 +2771,7 @@ MODULE_SCOPE const Tcl_ObjType tclListType; MODULE_SCOPE const Tcl_ObjType tclDictType; MODULE_SCOPE const Tcl_ObjType tclProcBodyType; MODULE_SCOPE const Tcl_ObjType tclStringType; +MODULE_SCOPE const Tcl_ObjType tclUniCharStringType; MODULE_SCOPE const Tcl_ObjType tclEnsembleCmdType; MODULE_SCOPE const Tcl_ObjType tclRegexpType; MODULE_SCOPE Tcl_ObjType tclCmdNameType; @@ -3322,6 +3323,44 @@ MODULE_SCOPE void TclErrorStackResetIf(Tcl_Interp *interp, MODULE_SCOPE int TclZipfs_Init(Tcl_Interp *interp); +#if TCL_UTF_MAX > 3 + MODULE_SCOPE int *TclGetUnicodeFromObj_(Tcl_Obj *, int *); + MODULE_SCOPE Tcl_Obj *TclNewUnicodeObj(const int *, int); + MODULE_SCOPE void TclAppendUnicodeToObj(Tcl_Obj *, const int *, int); + MODULE_SCOPE int TclUniCharNcasecmp(const int *, const int *, unsigned long); + MODULE_SCOPE int TclUniCharCaseMatch(const int *, const int *, int); + MODULE_SCOPE int TclUniCharNcmp(const int *, const int *, unsigned long); +# undef Tcl_NumUtfChars +# define Tcl_NumUtfChars TclNumUtfChars +# undef Tcl_GetCharLength +# define Tcl_GetCharLength TclGetCharLength +# undef Tcl_UtfAtIndex +# define Tcl_UtfAtIndex TclUtfAtIndex +# undef Tcl_GetRange +# define Tcl_GetRange TclGetRange +# undef Tcl_GetUniChar +# define Tcl_GetUniChar TclGetUniChar +#else +# define tclUniCharStringType tclStringType +# define TclGetUnicodeFromObj_ Tcl_GetUnicodeFromObj +# define TclNewUnicodeObj Tcl_NewUnicodeObj +# define TclAppendUnicodeToObj Tcl_AppendUnicodeToObj +# define TclUniCharNcasecmp Tcl_UniCharNcasecmp +# define TclUniCharCaseMatch Tcl_UniCharCaseMatch +# define TclUniCharNcmp Tcl_UniCharNcmp +# undef TclNumUtfChars +# define TclNumUtfChars Tcl_NumUtfChars +# undef TclGetCharLength +# define TclGetCharLength Tcl_GetCharLength +# undef TclUtfAtIndex +# define TclUtfAtIndex Tcl_UtfAtIndex +# undef TclGetRange +# define TclGetRange Tcl_GetRange +# undef TclGetUniChar +# define TclGetUniChar Tcl_GetUniChar +#endif + + /* * Many parsing tasks need a common definition of whitespace. * Use this routine and macro to achieve that and place @@ -4725,8 +4764,8 @@ MODULE_SCOPE const TclFileAttrProcs tclpFileAttrProcs[]; : Tcl_UtfToUniChar(str, chPtr)) #else #define TclUtfToUniChar(str, chPtr) \ - ((((unsigned char) *(str)) < 0x80) ? \ - ((*(chPtr) = (unsigned char) *(str)), 1) \ + (((UCHAR(*(str))) < 0x80) ? \ + ((*(chPtr) = UCHAR(*(str))), 1) \ : Tcl_UtfToChar16(str, chPtr)) #endif @@ -4742,14 +4781,14 @@ MODULE_SCOPE const TclFileAttrProcs tclpFileAttrProcs[]; *---------------------------------------------------------------- */ -#define TclNumUtfChars(numChars, bytes, numBytes) \ +#define TclNumUtfCharsM(numChars, bytes, numBytes) \ do { \ int _count, _i = (numBytes); \ unsigned char *_str = (unsigned char *) (bytes); \ while (_i && (*_str < 0xC0)) { _i--; _str++; } \ _count = (numBytes) - _i; \ if (_i) { \ - _count += Tcl_NumUtfChars((bytes) + _count, _i); \ + _count += TclNumUtfChars((bytes) + _count, _i); \ } \ (numChars) = _count; \ } while (0); @@ -4780,24 +4819,6 @@ MODULE_SCOPE int TclIsPureByteArray(Tcl_Obj *objPtr); /* *---------------------------------------------------------------- - * Macro used by the Tcl core to compare Unicode strings. On big-endian - * systems we can use the more efficient memcmp, but this would not be - * lexically correct on little-endian systems. The ANSI C "prototype" for - * this macro is: - * - * MODULE_SCOPE int TclUniCharNcmp(const Tcl_UniChar *cs, - * const Tcl_UniChar *ct, unsigned long n); - *---------------------------------------------------------------- - */ - -#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3) -# define TclUniCharNcmp(cs,ct,n) memcmp((cs),(ct),(n)*sizeof(Tcl_UniChar)) -#else /* !WORDS_BIGENDIAN */ -# define TclUniCharNcmp Tcl_UniCharNcmp -#endif /* WORDS_BIGENDIAN */ - -/* - *---------------------------------------------------------------- * Macro used by the Tcl core to increment a namespace's export epoch * counter. The ANSI C "prototype" for this macro is: * diff --git a/generic/tclObj.c b/generic/tclObj.c index a06b8fd..15fe98f 100644 --- a/generic/tclObj.c +++ b/generic/tclObj.c @@ -387,7 +387,9 @@ TclInitObjSubsystem(void) Tcl_RegisterObjType(&tclByteArrayType); Tcl_RegisterObjType(&tclDoubleType); +#if (TCL_UTF_MAX < 4) || !defined(TCL_NO_DEPRECATED) Tcl_RegisterObjType(&tclStringType); +#endif Tcl_RegisterObjType(&tclListType); Tcl_RegisterObjType(&tclDictType); Tcl_RegisterObjType(&tclByteCodeType); diff --git a/generic/tclProc.c b/generic/tclProc.c index 45d1afd..75687f0 100644 --- a/generic/tclProc.c +++ b/generic/tclProc.c @@ -529,7 +529,7 @@ TclCreateProc( "FORMALARGUMENTFORMAT", NULL); goto procError; } - if ((fieldCount == 0) || (Tcl_GetCharLength(fieldValues[0]) == 0)) { + if ((fieldCount == 0) || (TclGetCharLength(fieldValues[0]) == 0)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "argument with no name", -1)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "PROC", diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c index 8e588ac..ff7c72c 100644 --- a/generic/tclRegexp.c +++ b/generic/tclRegexp.c @@ -271,8 +271,8 @@ Tcl_RegExpRange( } else { string = regexpPtr->string; } - *startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so); - *endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo); + *startPtr = TclUtfAtIndex(string, regexpPtr->matches[index].rm_so); + *endPtr = TclUtfAtIndex(string, regexpPtr->matches[index].rm_eo); } } @@ -482,7 +482,7 @@ Tcl_RegExpExecObj( regexpPtr->string = NULL; regexpPtr->objPtr = textObj; - udata = Tcl_GetUnicodeFromObj(textObj, &length); + udata = TclGetUnicodeFromObj_(textObj, &length); if (offset > length) { offset = length; diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 114e8a6..33f84bd 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -69,6 +69,12 @@ static void SetUnicodeObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars); static int UnicodeLength(const Tcl_UniChar *unicode); static void UpdateStringOfString(Tcl_Obj *objPtr); +#if (TCL_UTF_MAX) > 3 && !defined(TCL_NO_DEPRECATED) +static void DupUTF16StringInternalRep(Tcl_Obj *objPtr, + Tcl_Obj *copyPtr); +static int SetUTF16StringFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr); +static void UpdateStringOfUTF16String(Tcl_Obj *objPtr); +#endif #define ISCONTINUATION(bytes) (\ ((((bytes)[0] & 0xC0) == 0x80) || (((bytes)[0] == '\xED') \ @@ -80,6 +86,20 @@ static void UpdateStringOfString(Tcl_Obj *objPtr); * functions that can be invoked by generic object code. */ +#if TCL_UTF_MAX < 4 + +#define tclUniCharStringType tclStringType +#define GET_UNICHAR_STRING GET_STRING +#define UniCharString String +#define UNICHAR_STRING_MAXCHARS STRING_MAXCHARS +#define uniCharStringAlloc stringAlloc +#define uniCharStringRealloc stringRealloc +#define uniCharStringAttemptAlloc stringAttemptAlloc +#define uniCharStringAttemptRealloc stringAttemptRealloc +#define uniCharStringCheckLimits stringCheckLimits +#define SET_UNICHAR_STRING SET_STRING +#define UNICHAR_STRING_SIZE STRING_SIZE + const Tcl_ObjType tclStringType = { "string", /* name */ FreeStringInternalRep, /* freeIntRepPro */ @@ -87,7 +107,149 @@ const Tcl_ObjType tclStringType = { UpdateStringOfString, /* updateStringProc */ SetStringFromAny /* setFromAnyProc */ }; - + +#else + +#ifndef TCL_NO_DEPRECATED +const Tcl_ObjType tclStringType = { + "string", /* name */ + FreeStringInternalRep, /* freeIntRepPro */ + DupUTF16StringInternalRep, /* dupIntRepProc */ + UpdateStringOfUTF16String, /* updateStringProc */ + SetUTF16StringFromAny /* setFromAnyProc */ +}; +#endif + +const Tcl_ObjType tclUniCharStringType = { + "utf32string", /* name */ + FreeStringInternalRep, /* freeIntRepPro */ + DupStringInternalRep, /* dupIntRepProc */ + UpdateStringOfString, /* updateStringProc */ + SetStringFromAny /* setFromAnyProc */ +}; + +typedef struct { + int numChars; /* The number of chars in the string. -1 means + * this value has not been calculated. >= 0 + * means that there is a valid Unicode rep, or + * that the number of UTF bytes == the number + * of chars. */ + int allocated; /* The amount of space actually allocated for + * the UTF string (minus 1 byte for the + * termination char). */ + int maxChars; /* Max number of chars that can fit in the + * space allocated for the unicode array. */ + int hasUnicode; /* Boolean determining whether the string has + * a Unicode representation. */ + Tcl_UniChar unicode[TCLFLEXARRAY]; /* The array of Unicode chars. The actual size + * of this field depends on the 'maxChars' + * field above. */ +} UniCharString; + +#define UNICHAR_STRING_MAXCHARS \ + (int)(((size_t)UINT_MAX - offsetof(UniCharString, unicode))/sizeof(Tcl_UniChar) - 1) +#define UNICHAR_STRING_SIZE(numChars) \ + (offsetof(UniCharString, unicode) + sizeof(Tcl_UniChar) + ((numChars) * sizeof(Tcl_UniChar))) +#define uniCharStringCheckLimits(numChars) \ + do { \ + if ((numChars) < 0 || (numChars) > UNICHAR_STRING_MAXCHARS) { \ + Tcl_Panic("max length for a Tcl unicode value (%d chars) exceeded", \ + UNICHAR_STRING_MAXCHARS); \ + } \ + } while (0) +#define uniCharStringAttemptAlloc(numChars) \ + (UniCharString *) attemptckalloc(UNICHAR_STRING_SIZE(numChars)) +#define uniCharStringAlloc(numChars) \ + (UniCharString *) ckalloc(UNICHAR_STRING_SIZE(numChars)) +#define uniCharStringRealloc(ptr, numChars) \ + (UniCharString *) ckrealloc((ptr), UNICHAR_STRING_SIZE(numChars)) +#define uniCharStringAttemptRealloc(ptr, numChars) \ + (UniCharString *) attemptckrealloc((ptr), UNICHAR_STRING_SIZE(numChars)) +#define GET_UNICHAR_STRING(objPtr) \ + ((UniCharString *) (objPtr)->internalRep.twoPtrValue.ptr1) +#define SET_UNICHAR_STRING(objPtr, stringPtr) \ + ((objPtr)->internalRep.twoPtrValue.ptr2 = NULL), \ + ((objPtr)->internalRep.twoPtrValue.ptr1 = (void *) (stringPtr)) + + +#ifndef TCL_NO_DEPRECATED +static void +DupUTF16StringInternalRep( + Tcl_Obj *srcPtr, /* Object with internal rep to copy. Must have + * an internal rep of type "String". */ + Tcl_Obj *copyPtr) /* Object with internal rep to set. Must not + * currently have an internal rep.*/ +{ + String *srcStringPtr = GET_STRING(srcPtr); + size_t size = offsetof(String, unicode) + (((srcStringPtr->allocated) + 1U) * sizeof(unsigned short)); + String *copyStringPtr = (String *)ckalloc(size); + memcpy(copyStringPtr, srcStringPtr, size); + + SET_STRING(copyPtr, copyStringPtr); + copyPtr->typePtr = &tclStringType; +} + +static int +SetUTF16StringFromAny( + TCL_UNUSED(Tcl_Interp *), + Tcl_Obj *objPtr) /* The object to convert. */ +{ + if (!TclHasInternalRep(objPtr, &tclStringType)) { + Tcl_DString ds; + + /* + * Convert whatever we have into an untyped value. Just A String. + */ + + (void) TclGetString(objPtr); + TclFreeInternalRep(objPtr); + + /* + * Create a basic String internalrep that just points to the UTF-8 string + * already in place at objPtr->bytes. + */ + + Tcl_DStringInit(&ds); + unsigned short *utf16string = Tcl_UtfToChar16DString(objPtr->bytes, objPtr->length, &ds); + int size = Tcl_DStringLength(&ds); + String *stringPtr = (String *)ckalloc((offsetof(String, unicode) + sizeof(unsigned short)) + size); + + memcpy(stringPtr->unicode, utf16string, size); + Tcl_DStringFree(&ds); + size /= sizeof(unsigned short); + stringPtr->unicode[size] = 0; + + stringPtr->numChars = size; + stringPtr->allocated = size; + stringPtr->maxChars = size; + stringPtr->hasUnicode = 1; + SET_STRING(objPtr, stringPtr); + objPtr->typePtr = &tclStringType; + } + return TCL_OK; +} + +static void +UpdateStringOfUTF16String( + Tcl_Obj *objPtr) /* Object with string rep to update. */ +{ + Tcl_DString ds; + String *stringPtr = GET_STRING(objPtr); + + Tcl_DStringInit(&ds); + const char *string = Tcl_Char16ToUtfDString(stringPtr->unicode, stringPtr->numChars, &ds); + + char *bytes = (char *)ckalloc(Tcl_DStringLength(&ds) + 1U); + memcpy(bytes, string, Tcl_DStringLength(&ds)); + bytes[Tcl_DStringLength(&ds)] = 0; + objPtr->bytes = bytes; + objPtr->length = Tcl_DStringLength(&ds); + Tcl_DStringFree(&ds); +} +#endif + +#endif + /* * TCL STRING GROWTH ALGORITHM * @@ -138,7 +300,7 @@ GrowStringBuffer( * flag || objPtr->bytes != NULL */ - String *stringPtr = GET_STRING(objPtr); + UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr); char *ptr = NULL; int attempt; @@ -185,10 +347,10 @@ GrowUnicodeBuffer( * Pre-conditions: * objPtr->typePtr == &tclStringType * needed > stringPtr->maxChars - * needed < STRING_MAXCHARS + * needed < UNICHAR_STRING_MAXCHARS */ - String *ptr = NULL, *stringPtr = GET_STRING(objPtr); + UniCharString *ptr = NULL, *stringPtr = GET_UNICHAR_STRING(objPtr); int attempt; if (stringPtr->maxChars > 0) { @@ -196,9 +358,9 @@ GrowUnicodeBuffer( * Subsequent appends - apply the growth algorithm. */ - if (needed <= STRING_MAXCHARS / 2) { + if (needed <= UNICHAR_STRING_MAXCHARS / 2) { attempt = 2 * needed; - ptr = stringAttemptRealloc(stringPtr, attempt); + ptr = uniCharStringAttemptRealloc(stringPtr, attempt); } if (ptr == NULL) { /* @@ -206,13 +368,13 @@ GrowUnicodeBuffer( * overflow into invalid argument values for attempt. */ - unsigned int limit = STRING_MAXCHARS - needed; + unsigned int limit = UNICHAR_STRING_MAXCHARS - needed; unsigned int extra = needed - stringPtr->numChars + TCL_MIN_UNICHAR_GROWTH; int growth = (int) ((extra > limit) ? limit : extra); attempt = needed + growth; - ptr = stringAttemptRealloc(stringPtr, attempt); + ptr = uniCharStringAttemptRealloc(stringPtr, attempt); } } if (ptr == NULL) { @@ -221,11 +383,11 @@ GrowUnicodeBuffer( */ attempt = needed; - ptr = stringRealloc(stringPtr, attempt); + ptr = uniCharStringRealloc(stringPtr, attempt); } stringPtr = ptr; stringPtr->maxChars = attempt; - SET_STRING(objPtr, stringPtr); + SET_UNICHAR_STRING(objPtr, stringPtr); } /* @@ -374,7 +536,7 @@ Tcl_DbNewStringObj( */ Tcl_Obj * -Tcl_NewUnicodeObj( +TclNewUnicodeObj( const Tcl_UniChar *unicode, /* The unicode string used to initialize the * new object. */ int numChars) /* Number of characters in the unicode @@ -387,6 +549,35 @@ Tcl_NewUnicodeObj( return objPtr; } +#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) +Tcl_Obj * +Tcl_NewUnicodeObj( + const unsigned short *unicode, /* The unicode string used to initialize the + * new object. */ + int numChars) /* Number of characters in the unicode + * string. */ +{ + Tcl_Obj *objPtr; + + TclNewObj(objPtr); + TclInvalidateStringRep(objPtr); + + String *stringPtr = (String *)ckalloc((offsetof(String, unicode) + + sizeof(unsigned short)) + numChars * sizeof(unsigned short)); + memcpy(stringPtr->unicode, unicode, numChars); + stringPtr->unicode[numChars] = 0; + + stringPtr->numChars = numChars; + stringPtr->allocated = numChars; + stringPtr->maxChars = numChars; + stringPtr->hasUnicode = 1; + SET_STRING(objPtr, stringPtr); + objPtr->typePtr = &tclStringType; + + return objPtr; +} +#endif + /* *---------------------------------------------------------------------- * @@ -405,11 +596,11 @@ Tcl_NewUnicodeObj( */ int -Tcl_GetCharLength( +TclGetCharLength( Tcl_Obj *objPtr) /* The String object to get the num chars * of. */ { - String *stringPtr; + UniCharString *stringPtr; int numChars; /* @@ -444,7 +635,7 @@ Tcl_GetCharLength( */ SetStringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); numChars = stringPtr->numChars; /* @@ -452,12 +643,52 @@ Tcl_GetCharLength( */ if (numChars == -1) { - TclNumUtfChars(numChars, objPtr->bytes, objPtr->length); + TclNumUtfCharsM(numChars, objPtr->bytes, objPtr->length); stringPtr->numChars = numChars; } return numChars; } +#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) +#undef Tcl_GetCharLength +int +Tcl_GetCharLength( + Tcl_Obj *objPtr) /* The String object to get the num chars + * of. */ +{ + int numChars; + + /* + * Quick, no-shimmer return for short string reps. + */ + + if ((objPtr->bytes) && (objPtr->length < 2)) { + /* 0 bytes -> 0 chars; 1 byte -> 1 char */ + return objPtr->length; + } + + /* + * Optimize the case where we're really dealing with a bytearray object; + * we don't need to convert to a string to perform the get-length operation. + * + * Starting in Tcl 8.7, we check for a "pure" bytearray, because the + * machinery behind that test is using a proper bytearray ObjType. We + * could also compute length of an improper bytearray without shimmering + * but there's no value in that. We *want* to shimmer an improper bytearray + * because improper bytearrays have worthless internal reps. + */ + + if (TclIsPureByteArray(objPtr)) { + + (void) Tcl_GetByteArrayFromObj(objPtr, &numChars); + } else { + Tcl_GetString(objPtr); + numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length); + } + return numChars; +} +#endif + /* *---------------------------------------------------------------------- * @@ -518,6 +749,8 @@ TclCheckEmptyString( *---------------------------------------------------------------------- */ +#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) +#undef Tcl_GetUniChar int Tcl_GetUniChar( Tcl_Obj *objPtr, /* The object to get the Unicode charater @@ -549,22 +782,78 @@ Tcl_GetUniChar( * OK, need to work with the object as a string. */ - SetStringFromAny(NULL, objPtr); + SetUTF16StringFromAny(NULL, objPtr); stringPtr = GET_STRING(objPtr); + if (index >= stringPtr->numChars) { + return -1; + } + ch = stringPtr->unicode[index]; + /* See: bug [11ae2be95dac9417] */ + if ((ch & 0xF800) == 0xD800) { + if (ch & 0x400) { + if ((index > 0) + && ((stringPtr->unicode[index-1] & 0xFC00) == 0xD800)) { + ch = -1; /* low surrogate preceded by high surrogate */ + } + } else if ((++index < stringPtr->numChars) + && ((stringPtr->unicode[index] & 0xFC00) == 0xDC00)) { + /* high surrogate followed by low surrogate */ + ch = (((ch & 0x3FF) << 10) | + (stringPtr->unicode[index] & 0x3FF)) + 0x10000; + } + } + return ch; +} +#endif + +int +TclGetUniChar( + Tcl_Obj *objPtr, /* The object to get the Unicode charater + * from. */ + int index) /* Get the index'th Unicode character. */ +{ + UniCharString *stringPtr; + int ch, length; + + if (index < 0) { + return -1; + } + + /* + * Optimize the case where we're really dealing with a bytearray object + * we don't need to convert to a string to perform the indexing operation. + */ + + if (TclIsPureByteArray(objPtr)) { + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); + if (index >= length) { + return -1; + } + + return (int) bytes[index]; + } + + /* + * OK, need to work with the object as a string. + */ + + SetStringFromAny(NULL, objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); + if (stringPtr->hasUnicode == 0) { /* * If numChars is unknown, compute it. */ if (stringPtr->numChars == -1) { - TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length); + TclNumUtfCharsM(stringPtr->numChars, objPtr->bytes, objPtr->length); } if (stringPtr->numChars == objPtr->length) { return (unsigned char) objPtr->bytes[index]; } FillUnicodeRep(objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); } if (index >= stringPtr->numChars) { @@ -612,12 +901,12 @@ Tcl_GetUniChar( #undef Tcl_GetUnicodeFromObj #ifndef TCL_NO_DEPRECATED #undef Tcl_GetUnicode -Tcl_UniChar * +unsigned short * Tcl_GetUnicode( Tcl_Obj *objPtr) /* The object to find the unicode string * for. */ { - return Tcl_GetUnicodeFromObj(objPtr, (int *)NULL); + return TclGetUnicodeFromObj(objPtr, NULL); } #endif /* TCL_NO_DEPRECATED */ @@ -641,21 +930,21 @@ Tcl_GetUnicode( */ Tcl_UniChar * -Tcl_GetUnicodeFromObj( +TclGetUnicodeFromObj_( Tcl_Obj *objPtr, /* The object to find the unicode string * for. */ int *lengthPtr) /* If non-NULL, the location where the string * rep's unichar length should be stored. If * NULL, no length is stored. */ { - String *stringPtr; + UniCharString *stringPtr; SetStringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); if (stringPtr->hasUnicode == 0) { FillUnicodeRep(objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); } if (lengthPtr != NULL) { @@ -663,7 +952,29 @@ Tcl_GetUnicodeFromObj( } return stringPtr->unicode; } -Tcl_UniChar * + +#if TCL_UTF_MAX > 3 && !defined(TCL_NO_DEPRECATED) +unsigned short * +Tcl_GetUnicodeFromObj( + Tcl_Obj *objPtr, /* The object to find the unicode string + * for. */ + int *lengthPtr) /* If non-NULL, the location where the string + * rep's unichar length should be stored. If + * NULL, no length is stored. */ +{ + String *stringPtr; + + SetUTF16StringFromAny(NULL, objPtr); + stringPtr = GET_STRING(objPtr); + + if (lengthPtr != NULL) { + *lengthPtr = stringPtr->numChars; + } + return stringPtr->unicode; +} +#endif + +unsigned short * TclGetUnicodeFromObj( Tcl_Obj *objPtr, /* The object to find the unicode string * for. */ @@ -676,17 +987,8 @@ TclGetUnicodeFromObj( SetStringFromAny(NULL, objPtr); stringPtr = GET_STRING(objPtr); - if (stringPtr->hasUnicode == 0) { - FillUnicodeRep(objPtr); - stringPtr = GET_STRING(objPtr); - } - if (lengthPtr != NULL) { -#if TCL_MAJOR_VERSION > 8 *lengthPtr = stringPtr->numChars; -#else - *lengthPtr = ((size_t)(unsigned)(stringPtr->numChars + 1)) - 1; -#endif } return stringPtr->unicode; } @@ -709,6 +1011,8 @@ TclGetUnicodeFromObj( *---------------------------------------------------------------------- */ +#if TCL_UTF_MAX > 3 && !defined(TCL_NO_DEPRECATED) +#undef Tcl_GetRange Tcl_Obj * Tcl_GetRange( Tcl_Obj *objPtr, /* The Tcl object to find the range of. */ @@ -716,7 +1020,53 @@ Tcl_GetRange( int last) /* Last index of the range. */ { Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ - String *stringPtr; + int length; + + if (first < 0) { + first = 0; + } + + /* + * Optimize the case where we're really dealing with a bytearray object + * we don't need to convert to a string to perform the substring operation. + */ + + if (TclIsPureByteArray(objPtr)) { + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); + + if (last < 0 || last >= length) { + last = length - 1; + } + if (last < first) { + TclNewObj(newObjPtr); + return newObjPtr; + } + return Tcl_NewByteArrayObj(bytes + first, last - first + 1); + } + + int numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length); + + if (last >= numChars) { + last = numChars - 1; + } + if (last < first) { + TclNewObj(newObjPtr); + return newObjPtr; + } + const char *begin = Tcl_UtfAtIndex(objPtr->bytes, first); + const char *end = Tcl_UtfAtIndex(objPtr->bytes, last + 1); + return Tcl_NewStringObj(begin, end - begin); +} +#endif + +Tcl_Obj * +TclGetRange( + Tcl_Obj *objPtr, /* The Tcl object to find the range of. */ + int first, /* First index of the range. */ + int last) /* Last index of the range. */ +{ + Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ + UniCharString *stringPtr; int length; if (first < 0) { @@ -746,7 +1096,7 @@ Tcl_GetRange( */ SetStringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); if (stringPtr->hasUnicode == 0) { /* @@ -754,7 +1104,7 @@ Tcl_GetRange( */ if (stringPtr->numChars == -1) { - TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length); + TclNumUtfCharsM(stringPtr->numChars, objPtr->bytes, objPtr->length); } if (stringPtr->numChars == objPtr->length) { if (last < 0 || last >= stringPtr->numChars) { @@ -771,12 +1121,12 @@ Tcl_GetRange( */ SetStringFromAny(NULL, newObjPtr); - stringPtr = GET_STRING(newObjPtr); + stringPtr = GET_UNICHAR_STRING(newObjPtr); stringPtr->numChars = newObjPtr->length; return newObjPtr; } FillUnicodeRep(objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); } if (last < 0 || last >= stringPtr->numChars) { last = stringPtr->numChars - 1; @@ -797,7 +1147,7 @@ Tcl_GetRange( ++last; } #endif - return Tcl_NewUnicodeObj(stringPtr->unicode + first, last - first + 1); + return TclNewUnicodeObj(stringPtr->unicode + first, last - first + 1); } /* @@ -883,7 +1233,7 @@ Tcl_SetObjLength( * representation of object, not including * terminating null byte. */ { - String *stringPtr; + UniCharString *stringPtr; if (length < 0) { /* @@ -903,7 +1253,7 @@ Tcl_SetObjLength( } SetStringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); if (objPtr->bytes != NULL) { /* @@ -935,10 +1285,10 @@ Tcl_SetObjLength( * Changing length of pure unicode string. */ - stringCheckLimits(length); + uniCharStringCheckLimits(length); if (length > stringPtr->maxChars) { - stringPtr = stringRealloc(stringPtr, length); - SET_STRING(objPtr, stringPtr); + stringPtr = uniCharStringRealloc(stringPtr, length); + SET_UNICHAR_STRING(objPtr, stringPtr); stringPtr->maxChars = length; } @@ -988,7 +1338,7 @@ Tcl_AttemptSetObjLength( * representation of object, not including * terminating null byte. */ { - String *stringPtr; + UniCharString *stringPtr; if (length < 0) { /* @@ -1006,7 +1356,7 @@ Tcl_AttemptSetObjLength( } SetStringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); if (objPtr->bytes != NULL) { /* @@ -1045,15 +1395,15 @@ Tcl_AttemptSetObjLength( * Changing length of pure unicode string. */ - if (length > STRING_MAXCHARS) { + if (length > UNICHAR_STRING_MAXCHARS) { return 0; } if (length > stringPtr->maxChars) { - stringPtr = stringAttemptRealloc(stringPtr, length); + stringPtr = uniCharStringAttemptRealloc(stringPtr, length); if (stringPtr == NULL) { return 0; } - SET_STRING(objPtr, stringPtr); + SET_UNICHAR_STRING(objPtr, stringPtr); stringPtr->maxChars = length; } @@ -1089,20 +1439,47 @@ Tcl_AttemptSetObjLength( *--------------------------------------------------------------------------- */ +#if !defined(TCL_NO_DEPRECATED) void Tcl_SetUnicodeObj( Tcl_Obj *objPtr, /* The object to set the string of. */ - const Tcl_UniChar *unicode, /* The unicode string used to initialize the + const unsigned short *unicode, /* The unicode string used to initialize the * object. */ int numChars) /* Number of characters in the unicode * string. */ { - if (Tcl_IsShared(objPtr)) { - Tcl_Panic("%s called with shared object", "Tcl_SetUnicodeObj"); + String *stringPtr; + + if (numChars < 0) { + numChars = 0; + + if (unicode) { + while (numChars >= 0 && unicode[numChars] != 0) { + numChars++; + } + } + stringCheckLimits(numChars); } - TclFreeInternalRep(objPtr); - SetUnicodeObj(objPtr, unicode, numChars); + + /* + * Allocate enough space for the String structure + Unicode string. + */ + + stringCheckLimits(numChars); + stringPtr = stringAlloc(numChars); + SET_STRING(objPtr, stringPtr); + objPtr->typePtr = &tclStringType; + + stringPtr->maxChars = numChars; + memcpy(stringPtr->unicode, unicode, numChars * sizeof(unsigned char)); + stringPtr->unicode[numChars] = 0; + stringPtr->numChars = numChars; + stringPtr->hasUnicode = 1; + + TclInvalidateStringRep(objPtr); + stringPtr->allocated = numChars; } +#endif static int UnicodeLength( @@ -1115,7 +1492,7 @@ UnicodeLength( numChars++; } } - stringCheckLimits(numChars); + uniCharStringCheckLimits(numChars); return numChars; } @@ -1127,7 +1504,7 @@ SetUnicodeObj( int numChars) /* Number of characters in the unicode * string. */ { - String *stringPtr; + UniCharString *stringPtr; if (numChars < 0) { numChars = UnicodeLength(unicode); @@ -1137,10 +1514,10 @@ SetUnicodeObj( * Allocate enough space for the String structure + Unicode string. */ - stringCheckLimits(numChars); - stringPtr = stringAlloc(numChars); - SET_STRING(objPtr, stringPtr); - objPtr->typePtr = &tclStringType; + uniCharStringCheckLimits(numChars); + stringPtr = uniCharStringAlloc(numChars); + SET_UNICHAR_STRING(objPtr, stringPtr); + objPtr->typePtr = &tclUniCharStringType; stringPtr->maxChars = numChars; memcpy(stringPtr->unicode, unicode, numChars * sizeof(Tcl_UniChar)); @@ -1184,7 +1561,7 @@ Tcl_AppendLimitedToObj( * object to indicate not all available bytes * at "bytes" were appended. */ { - String *stringPtr; + UniCharString *stringPtr; int toCopy = 0; int eLen = 0; @@ -1223,13 +1600,13 @@ Tcl_AppendLimitedToObj( } SetStringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); /* If appended string starts with a continuation byte or a lower surrogate, * force objPtr to unicode representation. See [7f1162a867] */ if (bytes && ISCONTINUATION(bytes)) { - Tcl_GetUnicode(objPtr); - stringPtr = GET_STRING(objPtr); + TclGetUnicodeFromObj_(objPtr, NULL); + stringPtr = GET_UNICHAR_STRING(objPtr); } if (stringPtr->hasUnicode && stringPtr->numChars > 0) { AppendUtfToUnicodeRep(objPtr, bytes, toCopy); @@ -1241,7 +1618,7 @@ Tcl_AppendLimitedToObj( return; } - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); if (stringPtr->hasUnicode && stringPtr->numChars > 0) { AppendUtfToUnicodeRep(objPtr, ellipsis, eLen); } else { @@ -1296,13 +1673,13 @@ Tcl_AppendToObj( */ void -Tcl_AppendUnicodeToObj( +TclAppendUnicodeToObj( Tcl_Obj *objPtr, /* Points to the object to append to. */ const Tcl_UniChar *unicode, /* The unicode string to append to the * object. */ int length) /* Number of chars in "unicode". */ { - String *stringPtr; + UniCharString *stringPtr; if (Tcl_IsShared(objPtr)) { Tcl_Panic("%s called with shared object", "Tcl_AppendUnicodeToObj"); @@ -1313,7 +1690,7 @@ Tcl_AppendUnicodeToObj( } SetStringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); /* * If objPtr has a valid Unicode rep, then append the "unicode" to the @@ -1328,6 +1705,34 @@ Tcl_AppendUnicodeToObj( } } +#if TCL_UTF_MAX > 3 && !defined(TCL_NO_DEPRECATED) +void +Tcl_AppendUnicodeToObj( + Tcl_Obj *objPtr, /* Points to the object to append to. */ + const unsigned short *unicode, /* The unicode string to append to the + * object. */ + int length) /* Number of chars in "unicode". */ +{ + String *stringPtr; + + if (Tcl_IsShared(objPtr)) { + Tcl_Panic("%s called with shared object", "Tcl_AppendUnicodeToObj"); + } + + if (length == 0) { + return; + } + + SetStringFromAny(NULL, objPtr); + stringPtr = GET_STRING(objPtr); + stringPtr = stringAttemptRealloc(stringPtr, stringPtr->numChars + length); + memcpy(&stringPtr->unicode[stringPtr->numChars], unicode, length); + stringPtr->maxChars = stringPtr->allocated = stringPtr->numChars += length; + stringPtr->unicode[stringPtr->numChars] = 0; + SET_STRING(objPtr, stringPtr); +} +#endif + /* *---------------------------------------------------------------------- * @@ -1353,7 +1758,7 @@ Tcl_AppendObjToObj( Tcl_Obj *objPtr, /* Points to the object to append to. */ Tcl_Obj *appendObjPtr) /* Object to append. */ { - String *stringPtr; + UniCharString *stringPtr; int length, numChars, appendNumChars = -1; const char *bytes; @@ -1426,14 +1831,14 @@ Tcl_AppendObjToObj( */ SetStringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); /* If appended string starts with a continuation byte or a lower surrogate, * force objPtr to unicode representation. See [7f1162a867] * This fixes append-3.4, append-3.7 and utf-1.18 testcases. */ if (ISCONTINUATION(TclGetString(appendObjPtr))) { - Tcl_GetUnicode(objPtr); - stringPtr = GET_STRING(objPtr); + TclGetUnicodeFromObj_(objPtr, NULL); + stringPtr = GET_UNICHAR_STRING(objPtr); } /* * If objPtr has a valid Unicode rep, then get a Unicode string from @@ -1445,9 +1850,9 @@ Tcl_AppendObjToObj( * If appendObjPtr is not of the "String" type, don't convert it. */ - if (TclHasInternalRep(appendObjPtr, &tclStringType)) { + if (TclHasInternalRep(appendObjPtr, &tclUniCharStringType)) { Tcl_UniChar *unicode = - Tcl_GetUnicodeFromObj(appendObjPtr, &numChars); + TclGetUnicodeFromObj_(appendObjPtr, &numChars); AppendUnicodeToUnicodeRep(objPtr, unicode, numChars); } else { @@ -1466,8 +1871,8 @@ Tcl_AppendObjToObj( bytes = TclGetStringFromObj(appendObjPtr, &length); numChars = stringPtr->numChars; - if ((numChars >= 0) && TclHasInternalRep(appendObjPtr, &tclStringType)) { - String *appendStringPtr = GET_STRING(appendObjPtr); + if ((numChars >= 0) && TclHasInternalRep(appendObjPtr, &tclUniCharStringType)) { + UniCharString *appendStringPtr = GET_UNICHAR_STRING(appendObjPtr); appendNumChars = appendStringPtr->numChars; } @@ -1502,7 +1907,7 @@ AppendUnicodeToUnicodeRep( const Tcl_UniChar *unicode, /* String to append. */ int appendNumChars) /* Number of chars of "unicode" to append. */ { - String *stringPtr; + UniCharString *stringPtr; int numChars; if (appendNumChars < 0) { @@ -1513,7 +1918,7 @@ AppendUnicodeToUnicodeRep( } SetStringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); /* * If not enough space has been allocated for the unicode rep, reallocate @@ -1524,7 +1929,7 @@ AppendUnicodeToUnicodeRep( */ numChars = stringPtr->numChars + appendNumChars; - stringCheckLimits(numChars); + uniCharStringCheckLimits(numChars); if (numChars > stringPtr->maxChars) { int offset = -1; @@ -1541,7 +1946,7 @@ AppendUnicodeToUnicodeRep( } GrowUnicodeBuffer(objPtr, numChars); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); /* * Relocate unicode if needed; see above. @@ -1591,7 +1996,7 @@ AppendUnicodeToUtfRep( const Tcl_UniChar *unicode, /* String to convert to UTF. */ int numChars) /* Number of chars of "unicode" to convert. */ { - String *stringPtr = GET_STRING(objPtr); + UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr); numChars = ExtendStringRepWithUnicode(objPtr, unicode, numChars); @@ -1624,7 +2029,7 @@ AppendUtfToUnicodeRep( const char *bytes, /* String to convert to Unicode. */ int numBytes) /* Number of bytes of "bytes" to convert. */ { - String *stringPtr; + UniCharString *stringPtr; if (numBytes == 0) { return; @@ -1632,7 +2037,7 @@ AppendUtfToUnicodeRep( ExtendUnicodeRepWithString(objPtr, bytes, numBytes, -1); TclInvalidateStringRep(objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); stringPtr->allocated = 0; } @@ -1660,7 +2065,7 @@ AppendUtfToUtfRep( const char *bytes, /* String to append. */ int numBytes) /* Number of bytes of "bytes" to append. */ { - String *stringPtr; + UniCharString *stringPtr; int newLength, oldLength; if (numBytes == 0) { @@ -1681,7 +2086,7 @@ AppendUtfToUtfRep( } newLength = numBytes + oldLength; - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); if (newLength > stringPtr->allocated) { int offset = -1; @@ -2085,12 +2490,12 @@ Tcl_AppendFormatToObj( goto errorMsg; case 's': if (gotPrecision) { - numChars = Tcl_GetCharLength(segment); + numChars = TclGetCharLength(segment); if (precision < numChars) { if (precision < 1) { TclNewObj(segment); } else { - segment = Tcl_GetRange(segment, 0, precision - 1); + segment = TclGetRange(segment, 0, precision - 1); } numChars = precision; Tcl_IncrRefCount(segment); @@ -2270,7 +2675,7 @@ Tcl_AppendFormatToObj( gotZero = 0; } if (gotZero) { - length += Tcl_GetCharLength(segment); + length += TclGetCharLength(segment); if (length < width) { segmentLimit -= width - length; } @@ -2401,7 +2806,7 @@ Tcl_AppendFormatToObj( gotZero = 0; } if (gotZero) { - length += Tcl_GetCharLength(segment); + length += TclGetCharLength(segment); if (length < width) { segmentLimit -= width - length; } @@ -2512,7 +2917,7 @@ Tcl_AppendFormatToObj( } if (width>0 && numChars<0) { - numChars = Tcl_GetCharLength(segment); + numChars = TclGetCharLength(segment); } if (!gotMinus && width>0) { if (numChars < width) { @@ -2873,13 +3278,13 @@ TclGetStringStorage( Tcl_Obj *objPtr, unsigned int *sizePtr) { - String *stringPtr; + UniCharString *stringPtr; - if (!TclHasInternalRep(objPtr, &tclStringType) || objPtr->bytes == NULL) { + if (!TclHasInternalRep(objPtr, &tclUniCharStringType) || objPtr->bytes == NULL) { return TclGetStringFromObj(objPtr, (int *)sizePtr); } - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); *sizePtr = stringPtr->allocated; return objPtr->bytes; } @@ -2923,8 +3328,8 @@ TclStringRepeat( */ if (!binary) { - if (TclHasInternalRep(objPtr, &tclStringType)) { - String *stringPtr = GET_STRING(objPtr); + if (TclHasInternalRep(objPtr, &tclUniCharStringType)) { + UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr); if (stringPtr->hasUnicode) { unichar = 1; } @@ -2936,7 +3341,7 @@ TclStringRepeat( Tcl_GetByteArrayFromObj(objPtr, &length); } else if (unichar) { /* Result will be pure Tcl_UniChar array. Pre-size it. */ - Tcl_GetUnicodeFromObj(objPtr, &length); + TclGetUnicodeFromObj_(objPtr, &length); } else { /* Result will be concat of string reps. Pre-size it. */ Tcl_GetStringFromObj(objPtr, &length); @@ -2976,7 +3381,7 @@ TclStringRepeat( */ if (!inPlace || Tcl_IsShared(objPtr)) { - objResultPtr = Tcl_NewUnicodeObj(Tcl_GetUnicode(objPtr), length); + objResultPtr = TclNewUnicodeObj(TclGetUnicodeFromObj_(objPtr, NULL), length); } else { TclInvalidateStringRep(objPtr); objResultPtr = objPtr; @@ -2987,7 +3392,7 @@ TclStringRepeat( Tcl_SetObjResult(interp, Tcl_ObjPrintf( "string size overflow: unable to alloc %" TCL_Z_MODIFIER "u bytes", - STRING_SIZE(count*length))); + UNICHAR_STRING_SIZE(count*length))); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } return NULL; @@ -2997,7 +3402,7 @@ TclStringRepeat( Tcl_AppendObjToObj(objResultPtr, objResultPtr); done *= 2; } - Tcl_AppendUnicodeToObj(objResultPtr, Tcl_GetUnicode(objResultPtr), + TclAppendUnicodeToObj(objResultPtr, TclGetUnicodeFromObj_(objResultPtr, NULL), (count - done) * length); } else { /* @@ -3094,7 +3499,7 @@ TclStringCat( binary = 0; if (ov > objv+1 && ISCONTINUATION(TclGetString(objPtr))) { forceUniChar = 1; - } else if ((objPtr->typePtr) && (objPtr->typePtr != &tclStringType)) { + } else if ((objPtr->typePtr) && (objPtr->typePtr != &tclUniCharStringType)) { /* Prevent shimmer of non-string types. */ allowUniChar = 0; } @@ -3102,7 +3507,7 @@ TclStringCat( } else { /* assert (objPtr->typePtr != NULL) -- stork! */ binary = 0; - if (TclHasInternalRep(objPtr, &tclStringType)) { + if (TclHasInternalRep(objPtr, &tclUniCharStringType)) { /* Have a pure Unicode value; ask to preserve it */ requestUniChar = 1; } else { @@ -3156,7 +3561,7 @@ TclStringCat( if ((objPtr->bytes == NULL) || (objPtr->length)) { int numChars; - Tcl_GetUnicodeFromObj(objPtr, &numChars); /* PANIC? */ + TclGetUnicodeFromObj_(objPtr, &numChars); /* PANIC? */ if (numChars) { last = objc - oc; if (length == 0) { @@ -3306,43 +3711,43 @@ TclStringCat( objResultPtr = *objv++; objc--; /* Ugly interface! Force resize of the unicode array. */ - Tcl_GetUnicodeFromObj(objResultPtr, &start); + TclGetUnicodeFromObj_(objResultPtr, &start); Tcl_InvalidateStringRep(objResultPtr); if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) { if (interp) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "concatenation failed: unable to alloc %" TCL_Z_MODIFIER "u bytes", - STRING_SIZE(length))); + UNICHAR_STRING_SIZE(length))); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } return NULL; } - dst = Tcl_GetUnicode(objResultPtr) + start; + dst = TclGetUnicodeFromObj_(objResultPtr, NULL) + start; } else { Tcl_UniChar ch = 0; /* Ugly interface! No scheme to init array size. */ - objResultPtr = Tcl_NewUnicodeObj(&ch, 0); /* PANIC? */ + objResultPtr = TclNewUnicodeObj(&ch, 0); /* PANIC? */ if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) { Tcl_DecrRefCount(objResultPtr); if (interp) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "concatenation failed: unable to alloc %" TCL_Z_MODIFIER "u bytes", - STRING_SIZE(length))); + UNICHAR_STRING_SIZE(length))); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } return NULL; } - dst = Tcl_GetUnicode(objResultPtr); + dst = TclGetUnicodeFromObj_(objResultPtr, NULL); } while (objc--) { Tcl_Obj *objPtr = *objv++; if ((objPtr->bytes == NULL) || (objPtr->length)) { int more; - Tcl_UniChar *src = Tcl_GetUnicodeFromObj(objPtr, &more); + Tcl_UniChar *src = TclGetUnicodeFromObj_(objPtr, &more); memcpy(dst, src, more * sizeof(Tcl_UniChar)); dst += more; } @@ -3455,8 +3860,8 @@ TclStringCmp( s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len); s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len); memCmpFn = memcmp; - } else if (TclHasInternalRep(value1Ptr, &tclStringType) - && TclHasInternalRep(value2Ptr, &tclStringType)) { + } else if (TclHasInternalRep(value1Ptr, &tclUniCharStringType) + && TclHasInternalRep(value2Ptr, &tclUniCharStringType)) { /* * Do a unicode-specific comparison if both of the args are of * String type. If the char length == byte length, we can do a @@ -3465,12 +3870,12 @@ TclStringCmp( */ if (nocase) { - s1 = (char *) Tcl_GetUnicodeFromObj(value1Ptr, &s1len); - s2 = (char *) Tcl_GetUnicodeFromObj(value2Ptr, &s2len); - memCmpFn = (memCmpFn_t)(void *)Tcl_UniCharNcasecmp; + s1 = (char *) TclGetUnicodeFromObj_(value1Ptr, &s1len); + s2 = (char *) TclGetUnicodeFromObj_(value2Ptr, &s2len); + memCmpFn = (memCmpFn_t)(void *)TclUniCharNcasecmp; } else { - s1len = Tcl_GetCharLength(value1Ptr); - s2len = Tcl_GetCharLength(value2Ptr); + s1len = TclGetCharLength(value1Ptr); + s2len = TclGetCharLength(value2Ptr); if ((s1len == value1Ptr->length) && (value1Ptr->bytes != NULL) && (s2len == value2Ptr->length) @@ -3479,8 +3884,8 @@ TclStringCmp( s2 = value2Ptr->bytes; memCmpFn = memcmp; } else { - s1 = (char *) Tcl_GetUnicode(value1Ptr); - s2 = (char *) Tcl_GetUnicode(value2Ptr); + s1 = (char *) TclGetUnicodeFromObj_(value1Ptr, NULL); + s2 = (char *) TclGetUnicodeFromObj_(value2Ptr, NULL); if ( #if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3) 1 @@ -3492,7 +3897,7 @@ TclStringCmp( s1len *= sizeof(Tcl_UniChar); s2len *= sizeof(Tcl_UniChar); } else { - memCmpFn = (memCmpFn_t)(void *)Tcl_UniCharNcmp; + memCmpFn = (memCmpFn_t)(void *)TclUniCharNcmp; } } } @@ -3615,7 +4020,7 @@ TclStringFirst( Tcl_Obj *haystack, int start) { - int lh, ln = Tcl_GetCharLength(needle); + int lh, ln = TclGetCharLength(needle); Tcl_Obj *result; int value = -1; Tcl_UniChar *checkStr, *endStr, *uh, *un; @@ -3678,8 +4083,8 @@ TclStringFirst( * do only the well-defined Tcl_UniChar array search. */ - un = Tcl_GetUnicodeFromObj(needle, &ln); - uh = Tcl_GetUnicodeFromObj(haystack, &lh); + un = TclGetUnicodeFromObj_(needle, &ln); + uh = TclGetUnicodeFromObj_(haystack, &lh); if ((lh < ln) || (start > lh - ln)) { /* Don't start the loop if there cannot be a valid answer */ goto firstEnd; @@ -3722,7 +4127,7 @@ TclStringLast( Tcl_Obj *haystack, int last) { - int lh, ln = Tcl_GetCharLength(needle); + int lh, ln = TclGetCharLength(needle); Tcl_Obj *result; int value = -1; Tcl_UniChar *checkStr, *uh, *un; @@ -3761,8 +4166,8 @@ TclStringLast( goto lastEnd; } - uh = Tcl_GetUnicodeFromObj(haystack, &lh); - un = Tcl_GetUnicodeFromObj(needle, &ln); + uh = TclGetUnicodeFromObj_(haystack, &lh); + un = TclGetUnicodeFromObj_(needle, &ln); if (last >= lh) { last = lh - 1; @@ -3832,7 +4237,7 @@ TclStringReverse( Tcl_Obj *objPtr, int flags) { - String *stringPtr; + UniCharString *stringPtr; Tcl_UniChar ch = 0; int inPlace = flags & TCL_STRING_IN_PLACE; #if TCL_UTF_MAX < 4 @@ -3851,11 +4256,11 @@ TclStringReverse( } SetStringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); if (stringPtr->hasUnicode) { - Tcl_UniChar *from = Tcl_GetUnicode(objPtr); - stringPtr = GET_STRING(objPtr); + Tcl_UniChar *from = TclGetUnicodeFromObj_(objPtr, NULL); + stringPtr = GET_UNICHAR_STRING(objPtr); Tcl_UniChar *src = from + stringPtr->numChars; Tcl_UniChar *to; @@ -3865,10 +4270,10 @@ TclStringReverse( * Tcl_SetObjLength into growing the unicode rep buffer. */ - objPtr = Tcl_NewUnicodeObj(&ch, 1); + objPtr = TclNewUnicodeObj(&ch, 1); Tcl_SetObjLength(objPtr, stringPtr->numChars); - to = Tcl_GetUnicode(objPtr); - stringPtr = GET_STRING(objPtr); + to = TclGetUnicodeFromObj_(objPtr, NULL); + stringPtr = GET_UNICHAR_STRING(objPtr); while (--src >= from) { #if TCL_UTF_MAX < 4 ch = *src; @@ -4097,16 +4502,16 @@ TclStringReplace( /* The traditional implementation... */ { int numChars; - Tcl_UniChar *ustring = Tcl_GetUnicodeFromObj(objPtr, &numChars); + Tcl_UniChar *ustring = TclGetUnicodeFromObj_(objPtr, &numChars); /* TODO: Is there an in-place option worth pursuing here? */ - result = Tcl_NewUnicodeObj(ustring, first); + result = TclNewUnicodeObj(ustring, first); if (insertPtr) { Tcl_AppendObjToObj(result, insertPtr); } if (first + count < numChars) { - Tcl_AppendUnicodeToObj(result, ustring + first + count, + TclAppendUnicodeToObj(result, ustring + first + count, numChars - first - count); } @@ -4136,7 +4541,7 @@ FillUnicodeRep( Tcl_Obj *objPtr) /* The object in which to fill the unicode * rep. */ { - String *stringPtr = GET_STRING(objPtr); + UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr); ExtendUnicodeRepWithString(objPtr, objPtr->bytes, objPtr->length, stringPtr->numChars); @@ -4149,7 +4554,7 @@ ExtendUnicodeRepWithString( int numBytes, int numAppendChars) { - String *stringPtr = GET_STRING(objPtr); + UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr); int needed, numOrigChars = 0; Tcl_UniChar *dst, unichar = 0; @@ -4157,14 +4562,14 @@ ExtendUnicodeRepWithString( numOrigChars = stringPtr->numChars; } if (numAppendChars == -1) { - TclNumUtfChars(numAppendChars, bytes, numBytes); + TclNumUtfCharsM(numAppendChars, bytes, numBytes); } needed = numOrigChars + numAppendChars; - stringCheckLimits(needed); + uniCharStringCheckLimits(needed); if (needed > stringPtr->maxChars) { GrowUnicodeBuffer(objPtr, needed); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); } stringPtr->hasUnicode = 1; @@ -4218,8 +4623,8 @@ DupStringInternalRep( Tcl_Obj *copyPtr) /* Object with internal rep to set. Must not * currently have an internal rep.*/ { - String *srcStringPtr = GET_STRING(srcPtr); - String *copyStringPtr = NULL; + UniCharString *srcStringPtr = GET_UNICHAR_STRING(srcPtr); + UniCharString *copyStringPtr = NULL; if (srcStringPtr->numChars == -1) { /* @@ -4239,17 +4644,17 @@ DupStringInternalRep( } else { copyMaxChars = srcStringPtr->maxChars; } - copyStringPtr = stringAttemptAlloc(copyMaxChars); + copyStringPtr = uniCharStringAttemptAlloc(copyMaxChars); if (copyStringPtr == NULL) { copyMaxChars = srcStringPtr->numChars; - copyStringPtr = stringAlloc(copyMaxChars); + copyStringPtr = uniCharStringAlloc(copyMaxChars); } copyStringPtr->maxChars = copyMaxChars; memcpy(copyStringPtr->unicode, srcStringPtr->unicode, srcStringPtr->numChars * sizeof(Tcl_UniChar)); copyStringPtr->unicode[srcStringPtr->numChars] = 0; } else { - copyStringPtr = stringAlloc(0); + copyStringPtr = uniCharStringAlloc(0); copyStringPtr->maxChars = 0; copyStringPtr->unicode[0] = 0; } @@ -4264,8 +4669,8 @@ DupStringInternalRep( copyStringPtr->allocated = copyPtr->bytes ? copyPtr->length : 0; - SET_STRING(copyPtr, copyStringPtr); - copyPtr->typePtr = &tclStringType; + SET_UNICHAR_STRING(copyPtr, copyStringPtr); + copyPtr->typePtr = &tclUniCharStringType; } /* @@ -4290,8 +4695,8 @@ SetStringFromAny( TCL_UNUSED(Tcl_Interp *), Tcl_Obj *objPtr) /* The object to convert. */ { - if (!TclHasInternalRep(objPtr, &tclStringType)) { - String *stringPtr = stringAlloc(0); + if (!TclHasInternalRep(objPtr, &tclUniCharStringType)) { + UniCharString *stringPtr = uniCharStringAlloc(0); /* * Convert whatever we have into an untyped value. Just A String. @@ -4309,8 +4714,8 @@ SetStringFromAny( stringPtr->allocated = objPtr->length; stringPtr->maxChars = 0; stringPtr->hasUnicode = 0; - SET_STRING(objPtr, stringPtr); - objPtr->typePtr = &tclStringType; + SET_UNICHAR_STRING(objPtr, stringPtr); + objPtr->typePtr = &tclUniCharStringType; } return TCL_OK; } @@ -4337,7 +4742,7 @@ static void UpdateStringOfString( Tcl_Obj *objPtr) /* Object with string rep to update. */ { - String *stringPtr = GET_STRING(objPtr); + UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr); /* * This routine is only called when we need to generate the @@ -4369,7 +4774,7 @@ ExtendStringRepWithUnicode( int i, origLength, size = 0; char *dst; - String *stringPtr = GET_STRING(objPtr); + UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr); if (numChars < 0) { numChars = UnicodeLength(unicode); @@ -4423,7 +4828,7 @@ ExtendStringRepWithUnicode( * * FreeStringInternalRep -- * - * Deallocate the storage associated with a String data object's internal + * Deallocate the storage associated with a (UniChar)String data object's internal * representation. * * Results: diff --git a/generic/tclStringRep.h b/generic/tclStringRep.h index 27c3fc2..faa2c2c 100644 --- a/generic/tclStringRep.h +++ b/generic/tclStringRep.h @@ -39,11 +39,6 @@ * Unicode reps of the String object with fewer mallocs. To optimize string * length and indexing operations, this structure also stores the number of * characters (same of UTF and Unicode!) once that value has been computed. - * - * Under normal configurations, what Tcl calls "Unicode" is actually UTF-16 - * restricted to the Basic Multilingual Plane (i.e. U+00000 to U+0FFFF). This - * can be officially modified by altering the definition of Tcl_UniChar in - * tcl.h, but do not do that unless you are sure what you're doing! */ typedef struct { @@ -59,15 +54,15 @@ typedef struct { * space allocated for the unicode array. */ int hasUnicode; /* Boolean determining whether the string has * a Unicode representation. */ - Tcl_UniChar unicode[TCLFLEXARRAY]; /* The array of Unicode chars. The actual size + unsigned short unicode[TCLFLEXARRAY]; /* The array of Unicode chars. The actual size * of this field depends on the 'maxChars' * field above. */ } String; #define STRING_MAXCHARS \ - (int)(((size_t)UINT_MAX - offsetof(String, unicode))/sizeof(Tcl_UniChar) - 1) + (int)(((size_t)UINT_MAX - offsetof(String, unicode))/sizeof(unsigned short) - 1) #define STRING_SIZE(numChars) \ - (offsetof(String, unicode) + sizeof(Tcl_UniChar) + ((numChars) * sizeof(Tcl_UniChar))) + (offsetof(String, unicode) + sizeof(unsigned short) + ((numChars) * sizeof(unsigned short))) #define stringCheckLimits(numChars) \ do { \ if ((numChars) < 0 || (numChars) > STRING_MAXCHARS) { \ diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index 257c3ce..7d04481 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -48,6 +48,8 @@ #undef Tcl_UniCharCaseMatch #undef Tcl_UniCharLen #undef Tcl_UniCharNcmp +#undef Tcl_GetRange +#undef Tcl_GetUniChar #undef Tcl_DumpActiveMemory #undef Tcl_ValidateAllMemory #undef Tcl_FindHashEntry @@ -76,24 +78,31 @@ #undef Tcl_MacOSXOpenBundleResources #undef TclWinConvertWSAError #undef TclWinConvertError +#undef Tcl_GetCharLength +#undef Tcl_UtfAtIndex + #if defined(_WIN32) || defined(__CYGWIN__) #define TclWinConvertWSAError (void (*)(DWORD))(void *)Tcl_WinConvertError #define TclWinConvertError (void (*)(DWORD))(void *)Tcl_WinConvertError #endif -#if TCL_UTF_MAX > 3 +#if TCL_UTF_MAX > 3 && defined(TCL_NO_DEPRECATED) static void uniCodePanic(void) { - Tcl_Panic("This extension uses a deprecated function, not available now: Tcl is compiled with -DTCL_UTF_MAX==%d", TCL_UTF_MAX); + Tcl_Panic("Tcl is compiled without the the UTF16 compatibility layer (-DTCL_NO_DEPRECATED)"); } -# define Tcl_GetUnicode (Tcl_UniChar *(*)(Tcl_Obj *))(void *)uniCodePanic -# define Tcl_GetUnicodeFromObj (Tcl_UniChar *(*)(Tcl_Obj *, int *))(void *)uniCodePanic -# define Tcl_NewUnicodeObj (Tcl_Obj *(*)(const int *, int))(void *)uniCodePanic -# define Tcl_SetUnicodeObj (void(*)(Tcl_Obj *, const Tcl_UniChar *, int))(void *)uniCodePanic -# define Tcl_AppendUnicodeToObj (void(*)(Tcl_Obj *, const Tcl_UniChar *, int))(void *)uniCodePanic -# define Tcl_UniCharNcasecmp (int(*)(const Tcl_UniChar *, const Tcl_UniChar *, unsigned long))(void *)uniCodePanic -# define Tcl_UniCharCaseMatch (int(*)(const Tcl_UniChar *, const Tcl_UniChar *, int))(void *)uniCodePanic -# define Tcl_UniCharNcmp (int(*)(const Tcl_UniChar *, const Tcl_UniChar *, unsigned long))(void *)uniCodePanic +# define Tcl_GetUnicode (unsigned short *(*)(Tcl_Obj *))(void *)uniCodePanic +# define Tcl_GetUnicodeFromObj (unsigned short *(*)(Tcl_Obj *, int *))(void *)uniCodePanic +# define Tcl_NewUnicodeObj (Tcl_Obj *(*)(const unsigned short *, int))(void *)uniCodePanic +# define Tcl_SetUnicodeObj (void(*)(Tcl_Obj *, const unsigned short *, int))(void *)uniCodePanic +# define Tcl_AppendUnicodeToObj (void(*)(Tcl_Obj *, const unsigned short *, int))(void *)uniCodePanic +# define Tcl_UtfAtIndex (const char *(*)(const char *, int))(void *)uniCodePanic +# define Tcl_GetCharLength (int(*)(Tcl_Obj *))(void *)uniCodePanic +# define Tcl_UniCharNcmp (int(*)(const unsigned short *, const unsigned short *, unsigned long))(void *)uniCodePanic +# define Tcl_UniCharNcasecmp (int(*)(const unsigned short *, const unsigned short *, unsigned long))(void *)uniCodePanic +# define Tcl_UniCharCaseMatch (int(*)(const unsigned short *, const unsigned short *, int))(void *)uniCodePanic +# define Tcl_GetRange (Tcl_Obj *(*)(Tcl_Obj *, int, int))(void *)uniCodePanic +# define Tcl_GetUniChar (int(*)(Tcl_Obj *, int))(void *)uniCodePanic #endif #define TclUtfCharComplete UtfCharComplete @@ -683,8 +692,8 @@ static int utfNcasecmp(const char *s1, const char *s2, unsigned int n){ # define Tcl_SetExitProc 0 # define Tcl_SetPanicProc 0 # define Tcl_FindExecutable 0 -# define Tcl_GetUnicode 0 #if TCL_UTF_MAX < 4 +# define Tcl_GetUnicode 0 # define Tcl_AppendUnicodeToObj 0 # define Tcl_UniCharCaseMatch 0 # define Tcl_UniCharNcasecmp 0 @@ -1950,6 +1959,11 @@ const TclStubs tclStubs = { 0, /* 666 */ 0, /* 667 */ Tcl_UniCharLen, /* 668 */ + TclNumUtfChars, /* 669 */ + TclGetCharLength, /* 670 */ + TclUtfAtIndex, /* 671 */ + TclGetRange, /* 672 */ + TclGetUniChar, /* 673 */ }; /* !END!: Do not edit above this line. */ diff --git a/generic/tclTest.c b/generic/tclTest.c index 0ce5e83..cbd1f70 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -16,10 +16,13 @@ */ #undef STATIC_BUILD +#undef BUILD_tcl #ifndef USE_TCL_STUBS # define USE_TCL_STUBS #endif -#ifndef TCL_NO_DEPRECATED +#ifdef TCL_NO_DEPRECATED +# define TCL_UTF_MAX 4 +#else # define TCL_NO_DEPRECATED #endif #include "tclInt.h" @@ -6969,7 +6972,7 @@ TestUtfNextCmd( int objc, Tcl_Obj *const objv[]) { - size_t numBytes; + int numBytes; char *bytes; const char *result, *first; char buffer[32]; diff --git a/generic/tclTestObj.c b/generic/tclTestObj.c index a235002..223eb98 100644 --- a/generic/tclTestObj.c +++ b/generic/tclTestObj.c @@ -13,7 +13,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. */ - +#undef BUILD_tcl #ifndef USE_TCL_STUBS # define USE_TCL_STUBS #endif @@ -1073,8 +1073,9 @@ TestobjCmd( Tcl_SetObjResult(interp, Tcl_NewStringObj("none", -1)); } else { typeName = objv[2]->typePtr->name; + if (!strcmp(typeName, "utf32string")) typeName = "string"; #ifndef TCL_WIDE_INT_IS_LONG - if (!strcmp(typeName, "wideInt")) typeName = "int"; + else if (!strcmp(typeName, "wideInt")) typeName = "int"; #endif Tcl_SetObjResult(interp, Tcl_NewStringObj(typeName, -1)); } @@ -1153,7 +1154,7 @@ TeststringobjCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - Tcl_UniChar *unicode; + unsigned short *unicode; size_t varIndex; int size, option, i; Tcl_WideInt length; @@ -1263,10 +1264,14 @@ TeststringobjCmd( goto wrongNumArgs; } if (varPtr[varIndex] != NULL) { - Tcl_ConvertToType(NULL, varPtr[varIndex], - Tcl_GetObjType("string")); - strPtr = (String *)varPtr[varIndex]->internalRep.twoPtrValue.ptr1; - length = (int) strPtr->allocated; + const Tcl_ObjType *objType = Tcl_GetObjType("string"); + if (objType != NULL) { + Tcl_ConvertToType(NULL, varPtr[varIndex], objType); + strPtr = (String *)varPtr[varIndex]->internalRep.twoPtrValue.ptr1; + length = (int) strPtr->allocated; + } else { + length = -1; + } } else { length = -1; } @@ -1317,10 +1322,14 @@ TeststringobjCmd( goto wrongNumArgs; } if (varPtr[varIndex] != NULL) { - Tcl_ConvertToType(NULL, varPtr[varIndex], - Tcl_GetObjType("string")); - strPtr = (String *)varPtr[varIndex]->internalRep.twoPtrValue.ptr1; - length = strPtr->maxChars; + const Tcl_ObjType *objType = Tcl_GetObjType("string"); + if (objType != NULL) { + Tcl_ConvertToType(NULL, varPtr[varIndex],objType); + strPtr = (String *)varPtr[varIndex]->internalRep.twoPtrValue.ptr1; + length = strPtr->maxChars; + } else { + length = -1; + } } else { length = -1; } diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 169f240..82adf65 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -799,7 +799,7 @@ Tcl_UtfCharComplete( */ int -Tcl_NumUtfChars( +TclNumUtfChars( const char *src, /* The UTF-8 string to measure. */ int length) /* The length of the string in bytes, or -1 * for strlen(string). */ @@ -850,6 +850,61 @@ Tcl_NumUtfChars( return i; } +#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) +#undef Tcl_NumUtfChars +int +Tcl_NumUtfChars( + const char *src, /* The UTF-8 string to measure. */ + int length) /* The length of the string in bytes, or -1 + * for strlen(string). */ +{ + unsigned short ch = 0; + int i = 0; + + if (length < 0) { + /* string is NUL-terminated, so TclUtfToUniChar calls are safe. */ + while ((*src != '\0') && (i < INT_MAX)) { + src += Tcl_UtfToChar16(src, &ch); + i++; + } + } else { + /* Will return value between 0 and length. No overflow checks. */ + + /* Pointer to the end of string. Never read endPtr[0] */ + const char *endPtr = src + length; + /* Pointer to last byte where optimization still can be used */ + const char *optPtr = endPtr - 4; + + /* + * Optimize away the call in this loop. Justified because... + * when (src <= optPtr), (endPtr - src) >= (endPtr - optPtr) + * By initialization above (endPtr - optPtr) = TCL_UTF_MAX + * So (endPtr - src) >= TCL_UTF_MAX, and passing that to + * Tcl_UtfCharComplete we know will cause return of 1. + */ + while (src <= optPtr + /* && Tcl_UtfCharComplete(src, endPtr - src) */ ) { + src += Tcl_UtfToChar16(src, &ch); + i++; + } + /* Loop over the remaining string where call must happen */ + while (src < endPtr) { + if (Tcl_UtfCharComplete(src, endPtr - src)) { + src += Tcl_UtfToChar16(src, &ch); + } else { + /* + * src points to incomplete UTF-8 sequence + * Treat first byte as character and count it + */ + src++; + } + i++; + } + } + return i; +} +#endif + /* *--------------------------------------------------------------------------- * @@ -1127,22 +1182,20 @@ Tcl_UniCharAtIndex( const char *src, /* The UTF-8 string to dereference. */ int index) /* The position of the desired character. */ { - Tcl_UniChar ch = 0; + unsigned short ch = 0; int i = 0; if (index < 0) { return -1; } while (index-- > 0) { - i = TclUtfToUniChar(src, &ch); + i = Tcl_UtfToChar16(src, &ch); src += i; } -#if TCL_UTF_MAX < 4 if ((ch >= 0xD800) && (i < 3)) { /* Index points at character following high Surrogate */ return -1; } -#endif TclUtfToUCS4(src, &i); return i; } @@ -1166,27 +1219,56 @@ Tcl_UniCharAtIndex( *--------------------------------------------------------------------------- */ +#if TCL_UTF_MAX < 4 +# undef Tcl_UtfToUniChar +# define Tcl_UtfToUniChar Tcl_UtfToChar16 +#endif + const char * -Tcl_UtfAtIndex( +TclUtfAtIndex( const char *src, /* The UTF-8 string. */ int index) /* The position of the desired character. */ { - Tcl_UniChar ch = 0; + Tcl_UniChar ch = 0; int len = 0; while (index-- > 0) { - len = TclUtfToUniChar(src, &ch); + len = (Tcl_UtfToUniChar)(src, &ch); src += len; } #if TCL_UTF_MAX < 4 if ((ch >= 0xD800) && (len < 3)) { /* Index points at character following high Surrogate */ - src += TclUtfToUniChar(src, &ch); + src += (Tcl_UtfToUniChar)(src, &ch); } #endif return src; } +#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) +#undef Tcl_UtfAtIndex +const char * +Tcl_UtfAtIndex( + const char *src, /* The UTF-8 string. */ + int index) /* The position of the desired character. */ +{ + unsigned short ch = 0; + int len = 0; + + while (index-- > 0) { + len = Tcl_UtfToChar16(src, &ch); + src += len; + } + if ((ch >= 0xD800) && (len < 3)) { + /* Index points at character following high Surrogate */ + src += Tcl_UtfToChar16(src, &ch); + } + return src; +} + + +#endif + /* *--------------------------------------------------------------------------- * @@ -1849,7 +1931,7 @@ Tcl_UniCharLen( */ int -Tcl_UniCharNcmp( +TclUniCharNcmp( const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */ const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */ unsigned long numChars) /* Number of unichars to compare. */ @@ -1868,21 +1950,47 @@ Tcl_UniCharNcmp( for ( ; numChars != 0; ucs++, uct++, numChars--) { if (*ucs != *uct) { -#if TCL_UTF_MAX < 4 + return (*ucs - *uct); + } + } + return 0; +#endif /* WORDS_BIGENDIAN */ +} + +#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) +int +Tcl_UniCharNcmp( + const unsigned short *ucs, /* Unicode string to compare to uct. */ + const unsigned short *uct, /* Unicode string ucs is compared to. */ + unsigned long numChars) /* Number of unichars to compare. */ +{ +#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3) + /* + * We are definitely on a big-endian machine; memcmp() is safe + */ + + return memcmp(ucs, uct, numChars*sizeof(Tcl_UniChar)); + +#else /* !WORDS_BIGENDIAN */ + /* + * We can't simply call memcmp() because that is not lexically correct. + */ + + for ( ; numChars != 0; ucs++, uct++, numChars--) { + if (*ucs != *uct) { /* special case for handling upper surrogates */ if (((*ucs & 0xFC00) == 0xD800) && ((*uct & 0xFC00) != 0xD800)) { return 1; } else if (((*uct & 0xFC00) == 0xD800)) { return -1; } -#endif return (*ucs - *uct); } } return 0; #endif /* WORDS_BIGENDIAN */ } - +#endif /* *---------------------------------------------------------------------- * @@ -1902,31 +2010,51 @@ Tcl_UniCharNcmp( */ int -Tcl_UniCharNcasecmp( +TclUniCharNcasecmp( const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */ const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */ unsigned long numChars) /* Number of unichars to compare. */ { for ( ; numChars != 0; numChars--, ucs++, uct++) { if (*ucs != *uct) { - Tcl_UniChar lcs = Tcl_UniCharToLower(*ucs); - Tcl_UniChar lct = Tcl_UniCharToLower(*uct); + int lcs = Tcl_UniCharToLower(*ucs); + int lct = Tcl_UniCharToLower(*uct); + + if (lcs != lct) { + return (lcs - lct); + } + } + } + return 0; +} + +#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) +int +Tcl_UniCharNcasecmp( + const unsigned short *ucs, /* Unicode string to compare to uct. */ + const unsigned short *uct, /* Unicode string ucs is compared to. */ + unsigned long numChars) /* Number of unichars to compare. */ +{ + for ( ; numChars != 0; numChars--, ucs++, uct++) { + if (*ucs != *uct) { + unsigned short lcs = Tcl_UniCharToLower(*ucs); + unsigned short lct = Tcl_UniCharToLower(*uct); if (lcs != lct) { -#if TCL_UTF_MAX < 4 /* special case for handling upper surrogates */ if (((lcs & 0xFC00) == 0xD800) && ((lct & 0xFC00) != 0xD800)) { return 1; } else if (((lct & 0xFC00) == 0xD800)) { return -1; } -#endif return (lcs - lct); } } } return 0; } +#endif + /* *---------------------------------------------------------------------- @@ -2290,14 +2418,182 @@ Tcl_UniCharIsWordChar( */ int -Tcl_UniCharCaseMatch( +TclUniCharCaseMatch( const Tcl_UniChar *uniStr, /* Unicode String. */ const Tcl_UniChar *uniPattern, /* Pattern, which may contain special * characters. */ int nocase) /* 0 for case sensitive, 1 for insensitive */ { - Tcl_UniChar ch1 = 0, p; + int ch1 = 0, p; + + while (1) { + p = *uniPattern; + + /* + * See if we're at the end of both the pattern and the string. If so, + * we succeeded. If we're at the end of the pattern but not at the end + * of the string, we failed. + */ + + if (p == 0) { + return (*uniStr == 0); + } + if ((*uniStr == 0) && (p != '*')) { + return 0; + } + + /* + * Check for a "*" as the next pattern character. It matches any + * substring. We handle this by skipping all the characters up to the + * next matching one in the pattern, and then calling ourselves + * recursively for each postfix of string, until either we match or we + * reach the end of the string. + */ + + if (p == '*') { + /* + * Skip all successive *'s in the pattern + */ + + while (*(++uniPattern) == '*') { + /* empty body */ + } + p = *uniPattern; + if (p == 0) { + return 1; + } + if (nocase) { + p = Tcl_UniCharToLower(p); + } + while (1) { + /* + * Optimization for matching - cruise through the string + * quickly if the next char in the pattern isn't a special + * character + */ + + if ((p != '[') && (p != '?') && (p != '\\')) { + if (nocase) { + while (*uniStr && (p != *uniStr) + && (p != Tcl_UniCharToLower(*uniStr))) { + uniStr++; + } + } else { + while (*uniStr && (p != *uniStr)) { + uniStr++; + } + } + } + if (TclUniCharCaseMatch(uniStr, uniPattern, nocase)) { + return 1; + } + if (*uniStr == 0) { + return 0; + } + uniStr++; + } + } + + /* + * Check for a "?" as the next pattern character. It matches any + * single character. + */ + + if (p == '?') { + uniPattern++; + uniStr++; + continue; + } + + /* + * Check for a "[" as the next pattern character. It is followed by a + * list of characters that are acceptable, or by a range (two + * characters separated by "-"). + */ + + if (p == '[') { + int startChar, endChar; + + uniPattern++; + ch1 = (nocase ? Tcl_UniCharToLower(*uniStr) : *uniStr); + uniStr++; + while (1) { + if ((*uniPattern == ']') || (*uniPattern == 0)) { + return 0; + } + startChar = (nocase ? Tcl_UniCharToLower(*uniPattern) + : *uniPattern); + uniPattern++; + if (*uniPattern == '-') { + uniPattern++; + if (*uniPattern == 0) { + return 0; + } + endChar = (nocase ? Tcl_UniCharToLower(*uniPattern) + : *uniPattern); + uniPattern++; + if (((startChar <= ch1) && (ch1 <= endChar)) + || ((endChar <= ch1) && (ch1 <= startChar))) { + /* + * Matches ranges of form [a-z] or [z-a]. + */ + break; + } + } else if (startChar == ch1) { + break; + } + } + while (*uniPattern != ']') { + if (*uniPattern == 0) { + uniPattern--; + break; + } + uniPattern++; + } + uniPattern++; + continue; + } + + /* + * If the next pattern character is '\', just strip off the '\' so we + * do exact matching on the character that follows. + */ + + if (p == '\\') { + if (*(++uniPattern) == '\0') { + return 0; + } + } + + /* + * There's no special character. Just make sure that the next bytes of + * each string match. + */ + + if (nocase) { + if (Tcl_UniCharToLower(*uniStr) != + Tcl_UniCharToLower(*uniPattern)) { + return 0; + } + } else if (*uniStr != *uniPattern) { + return 0; + } + uniStr++; + uniPattern++; + } +} + +#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) +int +Tcl_UniCharCaseMatch( + const unsigned short *uniStr, /* Unicode String. */ + const unsigned short *uniPattern, + /* Pattern, which may contain special + * characters. */ + int nocase) /* 0 for case sensitive, 1 for insensitive */ +{ + unsigned short ch1 = 0, p; while (1) { p = *uniPattern; @@ -2385,7 +2681,7 @@ Tcl_UniCharCaseMatch( */ if (p == '[') { - Tcl_UniChar startChar, endChar; + unsigned short startChar, endChar; uniPattern++; ch1 = (nocase ? Tcl_UniCharToLower(*uniStr) : *uniStr); @@ -2455,7 +2751,9 @@ Tcl_UniCharCaseMatch( uniPattern++; } } +#endif + /* *---------------------------------------------------------------------- * @@ -2680,7 +2978,7 @@ TclUtfToUCS4( int *ucs4Ptr) /* Filled with the UCS4 codepoint represented * by the UTF-8 string. */ { - /* Make use of the #undef Tcl_UtfToUniChar above, which already handles UCS4. */ +# undef Tcl_UtfToUniChar return Tcl_UtfToUniChar(src, ucs4Ptr); } diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 66d1009..3537ecc 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -2591,11 +2591,11 @@ TclStringMatchObj( trivial = nocase ? 0 : TclMatchIsTrivial(TclGetString(ptnObj)); */ - if (TclHasInternalRep(strObj, &tclStringType) || (strObj->typePtr == NULL)) { + if (TclHasInternalRep(strObj, &tclUniCharStringType) || (strObj->typePtr == NULL)) { Tcl_UniChar *udata, *uptn; - udata = Tcl_GetUnicodeFromObj(strObj, &length); - uptn = Tcl_GetUnicodeFromObj(ptnObj, &plen); + udata = TclGetUnicodeFromObj_(strObj, &length); + uptn = TclGetUnicodeFromObj_(ptnObj, &plen); match = TclUniCharMatch(udata, length, uptn, plen, flags); } else if (TclIsPureByteArray(strObj) && TclIsPureByteArray(ptnObj) && !flags) { diff --git a/tests/obj.test b/tests/obj.test index 4fa8d3a..7563422 100644 --- a/tests/obj.test +++ b/tests/obj.test @@ -19,11 +19,13 @@ if {"::tcltest" ni [namespace children]} { ::tcltest::loadTestedCommands catch [list package require -exact tcl::test [info patchlevel]] +package require tcltests + testConstraint testobj [llength [info commands testobj]] testConstraint longIs32bit [expr {$tcl_platform(wordSize) == 4}] testConstraint wideIs64bit [expr {wide(0x8000000000000000) < 0}] -test obj-1.1 {Tcl_AppendAllObjTypes, and InitTypeTable, Tcl_RegisterObjType} testobj { +test obj-1.1 {Tcl_AppendAllObjTypes, and InitTypeTable, Tcl_RegisterObjType} {testobj deprecated} { set r 1 foreach {t} { bytearray diff --git a/tests/string.test b/tests/string.test index 203d0c6..6863c23 100644 --- a/tests/string.test +++ b/tests/string.test @@ -422,7 +422,7 @@ test string-4.16.$noComp {string first, normal string vs pure unicode string} -b # Representation checks are canaries run {list [representationpoke $s] [representationpoke $m] \ [string first $m $s]} -} -result {{string 1} {string 0} 2} +} -match glob -result {{*string 1} {*string 0} 2} test string-4.17.$noComp {string first, corner case} -body { run {string first a aaa 4294967295} } -result {-1} diff --git a/tests/stringObj.test b/tests/stringObj.test index abe02b2..0aa9a47 100644 --- a/tests/stringObj.test +++ b/tests/stringObj.test @@ -25,8 +25,9 @@ testConstraint testobj [llength [info commands testobj]] testConstraint testbytestring [llength [info commands testbytestring]] testConstraint testdstring [llength [info commands testdstring]] testConstraint tip389 [expr {[string length \U010000] == 2}] - -test stringObj-1.1 {string type registration} testobj { +testConstraint utf32 [expr {[string length [format %c 0x10000]] == 1}] + +test stringObj-1.1 {string type registration} {testobj deprecated} { set t [testobj types] set first [string first "string" $t] set result [expr {$first >= 0}] @@ -57,27 +58,27 @@ test stringObj-3.2 {Tcl_SetStringObj, existing non-"empty string" object} testob lappend result [testobj refcount 1] } {{} 512 foo string 2} -test stringObj-4.1 {Tcl_SetObjLength procedure, string gets shorter} testobj { +test stringObj-4.1 {Tcl_SetObjLength procedure, string gets shorter} {testobj utf32 deprecated} { testobj freeallvars teststringobj set 1 test teststringobj setlength 1 3 list [teststringobj length 1] [teststringobj length2 1] \ [teststringobj get 1] -} {3 4 tes} -test stringObj-4.2 {Tcl_SetObjLength procedure, string gets longer} testobj { +} {3 3 tes} +test stringObj-4.2 {Tcl_SetObjLength procedure, string gets longer} {testobj deprecated} { testobj freeallvars teststringobj set 1 abcdef teststringobj setlength 1 10 list [teststringobj length 1] [teststringobj length2 1] } {10 10} -test stringObj-4.3 {Tcl_SetObjLength procedure, string gets longer} testobj { +test stringObj-4.3 {Tcl_SetObjLength procedure, string gets longer} {testobj utf32 deprecated} { testobj freeallvars teststringobj set 1 abcdef teststringobj append 1 xyzq -1 list [teststringobj length 1] [teststringobj length2 1] \ [teststringobj get 1] -} {10 20 abcdefxyzq} -test stringObj-4.4 {Tcl_SetObjLength procedure, "expty string", length 0} testobj { +} {10 10 abcdefxyzq} +test stringObj-4.4 {Tcl_SetObjLength procedure, "expty string", length 0} {testobj deprecated} { testobj freeallvars testobj newobj 1 teststringobj setlength 1 0 @@ -97,7 +98,7 @@ test stringObj-5.2 {Tcl_AppendToObj procedure, length calculation} testobj { teststringobj append 1 123 -1 teststringobj get 1 } {x y bbCC123} -test stringObj-5.3 {Tcl_AppendToObj procedure, reallocating space} testobj { +test stringObj-5.3 {Tcl_AppendToObj procedure, reallocating space} {testobj utf32 deprecated} { testobj freeallvars teststringobj set 1 xyz teststringobj setlength 1 15 @@ -109,7 +110,7 @@ test stringObj-5.3 {Tcl_AppendToObj procedure, reallocating space} testobj { teststringobj append 1 abcdef -1 lappend result [teststringobj length 1] [teststringobj length2 1] \ [teststringobj get 1] -} {15 15 16 32 xy12345678abcdef} +} {15 15 16 16 xy12345678abcdef} test stringObj-6.1 {Tcl_AppendStringsToObj procedure, type conversion} testobj { testobj freeallvars @@ -135,13 +136,13 @@ test stringObj-6.4 {Tcl_AppendStringsToObj procedure, counting space} testobj { teststringobj appendstrings 1 { 123 } abcdefg list [teststringobj length 1] [teststringobj get 1] } {15 {abc 123 abcdefg}} -test stringObj-6.5 {Tcl_AppendStringsToObj procedure, don't double space if initial string empty} testobj { +test stringObj-6.5 {Tcl_AppendStringsToObj procedure, don't double space if initial string empty} {testobj utf32 deprecated} { testobj freeallvars testobj newobj 1 teststringobj appendstrings 1 123 abcdefg list [teststringobj length 1] [teststringobj length2 1] [teststringobj get 1] -} {10 20 123abcdefg} -test stringObj-6.6 {Tcl_AppendStringsToObj procedure, space reallocation} testobj { +} {10 10 123abcdefg} +test stringObj-6.6 {Tcl_AppendStringsToObj procedure, space reallocation} {testobj deprecated} { testobj freeallvars teststringobj set 1 abc teststringobj setlength 1 10 @@ -150,7 +151,7 @@ test stringObj-6.6 {Tcl_AppendStringsToObj procedure, space reallocation} testob list [teststringobj length 1] [teststringobj length2 1] \ [teststringobj get 1] } {10 10 ab34567890} -test stringObj-6.7 {Tcl_AppendStringsToObj procedure, space reallocation} testobj { +test stringObj-6.7 {Tcl_AppendStringsToObj procedure, space reallocation} {testobj utf32 deprecated} { testobj freeallvars teststringobj set 1 abc teststringobj setlength 1 10 @@ -158,8 +159,8 @@ test stringObj-6.7 {Tcl_AppendStringsToObj procedure, space reallocation} testob teststringobj appendstrings 1 34567890x list [teststringobj length 1] [teststringobj length2 1] \ [teststringobj get 1] -} {11 22 ab34567890x} -test stringObj-6.8 {Tcl_AppendStringsToObj procedure, object totally empty} testobj { +} {11 11 ab34567890x} +test stringObj-6.8 {Tcl_AppendStringsToObj procedure, object totally empty} {testobj deprecated} { testobj freeallvars testobj newobj 1 teststringobj appendstrings 1 {} @@ -172,14 +173,14 @@ test stringObj-6.9 {Tcl_AppendStringToObj, pure unicode} testobj { teststringobj get 1 } adcfoobarsoom -test stringObj-7.1 {SetStringFromAny procedure} testobj { +test stringObj-7.1 {SetStringFromAny procedure} {testobj utf32 deprecated} { testobj freeallvars teststringobj set2 1 [list a b] teststringobj append 1 x -1 list [teststringobj length 1] [teststringobj length2 1] \ [teststringobj get 1] -} {4 8 {a bx}} -test stringObj-7.2 {SetStringFromAny procedure, null object} testobj { +} {4 4 {a bx}} +test stringObj-7.2 {SetStringFromAny procedure, null object} {testobj deprecated} { testobj freeallvars testobj newobj 1 teststringobj appendstrings 1 {} @@ -197,7 +198,7 @@ test stringObj-7.4 {SetStringFromAny called with string obj} testobj { [string length $x] [testobj objtype $x] } {6 string 6 string} -test stringObj-8.1 {DupStringInternalRep procedure} testobj { +test stringObj-8.1 {DupStringInternalRep procedure} {testobj utf32 deprecated} { testobj freeallvars teststringobj set 1 {} teststringobj append 1 abcde -1 @@ -206,7 +207,7 @@ test stringObj-8.1 {DupStringInternalRep procedure} testobj { [teststringobj maxchars 1] [teststringobj get 1] \ [teststringobj length 2] [teststringobj length2 2] \ [teststringobj maxchars 2] [teststringobj get 2] -} {5 10 0 abcde 5 5 0 abcde} +} {5 5 5 abcde 5 5 5 abcde} test stringObj-8.2 {DupUnicodeInternalRep, mixed width chars} testobj { set x abc\xEF\xBF\xAEghi string length $x diff --git a/tests/utf.test b/tests/utf.test index 477216c..c0d64e2 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -19,7 +19,7 @@ catch [list package require -exact tcl::test [info patchlevel]] testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] testConstraint utf16 [expr {[string length [format %c 0x10000]] == 2}] -testConstraint ucs4 [expr {[testConstraint fullutf] +testConstraint utf32 [expr {[testConstraint fullutf] && [string length [format %c 0x10000]] == 1}] testConstraint Uesc [expr {"\U0041" eq "A"}] @@ -131,7 +131,7 @@ test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {ucs2 testb test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf16 { string length 𐀀 } 2 -test utf-2.8.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} ucs4 { +test utf-2.8.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf32 { string length 𐀀 } 1 test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {ucs2 testbytestring} { @@ -140,7 +140,7 @@ test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {ucs2 testb test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf16 { string length \U10FFFF } 2 -test utf-2.9.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} ucs4 { +test utf-2.9.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf32 { string length \U10FFFF } 1 test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring { @@ -194,7 +194,7 @@ test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfc test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs2} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end } 2 -test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs4} { +test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring utf32} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end } 1 test utf-4.13 {Tcl_NumUtfChars: end of string} {testnumutfchars testbytestring} { @@ -878,7 +878,7 @@ test utf-8.4 {Tcl_UniCharAtIndex: index > 0} { test utf-8.5.0 {Tcl_UniCharAtIndex: high surrogate} ucs2 { string index \uD842 0 } \uD842 -test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} ucs4 { +test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} utf32 { string index \uD842 0 } \uD842 test utf-8.5.2 {Tcl_UniCharAtIndex: high surrogate} utf16 { @@ -890,7 +890,7 @@ test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} { test utf-8.7.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index \uD83D\uDE00G 0 } \uD83D -test utf-8.7.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { +test utf-8.7.1 {Tcl_UniCharAtIndex: Emoji} utf32 { string index 😀G 0 } 😀 test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} utf16 { @@ -899,7 +899,7 @@ test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} utf16 { test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index \uD83D\uDE00G 1 } \uDE00 -test utf-8.8.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { +test utf-8.8.1 {Tcl_UniCharAtIndex: Emoji} utf32 { string index 😀G 1 } G test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} utf16 { @@ -908,7 +908,7 @@ test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} utf16 { test utf-8.9.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index \uD83D\uDE00G 2 } G -test utf-8.9.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { +test utf-8.9.1 {Tcl_UniCharAtIndex: Emoji} utf32 { string index 😀G 2 } {} test utf-8.9.2 {Tcl_UniCharAtIndex: Emoji} utf16 { @@ -917,7 +917,7 @@ test utf-8.9.2 {Tcl_UniCharAtIndex: Emoji} utf16 { test utf-8.10.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index 😀G 0 } \uFFFD -test utf-8.10.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { +test utf-8.10.1 {Tcl_UniCharAtIndex: Emoji} utf32 { string index 😀G 0 } 😀 test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} utf16 { @@ -926,7 +926,7 @@ test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} utf16 { test utf-8.11.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index 😀G 1 } G -test utf-8.11.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { +test utf-8.11.1 {Tcl_UniCharAtIndex: Emoji} utf32 { string index 😀G 1 } G test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} utf16 { @@ -935,7 +935,7 @@ test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} utf16 { test utf-8.12.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index 😀G 2 } {} -test utf-8.12.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { +test utf-8.12.1 {Tcl_UniCharAtIndex: Emoji} utf32 { string index 😀G 2 } {} test utf-8.12.2 {Tcl_UniCharAtIndex: Emoji} utf16 { @@ -951,7 +951,7 @@ test utf-9.2 {Tcl_UtfAtIndex: index > 0} { test utf-9.3.0 {Tcl_UtfAtIndex: index = 0, Emoji} ucs2 { string range \uD83D\uDE00G 0 0 } \uD83D -test utf-9.3.1 {Tcl_UtfAtIndex: index = 0, Emoji} ucs4 { +test utf-9.3.1 {Tcl_UtfAtIndex: index = 0, Emoji} utf32 { string range 😀G 0 0 } 😀 test utf-9.3.2 {Tcl_UtfAtIndex: index = 0, Emoji} utf16 { @@ -960,7 +960,7 @@ test utf-9.3.2 {Tcl_UtfAtIndex: index = 0, Emoji} utf16 { test utf-9.4.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { string range \uD83D\uDE00G 1 1 } \uDE00 -test utf-9.4.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { +test utf-9.4.1 {Tcl_UtfAtIndex: index > 0, Emoji} utf32 { string range 😀G 1 1 } G test utf-9.4.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { @@ -969,7 +969,7 @@ test utf-9.4.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { test utf-9.5.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { string range \uD83D\uDE00G 2 2 } G -test utf-9.5.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { +test utf-9.5.1 {Tcl_UtfAtIndex: index > 0, Emoji} utf32 { string range 😀G 2 2 } {} test utf-9.5.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { @@ -978,7 +978,7 @@ test utf-9.5.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { test utf-9.6.0 {Tcl_UtfAtIndex: index = 0, Emoji} ucs2 { string range 😀G 0 0 } \uFFFD -test utf-9.6.1 {Tcl_UtfAtIndex: index = 0, Emoji} ucs4 { +test utf-9.6.1 {Tcl_UtfAtIndex: index = 0, Emoji} utf32 { string range 😀G 0 0 } 😀 test utf-9.6.2 {Tcl_UtfAtIndex: index = 0, Emoji} utf16 { @@ -987,7 +987,7 @@ test utf-9.6.2 {Tcl_UtfAtIndex: index = 0, Emoji} utf16 { test utf-9.7.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { string range 😀G 1 1 } G -test utf-9.7.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { +test utf-9.7.1 {Tcl_UtfAtIndex: index > 0, Emoji} utf32 { string range 😀G 1 1 } G test utf-9.7.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { @@ -996,7 +996,7 @@ test utf-9.7.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { test utf-9.8.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { string range 😀G 2 2 } {} -test utf-9.8.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { +test utf-9.8.1 {Tcl_UtfAtIndex: index > 0, Emoji} utf32 { string range 😀G 2 2 } {} test utf-9.8.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { @@ -1227,10 +1227,10 @@ test utf-19.1 {TclUniCharLen} -body { unset -nocomplain foo } -result {1 4} -test utf-20.1 {TclUniCharNcmp} ucs4 { +test utf-20.1 {TclUniCharNcmp} utf32 { string compare [string range [format %c 0xFFFF] 0 0] [string range [format %c 0x10000] 0 0] } -1 -test utf-20.2 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} { +test utf-20.2 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} utf32 { set one [format %c 0xFFFF] set two [format %c 0x10000] set first [string compare $one $two] @@ -1357,10 +1357,10 @@ UniCharCaseCmpTest < a b UniCharCaseCmpTest > b a UniCharCaseCmpTest > B a UniCharCaseCmpTest > aBcB abca -UniCharCaseCmpTest < \uFFFF [format %c 0x10000] ucs4 -UniCharCaseCmpTest < \uFFFF \U10000 ucs4 -UniCharCaseCmpTest > [format %c 0x10000] \uFFFF ucs4 -UniCharCaseCmpTest > \U10000 \uFFFF ucs4 +UniCharCaseCmpTest < \uFFFF [format %c 0x10000] utf32 +UniCharCaseCmpTest < \uFFFF \U10000 utf32 +UniCharCaseCmpTest > [format %c 0x10000] \uFFFF utf32 +UniCharCaseCmpTest > \U10000 \uFFFF utf32 test utf-26.1 {Tcl_UniCharDString} -setup { diff --git a/win/makefile.vc b/win/makefile.vc index 1ef64f2..6ff6118 100644 --- a/win/makefile.vc +++ b/win/makefile.vc @@ -52,7 +52,7 @@ # turn on the 64-bit compiler, if your SDK has it.
#
# Basic macros and options usable on the commandline (see rules.vc for more info):
-# OPTS=msvcrt,noembed,nothreads,pdbs,profile,static,symbols,thrdalloc,time64bit,unchecked,utfmax,none
+# OPTS=msvcrt,noembed,nothreads,pdbs,profile,static,symbols,thrdalloc,time64bit,unchecked,utf16,none
# Sets special options for the core. The default is for none.
# Any combination of the above may be used (comma separated).
# 'none' will over-ride everything to nothing.
@@ -80,7 +80,7 @@ # unchecked = Allows a symbols build to not use the debug
# enabled runtime (msvcrt.dll not msvcrtd.dll
# or libcmt.lib not libcmtd.lib).
-# utfmax = Forces a build using UTF-32 representation internally.
+# utf16 = Forces a build using UTF-16 representation internally.
#
# STATS=compdbg,memdbg,none
# Sets optional memory and bytecode compiler debugging code added
diff --git a/win/rules.vc b/win/rules.vc index a571899..3107756 100644 --- a/win/rules.vc +++ b/win/rules.vc @@ -816,8 +816,7 @@ DOTSEPARATED=$(DOTSEPARATED:b=.) # configuration (ignored for Tcl itself)
# _USE_64BIT_TIME_T - forces a build using 64-bit time_t for 32-bit build
# (CRT library should support this, not needed for Tcl 9.x)
-# TCL_UTF_MAX=4 - forces a build allowing 4-byte UTF-8 sequences internally.
-# (Not needed for Tcl 9.x)
+# TCL_UTF_MAX=3 - forces a build using UTF-16 internally (not recommended).
# Further, LINKERFLAGS are modified based on above.
# Default values for all the above
@@ -884,9 +883,9 @@ USE_THREAD_ALLOC= 0 _USE_64BIT_TIME_T = 1
!endif
-!if [nmakehlp -f $(OPTS) "utfmax"]
-!message *** Force allowing 4-byte UTF-8 sequences internally
-TCL_UTF_MAX = 4
+!if [nmakehlp -f $(OPTS) "utf16"]
+!message *** Force UTF-16 internally
+TCL_UTF_MAX = 3
!endif
!endif
@@ -1423,13 +1422,13 @@ OPTDEFINES = $(OPTDEFINES) /DNO_STRTOI64=1 !if "$(_USE_64BIT_TIME_T)" == "1"
OPTDEFINES = $(OPTDEFINES) /D_USE_64BIT_TIME_T=1
!endif
-!if "$(TCL_UTF_MAX)" == "4"
-OPTDEFINES = $(OPTDEFINES) /DTCL_UTF_MAX=4
-!endif
# _ATL_XP_TARGETING - Newer SDK's need this to build for XP
COMPILERFLAGS = /D_ATL_XP_TARGETING
!endif
+!if "$(TCL_UTF_MAX)" == "3"
+OPTDEFINES = $(OPTDEFINES) /DTCL_UTF_MAX=3
+!endif
# Like the TEA system only set this non empty for non-Tk extensions
# Note: some extensions use PACKAGE_NAME and others use PACKAGE_TCLNAME
|