diff options
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r-- | generic/tclStringObj.c | 900 |
1 files changed, 738 insertions, 162 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index c04944d..e51b98a 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -36,15 +36,7 @@ #include "tclInt.h" #include "tommath.h" - -/* - * Set COMPAT to 1 to restore the shimmering patterns to those of Tcl 8.5. - * This is an escape hatch in case the changes have some unexpected unwelcome - * impact on performance. If things go well, this mechanism can go away when - * post-8.6 development begins. - */ - -#define COMPAT 0 +#include "tclStringRep.h" /* * Prototypes for functions defined later in this file: @@ -89,60 +81,6 @@ const Tcl_ObjType tclStringType = { UpdateStringOfString, /* updateStringProc */ SetStringFromAny /* setFromAnyProc */ }; - -/* - * The following structure is the internal rep for a String object. It keeps - * track of how much memory has been used and how much has been allocated for - * the Unicode and UTF string to enable growing and shrinking of the UTF and - * Unicode reps of the String object with fewer mallocs. To optimize string - * length and indexing operations, this structure also stores the number of - * characters (same of UTF and Unicode!) once that value has been computed. - * - * Under normal configurations, what Tcl calls "Unicode" is actually UTF-16 - * restricted to the Basic Multilingual Plane (i.e. U+00000 to U+0FFFF). This - * can be officially modified by altering the definition of Tcl_UniChar in - * tcl.h, but do not do that unless you are sure what you're doing! - */ - -typedef struct String { - int numChars; /* The number of chars in the string. -1 means - * this value has not been calculated. >= 0 - * means that there is a valid Unicode rep, or - * that the number of UTF bytes == the number - * of chars. */ - int allocated; /* The amount of space actually allocated for - * the UTF string (minus 1 byte for the - * termination char). */ - int maxChars; /* Max number of chars that can fit in the - * space allocated for the unicode array. */ - int hasUnicode; /* Boolean determining whether the string has - * a Unicode representation. */ - Tcl_UniChar unicode[1]; /* The array of Unicode chars. The actual size - * of this field depends on the 'maxChars' - * field above. */ -} String; - -#define STRING_MAXCHARS \ - (int)(((size_t)UINT_MAX - sizeof(String))/sizeof(Tcl_UniChar)) -#define STRING_SIZE(numChars) \ - (sizeof(String) + ((numChars) * sizeof(Tcl_UniChar))) -#define stringCheckLimits(numChars) \ - if ((numChars) < 0 || (numChars) > STRING_MAXCHARS) { \ - Tcl_Panic("max length for a Tcl unicode value (%d chars) exceeded", \ - STRING_MAXCHARS); \ - } -#define stringAttemptAlloc(numChars) \ - (String *) attemptckalloc((unsigned) STRING_SIZE(numChars) ) -#define stringAlloc(numChars) \ - (String *) ckalloc((unsigned) STRING_SIZE(numChars) ) -#define stringRealloc(ptr, numChars) \ - (String *) ckrealloc((ptr), (unsigned) STRING_SIZE(numChars) ) -#define stringAttemptRealloc(ptr, numChars) \ - (String *) attemptckrealloc((ptr), (unsigned) STRING_SIZE(numChars) ) -#define GET_STRING(objPtr) \ - ((String *) (objPtr)->internalRep.twoPtrValue.ptr1) -#define SET_STRING(objPtr, stringPtr) \ - ((objPtr)->internalRep.twoPtrValue.ptr1 = (void *) (stringPtr)) /* * TCL STRING GROWTH ALGORITHM @@ -188,7 +126,7 @@ GrowStringBuffer( int flag) { /* - * Pre-conditions: + * Pre-conditions: * objPtr->typePtr == &tclStringType * needed > stringPtr->allocated * flag || objPtr->bytes != NULL @@ -198,7 +136,7 @@ GrowStringBuffer( char *ptr = NULL; int attempt; - if (objPtr->bytes == tclEmptyStringRep) { + if (objPtr->bytes == &tclEmptyString) { objPtr->bytes = NULL; } if (flag == 0 || stringPtr->allocated > 0) { @@ -238,7 +176,7 @@ GrowUnicodeBuffer( int needed) { /* - * Pre-conditions: + * Pre-conditions: * objPtr->typePtr == &tclStringType * needed > stringPtr->maxChars * needed < STRING_MAXCHARS @@ -471,6 +409,15 @@ Tcl_GetCharLength( int numChars; /* + * Quick, no-shimmer return for short string reps. + */ + + if ((objPtr->bytes) && (objPtr->length < 2)) { + /* 0 bytes -> 0 chars; 1 byte -> 1 char */ + return objPtr->length; + } + + /* * Optimize the case where we're really dealing with a bytearray object * without string representation; we don't need to convert to a string to * perform the get-length operation. @@ -498,18 +445,6 @@ Tcl_GetCharLength( if (numChars == -1) { TclNumUtfChars(numChars, objPtr->bytes, objPtr->length); stringPtr->numChars = numChars; - -#if COMPAT - if (numChars < objPtr->length) { - /* - * Since we've just computed the number of chars, and not all UTF - * chars are 1-byte long, go ahead and populate the unicode - * string. - */ - - FillUnicodeRep(objPtr); - } -#endif } return numChars; } @@ -832,7 +767,7 @@ Tcl_SetObjLength( /* * Need to enlarge the buffer. */ - if (objPtr->bytes == tclEmptyStringRep) { + if (objPtr->bytes == &tclEmptyString) { objPtr->bytes = ckalloc(length + 1); } else { objPtr->bytes = ckrealloc(objPtr->bytes, length + 1); @@ -938,7 +873,7 @@ Tcl_AttemptSetObjLength( char *newBytes; - if (objPtr->bytes == tclEmptyStringRep) { + if (objPtr->bytes == &tclEmptyString) { newBytes = attemptckalloc(length + 1); } else { newBytes = attemptckrealloc(objPtr->bytes, length + 1); @@ -1226,11 +1161,7 @@ Tcl_AppendUnicodeToObj( * objPtr's string rep. */ - if (stringPtr->hasUnicode -#if COMPAT - && stringPtr->numChars > 0 -#endif - ) { + if (stringPtr->hasUnicode) { AppendUnicodeToUnicodeRep(objPtr, unicode, length); } else { AppendUnicodeToUtfRep(objPtr, unicode, length); @@ -1251,6 +1182,8 @@ Tcl_AppendUnicodeToObj( * Side effects: * The string rep of appendObjPtr is appended to the string * representation of objPtr. + * IMPORTANT: This routine does not and MUST NOT shimmer appendObjPtr. + * Callers are counting on that. * *---------------------------------------------------------------------- */ @@ -1269,7 +1202,7 @@ Tcl_AppendObjToObj( * that appending nothing to anything leaves that starting anything... */ - if (appendObjPtr->bytes == tclEmptyStringRep) { + if (appendObjPtr->bytes == &tclEmptyString) { return; } @@ -1280,7 +1213,7 @@ Tcl_AppendObjToObj( * information; this is a special-case optimization only. */ - if ((TclIsPureByteArray(objPtr) || objPtr->bytes == tclEmptyStringRep) + if ((TclIsPureByteArray(objPtr) || objPtr->bytes == &tclEmptyString) && TclIsPureByteArray(appendObjPtr)) { /* @@ -1334,11 +1267,7 @@ Tcl_AppendObjToObj( * appendObjPtr and append it. */ - if (stringPtr->hasUnicode -#if COMPAT - && stringPtr->numChars > 0 -#endif - ) { + if (stringPtr->hasUnicode) { /* * If appendObjPtr is not of the "String" type, don't convert it. */ @@ -1371,11 +1300,7 @@ Tcl_AppendObjToObj( AppendUtfToUtfRep(objPtr, bytes, length); - if (numChars >= 0 && appendNumChars >= 0 -#if COMPAT - && appendNumChars == length -#endif - ) { + if (numChars >= 0 && appendNumChars >= 0) { stringPtr->numChars = numChars + appendNumChars; } } @@ -1499,14 +1424,6 @@ AppendUnicodeToUtfRep( if (stringPtr->numChars != -1) { stringPtr->numChars += numChars; } - -#if COMPAT - /* - * Invalidate the unicode rep. - */ - - stringPtr->hasUnicode = 0; -#endif } /* @@ -1753,8 +1670,11 @@ Tcl_AppendFormatToObj( while (*format != '\0') { char *end; - int gotMinus, gotHash, gotZero, gotSpace, gotPlus, sawFlag; - int width, gotPrecision, precision, useShort, useWide, useBig; + int gotMinus = 0, gotHash = 0, gotZero = 0, gotSpace = 0, gotPlus = 0; + int width, gotPrecision, precision, sawFlag, useShort = 0, useBig = 0; +#ifndef TCL_WIDE_INT_IS_LONG + int useWide = 0; +#endif int newXpg, numChars, allocSegment = 0, segmentLimit, segmentNumBytes; Tcl_Obj *segment; Tcl_UniChar ch; @@ -1830,7 +1750,6 @@ Tcl_AppendFormatToObj( * Step 2. Set of flags. */ - gotMinus = gotHash = gotZero = gotSpace = gotPlus = 0; sawFlag = 1; do { switch (ch) { @@ -1931,7 +1850,6 @@ Tcl_AppendFormatToObj( * Step 5. Length modifier. */ - useShort = useWide = useBig = 0; if (ch == 'h') { useShort = 1; format += step; @@ -1948,6 +1866,34 @@ Tcl_AppendFormatToObj( useWide = 1; #endif } + } else if (ch == 'I') { + if ((format[1] == '6') && (format[2] == '4')) { + format += (step + 2); + step = Tcl_UtfToUniChar(format, &ch); +#ifndef TCL_WIDE_INT_IS_LONG + useWide = 1; +#endif + } else if ((format[1] == '3') && (format[2] == '2')) { + format += (step + 2); + step = Tcl_UtfToUniChar(format, &ch); + } else { + format += step; + step = Tcl_UtfToUniChar(format, &ch); + } + } else if ((ch == 't') || (ch == 'z')) { + format += step; + step = Tcl_UtfToUniChar(format, &ch); +#ifndef TCL_WIDE_INT_IS_LONG + if (sizeof(size_t) > sizeof(int)) { + useWide = 1; + } +#endif + } else if ((ch == 'q') ||(ch == 'j')) { + format += step; + step = Tcl_UtfToUniChar(format, &ch); +#ifndef TCL_WIDE_INT_IS_LONG + useWide = 1; +#endif } format += step; @@ -1993,13 +1939,9 @@ Tcl_AppendFormatToObj( } case 'u': - if (useBig) { - msg = "unsigned bignum format is invalid"; - errCode = "BADUNSIGNED"; - goto errorMsg; - } case 'd': case 'o': + case 'p': case 'x': case 'X': case 'b': { @@ -2010,11 +1952,26 @@ Tcl_AppendFormatToObj( mp_int big; int toAppend, isNegative = 0; +#ifndef TCL_WIDE_INT_IS_LONG + if (ch == 'p') { + useWide = 1; + } +#endif if (useBig) { if (Tcl_GetBignumFromObj(interp, segment, &big) != TCL_OK) { goto error; } isNegative = (mp_cmp_d(&big, 0) == MP_LT); + if (ch == 'u') { + if (isNegative) { + msg = "unsigned bignum format is invalid"; + errCode = "BADUNSIGNED"; + goto errorMsg; + } else { + ch = 'd'; + } + } +#ifndef TCL_WIDE_INT_IS_LONG } else if (useWide) { if (Tcl_GetWideIntFromObj(NULL, segment, &w) != TCL_OK) { Tcl_Obj *objPtr; @@ -2029,6 +1986,7 @@ Tcl_AppendFormatToObj( Tcl_DecrRefCount(objPtr); } isNegative = (w < (Tcl_WideInt) 0); +#endif } else if (TclGetLongFromObj(NULL, segment, &l) != TCL_OK) { if (Tcl_GetWideIntFromObj(NULL, segment, &w) != TCL_OK) { Tcl_Obj *objPtr; @@ -2068,13 +2026,14 @@ Tcl_AppendFormatToObj( segmentLimit -= 1; } - if (gotHash) { + if (gotHash || (ch == 'p')) { switch (ch) { case 'o': Tcl_AppendToObj(segment, "0", 1); segmentLimit -= 1; precision--; break; + case 'p': case 'x': case 'X': Tcl_AppendToObj(segment, "0x", 2); @@ -2095,8 +2054,10 @@ Tcl_AppendFormatToObj( if (useShort) { pure = Tcl_NewIntObj((int) s); +#ifndef TCL_WIDE_INT_IS_LONG } else if (useWide) { pure = Tcl_NewWideIntObj(w); +#endif } else if (useBig) { pure = Tcl_NewBignumObj(&big); } else { @@ -2153,6 +2114,7 @@ Tcl_AppendFormatToObj( case 'u': case 'o': + case 'p': case 'x': case 'X': case 'b': { @@ -2179,6 +2141,7 @@ Tcl_AppendFormatToObj( numDigits++; us /= base; } +#ifndef TCL_WIDE_INT_IS_LONG } else if (useWide) { Tcl_WideUInt uw = (Tcl_WideUInt) w; @@ -2187,6 +2150,7 @@ Tcl_AppendFormatToObj( numDigits++; uw /= base; } +#endif } else if (useBig && big.used) { int leftover = (big.used * DIGIT_BIT) % numBits; mp_digit mask = (~(mp_digit)0) << (DIGIT_BIT-leftover); @@ -2314,7 +2278,7 @@ Tcl_AppendFormatToObj( p += sprintf(p, "%d", width); if (width > length) { length = width; - } + } } if (gotPrecision) { *p++ = '.'; @@ -2379,7 +2343,7 @@ Tcl_AppendFormatToObj( } } - Tcl_GetStringFromObj(segment, &segmentNumBytes); + TclGetStringFromObj(segment, &segmentNumBytes); if (segmentNumBytes > limit) { if (allocSegment) { Tcl_DecrRefCount(segment); @@ -2542,6 +2506,7 @@ AppendPrintfToObjVA( case 'u': case 'd': case 'o': + case 'p': case 'x': case 'X': seekingConversion = 0; @@ -2555,6 +2520,10 @@ AppendPrintfToObjVA( Tcl_ListObjAppendElement(NULL, list, Tcl_NewLongObj( va_arg(argList, long))); break; + case 2: + Tcl_ListObjAppendElement(NULL, list, Tcl_NewWideIntObj( + va_arg(argList, Tcl_WideInt))); + break; } break; case 'e': @@ -2583,9 +2552,32 @@ AppendPrintfToObjVA( gotPrecision = 1; p++; break; - /* TODO: support for wide (and bignum?) arguments */ + /* TODO: support for bignum arguments */ case 'l': - size = 1; + ++size; + p++; + break; + case 't': + case 'z': + if (sizeof(size_t) == sizeof(Tcl_WideInt)) { + size = 2; + } + p++; + break; + case 'j': + case 'q': + size = 2; + p++; + break; + case 'I': + if (p[1]=='6' && p[2]=='4') { + p += 2; + size = 2; + } else if (p[1]=='3' && p[2]=='2') { + p += 2; + } else if (sizeof(size_t) == sizeof(Tcl_WideInt)) { + size = 2; + } p++; break; case 'h': @@ -2692,6 +2684,616 @@ TclGetStringStorage( *sizePtr = stringPtr->allocated; return objPtr->bytes; } + +/* + *--------------------------------------------------------------------------- + * + * TclStringRepeat -- + * + * Performs the [string repeat] function. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * Writes to *objPtrPtr the address of Tcl_Obj that is concatenation + * of count copies of the value in objPtr. + * + *--------------------------------------------------------------------------- + */ + +int +TclStringRepeat( + Tcl_Interp *interp, + Tcl_Obj *objPtr, + int count, + Tcl_Obj **objPtrPtr) +{ + Tcl_Obj *objResultPtr; + int length = 0, unichar = 0, done = 1; + int binary = TclIsPureByteArray(objPtr); + + /* assert (count >= 2) */ + + /* + * Analyze to determine what representation result should be. + * GOALS: Avoid shimmering & string rep generation. + * Produce pure bytearray when possible. + * Error on overflow. + */ + + if (!binary) { + if (objPtr->typePtr == &tclStringType) { + String *stringPtr = GET_STRING(objPtr); + if (stringPtr->hasUnicode) { + unichar = 1; + } + } + } + + if (binary) { + /* Result will be pure byte array. Pre-size it */ + Tcl_GetByteArrayFromObj(objPtr, &length); + } else if (unichar) { + /* Result will be pure Tcl_UniChar array. Pre-size it. */ + Tcl_GetUnicodeFromObj(objPtr, &length); + } else { + /* Result will be concat of string reps. Pre-size it. */ + Tcl_GetStringFromObj(objPtr, &length); + } + + if (length == 0) { + /* Any repeats of empty is empty. */ + *objPtrPtr = objPtr; + return TCL_OK; + } + + if (count > INT_MAX/length) { + if (interp) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "max size for a Tcl value (%d bytes) exceeded", INT_MAX)); + Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); + } + return TCL_ERROR; + } + + if (binary) { + /* Efficiently produce a pure byte array result */ + objResultPtr = Tcl_IsShared(objPtr) ? Tcl_DuplicateObj(objPtr) + : objPtr; + + Tcl_SetByteArrayLength(objResultPtr, count*length); /* PANIC? */ + Tcl_SetByteArrayLength(objResultPtr, length); + while (count - done > done) { + Tcl_AppendObjToObj(objResultPtr, objResultPtr); + done *= 2; + } + TclAppendBytesToByteArray(objResultPtr, + Tcl_GetByteArrayFromObj(objResultPtr, NULL), + (count - done) * length); + } else if (unichar) { + /* Efficiently produce a pure Tcl_UniChar array result */ + if (Tcl_IsShared(objPtr)) { + objResultPtr = Tcl_NewUnicodeObj(Tcl_GetUnicode(objPtr), length); + } else { + TclInvalidateStringRep(objPtr); + objResultPtr = objPtr; + } + + if (0 == Tcl_AttemptSetObjLength(objResultPtr, count*length)) { + if (interp) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "string size overflow: unable to alloc %" + TCL_LL_MODIFIER "u bytes", + (Tcl_WideUInt)STRING_SIZE(count*length))); + Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); + } + return TCL_ERROR; + } + Tcl_SetObjLength(objResultPtr, length); + while (count - done > done) { + Tcl_AppendObjToObj(objResultPtr, objResultPtr); + done *= 2; + } + Tcl_AppendUnicodeToObj(objResultPtr, Tcl_GetUnicode(objResultPtr), + (count - done) * length); + } else { + /* Efficiently concatenate string reps */ + if (Tcl_IsShared(objPtr)) { + objResultPtr = Tcl_NewStringObj(Tcl_GetString(objPtr), length); + } else { + TclFreeIntRep(objPtr); + objResultPtr = objPtr; + } + if (0 == Tcl_AttemptSetObjLength(objResultPtr, count*length)) { + if (interp) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "string size overflow: unable to alloc %u bytes", + count*length)); + Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); + } + return TCL_ERROR; + } + Tcl_SetObjLength(objResultPtr, length); + while (count - done > done) { + Tcl_AppendObjToObj(objResultPtr, objResultPtr); + done *= 2; + } + Tcl_AppendToObj(objResultPtr, Tcl_GetString(objResultPtr), + (count - done) * length); + } + *objPtrPtr = objResultPtr; + return TCL_OK; +} + +/* + *--------------------------------------------------------------------------- + * + * TclStringCatObjv -- + * + * Performs the [string cat] function. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * Writes to *objPtrPtr the address of Tcl_Obj that is concatenation + * of all objc values in objv. + * + *--------------------------------------------------------------------------- + */ + +int +TclStringCatObjv( + Tcl_Interp *interp, + int inPlace, + int objc, + Tcl_Obj * const objv[], + Tcl_Obj **objPtrPtr) +{ + Tcl_Obj *objPtr, *objResultPtr, * const *ov; + int oc, length = 0, binary = 1, first = 0; + int allowUniChar = 1, requestUniChar = 0; + + /* assert (objc >= 2) */ + + /* + * Analyze to determine what representation result should be. + * GOALS: Avoid shimmering & string rep generation. + * Produce pure bytearray when possible. + * Error on overflow. + */ + + ov = objv, oc = objc; + while (oc-- && (binary || allowUniChar)) { + objPtr = *ov++; + + if (objPtr->bytes) { + /* Value has a string rep. */ + if (objPtr->length) { + /* + * Non-empty string rep. Not a pure bytearray, so we + * won't create a pure bytearray + */ + binary = 0; + if ((objPtr->typePtr) && (objPtr->typePtr != &tclStringType)) { + /* Prevent shimmer of non-string types. */ + allowUniChar = 0; + } + } + } else { + /* assert (objPtr->typePtr != NULL) -- stork! */ + if (TclIsPureByteArray(objPtr)) { + allowUniChar = 0; + } else { + binary = 0; + if (objPtr->typePtr == &tclStringType) { + /* Have a pure Unicode value; ask to preserve it */ + requestUniChar = 1; + } else { + /* Have another type; prevent shimmer */ + allowUniChar = 0; + } + } + } + } + + if (binary) { + /* Result will be pure byte array. Pre-size it */ + ov = objv; oc = objc; + while (oc-- && (length >= 0)) { + objPtr = *ov++; + + if (objPtr->bytes == NULL) { + int numBytes; + + Tcl_GetByteArrayFromObj(objPtr, &numBytes); /* PANIC? */ + if (length == 0) { + first = objc - oc - 1; + } + length += numBytes; + } + } + } else if (allowUniChar && requestUniChar) { + /* Result will be pure Tcl_UniChar array. Pre-size it. */ + ov = objv; oc = objc; + while (oc-- && (length >= 0)) { + objPtr = *ov++; + + if ((objPtr->bytes == NULL) || (objPtr->length)) { + int numChars; + + Tcl_GetUnicodeFromObj(objPtr, &numChars); /* PANIC? */ + if (length == 0) { + first = objc - oc - 1; + } + length += numChars; + } + } + } else { + /* Result will be concat of string reps. Pre-size it. */ + ov = objv; oc = objc; + while (oc-- && (length >= 0)) { + int numBytes; + + objPtr = *ov++; + + Tcl_GetStringFromObj(objPtr, &numBytes); /* PANIC? */ + if ((length == 0) && numBytes) { + first = objc - oc - 1; + } + length += numBytes; + } + } + + if (length < 0) { + if (interp) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "max size for a Tcl value (%d bytes) exceeded", INT_MAX)); + Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); + } + return TCL_ERROR; + } + + if (length == 0) { + /* Total length of zero means every value has length zero */ + *objPtrPtr = objv[0]; + return TCL_OK; + } + + objv += first; objc -= first; + + if (binary) { + /* Efficiently produce a pure byte array result */ + unsigned char *dst; + + /* + * Broken interface! Byte array value routines offer no way + * to handle failure to allocate enough space. Following + * stanza may panic. + */ + if (inPlace && !Tcl_IsShared(*objv)) { + int start; + + objResultPtr = *objv++; objc--; + Tcl_GetByteArrayFromObj(objResultPtr, &start); + dst = Tcl_SetByteArrayLength(objResultPtr, length) + start; + } else { + objResultPtr = Tcl_NewByteArrayObj(NULL, length); + dst = Tcl_SetByteArrayLength(objResultPtr, length); + } + while (objc--) { + Tcl_Obj *objPtr = *objv++; + + if (objPtr->bytes == NULL) { + int more; + unsigned char *src = Tcl_GetByteArrayFromObj(objPtr, &more); + memcpy(dst, src, (size_t) more); + dst += more; + } + } + } else if (allowUniChar && requestUniChar) { + /* Efficiently produce a pure Tcl_UniChar array result */ + Tcl_UniChar *dst; + + if (inPlace && !Tcl_IsShared(*objv)) { + int start; + + objResultPtr = *objv++; objc--; + + /* Ugly interface! Force resize of the unicode array. */ + Tcl_GetUnicodeFromObj(objResultPtr, &start); + Tcl_InvalidateStringRep(objResultPtr); + if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) { + if (interp) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "concatenation failed: unable to alloc %" + TCL_LL_MODIFIER "u bytes", + (Tcl_WideUInt)STRING_SIZE(length))); + Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); + } + return TCL_ERROR; + } + dst = Tcl_GetUnicode(objResultPtr) + start; + } else { + Tcl_UniChar ch = 0; + + /* Ugly interface! No scheme to init array size. */ + objResultPtr = Tcl_NewUnicodeObj(&ch, 0); /* PANIC? */ + if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) { + if (interp) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "concatenation failed: unable to alloc %" + TCL_LL_MODIFIER "u bytes", + (Tcl_WideUInt)STRING_SIZE(length))); + Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); + } + return TCL_ERROR; + } + dst = Tcl_GetUnicode(objResultPtr); + } + while (objc--) { + Tcl_Obj *objPtr = *objv++; + + if ((objPtr->bytes == NULL) || (objPtr->length)) { + int more; + Tcl_UniChar *src = Tcl_GetUnicodeFromObj(objPtr, &more); + memcpy(dst, src, more * sizeof(Tcl_UniChar)); + dst += more; + } + } + } else { + /* Efficiently concatenate string reps */ + char *dst; + + if (inPlace && !Tcl_IsShared(*objv)) { + int start; + + objResultPtr = *objv++; objc--; + + Tcl_GetStringFromObj(objResultPtr, &start); + if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) { + if (interp) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "concatenation failed: unable to alloc %u bytes", + length)); + Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); + } + return TCL_ERROR; + } + dst = Tcl_GetString(objResultPtr) + start; + if (length > start) { + TclFreeIntRep(objResultPtr); + } + } else { + objResultPtr = Tcl_NewObj(); /* PANIC? */ + if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) { + if (interp) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "concatenation failed: unable to alloc %u bytes", + length)); + Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); + } + return TCL_ERROR; + } + dst = Tcl_GetString(objResultPtr); + } + while (objc--) { + Tcl_Obj *objPtr = *objv++; + + if ((objPtr->bytes == NULL) || (objPtr->length)) { + int more; + char *src = Tcl_GetStringFromObj(objPtr, &more); + memcpy(dst, src, (size_t) more); + dst += more; + } + } + } + *objPtrPtr = objResultPtr; + return TCL_OK; +} + +/* + *--------------------------------------------------------------------------- + * + * TclStringFind -- + * + * Implements the [string first] operation. + * + * Results: + * If needle is found as a substring of haystack, the index of the + * first instance of such a find is returned. If needle is not present + * as a substring of haystack, -1 is returned. + * + * Side effects: + * needle and haystack may have their Tcl_ObjType changed. + * + *--------------------------------------------------------------------------- + */ + +int +TclStringFind( + Tcl_Obj *needle, + Tcl_Obj *haystack, + int start) +{ + int lh, ln = Tcl_GetCharLength(needle); + + if (ln == 0) { + /* + * We don't find empty substrings. Bizarre! + * + * TODO: When we one day make this a true substring + * finder, change this to "return 0" + */ + return -1; + } + + if (TclIsPureByteArray(needle) && TclIsPureByteArray(haystack)) { + unsigned char *end, *try, *bh; + unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln); + + bh = Tcl_GetByteArrayFromObj(haystack, &lh); + end = bh + lh; + + try = bh + start; + while (try + ln <= end) { + try = memchr(try, bn[0], end - try); + + if (try == NULL) { + return -1; + } + if (0 == memcmp(try+1, bn+1, ln-1)) { + return (try - bh); + } + try++; + } + return -1; + } + + lh = Tcl_GetCharLength(haystack); + if (haystack->bytes && (lh == haystack->length)) { + /* haystack is all single-byte chars */ + + if (needle->bytes && (ln == needle->length)) { + /* needle is also all single-byte chars */ + char *found = strstr(haystack->bytes + start, needle->bytes); + + if (found) { + return (found - haystack->bytes); + } else { + return -1; + } + } else { + /* + * Cannot find substring with a multi-byte char inside + * a string with no multi-byte chars. + */ + return -1; + } + } else { + Tcl_UniChar *try, *end, *uh; + Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln); + + uh = Tcl_GetUnicodeFromObj(haystack, &lh); + end = uh + lh; + + try = uh + start; + while (try + ln <= end) { + if ((*try == *un) + && (0 == memcmp(try+1, un+1, (ln-1)*sizeof(Tcl_UniChar)))) { + return (try - uh); + } + try++; + } + return -1; + } +} + +/* + *--------------------------------------------------------------------------- + * + * TclStringLast -- + * + * Implements the [string last] operation. + * + * Results: + * If needle is found as a substring of haystack, the index of the + * last instance of such a find is returned. If needle is not present + * as a substring of haystack, -1 is returned. + * + * Side effects: + * needle and haystack may have their Tcl_ObjType changed. + * + *--------------------------------------------------------------------------- + */ + +int +TclStringLast( + Tcl_Obj *needle, + Tcl_Obj *haystack, + int last) +{ + int lh, ln = Tcl_GetCharLength(needle); + + if (ln == 0) { + /* + * We don't find empty substrings. Bizarre! + * + * TODO: When we one day make this a true substring + * finder, change this to "return 0" + */ + return -1; + } + + if (ln > last + 1) { + return -1; + } + + if (TclIsPureByteArray(needle) && TclIsPureByteArray(haystack)) { + unsigned char *try, *bh; + unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln); + + bh = Tcl_GetByteArrayFromObj(haystack, &lh); + + if (last + 1 > lh) { + last = lh - 1; + } + try = bh + last + 1 - ln; + while (try >= bh) { + if ((*try == bn[0]) + && (0 == memcmp(try+1, bn+1, ln-1))) { + return (try - bh); + } + try--; + } + return -1; + } + + lh = Tcl_GetCharLength(haystack); + if (last + 1 > lh) { + last = lh - 1; + } + if (haystack->bytes && (lh == haystack->length)) { + /* haystack is all single-byte chars */ + + if (needle->bytes && (ln == needle->length)) { + /* needle is also all single-byte chars */ + + char *try = haystack->bytes + last + 1 - ln; + while (try >= haystack->bytes) { + if ((*try == needle->bytes[0]) + && (0 == memcmp(try+1, needle->bytes + 1, ln - 1))) { + return (try - haystack->bytes); + } + try--; + } + return -1; + } else { + /* + * Cannot find substring with a multi-byte char inside + * a string with no multi-byte chars. + */ + return -1; + } + } else { + Tcl_UniChar *try, *uh; + Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln); + + uh = Tcl_GetUnicodeFromObj(haystack, &lh); + + try = uh + last + 1 - ln; + while (try >= uh) { + if ((*try == un[0]) + && (0 == memcmp(try+1, un+1, (ln-1)*sizeof(Tcl_UniChar)))) { + return (try - uh); + } + try--; + } + return -1; + } +} + /* *--------------------------------------------------------------------------- * @@ -2878,7 +3480,7 @@ ExtendUnicodeRepWithString( } needed = numOrigChars + numAppendChars; stringCheckLimits(needed); - + if (needed > stringPtr->maxChars) { GrowUnicodeBuffer(objPtr, needed); stringPtr = GET_STRING(objPtr); @@ -2924,7 +3526,6 @@ DupStringInternalRep( String *srcStringPtr = GET_STRING(srcPtr); String *copyStringPtr = NULL; -#if COMPAT==0 if (srcStringPtr->numChars == -1) { /* * The String struct in the source value holds zero useful data. Don't @@ -2967,41 +3568,6 @@ DupStringInternalRep( */ copyStringPtr->allocated = copyPtr->bytes ? copyPtr->length : 0; -#else /* COMPAT!=0 */ - /* - * If the src obj is a string of 1-byte Utf chars, then copy the string - * rep of the source object and create an "empty" Unicode internal rep for - * the new object. Otherwise, copy Unicode internal rep, and invalidate - * the string rep of the new object. - */ - - if (srcStringPtr->hasUnicode && srcStringPtr->numChars > 0) { - /* - * Copy the full allocation for the Unicode buffer. - */ - - copyStringPtr = stringAlloc(srcStringPtr->maxChars); - copyStringPtr->maxChars = srcStringPtr->maxChars; - memcpy(copyStringPtr->unicode, srcStringPtr->unicode, - srcStringPtr->numChars * sizeof(Tcl_UniChar)); - copyStringPtr->unicode[srcStringPtr->numChars] = 0; - copyStringPtr->allocated = 0; - } else { - copyStringPtr = stringAlloc(0); - copyStringPtr->unicode[0] = 0; - copyStringPtr->maxChars = 0; - - /* - * Tricky point: the string value was copied by generic object - * management code, so it doesn't contain any extra bytes that might - * exist in the source object. - */ - - copyStringPtr->allocated = copyPtr->length; - } - copyStringPtr->numChars = srcStringPtr->numChars; - copyStringPtr->hasUnicode = srcStringPtr->hasUnicode; -#endif /* COMPAT==0 */ SET_STRING(copyPtr, copyStringPtr); copyPtr->typePtr = &tclStringType; @@ -3078,8 +3644,18 @@ UpdateStringOfString( { String *stringPtr = GET_STRING(objPtr); + /* + * This routine is only called when we need to generate the + * string rep objPtr->bytes because it does not exist -- it is NULL. + * In that circumstance, any lingering claim about the size of + * memory pointed to by that NULL pointer is clearly bogus, and + * needs a reset. + */ + + stringPtr->allocated = 0; + if (stringPtr->numChars == 0) { - TclInitStringRep(objPtr, tclEmptyStringRep, 0); + TclInitStringRep(objPtr, &tclEmptyString, 0); } else { (void) ExtendStringRepWithUnicode(objPtr, stringPtr->unicode, stringPtr->numChars); @@ -3096,8 +3672,8 @@ ExtendStringRepWithUnicode( * Pre-condition: this is the "string" Tcl_ObjType. */ - int i, origLength, size = 0; - char *dst, buf[TCL_UTF_MAX]; + int i, origLength, size = 0; + char *dst; String *stringPtr = GET_STRING(objPtr); if (numChars < 0) { @@ -3112,18 +3688,18 @@ ExtendStringRepWithUnicode( objPtr->length = 0; } size = origLength = objPtr->length; - + /* * Quick cheap check in case we have more than enough room. */ - if (numChars <= (INT_MAX - size)/TCL_UTF_MAX + if (numChars <= (INT_MAX - size)/TCL_UTF_MAX && stringPtr->allocated >= size + numChars * TCL_UTF_MAX) { goto copyBytes; } for (i = 0; i < numChars && size >= 0; i++) { - size += Tcl_UniCharToUtf((int) unicode[i], buf); + size += TclUtfCount(unicode[i]); } if (size < 0) { Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX); |