diff options
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r-- | generic/tclStringObj.c | 584 |
1 files changed, 379 insertions, 205 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 8787fd9..dffa38c 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -32,8 +32,7 @@ * * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. - * - * RCS: @(#) $Id: tclStringObj.c,v 1.130 2009/09/30 03:11:26 dgp Exp $ */ + */ #include "tclInt.h" #include "tommath.h" @@ -41,9 +40,10 @@ /* * Set COMPAT to 1 to restore the shimmering patterns to those of Tcl 8.5. * This is an escape hatch in case the changes have some unexpected unwelcome - * impact on performance. If things go well, this mechanism can go away when + * impact on performance. If things go well, this mechanism can go away when * post-8.6 development begins. */ + #define COMPAT 0 /* @@ -131,18 +131,19 @@ typedef struct String { Tcl_Panic("max length for a Tcl unicode value (%d chars) exceeded", \ STRING_MAXCHARS); \ } +#define stringAttemptAlloc(numChars) \ + (String *) attemptckalloc((unsigned) STRING_SIZE(numChars) ) #define stringAlloc(numChars) \ (String *) ckalloc((unsigned) STRING_SIZE(numChars) ) #define stringRealloc(ptr, numChars) \ - (String *) ckrealloc((char *) ptr, (unsigned) STRING_SIZE(numChars) ) + (String *) ckrealloc((ptr), (unsigned) STRING_SIZE(numChars) ) #define stringAttemptRealloc(ptr, numChars) \ - (String *) attemptckrealloc((char *) ptr, \ - (unsigned) STRING_SIZE(numChars) ) + (String *) attemptckrealloc((ptr), (unsigned) STRING_SIZE(numChars) ) #define GET_STRING(objPtr) \ - ((String *) (objPtr)->internalRep.otherValuePtr) + ((String *) (objPtr)->internalRep.twoPtrValue.ptr1) #define SET_STRING(objPtr, stringPtr) \ - ((objPtr)->internalRep.otherValuePtr = (void *) (stringPtr)) - + ((objPtr)->internalRep.twoPtrValue.ptr1 = (void *) (stringPtr)) + /* * TCL STRING GROWTH ALGORITHM * @@ -151,8 +152,7 @@ typedef struct String { * * Attempt to allocate 2 * (originalLength + appendLength) * On failure: - * attempt to allocate originalLength + 2*appendLength + - * TCL_GROWTH_MIN_ALLOC + * attempt to allocate originalLength + 2*appendLength + TCL_MIN_GROWTH * * This algorithm allows very good performance, as it rapidly increases the * memory allocated for a given string, which minimizes the number of @@ -165,20 +165,20 @@ typedef struct String { * cover the request, but which hopefully will be less than the total * available memory. * - * The addition of TCL_GROWTH_MIN_ALLOC allows for efficient handling of very + * The addition of TCL_MIN_GROWTH allows for efficient handling of very * small appends. Without this extra slush factor, a sequence of several small * appends would cause several memory allocations. As long as - * TCL_GROWTH_MIN_ALLOC is a reasonable size, we can avoid that behavior. + * TCL_MIN_GROWTH is a reasonable size, we can avoid that behavior. * * The growth algorithm can be tuned by adjusting the following parameters: * - * TCL_GROWTH_MIN_ALLOC Additional space, in bytes, to allocate when + * TCL_MIN_GROWTH Additional space, in bytes, to allocate when * the double allocation has failed. Default is - * 1024 (1 kilobyte). + * 1024 (1 kilobyte). See tclInt.h. */ -#ifndef TCL_GROWTH_MIN_ALLOC -#define TCL_GROWTH_MIN_ALLOC 1024 +#ifndef TCL_MIN_UNICHAR_GROWTH +#define TCL_MIN_UNICHAR_GROWTH TCL_MIN_GROWTH/sizeof(Tcl_UniChar) #endif static void @@ -187,11 +187,13 @@ GrowStringBuffer( int needed, int flag) { - /* Pre-conditions: + /* + * Pre-conditions: * objPtr->typePtr == &tclStringType * needed > stringPtr->allocated * flag || objPtr->bytes != NULL */ + String *stringPtr = GET_STRING(objPtr); char *ptr = NULL; int attempt; @@ -202,24 +204,29 @@ GrowStringBuffer( if (flag == 0 || stringPtr->allocated > 0) { attempt = 2 * needed; if (attempt >= 0) { - ptr = attemptckrealloc(objPtr->bytes, (unsigned) attempt + 1); + ptr = attemptckrealloc(objPtr->bytes, attempt + 1); } if (ptr == NULL) { /* * Take care computing the amount of modest growth to avoid * overflow into invalid argument values for attempt. */ + unsigned int limit = INT_MAX - needed; - unsigned int extra = needed - objPtr->length + TCL_GROWTH_MIN_ALLOC; + unsigned int extra = needed - objPtr->length + TCL_MIN_GROWTH; int growth = (int) ((extra > limit) ? limit : extra); + attempt = needed + growth; - ptr = attemptckrealloc(objPtr->bytes, (unsigned) attempt + 1); + ptr = attemptckrealloc(objPtr->bytes, attempt + 1); } } if (ptr == NULL) { - /* First allocation - just big enough; or last chance fallback. */ + /* + * First allocation - just big enough; or last chance fallback. + */ + attempt = needed; - ptr = ckrealloc(objPtr->bytes, (unsigned) attempt + 1); + ptr = ckrealloc(objPtr->bytes, attempt + 1); } objPtr->bytes = ptr; stringPtr->allocated = attempt; @@ -230,16 +237,21 @@ GrowUnicodeBuffer( Tcl_Obj *objPtr, int needed) { - /* Pre-conditions: + /* + * Pre-conditions: * objPtr->typePtr == &tclStringType * needed > stringPtr->maxChars * needed < STRING_MAXCHARS */ + String *ptr = NULL, *stringPtr = GET_STRING(objPtr); int attempt; if (stringPtr->maxChars > 0) { - /* Subsequent appends - apply the growth algorithm. */ + /* + * Subsequent appends - apply the growth algorithm. + */ + attempt = 2 * needed; if (attempt >= 0 && attempt <= STRING_MAXCHARS) { ptr = stringAttemptRealloc(stringPtr, attempt); @@ -249,16 +261,21 @@ GrowUnicodeBuffer( * Take care computing the amount of modest growth to avoid * overflow into invalid argument values for attempt. */ + unsigned int limit = STRING_MAXCHARS - needed; unsigned int extra = needed - stringPtr->numChars - + TCL_GROWTH_MIN_ALLOC/sizeof(Tcl_UniChar); + + TCL_MIN_UNICHAR_GROWTH; int growth = (int) ((extra > limit) ? limit : extra); + attempt = needed + growth; ptr = stringAttemptRealloc(stringPtr, attempt); } } if (ptr == NULL) { - /* First allocation - just big enough; or last chance fallback. */ + /* + * First allocation - just big enough; or last chance fallback. + */ + attempt = needed; ptr = stringRealloc(stringPtr, attempt); } @@ -474,7 +491,10 @@ Tcl_GetCharLength( stringPtr = GET_STRING(objPtr); numChars = stringPtr->numChars; - /* If numChars is unknown, compute it. */ + /* + * If numChars is unknown, compute it. + */ + if (numChars == -1) { TclNumUtfChars(numChars, objPtr->bytes, objPtr->length); stringPtr->numChars = numChars; @@ -482,8 +502,8 @@ Tcl_GetCharLength( #if COMPAT if (numChars < objPtr->length) { /* - * Since we've just computed the number of chars, and not all - * UTF chars are 1-byte long, go ahead and populate the unicode + * Since we've just computed the number of chars, and not all UTF + * chars are 1-byte long, go ahead and populate the unicode * string. */ @@ -539,7 +559,10 @@ Tcl_GetUniChar( stringPtr = GET_STRING(objPtr); if (stringPtr->hasUnicode == 0) { - /* If numChars is unknown, compute it. */ + /* + * If numChars is unknown, compute it. + */ + if (stringPtr->numChars == -1) { TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length); } @@ -670,14 +693,20 @@ Tcl_GetRange( stringPtr = GET_STRING(objPtr); if (stringPtr->hasUnicode == 0) { - /* If numChars is unknown, compute it. */ + /* + * If numChars is unknown, compute it. + */ + if (stringPtr->numChars == -1) { TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length); } if (stringPtr->numChars == objPtr->length) { newObjPtr = Tcl_NewStringObj(objPtr->bytes + first, last-first+1); - /* Since we know the char length of the result, store it. */ + /* + * Since we know the char length of the result, store it. + */ + SetStringFromAny(NULL, newObjPtr); stringPtr = GET_STRING(newObjPtr); stringPtr->numChars = newObjPtr->length; @@ -729,7 +758,6 @@ Tcl_SetStringObj( */ TclFreeIntRep(objPtr); - objPtr->typePtr = NULL; /* * Free any old string rep, then set the string rep to a copy of the @@ -805,9 +833,9 @@ Tcl_SetObjLength( * Need to enlarge the buffer. */ if (objPtr->bytes == tclEmptyStringRep) { - objPtr->bytes = ckalloc((unsigned) length+1); + objPtr->bytes = ckalloc(length + 1); } else { - objPtr->bytes = ckrealloc(objPtr->bytes, (unsigned) length+1); + objPtr->bytes = ckrealloc(objPtr->bytes, length + 1); } stringPtr->allocated = length; } @@ -833,14 +861,17 @@ Tcl_SetObjLength( stringPtr->maxChars = length; } - /* Mark the new end of the unicode string */ + /* + * Mark the new end of the unicode string + */ + stringPtr->numChars = length; stringPtr->unicode[length] = 0; stringPtr->hasUnicode = 1; /* - * Can only get here when objPtr->bytes == NULL. - * No need to invalidate the string rep. + * Can only get here when objPtr->bytes == NULL. No need to invalidate + * the string rep. */ } } @@ -880,9 +911,10 @@ Tcl_AttemptSetObjLength( if (length < 0) { /* - * Setting to a negative length is nonsense. This is probably the + * Setting to a negative length is nonsense. This is probably the * result of overflowing the signed integer range. */ + return 0; } if (Tcl_IsShared(objPtr)) { @@ -903,12 +935,13 @@ Tcl_AttemptSetObjLength( /* * Need to enlarge the buffer. */ + char *newBytes; if (objPtr->bytes == tclEmptyStringRep) { - newBytes = attemptckalloc((unsigned) length+1); + newBytes = attemptckalloc(length + 1); } else { - newBytes = attemptckrealloc(objPtr->bytes, (unsigned) length+1); + newBytes = attemptckrealloc(objPtr->bytes, length + 1); } if (newBytes == NULL) { return 0; @@ -943,14 +976,17 @@ Tcl_AttemptSetObjLength( stringPtr->maxChars = length; } - /* Mark the new end of the unicode string */ + /* + * Mark the new end of the unicode string. + */ + stringPtr->unicode[length] = 0; stringPtr->numChars = length; stringPtr->hasUnicode = 1; /* - * Can only get here when objPtr->bytes == NULL. - * No need to invalidate the string rep. + * Can only get here when objPtr->bytes == NULL. No need to invalidate + * the string rep. */ } return 1; @@ -1228,31 +1264,60 @@ Tcl_AppendObjToObj( const char *bytes; /* + * Special case: second object is standard-empty is fast case. We know + * that appending nothing to anything leaves that starting anything... + */ + + if (appendObjPtr->bytes == tclEmptyStringRep) { + return; + } + + /* * Handle append of one bytearray object to another as a special case. * Note that we only do this when the objects don't have string reps; if * it did, then appending the byte arrays together could well lose * information; this is a special-case optimization only. */ - if (TclIsPureByteArray(objPtr) && TclIsPureByteArray(appendObjPtr)) { - unsigned char *bytesDst, *bytesSrc; - int lengthSrc, lengthTotal; + if ((TclIsPureByteArray(objPtr) || objPtr->bytes == tclEmptyStringRep) + && TclIsPureByteArray(appendObjPtr)) { /* - * We do not assume that objPtr and appendObjPtr must be distinct! - * This makes this code a bit more complex than it otherwise would be, - * but in turn makes it much safer. + * You might expect the code here to be + * + * bytes = Tcl_GetByteArrayFromObj(appendObjPtr, &length); + * TclAppendBytesToByteArray(objPtr, bytes, length); + * + * and essentially all of the time that would be fine. However, + * it would run into trouble in the case where objPtr and + * appendObjPtr point to the same thing. That may never be a + * good idea. It seems to violate Copy On Write, and we don't + * have any tests for the situation, since making any Tcl commands + * that call Tcl_AppendObjToObj() do that appears impossible + * (They honor Copy On Write!). For the sake of extensions that + * go off into that realm, though, here's a more complex approach + * that can handle all the cases. */ + /* Get lengths */ + int lengthSrc; + (void) Tcl_GetByteArrayFromObj(objPtr, &length); (void) Tcl_GetByteArrayFromObj(appendObjPtr, &lengthSrc); - lengthTotal = length + lengthSrc; - if (((length > lengthSrc) ? length : lengthSrc) > lengthTotal) { - Tcl_Panic("overflow when calculating byte array size"); - } - bytesDst = Tcl_SetByteArrayLength(objPtr, lengthTotal); - bytesSrc = Tcl_GetByteArrayFromObj(appendObjPtr, NULL); - memcpy(bytesDst + length, bytesSrc, lengthSrc); + + /* Grow buffer enough for the append */ + TclAppendBytesToByteArray(objPtr, NULL, lengthSrc); + + /* Reset objPtr back to the original value */ + Tcl_SetByteArrayLength(objPtr, length); + + /* + * Now do the append knowing that buffer growth cannot cause + * any trouble. + */ + + TclAppendBytesToByteArray(objPtr, + Tcl_GetByteArrayFromObj(appendObjPtr, NULL), lengthSrc); return; } @@ -1362,12 +1427,14 @@ AppendUnicodeToUnicodeRep( stringCheckLimits(numChars); if (numChars > stringPtr->maxChars) { + int offset = -1; + /* * Protect against case where unicode points into the existing - * stringPtr->unicode array. Force it to follow any relocations - * due to the reallocs below. + * stringPtr->unicode array. Force it to follow any relocations due to + * the reallocs below. */ - int offset = -1; + if (unicode >= stringPtr->unicode && unicode <= stringPtr->unicode + stringPtr->maxChars) { offset = unicode - stringPtr->unicode; @@ -1376,7 +1443,10 @@ AppendUnicodeToUnicodeRep( GrowUnicodeBuffer(objPtr, numChars); stringPtr = GET_STRING(objPtr); - /* Relocate unicode if needed; see above. */ + /* + * Relocate unicode if needed; see above. + */ + if (offset >= 0) { unicode = stringPtr->unicode + offset; } @@ -1387,7 +1457,7 @@ AppendUnicodeToUnicodeRep( * trailing null. */ - memcpy(stringPtr->unicode + stringPtr->numChars, unicode, + memmove(stringPtr->unicode + stringPtr->numChars, unicode, appendNumChars * sizeof(Tcl_UniChar)); stringPtr->unicode[numChars] = 0; stringPtr->numChars = numChars; @@ -1428,7 +1498,10 @@ AppendUnicodeToUtfRep( } #if COMPAT - /* Invalidate the unicode rep */ + /* + * Invalidate the unicode rep. + */ + stringPtr->hasUnicode = 0; #endif } @@ -1440,7 +1513,7 @@ AppendUnicodeToUtfRep( * * This function converts the contents of "bytes" to Unicode and appends * the Unicode to the Unicode rep of "objPtr". objPtr must already have a - * valid Unicode rep. numBytes must be non-negative. + * valid Unicode rep. numBytes must be non-negative. * * Results: * None. @@ -1516,22 +1589,30 @@ AppendUtfToUtfRep( stringPtr = GET_STRING(objPtr); if (newLength > stringPtr->allocated) { + int offset = -1; + /* * Protect against case where unicode points into the existing - * stringPtr->unicode array. Force it to follow any relocations - * due to the reallocs below. + * stringPtr->unicode array. Force it to follow any relocations due to + * the reallocs below. */ - int offset = -1; + if (bytes >= objPtr->bytes && bytes <= objPtr->bytes + objPtr->length) { offset = bytes - objPtr->bytes; } - /* TODO: consider passing flag=1: no overalloc on first append. - * This would make test stringObj-8.1 fail.*/ + /* + * TODO: consider passing flag=1: no overalloc on first append. This + * would make test stringObj-8.1 fail. + */ + GrowStringBuffer(objPtr, newLength, 0); - /* Relocate bytes if needed; see above. */ + /* + * Relocate bytes if needed; see above. + */ + if (offset >= 0) { bytes = objPtr->bytes + offset; } @@ -1544,11 +1625,10 @@ AppendUtfToUtfRep( stringPtr->numChars = -1; stringPtr->hasUnicode = 0; - memcpy(objPtr->bytes + oldLength, bytes, numBytes); + memmove(objPtr->bytes + oldLength, bytes, numBytes); objPtr->bytes[newLength] = 0; objPtr->length = newLength; } - /* *---------------------------------------------------------------------- @@ -1579,6 +1659,7 @@ Tcl_AppendStringsToObjVA( while (1) { const char *bytes = va_arg(argList, char *); + if (bytes == NULL) { break; } @@ -1644,7 +1725,7 @@ Tcl_AppendFormatToObj( int objc, Tcl_Obj *const objv[]) { - const char *span = format, *msg; + const char *span = format, *msg, *errCode; int numBytes = 0, objIndex = 0, gotXpg = 0, gotSequential = 0; int originalLength, limit; static const char *mixedXPG = @@ -1682,6 +1763,7 @@ Tcl_AppendFormatToObj( if (numBytes) { if (numBytes > limit) { msg = overflow; + errCode = "OVERFLOW"; goto errorMsg; } Tcl_AppendToObj(appendObj, span, numBytes); @@ -1710,6 +1792,7 @@ Tcl_AppendFormatToObj( newXpg = 0; if (isdigit(UCHAR(ch))) { int position = strtoul(format, &end, 10); + if (*end == '$') { newXpg = 1; objIndex = position - 1; @@ -1720,18 +1803,21 @@ Tcl_AppendFormatToObj( if (newXpg) { if (gotSequential) { msg = mixedXPG; + errCode = "MIXEDSPECTYPES"; goto errorMsg; } gotXpg = 1; } else { if (gotXpg) { msg = mixedXPG; + errCode = "MIXEDSPECTYPES"; goto errorMsg; } gotSequential = 1; } if ((objIndex < 0) || (objIndex >= objc)) { msg = badIndex[gotXpg]; + errCode = gotXpg ? "INDEXRANGE" : "FIELDVARMISMATCH"; goto errorMsg; } @@ -1779,6 +1865,7 @@ Tcl_AppendFormatToObj( } else if (ch == '*') { if (objIndex >= objc - 1) { msg = badIndex[gotXpg]; + errCode = gotXpg ? "INDEXRANGE" : "FIELDVARMISMATCH"; goto errorMsg; } if (TclGetIntFromObj(interp, objv[objIndex], &width) != TCL_OK) { @@ -1794,6 +1881,7 @@ Tcl_AppendFormatToObj( } if (width > limit) { msg = overflow; + errCode = "OVERFLOW"; goto errorMsg; } @@ -1814,6 +1902,7 @@ Tcl_AppendFormatToObj( } else if (ch == '*') { if (objIndex >= objc - 1) { msg = badIndex[gotXpg]; + errCode = gotXpg ? "INDEXRANGE" : "FIELDVARMISMATCH"; goto errorMsg; } if (TclGetIntFromObj(interp, objv[objIndex], &precision) @@ -1849,8 +1938,8 @@ Tcl_AppendFormatToObj( useBig = 1; format += step; step = Tcl_UtfToUniChar(format, &ch); - } else { #ifndef TCL_WIDE_INT_IS_LONG + } else { useWide = 1; #endif } @@ -1864,22 +1953,26 @@ Tcl_AppendFormatToObj( */ segment = objv[objIndex]; + numChars = -1; if (ch == 'i') { ch = 'd'; } switch (ch) { case '\0': msg = "format string ended in middle of field specifier"; + errCode = "INCOMPLETE"; goto errorMsg; - case 's': { - numChars = Tcl_GetCharLength(segment); - if (gotPrecision && (precision < numChars)) { - segment = Tcl_GetRange(segment, 0, precision - 1); - Tcl_IncrRefCount(segment); - allocSegment = 1; + case 's': + if (gotPrecision) { + numChars = Tcl_GetCharLength(segment); + if (precision < numChars) { + segment = Tcl_GetRange(segment, 0, precision - 1); + numChars = precision; + Tcl_IncrRefCount(segment); + allocSegment = 1; + } } break; - } case 'c': { char buf[TCL_UTF_MAX]; int code, length; @@ -1897,6 +1990,7 @@ Tcl_AppendFormatToObj( case 'u': if (useBig) { msg = "unsigned bignum format is invalid"; + errCode = "BADUNSIGNED"; goto errorMsg; } case 'd': @@ -1904,7 +1998,7 @@ Tcl_AppendFormatToObj( case 'x': case 'X': case 'b': { - short int s = 0; /* Silence compiler warning; only defined and + short s = 0; /* Silence compiler warning; only defined and * used when useShort is true. */ long l; Tcl_WideInt w; @@ -1929,7 +2023,7 @@ Tcl_AppendFormatToObj( Tcl_GetWideIntFromObj(NULL, objPtr, &w); Tcl_DecrRefCount(objPtr); } - isNegative = (w < (Tcl_WideInt)0); + isNegative = (w < (Tcl_WideInt) 0); } else if (TclGetLongFromObj(NULL, segment, &l) != TCL_OK) { if (Tcl_GetWideIntFromObj(NULL, segment, &w) != TCL_OK) { Tcl_Obj *objPtr; @@ -1946,16 +2040,16 @@ Tcl_AppendFormatToObj( l = Tcl_WideAsLong(w); } if (useShort) { - s = (short int) l; - isNegative = (s < (short int)0); + s = (short) l; + isNegative = (s < (short) 0); } else { - isNegative = (l < (long)0); + isNegative = (l < (long) 0); } } else if (useShort) { - s = (short int) l; - isNegative = (s < (short int)0); + s = (short) l; + isNegative = (s < (short) 0); } else { - isNegative = (l < (long)0); + isNegative = (l < (long) 0); } segment = Tcl_NewObj(); @@ -2024,7 +2118,7 @@ Tcl_AppendFormatToObj( if (gotPrecision) { if (length < precision) { - segmentLimit -= (precision - length); + segmentLimit -= precision - length; } while (length < precision) { Tcl_AppendToObj(segment, "0", 1); @@ -2035,7 +2129,7 @@ Tcl_AppendFormatToObj( if (gotZero) { length += Tcl_GetCharLength(segment); if (length < width) { - segmentLimit -= (width - length); + segmentLimit -= width - length; } while (length < width) { Tcl_AppendToObj(segment, "0", 1); @@ -2044,6 +2138,7 @@ Tcl_AppendFormatToObj( } if (toAppend > segmentLimit) { msg = overflow; + errCode = "OVERFLOW"; goto errorMsg; } Tcl_AppendToObj(segment, bytes, toAppend); @@ -2056,10 +2151,9 @@ Tcl_AppendFormatToObj( case 'x': case 'X': case 'b': { - Tcl_WideUInt bits = (Tcl_WideUInt)0; - Tcl_WideInt numDigits = (Tcl_WideInt)0; - int length, numBits = 4, base = 16; - int index = 0, shift = 0; + Tcl_WideUInt bits = (Tcl_WideUInt) 0; + Tcl_WideInt numDigits = (Tcl_WideInt) 0; + int length, numBits = 4, base = 16, index = 0, shift = 0; Tcl_Obj *pure; char *bytes; @@ -2068,12 +2162,12 @@ Tcl_AppendFormatToObj( } else if (ch == 'o') { base = 8; numBits = 3; - } else if (ch=='b') { + } else if (ch == 'b') { base = 2; numBits = 1; } if (useShort) { - unsigned short int us = (unsigned short int) s; + unsigned short us = (unsigned short) s; bits = (Tcl_WideUInt) us; while (us) { @@ -2093,17 +2187,18 @@ Tcl_AppendFormatToObj( mp_digit mask = (~(mp_digit)0) << (DIGIT_BIT-leftover); numDigits = 1 + - (((Tcl_WideInt)big.used * DIGIT_BIT) / numBits); + (((Tcl_WideInt) big.used * DIGIT_BIT) / numBits); while ((mask & big.dp[big.used-1]) == 0) { numDigits--; mask >>= numBits; } if (numDigits > INT_MAX) { msg = overflow; + errCode = "OVERFLOW"; goto errorMsg; } } else if (!useBig) { - unsigned long int ul = (unsigned long int) l; + unsigned long ul = (unsigned long) l; bits = (Tcl_WideUInt) ul; while (ul) { @@ -2120,16 +2215,16 @@ Tcl_AppendFormatToObj( numDigits = 1; } pure = Tcl_NewObj(); - Tcl_SetObjLength(pure, (int)numDigits); + Tcl_SetObjLength(pure, (int) numDigits); bytes = TclGetString(pure); - toAppend = length = (int)numDigits; + toAppend = length = (int) numDigits; while (numDigits--) { int digitOffset; if (useBig && big.used) { - if ((size_t) shift < + if (index < big.used && (size_t) shift < CHAR_BIT*sizeof(Tcl_WideUInt) - DIGIT_BIT) { - bits |= (((Tcl_WideUInt)big.dp[index++]) <<shift); + bits |= ((Tcl_WideUInt) big.dp[index++]) << shift; shift += DIGIT_BIT; } shift -= numBits; @@ -2147,7 +2242,7 @@ Tcl_AppendFormatToObj( } if (gotPrecision) { if (length < precision) { - segmentLimit -= (precision - length); + segmentLimit -= precision - length; } while (length < precision) { Tcl_AppendToObj(segment, "0", 1); @@ -2158,7 +2253,7 @@ Tcl_AppendFormatToObj( if (gotZero) { length += Tcl_GetCharLength(segment); if (length < width) { - segmentLimit -= (width - length); + segmentLimit -= width - length; } while (length < width) { Tcl_AppendToObj(segment, "0", 1); @@ -2167,6 +2262,7 @@ Tcl_AppendFormatToObj( } if (toAppend > segmentLimit) { msg = overflow; + errCode = "OVERFLOW"; goto errorMsg; } Tcl_AppendObjToObj(segment, pure); @@ -2219,7 +2315,8 @@ Tcl_AppendFormatToObj( *p++ = '.'; p += sprintf(p, "%d", precision); if (precision > INT_MAX - length) { - msg=overflow; + msg = overflow; + errCode = "OVERFLOW"; goto errorMsg; } length += precision; @@ -2236,11 +2333,13 @@ Tcl_AppendFormatToObj( allocSegment = 1; if (!Tcl_AttemptSetObjLength(segment, length)) { msg = overflow; + errCode = "OVERFLOW"; goto errorMsg; } bytes = TclGetString(segment); if (!Tcl_AttemptSetObjLength(segment, sprintf(bytes, spec, d))) { msg = overflow; + errCode = "OVERFLOW"; goto errorMsg; } break; @@ -2249,6 +2348,7 @@ Tcl_AppendFormatToObj( if (interp != NULL) { Tcl_SetObjResult(interp, Tcl_ObjPrintf("bad field specifier \"%c\"", ch)); + Tcl_SetErrorCode(interp, "TCL", "FORMAT", "BADTYPE", NULL); } goto error; } @@ -2261,10 +2361,12 @@ Tcl_AppendFormatToObj( } } - numChars = Tcl_GetCharLength(segment); - if (!gotMinus) { + if (width>0 && numChars<0) { + numChars = Tcl_GetCharLength(segment); + } + if (!gotMinus && width>0) { if (numChars < width) { - limit -= (width - numChars); + limit -= width - numChars; } while (numChars < width) { Tcl_AppendToObj(appendObj, (gotZero ? "0" : " "), 1); @@ -2274,7 +2376,11 @@ Tcl_AppendFormatToObj( Tcl_GetStringFromObj(segment, &segmentNumBytes); if (segmentNumBytes > limit) { + if (allocSegment) { + Tcl_DecrRefCount(segment); + } msg = overflow; + errCode = "OVERFLOW"; goto errorMsg; } Tcl_AppendObjToObj(appendObj, segment); @@ -2282,12 +2388,14 @@ Tcl_AppendFormatToObj( if (allocSegment) { Tcl_DecrRefCount(segment); } - if (numChars < width) { - limit -= (width - numChars); - } - while (numChars < width) { - Tcl_AppendToObj(appendObj, (gotZero ? "0" : " "), 1); - numChars++; + if (width > 0) { + if (numChars < width) { + limit -= width-numChars; + } + while (numChars < width) { + Tcl_AppendToObj(appendObj, (gotZero ? "0" : " "), 1); + numChars++; + } } objIndex += gotSequential; @@ -2295,6 +2403,7 @@ Tcl_AppendFormatToObj( if (numBytes) { if (numBytes > limit) { msg = overflow; + errCode = "OVERFLOW"; goto errorMsg; } Tcl_AppendToObj(appendObj, span, numBytes); @@ -2307,6 +2416,7 @@ Tcl_AppendFormatToObj( errorMsg: if (interp != NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj(msg, -1)); + Tcl_SetErrorCode(interp, "TCL", "FORMAT", errCode, NULL); } error: Tcl_SetObjLength(appendObj, originalLength); @@ -2322,7 +2432,7 @@ Tcl_AppendFormatToObj( * A refcount zero Tcl_Obj. * * Side effects: - * None. + * None. * *--------------------------------------------------------------------------- */ @@ -2336,6 +2446,7 @@ Tcl_Format( { int result; Tcl_Obj *objPtr = Tcl_NewObj(); + result = Tcl_AppendFormatToObj(interp, objPtr, format, objc, objv); if (result != TCL_OK) { Tcl_DecrRefCount(objPtr); @@ -2365,7 +2476,6 @@ AppendPrintfToObjVA( int code, objc; Tcl_Obj **objv, *list = Tcl_NewObj(); const char *p; - char *end; p = format; Tcl_IncrRefCount(list); @@ -2382,7 +2492,6 @@ AppendPrintfToObjVA( } do { switch (*p) { - case '\0': seekingConversion = 0; break; @@ -2435,11 +2544,11 @@ AppendPrintfToObjVA( case -1: case 0: Tcl_ListObjAppendElement(NULL, list, Tcl_NewLongObj( - (long int)va_arg(argList, int))); + (long) va_arg(argList, int))); break; case 1: Tcl_ListObjAppendElement(NULL, list, Tcl_NewLongObj( - va_arg(argList, long int))); + va_arg(argList, long))); break; } break; @@ -2453,15 +2562,18 @@ AppendPrintfToObjVA( seekingConversion = 0; break; case '*': - lastNum = (int)va_arg(argList, int); + lastNum = (int) va_arg(argList, int); Tcl_ListObjAppendElement(NULL, list, Tcl_NewIntObj(lastNum)); p++; break; case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': + case '5': case '6': case '7': case '8': case '9': { + char *end; + lastNum = (int) strtoul(p, &end, 10); p = end; break; + } case '.': gotPrecision = 1; p++; @@ -2497,7 +2609,7 @@ AppendPrintfToObjVA( * A standard Tcl result. * * Side effects: - * None. + * None. * *--------------------------------------------------------------------------- */ @@ -2524,7 +2636,7 @@ Tcl_AppendPrintfToObj( * A refcount zero Tcl_Obj. * * Side effects: - * None. + * None. * *--------------------------------------------------------------------------- */ @@ -2552,8 +2664,8 @@ Tcl_ObjPrintf( * * Results: * An unshared Tcl value which is the [string reverse] of the argument - * supplied. When sharing rules permit, the returned value might be - * the argument with modifications done in place. + * supplied. When sharing rules permit, the returned value might be the + * argument with modifications done in place. * * Side effects: * May allocate a new Tcl_Obj. @@ -2561,84 +2673,124 @@ Tcl_ObjPrintf( *--------------------------------------------------------------------------- */ +static void +ReverseBytes( + unsigned char *to, /* Copy bytes into here... */ + unsigned char *from, /* ...from here... */ + int count) /* Until this many are copied, */ + /* reversing as you go. */ +{ + unsigned char *src = from + count; + if (to == from) { + /* Reversing in place */ + while (--src > to) { + unsigned char c = *src; + *src = *to; + *to++ = c; + } + } else { + while (--src >= from) { + *to++ = *src; + } + } +} + Tcl_Obj * TclStringObjReverse( Tcl_Obj *objPtr) { String *stringPtr; - char *src = NULL, *dest = NULL; - Tcl_UniChar *usrc = NULL, *udest = NULL; - Tcl_Obj *resultPtr = NULL; + Tcl_UniChar ch; + + if (TclIsPureByteArray(objPtr)) { + int numBytes; + unsigned char *from = Tcl_GetByteArrayFromObj(objPtr, &numBytes); + + if (Tcl_IsShared(objPtr)) { + objPtr = Tcl_NewByteArrayObj(NULL, numBytes); + } + ReverseBytes(Tcl_GetByteArrayFromObj(objPtr, NULL), from, numBytes); + return objPtr; + } SetStringFromAny(NULL, objPtr); stringPtr = GET_STRING(objPtr); - if (stringPtr->hasUnicode == 0) { - if (stringPtr->numChars == -1) { - TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length); - } - if (stringPtr->numChars <= 1) { - return objPtr; - } - if (stringPtr->numChars == objPtr->length) { - /* All one-byte chars. Reverse in objPtr->bytes. */ - if (Tcl_IsShared(objPtr)) { - resultPtr = Tcl_NewObj(); - Tcl_SetObjLength(resultPtr, objPtr->length); - dest = TclGetString(resultPtr); - src = objPtr->bytes + objPtr->length - 1; - while (src >= objPtr->bytes) { - *dest++ = *src--; - } - return resultPtr; + if (stringPtr->hasUnicode) { + Tcl_UniChar *from = Tcl_GetUnicode(objPtr); + Tcl_UniChar *src = from + stringPtr->numChars; + + if (Tcl_IsShared(objPtr)) { + Tcl_UniChar *to; + + /* + * Create a non-empty, pure unicode value, so we can coax + * Tcl_SetObjLength into growing the unicode rep buffer. + */ + + ch = 0; + objPtr = Tcl_NewUnicodeObj(&ch, 1); + Tcl_SetObjLength(objPtr, stringPtr->numChars); + to = Tcl_GetUnicode(objPtr); + while (--src >= from) { + *to++ = *src; } - /* Unshared. Reverse objPtr->bytes in place. */ - dest = objPtr->bytes; - src = dest + objPtr->length - 1; - while (dest < src) { - char tmp = *src; - *src-- = *dest; - *dest++ = tmp; + } else { + /* Reversing in place */ + while (--src > from) { + ch = *src; + *src = *from; + *from++ = ch; } - return objPtr; } - FillUnicodeRep(objPtr); - stringPtr = GET_STRING(objPtr); } - if (stringPtr->numChars <= 1) { - return objPtr; - } - - /* Reverse the Unicode rep. */ - if (Tcl_IsShared(objPtr)) { - Tcl_UniChar ch = 0; - /* - * Create a non-empty, pure unicode value, so we can coax - * Tcl_SetObjLength into growing the unicode rep buffer. - */ + if (objPtr->bytes) { + int numChars = stringPtr->numChars; + int numBytes = objPtr->length; + char *to, *from = objPtr->bytes; - resultPtr = Tcl_NewUnicodeObj(&ch, 1); - Tcl_SetObjLength(resultPtr, stringPtr->numChars); - udest = Tcl_GetUnicode(resultPtr); - usrc = stringPtr->unicode + stringPtr->numChars - 1; - while (usrc >= stringPtr->unicode) { - *udest++ = *usrc--; + if (Tcl_IsShared(objPtr)) { + objPtr = Tcl_NewObj(); + Tcl_SetObjLength(objPtr, numBytes); } - return resultPtr; - } + to = objPtr->bytes; + + if (numChars < numBytes) { + /* + * Either numChars == -1 and we don't know how many chars are + * represented by objPtr->bytes and we need Pass 1 just in case, + * or numChars >= 0 and we know we have fewer chars than bytes, + * so we know there's a multibyte character needing Pass 1. + * + * Pass 1. Reverse the bytes of each multi-byte character. + */ + int charCount = 0; + int bytesLeft = numBytes; - /* Unshared. Reverse objPtr->bytes in place. */ - udest = stringPtr->unicode; - usrc = udest + stringPtr->numChars - 1; - while (udest < usrc) { - Tcl_UniChar tmp = *usrc; - *usrc-- = *udest; - *udest++ = tmp; + while (bytesLeft) { + /* + * NOTE: We know that the from buffer is NUL-terminated. + * It's part of the contract for objPtr->bytes values. + * Thus, we can skip calling Tcl_UtfCharComplete() here. + */ + int bytesInChar = Tcl_UtfToUniChar(from, &ch); + + ReverseBytes((unsigned char *)to, (unsigned char *)from, + bytesInChar); + to += bytesInChar; + from += bytesInChar; + bytesLeft -= bytesInChar; + charCount++; + } + + from = to = objPtr->bytes; + stringPtr->numChars = charCount; + } + /* Pass 2. Reverse all the bytes. */ + ReverseBytes((unsigned char *)to, (unsigned char *)from, numBytes); } - TclInvalidateStringRep(objPtr); - stringPtr->allocated = 0; return objPtr; } @@ -2665,6 +2817,7 @@ FillUnicodeRep( * rep. */ { String *stringPtr = GET_STRING(objPtr); + ExtendUnicodeRepWithString(objPtr, objPtr->bytes, objPtr->length, stringPtr->numChars); } @@ -2733,21 +2886,27 @@ DupStringInternalRep( #if COMPAT==0 if (srcStringPtr->numChars == -1) { /* - * The String struct in the source value holds zero useful data. - * Don't bother copying it. Don't even bother allocating space in - * which to copy it. Just let the copy be untyped. + * The String struct in the source value holds zero useful data. Don't + * bother copying it. Don't even bother allocating space in which to + * copy it. Just let the copy be untyped. */ + return; } if (srcStringPtr->hasUnicode) { int copyMaxChars; + if (srcStringPtr->maxChars / 2 >= srcStringPtr->numChars) { copyMaxChars = 2 * srcStringPtr->numChars; } else { copyMaxChars = srcStringPtr->maxChars; } - copyStringPtr = stringAlloc(copyMaxChars); + copyStringPtr = stringAttemptAlloc(copyMaxChars); + if (copyStringPtr == NULL) { + copyMaxChars = srcStringPtr->numChars; + copyStringPtr = stringAlloc(copyMaxChars); + } copyStringPtr->maxChars = copyMaxChars; memcpy(copyStringPtr->unicode, srcStringPtr->unicode, srcStringPtr->numChars * sizeof(Tcl_UniChar)); @@ -2761,12 +2920,13 @@ DupStringInternalRep( copyStringPtr->numChars = srcStringPtr->numChars; /* - * Tricky point: the string value was copied by generic object - * management code, so it doesn't contain any extra bytes that - * might exist in the source object. + * Tricky point: the string value was copied by generic object management + * code, so it doesn't contain any extra bytes that might exist in the + * source object. */ + copyStringPtr->allocated = copyPtr->bytes ? copyPtr->length : 0; -#else +#else /* COMPAT!=0 */ /* * If the src obj is a string of 1-byte Utf chars, then copy the string * rep of the source object and create an "empty" Unicode internal rep for @@ -2775,7 +2935,10 @@ DupStringInternalRep( */ if (srcStringPtr->hasUnicode && srcStringPtr->numChars > 0) { - /* Copy the full allocation for the Unicode buffer. */ + /* + * Copy the full allocation for the Unicode buffer. + */ + copyStringPtr = stringAlloc(srcStringPtr->maxChars); copyStringPtr->maxChars = srcStringPtr->maxChars; memcpy(copyStringPtr->unicode, srcStringPtr->unicode, @@ -2786,16 +2949,18 @@ DupStringInternalRep( copyStringPtr = stringAlloc(0); copyStringPtr->unicode[0] = 0; copyStringPtr->maxChars = 0; + /* * Tricky point: the string value was copied by generic object - * management code, so it doesn't contain any extra bytes that - * might exist in the source object. + * management code, so it doesn't contain any extra bytes that might + * exist in the source object. */ + copyStringPtr->allocated = copyPtr->length; } copyStringPtr->numChars = srcStringPtr->numChars; copyStringPtr->hasUnicode = srcStringPtr->hasUnicode; -#endif +#endif /* COMPAT==0 */ SET_STRING(copyPtr, copyStringPtr); copyPtr->typePtr = &tclStringType; @@ -2827,7 +2992,7 @@ SetStringFromAny( String *stringPtr = stringAlloc(0); /* - * Convert whatever we have into an untyped value. Just A String. + * Convert whatever we have into an untyped value. Just A String. */ (void) TclGetString(objPtr); @@ -2871,6 +3036,7 @@ UpdateStringOfString( Tcl_Obj *objPtr) /* Object with string rep to update. */ { String *stringPtr = GET_STRING(objPtr); + if (stringPtr->numChars == 0) { TclInitStringRep(objPtr, tclEmptyStringRep, 0); } else { @@ -2885,10 +3051,12 @@ ExtendStringRepWithUnicode( const Tcl_UniChar *unicode, int numChars) { + /* + * Pre-condition: this is the "string" Tcl_ObjType. + */ + int i, origLength, size = 0; char *dst, buf[TCL_UTF_MAX]; - - /* Pre-condition: this is the "string" Tcl_ObjType */ String *stringPtr = GET_STRING(objPtr); if (numChars < 0) { @@ -2904,7 +3072,10 @@ ExtendStringRepWithUnicode( } size = origLength = objPtr->length; - /* Quick cheap check in case we have more than enough room. */ + /* + * Quick cheap check in case we have more than enough room. + */ + if (numChars <= (INT_MAX - size)/TCL_UTF_MAX && stringPtr->allocated >= size + numChars * TCL_UTF_MAX) { goto copyBytes; @@ -2917,12 +3088,15 @@ ExtendStringRepWithUnicode( Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX); } - /* Grow space if needed */ + /* + * Grow space if needed. + */ + if (size > stringPtr->allocated) { GrowStringBuffer(objPtr, size, 1); } - copyBytes: + copyBytes: dst = objPtr->bytes + origLength; for (i = 0; i < numChars; i++) { dst += Tcl_UniCharToUtf((int) unicode[i], dst); @@ -2953,7 +3127,7 @@ static void FreeStringInternalRep( Tcl_Obj *objPtr) /* Object with internal rep to free. */ { - ckfree((char *) GET_STRING(objPtr)); + ckfree(GET_STRING(objPtr)); objPtr->typePtr = NULL; } |