diff options
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r-- | generic/tclStringObj.c | 230 |
1 files changed, 137 insertions, 93 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 0f6eff7..dffa38c 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -140,9 +140,9 @@ typedef struct String { #define stringAttemptRealloc(ptr, numChars) \ (String *) attemptckrealloc((ptr), (unsigned) STRING_SIZE(numChars) ) #define GET_STRING(objPtr) \ - ((String *) (objPtr)->internalRep.otherValuePtr) + ((String *) (objPtr)->internalRep.twoPtrValue.ptr1) #define SET_STRING(objPtr, stringPtr) \ - ((objPtr)->internalRep.otherValuePtr = (void *) (stringPtr)) + ((objPtr)->internalRep.twoPtrValue.ptr1 = (void *) (stringPtr)) /* * TCL STRING GROWTH ALGORITHM @@ -152,8 +152,7 @@ typedef struct String { * * Attempt to allocate 2 * (originalLength + appendLength) * On failure: - * attempt to allocate originalLength + 2*appendLength + - * TCL_GROWTH_MIN_ALLOC + * attempt to allocate originalLength + 2*appendLength + TCL_MIN_GROWTH * * This algorithm allows very good performance, as it rapidly increases the * memory allocated for a given string, which minimizes the number of @@ -166,20 +165,20 @@ typedef struct String { * cover the request, but which hopefully will be less than the total * available memory. * - * The addition of TCL_GROWTH_MIN_ALLOC allows for efficient handling of very + * The addition of TCL_MIN_GROWTH allows for efficient handling of very * small appends. Without this extra slush factor, a sequence of several small * appends would cause several memory allocations. As long as - * TCL_GROWTH_MIN_ALLOC is a reasonable size, we can avoid that behavior. + * TCL_MIN_GROWTH is a reasonable size, we can avoid that behavior. * * The growth algorithm can be tuned by adjusting the following parameters: * - * TCL_GROWTH_MIN_ALLOC Additional space, in bytes, to allocate when + * TCL_MIN_GROWTH Additional space, in bytes, to allocate when * the double allocation has failed. Default is - * 1024 (1 kilobyte). + * 1024 (1 kilobyte). See tclInt.h. */ -#ifndef TCL_GROWTH_MIN_ALLOC -#define TCL_GROWTH_MIN_ALLOC 1024 +#ifndef TCL_MIN_UNICHAR_GROWTH +#define TCL_MIN_UNICHAR_GROWTH TCL_MIN_GROWTH/sizeof(Tcl_UniChar) #endif static void @@ -214,7 +213,7 @@ GrowStringBuffer( */ unsigned int limit = INT_MAX - needed; - unsigned int extra = needed - objPtr->length + TCL_GROWTH_MIN_ALLOC; + unsigned int extra = needed - objPtr->length + TCL_MIN_GROWTH; int growth = (int) ((extra > limit) ? limit : extra); attempt = needed + growth; @@ -265,7 +264,7 @@ GrowUnicodeBuffer( unsigned int limit = STRING_MAXCHARS - needed; unsigned int extra = needed - stringPtr->numChars - + TCL_GROWTH_MIN_ALLOC/sizeof(Tcl_UniChar); + + TCL_MIN_UNICHAR_GROWTH; int growth = (int) ((extra > limit) ? limit : extra); attempt = needed + growth; @@ -1282,23 +1281,43 @@ Tcl_AppendObjToObj( if ((TclIsPureByteArray(objPtr) || objPtr->bytes == tclEmptyStringRep) && TclIsPureByteArray(appendObjPtr)) { - unsigned char *bytesSrc; - int lengthSrc, lengthTotal; /* - * We do not assume that objPtr and appendObjPtr must be distinct! - * This makes this code a bit more complex than it otherwise would be, - * but in turn makes it much safer. + * You might expect the code here to be + * + * bytes = Tcl_GetByteArrayFromObj(appendObjPtr, &length); + * TclAppendBytesToByteArray(objPtr, bytes, length); + * + * and essentially all of the time that would be fine. However, + * it would run into trouble in the case where objPtr and + * appendObjPtr point to the same thing. That may never be a + * good idea. It seems to violate Copy On Write, and we don't + * have any tests for the situation, since making any Tcl commands + * that call Tcl_AppendObjToObj() do that appears impossible + * (They honor Copy On Write!). For the sake of extensions that + * go off into that realm, though, here's a more complex approach + * that can handle all the cases. */ + /* Get lengths */ + int lengthSrc; + (void) Tcl_GetByteArrayFromObj(objPtr, &length); (void) Tcl_GetByteArrayFromObj(appendObjPtr, &lengthSrc); - lengthTotal = length + lengthSrc; - if (((length > lengthSrc) ? length : lengthSrc) > lengthTotal) { - Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX); - } - bytesSrc = Tcl_GetByteArrayFromObj(appendObjPtr, NULL); - TclAppendBytesToByteArray(objPtr, bytesSrc, lengthSrc); + + /* Grow buffer enough for the append */ + TclAppendBytesToByteArray(objPtr, NULL, lengthSrc); + + /* Reset objPtr back to the original value */ + Tcl_SetByteArrayLength(objPtr, length); + + /* + * Now do the append knowing that buffer growth cannot cause + * any trouble. + */ + + TclAppendBytesToByteArray(objPtr, + Tcl_GetByteArrayFromObj(appendObjPtr, NULL), lengthSrc); return; } @@ -1438,7 +1457,7 @@ AppendUnicodeToUnicodeRep( * trailing null. */ - memcpy(stringPtr->unicode + stringPtr->numChars, unicode, + memmove(stringPtr->unicode + stringPtr->numChars, unicode, appendNumChars * sizeof(Tcl_UniChar)); stringPtr->unicode[numChars] = 0; stringPtr->numChars = numChars; @@ -1606,7 +1625,7 @@ AppendUtfToUtfRep( stringPtr->numChars = -1; stringPtr->hasUnicode = 0; - memcpy(objPtr->bytes + oldLength, bytes, numBytes); + memmove(objPtr->bytes + oldLength, bytes, numBytes); objPtr->bytes[newLength] = 0; objPtr->length = newLength; } @@ -2654,99 +2673,124 @@ Tcl_ObjPrintf( *--------------------------------------------------------------------------- */ +static void +ReverseBytes( + unsigned char *to, /* Copy bytes into here... */ + unsigned char *from, /* ...from here... */ + int count) /* Until this many are copied, */ + /* reversing as you go. */ +{ + unsigned char *src = from + count; + if (to == from) { + /* Reversing in place */ + while (--src > to) { + unsigned char c = *src; + *src = *to; + *to++ = c; + } + } else { + while (--src >= from) { + *to++ = *src; + } + } +} + Tcl_Obj * TclStringObjReverse( Tcl_Obj *objPtr) { String *stringPtr; - char *src = NULL, *dest = NULL; - Tcl_UniChar *usrc = NULL, *udest = NULL; - Tcl_Obj *resultPtr = NULL; + Tcl_UniChar ch; + + if (TclIsPureByteArray(objPtr)) { + int numBytes; + unsigned char *from = Tcl_GetByteArrayFromObj(objPtr, &numBytes); + + if (Tcl_IsShared(objPtr)) { + objPtr = Tcl_NewByteArrayObj(NULL, numBytes); + } + ReverseBytes(Tcl_GetByteArrayFromObj(objPtr, NULL), from, numBytes); + return objPtr; + } SetStringFromAny(NULL, objPtr); stringPtr = GET_STRING(objPtr); - if (stringPtr->hasUnicode == 0) { - if (stringPtr->numChars == -1) { - TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length); - } - if (stringPtr->numChars <= 1) { - return objPtr; - } - if (stringPtr->numChars == objPtr->length) { - /* - * All one-byte chars. Reverse in objPtr->bytes. - */ + if (stringPtr->hasUnicode) { + Tcl_UniChar *from = Tcl_GetUnicode(objPtr); + Tcl_UniChar *src = from + stringPtr->numChars; - if (Tcl_IsShared(objPtr)) { - resultPtr = Tcl_NewObj(); - Tcl_SetObjLength(resultPtr, objPtr->length); - dest = TclGetString(resultPtr); - src = objPtr->bytes + objPtr->length - 1; - while (src >= objPtr->bytes) { - *dest++ = *src--; - } - return resultPtr; - } + if (Tcl_IsShared(objPtr)) { + Tcl_UniChar *to; /* - * Unshared. Reverse objPtr->bytes in place. + * Create a non-empty, pure unicode value, so we can coax + * Tcl_SetObjLength into growing the unicode rep buffer. */ - dest = objPtr->bytes; - src = dest + objPtr->length - 1; - while (dest < src) { - char tmp = *src; - - *src-- = *dest; - *dest++ = tmp; + ch = 0; + objPtr = Tcl_NewUnicodeObj(&ch, 1); + Tcl_SetObjLength(objPtr, stringPtr->numChars); + to = Tcl_GetUnicode(objPtr); + while (--src >= from) { + *to++ = *src; + } + } else { + /* Reversing in place */ + while (--src > from) { + ch = *src; + *src = *from; + *from++ = ch; } - return objPtr; } - FillUnicodeRep(objPtr); - stringPtr = GET_STRING(objPtr); - } - if (stringPtr->numChars <= 1) { - return objPtr; } - /* - * Reverse the Unicode rep. - */ - - if (Tcl_IsShared(objPtr)) { - Tcl_UniChar ch = 0; + if (objPtr->bytes) { + int numChars = stringPtr->numChars; + int numBytes = objPtr->length; + char *to, *from = objPtr->bytes; - /* - * Create a non-empty, pure unicode value, so we can coax - * Tcl_SetObjLength into growing the unicode rep buffer. - */ - - resultPtr = Tcl_NewUnicodeObj(&ch, 1); - Tcl_SetObjLength(resultPtr, stringPtr->numChars); - udest = Tcl_GetUnicode(resultPtr); - usrc = stringPtr->unicode + stringPtr->numChars - 1; - while (usrc >= stringPtr->unicode) { - *udest++ = *usrc--; + if (Tcl_IsShared(objPtr)) { + objPtr = Tcl_NewObj(); + Tcl_SetObjLength(objPtr, numBytes); } - return resultPtr; - } + to = objPtr->bytes; - /* - * Unshared. Reverse objPtr->bytes in place. - */ + if (numChars < numBytes) { + /* + * Either numChars == -1 and we don't know how many chars are + * represented by objPtr->bytes and we need Pass 1 just in case, + * or numChars >= 0 and we know we have fewer chars than bytes, + * so we know there's a multibyte character needing Pass 1. + * + * Pass 1. Reverse the bytes of each multi-byte character. + */ + int charCount = 0; + int bytesLeft = numBytes; - udest = stringPtr->unicode; - usrc = udest + stringPtr->numChars - 1; - while (udest < usrc) { - Tcl_UniChar tmp = *usrc; + while (bytesLeft) { + /* + * NOTE: We know that the from buffer is NUL-terminated. + * It's part of the contract for objPtr->bytes values. + * Thus, we can skip calling Tcl_UtfCharComplete() here. + */ + int bytesInChar = Tcl_UtfToUniChar(from, &ch); + + ReverseBytes((unsigned char *)to, (unsigned char *)from, + bytesInChar); + to += bytesInChar; + from += bytesInChar; + bytesLeft -= bytesInChar; + charCount++; + } - *usrc-- = *udest; - *udest++ = tmp; + from = to = objPtr->bytes; + stringPtr->numChars = charCount; + } + /* Pass 2. Reverse all the bytes. */ + ReverseBytes((unsigned char *)to, (unsigned char *)from, numBytes); } - TclInvalidateStringRep(objPtr); - stringPtr->allocated = 0; return objPtr; } |