summary refs log tree commit diff stats
path: root/generic/tclStringObj.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r-- generic/tclStringObj.c 230
1 files changed, 137 insertions, 93 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 0f6eff7..dffa38c 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -140,9 +140,9 @@ typedef struct String {
#define stringAttemptRealloc(ptr, numChars) \
(String *) attemptckrealloc((ptr), (unsigned) STRING_SIZE(numChars) )
#define GET_STRING(objPtr) \
- ((String *) (objPtr)->internalRep.otherValuePtr)
+ ((String *) (objPtr)->internalRep.twoPtrValue.ptr1)
#define SET_STRING(objPtr, stringPtr) \
- ((objPtr)->internalRep.otherValuePtr = (void *) (stringPtr))
+ ((objPtr)->internalRep.twoPtrValue.ptr1 = (void *) (stringPtr))
/*
* TCL STRING GROWTH ALGORITHM
@@ -152,8 +152,7 @@ typedef struct String {
*
* Attempt to allocate 2 * (originalLength + appendLength)
* On failure:
- * attempt to allocate originalLength + 2*appendLength +
- * TCL_GROWTH_MIN_ALLOC
+ * attempt to allocate originalLength + 2*appendLength + TCL_MIN_GROWTH
*
* This algorithm allows very good performance, as it rapidly increases the
* memory allocated for a given string, which minimizes the number of
@@ -166,20 +165,20 @@ typedef struct String {
* cover the request, but which hopefully will be less than the total
* available memory.
*
- * The addition of TCL_GROWTH_MIN_ALLOC allows for efficient handling of very
+ * The addition of TCL_MIN_GROWTH allows for efficient handling of very
* small appends. Without this extra slush factor, a sequence of several small
* appends would cause several memory allocations. As long as
- * TCL_GROWTH_MIN_ALLOC is a reasonable size, we can avoid that behavior.
+ * TCL_MIN_GROWTH is a reasonable size, we can avoid that behavior.
*
* The growth algorithm can be tuned by adjusting the following parameters:
*
- * TCL_GROWTH_MIN_ALLOC Additional space, in bytes, to allocate when
+ * TCL_MIN_GROWTH Additional space, in bytes, to allocate when
* the double allocation has failed. Default is
- * 1024 (1 kilobyte).
+ * 1024 (1 kilobyte). See tclInt.h.
*/
-#ifndef TCL_GROWTH_MIN_ALLOC
-#define TCL_GROWTH_MIN_ALLOC 1024
+#ifndef TCL_MIN_UNICHAR_GROWTH
+#define TCL_MIN_UNICHAR_GROWTH TCL_MIN_GROWTH/sizeof(Tcl_UniChar)
#endif
static void
@@ -214,7 +213,7 @@ GrowStringBuffer(
*/
unsigned int limit = INT_MAX - needed;
- unsigned int extra = needed - objPtr->length + TCL_GROWTH_MIN_ALLOC;
+ unsigned int extra = needed - objPtr->length + TCL_MIN_GROWTH;
int growth = (int) ((extra > limit) ? limit : extra);
attempt = needed + growth;
@@ -265,7 +264,7 @@ GrowUnicodeBuffer(
unsigned int limit = STRING_MAXCHARS - needed;
unsigned int extra = needed - stringPtr->numChars
- + TCL_GROWTH_MIN_ALLOC/sizeof(Tcl_UniChar);
+ + TCL_MIN_UNICHAR_GROWTH;
int growth = (int) ((extra > limit) ? limit : extra);
attempt = needed + growth;
@@ -1282,23 +1281,43 @@ Tcl_AppendObjToObj(
if ((TclIsPureByteArray(objPtr) || objPtr->bytes == tclEmptyStringRep)
&& TclIsPureByteArray(appendObjPtr)) {
- unsigned char *bytesSrc;
- int lengthSrc, lengthTotal;
/*
- * We do not assume that objPtr and appendObjPtr must be distinct!
- * This makes this code a bit more complex than it otherwise would be,
- * but in turn makes it much safer.
+ * You might expect the code here to be
+ *
+ * bytes = Tcl_GetByteArrayFromObj(appendObjPtr, &length);
+ * TclAppendBytesToByteArray(objPtr, bytes, length);
+ *
+ * and essentially all of the time that would be fine. However,
+ * it would run into trouble in the case where objPtr and
+ * appendObjPtr point to the same thing. That may never be a
+ * good idea. It seems to violate Copy On Write, and we don't
+ * have any tests for the situation, since making any Tcl commands
+ * that call Tcl_AppendObjToObj() do that appears impossible
+ * (They honor Copy On Write!). For the sake of extensions that
+ * go off into that realm, though, here's a more complex approach
+ * that can handle all the cases.
*/
+ /* Get lengths */
+ int lengthSrc;
+
(void) Tcl_GetByteArrayFromObj(objPtr, &length);
(void) Tcl_GetByteArrayFromObj(appendObjPtr, &lengthSrc);
- lengthTotal = length + lengthSrc;
- if (((length > lengthSrc) ? length : lengthSrc) > lengthTotal) {
- Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX);
- }
- bytesSrc = Tcl_GetByteArrayFromObj(appendObjPtr, NULL);
- TclAppendBytesToByteArray(objPtr, bytesSrc, lengthSrc);
+
+ /* Grow buffer enough for the append */
+ TclAppendBytesToByteArray(objPtr, NULL, lengthSrc);
+
+ /* Reset objPtr back to its original length */
+ Tcl_SetByteArrayLength(objPtr, length);
+
+ /*
+ * Now do the append knowing that buffer growth cannot cause
+ * any trouble.
+ */
+
+ TclAppendBytesToByteArray(objPtr,
+ Tcl_GetByteArrayFromObj(appendObjPtr, NULL), lengthSrc);
return;
}
@@ -1438,7 +1457,7 @@ AppendUnicodeToUnicodeRep(
* trailing null.
*/
- memcpy(stringPtr->unicode + stringPtr->numChars, unicode,
+ memmove(stringPtr->unicode + stringPtr->numChars, unicode,
appendNumChars * sizeof(Tcl_UniChar));
stringPtr->unicode[numChars] = 0;
stringPtr->numChars = numChars;
@@ -1606,7 +1625,7 @@ AppendUtfToUtfRep(
stringPtr->numChars = -1;
stringPtr->hasUnicode = 0;
- memcpy(objPtr->bytes + oldLength, bytes, numBytes);
+ memmove(objPtr->bytes + oldLength, bytes, numBytes);
objPtr->bytes[newLength] = 0;
objPtr->length = newLength;
}
@@ -2654,99 +2673,124 @@ Tcl_ObjPrintf(
*---------------------------------------------------------------------------
*/
+static void
+ReverseBytes(
+ unsigned char *to, /* Destination; may be the same pointer as from. */
+ unsigned char *from, /* Source buffer holding count bytes. */
+ int count) /* Number of bytes to write to the destination */
+ /* in the opposite order from the source. */
+{
+ unsigned char *src = from + count; /* One past the last source byte. */
+ if (to == from) {
+ /* Reversing in place: swap the outermost pair, then step inward. */
+ while (--src > to) {
+ unsigned char c = *src;
+ *src = *to;
+ *to++ = c;
+ }
+ } else { /* to != from: read from backwards while writing to forwards; partial overlap is not handled. */
+ while (--src >= from) { /* NOTE(review): exits with src == from - 1; formally undefined in ISO C if from is the start of its array. */
+ *to++ = *src;
+ }
+ }
+}
+
Tcl_Obj *
TclStringObjReverse(
Tcl_Obj *objPtr)
{
String *stringPtr;
- char *src = NULL, *dest = NULL;
- Tcl_UniChar *usrc = NULL, *udest = NULL;
- Tcl_Obj *resultPtr = NULL;
+ Tcl_UniChar ch;
+
+ if (TclIsPureByteArray(objPtr)) {
+ int numBytes;
+ unsigned char *from = Tcl_GetByteArrayFromObj(objPtr, &numBytes);
+
+ if (Tcl_IsShared(objPtr)) {
+ objPtr = Tcl_NewByteArrayObj(NULL, numBytes);
+ }
+ ReverseBytes(Tcl_GetByteArrayFromObj(objPtr, NULL), from, numBytes);
+ return objPtr;
+ }
SetStringFromAny(NULL, objPtr);
stringPtr = GET_STRING(objPtr);
- if (stringPtr->hasUnicode == 0) {
- if (stringPtr->numChars == -1) {
- TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length);
- }
- if (stringPtr->numChars <= 1) {
- return objPtr;
- }
- if (stringPtr->numChars == objPtr->length) {
- /*
- * All one-byte chars. Reverse in objPtr->bytes.
- */
+ if (stringPtr->hasUnicode) {
+ Tcl_UniChar *from = Tcl_GetUnicode(objPtr);
+ Tcl_UniChar *src = from + stringPtr->numChars;
- if (Tcl_IsShared(objPtr)) {
- resultPtr = Tcl_NewObj();
- Tcl_SetObjLength(resultPtr, objPtr->length);
- dest = TclGetString(resultPtr);
- src = objPtr->bytes + objPtr->length - 1;
- while (src >= objPtr->bytes) {
- *dest++ = *src--;
- }
- return resultPtr;
- }
+ if (Tcl_IsShared(objPtr)) {
+ Tcl_UniChar *to;
/*
- * Unshared. Reverse objPtr->bytes in place.
+ * Create a non-empty, pure unicode value, so we can coax
+ * Tcl_SetObjLength into growing the unicode rep buffer.
*/
- dest = objPtr->bytes;
- src = dest + objPtr->length - 1;
- while (dest < src) {
- char tmp = *src;
-
- *src-- = *dest;
- *dest++ = tmp;
+ ch = 0;
+ objPtr = Tcl_NewUnicodeObj(&ch, 1);
+ Tcl_SetObjLength(objPtr, stringPtr->numChars);
+ to = Tcl_GetUnicode(objPtr);
+ while (--src >= from) {
+ *to++ = *src;
+ }
+ } else {
+ /* Reversing in place */
+ while (--src > from) {
+ ch = *src;
+ *src = *from;
+ *from++ = ch;
}
- return objPtr;
}
- FillUnicodeRep(objPtr);
- stringPtr = GET_STRING(objPtr);
- }
- if (stringPtr->numChars <= 1) {
- return objPtr;
}
- /*
- * Reverse the Unicode rep.
- */
-
- if (Tcl_IsShared(objPtr)) {
- Tcl_UniChar ch = 0;
+ if (objPtr->bytes) {
+ int numChars = stringPtr->numChars;
+ int numBytes = objPtr->length;
+ char *to, *from = objPtr->bytes;
- /*
- * Create a non-empty, pure unicode value, so we can coax
- * Tcl_SetObjLength into growing the unicode rep buffer.
- */
-
- resultPtr = Tcl_NewUnicodeObj(&ch, 1);
- Tcl_SetObjLength(resultPtr, stringPtr->numChars);
- udest = Tcl_GetUnicode(resultPtr);
- usrc = stringPtr->unicode + stringPtr->numChars - 1;
- while (usrc >= stringPtr->unicode) {
- *udest++ = *usrc--;
+ if (Tcl_IsShared(objPtr)) {
+ objPtr = Tcl_NewObj();
+ Tcl_SetObjLength(objPtr, numBytes);
}
- return resultPtr;
- }
+ to = objPtr->bytes;
- /*
- * Unshared. Reverse objPtr->bytes in place.
- */
+ if (numChars < numBytes) {
+ /*
+ * Either numChars == -1 and we don't know how many chars are
+ * represented by objPtr->bytes and we need Pass 1 just in case,
+ * or numChars >= 0 and we know we have fewer chars than bytes,
+ * so we know there's a multibyte character needing Pass 1.
+ *
+ * Pass 1. Reverse the bytes of each multi-byte character.
+ */
+ int charCount = 0;
+ int bytesLeft = numBytes;
- udest = stringPtr->unicode;
- usrc = udest + stringPtr->numChars - 1;
- while (udest < usrc) {
- Tcl_UniChar tmp = *usrc;
+ while (bytesLeft) {
+ /*
+ * NOTE: We know that the from buffer is NUL-terminated.
+ * It's part of the contract for objPtr->bytes values.
+ * Thus, we can skip calling Tcl_UtfCharComplete() here.
+ */
+ int bytesInChar = Tcl_UtfToUniChar(from, &ch);
+
+ ReverseBytes((unsigned char *)to, (unsigned char *)from,
+ bytesInChar);
+ to += bytesInChar;
+ from += bytesInChar;
+ bytesLeft -= bytesInChar;
+ charCount++;
+ }
- *usrc-- = *udest;
- *udest++ = tmp;
+ from = to = objPtr->bytes;
+ stringPtr->numChars = charCount;
+ }
+ /* Pass 2. Reverse all the bytes. */
+ ReverseBytes((unsigned char *)to, (unsigned char *)from, numBytes);
}
- TclInvalidateStringRep(objPtr);
- stringPtr->allocated = 0;
return objPtr;
}