summaryrefslogtreecommitdiffstats
path: root/generic/tclStringObj.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r--generic/tclStringObj.c168
1 files changed, 88 insertions, 80 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index e495c2e..11a57e9 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -36,6 +36,7 @@
#include "tclInt.h"
#include "tommath.h"
+#include "tclStringRep.h"
/*
* Set COMPAT to 1 to restore the shimmering patterns to those of Tcl 8.5.
@@ -89,60 +90,6 @@ const Tcl_ObjType tclStringType = {
UpdateStringOfString, /* updateStringProc */
SetStringFromAny /* setFromAnyProc */
};
-
-/*
- * The following structure is the internal rep for a String object. It keeps
- * track of how much memory has been used and how much has been allocated for
- * the Unicode and UTF string to enable growing and shrinking of the UTF and
- * Unicode reps of the String object with fewer mallocs. To optimize string
- * length and indexing operations, this structure also stores the number of
- * characters (same of UTF and Unicode!) once that value has been computed.
- *
- * Under normal configurations, what Tcl calls "Unicode" is actually UTF-16
- * restricted to the Basic Multilingual Plane (i.e. U+00000 to U+0FFFF). This
- * can be officially modified by altering the definition of Tcl_UniChar in
- * tcl.h, but do not do that unless you are sure what you're doing!
- */
-
-typedef struct String {
- int numChars; /* The number of chars in the string. -1 means
- * this value has not been calculated. >= 0
- * means that there is a valid Unicode rep, or
- * that the number of UTF bytes == the number
- * of chars. */
- int allocated; /* The amount of space actually allocated for
- * the UTF string (minus 1 byte for the
- * termination char). */
- int maxChars; /* Max number of chars that can fit in the
- * space allocated for the unicode array. */
- int hasUnicode; /* Boolean determining whether the string has
- * a Unicode representation. */
- Tcl_UniChar unicode[1]; /* The array of Unicode chars. The actual size
- * of this field depends on the 'maxChars'
- * field above. */
-} String;
-
-#define STRING_MAXCHARS \
- (int)(((size_t)UINT_MAX - sizeof(String))/sizeof(Tcl_UniChar))
-#define STRING_SIZE(numChars) \
- (sizeof(String) + ((numChars) * sizeof(Tcl_UniChar)))
-#define stringCheckLimits(numChars) \
- if ((numChars) < 0 || (numChars) > STRING_MAXCHARS) { \
- Tcl_Panic("max length for a Tcl unicode value (%d chars) exceeded", \
- STRING_MAXCHARS); \
- }
-#define stringAttemptAlloc(numChars) \
- (String *) attemptckalloc((unsigned) STRING_SIZE(numChars) )
-#define stringAlloc(numChars) \
- (String *) ckalloc((unsigned) STRING_SIZE(numChars) )
-#define stringRealloc(ptr, numChars) \
- (String *) ckrealloc((ptr), (unsigned) STRING_SIZE(numChars) )
-#define stringAttemptRealloc(ptr, numChars) \
- (String *) attemptckrealloc((ptr), (unsigned) STRING_SIZE(numChars) )
-#define GET_STRING(objPtr) \
- ((String *) (objPtr)->internalRep.twoPtrValue.ptr1)
-#define SET_STRING(objPtr, stringPtr) \
- ((objPtr)->internalRep.twoPtrValue.ptr1 = (void *) (stringPtr))
/*
* TCL STRING GROWTH ALGORITHM
@@ -188,7 +135,7 @@ GrowStringBuffer(
int flag)
{
/*
- * Pre-conditions:
+ * Pre-conditions:
* objPtr->typePtr == &tclStringType
* needed > stringPtr->allocated
* flag || objPtr->bytes != NULL
@@ -238,7 +185,7 @@ GrowUnicodeBuffer(
int needed)
{
/*
- * Pre-conditions:
+ * Pre-conditions:
* objPtr->typePtr == &tclStringType
* needed > stringPtr->maxChars
* needed < STRING_MAXCHARS
@@ -1123,7 +1070,8 @@ Tcl_AppendLimitedToObj(
if (ellipsis == NULL) {
ellipsis = "...";
}
- toCopy = Tcl_UtfPrev(bytes+limit+1-strlen(ellipsis), bytes) - bytes;
+ toCopy = (bytes == NULL) ? limit
+ : Tcl_UtfPrev(bytes+limit+1-strlen(ellipsis), bytes) - bytes;
}
/*
@@ -1281,23 +1229,43 @@ Tcl_AppendObjToObj(
if ((TclIsPureByteArray(objPtr) || objPtr->bytes == tclEmptyStringRep)
&& TclIsPureByteArray(appendObjPtr)) {
- unsigned char *bytesSrc;
- int lengthSrc, lengthTotal;
/*
- * We do not assume that objPtr and appendObjPtr must be distinct!
- * This makes this code a bit more complex than it otherwise would be,
- * but in turn makes it much safer.
+ * You might expect the code here to be
+ *
+ * bytes = Tcl_GetByteArrayFromObj(appendObjPtr, &length);
+ * TclAppendBytesToByteArray(objPtr, bytes, length);
+ *
+ * and essentially all of the time that would be fine. However,
+ * it would run into trouble in the case where objPtr and
+ * appendObjPtr point to the same thing. That may never be a
+ * good idea. It seems to violate Copy On Write, and we don't
+ * have any tests for the situation, since making any Tcl commands
+ * that call Tcl_AppendObjToObj() do that appears impossible
+ * (They honor Copy On Write!). For the sake of extensions that
+ * go off into that realm, though, here's a more complex approach
+ * that can handle all the cases.
*/
+ /* Get lengths */
+ int lengthSrc;
+
(void) Tcl_GetByteArrayFromObj(objPtr, &length);
(void) Tcl_GetByteArrayFromObj(appendObjPtr, &lengthSrc);
- lengthTotal = length + lengthSrc;
- if (((length > lengthSrc) ? length : lengthSrc) > lengthTotal) {
- Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX);
- }
- bytesSrc = Tcl_GetByteArrayFromObj(appendObjPtr, NULL);
- TclAppendBytesToByteArray(objPtr, bytesSrc, lengthSrc);
+
+ /* Grow buffer enough for the append */
+ TclAppendBytesToByteArray(objPtr, NULL, lengthSrc);
+
+ /* Reset objPtr back to the original value */
+ Tcl_SetByteArrayLength(objPtr, length);
+
+ /*
+ * Now do the append knowing that buffer growth cannot cause
+ * any trouble.
+ */
+
+ TclAppendBytesToByteArray(objPtr,
+ Tcl_GetByteArrayFromObj(appendObjPtr, NULL), lengthSrc);
return;
}
@@ -1313,7 +1281,7 @@ Tcl_AppendObjToObj(
* appendObjPtr and append it.
*/
- if (stringPtr->hasUnicode
+ if (stringPtr->hasUnicode
#if COMPAT
&& stringPtr->numChars > 0
#endif
@@ -1415,7 +1383,7 @@ AppendUnicodeToUnicodeRep(
* the reallocs below.
*/
- if (unicode >= stringPtr->unicode
+ if (unicode && unicode >= stringPtr->unicode
&& unicode <= stringPtr->unicode + stringPtr->maxChars) {
offset = unicode - stringPtr->unicode;
}
@@ -1437,8 +1405,10 @@ AppendUnicodeToUnicodeRep(
* trailing null.
*/
- memmove(stringPtr->unicode + stringPtr->numChars, unicode,
- appendNumChars * sizeof(Tcl_UniChar));
+ if (unicode) {
+ memmove(stringPtr->unicode + stringPtr->numChars, unicode,
+ appendNumChars * sizeof(Tcl_UniChar));
+ }
stringPtr->unicode[numChars] = 0;
stringPtr->numChars = numChars;
stringPtr->allocated = 0;
@@ -1577,7 +1547,7 @@ AppendUtfToUtfRep(
* the reallocs below.
*/
- if (bytes >= objPtr->bytes
+ if (bytes && bytes >= objPtr->bytes
&& bytes <= objPtr->bytes + objPtr->length) {
offset = bytes - objPtr->bytes;
}
@@ -1605,7 +1575,9 @@ AppendUtfToUtfRep(
stringPtr->numChars = -1;
stringPtr->hasUnicode = 0;
- memmove(objPtr->bytes + oldLength, bytes, numBytes);
+ if (bytes) {
+ memmove(objPtr->bytes + oldLength, bytes, numBytes);
+ }
objPtr->bytes[newLength] = 0;
objPtr->length = newLength;
}
@@ -2289,7 +2261,7 @@ Tcl_AppendFormatToObj(
p += sprintf(p, "%d", width);
if (width > length) {
length = width;
- }
+ }
}
if (gotPrecision) {
*p++ = '.';
@@ -2638,6 +2610,38 @@ Tcl_ObjPrintf(
/*
*---------------------------------------------------------------------------
*
+ * TclGetStringStorage --
+ *
+ * Returns the string storage space of a Tcl_Obj.
+ *
+ * Results:
+ * The pointer value objPtr->bytes is returned and the number of bytes
+ * allocated there is written to *sizePtr (if known).
+ *
+ * Side effects:
+ * May set objPtr->bytes.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+char *
+TclGetStringStorage(
+ Tcl_Obj *objPtr,
+ unsigned int *sizePtr)
+{
+ String *stringPtr;
+
+ if (objPtr->typePtr != &tclStringType || objPtr->bytes == NULL) {
+ return TclGetStringFromObj(objPtr, (int *)sizePtr);
+ }
+
+ stringPtr = GET_STRING(objPtr);
+ *sizePtr = stringPtr->allocated;
+ return objPtr->bytes;
+}
+/*
+ *---------------------------------------------------------------------------
+ *
* TclStringObjReverse --
*
* Implements the [string reverse] operation.
@@ -2821,14 +2825,18 @@ ExtendUnicodeRepWithString(
}
needed = numOrigChars + numAppendChars;
stringCheckLimits(needed);
-
+
if (needed > stringPtr->maxChars) {
GrowUnicodeBuffer(objPtr, needed);
stringPtr = GET_STRING(objPtr);
}
stringPtr->hasUnicode = 1;
- stringPtr->numChars = needed;
+ if (bytes) {
+ stringPtr->numChars = needed;
+ } else {
+ numAppendChars = 0;
+ }
for (dst=stringPtr->unicode + numOrigChars; numAppendChars-- > 0; dst++) {
bytes += TclUtfToUniChar(bytes, dst);
}
@@ -3035,7 +3043,7 @@ ExtendStringRepWithUnicode(
* Pre-condition: this is the "string" Tcl_ObjType.
*/
- int i, origLength, size = 0;
+ int i, origLength, size = 0;
char *dst, buf[TCL_UTF_MAX];
String *stringPtr = GET_STRING(objPtr);
@@ -3051,12 +3059,12 @@ ExtendStringRepWithUnicode(
objPtr->length = 0;
}
size = origLength = objPtr->length;
-
+
/*
* Quick cheap check in case we have more than enough room.
*/
- if (numChars <= (INT_MAX - size)/TCL_UTF_MAX
+ if (numChars <= (INT_MAX - size)/TCL_UTF_MAX
&& stringPtr->allocated >= size + numChars * TCL_UTF_MAX) {
goto copyBytes;
}