diff options
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r-- | generic/tclStringObj.c | 142 |
1 files changed, 6 insertions, 136 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 8d70d20..b480735 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -36,15 +36,7 @@ #include "tclInt.h" #include "tommath.h" - -/* - * Set COMPAT to 1 to restore the shimmering patterns to those of Tcl 8.5. - * This is an escape hatch in case the changes have some unexpected unwelcome - * impact on performance. If things go well, this mechanism can go away when - * post-8.6 development begins. - */ - -#define COMPAT 0 +#include "tclStringRep.h" /* * Prototypes for functions defined later in this file: @@ -89,60 +81,6 @@ const Tcl_ObjType tclStringType = { UpdateStringOfString, /* updateStringProc */ SetStringFromAny /* setFromAnyProc */ }; - -/* - * The following structure is the internal rep for a String object. It keeps - * track of how much memory has been used and how much has been allocated for - * the Unicode and UTF string to enable growing and shrinking of the UTF and - * Unicode reps of the String object with fewer mallocs. To optimize string - * length and indexing operations, this structure also stores the number of - * characters (same of UTF and Unicode!) once that value has been computed. - * - * Under normal configurations, what Tcl calls "Unicode" is actually UTF-16 - * restricted to the Basic Multilingual Plane (i.e. U+00000 to U+0FFFF). This - * can be officially modified by altering the definition of Tcl_UniChar in - * tcl.h, but do not do that unless you are sure what you're doing! - */ - -typedef struct String { - int numChars; /* The number of chars in the string. -1 means - * this value has not been calculated. >= 0 - * means that there is a valid Unicode rep, or - * that the number of UTF bytes == the number - * of chars. */ - int allocated; /* The amount of space actually allocated for - * the UTF string (minus 1 byte for the - * termination char). */ - int maxChars; /* Max number of chars that can fit in the - * space allocated for the unicode array. */ - int hasUnicode; /* Boolean determining whether the string has - * a Unicode representation. */ - Tcl_UniChar unicode[1]; /* The array of Unicode chars. The actual size - * of this field depends on the 'maxChars' - * field above. */ -} String; - -#define STRING_MAXCHARS \ - (int)(((size_t)UINT_MAX - sizeof(String))/sizeof(Tcl_UniChar)) -#define STRING_SIZE(numChars) \ - (sizeof(String) + ((numChars) * sizeof(Tcl_UniChar))) -#define stringCheckLimits(numChars) \ - if ((numChars) < 0 || (numChars) > STRING_MAXCHARS) { \ - Tcl_Panic("max length for a Tcl unicode value (%d chars) exceeded", \ - STRING_MAXCHARS); \ - } -#define stringAttemptAlloc(numChars) \ - (String *) attemptckalloc((unsigned) STRING_SIZE(numChars) ) -#define stringAlloc(numChars) \ - (String *) ckalloc((unsigned) STRING_SIZE(numChars) ) -#define stringRealloc(ptr, numChars) \ - (String *) ckrealloc((ptr), (unsigned) STRING_SIZE(numChars) ) -#define stringAttemptRealloc(ptr, numChars) \ - (String *) attemptckrealloc((ptr), (unsigned) STRING_SIZE(numChars) ) -#define GET_STRING(objPtr) \ - ((String *) (objPtr)->internalRep.twoPtrValue.ptr1) -#define SET_STRING(objPtr, stringPtr) \ - ((objPtr)->internalRep.twoPtrValue.ptr1 = (void *) (stringPtr)) /* * TCL STRING GROWTH ALGORITHM @@ -498,18 +436,6 @@ Tcl_GetCharLength( if (numChars == -1) { TclNumUtfChars(numChars, objPtr->bytes, objPtr->length); stringPtr->numChars = numChars; - -#if COMPAT - if (numChars < objPtr->length) { - /* - * Since we've just computed the number of chars, and not all UTF - * chars are 1-byte long, go ahead and populate the unicode - * string. - */ - - FillUnicodeRep(objPtr); - } -#endif } return numChars; } @@ -1226,11 +1152,7 @@ Tcl_AppendUnicodeToObj( * objPtr's string rep. */ - if (stringPtr->hasUnicode -#if COMPAT - && stringPtr->numChars > 0 -#endif - ) { + if (stringPtr->hasUnicode) { AppendUnicodeToUnicodeRep(objPtr, unicode, length); } else { AppendUnicodeToUtfRep(objPtr, unicode, length); @@ -1334,11 +1256,7 @@ Tcl_AppendObjToObj( * appendObjPtr and append it. */ - if (stringPtr->hasUnicode -#if COMPAT - && stringPtr->numChars > 0 -#endif - ) { + if (stringPtr->hasUnicode) { /* * If appendObjPtr is not of the "String" type, don't convert it. */ @@ -1371,11 +1289,7 @@ Tcl_AppendObjToObj( AppendUtfToUtfRep(objPtr, bytes, length); - if (numChars >= 0 && appendNumChars >= 0 -#if COMPAT - && appendNumChars == length -#endif - ) { + if (numChars >= 0 && appendNumChars >= 0) { stringPtr->numChars = numChars + appendNumChars; } } @@ -1499,14 +1413,6 @@ AppendUnicodeToUtfRep( if (stringPtr->numChars != -1) { stringPtr->numChars += numChars; } - -#if COMPAT - /* - * Invalidate the unicode rep. - */ - - stringPtr->hasUnicode = 0; -#endif } /* @@ -2924,7 +2830,6 @@ DupStringInternalRep( String *srcStringPtr = GET_STRING(srcPtr); String *copyStringPtr = NULL; -#if COMPAT==0 if (srcStringPtr->numChars == -1) { /* * The String struct in the source value holds zero useful data. Don't @@ -2967,41 +2872,6 @@ DupStringInternalRep( */ copyStringPtr->allocated = copyPtr->bytes ? copyPtr->length : 0; -#else /* COMPAT!=0 */ - /* - * If the src obj is a string of 1-byte Utf chars, then copy the string - * rep of the source object and create an "empty" Unicode internal rep for - * the new object. Otherwise, copy Unicode internal rep, and invalidate - * the string rep of the new object. - */ - - if (srcStringPtr->hasUnicode && srcStringPtr->numChars > 0) { - /* - * Copy the full allocation for the Unicode buffer. - */ - - copyStringPtr = stringAlloc(srcStringPtr->maxChars); - copyStringPtr->maxChars = srcStringPtr->maxChars; - memcpy(copyStringPtr->unicode, srcStringPtr->unicode, - srcStringPtr->numChars * sizeof(Tcl_UniChar)); - copyStringPtr->unicode[srcStringPtr->numChars] = 0; - copyStringPtr->allocated = 0; - } else { - copyStringPtr = stringAlloc(0); - copyStringPtr->unicode[0] = 0; - copyStringPtr->maxChars = 0; - - /* - * Tricky point: the string value was copied by generic object - * management code, so it doesn't contain any extra bytes that might - * exist in the source object. - */ - - copyStringPtr->allocated = copyPtr->length; - } - copyStringPtr->numChars = srcStringPtr->numChars; - copyStringPtr->hasUnicode = srcStringPtr->hasUnicode; -#endif /* COMPAT==0 */ SET_STRING(copyPtr, copyStringPtr); copyPtr->typePtr = &tclStringType; @@ -3097,7 +2967,7 @@ ExtendStringRepWithUnicode( */ int i, origLength, size = 0; - char *dst, buf[TCL_UTF_MAX]; + char *dst; String *stringPtr = GET_STRING(objPtr); if (numChars < 0) { @@ -3123,7 +2993,7 @@ ExtendStringRepWithUnicode( } for (i = 0; i < numChars && size >= 0; i++) { - size += Tcl_UniCharToUtf((int) unicode[i], buf); + size += TclUtfCount(unicode[i]); } if (size < 0) { Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX); |