diff options
Diffstat (limited to 'generic/tclStringObj.c')
| -rw-r--r-- | generic/tclStringObj.c | 3831 |
1 files changed, 1027 insertions, 2804 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 5ec026f..86f0c62 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -1,41 +1,42 @@ /* * tclStringObj.c -- * - * This file contains functions that implement string operations on Tcl - * objects. Some string operations work with UTF-8 encoding forms. - * Functions that require knowledge of the width of each character, - * such as indexing, operate on fixed width encoding forms such as UTF-32. - * - * Conceptually, a string is a sequence of Unicode code points. Internally - * it may be stored in an encoding form such as a modified version of - * UTF-8 or UTF-16 (when TCL_UTF_MAX=3) or UTF-32. - * - * The String object is optimized for the case where each UTF char + * This file contains functions that implement string operations on Tcl + * objects. Some string operations work with UTF strings and others + * require Unicode format. Functions that require knowledge of the width + * of each character, such as indexing, operate on Unicode data. + * + * A Unicode string is an internationalized string. Conceptually, a + * Unicode string is an array of 16-bit quantities organized as a + * sequence of properly formed UTF-8 characters. There is a one-to-one + * map between Unicode and UTF characters. Because Unicode characters + * have a fixed width, operations such as indexing operate on Unicode + * data. The String object is optimized for the case where each UTF char * in a string is only one byte. In this case, we store the value of - * numChars, but we don't store the fixed form encoding (unless - * Tcl_GetUnicode is explicitly called). + * numChars, but we don't store the Unicode data (unless Tcl_GetUnicode + * is explicitly called). * - * The String object type stores one or both formats. The default - * behavior is to store UTF-8. Once UTF-16/UTF32 is calculated, it is - * stored in the internal rep for future access (without an additional - * O(n) cost). + * The String object type stores one or both formats. The default + * behavior is to store UTF. Once Unicode is calculated by a function, it + * is stored in the internal rep for future access (without an additional + * O(n) cost). * * To allow many appends to be done to an object without constantly - * reallocating space, we allocate double the space and use the + * reallocating the space for the string or Unicode representation, we + * allocate double the space for the string or Unicode and use the * internal representation to keep track of how much space is used vs. * allocated. * - * Copyright © 1995-1997 Sun Microsystems, Inc. - * Copyright © 1999 Scriptics Corporation. + * Copyright (c) 1995-1997 Sun Microsystems, Inc. + * Copyright (c) 1999 by Scriptics Corporation. * * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. */ #include "tclInt.h" -#include "tclTomMath.h" -#include "tclStringRep.h" -#include <assert.h> +#include "tommath.h" + /* * Prototypes for functions defined later in this file: */ @@ -43,190 +44,94 @@ static void AppendPrintfToObjVA(Tcl_Obj *objPtr, const char *format, va_list argList); static void AppendUnicodeToUnicodeRep(Tcl_Obj *objPtr, - const Tcl_UniChar *unicode, Tcl_Size appendNumChars); + const Tcl_UniChar *unicode, int appendNumChars); static void AppendUnicodeToUtfRep(Tcl_Obj *objPtr, - const Tcl_UniChar *unicode, Tcl_Size numChars); + const Tcl_UniChar *unicode, int numChars); static void AppendUtfToUnicodeRep(Tcl_Obj *objPtr, - const char *bytes, Tcl_Size numBytes); + const char *bytes, int numBytes); static void AppendUtfToUtfRep(Tcl_Obj *objPtr, - const char *bytes, Tcl_Size numBytes); + const char *bytes, int numBytes); static void DupStringInternalRep(Tcl_Obj *objPtr, Tcl_Obj *copyPtr); -static Tcl_Size ExtendStringRepWithUnicode(Tcl_Obj *objPtr, - const Tcl_UniChar *unicode, Tcl_Size numChars); -static void ExtendUnicodeRepWithString(Tcl_Obj *objPtr, - const char *bytes, Tcl_Size numBytes, - Tcl_Size numAppendChars); static void FillUnicodeRep(Tcl_Obj *objPtr); static void FreeStringInternalRep(Tcl_Obj *objPtr); -static void GrowStringBuffer(Tcl_Obj *objPtr, Tcl_Size needed, int flag); -static void GrowUnicodeBuffer(Tcl_Obj *objPtr, Tcl_Size needed); +static void GrowUnicodeBuffer(Tcl_Obj *objPtr, int needed); static int SetStringFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr); static void SetUnicodeObj(Tcl_Obj *objPtr, - const Tcl_UniChar *unicode, Tcl_Size numChars); -static Tcl_Size UnicodeLength(const Tcl_UniChar *unicode); -#if !defined(TCL_NO_DEPRECATED) -static int UTF16Length(const unsigned short *unicode); -#endif + const Tcl_UniChar *unicode, int numChars); +static int UnicodeLength(const Tcl_UniChar *unicode); static void UpdateStringOfString(Tcl_Obj *objPtr); -#if !defined(TCL_NO_DEPRECATED) -static void DupUTF16StringInternalRep(Tcl_Obj *objPtr, - Tcl_Obj *copyPtr); -static int SetUTF16StringFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr); -static void UpdateStringOfUTF16String(Tcl_Obj *objPtr); -#endif - -#define ISCONTINUATION(bytes) (\ - ((((bytes)[0] & 0xC0) == 0x80) || (((bytes)[0] == '\xED') \ - && (((bytes)[1] & 0xF0) == 0xB0) && (((bytes)[2] & 0xC0) == 0x80)))) - -#define SURROGATE(c_) (((c_) & ~0x7FF) == 0xD800) -#define HIGH_SURROGATE(c_) (((c_) & ~0x3FF) == 0xD800) -#define LOW_SURROGATE(c_) (((c_) & ~0x3FF) == 0xDC00) /* * The structure below defines the string Tcl object type by means of * functions that can be invoked by generic object code. */ -#ifndef TCL_NO_DEPRECATED -const Tcl_ObjType tclStringType = { +Tcl_ObjType tclStringType = { "string", /* name */ FreeStringInternalRep, /* freeIntRepPro */ - DupUTF16StringInternalRep, /* dupIntRepProc */ - UpdateStringOfUTF16String, /* updateStringProc */ - SetUTF16StringFromAny /* setFromAnyProc */ -}; -#endif - -const Tcl_ObjType tclUniCharStringType = { - "utf32string", /* name */ - FreeStringInternalRep, /* freeIntRepPro */ DupStringInternalRep, /* dupIntRepProc */ UpdateStringOfString, /* updateStringProc */ SetStringFromAny /* setFromAnyProc */ }; -typedef struct { +/* + * The following structure is the internal rep for a String object. It keeps + * track of how much memory has been used and how much has been allocated for + * the Unicode and UTF string to enable growing and shrinking of the UTF and + * Unicode reps of the String object with fewer mallocs. To optimize string + * length and indexing operations, this structure also stores the number of + * characters (same of UTF and Unicode!) once that value has been computed. + * + * Under normal configurations, what Tcl calls "Unicode" is actually UTF-16 + * restricted to the Basic Multilingual Plane (i.e. U+00000 to U+0FFFF). This + * can be officially modified by altering the definition of Tcl_UniChar in + * tcl.h, but do not do that unless you are sure what you're doing! + */ + +typedef struct String { int numChars; /* The number of chars in the string. -1 means * this value has not been calculated. >= 0 * means that there is a valid Unicode rep, or * that the number of UTF bytes == the number * of chars. */ - int allocated; /* The amount of space actually allocated for + size_t allocated; /* The amount of space actually allocated for * the UTF string (minus 1 byte for the * termination char). */ - int maxChars; /* Max number of chars that can fit in the - * space allocated for the unicode array. */ + size_t uallocated; /* The amount of space actually allocated for + * the Unicode string (minus 2 bytes for the + * termination char). */ int hasUnicode; /* Boolean determining whether the string has * a Unicode representation. */ - Tcl_UniChar unicode[TCLFLEXARRAY]; /* The array of Unicode chars. The actual size - * of this field depends on the 'maxChars' + Tcl_UniChar unicode[2]; /* The array of Unicode chars. The actual size + * of this field depends on the 'uallocated' * field above. */ -} UniCharString; - -#define UNICHAR_STRING_MAXCHARS \ - (int)(((size_t)UINT_MAX - offsetof(UniCharString, unicode))/sizeof(Tcl_UniChar) - 1) -#define UNICHAR_STRING_SIZE(numChars) \ - (offsetof(UniCharString, unicode) + sizeof(Tcl_UniChar) + ((numChars) * sizeof(Tcl_UniChar))) -#define uniCharStringCheckLimits(numChars) \ - do { \ - if ((numChars) < 0 || (numChars) > UNICHAR_STRING_MAXCHARS) { \ - Tcl_Panic("max length for a Tcl unicode value (%d chars) exceeded", \ - UNICHAR_STRING_MAXCHARS); \ - } \ - } while (0) -#define uniCharStringAttemptAlloc(numChars) \ - (UniCharString *) attemptckalloc(UNICHAR_STRING_SIZE(numChars)) -#define uniCharStringAlloc(numChars) \ - (UniCharString *) ckalloc(UNICHAR_STRING_SIZE(numChars)) -#define uniCharStringRealloc(ptr, numChars) \ - (UniCharString *) ckrealloc((ptr), UNICHAR_STRING_SIZE(numChars)) -#define uniCharStringAttemptRealloc(ptr, numChars) \ - (UniCharString *) attemptckrealloc((ptr), UNICHAR_STRING_SIZE(numChars)) -#define GET_UNICHAR_STRING(objPtr) \ - ((UniCharString *) (objPtr)->internalRep.twoPtrValue.ptr1) -#define SET_UNICHAR_STRING(objPtr, stringPtr) \ - ((objPtr)->internalRep.twoPtrValue.ptr2 = NULL), \ - ((objPtr)->internalRep.twoPtrValue.ptr1 = (void *) (stringPtr)) - - -#ifndef TCL_NO_DEPRECATED -static void -DupUTF16StringInternalRep( - Tcl_Obj *srcPtr, /* Object with internal rep to copy. Must have - * an internal rep of type "String". */ - Tcl_Obj *copyPtr) /* Object with internal rep to set. Must not - * currently have an internal rep.*/ -{ - String *srcStringPtr = GET_STRING(srcPtr); - size_t size = offsetof(String, unicode) + (((srcStringPtr->allocated) + 1U) * sizeof(unsigned short)); - String *copyStringPtr = (String *)ckalloc(size); - memcpy(copyStringPtr, srcStringPtr, size); - - SET_STRING(copyPtr, copyStringPtr); - copyPtr->typePtr = &tclStringType; -} - -static int -SetUTF16StringFromAny( - TCL_UNUSED(Tcl_Interp *), - Tcl_Obj *objPtr) /* The object to convert. */ -{ - if (!TclHasInternalRep(objPtr, &tclStringType)) { - Tcl_DString ds; - - /* - * Convert whatever we have into an untyped value. Just A String. - */ - - (void) TclGetString(objPtr); - TclFreeInternalRep(objPtr); - - /* - * Create a basic String internalrep that just points to the UTF-8 string - * already in place at objPtr->bytes. - */ - - Tcl_DStringInit(&ds); - unsigned short *utf16string = Tcl_UtfToChar16DString(objPtr->bytes, objPtr->length, &ds); - int size = Tcl_DStringLength(&ds); - String *stringPtr = (String *)ckalloc((offsetof(String, unicode) + sizeof(unsigned short)) + size); - - memcpy(stringPtr->unicode, utf16string, size); - Tcl_DStringFree(&ds); - size /= sizeof(unsigned short); - stringPtr->unicode[size] = 0; - - stringPtr->numChars = size; - stringPtr->allocated = size; - stringPtr->maxChars = size; - stringPtr->hasUnicode = 1; - SET_STRING(objPtr, stringPtr); - objPtr->typePtr = &tclStringType; - } - return TCL_OK; -} +} String; + +#define STRING_MAXCHARS \ + (1 + (int)(((size_t)UINT_MAX - sizeof(String))/sizeof(Tcl_UniChar))) +#define STRING_UALLOC(numChars) \ + ((numChars) * sizeof(Tcl_UniChar)) +#define STRING_SIZE(ualloc) \ + ((unsigned) ((ualloc) \ + ? (sizeof(String) - sizeof(Tcl_UniChar) + (ualloc)) \ + : sizeof(String))) +#define stringCheckLimits(numChars) \ + if ((numChars) < 0 || (numChars) > STRING_MAXCHARS) { \ + Tcl_Panic("max length for a Tcl unicode value (%d chars) exceeded", \ + STRING_MAXCHARS); \ + } +#define stringRealloc(ptr, numChars) \ + (String *) ckrealloc((char *) ptr, \ + (unsigned) STRING_SIZE(STRING_UALLOC(numChars)) ) +#define stringAttemptRealloc(ptr, numChars) \ + (String *) attemptckrealloc((char *) ptr, \ + (unsigned) STRING_SIZE(STRING_UALLOC(numChars)) ) +#define GET_STRING(objPtr) \ + ((String *) (objPtr)->internalRep.twoPtrValue.ptr1) +#define SET_STRING(objPtr, stringPtr) \ + ((objPtr)->internalRep.twoPtrValue.ptr1 = (void *) (stringPtr)) -static void -UpdateStringOfUTF16String( - Tcl_Obj *objPtr) /* Object with string rep to update. */ -{ - Tcl_DString ds; - String *stringPtr = GET_STRING(objPtr); - - Tcl_DStringInit(&ds); - const char *string = Tcl_Char16ToUtfDString(stringPtr->unicode, stringPtr->numChars, &ds); - - char *bytes = (char *)ckalloc(Tcl_DStringLength(&ds) + 1U); - memcpy(bytes, string, Tcl_DStringLength(&ds)); - bytes[Tcl_DStringLength(&ds)] = 0; - objPtr->bytes = bytes; - objPtr->length = Tcl_DStringLength(&ds); - Tcl_DStringFree(&ds); -} -#endif - /* * TCL STRING GROWTH ALGORITHM * @@ -235,7 +140,8 @@ UpdateStringOfUTF16String( * * Attempt to allocate 2 * (originalLength + appendLength) * On failure: - * attempt to allocate originalLength + 2*appendLength + TCL_MIN_GROWTH + * attempt to allocate originalLength + 2*appendLength + + * TCL_GROWTH_MIN_ALLOC * * This algorithm allows very good performance, as it rapidly increases the * memory allocated for a given string, which minimizes the number of @@ -248,125 +154,64 @@ UpdateStringOfUTF16String( * cover the request, but which hopefully will be less than the total * available memory. * - * The addition of TCL_MIN_GROWTH allows for efficient handling of very + * The addition of TCL_GROWTH_MIN_ALLOC allows for efficient handling of very * small appends. Without this extra slush factor, a sequence of several small * appends would cause several memory allocations. As long as - * TCL_MIN_GROWTH is a reasonable size, we can avoid that behavior. + * TCL_GROWTH_MIN_ALLOC is a reasonable size, we can avoid that behavior. * * The growth algorithm can be tuned by adjusting the following parameters: * - * TCL_MIN_GROWTH Additional space, in bytes, to allocate when + * TCL_GROWTH_MIN_ALLOC Additional space, in bytes, to allocate when * the double allocation has failed. Default is - * 1024 (1 kilobyte). See tclInt.h. + * 1024 (1 kilobyte). */ -#ifndef TCL_MIN_UNICHAR_GROWTH -#define TCL_MIN_UNICHAR_GROWTH TCL_MIN_GROWTH/sizeof(Tcl_UniChar) +#ifndef TCL_GROWTH_MIN_ALLOC +#define TCL_GROWTH_MIN_ALLOC 1024 #endif static void -GrowStringBuffer( - Tcl_Obj *objPtr, - Tcl_Size needed, /* Not including terminating nul */ - int flag) /* If 0, try to overallocate */ -{ - /* - * Preconditions: - * objPtr->typePtr == &tclStringType - * needed > stringPtr->allocated - * flag || objPtr->bytes != NULL - */ - - UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr); - char *ptr = NULL; - Tcl_Size capacity; - - if (objPtr->bytes == &tclEmptyString) { - objPtr->bytes = NULL; - } - if (flag == 0 || stringPtr->allocated > 0) { - if (needed <= INT_MAX / 2) { - capacity = 2 * needed; - ptr = (char *)attemptckrealloc(objPtr->bytes, capacity + 1U); - } - if (ptr == NULL) { - /* - * Take care computing the amount of modest growth to avoid - * overflow into invalid argument values for capacity. - */ - - unsigned int limit = INT_MAX - needed; - unsigned int extra = needed - objPtr->length + TCL_MIN_GROWTH; - int growth = (int) ((extra > limit) ? limit : extra); - - capacity = needed + growth; - ptr = (char *)attemptckrealloc(objPtr->bytes, capacity + 1U); - } - } - if (ptr == NULL) { - /* - * First allocation - just big enough; or last chance fallback. - */ - - capacity = needed; - ptr = (char *)ckrealloc(objPtr->bytes, capacity + 1U); - } - objPtr->bytes = ptr; - stringPtr->allocated = capacity; - memset(ptr + objPtr->length, 0, capacity + 1U - objPtr->length); -} - -static void GrowUnicodeBuffer( Tcl_Obj *objPtr, - Tcl_Size needed) + int needed) { - /* - * Preconditions: - * objPtr->typePtr == &tclStringType - * needed > stringPtr->maxChars - * needed < UNICHAR_STRING_MAXCHARS + /* Pre-conditions: + * objPtr->typePtr == &tclStringType + * STRING_UALLOC(needed) > stringPtr->uallocated + * needed < STRING_MAXCHARS */ + String *ptr = NULL, *stringPtr = GET_STRING(objPtr); + int attempt; - UniCharString *ptr = NULL, *stringPtr = GET_UNICHAR_STRING(objPtr); - Tcl_Size capacity; - - if (stringPtr->maxChars > 0) { - /* - * Subsequent appends - apply the growth algorithm. - */ - - if (needed <= UNICHAR_STRING_MAXCHARS / 2) { - capacity = 2 * needed; - ptr = uniCharStringAttemptRealloc(stringPtr, capacity); + if (stringPtr->uallocated > 0) { + /* Subsequent appends - apply the growth algorithm. */ + attempt = 2 * needed; + if (attempt >= 0 && attempt <= STRING_MAXCHARS) { + ptr = stringAttemptRealloc(stringPtr, attempt); } if (ptr == NULL) { /* * Take care computing the amount of modest growth to avoid - * overflow into invalid argument values for capacity. + * overflow into invalid argument values for attempt. */ - - unsigned int limit = UNICHAR_STRING_MAXCHARS - needed; + unsigned int limit = STRING_MAXCHARS - needed; unsigned int extra = needed - stringPtr->numChars - + TCL_MIN_UNICHAR_GROWTH; + + TCL_GROWTH_MIN_ALLOC/sizeof(Tcl_UniChar); int growth = (int) ((extra > limit) ? limit : extra); - - capacity = needed + growth; - ptr = uniCharStringAttemptRealloc(stringPtr, capacity); + attempt = needed + growth; + ptr = stringAttemptRealloc(stringPtr, attempt); } } if (ptr == NULL) { - /* - * First allocation - just big enough; or last chance fallback. - */ - - capacity = needed; - ptr = uniCharStringRealloc(stringPtr, capacity); + /* First allocation - just big enough; or last chance fallback. */ + attempt = needed; + ptr = stringRealloc(stringPtr, attempt); } stringPtr = ptr; - stringPtr->maxChars = capacity; - SET_UNICHAR_STRING(objPtr, stringPtr); + stringPtr->uallocated = STRING_UALLOC(attempt); + SET_STRING(objPtr, stringPtr); } + /* *---------------------------------------------------------------------- @@ -399,7 +244,7 @@ Tcl_Obj * Tcl_NewStringObj( const char *bytes, /* Points to the first of the length bytes * used to initialize the new object. */ - Tcl_Size length) /* The number of bytes to copy from "bytes" + int length) /* The number of bytes to copy from "bytes" * when initializing the new object. If * negative, use bytes up to the first NUL * byte. */ @@ -411,11 +256,12 @@ Tcl_Obj * Tcl_NewStringObj( const char *bytes, /* Points to the first of the length bytes * used to initialize the new object. */ - Tcl_Size length) /* The number of bytes to copy from "bytes" - * when initializing the new object. If negative, - * use bytes up to the first NUL byte. */ + int length) /* The number of bytes to copy from "bytes" + * when initializing the new object. If + * negative, use bytes up to the first NUL + * byte. */ { - Tcl_Obj *objPtr; + register Tcl_Obj *objPtr; if (length < 0) { length = (bytes? strlen(bytes) : 0); @@ -459,15 +305,16 @@ Tcl_Obj * Tcl_DbNewStringObj( const char *bytes, /* Points to the first of the length bytes * used to initialize the new object. */ - Tcl_Size length, /* The number of bytes to copy from "bytes" - * when initializing the new object. If negative, - * use bytes up to the first NUL byte. */ + int length, /* The number of bytes to copy from "bytes" + * when initializing the new object. If + * negative, use bytes up to the first NUL + * byte. */ const char *file, /* The name of the source file calling this * function; used for debugging. */ int line) /* Line number in the source file; used for * debugging. */ { - Tcl_Obj *objPtr; + register Tcl_Obj *objPtr; if (length < 0) { length = (bytes? strlen(bytes) : 0); @@ -481,12 +328,14 @@ Tcl_Obj * Tcl_DbNewStringObj( const char *bytes, /* Points to the first of the length bytes * used to initialize the new object. */ - Tcl_Size length, /* The number of bytes to copy from "bytes" + register int length, /* The number of bytes to copy from "bytes" * when initializing the new object. If * negative, use bytes up to the first NUL * byte. */ - TCL_UNUSED(const char *) /*file*/, - TCL_UNUSED(int) /*line*/) + const char *file, /* The name of the source file calling this + * function; used for debugging. */ + int line) /* Line number in the source file; used for + * debugging. */ { return Tcl_NewStringObj(bytes, length); } @@ -512,10 +361,10 @@ Tcl_DbNewStringObj( */ Tcl_Obj * -TclNewUnicodeObj( +Tcl_NewUnicodeObj( const Tcl_UniChar *unicode, /* The unicode string used to initialize the * new object. */ - Tcl_Size numChars) /* Number of characters in the unicode + int numChars) /* Number of characters in the unicode * string. */ { Tcl_Obj *objPtr; @@ -525,39 +374,6 @@ TclNewUnicodeObj( return objPtr; } -#if !defined(TCL_NO_DEPRECATED) -Tcl_Obj * -Tcl_NewUnicodeObj( - const unsigned short *unicode, /* The unicode string used to initialize the - * new object. */ - int numChars) /* Number of characters in the unicode - * string. */ -{ - Tcl_Obj *objPtr; - - TclNewObj(objPtr); - TclInvalidateStringRep(objPtr); - - if (numChars < 0) { - numChars = UTF16Length(unicode); - } - - String *stringPtr = (String *)ckalloc((offsetof(String, unicode) - + sizeof(unsigned short)) + numChars * sizeof(unsigned short)); - memcpy(stringPtr->unicode, unicode, numChars * sizeof(unsigned short)); - stringPtr->unicode[numChars] = 0; - - stringPtr->numChars = numChars; - stringPtr->allocated = numChars; - stringPtr->maxChars = numChars; - stringPtr->hasUnicode = 1; - SET_STRING(objPtr, stringPtr); - objPtr->typePtr = &tclStringType; - - return objPtr; -} -#endif - /* *---------------------------------------------------------------------- * @@ -566,7 +382,7 @@ Tcl_NewUnicodeObj( * Get the length of the Unicode string from the Tcl object. * * Results: - * Pointer to Unicode string representing the Unicode object. + * Pointer to unicode string representing the unicode object. * * Side effects: * Frees old internal rep. Allocates memory for new "String" internal @@ -575,144 +391,70 @@ Tcl_NewUnicodeObj( *---------------------------------------------------------------------- */ -Tcl_Size -TclGetCharLength( +int +Tcl_GetCharLength( Tcl_Obj *objPtr) /* The String object to get the num chars * of. */ { - UniCharString *stringPtr; - Tcl_Size numChars; - - /* - * Quick, no-shimmer return for short string reps. - */ - - if ((objPtr->bytes) && (objPtr->length < 2)) { - /* 0 bytes -> 0 chars; 1 byte -> 1 char */ - return objPtr->length; - } - - /* - * Optimize the case where we're really dealing with a bytearray object; - * we don't need to convert to a string to perform the get-length operation. - * - * Starting in Tcl 8.7, we check for a "pure" bytearray, because the - * machinery behind that test is using a proper bytearray ObjType. We - * could also compute length of an improper bytearray without shimmering - * but there's no value in that. We *want* to shimmer an improper bytearray - * because improper bytearrays have worthless internal reps. - */ - - if (TclIsPureByteArray(objPtr)) { - (void) Tcl_GetByteArrayFromObj(objPtr, &numChars); - return numChars; - } - - /* - * OK, need to work with the object as a string. - */ + String *stringPtr; SetStringFromAny(NULL, objPtr); - stringPtr = GET_UNICHAR_STRING(objPtr); - numChars = stringPtr->numChars; - - /* - * If numChars is unknown, compute it. - */ - - if (numChars < 0) { - TclNumUtfCharsM(numChars, objPtr->bytes, objPtr->length); - stringPtr->numChars = numChars; - } - return numChars; -} - -#if !defined(TCL_NO_DEPRECATED) -#undef Tcl_GetCharLength -Tcl_Size -Tcl_GetCharLength( - Tcl_Obj *objPtr) /* The String object to get the num chars - * of. */ -{ - Tcl_Size numChars = 0; + stringPtr = GET_STRING(objPtr); /* - * Quick, no-shimmer return for short string reps. + * If numChars is unknown, then calculate the number of characaters while + * populating the Unicode string. */ - if ((objPtr->bytes) && (objPtr->length < 2)) { - /* 0 bytes -> 0 chars; 1 byte -> 1 char */ - return objPtr->length; - } - - /* - * Optimize the case where we're really dealing with a bytearray object; - * we don't need to convert to a string to perform the get-length operation. - * - * Starting in Tcl 8.7, we check for a "pure" bytearray, because the - * machinery behind that test is using a proper bytearray ObjType. We - * could also compute length of an improper bytearray without shimmering - * but there's no value in that. We *want* to shimmer an improper bytearray - * because improper bytearrays have worthless internal reps. - */ - - if (TclIsPureByteArray(objPtr)) { - (void) Tcl_GetByteArrayFromObj(objPtr, &numChars); - } else { - Tcl_GetString(objPtr); - numChars = TclNumUtfChars(objPtr->bytes, objPtr->length); - } - - return numChars; -} -#endif + if (stringPtr->numChars == -1) { + register int i = objPtr->length; + register unsigned char *str = (unsigned char *) objPtr->bytes; + /* + * This is a speed sensitive function, so run specially over the + * string to count continuous ascii characters before resorting to the + * Tcl_NumUtfChars call. This is a long form of: + stringPtr->numChars = Tcl_NumUtfChars(objPtr->bytes,objPtr->length); + * + * TODO: Consider macro-izing this. + */ -/* - *---------------------------------------------------------------------- - * - * TclCheckEmptyString -- - * - * Determine whether the string value of an object is or would be the - * empty string, without generating a string representation. - * - * Results: - * Returns 1 if empty, 0 if not, and -1 if unknown. - * - * Side effects: - * None. - * - *---------------------------------------------------------------------- - */ -int -TclCheckEmptyString( - Tcl_Obj *objPtr) -{ - Tcl_Size length = TCL_INDEX_NONE; + while (i && (*str < 0xC0)) { + i--; + str++; + } + stringPtr->numChars = objPtr->length - i; + if (i) { + stringPtr->numChars += Tcl_NumUtfChars(objPtr->bytes + + (objPtr->length - i), i); + } - if (objPtr->bytes == &tclEmptyString) { - return TCL_EMPTYSTRING_YES; - } + if (stringPtr->numChars == objPtr->length) { + /* + * Since we've just calculated the number of chars, and all UTF + * chars are 1-byte long, we don't need to store the unicode + * string. + */ - if (TclIsPureByteArray(objPtr) - && TclGetCharLength(objPtr) == 0) { - return TCL_EMPTYSTRING_YES; - } + stringPtr->hasUnicode = 0; + } else { + /* + * Since we've just calucalated the number of chars, and not all + * UTF chars are 1-byte long, go ahead and populate the unicode + * string. + */ - if (TclListObjIsCanonical(objPtr)) { - TclListObjLength(NULL, objPtr, &length); - return length == 0; - } + FillUnicodeRep(objPtr); - if (TclIsPureDict(objPtr)) { - Tcl_DictObjSize(NULL, objPtr, &length); - return length == 0; - } + /* + * We need to fetch the pointer again because we have just + * reallocated the structure to make room for the Unicode data. + */ - if (objPtr->bytes == NULL) { - return TCL_EMPTYSTRING_UNKNOWN; + stringPtr = GET_STRING(objPtr); + } } - return objPtr->length == 0; + return stringPtr->numChars; } /* @@ -720,9 +462,8 @@ TclCheckEmptyString( * * Tcl_GetUniChar -- * - * Get the index'th Unicode character from the String object. If index - * is out of range or it references a low surrogate preceded by a high - * surrogate, the result = -1; + * Get the index'th Unicode character from the String object. The index + * is assumed to be in the appropriate range. * * Results: * Returns the index'th Unicode character in the Object. @@ -733,123 +474,45 @@ TclCheckEmptyString( *---------------------------------------------------------------------- */ -#if !defined(TCL_NO_DEPRECATED) -#undef Tcl_GetUniChar -int +Tcl_UniChar Tcl_GetUniChar( Tcl_Obj *objPtr, /* The object to get the Unicode charater * from. */ - Tcl_Size index) /* Get the index'th Unicode character. */ + int index) /* Get the index'th Unicode character. */ { + Tcl_UniChar unichar; String *stringPtr; - int ch; - - if (index < 0) { - return -1; - } - /* - * Optimize the case where we're really dealing with a ByteArray object - * we don't need to convert to a string to perform the indexing operation. - */ - - if (TclIsPureByteArray(objPtr)) { - Tcl_Size length; - unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); - if (index >= length) { - return -1; - } - - return bytes[index]; - } - - /* - * OK, need to work with the object as a string. - */ - - SetUTF16StringFromAny(NULL, objPtr); + SetStringFromAny(NULL, objPtr); stringPtr = GET_STRING(objPtr); - if (index >= stringPtr->numChars) { - return -1; - } - ch = stringPtr->unicode[index]; - /* See: bug [11ae2be95dac9417] */ - if (SURROGATE(ch)) { - if (ch & 0x400) { - if ((index > 0) - && HIGH_SURROGATE(stringPtr->unicode[index-1])) { - ch = -1; /* low surrogate preceded by high surrogate */ - } - } else if ((++index < stringPtr->numChars) - && LOW_SURROGATE(stringPtr->unicode[index])) { - /* high surrogate followed by low surrogate */ - ch = (((ch & 0x3FF) << 10) | - (stringPtr->unicode[index] & 0x3FF)) + 0x10000; - } - } - return ch; -} -#endif - -int -TclGetUniChar( - Tcl_Obj *objPtr, /* The object to get the Unicode character - * from. */ - Tcl_Size index) /* Get the index'th Unicode character. */ -{ - UniCharString *stringPtr; - int ch; - - if (index < 0) { - return -1; - } + if (stringPtr->numChars == -1) { + /* + * We haven't yet calculated the length, so we don't have the Unicode + * str. We need to know the number of chars before we can do indexing. + */ - /* - * Optimize the case where we're really dealing with a ByteArray object - * we don't need to convert to a string to perform the indexing operation. - */ + Tcl_GetCharLength(objPtr); - if (TclIsPureByteArray(objPtr)) { - Tcl_Size length; - unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); - if (index >= length) { - return -1; - } + /* + * We need to fetch the pointer again because we may have just + * reallocated the structure. + */ - return bytes[index]; + stringPtr = GET_STRING(objPtr); } - - /* - * OK, need to work with the object as a string. - */ - - SetStringFromAny(NULL, objPtr); - stringPtr = GET_UNICHAR_STRING(objPtr); - if (stringPtr->hasUnicode == 0) { /* - * If numChars is unknown, compute it. + * All of the characters in the Utf string are 1 byte chars, so we + * don't store the unicode char. We get the Utf string and convert the + * index'th byte to a Unicode character. */ - if (stringPtr->numChars == TCL_INDEX_NONE) { - TclNumUtfCharsM(stringPtr->numChars, objPtr->bytes, objPtr->length); - } - if (index >= stringPtr->numChars) { - return -1; - } - if (stringPtr->numChars == objPtr->length) { - return (unsigned char) objPtr->bytes[index]; - } - FillUnicodeRep(objPtr); - stringPtr = GET_UNICHAR_STRING(objPtr); - } - - if (index >= stringPtr->numChars) { - return -1; + unichar = (Tcl_UniChar) objPtr->bytes[index]; + } else { + unichar = stringPtr->unicode[index]; } - ch = stringPtr->unicode[index]; - return ch; + return unichar; } /* @@ -859,7 +522,7 @@ TclGetUniChar( * * Get the Unicode form of the String object. If the object is not * already a String object, it will be converted to one. If the String - * object does not have a Unicode rep, then one is created from the UTF + * object does not have a Unicode rep, then one is create from the UTF * string format. * * Results: @@ -871,21 +534,41 @@ TclGetUniChar( *---------------------------------------------------------------------- */ -#ifndef TCL_NO_DEPRECATED -#undef Tcl_GetUnicode -unsigned short * +Tcl_UniChar * Tcl_GetUnicode( - Tcl_Obj *objPtr) /* The object to find the Unicode string + Tcl_Obj *objPtr) /* The object to find the unicode string * for. */ { - return Tcl_GetUnicodeFromObj(objPtr, NULL); + String *stringPtr; + + SetStringFromAny(NULL, objPtr); + stringPtr = GET_STRING(objPtr); + + if ((stringPtr->numChars == -1) || (stringPtr->hasUnicode == 0)) { + /* + * We haven't yet calculated the length, or all of the characters in + * the Utf string are 1 byte chars (so we didn't store the unicode + * str). Since this function must return a unicode string, and one has + * not yet been stored, force the Unicode to be calculated and stored + * now. + */ + + FillUnicodeRep(objPtr); + + /* + * We need to fetch the pointer again because we have just reallocated + * the structure to make room for the Unicode data. + */ + + stringPtr = GET_STRING(objPtr); + } + return stringPtr->unicode; } -#endif /* TCL_NO_DEPRECATED */ /* *---------------------------------------------------------------------- * - * Tcl_GetUnicodeFromObj/TclGetUnicodeFromObj -- + * Tcl_GetUnicodeFromObj -- * * Get the Unicode form of the String object with length. If the object * is not already a String object, it will be converted to one. If the @@ -902,49 +585,42 @@ Tcl_GetUnicode( */ Tcl_UniChar * -TclGetUnicodeFromObj( - Tcl_Obj *objPtr, /* The object to find the Unicode string +Tcl_GetUnicodeFromObj( + Tcl_Obj *objPtr, /* The object to find the unicode string * for. */ int *lengthPtr) /* If non-NULL, the location where the string - * rep's Tcl_UniChar length should be stored. If + * rep's unichar length should be stored. If * NULL, no length is stored. */ { - UniCharString *stringPtr; + String *stringPtr; SetStringFromAny(NULL, objPtr); - stringPtr = GET_UNICHAR_STRING(objPtr); + stringPtr = GET_STRING(objPtr); - if (stringPtr->hasUnicode == 0) { - FillUnicodeRep(objPtr); - stringPtr = GET_UNICHAR_STRING(objPtr); - } + if ((stringPtr->numChars == -1) || (stringPtr->hasUnicode == 0)) { + /* + * We haven't yet calculated the length, or all of the characters in + * the Utf string are 1 byte chars (so we didn't store the unicode + * str). Since this function must return a unicode string, and one has + * not yet been stored, force the Unicode to be calculated and stored + * now. + */ - if (lengthPtr != NULL) { - *lengthPtr = stringPtr->numChars; - } - return stringPtr->unicode; -} + FillUnicodeRep(objPtr); -#if !defined(TCL_NO_DEPRECATED) -unsigned short * -Tcl_GetUnicodeFromObj( - Tcl_Obj *objPtr, /* The object to find the Unicode string - * for. */ - Tcl_Size *lengthPtr) /* If non-NULL, the location where the string - * rep's Tcl_UniChar length should be stored. If - * NULL, no length is stored. */ -{ - String *stringPtr; + /* + * We need to fetch the pointer again because we have just reallocated + * the structure to make room for the Unicode data. + */ - SetUTF16StringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_STRING(objPtr); + } if (lengthPtr != NULL) { *lengthPtr = stringPtr->numChars; } return stringPtr->unicode; } -#endif /* *---------------------------------------------------------------------- @@ -953,9 +629,8 @@ Tcl_GetUnicodeFromObj( * * Create a Tcl Object that contains the chars between first and last of * the object indicated by "objPtr". If the object is not already a - * String object, convert it to one. If first is negative, the - * returned string start at the beginning of objPtr. If last is - * negative, the returned string ends at the end of objPtr. + * String object, convert it to one. The first and last indices are + * assumed to be in the appropriate range. * * Results: * Returns a new Tcl Object of the String type. @@ -966,131 +641,58 @@ Tcl_GetUnicodeFromObj( *---------------------------------------------------------------------- */ -#if !defined(TCL_NO_DEPRECATED) -#undef Tcl_GetRange Tcl_Obj * Tcl_GetRange( Tcl_Obj *objPtr, /* The Tcl object to find the range of. */ - Tcl_Size first, /* First index of the range. */ - Tcl_Size last) /* Last index of the range. */ + int first, /* First index of the range. */ + int last) /* Last index of the range. */ { Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ - Tcl_Size length; - - if (first < 0) { - first = 0; - } - - /* - * Optimize the case where we're really dealing with a bytearray object - * we don't need to convert to a string to perform the substring operation. - */ - - if (TclIsPureByteArray(objPtr)) { - unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); - - if (last < 0 || last >= length) { - last = length - 1; - } - if (last < first) { - TclNewObj(newObjPtr); - return newObjPtr; - } - return Tcl_NewByteArrayObj(bytes + first, last - first + 1); - } - - Tcl_Size numChars = TclNumUtfChars(objPtr->bytes, objPtr->length); - - if (last < 0 || last >= numChars) { - last = numChars - 1; - } - if (last < first) { - TclNewObj(newObjPtr); - return newObjPtr; - } - const char *begin = Tcl_UtfAtIndex(objPtr->bytes, first); - const char *end = Tcl_UtfAtIndex(objPtr->bytes, last + 1); - return Tcl_NewStringObj(begin, end - begin); -} -#endif + String *stringPtr; -Tcl_Obj * -TclGetRange( - Tcl_Obj *objPtr, /* The Tcl object to find the range of. */ - Tcl_Size first, /* First index of the range. */ - Tcl_Size last) /* Last index of the range. */ -{ - Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ - UniCharString *stringPtr; - Tcl_Size length; + SetStringFromAny(NULL, objPtr); + stringPtr = GET_STRING(objPtr); - if (first < 0) { - first = 0; - } + if (stringPtr->numChars == -1) { + /* + * We haven't yet calculated the length, so we don't have the Unicode + * str. We need to know the number of chars before we can do indexing. + */ - /* - * Optimize the case where we're really dealing with a bytearray object - * we don't need to convert to a string to perform the substring operation. - */ + Tcl_GetCharLength(objPtr); - if (TclIsPureByteArray(objPtr)) { - unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); + /* + * We need to fetch the pointer again because we may have just + * reallocated the structure. + */ - if (last < 0 || last >= length) { - last = length - 1; - } - if (last < first) { - TclNewObj(newObjPtr); - return newObjPtr; - } - return Tcl_NewByteArrayObj(bytes + first, last - first + 1); + stringPtr = GET_STRING(objPtr); } - /* - * OK, need to work with the object as a string. - */ + if (objPtr->bytes && (stringPtr->numChars == objPtr->length)) { + char *str = TclGetString(objPtr); - SetStringFromAny(NULL, objPtr); - stringPtr = GET_UNICHAR_STRING(objPtr); - - if (stringPtr->hasUnicode == 0) { /* - * If numChars is unknown, compute it. + * All of the characters in the Utf string are 1 byte chars, so we + * don't store the unicode char. Create a new string object containing + * the specified range of chars. */ - if (stringPtr->numChars == TCL_INDEX_NONE) { - TclNumUtfCharsM(stringPtr->numChars, objPtr->bytes, objPtr->length); - } - if (stringPtr->numChars == objPtr->length) { - if (last < 0 || last >= stringPtr->numChars) { - last = stringPtr->numChars - 1; - } - if (last < first) { - TclNewObj(newObjPtr); - return newObjPtr; - } - newObjPtr = Tcl_NewStringObj(objPtr->bytes + first, last - first + 1); + newObjPtr = Tcl_NewStringObj(&str[first], last-first+1); - /* - * Since we know the char length of the result, store it. - */ + /* + * Since we know the new string only has 1-byte chars, we can set it's + * numChars field. + */ - SetStringFromAny(NULL, newObjPtr); - stringPtr = GET_UNICHAR_STRING(newObjPtr); - stringPtr->numChars = newObjPtr->length; - return newObjPtr; - } - FillUnicodeRep(objPtr); - stringPtr = GET_UNICHAR_STRING(objPtr); - } - if (last < 0 || last >= stringPtr->numChars) { - last = stringPtr->numChars - 1; - } - if (last < first) { - TclNewObj(newObjPtr); - return newObjPtr; + SetStringFromAny(NULL, newObjPtr); + stringPtr = GET_STRING(newObjPtr); + stringPtr->numChars = last-first+1; + } else { + newObjPtr = Tcl_NewUnicodeObj(stringPtr->unicode + first, + last-first+1); } - return TclNewUnicodeObj(stringPtr->unicode + first, last - first + 1); + return newObjPtr; } /* @@ -1116,10 +718,10 @@ TclGetRange( void Tcl_SetStringObj( - Tcl_Obj *objPtr, /* Object whose internal rep to init. */ + register Tcl_Obj *objPtr, /* Object whose internal rep to init. */ const char *bytes, /* Points to the first of the length bytes * used to initialize the object. */ - Tcl_Size length) /* The number of bytes to copy from "bytes" + register int length) /* The number of bytes to copy from "bytes" * when initializing the object. If negative, * use bytes up to the first NUL byte.*/ { @@ -1131,7 +733,8 @@ Tcl_SetStringObj( * Set the type to NULL and free any internal rep for the old type. */ - TclFreeInternalRep(objPtr); + TclFreeIntRep(objPtr); + objPtr->typePtr = NULL; /* * Free any old string rep, then set the string rep to a copy of the @@ -1150,97 +753,120 @@ Tcl_SetStringObj( * * Tcl_SetObjLength -- * - * Changes the length of the string representation of objPtr. + * This function changes the length of the string representation of an + * object. * * Results: * None. * * Side effects: - * If the size of objPtr's string representation is greater than length, a - * new terminating null byte is stored in objPtr->bytes at length, and - * bytes at positions past length have no meaning. If the length of the - * string representation is greater than length, the storage space is - * reallocated to length+1. - * - * The object's internal representation is changed to &tclStringType. + * If the size of objPtr's string representation is greater than length, + * then it is reduced to length and a new terminating null byte is stored + * in the strength. If the length of the string representation is greater + * than length, the storage space is reallocated to the given length; a + * null byte is stored at the end, but other bytes past the end of the + * original string representation are undefined. The object's internal + * representation is changed to "expendable string". * *---------------------------------------------------------------------- */ void Tcl_SetObjLength( - Tcl_Obj *objPtr, /* Pointer to object. This object must not + register Tcl_Obj *objPtr, /* Pointer to object. This object must not * currently be shared. */ - Tcl_Size length) /* Number of bytes desired for string + register int length) /* Number of bytes desired for string * representation of object, not including * terminating null byte. */ { - UniCharString *stringPtr; + String *stringPtr; if (length < 0) { - Tcl_Panic("Tcl_SetObjLength: length requested is negative: " - "%" TCL_SIZE_MODIFIER "d (integer overflow?)", length); + /* + * Setting to a negative length is nonsense. This is probably the + * result of overflowing the signed integer range. + */ + Tcl_Panic("Tcl_SetObjLength: negative length requested: " + "%d (integer overflow?)", length); } if (Tcl_IsShared(objPtr)) { Tcl_Panic("%s called with shared object", "Tcl_SetObjLength"); } + SetStringFromAny(NULL, objPtr); - if (objPtr->bytes && objPtr->length == length) { - return; - } + stringPtr = GET_STRING(objPtr); - SetStringFromAny(NULL, objPtr); - stringPtr = GET_UNICHAR_STRING(objPtr); + /* + * Check that we're not extending a pure unicode string. + */ - if (objPtr->bytes != NULL) { + if ((size_t)length > stringPtr->allocated && + (objPtr->bytes != NULL || stringPtr->hasUnicode == 0)) { /* - * Change length of an existing string rep. + * Not enough space in current string. Reallocate the string space and + * free the old string. */ - if (length > stringPtr->allocated) { - /* - * Need to enlarge the buffer. - */ - if (objPtr->bytes == &tclEmptyString) { - objPtr->bytes = (char *)ckalloc(length + 1U); - } else { - objPtr->bytes = (char *)ckrealloc(objPtr->bytes, length + 1U); + + if (objPtr->bytes != tclEmptyStringRep) { + objPtr->bytes = ckrealloc((char *) objPtr->bytes, + (unsigned) (length + 1)); + } else { + char *newBytes = ckalloc((unsigned) (length+1)); + + if (objPtr->bytes != NULL && objPtr->length != 0) { + memcpy(newBytes, objPtr->bytes, (size_t) objPtr->length); + TclInvalidateStringRep(objPtr); } - stringPtr->allocated = length; + objPtr->bytes = newBytes; } + stringPtr->allocated = length; + + /* + * Invalidate the unicode data. + */ + + stringPtr->hasUnicode = 0; + } + if (objPtr->bytes != NULL) { objPtr->length = length; - objPtr->bytes[length] = 0; + if (objPtr->bytes != tclEmptyStringRep) { + /* + * Ensure the string is NUL-terminated. + */ + + objPtr->bytes[length] = 0; + } /* - * Invalidate the Unicode data. + * Invalidate the unicode data. */ - stringPtr->numChars = TCL_INDEX_NONE; + stringPtr->numChars = -1; stringPtr->hasUnicode = 0; } else { /* * Changing length of pure unicode string. */ - uniCharStringCheckLimits(length); - if (length > stringPtr->maxChars) { - stringPtr = uniCharStringRealloc(stringPtr, length); - SET_UNICHAR_STRING(objPtr, stringPtr); - stringPtr->maxChars = length; + size_t uallocated = STRING_UALLOC(length); + + stringCheckLimits(length); + if (uallocated > stringPtr->uallocated) { + stringPtr = stringRealloc(stringPtr, length); + SET_STRING(objPtr, stringPtr); + stringPtr->uallocated = uallocated; } + stringPtr->numChars = length; + stringPtr->hasUnicode = (length > 0); /* - * Mark the new end of the Unicode string + * Ensure the string is NUL-terminated. */ - stringPtr->numChars = length; stringPtr->unicode[length] = 0; - stringPtr->hasUnicode = 1; - - /* - * Can only get here when objPtr->bytes == NULL. No need to invalidate - * the string rep. - */ + stringPtr->allocated = 0; + objPtr->length = 0; } } @@ -1269,90 +895,111 @@ Tcl_SetObjLength( int Tcl_AttemptSetObjLength( - Tcl_Obj *objPtr, /* Pointer to object. This object must not + register Tcl_Obj *objPtr, /* Pointer to object. This object must not * currently be shared. */ - Tcl_Size length) /* Number of bytes desired for string + register int length) /* Number of bytes desired for string * representation of object, not including * terminating null byte. */ { - UniCharString *stringPtr; + String *stringPtr; if (length < 0) { - /* Negative lengths => most likely integer overflow */ + /* + * Setting to a negative length is nonsense. This is probably the + * result of overflowing the signed integer range. + */ return 0; } - if (Tcl_IsShared(objPtr)) { Tcl_Panic("%s called with shared object", "Tcl_AttemptSetObjLength"); } - if (objPtr->bytes && objPtr->length == length) { - return 1; - } - SetStringFromAny(NULL, objPtr); - stringPtr = GET_UNICHAR_STRING(objPtr); - if (objPtr->bytes != NULL) { + stringPtr = GET_STRING(objPtr); + + /* + * Check that we're not extending a pure unicode string. + */ + + if (length > (int) stringPtr->allocated && + (objPtr->bytes != NULL || stringPtr->hasUnicode == 0)) { + char *newBytes; + /* - * Change length of an existing string rep. + * Not enough space in current string. Reallocate the string space and + * free the old string. */ - if (length > stringPtr->allocated) { - /* - * Need to enlarge the buffer. - */ - char *newBytes; - - if (objPtr->bytes == &tclEmptyString) { - newBytes = (char *)attemptckalloc(length + 1U); - } else { - newBytes = (char *)attemptckrealloc(objPtr->bytes, length + 1U); + if (objPtr->bytes != tclEmptyStringRep) { + newBytes = attemptckrealloc(objPtr->bytes, + (unsigned)(length + 1)); + if (newBytes == NULL) { + return 0; } + } else { + newBytes = attemptckalloc((unsigned) (length + 1)); if (newBytes == NULL) { return 0; } - objPtr->bytes = newBytes; - stringPtr->allocated = length; + if (objPtr->bytes != NULL && objPtr->length != 0) { + memcpy(newBytes, objPtr->bytes, (size_t) objPtr->length); + TclInvalidateStringRep(objPtr); + } } + objPtr->bytes = newBytes; + stringPtr->allocated = length; + + /* + * Invalidate the unicode data. + */ + stringPtr->hasUnicode = 0; + } + + if (objPtr->bytes != NULL) { objPtr->length = length; - objPtr->bytes[length] = 0; + if (objPtr->bytes != tclEmptyStringRep) { + /* + * Ensure the string is NULL-terminated. + */ + + objPtr->bytes[length] = 0; + } /* - * Invalidate the Unicode data. + * Invalidate the unicode data. */ - stringPtr->numChars = TCL_INDEX_NONE; + stringPtr->numChars = -1; stringPtr->hasUnicode = 0; } else { /* - * Changing length of pure Unicode string. + * Changing length of pure unicode string. */ - if (length > UNICHAR_STRING_MAXCHARS) { + size_t uallocated = STRING_UALLOC(length); + if (length > STRING_MAXCHARS) { return 0; } - if (length > stringPtr->maxChars) { - stringPtr = uniCharStringAttemptRealloc(stringPtr, length); + + if (uallocated > stringPtr->uallocated) { + stringPtr = stringAttemptRealloc(stringPtr, length); if (stringPtr == NULL) { return 0; } - SET_UNICHAR_STRING(objPtr, stringPtr); - stringPtr->maxChars = length; + SET_STRING(objPtr, stringPtr); + stringPtr->uallocated = uallocated; } + stringPtr->numChars = length; + stringPtr->hasUnicode = (length > 0); /* - * Mark the new end of the Unicode string. + * Ensure the string is NUL-terminated. */ stringPtr->unicode[length] = 0; - stringPtr->numChars = length; - stringPtr->hasUnicode = 1; - - /* - * Can only get here when objPtr->bytes == NULL. No need to invalidate - * the string rep. - */ + stringPtr->allocated = 0; + objPtr->length = 0; } return 1; } @@ -1373,80 +1020,46 @@ Tcl_AttemptSetObjLength( *--------------------------------------------------------------------------- */ -#if !defined(TCL_NO_DEPRECATED) void Tcl_SetUnicodeObj( Tcl_Obj *objPtr, /* The object to set the string of. */ - const unsigned short *unicode, /* The Unicode string used to initialize the + const Tcl_UniChar *unicode, /* The unicode string used to initialize the * object. */ - Tcl_Size numChars) /* Number of characters in the Unicode + int numChars) /* Number of characters in the unicode * string. */ { - String *stringPtr; - - if (numChars < 0) { - numChars = UTF16Length(unicode); - } - - /* - * Allocate enough space for the String structure + Unicode string. - */ - - stringCheckLimits(numChars); - stringPtr = stringAlloc(numChars); - SET_STRING(objPtr, stringPtr); - objPtr->typePtr = &tclStringType; - - stringPtr->maxChars = numChars; - memcpy(stringPtr->unicode, unicode, numChars * sizeof(unsigned char)); - stringPtr->unicode[numChars] = 0; - stringPtr->numChars = numChars; - stringPtr->hasUnicode = 1; - - TclInvalidateStringRep(objPtr); - stringPtr->allocated = numChars; -} - -static Tcl_Size -UTF16Length( - const unsigned short *ucs2Ptr) -{ - Tcl_Size numChars = 0; - - if (ucs2Ptr) { - while (numChars >= 0 && ucs2Ptr[numChars] != 0) { - numChars++; - } + if (Tcl_IsShared(objPtr)) { + Tcl_Panic("%s called with shared object", "Tcl_SetUnicodeObj"); } - stringCheckLimits(numChars); - return numChars; + TclFreeIntRep(objPtr); + SetUnicodeObj(objPtr, unicode, numChars); } -#endif -static Tcl_Size +static int UnicodeLength( const Tcl_UniChar *unicode) { - Tcl_Size numChars = 0; + int numChars = 0; if (unicode) { - while ((numChars >= 0) && (unicode[numChars] != 0)) { + while (numChars >= 0 && unicode[numChars] != 0) { numChars++; } } - uniCharStringCheckLimits(numChars); + stringCheckLimits(numChars); return numChars; } static void SetUnicodeObj( Tcl_Obj *objPtr, /* The object to set the string of. */ - const Tcl_UniChar *unicode, /* The Unicode string used to initialize the + const Tcl_UniChar *unicode, /* The unicode string used to initialize the * object. */ - Tcl_Size numChars) /* Number of characters in the Unicode + int numChars) /* Number of characters in the unicode * string. */ { - UniCharString *stringPtr; + String *stringPtr; + size_t uallocated; if (numChars < 0) { numChars = UnicodeLength(unicode); @@ -1456,19 +1069,20 @@ SetUnicodeObj( * Allocate enough space for the String structure + Unicode string. */ - uniCharStringCheckLimits(numChars); - stringPtr = uniCharStringAlloc(numChars); - SET_UNICHAR_STRING(objPtr, stringPtr); - objPtr->typePtr = &tclUniCharStringType; + stringCheckLimits(numChars); + uallocated = STRING_UALLOC(numChars); + stringPtr = (String *) ckalloc(STRING_SIZE(uallocated)); - stringPtr->maxChars = numChars; - memcpy(stringPtr->unicode, unicode, numChars * sizeof(Tcl_UniChar)); - stringPtr->unicode[numChars] = 0; stringPtr->numChars = numChars; - stringPtr->hasUnicode = 1; + stringPtr->uallocated = uallocated; + stringPtr->hasUnicode = (numChars > 0); + stringPtr->allocated = 0; + memcpy(stringPtr->unicode, unicode, uallocated); + stringPtr->unicode[numChars] = 0; TclInvalidateStringRep(objPtr); - stringPtr->allocated = 0; + objPtr->typePtr = &tclStringType; + SET_STRING(objPtr, stringPtr); } /* @@ -1491,21 +1105,26 @@ SetUnicodeObj( void Tcl_AppendLimitedToObj( - Tcl_Obj *objPtr, /* Points to the object to append to. */ + register Tcl_Obj *objPtr, /* Points to the object to append to. */ const char *bytes, /* Points to the bytes to append to the * object. */ - Tcl_Size length, /* The number of bytes available to be - * appended from "bytes". If -1, then - * all bytes up to a NUL byte are available. */ - Tcl_Size limit, /* The maximum number of bytes to append to + register int length, /* The number of bytes available to be + * appended from "bytes". If < 0, then all + * bytes up to a NUL byte are available. */ + register int limit, /* The maximum number of bytes to append to * the object. */ const char *ellipsis) /* Ellipsis marker string, appended to the * object to indicate not all available bytes * at "bytes" were appended. */ { - UniCharString *stringPtr; - Tcl_Size toCopy = 0; - Tcl_Size eLen = 0; + String *stringPtr; + int toCopy = 0; + + if (Tcl_IsShared(objPtr)) { + Tcl_Panic("%s called with shared object", "Tcl_AppendLimitedToObj"); + } + + SetStringFromAny(NULL, objPtr); if (length < 0) { length = (bytes ? strlen(bytes) : 0); @@ -1513,9 +1132,6 @@ Tcl_AppendLimitedToObj( if (length == 0) { return; } - if (limit <= 0) { - return; - } if (length <= limit) { toCopy = length; @@ -1523,12 +1139,8 @@ Tcl_AppendLimitedToObj( if (ellipsis == NULL) { ellipsis = "..."; } - eLen = strlen(ellipsis); - while (eLen > limit) { - eLen = Tcl_UtfPrev(ellipsis+eLen, ellipsis) - ellipsis; - } - - toCopy = Tcl_UtfPrev(bytes+limit+1-eLen, bytes) - bytes; + toCopy = (bytes == NULL) ? limit + : Tcl_UtfPrev(bytes+limit+1-strlen(ellipsis), bytes) - bytes; } /* @@ -1537,20 +1149,8 @@ Tcl_AppendLimitedToObj( * objPtr's string rep. */ - if (Tcl_IsShared(objPtr)) { - Tcl_Panic("%s called with shared object", "Tcl_AppendLimitedToObj"); - } - - SetStringFromAny(NULL, objPtr); - stringPtr = GET_UNICHAR_STRING(objPtr); - - /* If appended string starts with a continuation byte or a lower surrogate, - * force objPtr to unicode representation. See [7f1162a867] */ - if (bytes && ISCONTINUATION(bytes)) { - TclGetUnicodeFromObj(objPtr, NULL); - stringPtr = GET_UNICHAR_STRING(objPtr); - } - if (stringPtr->hasUnicode && (stringPtr->numChars > 0)) { + stringPtr = GET_STRING(objPtr); + if (stringPtr->hasUnicode != 0) { AppendUtfToUnicodeRep(objPtr, bytes, toCopy); } else { AppendUtfToUtfRep(objPtr, bytes, toCopy); @@ -1560,11 +1160,11 @@ Tcl_AppendLimitedToObj( return; } - stringPtr = GET_UNICHAR_STRING(objPtr); - if (stringPtr->hasUnicode && (stringPtr->numChars > 0)) { - AppendUtfToUnicodeRep(objPtr, ellipsis, eLen); + stringPtr = GET_STRING(objPtr); + if (stringPtr->hasUnicode != 0) { + AppendUtfToUnicodeRep(objPtr, ellipsis, -1); } else { - AppendUtfToUtfRep(objPtr, ellipsis, eLen); + AppendUtfToUtfRep(objPtr, ellipsis, -1); } } @@ -1587,14 +1187,14 @@ Tcl_AppendLimitedToObj( void Tcl_AppendToObj( - Tcl_Obj *objPtr, /* Points to the object to append to. */ + register Tcl_Obj *objPtr, /* Points to the object to append to. */ const char *bytes, /* Points to the bytes to append to the * object. */ - Tcl_Size length) /* The number of bytes to append from "bytes". - * If negative, then append all bytes up to NUL + register int length) /* The number of bytes to append from "bytes". + * If < 0, then append all bytes up to NUL * byte. */ { - Tcl_AppendLimitedToObj(objPtr, bytes, length, TCL_SIZE_MAX, NULL); + Tcl_AppendLimitedToObj(objPtr, bytes, length, INT_MAX, NULL); } /* @@ -1603,7 +1203,7 @@ Tcl_AppendToObj( * Tcl_AppendUnicodeToObj -- * * This function appends a Unicode string to an object in the most - * efficient manner possible. + * efficient manner possible. Length must be >= 0. * * Results: * None. @@ -1615,14 +1215,13 @@ Tcl_AppendToObj( */ void -TclAppendUnicodeToObj( - Tcl_Obj *objPtr, /* Points to the object to append to. */ - const Tcl_UniChar *unicode, /* The Unicode string to append to the +Tcl_AppendUnicodeToObj( + register Tcl_Obj *objPtr, /* Points to the object to append to. */ + const Tcl_UniChar *unicode, /* The unicode string to append to the * object. */ - Tcl_Size length) /* Number of chars in Unicode. Negative - * lengths means nul terminated */ + int length) /* Number of chars in "unicode". */ { - UniCharString *stringPtr; + String *stringPtr; if (Tcl_IsShared(objPtr)) { Tcl_Panic("%s called with shared object", "Tcl_AppendUnicodeToObj"); @@ -1633,7 +1232,7 @@ TclAppendUnicodeToObj( } SetStringFromAny(NULL, objPtr); - stringPtr = GET_UNICHAR_STRING(objPtr); + stringPtr = GET_STRING(objPtr); /* * If objPtr has a valid Unicode rep, then append the "unicode" to the @@ -1641,42 +1240,13 @@ TclAppendUnicodeToObj( * objPtr's string rep. */ - if (stringPtr->hasUnicode) { + if (stringPtr->hasUnicode != 0) { AppendUnicodeToUnicodeRep(objPtr, unicode, length); } else { AppendUnicodeToUtfRep(objPtr, unicode, length); } } -#if !defined(TCL_NO_DEPRECATED) -void -Tcl_AppendUnicodeToObj( - Tcl_Obj *objPtr, /* Points to the object to append to. */ - const unsigned short *unicode, /* The unicode string to append to the - * object. */ - Tcl_Size length) /* Number of chars in Unicode. Negative - * lengths means nul terminated */ -{ - String *stringPtr; - - if (Tcl_IsShared(objPtr)) { - Tcl_Panic("%s called with shared object", "Tcl_AppendUnicodeToObj"); - } - - if (length == 0) { - return; - } - - SetUTF16StringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); - stringPtr = stringAttemptRealloc(stringPtr, stringPtr->numChars + length); - memcpy(&stringPtr->unicode[stringPtr->numChars], unicode, length); - stringPtr->maxChars = stringPtr->allocated = stringPtr->numChars += length; - stringPtr->unicode[stringPtr->numChars] = 0; - SET_STRING(objPtr, stringPtr); -} -#endif - /* *---------------------------------------------------------------------- * @@ -1691,8 +1261,6 @@ Tcl_AppendUnicodeToObj( * Side effects: * The string rep of appendObjPtr is appended to the string * representation of objPtr. - * IMPORTANT: This routine does not and MUST NOT shimmer appendObjPtr. - * Callers are counting on that. * *---------------------------------------------------------------------- */ @@ -1702,104 +1270,36 @@ Tcl_AppendObjToObj( Tcl_Obj *objPtr, /* Points to the object to append to. */ Tcl_Obj *appendObjPtr) /* Object to append. */ { - UniCharString *stringPtr; - Tcl_Size length, numChars; - Tcl_Size appendNumChars = TCL_INDEX_NONE; - const char *bytes; - - /* - * Special case: second object is standard-empty is fast case. We know - * that appending nothing to anything leaves that starting anything... - */ - - if (appendObjPtr->bytes == &tclEmptyString) { - return; - } - - /* - * Handle append of one ByteArray object to another as a special case. - * Note that we only do this when the objects are pure so that the - * bytearray faithfully represent the true value; Otherwise appending the - * byte arrays together could lose information; - */ - - if ((TclIsPureByteArray(objPtr) || objPtr->bytes == &tclEmptyString) - && TclIsPureByteArray(appendObjPtr)) { - /* - * One might expect the code here to be - * - * bytes = Tcl_GetByteArrayFromObj(appendObjPtr, &length); - * TclAppendBytesToByteArray(objPtr, bytes, length); - * - * and essentially all of the time that would be fine. However, it - * would run into trouble in the case where objPtr and appendObjPtr - * point to the same thing. That may never be a good idea. It seems to - * violate Copy On Write, and we don't have any tests for the - * situation, since making any Tcl commands that call - * Tcl_AppendObjToObj() do that appears impossible (They honor Copy On - * Write!). For the sake of extensions that go off into that realm, - * though, here's a more complex approach that can handle all the - * cases. - * - * First, get the lengths. - */ - - Tcl_Size lengthSrc; - - (void) Tcl_GetByteArrayFromObj(objPtr, &length); - (void) Tcl_GetByteArrayFromObj(appendObjPtr, &lengthSrc); - - /* - * Grow buffer enough for the append. - */ - - TclAppendBytesToByteArray(objPtr, NULL, lengthSrc); - - /* - * Reset objPtr back to the original value. - */ - - Tcl_SetByteArrayLength(objPtr, length); - - /* - * Now do the append knowing that buffer growth cannot cause any - * trouble. - */ - - TclAppendBytesToByteArray(objPtr, - Tcl_GetByteArrayFromObj(appendObjPtr, (Tcl_Size *) NULL), lengthSrc); - return; - } - - /* - * Must append as strings. - */ + String *stringPtr; + int length, numChars, allOneByteChars; + char *bytes; SetStringFromAny(NULL, objPtr); - stringPtr = GET_UNICHAR_STRING(objPtr); - - /* If appended string starts with a continuation byte or a lower surrogate, - * force objPtr to unicode representation. See [7f1162a867] - * This fixes append-3.4, append-3.7 and utf-1.18 testcases. */ - if (ISCONTINUATION(TclGetString(appendObjPtr))) { - TclGetUnicodeFromObj(objPtr, NULL); - stringPtr = GET_UNICHAR_STRING(objPtr); - } + /* * If objPtr has a valid Unicode rep, then get a Unicode string from * appendObjPtr and append it. */ - if (stringPtr->hasUnicode) { + stringPtr = GET_STRING(objPtr); + if (stringPtr->hasUnicode != 0) { /* * If appendObjPtr is not of the "String" type, don't convert it. */ - if (TclHasInternalRep(appendObjPtr, &tclUniCharStringType)) { - Tcl_UniChar *unicode = - TclGetUnicodeFromObj(appendObjPtr, &numChars); + if (appendObjPtr->typePtr == &tclStringType) { + stringPtr = GET_STRING(appendObjPtr); + if ((stringPtr->numChars == -1) || (stringPtr->hasUnicode == 0)) { + /* + * If appendObjPtr is a string obj with no valid Unicode rep, + * then fill its unicode rep. + */ - AppendUnicodeToUnicodeRep(objPtr, unicode, numChars); + FillUnicodeRep(appendObjPtr); + stringPtr = GET_STRING(appendObjPtr); + } + AppendUnicodeToUnicodeRep(objPtr, stringPtr->unicode, + stringPtr->numChars); } else { bytes = TclGetStringFromObj(appendObjPtr, &length); AppendUtfToUnicodeRep(objPtr, bytes, length); @@ -1815,17 +1315,21 @@ Tcl_AppendObjToObj( bytes = TclGetStringFromObj(appendObjPtr, &length); + allOneByteChars = 0; numChars = stringPtr->numChars; - if ((numChars >= 0) && TclHasInternalRep(appendObjPtr, &tclUniCharStringType)) { - UniCharString *appendStringPtr = GET_UNICHAR_STRING(appendObjPtr); - - appendNumChars = appendStringPtr->numChars; + if ((numChars >= 0) && (appendObjPtr->typePtr == &tclStringType)) { + stringPtr = GET_STRING(appendObjPtr); + if ((stringPtr->numChars >= 0) && (stringPtr->numChars == length)) { + numChars += stringPtr->numChars; + allOneByteChars = 1; + } } AppendUtfToUtfRep(objPtr, bytes, length); - if ((numChars >= 0) && (appendNumChars >= 0)) { - stringPtr->numChars = numChars + appendNumChars; + if (allOneByteChars) { + stringPtr = GET_STRING(objPtr); + stringPtr->numChars = numChars; } } @@ -1834,8 +1338,8 @@ Tcl_AppendObjToObj( * * AppendUnicodeToUnicodeRep -- * - * Appends the contents of unicode to the Unicode rep of - * objPtr, which must already have a valid Unicode rep. + * This function appends the contents of "unicode" to the Unicode rep of + * "objPtr". objPtr must already have a valid Unicode rep. * * Results: * None. @@ -1850,10 +1354,10 @@ static void AppendUnicodeToUnicodeRep( Tcl_Obj *objPtr, /* Points to the object to append to. */ const Tcl_UniChar *unicode, /* String to append. */ - Tcl_Size appendNumChars) /* Number of chars of "unicode" to append. */ + int appendNumChars) /* Number of chars of "unicode" to append. */ { - UniCharString *stringPtr; - Tcl_Size numChars; + String *stringPtr; + int numChars; if (appendNumChars < 0) { appendNumChars = UnicodeLength(unicode); @@ -1863,10 +1367,10 @@ AppendUnicodeToUnicodeRep( } SetStringFromAny(NULL, objPtr); - stringPtr = GET_UNICHAR_STRING(objPtr); + stringPtr = GET_STRING(objPtr); /* - * If not enough space has been allocated for the Unicode rep, reallocate + * If not enough space has been allocated for the unicode rep, reallocate * the internal rep object with additional space. First try to double the * required allocation; if that fails, try a more modest increase. See the * "TCL STRING GROWTH ALGORITHM" comment at the top of this file for an @@ -1874,29 +1378,25 @@ AppendUnicodeToUnicodeRep( */ numChars = stringPtr->numChars + appendNumChars; - uniCharStringCheckLimits(numChars); - - if (numChars > stringPtr->maxChars) { - Tcl_Size offset = TCL_INDEX_NONE; + stringCheckLimits(numChars); + if (STRING_UALLOC(numChars) > stringPtr->uallocated) { /* - * Protect against case where Unicode points into the existing - * stringPtr->unicode array. Force it to follow any relocations due to - * the reallocs below. + * Protect against case where unicode points into the existing + * stringPtr->unicode array. Force it to follow any relocations + * due to the reallocs below. */ - + int offset = -1; if (unicode && unicode >= stringPtr->unicode - && unicode <= stringPtr->unicode + stringPtr->maxChars) { + && unicode <= stringPtr->unicode + + stringPtr->uallocated / sizeof(Tcl_UniChar)) { offset = unicode - stringPtr->unicode; } GrowUnicodeBuffer(objPtr, numChars); - stringPtr = GET_UNICHAR_STRING(objPtr); - - /* - * Relocate Unicode if needed; see above. - */ + stringPtr = GET_STRING(objPtr); + /* Relocate unicode if needed; see above. */ if (offset >= 0) { unicode = stringPtr->unicode + offset; } @@ -1908,7 +1408,7 @@ AppendUnicodeToUnicodeRep( */ if (unicode) { - memmove(stringPtr->unicode + stringPtr->numChars, unicode, + memcpy(stringPtr->unicode + stringPtr->numChars, unicode, appendNumChars * sizeof(Tcl_UniChar)); } stringPtr->unicode[numChars] = 0; @@ -1939,15 +1439,22 @@ static void AppendUnicodeToUtfRep( Tcl_Obj *objPtr, /* Points to the object to append to. */ const Tcl_UniChar *unicode, /* String to convert to UTF. */ - Tcl_Size numChars) /* Number of chars of Unicode to convert. */ + int numChars) /* Number of chars of "unicode" to convert. */ { - UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr); - - numChars = ExtendStringRepWithUnicode(objPtr, unicode, numChars); + Tcl_DString dsPtr; + const char *bytes; - if (stringPtr->numChars != TCL_INDEX_NONE) { - stringPtr->numChars += numChars; + if (numChars < 0) { + numChars = UnicodeLength(unicode); + } + if (numChars == 0) { + return; } + + Tcl_DStringInit(&dsPtr); + bytes = Tcl_UniCharToUtfDString(unicode, numChars, &dsPtr); + AppendUtfToUtfRep(objPtr, bytes, Tcl_DStringLength(&dsPtr)); + Tcl_DStringFree(&dsPtr); } /* @@ -1957,13 +1464,13 @@ AppendUnicodeToUtfRep( * * This function converts the contents of "bytes" to Unicode and appends * the Unicode to the Unicode rep of "objPtr". objPtr must already have a - * valid Unicode rep. numBytes must be non-negative. + * valid Unicode rep. * * Results: * None. * * Side effects: - * objPtr's internal rep is reallocated and string rep is cleaned. + * objPtr's internal rep is reallocated. * *---------------------------------------------------------------------- */ @@ -1972,18 +1479,27 @@ static void AppendUtfToUnicodeRep( Tcl_Obj *objPtr, /* Points to the object to append to. */ const char *bytes, /* String to convert to Unicode. */ - Tcl_Size numBytes) /* Number of bytes of "bytes" to convert. */ + int numBytes) /* Number of bytes of "bytes" to convert. */ { - UniCharString *stringPtr; + Tcl_DString dsPtr; + int numChars = numBytes; + Tcl_UniChar *unicode = NULL; + if (numBytes < 0) { + numBytes = (bytes ? strlen(bytes) : 0); + } if (numBytes == 0) { return; } - ExtendUnicodeRepWithString(objPtr, bytes, numBytes, -1); - TclInvalidateStringRep(objPtr); - stringPtr = GET_UNICHAR_STRING(objPtr); - stringPtr->allocated = 0; + Tcl_DStringInit(&dsPtr); + if (bytes) { + numChars = Tcl_NumUtfChars(bytes, numBytes); + unicode = (Tcl_UniChar *) Tcl_UtfToUniCharDString(bytes, numBytes, + &dsPtr); + } + AppendUnicodeToUnicodeRep(objPtr, unicode, numChars); + Tcl_DStringFree(&dsPtr); } /* @@ -1993,13 +1509,12 @@ AppendUtfToUnicodeRep( * * This function appends "numBytes" bytes of "bytes" to the UTF string * rep of "objPtr". objPtr must already have a valid String rep. - * numBytes must be non-negative. * * Results: * None. * * Side effects: - * objPtr's string rep is reallocated (by TCL STRING GROWTH ALGORITHM). + * objPtr's internal rep is reallocated. * *---------------------------------------------------------------------- */ @@ -2008,11 +1523,14 @@ static void AppendUtfToUtfRep( Tcl_Obj *objPtr, /* Points to the object to append to. */ const char *bytes, /* String to append. */ - Tcl_Size numBytes) /* Number of bytes of "bytes" to append. */ + int numBytes) /* Number of bytes of "bytes" to append. */ { - UniCharString *stringPtr; - Tcl_Size newLength, oldLength; + String *stringPtr; + int newLength, oldLength; + if (numBytes < 0) { + numBytes = (bytes ? strlen(bytes) : 0); + } if (numBytes == 0) { return; } @@ -2022,42 +1540,47 @@ AppendUtfToUtfRep( * trailing null. */ - if (objPtr->bytes == NULL) { - objPtr->length = 0; - } oldLength = objPtr->length; - if (numBytes > TCL_SIZE_MAX - oldLength) { - Tcl_Panic("max size for a Tcl value (%" TCL_SIZE_MODIFIER "d bytes) exceeded", TCL_SIZE_MAX); - } newLength = numBytes + oldLength; + if (newLength < 0) { + Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX); + } - stringPtr = GET_UNICHAR_STRING(objPtr); - if (newLength > stringPtr->allocated) { - Tcl_Size offset = TCL_INDEX_NONE; - + stringPtr = GET_STRING(objPtr); + if (newLength > (int) stringPtr->allocated) { /* * Protect against case where unicode points into the existing - * stringPtr->unicode array. Force it to follow any relocations due to - * the reallocs below. + * stringPtr->unicode array. Force it to follow any relocations + * due to the reallocs below. */ - - if (bytes && objPtr->bytes && (bytes >= objPtr->bytes) - && (bytes <= objPtr->bytes + objPtr->length)) { + int offset = -1; + if (bytes && bytes >= objPtr->bytes + && bytes <= objPtr->bytes + objPtr->length) { offset = bytes - objPtr->bytes; } /* - * TODO: consider passing flag=1: no overalloc on first append. This - * would make test stringObj-8.1 fail. + * There isn't currently enough space in the string representation so + * allocate additional space. First, try to double the length + * required. If that fails, try a more modest allocation. See the "TCL + * STRING GROWTH ALGORITHM" comment at the top of this file for an + * explanation of this growth algorithm. */ - GrowStringBuffer(objPtr, newLength, 0); + if (Tcl_AttemptSetObjLength(objPtr, 2 * newLength) == 0) { + /* + * Take care computing the amount of modest growth to avoid + * overflow into invalid argument values for Tcl_SetObjLength. + */ + unsigned int limit = INT_MAX - newLength; + unsigned int extra = numBytes + TCL_GROWTH_MIN_ALLOC; + int growth = (int) ((extra > limit) ? limit : extra); - /* - * Relocate bytes if needed; see above. - */ + Tcl_SetObjLength(objPtr, newLength + growth); + } - if (offset >= 0) { + /* Relocate bytes if needed; see above. */ + if (offset >=0) { bytes = objPtr->bytes + offset; } } @@ -2070,7 +1593,7 @@ AppendUtfToUtfRep( stringPtr->hasUnicode = 0; if (bytes) { - memmove(objPtr->bytes + oldLength, bytes, numBytes); + memcpy(objPtr->bytes + oldLength, bytes, (size_t) numBytes); } objPtr->bytes[newLength] = 0; objPtr->length = newLength; @@ -2079,39 +1602,6 @@ AppendUtfToUtfRep( /* *---------------------------------------------------------------------- * - * TclAppendUtfToUtf -- - * - * This function appends "numBytes" bytes of "bytes" to the UTF string - * rep of "objPtr" (objPtr's internal rep converted to string on demand). - * numBytes must be non-negative. - * - * Results: - * None. - * - * Side effects: - * objPtr's string rep is reallocated (by TCL STRING GROWTH ALGORITHM). - * - *---------------------------------------------------------------------- - */ - -void -TclAppendUtfToUtf( - Tcl_Obj *objPtr, /* Points to the object to append to. */ - const char *bytes, /* String to append (or NULL to enlarge buffer). */ - Tcl_Size numBytes) /* Number of bytes of "bytes" to append. */ -{ - if (Tcl_IsShared(objPtr)) { - Tcl_Panic("%s called with shared object", "TclAppendUtfToUtf"); - } - - SetStringFromAny(NULL, objPtr); - - AppendUtfToUtfRep(objPtr, bytes, numBytes); -} - -/* - *---------------------------------------------------------------------- - * * Tcl_AppendStringsToObjVA -- * * This function appends one or more null-terminated strings to an @@ -2132,18 +1622,130 @@ Tcl_AppendStringsToObjVA( Tcl_Obj *objPtr, /* Points to the object to append to. */ va_list argList) /* Variable argument list. */ { +#define STATIC_LIST_SIZE 16 + String *stringPtr; + int newLength, oldLength, attemptLength; + register char *string, *dst; + char *static_list[STATIC_LIST_SIZE]; + char **args = static_list; + int nargs_space = STATIC_LIST_SIZE; + int nargs, i; + if (Tcl_IsShared(objPtr)) { Tcl_Panic("%s called with shared object", "Tcl_AppendStringsToObj"); } + SetStringFromAny(NULL, objPtr); + + /* + * Force the existence of a string rep. so we avoid crashes operating + * on a pure unicode value. [Bug 2597185] + */ + + (void) Tcl_GetStringFromObj(objPtr, &oldLength); + + /* + * Figure out how much space is needed for all the strings, and expand the + * string representation if it isn't big enough. If no bytes would be + * appended, just return. Note that on some platforms (notably OS/390) the + * argList is an array so we need to use memcpy. + */ + + nargs = 0; + newLength = 0; while (1) { - const char *bytes = va_arg(argList, char *); + string = va_arg(argList, char *); + if (string == NULL) { + break; + } + if (nargs >= nargs_space) { + /* + * Expand the args buffer. + */ - if (bytes == NULL) { + nargs_space += STATIC_LIST_SIZE; + if (args == static_list) { + args = (void *) ckalloc(nargs_space * sizeof(char *)); + for (i = 0; i < nargs; ++i) { + args[i] = static_list[i]; + } + } else { + args = (void *) ckrealloc((void *) args, + nargs_space * sizeof(char *)); + } + } + newLength += strlen(string); + args[nargs++] = string; + } + if (newLength == 0) { + goto done; + } + + stringPtr = GET_STRING(objPtr); + if (oldLength + newLength > (int) stringPtr->allocated) { + /* + * There isn't currently enough space in the string representation, so + * allocate additional space. If the current string representation + * isn't empty (i.e. it looks like we're doing a series of appends) + * then try to allocate extra space to accomodate future growth: first + * try to double the required memory; if that fails, try a more modest + * allocation. See the "TCL STRING GROWTH ALGORITHM" comment at the + * top of this file for an explanation of this growth algorithm. + * Otherwise, if the current string representation is empty, exactly + * enough memory is allocated. + */ + + if (oldLength == 0) { + Tcl_SetObjLength(objPtr, newLength); + } else { + attemptLength = 2 * (oldLength + newLength); + if (Tcl_AttemptSetObjLength(objPtr, attemptLength) == 0) { + attemptLength = oldLength + (2 * newLength) + + TCL_GROWTH_MIN_ALLOC; + Tcl_SetObjLength(objPtr, attemptLength); + } + } + } + + /* + * Make a second pass through the arguments, appending all the strings to + * the object. + */ + + dst = objPtr->bytes + oldLength; + for (i = 0; i < nargs; ++i) { + string = args[i]; + if (string == NULL) { break; } - Tcl_AppendToObj(objPtr, bytes, -1); + while (*string != 0) { + *dst = *string; + dst++; + string++; + } + } + + /* + * Add a null byte to terminate the string. However, be careful: it's + * possible that the object is totally empty (if it was empty originally + * and there was nothing to append). In this case dst is NULL; just leave + * everything alone. + */ + + if (dst != NULL) { + *dst = 0; } + objPtr->length = oldLength + newLength; + + done: + /* + * If we had to allocate a buffer from the heap, free it now. + */ + + if (args != static_list) { + ckfree((void *) args); + } +#undef STATIC_LIST_SIZE } /* @@ -2201,16 +1803,15 @@ Tcl_AppendFormatToObj( Tcl_Interp *interp, Tcl_Obj *appendObj, const char *format, - Tcl_Size objc, + int objc, Tcl_Obj *const objv[]) { - const char *span = format, *msg, *errCode; - int gotXpg = 0, gotSequential = 0; - Tcl_Size objIndex = 0, originalLength, limit, numBytes = 0; - Tcl_UniChar ch = 0; + const char *span = format, *msg; + int numBytes = 0, objIndex = 0, gotXpg = 0, gotSequential = 0; + int originalLength, limit; static const char *mixedXPG = "cannot mix \"%\" and \"%n$\" conversion specifiers"; - static const char *const badIndex[2] = { + static const char *badIndex[2] = { "not enough arguments for all format specifiers", "\"%n$\" argument index out of range" }; @@ -2220,7 +1821,7 @@ Tcl_AppendFormatToObj( Tcl_Panic("%s called with shared object", "Tcl_AppendFormatToObj"); } TclGetStringFromObj(appendObj, &originalLength); - limit = TCL_SIZE_MAX - originalLength; + limit = INT_MAX - originalLength; /* * Format string is NUL-terminated. @@ -2228,16 +1829,12 @@ Tcl_AppendFormatToObj( while (*format != '\0') { char *end; - int gotMinus = 0, gotHash = 0, gotZero = 0, gotSpace = 0, gotPlus = 0; - int gotPrecision, sawFlag, useShort = 0, useBig = 0; - Tcl_WideInt width, precision; -#ifndef TCL_WIDE_INT_IS_LONG - int useWide = 0; -#endif - int newXpg, allocSegment = 0; - Tcl_Size numChars, segmentLimit, segmentNumBytes; + int gotMinus, gotHash, gotZero, gotSpace, gotPlus, sawFlag; + int width, gotPrecision, precision, useShort, useWide, useBig; + int newXpg, numChars, allocSegment = 0, segmentLimit, segmentNumBytes; Tcl_Obj *segment; - int step = TclUtfToUniChar(format, &ch); + Tcl_UniChar ch; + int step = Tcl_UtfToUniChar(format, &ch); format += step; if (ch != '%') { @@ -2247,7 +1844,6 @@ Tcl_AppendFormatToObj( if (numBytes) { if (numBytes > limit) { msg = overflow; - errCode = "OVERFLOW"; goto errorMsg; } Tcl_AppendToObj(appendObj, span, numBytes); @@ -2261,7 +1857,7 @@ Tcl_AppendFormatToObj( * Step 0. Handle special case of escaped format marker (i.e., %%). */ - step = TclUtfToUniChar(format, &ch); + step = Tcl_UtfToUniChar(format, &ch); if (ch == '%') { span = format; numBytes = step; @@ -2276,32 +1872,28 @@ Tcl_AppendFormatToObj( newXpg = 0; if (isdigit(UCHAR(ch))) { int position = strtoul(format, &end, 10); - if (*end == '$') { newXpg = 1; objIndex = position - 1; format = end + 1; - step = TclUtfToUniChar(format, &ch); + step = Tcl_UtfToUniChar(format, &ch); } } if (newXpg) { if (gotSequential) { msg = mixedXPG; - errCode = "MIXEDSPECTYPES"; goto errorMsg; } gotXpg = 1; } else { if (gotXpg) { msg = mixedXPG; - errCode = "MIXEDSPECTYPES"; goto errorMsg; } gotSequential = 1; } if ((objIndex < 0) || (objIndex >= objc)) { msg = badIndex[gotXpg]; - errCode = gotXpg ? "INDEXRANGE" : "FIELDVARMISMATCH"; goto errorMsg; } @@ -2309,6 +1901,7 @@ Tcl_AppendFormatToObj( * Step 2. Set of flags. */ + gotMinus = gotHash = gotZero = gotSpace = gotPlus = 0; sawFlag = 1; do { switch (ch) { @@ -2332,7 +1925,7 @@ Tcl_AppendFormatToObj( } if (sawFlag) { format += step; - step = TclUtfToUniChar(format, &ch); + step = Tcl_UtfToUniChar(format, &ch); } } while (sawFlag); @@ -2342,25 +1935,15 @@ Tcl_AppendFormatToObj( width = 0; if (isdigit(UCHAR(ch))) { - /* Note ull will be >= 0 because of isdigit check above */ - unsigned long long ull; - ull = strtoull(format, &end, 10); - /* Comparison is >=, not >, to leave room for nul */ - if (ull >= WIDE_MAX) { - msg = overflow; - errCode = "OVERFLOW"; - goto errorMsg; - } - width = (Tcl_WideInt)ull; + width = strtoul(format, &end, 10); format = end; - step = TclUtfToUniChar(format, &ch); + step = Tcl_UtfToUniChar(format, &ch); } else if (ch == '*') { if (objIndex >= objc - 1) { msg = badIndex[gotXpg]; - errCode = gotXpg ? "INDEXRANGE" : "FIELDVARMISMATCH"; goto errorMsg; } - if (TclGetWideIntFromObj(interp, objv[objIndex], &width) != TCL_OK) { + if (TclGetIntFromObj(interp, objv[objIndex], &width) != TCL_OK) { goto error; } if (width < 0) { @@ -2369,11 +1952,10 @@ Tcl_AppendFormatToObj( } objIndex++; format += step; - step = TclUtfToUniChar(format, &ch); + step = Tcl_UtfToUniChar(format, &ch); } if (width > limit) { msg = overflow; - errCode = "OVERFLOW"; goto errorMsg; } @@ -2385,28 +1967,18 @@ Tcl_AppendFormatToObj( if (ch == '.') { gotPrecision = 1; format += step; - step = TclUtfToUniChar(format, &ch); + step = Tcl_UtfToUniChar(format, &ch); } if (isdigit(UCHAR(ch))) { - /* Note ull will be >= 0 because of isdigit check above */ - unsigned long long ull; - ull = strtoull(format, &end, 10); - /* Comparison is >=, not >, to leave room for nul */ - if (ull >= WIDE_MAX) { - msg = overflow; - errCode = "OVERFLOW"; - goto errorMsg; - } - precision = (Tcl_WideInt)ull; + precision = strtoul(format, &end, 10); format = end; - step = TclUtfToUniChar(format, &ch); + step = Tcl_UtfToUniChar(format, &ch); } else if (ch == '*') { if (objIndex >= objc - 1) { msg = badIndex[gotXpg]; - errCode = gotXpg ? "INDEXRANGE" : "FIELDVARMISMATCH"; goto errorMsg; } - if (TclGetWideIntFromObj(interp, objv[objIndex], &precision) + if (TclGetIntFromObj(interp, objv[objIndex], &precision) != TCL_OK) { goto error; } @@ -2420,48 +1992,30 @@ Tcl_AppendFormatToObj( } objIndex++; format += step; - step = TclUtfToUniChar(format, &ch); + step = Tcl_UtfToUniChar(format, &ch); } /* * Step 5. Length modifier. */ + useShort = useWide = useBig = 0; if (ch == 'h') { useShort = 1; format += step; - step = TclUtfToUniChar(format, &ch); + step = Tcl_UtfToUniChar(format, &ch); } else if (ch == 'l') { format += step; - step = TclUtfToUniChar(format, &ch); + step = Tcl_UtfToUniChar(format, &ch); if (ch == 'l') { useBig = 1; format += step; - step = TclUtfToUniChar(format, &ch); -#ifndef TCL_WIDE_INT_IS_LONG + step = Tcl_UtfToUniChar(format, &ch); } else { - useWide = 1; -#endif - } - } else if (ch == 'I') { - if ((format[1] == '6') && (format[2] == '4')) { - format += (step + 2); - step = TclUtfToUniChar(format, &ch); #ifndef TCL_WIDE_INT_IS_LONG useWide = 1; #endif - } else if ((format[1] == '3') && (format[2] == '2')) { - format += (step + 2); - step = TclUtfToUniChar(format, &ch); - } else { - format += step; - step = TclUtfToUniChar(format, &ch); } - } else if ((ch == 't') || (ch == 'z') || (ch == 'q') || (ch == 'j') - || (ch == 'L')) { - format += step; - step = TclUtfToUniChar(format, &ch); - useBig = 1; } format += step; @@ -2479,17 +2033,12 @@ Tcl_AppendFormatToObj( switch (ch) { case '\0': msg = "format string ended in middle of field specifier"; - errCode = "INCOMPLETE"; goto errorMsg; case 's': if (gotPrecision) { - numChars = TclGetCharLength(segment); + numChars = Tcl_GetCharLength(segment); if (precision < numChars) { - if (precision < 1) { - TclNewObj(segment); - } else { - segment = TclGetRange(segment, 0, precision - 1); - } + segment = Tcl_GetRange(segment, 0, precision - 1); numChars = precision; Tcl_IncrRefCount(segment); allocSegment = 1; @@ -2497,20 +2046,13 @@ Tcl_AppendFormatToObj( } break; case 'c': { - char buf[4] = ""; + char buf[TCL_UTF_MAX]; int code, length; if (TclGetIntFromObj(interp, segment, &code) != TCL_OK) { goto error; } - if ((unsigned)code > 0x10FFFF) { - code = 0xFFFD; - } length = Tcl_UniCharToUtf(code, buf); - if ((code >= 0xD800) && (length < 3)) { - /* Special case for handling high surrogates. */ - length += Tcl_UniCharToUtf(-1, buf + length); - } segment = Tcl_NewStringObj(buf, length); Tcl_IncrRefCount(segment); allocSegment = 1; @@ -2518,125 +2060,107 @@ Tcl_AppendFormatToObj( } case 'u': - /* FALLTHRU */ + if (useBig) { + msg = "unsigned bignum format is invalid"; + goto errorMsg; + } case 'd': case 'o': - case 'p': case 'x': - case 'X': - case 'b': { - short s = 0; /* Silence compiler warning; only defined and + case 'X': { + short int s = 0; /* Silence compiler warning; only defined and * used when useShort is true. */ long l; Tcl_WideInt w; mp_int big; - int isNegative = 0; - Tcl_Size toAppend; + int toAppend, isNegative = 0; -#ifndef TCL_WIDE_INT_IS_LONG - if (ch == 'p') { - useWide = 1; - } -#endif if (useBig) { - int cmpResult; if (Tcl_GetBignumFromObj(interp, segment, &big) != TCL_OK) { goto error; } - cmpResult = mp_cmp_d(&big, 0); - isNegative = (cmpResult == MP_LT); - if (cmpResult == MP_EQ) gotHash = 0; - if (ch == 'u') { - if (isNegative) { - mp_clear(&big); - msg = "unsigned bignum format is invalid"; - errCode = "BADUNSIGNED"; - goto errorMsg; - } else { - ch = 'd'; - } - } -#ifndef TCL_WIDE_INT_IS_LONG + isNegative = (mp_cmp_d(&big, 0) == MP_LT); } else if (useWide) { - if (TclGetWideBitsFromObj(interp, segment, &w) != TCL_OK) { - goto error; + if (Tcl_GetWideIntFromObj(NULL, segment, &w) != TCL_OK) { + Tcl_Obj *objPtr; + + if (Tcl_GetBignumFromObj(interp,segment,&big) != TCL_OK) { + goto error; + } + mp_mod_2d(&big, (int) CHAR_BIT*sizeof(Tcl_WideInt), &big); + objPtr = Tcl_NewBignumObj(&big); + Tcl_IncrRefCount(objPtr); + Tcl_GetWideIntFromObj(NULL, objPtr, &w); + Tcl_DecrRefCount(objPtr); } - isNegative = (w < (Tcl_WideInt) 0); - if (w == (Tcl_WideInt) 0) gotHash = 0; -#endif + isNegative = (w < (Tcl_WideInt)0); } else if (TclGetLongFromObj(NULL, segment, &l) != TCL_OK) { - if (TclGetWideBitsFromObj(interp, segment, &w) != TCL_OK) { - goto error; + if (Tcl_GetWideIntFromObj(NULL, segment, &w) != TCL_OK) { + Tcl_Obj *objPtr; + + if (Tcl_GetBignumFromObj(interp,segment,&big) != TCL_OK) { + goto error; + } + mp_mod_2d(&big, (int) CHAR_BIT * sizeof(long), &big); + objPtr = Tcl_NewBignumObj(&big); + Tcl_IncrRefCount(objPtr); + TclGetLongFromObj(NULL, objPtr, &l); + Tcl_DecrRefCount(objPtr); } else { - l = (long) w; + l = Tcl_WideAsLong(w); } if (useShort) { - s = (short) l; - isNegative = (s < (short) 0); - if (s == (short) 0) gotHash = 0; + s = (short int) l; + isNegative = (s < (short int)0); } else { - isNegative = (l < (long) 0); - if (l == (long) 0) gotHash = 0; + isNegative = (l < (long)0); } } else if (useShort) { - s = (short) l; - isNegative = (s < (short) 0); - if (s == (short) 0) gotHash = 0; + s = (short int) l; + isNegative = (s < (short int)0); } else { - isNegative = (l < (long) 0); - if (l == (long) 0) gotHash = 0; + isNegative = (l < (long)0); } - TclNewObj(segment); + segment = Tcl_NewObj(); allocSegment = 1; - segmentLimit = TCL_SIZE_MAX; + segmentLimit = INT_MAX; Tcl_IncrRefCount(segment); - if ((isNegative || gotPlus || gotSpace) && (useBig || ch=='d')) { - Tcl_AppendToObj(segment, - (isNegative ? "-" : gotPlus ? "+" : " "), 1); + if ((isNegative || gotPlus || gotSpace) && (useBig || (ch == 'd'))) { + Tcl_AppendToObj(segment, (isNegative ? "-" : gotPlus ? "+" : " "), 1); segmentLimit -= 1; } - if (gotHash || (ch == 'p')) { + if (gotHash) { switch (ch) { case 'o': - Tcl_AppendToObj(segment, "0o", 2); - segmentLimit -= 2; + Tcl_AppendToObj(segment, "0", 1); + segmentLimit -= 1; + precision--; break; - case 'p': case 'x': case 'X': Tcl_AppendToObj(segment, "0x", 2); segmentLimit -= 2; break; - case 'b': - Tcl_AppendToObj(segment, "0b", 2); - segmentLimit -= 2; - break; - case 'd': - Tcl_AppendToObj(segment, "0d", 2); - segmentLimit -= 2; - break; } } switch (ch) { case 'd': { - Tcl_Size length; + int length; Tcl_Obj *pure; const char *bytes; if (useShort) { - TclNewIntObj(pure, s); -#ifndef TCL_WIDE_INT_IS_LONG + pure = Tcl_NewIntObj((int)(s)); } else if (useWide) { - TclNewIntObj(pure, w); -#endif + pure = Tcl_NewWideIntObj(w); } else if (useBig) { pure = Tcl_NewBignumObj(&big); } else { - TclNewIntObj(pure, l); + pure = Tcl_NewLongObj(l); } Tcl_IncrRefCount(pure); bytes = TclGetStringFromObj(pure, &length); @@ -2659,7 +2183,7 @@ Tcl_AppendFormatToObj( if (gotPrecision) { if (length < precision) { - segmentLimit -= precision - length; + segmentLimit -= (precision - length); } while (length < precision) { Tcl_AppendToObj(segment, "0", 1); @@ -2668,9 +2192,9 @@ Tcl_AppendFormatToObj( gotZero = 0; } if (gotZero) { - length += TclGetCharLength(segment); + length += Tcl_GetCharLength(segment); if (length < width) { - segmentLimit -= width - length; + segmentLimit -= (width - length); } while (length < width) { Tcl_AppendToObj(segment, "0", 1); @@ -2679,7 +2203,6 @@ Tcl_AppendFormatToObj( } if (toAppend > segmentLimit) { msg = overflow; - errCode = "OVERFLOW"; goto errorMsg; } Tcl_AppendToObj(segment, bytes, toAppend); @@ -2689,35 +2212,30 @@ Tcl_AppendFormatToObj( case 'u': case 'o': - case 'p': case 'x': - case 'X': - case 'b': { - Tcl_WideUInt bits = 0; - Tcl_WideInt numDigits = 0; - int numBits = 4, base = 16, index = 0, shift = 0; - Tcl_Size length; + case 'X': { + Tcl_WideUInt bits = (Tcl_WideUInt)0; + Tcl_WideInt numDigits = (Tcl_WideInt)0; + int length, numBits = 4, base = 16; + int index = 0, shift = 0; Tcl_Obj *pure; char *bytes; if (ch == 'u') { base = 10; - } else if (ch == 'o') { + } + if (ch == 'o') { base = 8; numBits = 3; - } else if (ch == 'b') { - base = 2; - numBits = 1; } if (useShort) { - unsigned short us = (unsigned short) s; + unsigned short int us = (unsigned short int) s; bits = (Tcl_WideUInt) us; while (us) { numDigits++; us /= base; } -#ifndef TCL_WIDE_INT_IS_LONG } else if (useWide) { Tcl_WideUInt uw = (Tcl_WideUInt) w; @@ -2726,24 +2244,22 @@ Tcl_AppendFormatToObj( numDigits++; uw /= base; } -#endif - } else if (useBig && !mp_iszero(&big)) { - int leftover = (big.used * MP_DIGIT_BIT) % numBits; - mp_digit mask = (~(mp_digit)0) << (MP_DIGIT_BIT-leftover); + } else if (useBig && big.used) { + int leftover = (big.used * DIGIT_BIT) % numBits; + mp_digit mask = (~(mp_digit)0) << (DIGIT_BIT-leftover); numDigits = 1 + - (((Tcl_WideInt) big.used * MP_DIGIT_BIT) / numBits); + (((Tcl_WideInt)big.used * DIGIT_BIT) / numBits); while ((mask & big.dp[big.used-1]) == 0) { numDigits--; mask >>= numBits; } if (numDigits > INT_MAX) { msg = overflow; - errCode = "OVERFLOW"; goto errorMsg; } } else if (!useBig) { - unsigned long ul = (unsigned long) l; + unsigned long int ul = (unsigned long int) l; bits = (Tcl_WideUInt) ul; while (ul) { @@ -2756,31 +2272,27 @@ Tcl_AppendFormatToObj( * Need to be sure zero becomes "0", not "". */ - if (numDigits == 0) { + if ((numDigits == 0) && !((ch == 'o') && gotHash)) { numDigits = 1; } - TclNewObj(pure); - Tcl_SetObjLength(pure, numDigits); + pure = Tcl_NewObj(); + Tcl_SetObjLength(pure, (int)numDigits); bytes = TclGetString(pure); - toAppend = length = numDigits; + toAppend = length = (int)numDigits; while (numDigits--) { int digitOffset; - if (useBig && !mp_iszero(&big)) { + if (useBig && big.used) { if (index < big.used && (size_t) shift < - CHAR_BIT*sizeof(Tcl_WideUInt) - MP_DIGIT_BIT) { - bits |= ((Tcl_WideUInt) big.dp[index++]) << shift; - shift += MP_DIGIT_BIT; + CHAR_BIT*sizeof(Tcl_WideUInt) - DIGIT_BIT) { + bits |= (((Tcl_WideUInt)big.dp[index++]) <<shift); + shift += DIGIT_BIT; } shift -= numBits; } - digitOffset = bits % base; + digitOffset = (int) (bits % base); if (digitOffset > 9) { - if (ch == 'X') { - bytes[numDigits] = 'A' + digitOffset - 10; - } else { - bytes[numDigits] = 'a' + digitOffset - 10; - } + bytes[numDigits] = 'a' + digitOffset - 10; } else { bytes[numDigits] = '0' + digitOffset; } @@ -2791,7 +2303,7 @@ Tcl_AppendFormatToObj( } if (gotPrecision) { if (length < precision) { - segmentLimit -= precision - length; + segmentLimit -= (precision - length); } while (length < precision) { Tcl_AppendToObj(segment, "0", 1); @@ -2800,9 +2312,9 @@ Tcl_AppendFormatToObj( gotZero = 0; } if (gotZero) { - length += TclGetCharLength(segment); + length += Tcl_GetCharLength(segment); if (length < width) { - segmentLimit -= width - length; + segmentLimit -= (width - length); } while (length < width) { Tcl_AppendToObj(segment, "0", 1); @@ -2811,7 +2323,6 @@ Tcl_AppendFormatToObj( } if (toAppend > segmentLimit) { msg = overflow; - errCode = "OVERFLOW"; goto errorMsg; } Tcl_AppendObjToObj(segment, pure); @@ -2823,8 +2334,6 @@ Tcl_AppendFormatToObj( break; } - case 'a': - case 'A': case 'e': case 'E': case 'f': @@ -2857,17 +2366,16 @@ Tcl_AppendFormatToObj( *p++ = '+'; } if (width) { - p += snprintf(p, TCL_INTEGER_SPACE, "%" TCL_LL_MODIFIER "d", width); + p += sprintf(p, "%d", width); if (width > length) { length = width; } } if (gotPrecision) { *p++ = '.'; - p += snprintf(p, TCL_INTEGER_SPACE, "%" TCL_LL_MODIFIER "d", precision); - if (precision > TCL_SIZE_MAX - length) { - msg = overflow; - errCode = "OVERFLOW"; + p += sprintf(p, "%d", precision); + if (precision > INT_MAX - length) { + msg=overflow; goto errorMsg; } length += precision; @@ -2880,56 +2388,56 @@ Tcl_AppendFormatToObj( *p++ = (char) ch; *p = '\0'; - TclNewObj(segment); + segment = Tcl_NewObj(); allocSegment = 1; if (!Tcl_AttemptSetObjLength(segment, length)) { msg = overflow; - errCode = "OVERFLOW"; goto errorMsg; } bytes = TclGetString(segment); - if (!Tcl_AttemptSetObjLength(segment, snprintf(bytes, segment->length, spec, d))) { + if (!Tcl_AttemptSetObjLength(segment, sprintf(bytes, spec, d))) { msg = overflow; - errCode = "OVERFLOW"; goto errorMsg; } - if (ch == 'A') { - char *q = TclGetString(segment) + 1; - *q = 'x'; - q = strchr(q, 'P'); - if (q) *q = 'p'; - } break; } default: if (interp != NULL) { Tcl_SetObjResult(interp, Tcl_ObjPrintf("bad field specifier \"%c\"", ch)); - Tcl_SetErrorCode(interp, "TCL", "FORMAT", "BADTYPE", (void *)NULL); } goto error; } - if (width>0 && numChars<0) { - numChars = TclGetCharLength(segment); + switch (ch) { + case 'E': + case 'G': + case 'X': { + Tcl_SetObjLength(segment, Tcl_UtfToUpper(TclGetString(segment))); } - if (!gotMinus && width>0) { - if (numChars < width) { - limit -= width - numChars; + } + + if (width > 0) { + if (numChars < 0) { + numChars = Tcl_GetCharLength(segment); } - while (numChars < width) { - Tcl_AppendToObj(appendObj, (gotZero ? "0" : " "), 1); - numChars++; + if (!gotMinus) { + if (numChars < width) { + limit -= (width - numChars); + } + while (numChars < width) { + Tcl_AppendToObj(appendObj, (gotZero ? "0" : " "), 1); + numChars++; + } } } - TclGetStringFromObj(segment, &segmentNumBytes); + Tcl_GetStringFromObj(segment, &segmentNumBytes); if (segmentNumBytes > limit) { if (allocSegment) { Tcl_DecrRefCount(segment); } msg = overflow; - errCode = "OVERFLOW"; goto errorMsg; } Tcl_AppendObjToObj(appendObj, segment); @@ -2939,7 +2447,7 @@ Tcl_AppendFormatToObj( } if (width > 0) { if (numChars < width) { - limit -= width-numChars; + limit -= (width - numChars); } while (numChars < width) { Tcl_AppendToObj(appendObj, (gotZero ? "0" : " "), 1); @@ -2952,7 +2460,6 @@ Tcl_AppendFormatToObj( if (numBytes) { if (numBytes > limit) { msg = overflow; - errCode = "OVERFLOW"; goto errorMsg; } Tcl_AppendToObj(appendObj, span, numBytes); @@ -2965,7 +2472,6 @@ Tcl_AppendFormatToObj( errorMsg: if (interp != NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj(msg, -1)); - Tcl_SetErrorCode(interp, "TCL", "FORMAT", errCode, (void *)NULL); } error: Tcl_SetObjLength(appendObj, originalLength); @@ -2975,13 +2481,13 @@ Tcl_AppendFormatToObj( /* *--------------------------------------------------------------------------- * - * Tcl_Format -- + * Tcl_Format-- * * Results: * A refcount zero Tcl_Obj. * * Side effects: - * None. + * None. * *--------------------------------------------------------------------------- */ @@ -2990,13 +2496,11 @@ Tcl_Obj * Tcl_Format( Tcl_Interp *interp, const char *format, - Tcl_Size objc, + int objc, Tcl_Obj *const objv[]) { int result; - Tcl_Obj *objPtr; - - TclNewObj(objPtr); + Tcl_Obj *objPtr = Tcl_NewObj(); result = Tcl_AppendFormatToObj(interp, objPtr, format, objc, objv); if (result != TCL_OK) { Tcl_DecrRefCount(objPtr); @@ -3017,44 +2521,17 @@ Tcl_Format( *--------------------------------------------------------------------------- */ -static Tcl_Obj * -NewLongObj( - char c, - long value) -{ - if ((value < 0) && strchr("puoxX", c)) { - Tcl_Obj *obj; - TclNewUIntObj(obj, (unsigned long)value); - return obj; - } - return Tcl_NewWideIntObj((long)value); -} - -static Tcl_Obj * -NewWideIntObj( - char c, - Tcl_WideInt value) -{ - if ((value < 0) && strchr("puoxX", c)) { - Tcl_Obj *obj; - TclNewUIntObj(obj, (Tcl_WideUInt)value); - return obj; - } - return Tcl_NewWideIntObj(value); -} - static void AppendPrintfToObjVA( Tcl_Obj *objPtr, const char *format, va_list argList) { - int code; - Tcl_Size objc; - Tcl_Obj **objv, *list; + int code, objc; + Tcl_Obj **objv, *list = Tcl_NewObj(); const char *p; + char *end; - TclNewObj(list); p = format; Tcl_IncrRefCount(list); while (*p != '\0') { @@ -3070,6 +2547,7 @@ AppendPrintfToObjVA( } do { switch (*p) { + case '\0': seekingConversion = 0; break; @@ -3095,26 +2573,21 @@ AppendPrintfToObjVA( */ q = Tcl_UtfPrev(end, bytes); - if (!Tcl_UtfCharComplete(q, end - q)) { + if (!Tcl_UtfCharComplete(q, (int)(end - q))) { end = q; } - q = bytes + 4; + q = bytes + TCL_UTF_MAX; while ((bytes < end) && (bytes < q) && ((*bytes & 0xC0) == 0x80)) { bytes++; } Tcl_ListObjAppendElement(NULL, list, - Tcl_NewStringObj(bytes , end - bytes)); + Tcl_NewStringObj(bytes , (int)(end - bytes))); break; } - case 'p': - if (sizeof(size_t) == sizeof(Tcl_WideInt)) { - size = 2; - } - /* FALLTHRU */ case 'c': case 'i': case 'u': @@ -3126,90 +2599,45 @@ AppendPrintfToObjVA( switch (size) { case -1: case 0: - Tcl_ListObjAppendElement(NULL, list, Tcl_NewIntObj( - va_arg(argList, int))); + Tcl_ListObjAppendElement(NULL, list, Tcl_NewLongObj( + (long int)va_arg(argList, int))); break; case 1: - Tcl_ListObjAppendElement(NULL, list, NewLongObj(*p, - va_arg(argList, long))); - break; - case 2: - Tcl_ListObjAppendElement(NULL, list, NewWideIntObj(*p, - va_arg(argList, Tcl_WideInt))); - break; - case 3: - Tcl_ListObjAppendElement(NULL, list, Tcl_NewBignumObj( - va_arg(argList, mp_int *))); + Tcl_ListObjAppendElement(NULL, list, Tcl_NewLongObj( + va_arg(argList, long int))); break; } break; - case 'a': - case 'A': case 'e': case 'E': case 'f': case 'g': case 'G': - if (size > 0) { Tcl_ListObjAppendElement(NULL, list, Tcl_NewDoubleObj( - (double)va_arg(argList, long double))); - } else { - Tcl_ListObjAppendElement(NULL, list, Tcl_NewDoubleObj( - va_arg(argList, double))); - } + va_arg(argList, double))); seekingConversion = 0; break; case '*': - lastNum = va_arg(argList, int); - Tcl_ListObjAppendElement(NULL, list, Tcl_NewWideIntObj(lastNum)); + lastNum = (int)va_arg(argList, int); + Tcl_ListObjAppendElement(NULL, list, Tcl_NewIntObj(lastNum)); p++; break; case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': { - char *end; - - lastNum = strtoul(p, &end, 10); + case '5': case '6': case '7': case '8': case '9': + lastNum = (int) strtoul(p, &end, 10); p = end; break; - } case '.': gotPrecision = 1; p++; break; + /* TODO: support for wide (and bignum?) arguments */ case 'l': - ++size; - p++; - break; - case 't': - case 'z': - if (sizeof(size_t) == sizeof(Tcl_WideInt)) { - size = 2; - } - p++; - break; - case 'j': - case 'q': - size = 2; - p++; - break; - case 'I': - if (p[1]=='6' && p[2]=='4') { - p += 2; - size = 2; - } else if (p[1]=='3' && p[2]=='2') { - p += 2; - } else if (sizeof(size_t) == sizeof(Tcl_WideInt)) { - size = 2; - } - p++; - break; - case 'L': - size = 3; + size = 1; p++; break; case 'h': size = -1; - /* FALLTHRU */ default: p++; } @@ -3234,7 +2662,7 @@ AppendPrintfToObjVA( * A standard Tcl result. * * Side effects: - * None. + * None. * *--------------------------------------------------------------------------- */ @@ -3261,7 +2689,7 @@ Tcl_AppendPrintfToObj( * A refcount zero Tcl_Obj. * * Side effects: - * None. + * None. * *--------------------------------------------------------------------------- */ @@ -3272,9 +2700,8 @@ Tcl_ObjPrintf( ...) { va_list argList; - Tcl_Obj *objPtr; + Tcl_Obj *objPtr = Tcl_NewObj(); - TclNewObj(objPtr); va_start(argList, format); AppendPrintfToObjVA(objPtr, format, argList); va_end(argList); @@ -3303,969 +2730,27 @@ TclGetStringStorage( Tcl_Obj *objPtr, unsigned int *sizePtr) { - UniCharString *stringPtr; + String *stringPtr; - if (!TclHasInternalRep(objPtr, &tclUniCharStringType) || objPtr->bytes == NULL) { - return TclGetStringFromObj(objPtr, (Tcl_Size *)sizePtr); + if (objPtr->typePtr != &tclStringType || objPtr->bytes == NULL) { + return TclGetStringFromObj(objPtr, (int *)sizePtr); } - stringPtr = GET_UNICHAR_STRING(objPtr); + stringPtr = GET_STRING(objPtr); *sizePtr = stringPtr->allocated; return objPtr->bytes; } - -/* - *--------------------------------------------------------------------------- - * - * TclStringRepeat -- - * - * Performs the [string repeat] function. - * - * Results: - * A (Tcl_Obj *) pointing to the result value, or NULL in case of an - * error. - * - * Side effects: - * On error, when interp is not NULL, error information is left in it. - * - *--------------------------------------------------------------------------- - */ - -Tcl_Obj * -TclStringRepeat( - Tcl_Interp *interp, - Tcl_Obj *objPtr, - Tcl_Size count, - int flags) -{ - Tcl_Obj *objResultPtr; - int inPlace = flags & TCL_STRING_IN_PLACE; - Tcl_Size length = 0; - int unichar = 0; - Tcl_Size done = 1; - int binary = TclIsPureByteArray(objPtr); - - /* - * Analyze to determine what representation result should be. - * GOALS: Avoid shimmering & string rep generation. - * Produce pure bytearray when possible. - * Error on overflow. - */ - - if (!binary) { - if (TclHasInternalRep(objPtr, &tclUniCharStringType)) { - UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr); - if (stringPtr->hasUnicode) { - unichar = 1; - } - } - } - - if (binary) { - /* Result will be pure byte array. Pre-size it */ - Tcl_GetByteArrayFromObj(objPtr, &length); - } else if (unichar) { - /* Result will be pure Tcl_UniChar array. Pre-size it. */ - TclGetUnicodeFromObj(objPtr, &length); - } else { - /* Result will be concat of string reps. Pre-size it. */ - TclGetStringFromObj(objPtr, &length); - } - - if (length == 0) { - /* Any repeats of empty is empty. */ - return objPtr; - } - - if (count > INT_MAX/length) { - if (interp) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "max size for a Tcl value (%" TCL_SIZE_MODIFIER - "d bytes) exceeded", TCL_SIZE_MAX)); - Tcl_SetErrorCode(interp, "TCL", "MEMORY", (void *)NULL); - } - return NULL; - } - - if (binary) { - /* Efficiently produce a pure byte array result */ - objResultPtr = (!inPlace || Tcl_IsShared(objPtr)) ? - Tcl_DuplicateObj(objPtr) : objPtr; - - Tcl_SetByteArrayLength(objResultPtr, count*length); /* PANIC? */ - Tcl_SetByteArrayLength(objResultPtr, length); - while (count - done > done) { - Tcl_AppendObjToObj(objResultPtr, objResultPtr); - done *= 2; - } - TclAppendBytesToByteArray(objResultPtr, - Tcl_GetByteArrayFromObj(objResultPtr, (Tcl_Size *) NULL), - (count - done) * length); - } else if (unichar) { - /* - * Efficiently produce a pure Tcl_UniChar array result. - */ - - if (!inPlace || Tcl_IsShared(objPtr)) { - objResultPtr = TclNewUnicodeObj(TclGetUnicodeFromObj(objPtr, NULL), length); - } else { - TclInvalidateStringRep(objPtr); - objResultPtr = objPtr; - } - - if (0 == Tcl_AttemptSetObjLength(objResultPtr, count*length)) { - if (interp) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "string size overflow: unable to alloc %" - TCL_Z_MODIFIER "u bytes", - UNICHAR_STRING_SIZE(count*length))); - Tcl_SetErrorCode(interp, "TCL", "MEMORY", (void *)NULL); - } - return NULL; - } - Tcl_SetObjLength(objResultPtr, length); - while (count - done > done) { - Tcl_AppendObjToObj(objResultPtr, objResultPtr); - done *= 2; - } - TclAppendUnicodeToObj(objResultPtr, TclGetUnicodeFromObj(objResultPtr, NULL), - (count - done) * length); - } else { - /* - * Efficiently concatenate string reps. - */ - - if (!inPlace || Tcl_IsShared(objPtr)) { - objResultPtr = Tcl_NewStringObj(Tcl_GetString(objPtr), length); - } else { - TclFreeInternalRep(objPtr); - objResultPtr = objPtr; - } - if (0 == Tcl_AttemptSetObjLength(objResultPtr, count*length)) { - if (interp) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "string size overflow: unable to alloc %" TCL_SIZE_MODIFIER "d bytes", - count*length)); - Tcl_SetErrorCode(interp, "TCL", "MEMORY", (void *)NULL); - } - return NULL; - } - Tcl_SetObjLength(objResultPtr, length); - while (count - done > done) { - Tcl_AppendObjToObj(objResultPtr, objResultPtr); - done *= 2; - } - Tcl_AppendToObj(objResultPtr, Tcl_GetString(objResultPtr), - (count - done) * length); - } - return objResultPtr; -} - -/* - *--------------------------------------------------------------------------- - * - * TclStringCat -- - * - * Performs the [string cat] function. - * - * Results: - * A (Tcl_Obj *) pointing to the result value, or NULL in case of an - * error. - * - * Side effects: - * On error, when interp is not NULL, error information is left in it. - * - *--------------------------------------------------------------------------- - */ - -Tcl_Obj * -TclStringCat( - Tcl_Interp *interp, - Tcl_Size objc, - Tcl_Obj * const objv[], - int flags) -{ - Tcl_Obj *objResultPtr, * const *ov; - int binary = 1; - Tcl_Size oc, length = 0; - int allowUniChar = 1, requestUniChar = 0, forceUniChar = 0; - Tcl_Size first = objc - 1; /* Index of first value possibly not empty */ - Tcl_Size last = 0; /* Index of last value possibly not empty */ - int inPlace = flags & TCL_STRING_IN_PLACE; - - if (objc <= 1) { - if (objc != 1) { - /* Negative (shouldn't be) no objects; return empty */ - Tcl_Obj *obj; - TclNewObj(obj); - return obj; - } - /* One object; return first */ - return objv[0]; - } - - /* - * Analyze to determine what representation result should be. - * GOALS: Avoid shimmering & string rep generation. - * Produce pure bytearray when possible. - * Error on overflow. - */ - - ov = objv, oc = objc; - do { - Tcl_Obj *objPtr = *ov++; - - if (TclIsPureByteArray(objPtr)) { - allowUniChar = 0; - } else if (objPtr->bytes) { - /* Value has a string rep. */ - if (objPtr->length) { - /* - * Non-empty string rep. Not a pure bytearray, so we won't - * create a pure bytearray. - */ - - binary = 0; - if (ov > objv+1 && ISCONTINUATION(TclGetString(objPtr))) { - forceUniChar = 1; - } else if ((objPtr->typePtr) && (objPtr->typePtr != &tclUniCharStringType)) { - /* Prevent shimmer of non-string types. */ - allowUniChar = 0; - } - } - } else { - binary = 0; - if (TclHasInternalRep(objPtr, &tclUniCharStringType)) { - /* Have a pure Unicode value; ask to preserve it */ - requestUniChar = 1; - } else { - /* Have another type; prevent shimmer */ - allowUniChar = 0; - } - } - } while (--oc && (binary || allowUniChar)); - - if (binary) { - /* - * Result will be pure byte array. Pre-size it - */ - - Tcl_Size numBytes; - ov = objv; - oc = objc; - do { - Tcl_Obj *objPtr = *ov++; - - /* - * Every argument is either a bytearray with a ("pure") - * value we know we can safely use, or it is an empty string. - * We don't need to count bytes for the empty strings. - */ - - if (TclIsPureByteArray(objPtr)) { - Tcl_GetByteArrayFromObj(objPtr, &numBytes); /* PANIC? */ - - if (numBytes) { - last = objc - oc; - if (length == 0) { - first = last; - } - if (length > (TCL_SIZE_MAX-numBytes)) { - goto overflow; - } - length += numBytes; - } - } - } while (--oc); - } else if ((allowUniChar && requestUniChar) || forceUniChar) { - /* - * Result will be pure Tcl_UniChar array. Pre-size it. - */ - - ov = objv; - oc = objc; - do { - Tcl_Obj *objPtr = *ov++; - - if ((objPtr->bytes == NULL) || (objPtr->length)) { - Tcl_Size numChars; - - TclGetUnicodeFromObj(objPtr, &numChars); /* PANIC? */ - if (numChars) { - last = objc - oc; - if (length == 0) { - first = last; - } else if (length > TCL_SIZE_MAX - numChars) { - goto overflow; - } - length += numChars; - } - } - } while (--oc); - } else { - /* Result will be concat of string reps. Pre-size it. */ - ov = objv; oc = objc; - do { - Tcl_Obj *pendingPtr = NULL; - - /* - * Loop until a possibly non-empty value is reached. - * Keep string rep generation pending when possible. - */ - - do { - Tcl_Obj *objPtr = *ov++; - - if (objPtr->bytes == NULL) { - /* No string rep; Take the chance we can avoid making it */ - pendingPtr = objPtr; - } else { - TclGetStringFromObj(objPtr, &length); /* PANIC? */ - } - } while (--oc && (length == 0) && (pendingPtr == NULL)); - - /* - * Either we found a possibly non-empty value, and we remember - * this index as the first and last such value so far seen, - * or (oc == 0) and all values are known empty, - * so first = last = objc - 1 signals the right quick return. - */ - - first = last = objc - oc - 1; - - if (oc && (length == 0)) { - Tcl_Size numBytes; - - /* - * There's a pending value followed by more values. Loop over - * remaining values generating strings until a non-empty value - * is found, or the pending value gets its string generated. - */ - - do { - Tcl_Obj *objPtr = *ov++; - TclGetStringFromObj(objPtr, &numBytes); /* PANIC? */ - } while (--oc && numBytes == 0 && pendingPtr->bytes == NULL); - - if (numBytes) { - last = objc -oc -1; - } - if (oc || numBytes) { - TclGetStringFromObj(pendingPtr, &length); - } - if (length == 0) { - if (numBytes) { - first = last; - } - } else if (numBytes > TCL_SIZE_MAX - length) { - goto overflow; - } - length += numBytes; - } - } while (oc && (length == 0)); - - while (oc) { - Tcl_Size numBytes; - Tcl_Obj *objPtr = *ov++; - - TclGetStringFromObj(objPtr, &numBytes); /* PANIC? */ - if (numBytes) { - last = objc - oc; - if (numBytes > TCL_SIZE_MAX - length) { - goto overflow; - } - length += numBytes; - } - --oc; - } - } - - if (last <= first /*|| length == 0 */) { - /* Only one non-empty value or zero length; return first */ - /* NOTE: (length == 0) implies (last <= first) */ - return objv[first]; - } - - objv += first; objc = (last - first + 1); - - if (binary) { - /* Efficiently produce a pure byte array result */ - unsigned char *dst; - - /* - * Broken interface! Byte array value routines offer no way to handle - * failure to allocate enough space. Following stanza may panic. - */ - - if (inPlace && !Tcl_IsShared(*objv)) { - Tcl_Size start; - - objResultPtr = *objv++; objc--; - Tcl_GetByteArrayFromObj(objResultPtr, &start); - dst = Tcl_SetByteArrayLength(objResultPtr, length) + start; - } else { - objResultPtr = Tcl_NewByteArrayObj(NULL, length); - dst = Tcl_SetByteArrayLength(objResultPtr, length); - } - while (objc--) { - Tcl_Obj *objPtr = *objv++; - - /* - * Every argument is either a bytearray with a ("pure") - * value we know we can safely use, or it is an empty string. - * We don't need to copy bytes from the empty strings. - */ - - if (TclIsPureByteArray(objPtr)) { - Tcl_Size more; - unsigned char *src = Tcl_GetByteArrayFromObj(objPtr, &more); - memcpy(dst, src, more); - dst += more; - } - } - } else if ((allowUniChar && requestUniChar) || forceUniChar) { - /* Efficiently produce a pure Tcl_UniChar array result */ - Tcl_UniChar *dst; - - if (inPlace && !Tcl_IsShared(*objv)) { - Tcl_Size start; - - objResultPtr = *objv++; objc--; - - /* Ugly interface! Force resize of the unicode array. */ - TclGetUnicodeFromObj(objResultPtr, &start); - Tcl_InvalidateStringRep(objResultPtr); - if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) { - if (interp) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "concatenation failed: unable to alloc %" - TCL_Z_MODIFIER "u bytes", - UNICHAR_STRING_SIZE(length))); - Tcl_SetErrorCode(interp, "TCL", "MEMORY", (void *)NULL); - } - return NULL; - } - dst = TclGetUnicodeFromObj(objResultPtr, NULL) + start; - } else { - Tcl_UniChar ch = 0; - - /* Ugly interface! No scheme to init array size. */ - objResultPtr = TclNewUnicodeObj(&ch, 0); /* PANIC? */ - if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) { - Tcl_DecrRefCount(objResultPtr); - if (interp) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "concatenation failed: unable to alloc %" - TCL_Z_MODIFIER "u bytes", - UNICHAR_STRING_SIZE(length))); - Tcl_SetErrorCode(interp, "TCL", "MEMORY", (void *)NULL); - } - return NULL; - } - dst = TclGetUnicodeFromObj(objResultPtr, NULL); - } - while (objc--) { - Tcl_Obj *objPtr = *objv++; - - if ((objPtr->bytes == NULL) || (objPtr->length)) { - Tcl_Size more; - Tcl_UniChar *src = TclGetUnicodeFromObj(objPtr, &more); - memcpy(dst, src, more * sizeof(Tcl_UniChar)); - dst += more; - } - } - } else { - /* Efficiently concatenate string reps */ - char *dst; - - if (inPlace && !Tcl_IsShared(*objv)) { - Tcl_Size start; - - objResultPtr = *objv++; objc--; - - TclGetStringFromObj(objResultPtr, &start); - if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) { - if (interp) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "concatenation failed: unable to alloc %" TCL_SIZE_MODIFIER "d bytes", - length)); - Tcl_SetErrorCode(interp, "TCL", "MEMORY", (void *)NULL); - } - return NULL; - } - dst = Tcl_GetString(objResultPtr) + start; - - TclFreeInternalRep(objResultPtr); - } else { - TclNewObj(objResultPtr); /* PANIC? */ - if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) { - Tcl_DecrRefCount(objResultPtr); - if (interp) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "concatenation failed: unable to alloc %" TCL_SIZE_MODIFIER "d bytes", - length)); - Tcl_SetErrorCode(interp, "TCL", "MEMORY", (void *)NULL); - } - return NULL; - } - dst = Tcl_GetString(objResultPtr); - } - while (objc--) { - Tcl_Obj *objPtr = *objv++; - - if ((objPtr->bytes == NULL) || (objPtr->length)) { - Tcl_Size more; - char *src = TclGetStringFromObj(objPtr, &more); - - memcpy(dst, src, more); - dst += more; - } - } - /* Must NUL-terminate! */ - *dst = '\0'; - } - return objResultPtr; - - overflow: - if (interp) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "max size for a Tcl value (%" TCL_SIZE_MODIFIER "d bytes) exceeded", TCL_SIZE_MAX)); - Tcl_SetErrorCode(interp, "TCL", "MEMORY", (void *)NULL); - } - return NULL; -} - -/* - *--------------------------------------------------------------------------- - * - * TclStringCmp -- - * Compare two Tcl_Obj values as strings. - * - * Results: - * Like memcmp, return -1, 0, or 1. - * - * Side effects: - * String representations may be generated. Internal representation may - * be changed. - * - *--------------------------------------------------------------------------- - */ - -static int -UtfNmemcmp( - const void *csPtr, /* UTF string to compare to ct. */ - const void *ctPtr, /* UTF string cs is compared to. */ - size_t numBytes) /* Number of *bytes* to compare. */ -{ - const char *cs = (const char *)csPtr; - const char *ct = (const char *)ctPtr; - /* - * We can't simply call 'memcmp(cs, ct, numBytes);' because we need to - * check for Tcl's \xC0\x80 non-utf-8 null encoding. Otherwise utf-8 lexes - * fine in the strcmp manner. - */ - - int result = 0; - - for ( ; numBytes != 0; numBytes--, cs++, ct++) { - if (*cs != *ct) { - result = UCHAR(*cs) - UCHAR(*ct); - break; - } - } - if (numBytes && ((UCHAR(*cs) == 0xC0) || (UCHAR(*ct) == 0xC0))) { - unsigned char c1, c2; - - c1 = ((UCHAR(*cs) == 0xC0) && (UCHAR(cs[1]) == 0x80)) ? 0 : UCHAR(*cs); - c2 = ((UCHAR(*ct) == 0xC0) && (UCHAR(ct[1]) == 0x80)) ? 0 : UCHAR(*ct); - result = (c1 - c2); - } - return result; -} - -int -TclStringCmp( - Tcl_Obj *value1Ptr, - Tcl_Obj *value2Ptr, - int checkEq, /* comparison is only for equality */ - int nocase, /* comparison is not case sensitive */ - Tcl_Size reqlength) /* requested length in characters; - * negative to compare whole strings */ -{ - const char *s1, *s2; - int empty, match; - Tcl_Size length, s1len, s2len; - memCmpFn_t memCmpFn; - - if ((reqlength == 0) || (value1Ptr == value2Ptr)) { - /* - * Always match at 0 chars of if it is the same obj. - * Note: as documented reqlength negative means it is ignored - */ - match = 0; - } else { - if (!nocase && TclIsPureByteArray(value1Ptr) - && TclIsPureByteArray(value2Ptr)) { - /* - * Use binary versions of comparisons since that won't cause undue - * type conversions and it is much faster. Only do this if we're - * case-sensitive (which is all that really makes sense with byte - * arrays anyway, and we have no memcasecmp() for some reason... :^) - */ - - s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len); - s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len); - memCmpFn = memcmp; - } else if (TclHasInternalRep(value1Ptr, &tclUniCharStringType) - && TclHasInternalRep(value2Ptr, &tclUniCharStringType)) { - /* - * Do a Unicode-specific comparison if both of the args are of String - * type. If the char length == byte length, we can do a memcmp. In - * benchmark testing this proved the most efficient check between the - * Unicode and string comparison operations. - */ - - if (nocase) { - s1 = (char *) TclGetUnicodeFromObj(value1Ptr, &s1len); - s2 = (char *) TclGetUnicodeFromObj(value2Ptr, &s2len); - memCmpFn = TclUniCharNcasememcmp; - } else { - s1len = TclGetCharLength(value1Ptr); - s2len = TclGetCharLength(value2Ptr); - if ((s1len == value1Ptr->length) - && (value1Ptr->bytes != NULL) - && (s2len == value2Ptr->length) - && (value2Ptr->bytes != NULL)) { - /* each byte represents one character so s1l3n, s2l3n, and - * reqlength are in both bytes and characters - */ - s1 = value1Ptr->bytes; - s2 = value2Ptr->bytes; - memCmpFn = memcmp; - } else { - s1 = (char *) TclGetUnicodeFromObj(value1Ptr, NULL); - s2 = (char *) TclGetUnicodeFromObj(value2Ptr, NULL); - if ( -#if defined(WORDS_BIGENDIAN) - 1 -#else - checkEq -#endif - ) { - memCmpFn = memcmp; - s1len *= sizeof(Tcl_UniChar); - s2len *= sizeof(Tcl_UniChar); - if (reqlength > 0) { - reqlength *= sizeof(Tcl_UniChar); - } - } else { - memCmpFn = TclUniCharNmemcmp; - } - } - } - } else { - empty = TclCheckEmptyString(value1Ptr); - if (empty > 0) { - switch (TclCheckEmptyString(value2Ptr)) { - case -1: - s1 = 0; - s1len = 0; - s2 = TclGetStringFromObj(value2Ptr, &s2len); - break; - case 0: - match = -1; - goto matchdone; - case 1: - default: /* avoid warn: `s2` may be used uninitialized */ - match = 0; - goto matchdone; - } - } else if (TclCheckEmptyString(value2Ptr) > 0) { - switch (empty) { - case -1: - s2 = 0; - s2len = 0; - s1 = TclGetStringFromObj(value1Ptr, &s1len); - break; - case 0: - match = 1; - goto matchdone; - case 1: - default: /* avoid warn: `s1` may be used uninitialized */ - match = 0; - goto matchdone; - } - } else { - s1 = TclGetStringFromObj(value1Ptr, &s1len); - s2 = TclGetStringFromObj(value2Ptr, &s2len); - } - if (!nocase && checkEq && reqlength < 0) { - /* - * When we have equal-length we can check only for - * (in)equality. We can use memcmp in all (n)eq cases because - * we don't need to worry about lexical LE/BE variance. - */ - - memCmpFn = memcmp; - } else { - /* - * As a catch-all we will work with UTF-8. We cannot use - * memcmp() as that is unsafe with any string containing NUL - * (\xC0\x80 in Tcl's utf rep). We can use the more efficient - * TclpUtfNcmp2 if we are case-sensitive and no specific - * length was requested. - */ - - if ((reqlength < 0) && !nocase) { - memCmpFn = UtfNmemcmp; - } else { - s1len = TclNumUtfChars(s1, s1len); - s2len = TclNumUtfChars(s2, s2len); - memCmpFn = nocase ? TclUtfNcasememcmp : TclUtfNmemcmp; - } - } - } - - /* At this point s1len, s2len, and reqlength should by now have been - * adjusted so that they are all in the units expected by the selected - * comparison function. - */ - length = (s1len < s2len) ? s1len : s2len; - if (reqlength < 0) { - /* - * The requested length is negative, so ignore it by setting it - * to length + 1 to correct the match var. - */ - - reqlength = length + 1; - } else if (reqlength > 0 && reqlength < length) { - length = reqlength; - } - - if (checkEq && reqlength < 0 && (s1len != s2len)) { - match = 1; /* This will be reversed below. */ - } else { - /* - * The comparison function should compare up to the minimum byte - * length only. - */ - - match = memCmpFn(s1, s2, length); - } - if ((match == 0) && (reqlength > length)) { - match = s1len - s2len; - } - match = (match > 0) ? 1 : (match < 0) ? -1 : 0; - } - matchdone: - return match; -} - -/* - *--------------------------------------------------------------------------- - * - * TclStringFirst -- - * - * Implements the [string first] operation. - * - * Results: - * If needle is found as a substring of haystack, the index of the - * first instance of such a find is returned. If needle is not present - * as a substring of haystack, -1 is returned. - * - * Side effects: - * needle and haystack may have their Tcl_ObjType changed. - * - *--------------------------------------------------------------------------- - */ - -Tcl_Obj * -TclStringFirst( - Tcl_Obj *needle, - Tcl_Obj *haystack, - Tcl_Size start) -{ - Tcl_Size lh, ln = TclGetCharLength(needle); - Tcl_Size value = TCL_INDEX_NONE; - Tcl_UniChar *checkStr, *endStr, *uh, *un; - Tcl_Obj *obj; - - if (start < 0) { - start = 0; - } - if (ln == 0) { - /* We don't find empty substrings. Bizarre! - * Whenever this routine is turned into a proper substring - * finder, change to `return start` after limits imposed. */ - goto firstEnd; - } - - if (TclIsPureByteArray(needle) && TclIsPureByteArray(haystack)) { - unsigned char *end, *check, *bh; - unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln); - - /* Find bytes in bytes */ - bh = Tcl_GetByteArrayFromObj(haystack, &lh); - if ((lh < ln) || (start > lh - ln)) { - /* Don't start the loop if there cannot be a valid answer */ - goto firstEnd; - } - end = bh + lh; - - check = bh + start; - while (check + ln <= end) { - /* - * Look for the leading byte of the needle in the haystack - * starting at check and stopping when there's not enough room - * for the needle left. - */ - check = (unsigned char *)memchr(check, bn[0], (end + 1 - ln) - check); - if (check == NULL) { - /* Leading byte not found -> needle cannot be found. */ - goto firstEnd; - } - /* Leading byte found, check rest of needle. */ - if (0 == memcmp(check+1, bn+1, ln-1)) { - /* Checks! Return the successful index. */ - value = (check - bh); - goto firstEnd; - } - /* Rest of needle match failed; Iterate to continue search. */ - check++; - } - goto firstEnd; - } - - /* - * TODO: It might be nice to support some cases where it is not - * necessary to shimmer to &tclStringType to compute the result, - * and instead operate just on the objPtr->bytes values directly. - * However, we also do not want the answer to change based on the - * code pathway, or if it does we want that to be for some values - * we explicitly decline to support. Getting there will involve - * locking down in practice more firmly just what encodings produce - * what supported results for the objPtr->bytes values. For now, - * do only the well-defined Tcl_UniChar array search. - */ - - un = TclGetUnicodeFromObj(needle, &ln); - uh = TclGetUnicodeFromObj(haystack, &lh); - if ((lh < ln) || (start > lh - ln)) { - /* Don't start the loop if there cannot be a valid answer */ - goto firstEnd; - } - endStr = uh + lh; - - for (checkStr = uh + start; checkStr + ln <= endStr; checkStr++) { - if ((*checkStr == *un) && (0 == - memcmp(checkStr + 1, un + 1, (ln-1) * sizeof(Tcl_UniChar)))) { - value = (checkStr - uh); - goto firstEnd; - } - } - firstEnd: - TclNewIndexObj(obj, value); - return obj; -} - /* *--------------------------------------------------------------------------- * - * TclStringLast -- - * - * Implements the [string last] operation. - * - * Results: - * If needle is found as a substring of haystack, the index of the - * last instance of such a find is returned. If needle is not present - * as a substring of haystack, -1 is returned. - * - * Side effects: - * needle and haystack may have their Tcl_ObjType changed. - * - *--------------------------------------------------------------------------- - */ - -Tcl_Obj * -TclStringLast( - Tcl_Obj *needle, - Tcl_Obj *haystack, - Tcl_Size last) -{ - Tcl_Size lh, ln = TclGetCharLength(needle); - Tcl_Size value = TCL_INDEX_NONE; - Tcl_UniChar *checkStr, *uh, *un; - Tcl_Obj *obj; - - if (ln == 0) { - /* - * We don't find empty substrings. Bizarre! - * - * TODO: When we one day make this a true substring - * finder, change this to "return last", after limitation. - */ - goto lastEnd; - } - - if (TclIsPureByteArray(needle) && TclIsPureByteArray(haystack)) { - unsigned char *check, *bh = Tcl_GetByteArrayFromObj(haystack, &lh); - unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln); - - if (last >= lh) { - last = lh - 1; - } - if (last + 1 < ln) { - /* Don't start the loop if there cannot be a valid answer */ - goto lastEnd; - } - check = bh + last + 1 - ln; - - while (check >= bh) { - if ((*check == bn[0]) - && (0 == memcmp(check+1, bn+1, ln-1))) { - value = (check - bh); - goto lastEnd; - } - check--; - } - goto lastEnd; - } - - uh = TclGetUnicodeFromObj(haystack, &lh); - un = TclGetUnicodeFromObj(needle, &ln); - - if (last >= lh) { - last = lh - 1; - } - if (last + 1 < ln) { - /* Don't start the loop if there cannot be a valid answer */ - goto lastEnd; - } - checkStr = uh + last + 1 - ln; - while (checkStr >= uh) { - if ((*checkStr == un[0]) - && (0 == memcmp(checkStr+1, un+1, (ln-1)*sizeof(Tcl_UniChar)))) { - value = (checkStr - uh); - goto lastEnd; - } - checkStr--; - } - lastEnd: - TclNewIndexObj(obj, value); - return obj; -} - -/* - *--------------------------------------------------------------------------- - * - * TclStringReverse -- + * TclStringObjReverse -- * * Implements the [string reverse] operation. * * Results: - * A Tcl value which is the [string reverse] of the argument supplied. - * When sharing rules permit and the caller requests, the returned value - * might be the argument with modifications done in place. + * An unshared Tcl value which is the [string reverse] of the argument + * supplied. When sharing rules permit, the returned value might be + * the argument with modifications done in place. * * Side effects: * May allocate a new Tcl_Obj. @@ -4273,276 +2758,69 @@ TclStringLast( *--------------------------------------------------------------------------- */ -static void -ReverseBytes( - unsigned char *to, /* Copy bytes into here... */ - unsigned char *from, /* ...from here... */ - Tcl_Size count) /* Until this many are copied, */ - /* reversing as you go. */ -{ - unsigned char *src = from + count; - - if (to == from) { - /* Reversing in place */ - while (--src > to) { - unsigned char c = *src; - - *src = *to; - *to++ = c; - } - } else { - while (--src >= from) { - *to++ = *src; - } - } -} - Tcl_Obj * -TclStringReverse( - Tcl_Obj *objPtr, - int flags) +TclStringObjReverse( + Tcl_Obj *objPtr) { - UniCharString *stringPtr; - Tcl_UniChar ch = 0; - int inPlace = flags & TCL_STRING_IN_PLACE; - - if (TclIsPureByteArray(objPtr)) { - Tcl_Size numBytes; - unsigned char *from = Tcl_GetByteArrayFromObj(objPtr, &numBytes); + String *stringPtr; + int numChars = Tcl_GetCharLength(objPtr); + int i = 0, lastCharIdx = numChars - 1; + char *bytes; - if (!inPlace || Tcl_IsShared(objPtr)) { - objPtr = Tcl_NewByteArrayObj(NULL, numBytes); - } - ReverseBytes(Tcl_GetByteArrayFromObj(objPtr, (Tcl_Size *)NULL), from, numBytes); + if (numChars <= 1) { return objPtr; } - SetStringFromAny(NULL, objPtr); - stringPtr = GET_UNICHAR_STRING(objPtr); - + stringPtr = GET_STRING(objPtr); if (stringPtr->hasUnicode) { - Tcl_UniChar *from = TclGetUnicodeFromObj(objPtr, NULL); - stringPtr = GET_UNICHAR_STRING(objPtr); - Tcl_UniChar *src = from + stringPtr->numChars; - Tcl_UniChar *to; + Tcl_UniChar *source = stringPtr->unicode; - if (!inPlace || Tcl_IsShared(objPtr)) { - /* - * Create a non-empty, pure Unicode value, so we can coax - * Tcl_SetObjLength into growing the Unicode rep buffer. - */ + if (Tcl_IsShared(objPtr)) { + Tcl_UniChar *dest, ch = 0; - objPtr = TclNewUnicodeObj(&ch, 1); - Tcl_SetObjLength(objPtr, stringPtr->numChars); - to = TclGetUnicodeFromObj(objPtr, NULL); - stringPtr = GET_UNICHAR_STRING(objPtr); - while (--src >= from) { - *to++ = *src; - } - } else { /* - * Reversing in place. + * Create a non-empty, pure unicode value, so we can coax + * Tcl_SetObjLength into growing the unicode rep buffer. */ - while (--src > from) { - ch = *src; - *src = *from; - *from++ = ch; - } - } - } - - if (objPtr->bytes) { - Tcl_Size numChars = stringPtr->numChars; - Tcl_Size numBytes = objPtr->length; - char *to, *from = objPtr->bytes; + Tcl_Obj *resultPtr = Tcl_NewUnicodeObj(&ch, 1); + Tcl_SetObjLength(resultPtr, numChars); + dest = Tcl_GetUnicode(resultPtr); - if (!inPlace || Tcl_IsShared(objPtr)) { - TclNewObj(objPtr); - Tcl_SetObjLength(objPtr, numBytes); - } - to = objPtr->bytes; - - if (numChars < numBytes) { - /* - * Either numChars == -1 and we don't know how many chars are - * represented by objPtr->bytes and we need Pass 1 just in case, - * or numChars >= 0 and we know we have fewer chars than bytes, so - * we know there's a multibyte character needing Pass 1. - * - * Pass 1. Reverse the bytes of each multi-byte character. - */ - - Tcl_Size bytesLeft = numBytes; - int chw; - - while (bytesLeft) { - /* - * NOTE: We know that the from buffer is NUL-terminated. It's - * part of the contract for objPtr->bytes values. Thus, we can - * skip calling Tcl_UtfCharComplete() here. - */ - - int bytesInChar = TclUtfToUniChar(from, &chw); - - ReverseBytes((unsigned char *)to, (unsigned char *)from, - bytesInChar); - to += bytesInChar; - from += bytesInChar; - bytesLeft -= bytesInChar; + while (i < numChars) { + dest[i++] = source[lastCharIdx--]; } - - from = to = objPtr->bytes; + return resultPtr; } - /* Pass 2. Reverse all the bytes. */ - ReverseBytes((unsigned char *)to, (unsigned char *)from, numBytes); - } - return objPtr; -} - -/* - *--------------------------------------------------------------------------- - * - * TclStringReplace -- - * - * Implements the inner engine of the [string replace] and - * [string insert] commands. - * - * The result is a concatenation of a prefix from objPtr, characters - * 0 through first-1, the insertPtr string value, and a suffix from - * objPtr, characters from first + count to the end. The effect is as if - * the inner substring of characters first through first+count-1 are - * removed and replaced with insertPtr. If insertPtr is NULL, it is - * treated as an empty string. When passed the flag TCL_STRING_IN_PLACE, - * this routine will try to do the work within objPtr, so long as no - * sharing forbids it. Without that request, or as needed, a new Tcl - * value will be allocated to be the result. - * - * Results: - * A Tcl value that is the result of the substring replacement. May - * return NULL in case of an error. When NULL is returned and interp is - * non-NULL, error information is left in interp - * - *--------------------------------------------------------------------------- - */ - -Tcl_Obj * -TclStringReplace( - Tcl_Interp *interp, /* For error reporting, may be NULL */ - Tcl_Obj *objPtr, /* String to act upon */ - Tcl_Size first, /* First index to replace */ - Tcl_Size count, /* How many chars to replace */ - Tcl_Obj *insertPtr, /* Replacement string, may be NULL */ - int flags) /* TCL_STRING_IN_PLACE => attempt in-place */ -{ - int inPlace = flags & TCL_STRING_IN_PLACE; - Tcl_Obj *result; - - /* Replace nothing with nothing */ - if ((insertPtr == NULL) && (count <= 0)) { - if (inPlace) { - return objPtr; - } else { - return Tcl_DuplicateObj(objPtr); + while (i < lastCharIdx) { + Tcl_UniChar tmp = source[lastCharIdx]; + source[lastCharIdx--] = source[i]; + source[i++] = tmp; } - } - if (first < 0) { - first = 0; + TclInvalidateStringRep(objPtr); + stringPtr->allocated = 0; + return objPtr; } - /* - * The caller very likely had to call Tcl_GetCharLength() or similar - * to be able to process index values. This means it is likely that - * objPtr is either a proper "bytearray" or a "string" or else it has - * a known and short string rep. - */ - - if (TclIsPureByteArray(objPtr)) { - Tcl_Size numBytes; - unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &numBytes); - - if (insertPtr == NULL) { - /* Replace something with nothing. */ - - assert ( first <= numBytes ) ; - assert ( count <= numBytes ) ; - assert ( first + count <= numBytes ) ; - - result = Tcl_NewByteArrayObj(NULL, numBytes - count);/* PANIC? */ - TclAppendBytesToByteArray(result, bytes, first); - TclAppendBytesToByteArray(result, bytes + first + count, - numBytes - count - first); - return result; - } - - /* Replace everything */ - if ((first == 0) && (count == numBytes)) { - return insertPtr; - } - - if (TclIsPureByteArray(insertPtr)) { - Tcl_Size newBytes; - unsigned char *iBytes - = Tcl_GetByteArrayFromObj(insertPtr, &newBytes); - - if (count == newBytes && inPlace && !Tcl_IsShared(objPtr)) { - /* - * Removal count and replacement count are equal. - * Other conditions permit. Do in-place splice. - */ - - memcpy(bytes + first, iBytes, count); - Tcl_InvalidateStringRep(objPtr); - return objPtr; - } - - if (newBytes > (TCL_SIZE_MAX - (numBytes - count))) { - if (interp) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "max size for a Tcl value (%" TCL_SIZE_MODIFIER "d bytes) exceeded", - TCL_SIZE_MAX)); - Tcl_SetErrorCode(interp, "TCL", "MEMORY", (void *)NULL); - } - return NULL; - } - result = Tcl_NewByteArrayObj(NULL, numBytes - count + newBytes); - /* PANIC? */ - Tcl_SetByteArrayLength(result, 0); - TclAppendBytesToByteArray(result, bytes, first); - TclAppendBytesToByteArray(result, iBytes, newBytes); - TclAppendBytesToByteArray(result, bytes + first + count, - numBytes - count - first); - return result; + bytes = TclGetString(objPtr); + if (Tcl_IsShared(objPtr)) { + char *dest; + Tcl_Obj *resultPtr = Tcl_NewObj(); + Tcl_SetObjLength(resultPtr, numChars); + dest = TclGetString(resultPtr); + while (i < numChars) { + dest[i++] = bytes[lastCharIdx--]; } - - /* Flow through to try other approaches below */ + return resultPtr; } - /* - * TODO: Figure out how not to generate a Tcl_UniChar array rep - * when it can be determined objPtr->bytes points to a string of - * all single-byte characters so we can index it directly. - */ - - /* The traditional implementation... */ - { - Tcl_Size numChars; - Tcl_UniChar *ustring = TclGetUnicodeFromObj(objPtr, &numChars); - - /* TODO: Is there an in-place option worth pursuing here? */ - - result = TclNewUnicodeObj(ustring, first); - if (insertPtr) { - Tcl_AppendObjToObj(result, insertPtr); - } - if ((first + count) < numChars) { - TclAppendUnicodeToObj(result, ustring + first + count, - numChars - first - count); - } - - return result; + while (i < lastCharIdx) { + char tmp = bytes[lastCharIdx]; + bytes[lastCharIdx--] = bytes[i]; + bytes[i++] = tmp; } + return objPtr; } /* @@ -4551,7 +2829,7 @@ TclStringReplace( * FillUnicodeRep -- * * Populate the Unicode internal rep with the Unicode form of its string - * rep. The object must already have a "String" internal rep. + * rep. The object must alread have a "String" internal rep. * * Results: * None. @@ -4567,59 +2845,35 @@ FillUnicodeRep( Tcl_Obj *objPtr) /* The object in which to fill the unicode * rep. */ { - UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr); - - ExtendUnicodeRepWithString(objPtr, objPtr->bytes, objPtr->length, - stringPtr->numChars); -} - -static void -ExtendUnicodeRepWithString( - Tcl_Obj *objPtr, - const char *bytes, - Tcl_Size numBytes, - Tcl_Size numAppendChars) -{ - UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr); - Tcl_Size needed, numOrigChars = 0; - Tcl_UniChar *dst, unichar = 0; + String *stringPtr; + size_t uallocated; + char *srcEnd, *src = objPtr->bytes; + Tcl_UniChar *dst; - if (stringPtr->hasUnicode) { - numOrigChars = stringPtr->numChars; - } - if (numAppendChars < 0) { - TclNumUtfCharsM(numAppendChars, bytes, numBytes); + stringPtr = GET_STRING(objPtr); + if (stringPtr->numChars == -1) { + stringPtr->numChars = Tcl_NumUtfChars(src, objPtr->length); } - needed = numOrigChars + numAppendChars; - uniCharStringCheckLimits(needed); + stringPtr->hasUnicode = (stringPtr->numChars > 0); - if (needed > stringPtr->maxChars) { - GrowUnicodeBuffer(objPtr, needed); - stringPtr = GET_UNICHAR_STRING(objPtr); + stringCheckLimits(stringPtr->numChars); + uallocated = STRING_UALLOC(stringPtr->numChars); + if (uallocated > stringPtr->uallocated) { + GrowUnicodeBuffer(objPtr, stringPtr->numChars); + stringPtr = GET_STRING(objPtr); } - stringPtr->hasUnicode = 1; - if (bytes) { - stringPtr->numChars = needed; - } else { - numAppendChars = 0; - } - dst = stringPtr->unicode + numOrigChars; - if (numAppendChars-- > 0) { - bytes += TclUtfToUniChar(bytes, &unichar); - /* join upper/lower surrogate */ - if (bytes && (stringPtr->unicode[numOrigChars - 1] | 0x3FF) == 0xDBFF && (unichar | 0x3FF) == 0xDFFF) { - stringPtr->numChars--; - unichar = ((stringPtr->unicode[numOrigChars - 1] & 0x3FF) << 10) + (unichar & 0x3FF) + 0x10000; - dst--; - } - *dst++ = unichar; - while (numAppendChars-- > 0) { - bytes += TclUtfToUniChar(bytes, &unichar); - *dst++ = unichar; - } + /* + * Convert src to Unicode and store the coverted data in "unicode". + */ + + srcEnd = src + objPtr->length; + for (dst = stringPtr->unicode; src < srcEnd; dst++) { + src += TclUtfToUniChar(src, dst); } *dst = 0; + + SET_STRING(objPtr, stringPtr); } /* @@ -4642,47 +2896,36 @@ ExtendUnicodeRepWithString( static void DupStringInternalRep( - Tcl_Obj *srcPtr, /* Object with internal rep to copy. Must have + register Tcl_Obj *srcPtr, /* Object with internal rep to copy. Must have * an internal rep of type "String". */ - Tcl_Obj *copyPtr) /* Object with internal rep to set. Must not + register Tcl_Obj *copyPtr) /* Object with internal rep to set. Must not * currently have an internal rep.*/ { - UniCharString *srcStringPtr = GET_UNICHAR_STRING(srcPtr); - UniCharString *copyStringPtr = NULL; + String *srcStringPtr = GET_STRING(srcPtr); + String *copyStringPtr = NULL; - if (srcStringPtr->numChars == TCL_INDEX_NONE) { - /* - * The String struct in the source value holds zero useful data. Don't - * bother copying it. Don't even bother allocating space in which to - * copy it. Just let the copy be untyped. - */ - return; - } + /* + * If the src obj is a string of 1-byte Utf chars, then copy the string + * rep of the source object and create an "empty" Unicode internal rep for + * the new object. Otherwise, copy Unicode internal rep, and invalidate + * the string rep of the new object. + */ - if (srcStringPtr->hasUnicode) { - int copyMaxChars; + if (srcStringPtr->hasUnicode == 0) { + copyStringPtr = (String *) ckalloc(sizeof(String)); + copyStringPtr->uallocated = 0; + } else { + copyStringPtr = (String *) ckalloc( + STRING_SIZE(srcStringPtr->uallocated)); + copyStringPtr->uallocated = srcStringPtr->uallocated; - if (srcStringPtr->maxChars / 2 >= srcStringPtr->numChars) { - copyMaxChars = 2 * srcStringPtr->numChars; - } else { - copyMaxChars = srcStringPtr->maxChars; - } - copyStringPtr = uniCharStringAttemptAlloc(copyMaxChars); - if (copyStringPtr == NULL) { - copyMaxChars = srcStringPtr->numChars; - copyStringPtr = uniCharStringAlloc(copyMaxChars); - } - copyStringPtr->maxChars = copyMaxChars; memcpy(copyStringPtr->unicode, srcStringPtr->unicode, - srcStringPtr->numChars * sizeof(Tcl_UniChar)); + (size_t) srcStringPtr->numChars * sizeof(Tcl_UniChar)); copyStringPtr->unicode[srcStringPtr->numChars] = 0; - } else { - copyStringPtr = uniCharStringAlloc(0); - copyStringPtr->maxChars = 0; - copyStringPtr->unicode[0] = 0; } - copyStringPtr->hasUnicode = srcStringPtr->hasUnicode; copyStringPtr->numChars = srcStringPtr->numChars; + copyStringPtr->hasUnicode = srcStringPtr->hasUnicode; + copyStringPtr->allocated = srcStringPtr->allocated; /* * Tricky point: the string value was copied by generic object management @@ -4690,10 +2933,10 @@ DupStringInternalRep( * source object. */ - copyStringPtr->allocated = copyPtr->bytes ? copyPtr->length : 0; + copyStringPtr->allocated = copyPtr->length; - SET_UNICHAR_STRING(copyPtr, copyStringPtr); - copyPtr->typePtr = &tclUniCharStringType; + SET_STRING(copyPtr, copyStringPtr); + copyPtr->typePtr = &tclStringType; } /* @@ -4707,38 +2950,52 @@ DupStringInternalRep( * This operation always succeeds and returns TCL_OK. * * Side effects: - * Any old internal representation for objPtr is freed and the internal - * representation is set to &tclStringType. + * Any old internal reputation for objPtr is freed and the internal + * representation is set to "String". * *---------------------------------------------------------------------- */ static int SetStringFromAny( - TCL_UNUSED(Tcl_Interp *), - Tcl_Obj *objPtr) /* The object to convert. */ + Tcl_Interp *interp, /* Used for error reporting if not NULL. */ + register Tcl_Obj *objPtr) /* The object to convert. */ { - if (!TclHasInternalRep(objPtr, &tclUniCharStringType)) { - UniCharString *stringPtr = uniCharStringAlloc(0); + /* + * The Unicode object is optimized for the case where each UTF char in a + * string is only one byte. In this case, we store the value of numChars, + * but we don't copy the bytes to the unicodeObj->unicode. + */ - /* - * Convert whatever we have into an untyped value. Just A String. - */ + if (objPtr->typePtr != &tclStringType) { + String *stringPtr; - (void) TclGetString(objPtr); - TclFreeInternalRep(objPtr); + if (objPtr->typePtr != NULL) { + if (objPtr->bytes == NULL) { + objPtr->typePtr->updateStringProc(objPtr); + } + TclFreeIntRep(objPtr); + } + objPtr->typePtr = &tclStringType; /* - * Create a basic String internalrep that just points to the UTF-8 string - * already in place at objPtr->bytes. + * Allocate enough space for the basic String structure. */ + stringPtr = (String *) ckalloc(sizeof(String)); stringPtr->numChars = -1; - stringPtr->allocated = objPtr->length; - stringPtr->maxChars = 0; + stringPtr->uallocated = 0; stringPtr->hasUnicode = 0; - SET_UNICHAR_STRING(objPtr, stringPtr); - objPtr->typePtr = &tclUniCharStringType; + + if (objPtr->bytes != NULL) { + stringPtr->allocated = objPtr->length; + if (objPtr->bytes != tclEmptyStringRep) { + objPtr->bytes[objPtr->length] = 0; + } + } else { + objPtr->length = 0; + } + SET_STRING(objPtr, stringPtr); } return TCL_OK; } @@ -4755,7 +3012,7 @@ SetStringFromAny( * None. * * Side effects: - * The object's string may be set by converting its Unicode representation + * The object's string may be set by converting its Unicode represention * to UTF format. * *---------------------------------------------------------------------- @@ -4765,91 +3022,57 @@ static void UpdateStringOfString( Tcl_Obj *objPtr) /* Object with string rep to update. */ { - UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr); - - /* - * This routine is only called when we need to generate the - * string rep objPtr->bytes because it does not exist -- it is NULL. - * In that circumstance, any lingering claim about the size of - * memory pointed to by that NULL pointer is clearly bogus, and - * needs a reset. - */ - - stringPtr->allocated = 0; - - if (stringPtr->numChars == 0) { - TclInitEmptyStringRep(objPtr); - } else { - (void) ExtendStringRepWithUnicode(objPtr, stringPtr->unicode, - stringPtr->numChars); - } -} - -static Tcl_Size -ExtendStringRepWithUnicode( - Tcl_Obj *objPtr, - const Tcl_UniChar *unicode, - Tcl_Size numChars) -{ - /* - * Precondition: this is the "string" Tcl_ObjType. - */ - - Tcl_Size i, origLength, size = 0; + int i, size; + Tcl_UniChar *unicode; + char dummy[TCL_UTF_MAX]; char *dst; - UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr); - - if (numChars < 0) { - numChars = UnicodeLength(unicode); - } + String *stringPtr; - if (numChars == 0) { - return 0; - } + stringPtr = GET_STRING(objPtr); + if ((objPtr->bytes == NULL) || (stringPtr->allocated == 0)) { + if (stringPtr->numChars <= 0) { + /* + * If there is no Unicode rep, or the string has 0 chars, then set + * the string rep to an empty string. + */ - if (objPtr->bytes == NULL) { - objPtr->length = 0; - } - size = origLength = objPtr->length; + objPtr->bytes = tclEmptyStringRep; + objPtr->length = 0; + return; + } - /* - * Quick cheap check in case we have more than enough room. - */ + unicode = stringPtr->unicode; - if (numChars <= (TCL_SIZE_MAX - size)/TCL_UTF_MAX - && stringPtr->allocated >= size + numChars * TCL_UTF_MAX) { - goto copyBytes; - } + /* + * Translate the Unicode string to UTF. "size" will hold the amount of + * space the UTF string needs. + */ - for (i = 0; i < numChars && size >= 0; i++) { - size += TclUtfCount(unicode[i]); - } - if (size < 0) { - Tcl_Panic("max size for a Tcl value (%" TCL_SIZE_MODIFIER "d bytes) exceeded", TCL_SIZE_MAX); - } + if (stringPtr->numChars <= INT_MAX/TCL_UTF_MAX + && stringPtr->allocated >= stringPtr->numChars * (size_t)TCL_UTF_MAX) { + goto copyBytes; + } - /* - * Grow space if needed. - */ + size = 0; + for (i = 0; i < stringPtr->numChars && size >= 0; i++) { + size += Tcl_UniCharToUtf((int) unicode[i], dummy); + } + if (size < 0) { + Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX); + } - if (size > stringPtr->allocated) { - GrowStringBuffer(objPtr, size, 1); - } + objPtr->bytes = (char *) ckalloc((unsigned) (size + 1)); + objPtr->length = size; + stringPtr->allocated = size; - copyBytes: - dst = objPtr->bytes + origLength; - for (i = 0; i < numChars; i++) { - if (LOW_SURROGATE(unicode[i]) && ((i == 0) || !HIGH_SURROGATE(unicode[i-1]))) { - *dst = 0; /* In case of lower surrogate, don't try to combine */ - } - dst += Tcl_UniCharToUtf(unicode[i], dst); - if (HIGH_SURROGATE(unicode[i]) && ((i+1 >= numChars) || !LOW_SURROGATE(unicode[i+1]))) { - dst += Tcl_UniCharToUtf(-1, dst); + copyBytes: + dst = objPtr->bytes; + for (i = 0; i < stringPtr->numChars; i++) { + dst += Tcl_UniCharToUtf(unicode[i], dst); } + *dst = '\0'; } - *dst = '\0'; - objPtr->length = dst - objPtr->bytes; - return numChars; + return; } /* @@ -4857,7 +3080,7 @@ ExtendStringRepWithUnicode( * * FreeStringInternalRep -- * - * Deallocate the storage associated with a (UniChar)String data object's internal + * Deallocate the storage associated with a String data object's internal * representation. * * Results: @@ -4873,7 +3096,7 @@ static void FreeStringInternalRep( Tcl_Obj *objPtr) /* Object with internal rep to free. */ { - ckfree(GET_STRING(objPtr)); + ckfree((char *) GET_STRING(objPtr)); objPtr->typePtr = NULL; } |
