diff options
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r-- | generic/tclStringObj.c | 971 |
1 files changed, 727 insertions, 244 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 0195656..c103bea 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -38,6 +38,7 @@ #include "tommath.h" #include "tclStringRep.h" +#include "assert.h" /* * Prototypes for functions defined later in this file: */ @@ -140,8 +141,8 @@ GrowStringBuffer( objPtr->bytes = NULL; } if (flag == 0 || stringPtr->allocated > 0) { - attempt = 2 * needed; - if (attempt >= 0) { + if (needed <= INT_MAX / 2) { + attempt = 2 * needed; ptr = attemptckrealloc(objPtr->bytes, attempt + 1); } if (ptr == NULL) { @@ -190,8 +191,8 @@ GrowUnicodeBuffer( * Subsequent appends - apply the growth algorithm. */ - attempt = 2 * needed; - if (attempt >= 0 && attempt <= STRING_MAXCHARS) { + if (needed <= STRING_MAXCHARS / 2) { + attempt = 2 * needed; ptr = stringAttemptRealloc(stringPtr, attempt); } if (ptr == NULL) { @@ -418,9 +419,14 @@ Tcl_GetCharLength( } /* - * Optimize the case where we're really dealing with a bytearray object - * without string representation; we don't need to convert to a string to - * perform the get-length operation. + * Optimize the case where we're really dealing with a bytearray object; + * we don't need to convert to a string to perform the get-length operation. + * + * Starting in Tcl 8.7, we check for a "pure" bytearray, because the + * machinery behind that test is using a proper bytearray ObjType. We + * could also compute length of an improper bytearray without shimmering + * but there's no value in that. We *want* to shimmer an improper bytearray + * because improper bytearrays have worthless internal reps. */ if (TclIsPureByteArray(objPtr)) { @@ -452,10 +458,53 @@ Tcl_GetCharLength( /* *---------------------------------------------------------------------- * + * TclCheckEmptyString -- + * + * Determine whether the string value of an object is or would be the + * empty string, without generating a string representation. + * + * Results: + * Returns 1 if empty, 0 if not, and -1 if unknown. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ +int +TclCheckEmptyString( + Tcl_Obj *objPtr) +{ + int length = -1; + + if (objPtr->bytes == &tclEmptyString) { + return TCL_EMPTYSTRING_YES; + } + + if (TclListObjIsCanonical(objPtr)) { + Tcl_ListObjLength(NULL, objPtr, &length); + return length == 0; + } + + if (TclIsPureDict(objPtr)) { + Tcl_DictObjSize(NULL, objPtr, &length); + return length == 0; + } + + if (objPtr->bytes == NULL) { + return TCL_EMPTYSTRING_UNKNOWN; + } + return objPtr->length == 0; +} + +/* + *---------------------------------------------------------------------- + * * Tcl_GetUniChar -- * - * Get the index'th Unicode character from the String object. The index - * is assumed to be in the appropriate range. + * Get the index'th Unicode character from the String object. If index + * is out of range or it references a low surrogate preceded by a high + * surrogate, the result = -1; * * Results: * Returns the index'th Unicode character in the Object. @@ -466,24 +515,31 @@ Tcl_GetCharLength( *---------------------------------------------------------------------- */ -Tcl_UniChar +int Tcl_GetUniChar( Tcl_Obj *objPtr, /* The object to get the Unicode charater * from. */ int index) /* Get the index'th Unicode character. */ { String *stringPtr; + int ch, length; + + if (index < 0) { + return -1; + } /* * Optimize the case where we're really dealing with a bytearray object - * without string representation; we don't need to convert to a string to - * perform the indexing operation. + * we don't need to convert to a string to perform the indexing operation. */ if (TclIsPureByteArray(objPtr)) { - unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, NULL); + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); + if (index >= length) { + return -1; + } - return (Tcl_UniChar) bytes[index]; + return (int) bytes[index]; } /* @@ -507,7 +563,28 @@ Tcl_GetUniChar( FillUnicodeRep(objPtr); stringPtr = GET_STRING(objPtr); } - return stringPtr->unicode[index]; + + if (index >= stringPtr->numChars) { + return -1; + } + ch = stringPtr->unicode[index]; +#if TCL_UTF_MAX <= 4 + /* See: bug [11ae2be95dac9417] */ + if ((ch & 0xF800) == 0xD800) { + if (ch & 0x400) { + if ((index > 0) + && ((stringPtr->unicode[index-1] & 0xFC00) == 0xD800)) { + ch = -1; /* low surrogate preceded by high surrogate */ + } + } else if ((++index < stringPtr->numChars) + && ((stringPtr->unicode[index] & 0xFC00) == 0xDC00)) { + /* high surrogate followed by low surrogate */ + ch = (((ch & 0x3FF) << 10) | + (stringPtr->unicode[index] & 0x3FF)) + 0x10000; + } + } +#endif + return ch; } /* @@ -517,7 +594,7 @@ Tcl_GetUniChar( * * Get the Unicode form of the String object. If the object is not * already a String object, it will be converted to one. If the String - * object does not have a Unicode rep, then one is create from the UTF + * object does not have a Unicode rep, then one is created from the UTF * string format. * * Results: @@ -529,6 +606,8 @@ Tcl_GetUniChar( *---------------------------------------------------------------------- */ +#ifndef TCL_NO_DEPRECATED +#undef Tcl_GetUnicode Tcl_UniChar * Tcl_GetUnicode( Tcl_Obj *objPtr) /* The object to find the unicode string @@ -536,6 +615,7 @@ Tcl_GetUnicode( { return Tcl_GetUnicodeFromObj(objPtr, NULL); } +#endif /* TCL_NO_DEPRECATED */ /* *---------------------------------------------------------------------- @@ -631,17 +711,27 @@ Tcl_GetRange( { Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ String *stringPtr; + int length; + + if (first < 0) { + first = 0; + } /* * Optimize the case where we're really dealing with a bytearray object - * without string representation; we don't need to convert to a string to - * perform the substring operation. + * we don't need to convert to a string to perform the substring operation. */ if (TclIsPureByteArray(objPtr)) { - unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, NULL); + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); - return Tcl_NewByteArrayObj(bytes+first, last-first+1); + if (last >= length) { + last = length - 1; + } + if (last < first) { + return Tcl_NewObj(); + } + return Tcl_NewByteArrayObj(bytes + first, last - first + 1); } /* @@ -660,6 +750,12 @@ Tcl_GetRange( TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length); } if (stringPtr->numChars == objPtr->length) { + if (last >= stringPtr->numChars) { + last = stringPtr->numChars - 1; + } + if (last < first) { + return Tcl_NewObj(); + } newObjPtr = Tcl_NewStringObj(objPtr->bytes + first, last-first+1); /* @@ -674,8 +770,25 @@ Tcl_GetRange( FillUnicodeRep(objPtr); stringPtr = GET_STRING(objPtr); } - - return Tcl_NewUnicodeObj(stringPtr->unicode + first, last-first+1); + if (last > stringPtr->numChars) { + last = stringPtr->numChars; + } + if (last < first) { + return Tcl_NewObj(); + } +#if TCL_UTF_MAX <= 4 + /* See: bug [11ae2be95dac9417] */ + if ((first > 0) && ((stringPtr->unicode[first] & 0xFC00) == 0xDC00) + && ((stringPtr->unicode[first-1] & 0xFC00) == 0xD800)) { + ++first; + } + if ((last + 1 < stringPtr->numChars) + && ((stringPtr->unicode[last+1] & 0xFC00) == 0xDC00) + && ((stringPtr->unicode[last] & 0xFC00) == 0xD800)) { + ++last; + } +#endif + return Tcl_NewUnicodeObj(stringPtr->unicode + first, last - first + 1); } /* @@ -1232,46 +1345,52 @@ Tcl_AppendObjToObj( /* * Handle append of one bytearray object to another as a special case. - * Note that we only do this when the objects don't have string reps; if - * it did, then appending the byte arrays together could well lose - * information; this is a special-case optimization only. + * Note that we only do this when the objects are pure so that the + * bytearray faithfully represent the true value; Otherwise appending the + * byte arrays together could lose information; */ if ((TclIsPureByteArray(objPtr) || objPtr->bytes == &tclEmptyString) && TclIsPureByteArray(appendObjPtr)) { - /* * You might expect the code here to be * * bytes = Tcl_GetByteArrayFromObj(appendObjPtr, &length); * TclAppendBytesToByteArray(objPtr, bytes, length); * - * and essentially all of the time that would be fine. However, - * it would run into trouble in the case where objPtr and - * appendObjPtr point to the same thing. That may never be a - * good idea. It seems to violate Copy On Write, and we don't - * have any tests for the situation, since making any Tcl commands - * that call Tcl_AppendObjToObj() do that appears impossible - * (They honor Copy On Write!). For the sake of extensions that - * go off into that realm, though, here's a more complex approach - * that can handle all the cases. + * and essentially all of the time that would be fine. However, it + * would run into trouble in the case where objPtr and appendObjPtr + * point to the same thing. That may never be a good idea. It seems to + * violate Copy On Write, and we don't have any tests for the + * situation, since making any Tcl commands that call + * Tcl_AppendObjToObj() do that appears impossible (They honor Copy On + * Write!). For the sake of extensions that go off into that realm, + * though, here's a more complex approach that can handle all the + * cases. + * + * First, get the lengths. */ - /* Get lengths */ int lengthSrc; (void) Tcl_GetByteArrayFromObj(objPtr, &length); (void) Tcl_GetByteArrayFromObj(appendObjPtr, &lengthSrc); - /* Grow buffer enough for the append */ + /* + * Grow buffer enough for the append. + */ + TclAppendBytesToByteArray(objPtr, NULL, lengthSrc); - /* Reset objPtr back to the original value */ + /* + * Reset objPtr back to the original value. + */ + Tcl_SetByteArrayLength(objPtr, length); /* - * Now do the append knowing that buffer growth cannot cause - * any trouble. + * Now do the append knowing that buffer growth cannot cause any + * trouble. */ TclAppendBytesToByteArray(objPtr, @@ -1319,6 +1438,7 @@ Tcl_AppendObjToObj( numChars = stringPtr->numChars; if ((numChars >= 0) && (appendObjPtr->typePtr == &tclStringType)) { String *appendStringPtr = GET_STRING(appendObjPtr); + appendNumChars = appendStringPtr->numChars; } @@ -1808,6 +1928,11 @@ Tcl_AppendFormatToObj( width = 0; if (isdigit(UCHAR(ch))) { width = strtoul(format, &end, 10); + if (width < 0) { + msg = overflow; + errCode = "OVERFLOW"; + goto errorMsg; + } format = end; step = TclUtfToUniChar(format, &ch); } else if (ch == '*') { @@ -1893,31 +2018,22 @@ Tcl_AppendFormatToObj( } else if (ch == 'I') { if ((format[1] == '6') && (format[2] == '4')) { format += (step + 2); - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); #ifndef TCL_WIDE_INT_IS_LONG useWide = 1; #endif } else if ((format[1] == '3') && (format[2] == '2')) { format += (step + 2); - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); } else { format += step; - step = Tcl_UtfToUniChar(format, &ch); - } - } else if ((ch == 't') || (ch == 'z')) { - format += step; - step = Tcl_UtfToUniChar(format, &ch); -#ifndef TCL_WIDE_INT_IS_LONG - if (sizeof(size_t) > sizeof(int)) { - useWide = 1; + step = TclUtfToUniChar(format, &ch); } -#endif - } else if ((ch == 'q') ||(ch == 'j')) { + } else if ((ch == 't') || (ch == 'z') || (ch == 'q') || (ch == 'j') + || (ch == 'L')) { format += step; - step = Tcl_UtfToUniChar(format, &ch); -#ifndef TCL_WIDE_INT_IS_LONG - useWide = 1; -#endif + step = TclUtfToUniChar(format, &ch); + useBig = 1; } format += step; @@ -1949,13 +2065,17 @@ Tcl_AppendFormatToObj( } break; case 'c': { - char buf[TCL_UTF_MAX]; + char buf[4]; int code, length; if (TclGetIntFromObj(interp, segment, &code) != TCL_OK) { goto error; } length = Tcl_UniCharToUtf(code, buf); + if (!length) { + /* Special case for handling upper surrogates. */ + length = Tcl_UniCharToUtf(-1, buf); + } segment = Tcl_NewStringObj(buf, length); Tcl_IncrRefCount(segment); allocSegment = 1; @@ -1963,11 +2083,6 @@ Tcl_AppendFormatToObj( } case 'u': - if (useBig) { - msg = "unsigned bignum format is invalid"; - errCode = "BADUNSIGNED"; - goto errorMsg; - } case 'd': case 'o': case 'p': @@ -1987,13 +2102,26 @@ Tcl_AppendFormatToObj( } #endif if (useBig) { + int cmpResult; if (Tcl_GetBignumFromObj(interp, segment, &big) != TCL_OK) { goto error; } - isNegative = (mp_cmp_d(&big, 0) == MP_LT); + cmpResult = mp_cmp_d(&big, 0); + isNegative = (cmpResult == MP_LT); + if (cmpResult == MP_EQ) gotHash = 0; + if (ch == 'u') { + if (isNegative) { + mp_clear(&big); + msg = "unsigned bignum format is invalid"; + errCode = "BADUNSIGNED"; + goto errorMsg; + } else { + ch = 'd'; + } + } #ifndef TCL_WIDE_INT_IS_LONG } else if (useWide) { - if (Tcl_GetWideIntFromObj(NULL, segment, &w) != TCL_OK) { + if (TclGetWideIntFromObj(NULL, segment, &w) != TCL_OK) { Tcl_Obj *objPtr; if (Tcl_GetBignumFromObj(interp,segment,&big) != TCL_OK) { @@ -2002,13 +2130,14 @@ Tcl_AppendFormatToObj( mp_mod_2d(&big, (int) CHAR_BIT*sizeof(Tcl_WideInt), &big); objPtr = Tcl_NewBignumObj(&big); Tcl_IncrRefCount(objPtr); - Tcl_GetWideIntFromObj(NULL, objPtr, &w); + TclGetWideIntFromObj(NULL, objPtr, &w); Tcl_DecrRefCount(objPtr); } isNegative = (w < (Tcl_WideInt) 0); + if (w == (Tcl_WideInt) 0) gotHash = 0; #endif } else if (TclGetLongFromObj(NULL, segment, &l) != TCL_OK) { - if (Tcl_GetWideIntFromObj(NULL, segment, &w) != TCL_OK) { + if (TclGetWideIntFromObj(NULL, segment, &w) != TCL_OK) { Tcl_Obj *objPtr; if (Tcl_GetBignumFromObj(interp,segment,&big) != TCL_OK) { @@ -2025,14 +2154,18 @@ Tcl_AppendFormatToObj( if (useShort) { s = (short) l; isNegative = (s < (short) 0); + if (s == (short) 0) gotHash = 0; } else { isNegative = (l < (long) 0); + if (l == (long) 0) gotHash = 0; } } else if (useShort) { s = (short) l; isNegative = (s < (short) 0); + if (s == (short) 0) gotHash = 0; } else { isNegative = (l < (long) 0); + if (l == (long) 0) gotHash = 0; } segment = Tcl_NewObj(); @@ -2049,16 +2182,12 @@ Tcl_AppendFormatToObj( if (gotHash || (ch == 'p')) { switch (ch) { case 'o': - Tcl_AppendToObj(segment, "0", 1); - segmentLimit -= 1; - precision--; - break; - case 'X': - Tcl_AppendToObj(segment, "0X", 2); + Tcl_AppendToObj(segment, "0o", 2); segmentLimit -= 2; break; case 'p': case 'x': + case 'X': Tcl_AppendToObj(segment, "0x", 2); segmentLimit -= 2; break; @@ -2066,10 +2195,14 @@ Tcl_AppendFormatToObj( Tcl_AppendToObj(segment, "0b", 2); segmentLimit -= 2; break; +#if TCL_MAJOR_VERSION < 9 case 'd': - Tcl_AppendToObj(segment, "0d", 2); - segmentLimit -= 2; + if (gotZero) { + Tcl_AppendToObj(segment, "0d", 2); + segmentLimit -= 2; + } break; +#endif } } @@ -2207,7 +2340,7 @@ Tcl_AppendFormatToObj( * Need to be sure zero becomes "0", not "". */ - if ((numDigits == 0) && !((ch == 'o') && gotHash)) { + if (numDigits == 0) { numDigits = 1; } pure = Tcl_NewObj(); @@ -2274,6 +2407,8 @@ Tcl_AppendFormatToObj( break; } + case 'a': + case 'A': case 'e': case 'E': case 'f': @@ -2342,6 +2477,12 @@ Tcl_AppendFormatToObj( errCode = "OVERFLOW"; goto errorMsg; } + if (ch == 'A') { + char *p = TclGetString(segment) + 1; + *p = 'x'; + p = strchr(p, 'P'); + if (p) *p = 'p'; + } break; } default: @@ -2418,7 +2559,7 @@ Tcl_AppendFormatToObj( /* *--------------------------------------------------------------------------- * - * Tcl_Format-- + * Tcl_Format -- * * Results: * A refcount zero Tcl_Obj. @@ -2547,15 +2688,26 @@ AppendPrintfToObjVA( Tcl_ListObjAppendElement(NULL, list, Tcl_NewWideIntObj( va_arg(argList, Tcl_WideInt))); break; + case 3: + Tcl_ListObjAppendElement(NULL, list, Tcl_NewBignumObj( + va_arg(argList, mp_int *))); + break; } break; + case 'a': + case 'A': case 'e': case 'E': case 'f': case 'g': case 'G': + if (size > 0) { Tcl_ListObjAppendElement(NULL, list, Tcl_NewDoubleObj( - va_arg(argList, double))); + (double)va_arg(argList, long double))); + } else { + Tcl_ListObjAppendElement(NULL, list, Tcl_NewDoubleObj( + va_arg(argList, double))); + } seekingConversion = 0; break; case '*': @@ -2575,7 +2727,6 @@ AppendPrintfToObjVA( gotPrecision = 1; p++; break; - /* TODO: support for bignum arguments */ case 'l': ++size; p++; @@ -2603,6 +2754,10 @@ AppendPrintfToObjVA( } p++; break; + case 'L': + size = 3; + p++; + break; case 'h': size = -1; default: @@ -2716,23 +2871,24 @@ TclGetStringStorage( * Performs the [string repeat] function. * * Results: - * A standard Tcl result. + * A (Tcl_Obj *) pointing to the result value, or NULL in case of an + * error. * * Side effects: - * Writes to *objPtrPtr the address of Tcl_Obj that is concatenation - * of count copies of the value in objPtr. + * On error, when interp is not NULL, error information is left in it. * *--------------------------------------------------------------------------- */ -int +Tcl_Obj * TclStringRepeat( Tcl_Interp *interp, Tcl_Obj *objPtr, int count, - Tcl_Obj **objPtrPtr) + int flags) { Tcl_Obj *objResultPtr; + int inPlace = flags & TCL_STRING_IN_PLACE; int length = 0, unichar = 0, done = 1; int binary = TclIsPureByteArray(objPtr); @@ -2767,8 +2923,7 @@ TclStringRepeat( if (length == 0) { /* Any repeats of empty is empty. */ - *objPtrPtr = objPtr; - return TCL_OK; + return objPtr; } if (count > INT_MAX/length) { @@ -2777,13 +2932,13 @@ TclStringRepeat( "max size for a Tcl value (%d bytes) exceeded", INT_MAX)); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } - return TCL_ERROR; + return NULL; } if (binary) { /* Efficiently produce a pure byte array result */ - objResultPtr = Tcl_IsShared(objPtr) ? Tcl_DuplicateObj(objPtr) - : objPtr; + objResultPtr = (!inPlace || Tcl_IsShared(objPtr)) ? + Tcl_DuplicateObj(objPtr) : objPtr; Tcl_SetByteArrayLength(objResultPtr, count*length); /* PANIC? */ Tcl_SetByteArrayLength(objResultPtr, length); @@ -2795,8 +2950,11 @@ TclStringRepeat( Tcl_GetByteArrayFromObj(objResultPtr, NULL), (count - done) * length); } else if (unichar) { - /* Efficiently produce a pure Tcl_UniChar array result */ - if (Tcl_IsShared(objPtr)) { + /* + * Efficiently produce a pure Tcl_UniChar array result. + */ + + if (!inPlace || Tcl_IsShared(objPtr)) { objResultPtr = Tcl_NewUnicodeObj(Tcl_GetUnicode(objPtr), length); } else { TclInvalidateStringRep(objPtr); @@ -2807,11 +2965,11 @@ TclStringRepeat( if (interp) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "string size overflow: unable to alloc %" - TCL_LL_MODIFIER "d bytes", - (Tcl_WideUInt)STRING_SIZE(count*length))); + TCL_Z_MODIFIER "u bytes", + STRING_SIZE(count*length))); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } - return TCL_ERROR; + return NULL; } Tcl_SetObjLength(objResultPtr, length); while (count - done > done) { @@ -2821,8 +2979,11 @@ TclStringRepeat( Tcl_AppendUnicodeToObj(objResultPtr, Tcl_GetUnicode(objResultPtr), (count - done) * length); } else { - /* Efficiently concatenate string reps */ - if (Tcl_IsShared(objPtr)) { + /* + * Efficiently concatenate string reps. + */ + + if (!inPlace || Tcl_IsShared(objPtr)) { objResultPtr = Tcl_NewStringObj(Tcl_GetString(objPtr), length); } else { TclFreeIntRep(objPtr); @@ -2835,7 +2996,7 @@ TclStringRepeat( count*length)); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } - return TCL_ERROR; + return NULL; } Tcl_SetObjLength(objResultPtr, length); while (count - done > done) { @@ -2845,47 +3006,45 @@ TclStringRepeat( Tcl_AppendToObj(objResultPtr, Tcl_GetString(objResultPtr), (count - done) * length); } - *objPtrPtr = objResultPtr; - return TCL_OK; + return objResultPtr; } /* *--------------------------------------------------------------------------- * - * TclStringCatObjv -- + * TclStringCat -- * * Performs the [string cat] function. * * Results: - * A standard Tcl result. + * A (Tcl_Obj *) pointing to the result value, or NULL in case of an + * error. * * Side effects: - * Writes to *objPtrPtr the address of Tcl_Obj that is concatenation - * of all objc values in objv. + * On error, when interp is not NULL, error information is left in it. * *--------------------------------------------------------------------------- */ -int -TclStringCatObjv( +Tcl_Obj * +TclStringCat( Tcl_Interp *interp, - int inPlace, int objc, Tcl_Obj * const objv[], - Tcl_Obj **objPtrPtr) + int flags) { Tcl_Obj *objResultPtr, * const *ov; int oc, length = 0, binary = 1; int allowUniChar = 1, requestUniChar = 0; int first = objc - 1; /* Index of first value possibly not empty */ int last = 0; /* Index of last value possibly not empty */ + int inPlace = flags & TCL_STRING_IN_PLACE; /* assert ( objc >= 0 ) */ if (objc <= 1) { /* Only one or no objects; return first or empty */ - *objPtrPtr = objc ? objv[0] : Tcl_NewObj(); - return TCL_OK; + return objc ? objv[0] : Tcl_NewObj(); } /* assert ( objc >= 2 ) */ @@ -2901,13 +3060,16 @@ TclStringCatObjv( do { Tcl_Obj *objPtr = *ov++; - if (objPtr->bytes) { + if (TclIsPureByteArray(objPtr)) { + allowUniChar = 0; + } else if (objPtr->bytes) { /* Value has a string rep. */ if (objPtr->length) { /* - * Non-empty string rep. Not a pure bytearray, so we - * won't create a pure bytearray + * Non-empty string rep. Not a pure bytearray, so we won't + * create a pure bytearray. */ + binary = 0; if ((objPtr->typePtr) && (objPtr->typePtr != &tclStringType)) { /* Prevent shimmer of non-string types. */ @@ -2916,31 +3078,37 @@ TclStringCatObjv( } } else { /* assert (objPtr->typePtr != NULL) -- stork! */ - if (TclIsPureByteArray(objPtr)) { - allowUniChar = 0; + binary = 0; + if (objPtr->typePtr == &tclStringType) { + /* Have a pure Unicode value; ask to preserve it */ + requestUniChar = 1; } else { - binary = 0; - if (objPtr->typePtr == &tclStringType) { - /* Have a pure Unicode value; ask to preserve it */ - requestUniChar = 1; - } else { - /* Have another type; prevent shimmer */ - allowUniChar = 0; - } + /* Have another type; prevent shimmer */ + allowUniChar = 0; } } } while (--oc && (binary || allowUniChar)); if (binary) { - /* Result will be pure byte array. Pre-size it */ - ov = objv; oc = objc; + /* + * Result will be pure byte array. Pre-size it + */ + + int numBytes; + ov = objv; + oc = objc; do { Tcl_Obj *objPtr = *ov++; - if (objPtr->bytes == NULL) { - int numBytes; + /* + * Every argument is either a bytearray with a ("pure") + * value we know we can safely use, or it is an empty string. + * We don't need to count bytes for the empty strings. + */ + if (TclIsPureByteArray(objPtr)) { Tcl_GetByteArrayFromObj(objPtr, &numBytes); /* PANIC? */ + if (numBytes) { last = objc - oc; if (length == 0) { @@ -2953,8 +3121,12 @@ TclStringCatObjv( } } while (--oc); } else if (allowUniChar && requestUniChar) { - /* Result will be pure Tcl_UniChar array. Pre-size it. */ - ov = objv; oc = objc; + /* + * Result will be pure Tcl_UniChar array. Pre-size it. + */ + + ov = objv; + oc = objc; do { Tcl_Obj *objPtr = *ov++; @@ -2999,9 +3171,9 @@ TclStringCatObjv( } while (--oc && (length == 0) && (pendingPtr == NULL)); /* - * Either we found a possibly non-empty value, and we - * remember this index as the first and last such value so - * far seen, or (oc == 0) and all values are known empty, + * Either we found a possibly non-empty value, and we remember + * this index as the first and last such value so far seen, + * or (oc == 0) and all values are known empty, * so first = last = objc - 1 signals the right quick return. */ @@ -3013,10 +3185,9 @@ TclStringCatObjv( /* assert ( pendingPtr != NULL ) */ /* - * There's a pending value followed by more values. - * Loop over remaining values generating strings until - * a non-empty value is found, or the pending value gets - * its string generated. + * There's a pending value followed by more values. Loop over + * remaining values generating strings until a non-empty value + * is found, or the pending value gets its string generated. */ do { @@ -3062,8 +3233,7 @@ TclStringCatObjv( if (last <= first /*|| length == 0 */) { /* Only one non-empty value or zero length; return first */ /* NOTE: (length == 0) implies (last <= first) */ - *objPtrPtr = objv[first]; - return TCL_OK; + return objv[first]; } objv += first; objc = (last - first + 1); @@ -3073,10 +3243,10 @@ TclStringCatObjv( unsigned char *dst; /* - * Broken interface! Byte array value routines offer no way - * to handle failure to allocate enough space. Following - * stanza may panic. + * Broken interface! Byte array value routines offer no way to handle + * failure to allocate enough space. Following stanza may panic. */ + if (inPlace && !Tcl_IsShared(*objv)) { int start; @@ -3090,7 +3260,13 @@ TclStringCatObjv( while (objc--) { Tcl_Obj *objPtr = *objv++; - if (objPtr->bytes == NULL) { + /* + * Every argument is either a bytearray with a ("pure") + * value we know we can safely use, or it is an empty string. + * We don't need to copy bytes from the empty strings. + */ + + if (TclIsPureByteArray(objPtr)) { int more; unsigned char *src = Tcl_GetByteArrayFromObj(objPtr, &more); memcpy(dst, src, (size_t) more); @@ -3113,11 +3289,11 @@ TclStringCatObjv( if (interp) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "concatenation failed: unable to alloc %" - TCL_LL_MODIFIER "d bytes", - (Tcl_WideUInt)STRING_SIZE(length))); + TCL_Z_MODIFIER "u bytes", + STRING_SIZE(length))); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } - return TCL_ERROR; + return NULL; } dst = Tcl_GetUnicode(objResultPtr) + start; } else { @@ -3130,11 +3306,11 @@ TclStringCatObjv( if (interp) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "concatenation failed: unable to alloc %" - TCL_LL_MODIFIER "d bytes", - (Tcl_WideUInt)STRING_SIZE(length))); + TCL_Z_MODIFIER "u bytes", + STRING_SIZE(length))); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } - return TCL_ERROR; + return NULL; } dst = Tcl_GetUnicode(objResultPtr); } @@ -3165,7 +3341,7 @@ TclStringCatObjv( length)); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } - return TCL_ERROR; + return NULL; } dst = Tcl_GetString(objResultPtr) + start; @@ -3181,7 +3357,7 @@ TclStringCatObjv( length)); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } - return TCL_ERROR; + return NULL; } dst = Tcl_GetString(objResultPtr); } @@ -3191,13 +3367,15 @@ TclStringCatObjv( if ((objPtr->bytes == NULL) || (objPtr->length)) { int more; char *src = Tcl_GetStringFromObj(objPtr, &more); + memcpy(dst, src, (size_t) more); dst += more; } } + /* Must NUL-terminate! */ + *dst = '\0'; } - *objPtrPtr = objResultPtr; - return TCL_OK; + return objResultPtr; overflow: if (interp) { @@ -3205,13 +3383,194 @@ TclStringCatObjv( "max size for a Tcl value (%d bytes) exceeded", INT_MAX)); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } - return TCL_ERROR; + return NULL; +} + +/* + *--------------------------------------------------------------------------- + * + * TclStringCmp -- + * Compare two Tcl_Obj values as strings. + * + * Results: + * Like memcmp, return -1, 0, or 1. + * + * Side effects: + * String representations may be generated. Internal representation may + * be changed. + * + *--------------------------------------------------------------------------- + */ + +int TclStringCmp( + Tcl_Obj *value1Ptr, + Tcl_Obj *value2Ptr, + int checkEq, /* comparison is only for equality */ + int nocase, /* comparison is not case sensitive */ + int reqlength) /* requested length */ +{ + char *s1, *s2; + int empty, length, match, s1len, s2len; + memCmpFn_t memCmpFn; + + if ((reqlength == 0) || (value1Ptr == value2Ptr)) { + /* + * Always match at 0 chars of if it is the same obj. + */ + match = 0; + } else { + + if (!nocase && TclIsPureByteArray(value1Ptr) + && TclIsPureByteArray(value2Ptr)) { + /* + * Use binary versions of comparisons since that won't cause undue + * type conversions and it is much faster. Only do this if we're + * case-sensitive (which is all that really makes sense with byte + * arrays anyway, and we have no memcasecmp() for some reason... :^) + */ + + s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len); + s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len); + memCmpFn = memcmp; + } else if ((value1Ptr->typePtr == &tclStringType) + && (value2Ptr->typePtr == &tclStringType)) { + /* + * Do a unicode-specific comparison if both of the args are of + * String type. If the char length == byte length, we can do a + * memcmp. In benchmark testing this proved the most efficient + * check between the unicode and string comparison operations. + */ + + if (nocase) { + s1 = (char *) Tcl_GetUnicodeFromObj(value1Ptr, &s1len); + s2 = (char *) Tcl_GetUnicodeFromObj(value2Ptr, &s2len); + memCmpFn = (memCmpFn_t)Tcl_UniCharNcasecmp; + } else { + s1len = Tcl_GetCharLength(value1Ptr); + s2len = Tcl_GetCharLength(value2Ptr); + if ((s1len == value1Ptr->length) + && (value1Ptr->bytes != NULL) + && (s2len == value2Ptr->length) + && (value2Ptr->bytes != NULL)) { + s1 = value1Ptr->bytes; + s2 = value2Ptr->bytes; + memCmpFn = memcmp; + } else { + s1 = (char *) Tcl_GetUnicode(value1Ptr); + s2 = (char *) Tcl_GetUnicode(value2Ptr); + if ( +#ifdef WORDS_BIGENDIAN + 1 +#else + checkEq +#endif + ) { + memCmpFn = memcmp; + s1len *= sizeof(Tcl_UniChar); + s2len *= sizeof(Tcl_UniChar); + } else { + memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp; + } + } + } + } else { + if ((empty = TclCheckEmptyString(value1Ptr)) > 0) { + switch (TclCheckEmptyString(value2Ptr)) { + case -1: + s1 = 0; + s1len = 0; + s2 = TclGetStringFromObj(value2Ptr, &s2len); + break; + case 0: + match = -1; + goto matchdone; + case 1: + default: /* avoid warn: `s2` may be used uninitialized */ + match = 0; + goto matchdone; + } + } else if (TclCheckEmptyString(value2Ptr) > 0) { + switch (empty) { + case -1: + s2 = 0; + s2len = 0; + s1 = TclGetStringFromObj(value1Ptr, &s1len); + break; + case 0: + match = 1; + goto matchdone; + case 1: + default: /* avoid warn: `s1` may be used uninitialized */ + match = 0; + goto matchdone; + } + } else { + s1 = TclGetStringFromObj(value1Ptr, &s1len); + s2 = TclGetStringFromObj(value2Ptr, &s2len); + } + if (!nocase && checkEq) { + /* + * When we have equal-length we can check only for + * (in)equality. We can use memcmp in all (n)eq cases because + * we don't need to worry about lexical LE/BE variance. + */ + + memCmpFn = memcmp; + } else { + /* + * As a catch-all we will work with UTF-8. We cannot use + * memcmp() as that is unsafe with any string containing NUL + * (\xC0\x80 in Tcl's utf rep). We can use the more efficient + * TclpUtfNcmp2 if we are case-sensitive and no specific + * length was requested. + */ + + if ((reqlength < 0) && !nocase) { + memCmpFn = (memCmpFn_t) TclpUtfNcmp2; + } else { + s1len = Tcl_NumUtfChars(s1, s1len); + s2len = Tcl_NumUtfChars(s2, s2len); + memCmpFn = (memCmpFn_t) + (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp); + } + } + } + + length = (s1len < s2len) ? s1len : s2len; + if (reqlength > 0 && reqlength < length) { + length = reqlength; + } else if (reqlength < 0) { + /* + * The requested length is negative, so we ignore it by setting it + * to length + 1 so we correct the match var. + */ + + reqlength = length + 1; + } + + if (checkEq && (s1len != s2len)) { + match = 1; /* This will be reversed below. */ + } else { + /* + * The comparison function should compare up to the minimum byte + * length only. + */ + + match = memCmpFn(s1, s2, (size_t) length); + } + if ((match == 0) && (reqlength > length)) { + match = s1len - s2len; + } + match = (match > 0) ? 1 : (match < 0) ? -1 : 0; + } + matchdone: + return match; } /* *--------------------------------------------------------------------------- * - * TclStringFind -- + * TclStringFirst -- * * Implements the [string first] operation. * @@ -3227,20 +3586,20 @@ TclStringCatObjv( */ int -TclStringFind( +TclStringFirst( Tcl_Obj *needle, Tcl_Obj *haystack, int start) { int lh, ln = Tcl_GetCharLength(needle); + if (start < 0) { + start = 0; + } if (ln == 0) { - /* - * We don't find empty substrings. Bizarre! - * - * TODO: When we one day make this a true substring - * finder, change this to "return 0" - */ + /* We don't find empty substrings. Bizarre! + * Whenever this routine is turned into a proper substring + * finder, change to `return start` after limits imposed. */ return -1; } @@ -3248,58 +3607,57 @@ TclStringFind( unsigned char *end, *try, *bh; unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln); + /* Find bytes in bytes */ bh = Tcl_GetByteArrayFromObj(haystack, &lh); end = bh + lh; try = bh + start; while (try + ln <= end) { - try = memchr(try, bn[0], end - try); - + /* + * Look for the leading byte of the needle in the haystack + * starting at try and stopping when there's not enough room + * for the needle left. + */ + try = memchr(try, bn[0], (end + 1 - ln) - try); if (try == NULL) { + /* Leading byte not found -> needle cannot be found. */ return -1; } + /* Leading byte found, check rest of needle. */ if (0 == memcmp(try+1, bn+1, ln-1)) { + /* Checks! Return the successful index. */ return (try - bh); } + /* Rest of needle match failed; Iterate to continue search. */ try++; } return -1; } - lh = Tcl_GetCharLength(haystack); - if (haystack->bytes && (lh == haystack->length)) { - /* haystack is all single-byte chars */ - - if (needle->bytes && (ln == needle->length)) { - /* needle is also all single-byte chars */ - char *found = strstr(haystack->bytes + start, needle->bytes); + /* + * TODO: It might be nice to support some cases where it is not + * necessary to shimmer to &tclStringType to compute the result, + * and instead operate just on the objPtr->bytes values directly. + * However, we also do not want the answer to change based on the + * code pathway, or if it does we want that to be for some values + * we explicitly decline to support. Getting there will involve + * locking down in practice more firmly just what encodings produce + * what supported results for the objPtr->bytes values. For now, + * do only the well-defined Tcl_UniChar array search. + */ - if (found) { - return (found - haystack->bytes); - } else { - return -1; - } - } else { - /* - * Cannot find substring with a multi-byte char inside - * a string with no multi-byte chars. - */ - return -1; - } - } else { + { Tcl_UniChar *try, *end, *uh; Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln); uh = Tcl_GetUnicodeFromObj(haystack, &lh); end = uh + lh; - try = uh + start; - while (try + ln <= end) { - if ((*try == *un) - && (0 == memcmp(try+1, un+1, (ln-1)*sizeof(Tcl_UniChar)))) { + for (try = uh + start; try + ln <= end; try++) { + if ((*try == *un) && (0 == + memcmp(try + 1, un + 1, (ln-1) * sizeof(Tcl_UniChar)))) { return (try - uh); } - try++; } return -1; } @@ -3336,24 +3694,24 @@ TclStringLast( * We don't find empty substrings. Bizarre! * * TODO: When we one day make this a true substring - * finder, change this to "return 0" + * finder, change this to "return last", after limitation. */ return -1; } - if (ln > last + 1) { + lh = Tcl_GetCharLength(haystack); + if (last >= lh) { + last = lh - 1; + } + + if (last < ln - 1) { return -1; } if (TclIsPureByteArray(needle) && TclIsPureByteArray(haystack)) { - unsigned char *try, *bh; + unsigned char *try, *bh = Tcl_GetByteArrayFromObj(haystack, &lh); unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln); - bh = Tcl_GetByteArrayFromObj(haystack, &lh); - - if (last + 1 > lh) { - last = lh - 1; - } try = bh + last + 1 - ln; while (try >= bh) { if ((*try == bn[0]) @@ -3365,38 +3723,10 @@ TclStringLast( return -1; } - lh = Tcl_GetCharLength(haystack); - if (last + 1 > lh) { - last = lh - 1; - } - if (haystack->bytes && (lh == haystack->length)) { - /* haystack is all single-byte chars */ - - if (needle->bytes && (ln == needle->length)) { - /* needle is also all single-byte chars */ - - char *try = haystack->bytes + last + 1 - ln; - while (try >= haystack->bytes) { - if ((*try == needle->bytes[0]) - && (0 == memcmp(try+1, needle->bytes + 1, ln - 1))) { - return (try - haystack->bytes); - } - try--; - } - return -1; - } else { - /* - * Cannot find substring with a multi-byte char inside - * a string with no multi-byte chars. - */ - return -1; - } - } else { - Tcl_UniChar *try, *uh; + { + Tcl_UniChar *try, *uh = Tcl_GetUnicodeFromObj(haystack, &lh); Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln); - uh = Tcl_GetUnicodeFromObj(haystack, &lh); - try = uh + last + 1 - ln; while (try >= uh) { if ((*try == un[0]) @@ -3412,14 +3742,14 @@ TclStringLast( /* *--------------------------------------------------------------------------- * - * TclStringObjReverse -- + * TclStringReverse -- * * Implements the [string reverse] operation. * * Results: - * An unshared Tcl value which is the [string reverse] of the argument - * supplied. When sharing rules permit, the returned value might be the - * argument with modifications done in place. + * A Tcl value which is the [string reverse] of the argument supplied. + * When sharing rules permit and the caller requests, the returned value + * might be the argument with modifications done in place. * * Side effects: * May allocate a new Tcl_Obj. @@ -3431,18 +3761,20 @@ static void ReverseBytes( unsigned char *to, /* Copy bytes into here... */ unsigned char *from, /* ...from here... */ - int count) /* Until this many are copied, */ + int count) /* Until this many are copied, */ /* reversing as you go. */ { unsigned char *src = from + count; + if (to == from) { /* Reversing in place */ while (--src > to) { unsigned char c = *src; + *src = *to; *to++ = c; } - } else { + } else { while (--src >= from) { *to++ = *src; } @@ -3450,17 +3782,19 @@ ReverseBytes( } Tcl_Obj * -TclStringObjReverse( - Tcl_Obj *objPtr) +TclStringReverse( + Tcl_Obj *objPtr, + int flags) { String *stringPtr; Tcl_UniChar ch = 0; + int inPlace = flags & TCL_STRING_IN_PLACE; if (TclIsPureByteArray(objPtr)) { int numBytes; unsigned char *from = Tcl_GetByteArrayFromObj(objPtr, &numBytes); - if (Tcl_IsShared(objPtr)) { + if (!inPlace || Tcl_IsShared(objPtr)) { objPtr = Tcl_NewByteArrayObj(NULL, numBytes); } ReverseBytes(Tcl_GetByteArrayFromObj(objPtr, NULL), from, numBytes); @@ -3474,7 +3808,7 @@ TclStringObjReverse( Tcl_UniChar *from = Tcl_GetUnicode(objPtr); Tcl_UniChar *src = from + stringPtr->numChars; - if (Tcl_IsShared(objPtr)) { + if (!inPlace || Tcl_IsShared(objPtr)) { Tcl_UniChar *to; /* @@ -3482,7 +3816,6 @@ TclStringObjReverse( * Tcl_SetObjLength into growing the unicode rep buffer. */ - ch = 0; objPtr = Tcl_NewUnicodeObj(&ch, 1); Tcl_SetObjLength(objPtr, stringPtr->numChars); to = Tcl_GetUnicode(objPtr); @@ -3490,7 +3823,10 @@ TclStringObjReverse( *to++ = *src; } } else { - /* Reversing in place */ + /* + * Reversing in place. + */ + while (--src > from) { ch = *src; *src = *from; @@ -3504,7 +3840,7 @@ TclStringObjReverse( int numBytes = objPtr->length; char *to, *from = objPtr->bytes; - if (Tcl_IsShared(objPtr)) { + if (!inPlace || Tcl_IsShared(objPtr)) { objPtr = Tcl_NewObj(); Tcl_SetObjLength(objPtr, numBytes); } @@ -3514,20 +3850,22 @@ TclStringObjReverse( /* * Either numChars == -1 and we don't know how many chars are * represented by objPtr->bytes and we need Pass 1 just in case, - * or numChars >= 0 and we know we have fewer chars than bytes, - * so we know there's a multibyte character needing Pass 1. + * or numChars >= 0 and we know we have fewer chars than bytes, so + * we know there's a multibyte character needing Pass 1. * * Pass 1. Reverse the bytes of each multi-byte character. */ + int charCount = 0; int bytesLeft = numBytes; while (bytesLeft) { /* - * NOTE: We know that the from buffer is NUL-terminated. - * It's part of the contract for objPtr->bytes values. - * Thus, we can skip calling Tcl_UtfCharComplete() here. + * NOTE: We know that the from buffer is NUL-terminated. It's + * part of the contract for objPtr->bytes values. Thus, we can + * skip calling Tcl_UtfCharComplete() here. */ + int bytesInChar = TclUtfToUniChar(from, &ch); ReverseBytes((unsigned char *)to, (unsigned char *)from, @@ -3551,6 +3889,150 @@ TclStringObjReverse( /* *--------------------------------------------------------------------------- * + * TclStringReplace -- + * + * Implements the inner engine of the [string replace] command. + * + * The result is a concatenation of a prefix from objPtr, characters + * 0 through first-1, the insertPtr string value, and a suffix from + * objPtr, characters from first + count to the end. The effect is as if + * the inner substring of characters first through first+count-1 are + * removed and replaced with insertPtr. If insertPtr is NULL, it is + * treated as an empty string. When passed the flag TCL_STRING_IN_PLACE, + * this routine will try to do the work within objPtr, so long as no + * sharing forbids it. Without that request, or as needed, a new Tcl + * value will be allocated to be the result. + * + * Results: + * A Tcl value that is the result of the substring replacement. May + * return NULL in case of an error. When NULL is returned and interp is + * non-NULL, error information is left in interp + * + *--------------------------------------------------------------------------- + */ + +Tcl_Obj * +TclStringReplace( + Tcl_Interp *interp, /* For error reporting, may be NULL */ + Tcl_Obj *objPtr, /* String to act upon */ + int first, /* First index to replace */ + int count, /* How many chars to replace */ + Tcl_Obj *insertPtr, /* Replacement string, may be NULL */ + int flags) /* TCL_STRING_IN_PLACE => attempt in-place */ +{ + int inPlace = flags & TCL_STRING_IN_PLACE; + Tcl_Obj *result; + + /* Caller is expected to pass sensible arguments */ + assert ( count >= 0 ) ; + assert ( first >= 0 ) ; + + /* Replace nothing with nothing */ + if ((insertPtr == NULL) && (count == 0)) { + if (inPlace) { + return objPtr; + } else { + return Tcl_DuplicateObj(objPtr); + } + } + + /* + * The caller very likely had to call Tcl_GetCharLength() or similar + * to be able to process index values. This means it is like that + * objPtr is either a proper "bytearray" or a "string" or else it has + * a known and short string rep. + */ + + if (TclIsPureByteArray(objPtr)) { + int numBytes; + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &numBytes); + + if (insertPtr == NULL) { + /* Replace something with nothing. */ + + assert ( first <= numBytes ) ; + assert ( count <= numBytes ) ; + assert ( first + count <= numBytes ) ; + + result = Tcl_NewByteArrayObj(NULL, numBytes - count);/* PANIC? */ + TclAppendBytesToByteArray(result, bytes, first); + TclAppendBytesToByteArray(result, bytes + first + count, + numBytes - count - first); + return result; + } + + /* Replace everything */ + if ((first == 0) && (count == numBytes)) { + return insertPtr; + } + + if (TclIsPureByteArray(insertPtr)) { + int newBytes; + unsigned char *iBytes + = Tcl_GetByteArrayFromObj(insertPtr, &newBytes); + + if (count == newBytes && inPlace && !Tcl_IsShared(objPtr)) { + /* + * Removal count and replacement count are equal. + * Other conditions permit. Do in-place splice. + */ + + memcpy(bytes + first, iBytes, count); + Tcl_InvalidateStringRep(objPtr); + return objPtr; + } + + if (newBytes > INT_MAX - (numBytes - count)) { + if (interp) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "max size for a Tcl value (%d bytes) exceeded", + INT_MAX)); + Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); + } + return NULL; + } + result = Tcl_NewByteArrayObj(NULL, numBytes - count + newBytes); + /* PANIC? */ + Tcl_SetByteArrayLength(result, 0); + TclAppendBytesToByteArray(result, bytes, first); + TclAppendBytesToByteArray(result, iBytes, newBytes); + TclAppendBytesToByteArray(result, bytes + first + count, + numBytes - count - first); + return result; + } + + /* Flow through to try other approaches below */ + } + + /* + * TODO: Figure out how not to generate a Tcl_UniChar array rep + * when it can be determined objPtr->bytes points to a string of + * all single-byte characters so we can index it directly. + */ + + /* The traditional implementation... */ + { + int numChars; + Tcl_UniChar *ustring = Tcl_GetUnicodeFromObj(objPtr, &numChars); + + /* TODO: Is there an in-place option worth pursuing here? */ + + result = Tcl_NewUnicodeObj(ustring, first); + if (insertPtr) { + Tcl_AppendObjToObj(result, insertPtr); + } + if (first + count < numChars) { + Tcl_AppendUnicodeToObj(result, ustring + first + count, + numChars - first - count); + } + + return result; + } +} + +/* + *--------------------------------------------------------------------------- + * * FillUnicodeRep -- * * Populate the Unicode internal rep with the Unicode form of its string @@ -3585,7 +4067,7 @@ ExtendUnicodeRepWithString( { String *stringPtr = GET_STRING(objPtr); int needed, numOrigChars = 0; - Tcl_UniChar *dst; + Tcl_UniChar *dst, unichar = 0; if (stringPtr->hasUnicode) { numOrigChars = stringPtr->numChars; @@ -3608,7 +4090,8 @@ ExtendUnicodeRepWithString( numAppendChars = 0; } for (dst=stringPtr->unicode + numOrigChars; numAppendChars-- > 0; dst++) { - bytes += TclUtfToUniChar(bytes, dst); + bytes += TclUtfToUniChar(bytes, &unichar); + *dst = unichar; } *dst = 0; } |