diff options
Diffstat (limited to 'generic/tclStringObj.c')
| -rw-r--r-- | generic/tclStringObj.c | 771 |
1 files changed, 532 insertions, 239 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index c45baa1..9913160 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -38,6 +38,7 @@ #include "tommath.h" #include "tclStringRep.h" +#include "assert.h" /* * Prototypes for functions defined later in this file: */ @@ -140,8 +141,8 @@ GrowStringBuffer( objPtr->bytes = NULL; } if (flag == 0 || stringPtr->allocated > 0) { - attempt = 2 * needed; - if (attempt >= 0) { + if (needed <= INT_MAX / 2) { + attempt = 2 * needed; ptr = attemptckrealloc(objPtr->bytes, attempt + 1); } if (ptr == NULL) { @@ -190,8 +191,8 @@ GrowUnicodeBuffer( * Subsequent appends - apply the growth algorithm. */ - attempt = 2 * needed; - if (attempt >= 0 && attempt <= STRING_MAXCHARS) { + if (needed <= STRING_MAXCHARS / 2) { + attempt = 2 * needed; ptr = stringAttemptRealloc(stringPtr, attempt); } if (ptr == NULL) { @@ -418,9 +419,14 @@ Tcl_GetCharLength( } /* - * Optimize the case where we're really dealing with a bytearray object - * without string representation; we don't need to convert to a string to - * perform the get-length operation. + * Optimize the case where we're really dealing with a bytearray object; + * we don't need to convert to a string to perform the get-length operation. + * + * Starting in Tcl 8.7, we check for a "pure" bytearray, because the + * machinery behind that test is using a proper bytearray ObjType. We + * could also compute length of an improper bytearray without shimmering + * but there's no value in that. We *want* to shimmer an improper bytearray + * because improper bytearrays have worthless internal reps. */ if (TclIsPureByteArray(objPtr)) { @@ -476,8 +482,7 @@ Tcl_GetUniChar( /* * Optimize the case where we're really dealing with a bytearray object - * without string representation; we don't need to convert to a string to - * perform the indexing operation. + * we don't need to convert to a string to perform the indexing operation. */ if (TclIsPureByteArray(objPtr)) { @@ -517,7 +522,7 @@ Tcl_GetUniChar( * * Get the Unicode form of the String object. If the object is not * already a String object, it will be converted to one. If the String - * object does not have a Unicode rep, then one is create from the UTF + * object does not have a Unicode rep, then one is created from the UTF * string format. * * Results: @@ -610,8 +615,7 @@ Tcl_GetRange( /* * Optimize the case where we're really dealing with a bytearray object - * without string representation; we don't need to convert to a string to - * perform the substring operation. + * we don't need to convert to a string to perform the substring operation. */ if (TclIsPureByteArray(objPtr)) { @@ -651,6 +655,17 @@ Tcl_GetRange( stringPtr = GET_STRING(objPtr); } +#if TCL_UTF_MAX == 4 + /* See: bug [11ae2be95dac9417] */ + if ((first>0) && ((stringPtr->unicode[first]&0xFC00) == 0xDC00) + && ((stringPtr->unicode[first-1]&0xFC00) == 0xD800)) { + ++first; + } + if ((last+1<stringPtr->numChars) && ((stringPtr->unicode[last+1]&0xFC00) == 0xDC00) + && ((stringPtr->unicode[last]&0xFC00) == 0xD800)) { + ++last; + } +#endif return Tcl_NewUnicodeObj(stringPtr->unicode + first, last-first+1); } @@ -1208,9 +1223,9 @@ Tcl_AppendObjToObj( /* * Handle append of one bytearray object to another as a special case. - * Note that we only do this when the objects don't have string reps; if - * it did, then appending the byte arrays together could well lose - * information; this is a special-case optimization only. + * Note that we only do this when the objects are pure so that the + * bytearray faithfully represent the true value; Otherwise + * appending the byte arrays together could lose information; */ if ((TclIsPureByteArray(objPtr) || objPtr->bytes == &tclEmptyString) @@ -1650,6 +1665,7 @@ Tcl_AppendFormatToObj( const char *span = format, *msg, *errCode; int numBytes = 0, objIndex = 0, gotXpg = 0, gotSequential = 0; int originalLength, limit; + Tcl_UniChar ch = 0; static const char *mixedXPG = "cannot mix \"%\" and \"%n$\" conversion specifiers"; static const char *const badIndex[2] = { @@ -1670,12 +1686,14 @@ Tcl_AppendFormatToObj( while (*format != '\0') { char *end; - int gotMinus, gotHash, gotZero, gotSpace, gotPlus, sawFlag; - int width, gotPrecision, precision, useShort, useWide, useBig; + int gotMinus = 0, gotHash = 0, gotZero = 0, gotSpace = 0, gotPlus = 0; + int width, gotPrecision, precision, sawFlag, useShort = 0, useBig = 0; +#ifndef TCL_WIDE_INT_IS_LONG + int useWide = 0; +#endif int newXpg, numChars, allocSegment = 0, segmentLimit, segmentNumBytes; Tcl_Obj *segment; - Tcl_UniChar ch; - int step = Tcl_UtfToUniChar(format, &ch); + int step = TclUtfToUniChar(format, &ch); format += step; if (ch != '%') { @@ -1699,7 +1717,7 @@ Tcl_AppendFormatToObj( * Step 0. Handle special case of escaped format marker (i.e., %%). */ - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); if (ch == '%') { span = format; numBytes = step; @@ -1719,7 +1737,7 @@ Tcl_AppendFormatToObj( newXpg = 1; objIndex = position - 1; format = end + 1; - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); } } if (newXpg) { @@ -1747,7 +1765,6 @@ Tcl_AppendFormatToObj( * Step 2. Set of flags. */ - gotMinus = gotHash = gotZero = gotSpace = gotPlus = 0; sawFlag = 1; do { switch (ch) { @@ -1771,7 +1788,7 @@ Tcl_AppendFormatToObj( } if (sawFlag) { format += step; - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); } } while (sawFlag); @@ -1783,7 +1800,7 @@ Tcl_AppendFormatToObj( if (isdigit(UCHAR(ch))) { width = strtoul(format, &end, 10); format = end; - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); } else if (ch == '*') { if (objIndex >= objc - 1) { msg = badIndex[gotXpg]; @@ -1799,7 +1816,7 @@ Tcl_AppendFormatToObj( } objIndex++; format += step; - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); } if (width > limit) { msg = overflow; @@ -1815,12 +1832,12 @@ Tcl_AppendFormatToObj( if (ch == '.') { gotPrecision = 1; format += step; - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); } if (isdigit(UCHAR(ch))) { precision = strtoul(format, &end, 10); format = end; - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); } else if (ch == '*') { if (objIndex >= objc - 1) { msg = badIndex[gotXpg]; @@ -1841,37 +1858,46 @@ Tcl_AppendFormatToObj( } objIndex++; format += step; - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); } /* * Step 5. Length modifier. */ - useShort = useWide = useBig = 0; if (ch == 'h') { useShort = 1; format += step; - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); } else if (ch == 'l') { format += step; - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); if (ch == 'l') { useBig = 1; format += step; - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); #ifndef TCL_WIDE_INT_IS_LONG } else { useWide = 1; #endif } - } else if ((ch == 'I') && (format[1] == '6') && (format[2] == '4')) { - format += (step + 2); - step = Tcl_UtfToUniChar(format, &ch); - useBig = 1; - } else if (ch == 'L') { + } else if (ch == 'I') { + if ((format[1] == '6') && (format[2] == '4')) { + format += (step + 2); + step = TclUtfToUniChar(format, &ch); +#ifndef TCL_WIDE_INT_IS_LONG + useWide = 1; +#endif + } else if ((format[1] == '3') && (format[2] == '2')) { + format += (step + 2); + step = TclUtfToUniChar(format, &ch); + } else { + format += step; + step = TclUtfToUniChar(format, &ch); + } + } else if ((ch == 't') || (ch == 'z') || (ch == 'q') || (ch == 'j') || (ch == 'L')) { format += step; - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); useBig = 1; } @@ -1918,13 +1944,9 @@ Tcl_AppendFormatToObj( } case 'u': - if (useBig) { - msg = "unsigned bignum format is invalid"; - errCode = "BADUNSIGNED"; - goto errorMsg; - } case 'd': case 'o': + case 'p': case 'x': case 'X': case 'b': { @@ -1935,13 +1957,31 @@ Tcl_AppendFormatToObj( mp_int big; int toAppend, isNegative = 0; +#ifndef TCL_WIDE_INT_IS_LONG + if (ch == 'p') { + useWide = 1; + } +#endif if (useBig) { + int cmpResult; if (Tcl_GetBignumFromObj(interp, segment, &big) != TCL_OK) { goto error; } - isNegative = (mp_cmp_d(&big, 0) == MP_LT); + cmpResult = mp_cmp_d(&big, 0); + isNegative = (cmpResult == MP_LT); + if (cmpResult == MP_EQ) gotHash = 0; + if (ch == 'u') { + if (isNegative) { + msg = "unsigned bignum format is invalid"; + errCode = "BADUNSIGNED"; + goto errorMsg; + } else { + ch = 'd'; + } + } +#ifndef TCL_WIDE_INT_IS_LONG } else if (useWide) { - if (Tcl_GetWideIntFromObj(NULL, segment, &w) != TCL_OK) { + if (TclGetWideIntFromObj(NULL, segment, &w) != TCL_OK) { Tcl_Obj *objPtr; if (Tcl_GetBignumFromObj(interp,segment,&big) != TCL_OK) { @@ -1950,12 +1990,14 @@ Tcl_AppendFormatToObj( mp_mod_2d(&big, (int) CHAR_BIT*sizeof(Tcl_WideInt), &big); objPtr = Tcl_NewBignumObj(&big); Tcl_IncrRefCount(objPtr); - Tcl_GetWideIntFromObj(NULL, objPtr, &w); + TclGetWideIntFromObj(NULL, objPtr, &w); Tcl_DecrRefCount(objPtr); } isNegative = (w < (Tcl_WideInt) 0); + if (w == (Tcl_WideInt) 0) gotHash = 0; +#endif } else if (TclGetLongFromObj(NULL, segment, &l) != TCL_OK) { - if (Tcl_GetWideIntFromObj(NULL, segment, &w) != TCL_OK) { + if (TclGetWideIntFromObj(NULL, segment, &w) != TCL_OK) { Tcl_Obj *objPtr; if (Tcl_GetBignumFromObj(interp,segment,&big) != TCL_OK) { @@ -1972,14 +2014,18 @@ Tcl_AppendFormatToObj( if (useShort) { s = (short) l; isNegative = (s < (short) 0); + if (s == (short) 0) gotHash = 0; } else { isNegative = (l < (long) 0); + if (l == (long) 0) gotHash = 0; } } else if (useShort) { s = (short) l; isNegative = (s < (short) 0); + if (s == (short) 0) gotHash = 0; } else { isNegative = (l < (long) 0); + if (l == (long) 0) gotHash = 0; } segment = Tcl_NewObj(); @@ -1993,13 +2039,13 @@ Tcl_AppendFormatToObj( segmentLimit -= 1; } - if (gotHash) { + if (gotHash || (ch == 'p')) { switch (ch) { case 'o': - Tcl_AppendToObj(segment, "0", 1); - segmentLimit -= 1; - precision--; + Tcl_AppendToObj(segment, "0o", 2); + segmentLimit -= 2; break; + case 'p': case 'x': case 'X': Tcl_AppendToObj(segment, "0x", 2); @@ -2009,6 +2055,14 @@ Tcl_AppendFormatToObj( Tcl_AppendToObj(segment, "0b", 2); segmentLimit -= 2; break; +#if TCL_MAJOR_VERSION < 9 + case 'd': + if (gotZero) { + Tcl_AppendToObj(segment, "0d", 2); + segmentLimit -= 2; + } + break; +#endif } } @@ -2020,8 +2074,10 @@ Tcl_AppendFormatToObj( if (useShort) { pure = Tcl_NewIntObj((int) s); +#ifndef TCL_WIDE_INT_IS_LONG } else if (useWide) { pure = Tcl_NewWideIntObj(w); +#endif } else if (useBig) { pure = Tcl_NewBignumObj(&big); } else { @@ -2078,6 +2134,7 @@ Tcl_AppendFormatToObj( case 'u': case 'o': + case 'p': case 'x': case 'X': case 'b': { @@ -2104,6 +2161,7 @@ Tcl_AppendFormatToObj( numDigits++; us /= base; } +#ifndef TCL_WIDE_INT_IS_LONG } else if (useWide) { Tcl_WideUInt uw = (Tcl_WideUInt) w; @@ -2112,6 +2170,7 @@ Tcl_AppendFormatToObj( numDigits++; uw /= base; } +#endif } else if (useBig && big.used) { int leftover = (big.used * DIGIT_BIT) % numBits; mp_digit mask = (~(mp_digit)0) << (DIGIT_BIT-leftover); @@ -2141,7 +2200,7 @@ Tcl_AppendFormatToObj( * Need to be sure zero becomes "0", not "". */ - if ((numDigits == 0) && !((ch == 'o') && gotHash)) { + if (numDigits == 0) { numDigits = 1; } pure = Tcl_NewObj(); @@ -2161,7 +2220,11 @@ Tcl_AppendFormatToObj( } digitOffset = (int) (bits % base); if (digitOffset > 9) { - bytes[numDigits] = 'a' + digitOffset - 10; + if (ch == 'X') { + bytes[numDigits] = 'A' + digitOffset - 10; + } else { + bytes[numDigits] = 'a' + digitOffset - 10; + } } else { bytes[numDigits] = '0' + digitOffset; } @@ -2204,6 +2267,8 @@ Tcl_AppendFormatToObj( break; } + case 'a': + case 'A': case 'e': case 'E': case 'f': @@ -2272,6 +2337,12 @@ Tcl_AppendFormatToObj( errCode = "OVERFLOW"; goto errorMsg; } + if (ch == 'A') { + char *p = TclGetString(segment) + 1; + *p = 'x'; + p = strchr(p, 'P'); + if (p) *p = 'p'; + } break; } default: @@ -2283,14 +2354,6 @@ Tcl_AppendFormatToObj( goto error; } - switch (ch) { - case 'E': - case 'G': - case 'X': { - Tcl_SetObjLength(segment, Tcl_UtfToUpper(TclGetString(segment))); - } - } - if (width>0 && numChars<0) { numChars = Tcl_GetCharLength(segment); } @@ -2467,6 +2530,7 @@ AppendPrintfToObjVA( case 'u': case 'd': case 'o': + case 'p': case 'x': case 'X': seekingConversion = 0; @@ -2484,15 +2548,26 @@ AppendPrintfToObjVA( Tcl_ListObjAppendElement(NULL, list, Tcl_NewWideIntObj( va_arg(argList, Tcl_WideInt))); break; + case 3: + Tcl_ListObjAppendElement(NULL, list, Tcl_NewBignumObj( + va_arg(argList, mp_int *))); + break; } break; + case 'a': + case 'A': case 'e': case 'E': case 'f': case 'g': case 'G': + if (size > 0) { Tcl_ListObjAppendElement(NULL, list, Tcl_NewDoubleObj( - va_arg(argList, double))); + (double)va_arg(argList, long double))); + } else { + Tcl_ListObjAppendElement(NULL, list, Tcl_NewDoubleObj( + va_arg(argList, double))); + } seekingConversion = 0; break; case '*': @@ -2512,12 +2587,19 @@ AppendPrintfToObjVA( gotPrecision = 1; p++; break; - /* TODO: support for bignum arguments */ case 'l': ++size; p++; break; - case 'L': + case 't': + case 'z': + if (sizeof(size_t) == sizeof(Tcl_WideInt)) { + size = 2; + } + p++; + break; + case 'j': + case 'q': size = 2; p++; break; @@ -2525,9 +2607,17 @@ AppendPrintfToObjVA( if (p[1]=='6' && p[2]=='4') { p += 2; size = 2; + } else if (p[1]=='3' && p[2]=='2') { + p += 2; + } else if (sizeof(size_t) == sizeof(Tcl_WideInt)) { + size = 2; } p++; break; + case 'L': + size = 3; + p++; + break; case 'h': size = -1; default: @@ -2641,23 +2731,24 @@ TclGetStringStorage( * Performs the [string repeat] function. * * Results: - * A standard Tcl result. + * A (Tcl_Obj *) pointing to the result value, or NULL in case of an + * error. * * Side effects: - * Writes to *objPtrPtr the address of Tcl_Obj that is concatenation - * of count copies of the value in objPtr. + * On error, when interp is not NULL, error information is left in it. * *--------------------------------------------------------------------------- */ -int +Tcl_Obj * TclStringRepeat( Tcl_Interp *interp, Tcl_Obj *objPtr, int count, - Tcl_Obj **objPtrPtr) + int flags) { Tcl_Obj *objResultPtr; + int inPlace = flags & TCL_STRING_IN_PLACE; int length = 0, unichar = 0, done = 1; int binary = TclIsPureByteArray(objPtr); @@ -2692,8 +2783,7 @@ TclStringRepeat( if (length == 0) { /* Any repeats of empty is empty. */ - *objPtrPtr = objPtr; - return TCL_OK; + return objPtr; } if (count > INT_MAX/length) { @@ -2702,13 +2792,13 @@ TclStringRepeat( "max size for a Tcl value (%d bytes) exceeded", INT_MAX)); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } - return TCL_ERROR; + return NULL; } if (binary) { /* Efficiently produce a pure byte array result */ - objResultPtr = Tcl_IsShared(objPtr) ? Tcl_DuplicateObj(objPtr) - : objPtr; + objResultPtr = (!inPlace || Tcl_IsShared(objPtr)) ? + Tcl_DuplicateObj(objPtr) : objPtr; Tcl_SetByteArrayLength(objResultPtr, count*length); /* PANIC? */ Tcl_SetByteArrayLength(objResultPtr, length); @@ -2721,7 +2811,7 @@ TclStringRepeat( (count - done) * length); } else if (unichar) { /* Efficiently produce a pure Tcl_UniChar array result */ - if (Tcl_IsShared(objPtr)) { + if (!inPlace || Tcl_IsShared(objPtr)) { objResultPtr = Tcl_NewUnicodeObj(Tcl_GetUnicode(objPtr), length); } else { TclInvalidateStringRep(objPtr); @@ -2732,11 +2822,11 @@ TclStringRepeat( if (interp) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "string size overflow: unable to alloc %" - TCL_LL_MODIFIER "u bytes", - (Tcl_WideUInt)STRING_SIZE(count*length))); + TCL_Z_MODIFIER "u bytes", + STRING_SIZE(count*length))); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } - return TCL_ERROR; + return NULL; } Tcl_SetObjLength(objResultPtr, length); while (count - done > done) { @@ -2747,7 +2837,7 @@ TclStringRepeat( (count - done) * length); } else { /* Efficiently concatenate string reps */ - if (Tcl_IsShared(objPtr)) { + if (!inPlace || Tcl_IsShared(objPtr)) { objResultPtr = Tcl_NewStringObj(Tcl_GetString(objPtr), length); } else { TclFreeIntRep(objPtr); @@ -2760,7 +2850,7 @@ TclStringRepeat( count*length)); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } - return TCL_ERROR; + return NULL; } Tcl_SetObjLength(objResultPtr, length); while (count - done > done) { @@ -2770,40 +2860,48 @@ TclStringRepeat( Tcl_AppendToObj(objResultPtr, Tcl_GetString(objResultPtr), (count - done) * length); } - *objPtrPtr = objResultPtr; - return TCL_OK; + return objResultPtr; } /* *--------------------------------------------------------------------------- * - * TclStringCatObjv -- + * TclStringCat -- * * Performs the [string cat] function. * * Results: - * A standard Tcl result. + * A (Tcl_Obj *) pointing to the result value, or NULL in case of an + * error. * * Side effects: - * Writes to *objPtrPtr the address of Tcl_Obj that is concatenation - * of all objc values in objv. + * On error, when interp is not NULL, error information is left in it. * *--------------------------------------------------------------------------- */ -int -TclStringCatObjv( +Tcl_Obj * +TclStringCat( Tcl_Interp *interp, - int inPlace, int objc, Tcl_Obj * const objv[], - Tcl_Obj **objPtrPtr) + int flags) { - Tcl_Obj *objPtr, *objResultPtr, * const *ov; - int oc, length = 0, binary = 1, first = 0; + Tcl_Obj *objResultPtr, * const *ov; + int oc, length = 0, binary = 1; int allowUniChar = 1, requestUniChar = 0; + int first = objc - 1; /* Index of first value possibly not empty */ + int last = 0; /* Index of last value possibly not empty */ + int inPlace = flags & TCL_STRING_IN_PLACE; - /* assert (objc >= 2) */ + /* assert ( objc >= 0 ) */ + + if (objc <= 1) { + /* Only one or no objects; return first or empty */ + return objc ? objv[0] : Tcl_NewObj(); + } + + /* assert ( objc >= 2 ) */ /* * Analyze to determine what representation result should be. @@ -2813,10 +2911,12 @@ TclStringCatObjv( */ ov = objv, oc = objc; - while (oc-- && (binary || allowUniChar)) { - objPtr = *ov++; + do { + Tcl_Obj *objPtr = *ov++; - if (objPtr->bytes) { + if (TclIsPureByteArray(objPtr)) { + allowUniChar = 0; + } else if (objPtr->bytes) { /* Value has a string rep. */ if (objPtr->length) { /* @@ -2831,85 +2931,152 @@ TclStringCatObjv( } } else { /* assert (objPtr->typePtr != NULL) -- stork! */ - if (TclIsPureByteArray(objPtr)) { - allowUniChar = 0; + binary = 0; + if (objPtr->typePtr == &tclStringType) { + /* Have a pure Unicode value; ask to preserve it */ + requestUniChar = 1; } else { - binary = 0; - if (objPtr->typePtr == &tclStringType) { - /* Have a pure Unicode value; ask to preserve it */ - requestUniChar = 1; - } else { - /* Have another type; prevent shimmer */ - allowUniChar = 0; - } + /* Have another type; prevent shimmer */ + allowUniChar = 0; } } - } + } while (--oc && (binary || allowUniChar)); if (binary) { /* Result will be pure byte array. Pre-size it */ ov = objv; oc = objc; - while (oc-- && (length >= 0)) { - objPtr = *ov++; + do { + Tcl_Obj *objPtr = *ov++; if (objPtr->bytes == NULL) { int numBytes; Tcl_GetByteArrayFromObj(objPtr, &numBytes); /* PANIC? */ - if (length == 0) { - first = objc - oc - 1; + if (numBytes) { + last = objc - oc; + if (length == 0) { + first = last; + } else if (numBytes > INT_MAX - length) { + goto overflow; + } + length += numBytes; } - length += numBytes; } - } + } while (--oc); } else if (allowUniChar && requestUniChar) { /* Result will be pure Tcl_UniChar array. Pre-size it. */ ov = objv; oc = objc; - while (oc-- && (length >= 0)) { - objPtr = *ov++; + do { + Tcl_Obj *objPtr = *ov++; if ((objPtr->bytes == NULL) || (objPtr->length)) { int numChars; Tcl_GetUnicodeFromObj(objPtr, &numChars); /* PANIC? */ - if (length == 0) { - first = objc - oc - 1; + if (numChars) { + last = objc - oc; + if (length == 0) { + first = last; + } else if (numChars > INT_MAX - length) { + goto overflow; + } + length += numChars; } - length += numChars; } - } + } while (--oc); } else { /* Result will be concat of string reps. Pre-size it. */ ov = objv; oc = objc; - while (oc-- && (length >= 0)) { - int numBytes; + do { + Tcl_Obj *pendingPtr = NULL; + + /* + * Loop until a possibly non-empty value is reached. + * Keep string rep generation pending when possible. + */ - objPtr = *ov++; + do { + /* assert ( pendingPtr == NULL ) */ + /* assert ( length == 0 ) */ - Tcl_GetStringFromObj(objPtr, &numBytes); /* PANIC? */ - if ((length == 0) && numBytes) { - first = objc - oc - 1; + Tcl_Obj *objPtr = *ov++; + + if (objPtr->bytes == NULL) { + /* No string rep; Take the chance we can avoid making it */ + pendingPtr = objPtr; + } else { + Tcl_GetStringFromObj(objPtr, &length); /* PANIC? */ + } + } while (--oc && (length == 0) && (pendingPtr == NULL)); + + /* + * Either we found a possibly non-empty value, and we + * remember this index as the first and last such value so + * far seen, or (oc == 0) and all values are known empty, + * so first = last = objc - 1 signals the right quick return. + */ + + first = last = objc - oc - 1; + + if (oc && (length == 0)) { + int numBytes; + + /* assert ( pendingPtr != NULL ) */ + + /* + * There's a pending value followed by more values. + * Loop over remaining values generating strings until + * a non-empty value is found, or the pending value gets + * its string generated. + */ + + do { + Tcl_Obj *objPtr = *ov++; + Tcl_GetStringFromObj(objPtr, &numBytes); /* PANIC? */ + } while (--oc && numBytes == 0 && pendingPtr->bytes == NULL); + + if (numBytes) { + last = objc -oc -1; + } + if (oc || numBytes) { + Tcl_GetStringFromObj(pendingPtr, &length); + } + if (length == 0) { + if (numBytes) { + first = last; + } + } else if (numBytes > INT_MAX - length) { + goto overflow; + } + length += numBytes; } - length += numBytes; - } - } + } while (oc && (length == 0)); - if (length < 0) { - if (interp) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "max size for a Tcl value (%d bytes) exceeded", INT_MAX)); - Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); + while (oc) { + int numBytes; + Tcl_Obj *objPtr = *ov++; + + /* assert ( length > 0 && pendingPtr == NULL ) */ + + Tcl_GetStringFromObj(objPtr, &numBytes); /* PANIC? */ + if (numBytes) { + last = objc - oc; + if (numBytes > INT_MAX - length) { + goto overflow; + } + length += numBytes; + } + --oc; } - return TCL_ERROR; } - if (length == 0) { - /* Total length of zero means every value has length zero */ - *objPtrPtr = objv[0]; - return TCL_OK; + if (last <= first /*|| length == 0 */) { + /* Only one non-empty value or zero length; return first */ + /* NOTE: (length == 0) implies (last <= first) */ + return objv[first]; } - objv += first; objc -= first; + objv += first; objc = (last - first + 1); if (binary) { /* Efficiently produce a pure byte array result */ @@ -2956,11 +3123,11 @@ TclStringCatObjv( if (interp) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "concatenation failed: unable to alloc %" - TCL_LL_MODIFIER "u bytes", - (Tcl_WideUInt)STRING_SIZE(length))); + TCL_Z_MODIFIER "u bytes", + STRING_SIZE(length))); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } - return TCL_ERROR; + return NULL; } dst = Tcl_GetUnicode(objResultPtr) + start; } else { @@ -2969,14 +3136,15 @@ TclStringCatObjv( /* Ugly interface! No scheme to init array size. */ objResultPtr = Tcl_NewUnicodeObj(&ch, 0); /* PANIC? */ if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) { + Tcl_DecrRefCount(objResultPtr); if (interp) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "concatenation failed: unable to alloc %" - TCL_LL_MODIFIER "u bytes", - (Tcl_WideUInt)STRING_SIZE(length))); + TCL_Z_MODIFIER "u bytes", + STRING_SIZE(length))); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } - return TCL_ERROR; + return NULL; } dst = Tcl_GetUnicode(objResultPtr); } @@ -3007,22 +3175,23 @@ TclStringCatObjv( length)); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } - return TCL_ERROR; + return NULL; } dst = Tcl_GetString(objResultPtr) + start; - if (length > start) { - TclFreeIntRep(objResultPtr); - } + + /* assert ( length > start ) */ + TclFreeIntRep(objResultPtr); } else { objResultPtr = Tcl_NewObj(); /* PANIC? */ if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) { + Tcl_DecrRefCount(objResultPtr); if (interp) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "concatenation failed: unable to alloc %u bytes", length)); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } - return TCL_ERROR; + return NULL; } dst = Tcl_GetString(objResultPtr); } @@ -3037,14 +3206,21 @@ TclStringCatObjv( } } } - *objPtrPtr = objResultPtr; - return TCL_OK; + return objResultPtr; + + overflow: + if (interp) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "max size for a Tcl value (%d bytes) exceeded", INT_MAX)); + Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); + } + return NULL; } /* *--------------------------------------------------------------------------- * - * TclStringFind -- + * TclStringFirst -- * * Implements the [string first] operation. * @@ -3060,20 +3236,20 @@ TclStringCatObjv( */ int -TclStringFind( +TclStringFirst( Tcl_Obj *needle, Tcl_Obj *haystack, int start) { int lh, ln = Tcl_GetCharLength(needle); + if (start < 0) { + start = 0; + } if (ln == 0) { - /* - * We don't find empty substrings. Bizarre! - * - * TODO: When we one day make this a true substring - * finder, change this to "return 0" - */ + /* We don't find empty substrings. Bizarre! + * Whenever this routine is turned into a proper substring + * finder, change to `return start` after limits imposed. */ return -1; } @@ -3081,58 +3257,57 @@ TclStringFind( unsigned char *end, *try, *bh; unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln); + /* Find bytes in bytes */ bh = Tcl_GetByteArrayFromObj(haystack, &lh); end = bh + lh; try = bh + start; while (try + ln <= end) { - try = memchr(try, bn[0], end - try); - + /* + * Look for the leading byte of the needle in the haystack + * starting at try and stopping when there's not enough room + * for the needle left. + */ + try = memchr(try, bn[0], (end + 1 - ln) - try); if (try == NULL) { + /* Leading byte not found -> needle cannot be found. */ return -1; } + /* Leading byte found, check rest of needle. */ if (0 == memcmp(try+1, bn+1, ln-1)) { + /* Checks! Return the successful index. */ return (try - bh); } + /* Rest of needle match failed; Iterate to continue search. */ try++; } return -1; } - lh = Tcl_GetCharLength(haystack); - if (haystack->bytes && (lh == haystack->length)) { - /* haystack is all single-byte chars */ - - if (needle->bytes && (ln == needle->length)) { - /* needle is also all single-byte chars */ - char *found = strstr(haystack->bytes + start, needle->bytes); + /* + * TODO: It might be nice to support some cases where it is not + * necessary to shimmer to &tclStringType to compute the result, + * and instead operate just on the objPtr->bytes values directly. + * However, we also do not want the answer to change based on the + * code pathway, or if it does we want that to be for some values + * we explicitly decline to support. Getting there will involve + * locking down in practice more firmly just what encodings produce + * what supported results for the objPtr->bytes values. For now, + * do only the well-defined Tcl_UniChar array search. + */ - if (found) { - return (found - haystack->bytes); - } else { - return -1; - } - } else { - /* - * Cannot find substring with a multi-byte char inside - * a string with no multi-byte chars. - */ - return -1; - } - } else { + { Tcl_UniChar *try, *end, *uh; Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln); uh = Tcl_GetUnicodeFromObj(haystack, &lh); end = uh + lh; - try = uh + start; - while (try + ln <= end) { - if ((*try == *un) - && (0 == memcmp(try+1, un+1, (ln-1)*sizeof(Tcl_UniChar)))) { + for (try = uh + start; try + ln <= end; try++) { + if ((*try == *un) && (0 == + memcmp(try + 1, un + 1, (ln-1) * sizeof(Tcl_UniChar)))) { return (try - uh); } - try++; } return -1; } @@ -3169,24 +3344,24 @@ TclStringLast( * We don't find empty substrings. Bizarre! * * TODO: When we one day make this a true substring - * finder, change this to "return 0" + * finder, change this to "return last", after limitation. */ return -1; } - if (ln > last + 1) { + lh = Tcl_GetCharLength(haystack); + if (last >= lh) { + last = lh - 1; + } + + if (last < ln - 1) { return -1; } if (TclIsPureByteArray(needle) && TclIsPureByteArray(haystack)) { - unsigned char *try, *bh; + unsigned char *try, *bh = Tcl_GetByteArrayFromObj(haystack, &lh); unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln); - bh = Tcl_GetByteArrayFromObj(haystack, &lh); - - if (last + 1 > lh) { - last = lh - 1; - } try = bh + last + 1 - ln; while (try >= bh) { if ((*try == bn[0]) @@ -3198,38 +3373,10 @@ TclStringLast( return -1; } - lh = Tcl_GetCharLength(haystack); - if (last + 1 > lh) { - last = lh - 1; - } - if (haystack->bytes && (lh == haystack->length)) { - /* haystack is all single-byte chars */ - - if (needle->bytes && (ln == needle->length)) { - /* needle is also all single-byte chars */ - - char *try = haystack->bytes + last + 1 - ln; - while (try >= haystack->bytes) { - if ((*try == needle->bytes[0]) - && (0 == memcmp(try+1, needle->bytes + 1, ln - 1))) { - return (try - haystack->bytes); - } - try--; - } - return -1; - } else { - /* - * Cannot find substring with a multi-byte char inside - * a string with no multi-byte chars. - */ - return -1; - } - } else { - Tcl_UniChar *try, *uh; + { + Tcl_UniChar *try, *uh = Tcl_GetUnicodeFromObj(haystack, &lh); Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln); - uh = Tcl_GetUnicodeFromObj(haystack, &lh); - try = uh + last + 1 - ln; while (try >= uh) { if ((*try == un[0]) @@ -3245,14 +3392,14 @@ TclStringLast( /* *--------------------------------------------------------------------------- * - * TclStringObjReverse -- + * TclStringReverse -- * * Implements the [string reverse] operation. * * Results: - * An unshared Tcl value which is the [string reverse] of the argument - * supplied. When sharing rules permit, the returned value might be the - * argument with modifications done in place. + * A Tcl value which is the [string reverse] of the argument supplied. + * When sharing rules permit and the caller requests, the returned value + * might be the argument with modifications done in place. * * Side effects: * May allocate a new Tcl_Obj. @@ -3283,17 +3430,19 @@ ReverseBytes( } Tcl_Obj * -TclStringObjReverse( - Tcl_Obj *objPtr) +TclStringReverse( + Tcl_Obj *objPtr, + int flags) { String *stringPtr; - Tcl_UniChar ch; + Tcl_UniChar ch = 0; + int inPlace = flags & TCL_STRING_IN_PLACE; if (TclIsPureByteArray(objPtr)) { int numBytes; unsigned char *from = Tcl_GetByteArrayFromObj(objPtr, &numBytes); - if (Tcl_IsShared(objPtr)) { + if (!inPlace || Tcl_IsShared(objPtr)) { objPtr = Tcl_NewByteArrayObj(NULL, numBytes); } ReverseBytes(Tcl_GetByteArrayFromObj(objPtr, NULL), from, numBytes); @@ -3307,7 +3456,7 @@ TclStringObjReverse( Tcl_UniChar *from = Tcl_GetUnicode(objPtr); Tcl_UniChar *src = from + stringPtr->numChars; - if (Tcl_IsShared(objPtr)) { + if (!inPlace || Tcl_IsShared(objPtr)) { Tcl_UniChar *to; /* @@ -3315,7 +3464,6 @@ TclStringObjReverse( * Tcl_SetObjLength into growing the unicode rep buffer. */ - ch = 0; objPtr = Tcl_NewUnicodeObj(&ch, 1); Tcl_SetObjLength(objPtr, stringPtr->numChars); to = Tcl_GetUnicode(objPtr); @@ -3337,7 +3485,7 @@ TclStringObjReverse( int numBytes = objPtr->length; char *to, *from = objPtr->bytes; - if (Tcl_IsShared(objPtr)) { + if (!inPlace || Tcl_IsShared(objPtr)) { objPtr = Tcl_NewObj(); Tcl_SetObjLength(objPtr, numBytes); } @@ -3361,7 +3509,7 @@ TclStringObjReverse( * It's part of the contract for objPtr->bytes values. * Thus, we can skip calling Tcl_UtfCharComplete() here. */ - int bytesInChar = Tcl_UtfToUniChar(from, &ch); + int bytesInChar = TclUtfToUniChar(from, &ch); ReverseBytes((unsigned char *)to, (unsigned char *)from, bytesInChar); @@ -3384,6 +3532,150 @@ TclStringObjReverse( /* *--------------------------------------------------------------------------- * + * TclStringReplace -- + * + * Implements the inner engine of the [string replace] command. + * + * The result is a concatenation of a prefix from objPtr, characters + * 0 through first-1, the insertPtr string value, and a suffix from + * objPtr, characters from first + count to the end. The effect is + * as if the inner substring of characters first through first+count-1 + * are removed and replaced with insertPtr. + * If insertPtr is NULL, it is treated as an empty string. + * When passed the flag TCL_STRING_IN_PLACE, this routine will try + * to do the work within objPtr, so long as no sharing forbids it. + * Without that request, or as needed, a new Tcl value will be allocated + * to be the result. + * + * Results: + * A Tcl value that is the result of the substring replacement. + * May return NULL in case of an error. When NULL is returned and + * interp is non-NULL, error information is left in interp + * + *--------------------------------------------------------------------------- + */ + +Tcl_Obj * +TclStringReplace( + Tcl_Interp *interp, /* For error reporting, may be NULL */ + Tcl_Obj *objPtr, /* String to act upon */ + int first, /* First index to replace */ + int count, /* How many chars to replace */ + Tcl_Obj *insertPtr, /* Replacement string, may be NULL */ + int flags) /* TCL_STRING_IN_PLACE => attempt in-place */ +{ + int inPlace = flags & TCL_STRING_IN_PLACE; + Tcl_Obj *result; + + /* Caller is expected to pass sensible arguments */ + assert ( count >= 0 ) ; + assert ( first >= 0 ) ; + + /* Replace nothing with nothing */ + if ((insertPtr == NULL) && (count == 0)) { + if (inPlace) { + return objPtr; + } else { + return Tcl_DuplicateObj(objPtr); + } + } + + /* + * The caller very likely had to call Tcl_GetCharLength() or similar + * to be able to process index values. This means it is like that + * objPtr is either a proper "bytearray" or a "string" or else it has + * a known and short string rep. + */ + + if (TclIsPureByteArray(objPtr)) { + int numBytes; + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &numBytes); + + if (insertPtr == NULL) { + /* Replace something with nothing. */ + + assert ( first <= numBytes ) ; + assert ( count <= numBytes ) ; + assert ( first + count <= numBytes ) ; + + result = Tcl_NewByteArrayObj(NULL, numBytes - count);/* PANIC? */ + TclAppendBytesToByteArray(result, bytes, first); + TclAppendBytesToByteArray(result, bytes + first + count, + numBytes - count - first); + return result; + } + + /* Replace everything */ + if ((first == 0) && (count == numBytes)) { + return insertPtr; + } + + if (TclIsPureByteArray(insertPtr)) { + int newBytes; + unsigned char *iBytes + = Tcl_GetByteArrayFromObj(insertPtr, &newBytes); + + if (count == newBytes && inPlace && !Tcl_IsShared(objPtr)) { + /* + * Removal count and replacement count are equal. + * Other conditions permit. Do in-place splice. + */ + + memcpy(bytes + first, iBytes, count); + Tcl_InvalidateStringRep(objPtr); + return objPtr; + } + + if (newBytes > INT_MAX - (numBytes - count)) { + if (interp) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "max size for a Tcl value (%d bytes) exceeded", + INT_MAX)); + Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); + } + return NULL; + } + result = Tcl_NewByteArrayObj(NULL, numBytes - count + newBytes); + /* PANIC? */ + TclAppendBytesToByteArray(result, bytes, first); + TclAppendBytesToByteArray(result, iBytes, newBytes); + TclAppendBytesToByteArray(result, bytes + first + count, + numBytes - count - first); + return result; + } + + /* Flow through to try other approaches below */ + } + + /* + * TODO: Figure out how not to generate a Tcl_UniChar array rep + * when it can be determined objPtr->bytes points to a string of + * all single-byte characters so we can index it directly. + */ + + /* The traditional implementation... */ + { + int numChars; + Tcl_UniChar *ustring = Tcl_GetUnicodeFromObj(objPtr, &numChars); + + /* TODO: Is there an in-place option worth pursuing here? */ + + result = Tcl_NewUnicodeObj(ustring, first); + if (insertPtr) { + Tcl_AppendObjToObj(result, insertPtr); + } + if (first + count < numChars) { + Tcl_AppendUnicodeToObj(result, ustring + first + count, + numChars - first - count); + } + + return result; + } +} + +/* + *--------------------------------------------------------------------------- + * * FillUnicodeRep -- * * Populate the Unicode internal rep with the Unicode form of its string @@ -3418,7 +3710,7 @@ ExtendUnicodeRepWithString( { String *stringPtr = GET_STRING(objPtr); int needed, numOrigChars = 0; - Tcl_UniChar *dst; + Tcl_UniChar *dst, unichar = 0; if (stringPtr->hasUnicode) { numOrigChars = stringPtr->numChars; @@ -3441,7 +3733,8 @@ ExtendUnicodeRepWithString( numAppendChars = 0; } for (dst=stringPtr->unicode + numOrigChars; numAppendChars-- > 0; dst++) { - bytes += TclUtfToUniChar(bytes, dst); + bytes += TclUtfToUniChar(bytes, &unichar); + *dst = unichar; } *dst = 0; } |
