summaryrefslogtreecommitdiffstats
path: root/generic/tclStringObj.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r--generic/tclStringObj.c771
1 files changed, 532 insertions, 239 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index c45baa1..9913160 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -38,6 +38,7 @@
#include "tommath.h"
#include "tclStringRep.h"
+#include "assert.h"
/*
* Prototypes for functions defined later in this file:
*/
@@ -140,8 +141,8 @@ GrowStringBuffer(
objPtr->bytes = NULL;
}
if (flag == 0 || stringPtr->allocated > 0) {
- attempt = 2 * needed;
- if (attempt >= 0) {
+ if (needed <= INT_MAX / 2) {
+ attempt = 2 * needed;
ptr = attemptckrealloc(objPtr->bytes, attempt + 1);
}
if (ptr == NULL) {
@@ -190,8 +191,8 @@ GrowUnicodeBuffer(
* Subsequent appends - apply the growth algorithm.
*/
- attempt = 2 * needed;
- if (attempt >= 0 && attempt <= STRING_MAXCHARS) {
+ if (needed <= STRING_MAXCHARS / 2) {
+ attempt = 2 * needed;
ptr = stringAttemptRealloc(stringPtr, attempt);
}
if (ptr == NULL) {
@@ -418,9 +419,14 @@ Tcl_GetCharLength(
}
/*
- * Optimize the case where we're really dealing with a bytearray object
- * without string representation; we don't need to convert to a string to
- * perform the get-length operation.
+ * Optimize the case where we're really dealing with a bytearray object;
+ * we don't need to convert to a string to perform the get-length operation.
+ *
+ * Starting in Tcl 8.7, we check for a "pure" bytearray, because the
+ * machinery behind that test is using a proper bytearray ObjType. We
+ * could also compute length of an improper bytearray without shimmering
+ * but there's no value in that. We *want* to shimmer an improper bytearray
+ * because improper bytearrays have worthless internal reps.
*/
if (TclIsPureByteArray(objPtr)) {
@@ -476,8 +482,7 @@ Tcl_GetUniChar(
/*
* Optimize the case where we're really dealing with a bytearray object
- * without string representation; we don't need to convert to a string to
- * perform the indexing operation.
+ * we don't need to convert to a string to perform the indexing operation.
*/
if (TclIsPureByteArray(objPtr)) {
@@ -517,7 +522,7 @@ Tcl_GetUniChar(
*
* Get the Unicode form of the String object. If the object is not
* already a String object, it will be converted to one. If the String
- * object does not have a Unicode rep, then one is create from the UTF
+ * object does not have a Unicode rep, then one is created from the UTF
* string format.
*
* Results:
@@ -610,8 +615,7 @@ Tcl_GetRange(
/*
* Optimize the case where we're really dealing with a bytearray object
- * without string representation; we don't need to convert to a string to
- * perform the substring operation.
+ * we don't need to convert to a string to perform the substring operation.
*/
if (TclIsPureByteArray(objPtr)) {
@@ -651,6 +655,17 @@ Tcl_GetRange(
stringPtr = GET_STRING(objPtr);
}
+#if TCL_UTF_MAX == 4
+ /* See: bug [11ae2be95dac9417] */
+ if ((first>0) && ((stringPtr->unicode[first]&0xFC00) == 0xDC00)
+ && ((stringPtr->unicode[first-1]&0xFC00) == 0xD800)) {
+ ++first;
+ }
+ if ((last+1<stringPtr->numChars) && ((stringPtr->unicode[last+1]&0xFC00) == 0xDC00)
+ && ((stringPtr->unicode[last]&0xFC00) == 0xD800)) {
+ ++last;
+ }
+#endif
return Tcl_NewUnicodeObj(stringPtr->unicode + first, last-first+1);
}
@@ -1208,9 +1223,9 @@ Tcl_AppendObjToObj(
/*
* Handle append of one bytearray object to another as a special case.
- * Note that we only do this when the objects don't have string reps; if
- * it did, then appending the byte arrays together could well lose
- * information; this is a special-case optimization only.
+ * Note that we only do this when the objects are pure so that the
+ * bytearray faithfully represent the true value; Otherwise
+ * appending the byte arrays together could lose information;
*/
if ((TclIsPureByteArray(objPtr) || objPtr->bytes == &tclEmptyString)
@@ -1650,6 +1665,7 @@ Tcl_AppendFormatToObj(
const char *span = format, *msg, *errCode;
int numBytes = 0, objIndex = 0, gotXpg = 0, gotSequential = 0;
int originalLength, limit;
+ Tcl_UniChar ch = 0;
static const char *mixedXPG =
"cannot mix \"%\" and \"%n$\" conversion specifiers";
static const char *const badIndex[2] = {
@@ -1670,12 +1686,14 @@ Tcl_AppendFormatToObj(
while (*format != '\0') {
char *end;
- int gotMinus, gotHash, gotZero, gotSpace, gotPlus, sawFlag;
- int width, gotPrecision, precision, useShort, useWide, useBig;
+ int gotMinus = 0, gotHash = 0, gotZero = 0, gotSpace = 0, gotPlus = 0;
+ int width, gotPrecision, precision, sawFlag, useShort = 0, useBig = 0;
+#ifndef TCL_WIDE_INT_IS_LONG
+ int useWide = 0;
+#endif
int newXpg, numChars, allocSegment = 0, segmentLimit, segmentNumBytes;
Tcl_Obj *segment;
- Tcl_UniChar ch;
- int step = Tcl_UtfToUniChar(format, &ch);
+ int step = TclUtfToUniChar(format, &ch);
format += step;
if (ch != '%') {
@@ -1699,7 +1717,7 @@ Tcl_AppendFormatToObj(
* Step 0. Handle special case of escaped format marker (i.e., %%).
*/
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
if (ch == '%') {
span = format;
numBytes = step;
@@ -1719,7 +1737,7 @@ Tcl_AppendFormatToObj(
newXpg = 1;
objIndex = position - 1;
format = end + 1;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
}
}
if (newXpg) {
@@ -1747,7 +1765,6 @@ Tcl_AppendFormatToObj(
* Step 2. Set of flags.
*/
- gotMinus = gotHash = gotZero = gotSpace = gotPlus = 0;
sawFlag = 1;
do {
switch (ch) {
@@ -1771,7 +1788,7 @@ Tcl_AppendFormatToObj(
}
if (sawFlag) {
format += step;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
}
} while (sawFlag);
@@ -1783,7 +1800,7 @@ Tcl_AppendFormatToObj(
if (isdigit(UCHAR(ch))) {
width = strtoul(format, &end, 10);
format = end;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
} else if (ch == '*') {
if (objIndex >= objc - 1) {
msg = badIndex[gotXpg];
@@ -1799,7 +1816,7 @@ Tcl_AppendFormatToObj(
}
objIndex++;
format += step;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
}
if (width > limit) {
msg = overflow;
@@ -1815,12 +1832,12 @@ Tcl_AppendFormatToObj(
if (ch == '.') {
gotPrecision = 1;
format += step;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
}
if (isdigit(UCHAR(ch))) {
precision = strtoul(format, &end, 10);
format = end;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
} else if (ch == '*') {
if (objIndex >= objc - 1) {
msg = badIndex[gotXpg];
@@ -1841,37 +1858,46 @@ Tcl_AppendFormatToObj(
}
objIndex++;
format += step;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
}
/*
* Step 5. Length modifier.
*/
- useShort = useWide = useBig = 0;
if (ch == 'h') {
useShort = 1;
format += step;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
} else if (ch == 'l') {
format += step;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
if (ch == 'l') {
useBig = 1;
format += step;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
#ifndef TCL_WIDE_INT_IS_LONG
} else {
useWide = 1;
#endif
}
- } else if ((ch == 'I') && (format[1] == '6') && (format[2] == '4')) {
- format += (step + 2);
- step = Tcl_UtfToUniChar(format, &ch);
- useBig = 1;
- } else if (ch == 'L') {
+ } else if (ch == 'I') {
+ if ((format[1] == '6') && (format[2] == '4')) {
+ format += (step + 2);
+ step = TclUtfToUniChar(format, &ch);
+#ifndef TCL_WIDE_INT_IS_LONG
+ useWide = 1;
+#endif
+ } else if ((format[1] == '3') && (format[2] == '2')) {
+ format += (step + 2);
+ step = TclUtfToUniChar(format, &ch);
+ } else {
+ format += step;
+ step = TclUtfToUniChar(format, &ch);
+ }
+ } else if ((ch == 't') || (ch == 'z') || (ch == 'q') || (ch == 'j') || (ch == 'L')) {
format += step;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
useBig = 1;
}
@@ -1918,13 +1944,9 @@ Tcl_AppendFormatToObj(
}
case 'u':
- if (useBig) {
- msg = "unsigned bignum format is invalid";
- errCode = "BADUNSIGNED";
- goto errorMsg;
- }
case 'd':
case 'o':
+ case 'p':
case 'x':
case 'X':
case 'b': {
@@ -1935,13 +1957,31 @@ Tcl_AppendFormatToObj(
mp_int big;
int toAppend, isNegative = 0;
+#ifndef TCL_WIDE_INT_IS_LONG
+ if (ch == 'p') {
+ useWide = 1;
+ }
+#endif
if (useBig) {
+ int cmpResult;
if (Tcl_GetBignumFromObj(interp, segment, &big) != TCL_OK) {
goto error;
}
- isNegative = (mp_cmp_d(&big, 0) == MP_LT);
+ cmpResult = mp_cmp_d(&big, 0);
+ isNegative = (cmpResult == MP_LT);
+ if (cmpResult == MP_EQ) gotHash = 0;
+ if (ch == 'u') {
+ if (isNegative) {
+ msg = "unsigned bignum format is invalid";
+ errCode = "BADUNSIGNED";
+ goto errorMsg;
+ } else {
+ ch = 'd';
+ }
+ }
+#ifndef TCL_WIDE_INT_IS_LONG
} else if (useWide) {
- if (Tcl_GetWideIntFromObj(NULL, segment, &w) != TCL_OK) {
+ if (TclGetWideIntFromObj(NULL, segment, &w) != TCL_OK) {
Tcl_Obj *objPtr;
if (Tcl_GetBignumFromObj(interp,segment,&big) != TCL_OK) {
@@ -1950,12 +1990,14 @@ Tcl_AppendFormatToObj(
mp_mod_2d(&big, (int) CHAR_BIT*sizeof(Tcl_WideInt), &big);
objPtr = Tcl_NewBignumObj(&big);
Tcl_IncrRefCount(objPtr);
- Tcl_GetWideIntFromObj(NULL, objPtr, &w);
+ TclGetWideIntFromObj(NULL, objPtr, &w);
Tcl_DecrRefCount(objPtr);
}
isNegative = (w < (Tcl_WideInt) 0);
+ if (w == (Tcl_WideInt) 0) gotHash = 0;
+#endif
} else if (TclGetLongFromObj(NULL, segment, &l) != TCL_OK) {
- if (Tcl_GetWideIntFromObj(NULL, segment, &w) != TCL_OK) {
+ if (TclGetWideIntFromObj(NULL, segment, &w) != TCL_OK) {
Tcl_Obj *objPtr;
if (Tcl_GetBignumFromObj(interp,segment,&big) != TCL_OK) {
@@ -1972,14 +2014,18 @@ Tcl_AppendFormatToObj(
if (useShort) {
s = (short) l;
isNegative = (s < (short) 0);
+ if (s == (short) 0) gotHash = 0;
} else {
isNegative = (l < (long) 0);
+ if (l == (long) 0) gotHash = 0;
}
} else if (useShort) {
s = (short) l;
isNegative = (s < (short) 0);
+ if (s == (short) 0) gotHash = 0;
} else {
isNegative = (l < (long) 0);
+ if (l == (long) 0) gotHash = 0;
}
segment = Tcl_NewObj();
@@ -1993,13 +2039,13 @@ Tcl_AppendFormatToObj(
segmentLimit -= 1;
}
- if (gotHash) {
+ if (gotHash || (ch == 'p')) {
switch (ch) {
case 'o':
- Tcl_AppendToObj(segment, "0", 1);
- segmentLimit -= 1;
- precision--;
+ Tcl_AppendToObj(segment, "0o", 2);
+ segmentLimit -= 2;
break;
+ case 'p':
case 'x':
case 'X':
Tcl_AppendToObj(segment, "0x", 2);
@@ -2009,6 +2055,14 @@ Tcl_AppendFormatToObj(
Tcl_AppendToObj(segment, "0b", 2);
segmentLimit -= 2;
break;
+#if TCL_MAJOR_VERSION < 9
+ case 'd':
+ if (gotZero) {
+ Tcl_AppendToObj(segment, "0d", 2);
+ segmentLimit -= 2;
+ }
+ break;
+#endif
}
}
@@ -2020,8 +2074,10 @@ Tcl_AppendFormatToObj(
if (useShort) {
pure = Tcl_NewIntObj((int) s);
+#ifndef TCL_WIDE_INT_IS_LONG
} else if (useWide) {
pure = Tcl_NewWideIntObj(w);
+#endif
} else if (useBig) {
pure = Tcl_NewBignumObj(&big);
} else {
@@ -2078,6 +2134,7 @@ Tcl_AppendFormatToObj(
case 'u':
case 'o':
+ case 'p':
case 'x':
case 'X':
case 'b': {
@@ -2104,6 +2161,7 @@ Tcl_AppendFormatToObj(
numDigits++;
us /= base;
}
+#ifndef TCL_WIDE_INT_IS_LONG
} else if (useWide) {
Tcl_WideUInt uw = (Tcl_WideUInt) w;
@@ -2112,6 +2170,7 @@ Tcl_AppendFormatToObj(
numDigits++;
uw /= base;
}
+#endif
} else if (useBig && big.used) {
int leftover = (big.used * DIGIT_BIT) % numBits;
mp_digit mask = (~(mp_digit)0) << (DIGIT_BIT-leftover);
@@ -2141,7 +2200,7 @@ Tcl_AppendFormatToObj(
* Need to be sure zero becomes "0", not "".
*/
- if ((numDigits == 0) && !((ch == 'o') && gotHash)) {
+ if (numDigits == 0) {
numDigits = 1;
}
pure = Tcl_NewObj();
@@ -2161,7 +2220,11 @@ Tcl_AppendFormatToObj(
}
digitOffset = (int) (bits % base);
if (digitOffset > 9) {
- bytes[numDigits] = 'a' + digitOffset - 10;
+ if (ch == 'X') {
+ bytes[numDigits] = 'A' + digitOffset - 10;
+ } else {
+ bytes[numDigits] = 'a' + digitOffset - 10;
+ }
} else {
bytes[numDigits] = '0' + digitOffset;
}
@@ -2204,6 +2267,8 @@ Tcl_AppendFormatToObj(
break;
}
+ case 'a':
+ case 'A':
case 'e':
case 'E':
case 'f':
@@ -2272,6 +2337,12 @@ Tcl_AppendFormatToObj(
errCode = "OVERFLOW";
goto errorMsg;
}
+ if (ch == 'A') {
+ char *p = TclGetString(segment) + 1;
+ *p = 'x';
+ p = strchr(p, 'P');
+ if (p) *p = 'p';
+ }
break;
}
default:
@@ -2283,14 +2354,6 @@ Tcl_AppendFormatToObj(
goto error;
}
- switch (ch) {
- case 'E':
- case 'G':
- case 'X': {
- Tcl_SetObjLength(segment, Tcl_UtfToUpper(TclGetString(segment)));
- }
- }
-
if (width>0 && numChars<0) {
numChars = Tcl_GetCharLength(segment);
}
@@ -2467,6 +2530,7 @@ AppendPrintfToObjVA(
case 'u':
case 'd':
case 'o':
+ case 'p':
case 'x':
case 'X':
seekingConversion = 0;
@@ -2484,15 +2548,26 @@ AppendPrintfToObjVA(
Tcl_ListObjAppendElement(NULL, list, Tcl_NewWideIntObj(
va_arg(argList, Tcl_WideInt)));
break;
+ case 3:
+ Tcl_ListObjAppendElement(NULL, list, Tcl_NewBignumObj(
+ va_arg(argList, mp_int *)));
+ break;
}
break;
+ case 'a':
+ case 'A':
case 'e':
case 'E':
case 'f':
case 'g':
case 'G':
+ if (size > 0) {
Tcl_ListObjAppendElement(NULL, list, Tcl_NewDoubleObj(
- va_arg(argList, double)));
+ (double)va_arg(argList, long double)));
+ } else {
+ Tcl_ListObjAppendElement(NULL, list, Tcl_NewDoubleObj(
+ va_arg(argList, double)));
+ }
seekingConversion = 0;
break;
case '*':
@@ -2512,12 +2587,19 @@ AppendPrintfToObjVA(
gotPrecision = 1;
p++;
break;
- /* TODO: support for bignum arguments */
case 'l':
++size;
p++;
break;
- case 'L':
+ case 't':
+ case 'z':
+ if (sizeof(size_t) == sizeof(Tcl_WideInt)) {
+ size = 2;
+ }
+ p++;
+ break;
+ case 'j':
+ case 'q':
size = 2;
p++;
break;
@@ -2525,9 +2607,17 @@ AppendPrintfToObjVA(
if (p[1]=='6' && p[2]=='4') {
p += 2;
size = 2;
+ } else if (p[1]=='3' && p[2]=='2') {
+ p += 2;
+ } else if (sizeof(size_t) == sizeof(Tcl_WideInt)) {
+ size = 2;
}
p++;
break;
+ case 'L':
+ size = 3;
+ p++;
+ break;
case 'h':
size = -1;
default:
@@ -2641,23 +2731,24 @@ TclGetStringStorage(
* Performs the [string repeat] function.
*
* Results:
- * A standard Tcl result.
+ * A (Tcl_Obj *) pointing to the result value, or NULL in case of an
+ * error.
*
* Side effects:
- * Writes to *objPtrPtr the address of Tcl_Obj that is concatenation
- * of count copies of the value in objPtr.
+ * On error, when interp is not NULL, error information is left in it.
*
*---------------------------------------------------------------------------
*/
-int
+Tcl_Obj *
TclStringRepeat(
Tcl_Interp *interp,
Tcl_Obj *objPtr,
int count,
- Tcl_Obj **objPtrPtr)
+ int flags)
{
Tcl_Obj *objResultPtr;
+ int inPlace = flags & TCL_STRING_IN_PLACE;
int length = 0, unichar = 0, done = 1;
int binary = TclIsPureByteArray(objPtr);
@@ -2692,8 +2783,7 @@ TclStringRepeat(
if (length == 0) {
/* Any repeats of empty is empty. */
- *objPtrPtr = objPtr;
- return TCL_OK;
+ return objPtr;
}
if (count > INT_MAX/length) {
@@ -2702,13 +2792,13 @@ TclStringRepeat(
"max size for a Tcl value (%d bytes) exceeded", INT_MAX));
Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
}
- return TCL_ERROR;
+ return NULL;
}
if (binary) {
/* Efficiently produce a pure byte array result */
- objResultPtr = Tcl_IsShared(objPtr) ? Tcl_DuplicateObj(objPtr)
- : objPtr;
+ objResultPtr = (!inPlace || Tcl_IsShared(objPtr)) ?
+ Tcl_DuplicateObj(objPtr) : objPtr;
Tcl_SetByteArrayLength(objResultPtr, count*length); /* PANIC? */
Tcl_SetByteArrayLength(objResultPtr, length);
@@ -2721,7 +2811,7 @@ TclStringRepeat(
(count - done) * length);
} else if (unichar) {
/* Efficiently produce a pure Tcl_UniChar array result */
- if (Tcl_IsShared(objPtr)) {
+ if (!inPlace || Tcl_IsShared(objPtr)) {
objResultPtr = Tcl_NewUnicodeObj(Tcl_GetUnicode(objPtr), length);
} else {
TclInvalidateStringRep(objPtr);
@@ -2732,11 +2822,11 @@ TclStringRepeat(
if (interp) {
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
"string size overflow: unable to alloc %"
- TCL_LL_MODIFIER "u bytes",
- (Tcl_WideUInt)STRING_SIZE(count*length)));
+ TCL_Z_MODIFIER "u bytes",
+ STRING_SIZE(count*length)));
Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
}
- return TCL_ERROR;
+ return NULL;
}
Tcl_SetObjLength(objResultPtr, length);
while (count - done > done) {
@@ -2747,7 +2837,7 @@ TclStringRepeat(
(count - done) * length);
} else {
/* Efficiently concatenate string reps */
- if (Tcl_IsShared(objPtr)) {
+ if (!inPlace || Tcl_IsShared(objPtr)) {
objResultPtr = Tcl_NewStringObj(Tcl_GetString(objPtr), length);
} else {
TclFreeIntRep(objPtr);
@@ -2760,7 +2850,7 @@ TclStringRepeat(
count*length));
Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
}
- return TCL_ERROR;
+ return NULL;
}
Tcl_SetObjLength(objResultPtr, length);
while (count - done > done) {
@@ -2770,40 +2860,48 @@ TclStringRepeat(
Tcl_AppendToObj(objResultPtr, Tcl_GetString(objResultPtr),
(count - done) * length);
}
- *objPtrPtr = objResultPtr;
- return TCL_OK;
+ return objResultPtr;
}
/*
*---------------------------------------------------------------------------
*
- * TclStringCatObjv --
+ * TclStringCat --
*
* Performs the [string cat] function.
*
* Results:
- * A standard Tcl result.
+ * A (Tcl_Obj *) pointing to the result value, or NULL in case of an
+ * error.
*
* Side effects:
- * Writes to *objPtrPtr the address of Tcl_Obj that is concatenation
- * of all objc values in objv.
+ * On error, when interp is not NULL, error information is left in it.
*
*---------------------------------------------------------------------------
*/
-int
-TclStringCatObjv(
+Tcl_Obj *
+TclStringCat(
Tcl_Interp *interp,
- int inPlace,
int objc,
Tcl_Obj * const objv[],
- Tcl_Obj **objPtrPtr)
+ int flags)
{
- Tcl_Obj *objPtr, *objResultPtr, * const *ov;
- int oc, length = 0, binary = 1, first = 0;
+ Tcl_Obj *objResultPtr, * const *ov;
+ int oc, length = 0, binary = 1;
int allowUniChar = 1, requestUniChar = 0;
+ int first = objc - 1; /* Index of first value possibly not empty */
+ int last = 0; /* Index of last value possibly not empty */
+ int inPlace = flags & TCL_STRING_IN_PLACE;
- /* assert (objc >= 2) */
+ /* assert ( objc >= 0 ) */
+
+ if (objc <= 1) {
+ /* Only one or no objects; return first or empty */
+ return objc ? objv[0] : Tcl_NewObj();
+ }
+
+ /* assert ( objc >= 2 ) */
/*
* Analyze to determine what representation result should be.
@@ -2813,10 +2911,12 @@ TclStringCatObjv(
*/
ov = objv, oc = objc;
- while (oc-- && (binary || allowUniChar)) {
- objPtr = *ov++;
+ do {
+ Tcl_Obj *objPtr = *ov++;
- if (objPtr->bytes) {
+ if (TclIsPureByteArray(objPtr)) {
+ allowUniChar = 0;
+ } else if (objPtr->bytes) {
/* Value has a string rep. */
if (objPtr->length) {
/*
@@ -2831,85 +2931,152 @@ TclStringCatObjv(
}
} else {
/* assert (objPtr->typePtr != NULL) -- stork! */
- if (TclIsPureByteArray(objPtr)) {
- allowUniChar = 0;
+ binary = 0;
+ if (objPtr->typePtr == &tclStringType) {
+ /* Have a pure Unicode value; ask to preserve it */
+ requestUniChar = 1;
} else {
- binary = 0;
- if (objPtr->typePtr == &tclStringType) {
- /* Have a pure Unicode value; ask to preserve it */
- requestUniChar = 1;
- } else {
- /* Have another type; prevent shimmer */
- allowUniChar = 0;
- }
+ /* Have another type; prevent shimmer */
+ allowUniChar = 0;
}
}
- }
+ } while (--oc && (binary || allowUniChar));
if (binary) {
/* Result will be pure byte array. Pre-size it */
ov = objv; oc = objc;
- while (oc-- && (length >= 0)) {
- objPtr = *ov++;
+ do {
+ Tcl_Obj *objPtr = *ov++;
if (objPtr->bytes == NULL) {
int numBytes;
Tcl_GetByteArrayFromObj(objPtr, &numBytes); /* PANIC? */
- if (length == 0) {
- first = objc - oc - 1;
+ if (numBytes) {
+ last = objc - oc;
+ if (length == 0) {
+ first = last;
+ } else if (numBytes > INT_MAX - length) {
+ goto overflow;
+ }
+ length += numBytes;
}
- length += numBytes;
}
- }
+ } while (--oc);
} else if (allowUniChar && requestUniChar) {
/* Result will be pure Tcl_UniChar array. Pre-size it. */
ov = objv; oc = objc;
- while (oc-- && (length >= 0)) {
- objPtr = *ov++;
+ do {
+ Tcl_Obj *objPtr = *ov++;
if ((objPtr->bytes == NULL) || (objPtr->length)) {
int numChars;
Tcl_GetUnicodeFromObj(objPtr, &numChars); /* PANIC? */
- if (length == 0) {
- first = objc - oc - 1;
+ if (numChars) {
+ last = objc - oc;
+ if (length == 0) {
+ first = last;
+ } else if (numChars > INT_MAX - length) {
+ goto overflow;
+ }
+ length += numChars;
}
- length += numChars;
}
- }
+ } while (--oc);
} else {
/* Result will be concat of string reps. Pre-size it. */
ov = objv; oc = objc;
- while (oc-- && (length >= 0)) {
- int numBytes;
+ do {
+ Tcl_Obj *pendingPtr = NULL;
+
+ /*
+ * Loop until a possibly non-empty value is reached.
+ * Keep string rep generation pending when possible.
+ */
- objPtr = *ov++;
+ do {
+ /* assert ( pendingPtr == NULL ) */
+ /* assert ( length == 0 ) */
- Tcl_GetStringFromObj(objPtr, &numBytes); /* PANIC? */
- if ((length == 0) && numBytes) {
- first = objc - oc - 1;
+ Tcl_Obj *objPtr = *ov++;
+
+ if (objPtr->bytes == NULL) {
+ /* No string rep; Take the chance we can avoid making it */
+ pendingPtr = objPtr;
+ } else {
+ Tcl_GetStringFromObj(objPtr, &length); /* PANIC? */
+ }
+ } while (--oc && (length == 0) && (pendingPtr == NULL));
+
+ /*
+ * Either we found a possibly non-empty value, and we
+ * remember this index as the first and last such value so
+ * far seen, or (oc == 0) and all values are known empty,
+ * so first = last = objc - 1 signals the right quick return.
+ */
+
+ first = last = objc - oc - 1;
+
+ if (oc && (length == 0)) {
+ int numBytes;
+
+ /* assert ( pendingPtr != NULL ) */
+
+ /*
+ * There's a pending value followed by more values.
+ * Loop over remaining values generating strings until
+ * a non-empty value is found, or the pending value gets
+ * its string generated.
+ */
+
+ do {
+ Tcl_Obj *objPtr = *ov++;
+ Tcl_GetStringFromObj(objPtr, &numBytes); /* PANIC? */
+ } while (--oc && numBytes == 0 && pendingPtr->bytes == NULL);
+
+ if (numBytes) {
+ last = objc -oc -1;
+ }
+ if (oc || numBytes) {
+ Tcl_GetStringFromObj(pendingPtr, &length);
+ }
+ if (length == 0) {
+ if (numBytes) {
+ first = last;
+ }
+ } else if (numBytes > INT_MAX - length) {
+ goto overflow;
+ }
+ length += numBytes;
}
- length += numBytes;
- }
- }
+ } while (oc && (length == 0));
- if (length < 0) {
- if (interp) {
- Tcl_SetObjResult(interp, Tcl_ObjPrintf(
- "max size for a Tcl value (%d bytes) exceeded", INT_MAX));
- Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
+ while (oc) {
+ int numBytes;
+ Tcl_Obj *objPtr = *ov++;
+
+ /* assert ( length > 0 && pendingPtr == NULL ) */
+
+ Tcl_GetStringFromObj(objPtr, &numBytes); /* PANIC? */
+ if (numBytes) {
+ last = objc - oc;
+ if (numBytes > INT_MAX - length) {
+ goto overflow;
+ }
+ length += numBytes;
+ }
+ --oc;
}
- return TCL_ERROR;
}
- if (length == 0) {
- /* Total length of zero means every value has length zero */
- *objPtrPtr = objv[0];
- return TCL_OK;
+ if (last <= first /*|| length == 0 */) {
+ /* Only one non-empty value or zero length; return first */
+ /* NOTE: (length == 0) implies (last <= first) */
+ return objv[first];
}
- objv += first; objc -= first;
+ objv += first; objc = (last - first + 1);
if (binary) {
/* Efficiently produce a pure byte array result */
@@ -2956,11 +3123,11 @@ TclStringCatObjv(
if (interp) {
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
"concatenation failed: unable to alloc %"
- TCL_LL_MODIFIER "u bytes",
- (Tcl_WideUInt)STRING_SIZE(length)));
+ TCL_Z_MODIFIER "u bytes",
+ STRING_SIZE(length)));
Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
}
- return TCL_ERROR;
+ return NULL;
}
dst = Tcl_GetUnicode(objResultPtr) + start;
} else {
@@ -2969,14 +3136,15 @@ TclStringCatObjv(
/* Ugly interface! No scheme to init array size. */
objResultPtr = Tcl_NewUnicodeObj(&ch, 0); /* PANIC? */
if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) {
+ Tcl_DecrRefCount(objResultPtr);
if (interp) {
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
"concatenation failed: unable to alloc %"
- TCL_LL_MODIFIER "u bytes",
- (Tcl_WideUInt)STRING_SIZE(length)));
+ TCL_Z_MODIFIER "u bytes",
+ STRING_SIZE(length)));
Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
}
- return TCL_ERROR;
+ return NULL;
}
dst = Tcl_GetUnicode(objResultPtr);
}
@@ -3007,22 +3175,23 @@ TclStringCatObjv(
length));
Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
}
- return TCL_ERROR;
+ return NULL;
}
dst = Tcl_GetString(objResultPtr) + start;
- if (length > start) {
- TclFreeIntRep(objResultPtr);
- }
+
+ /* assert ( length > start ) */
+ TclFreeIntRep(objResultPtr);
} else {
objResultPtr = Tcl_NewObj(); /* PANIC? */
if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) {
+ Tcl_DecrRefCount(objResultPtr);
if (interp) {
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
"concatenation failed: unable to alloc %u bytes",
length));
Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
}
- return TCL_ERROR;
+ return NULL;
}
dst = Tcl_GetString(objResultPtr);
}
@@ -3037,14 +3206,21 @@ TclStringCatObjv(
}
}
}
- *objPtrPtr = objResultPtr;
- return TCL_OK;
+ return objResultPtr;
+
+ overflow:
+ if (interp) {
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf(
+ "max size for a Tcl value (%d bytes) exceeded", INT_MAX));
+ Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
+ }
+ return NULL;
}
/*
*---------------------------------------------------------------------------
*
- * TclStringFind --
+ * TclStringFirst --
*
* Implements the [string first] operation.
*
@@ -3060,20 +3236,20 @@ TclStringCatObjv(
*/
int
-TclStringFind(
+TclStringFirst(
Tcl_Obj *needle,
Tcl_Obj *haystack,
int start)
{
int lh, ln = Tcl_GetCharLength(needle);
+ if (start < 0) {
+ start = 0;
+ }
if (ln == 0) {
- /*
- * We don't find empty substrings. Bizarre!
- *
- * TODO: When we one day make this a true substring
- * finder, change this to "return 0"
- */
+ /* We don't find empty substrings. Bizarre!
+ * Whenever this routine is turned into a proper substring
+ * finder, change to `return start` after limits imposed. */
return -1;
}
@@ -3081,58 +3257,57 @@ TclStringFind(
unsigned char *end, *try, *bh;
unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln);
+ /* Find bytes in bytes */
bh = Tcl_GetByteArrayFromObj(haystack, &lh);
end = bh + lh;
try = bh + start;
while (try + ln <= end) {
- try = memchr(try, bn[0], end - try);
-
+ /*
+ * Look for the leading byte of the needle in the haystack
+ * starting at try and stopping when there's not enough room
+ * for the needle left.
+ */
+ try = memchr(try, bn[0], (end + 1 - ln) - try);
if (try == NULL) {
+ /* Leading byte not found -> needle cannot be found. */
return -1;
}
+ /* Leading byte found, check rest of needle. */
if (0 == memcmp(try+1, bn+1, ln-1)) {
+ /* Checks! Return the successful index. */
return (try - bh);
}
+ /* Rest of needle match failed; Iterate to continue search. */
try++;
}
return -1;
}
- lh = Tcl_GetCharLength(haystack);
- if (haystack->bytes && (lh == haystack->length)) {
- /* haystack is all single-byte chars */
-
- if (needle->bytes && (ln == needle->length)) {
- /* needle is also all single-byte chars */
- char *found = strstr(haystack->bytes + start, needle->bytes);
+ /*
+ * TODO: It might be nice to support some cases where it is not
+ * necessary to shimmer to &tclStringType to compute the result,
+ * and instead operate just on the objPtr->bytes values directly.
+ * However, we also do not want the answer to change based on the
+ * code pathway, or if it does we want that to be for some values
+ * we explicitly decline to support. Getting there will involve
+ * locking down in practice more firmly just what encodings produce
+ * what supported results for the objPtr->bytes values. For now,
+ * do only the well-defined Tcl_UniChar array search.
+ */
- if (found) {
- return (found - haystack->bytes);
- } else {
- return -1;
- }
- } else {
- /*
- * Cannot find substring with a multi-byte char inside
- * a string with no multi-byte chars.
- */
- return -1;
- }
- } else {
+ {
Tcl_UniChar *try, *end, *uh;
Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln);
uh = Tcl_GetUnicodeFromObj(haystack, &lh);
end = uh + lh;
- try = uh + start;
- while (try + ln <= end) {
- if ((*try == *un)
- && (0 == memcmp(try+1, un+1, (ln-1)*sizeof(Tcl_UniChar)))) {
+ for (try = uh + start; try + ln <= end; try++) {
+ if ((*try == *un) && (0 ==
+ memcmp(try + 1, un + 1, (ln-1) * sizeof(Tcl_UniChar)))) {
return (try - uh);
}
- try++;
}
return -1;
}
@@ -3169,24 +3344,24 @@ TclStringLast(
* We don't find empty substrings. Bizarre!
*
* TODO: When we one day make this a true substring
- * finder, change this to "return 0"
+ * finder, change this to "return last", after limitation.
*/
return -1;
}
- if (ln > last + 1) {
+ lh = Tcl_GetCharLength(haystack);
+ if (last >= lh) {
+ last = lh - 1;
+ }
+
+ if (last < ln - 1) {
return -1;
}
if (TclIsPureByteArray(needle) && TclIsPureByteArray(haystack)) {
- unsigned char *try, *bh;
+ unsigned char *try, *bh = Tcl_GetByteArrayFromObj(haystack, &lh);
unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln);
- bh = Tcl_GetByteArrayFromObj(haystack, &lh);
-
- if (last + 1 > lh) {
- last = lh - 1;
- }
try = bh + last + 1 - ln;
while (try >= bh) {
if ((*try == bn[0])
@@ -3198,38 +3373,10 @@ TclStringLast(
return -1;
}
- lh = Tcl_GetCharLength(haystack);
- if (last + 1 > lh) {
- last = lh - 1;
- }
- if (haystack->bytes && (lh == haystack->length)) {
- /* haystack is all single-byte chars */
-
- if (needle->bytes && (ln == needle->length)) {
- /* needle is also all single-byte chars */
-
- char *try = haystack->bytes + last + 1 - ln;
- while (try >= haystack->bytes) {
- if ((*try == needle->bytes[0])
- && (0 == memcmp(try+1, needle->bytes + 1, ln - 1))) {
- return (try - haystack->bytes);
- }
- try--;
- }
- return -1;
- } else {
- /*
- * Cannot find substring with a multi-byte char inside
- * a string with no multi-byte chars.
- */
- return -1;
- }
- } else {
- Tcl_UniChar *try, *uh;
+ {
+ Tcl_UniChar *try, *uh = Tcl_GetUnicodeFromObj(haystack, &lh);
Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln);
- uh = Tcl_GetUnicodeFromObj(haystack, &lh);
-
try = uh + last + 1 - ln;
while (try >= uh) {
if ((*try == un[0])
@@ -3245,14 +3392,14 @@ TclStringLast(
/*
*---------------------------------------------------------------------------
*
- * TclStringObjReverse --
+ * TclStringReverse --
*
* Implements the [string reverse] operation.
*
* Results:
- * An unshared Tcl value which is the [string reverse] of the argument
- * supplied. When sharing rules permit, the returned value might be the
- * argument with modifications done in place.
+ * A Tcl value which is the [string reverse] of the argument supplied.
+ * When sharing rules permit and the caller requests, the returned value
+ * might be the argument with modifications done in place.
*
* Side effects:
* May allocate a new Tcl_Obj.
@@ -3283,17 +3430,19 @@ ReverseBytes(
}
Tcl_Obj *
-TclStringObjReverse(
- Tcl_Obj *objPtr)
+TclStringReverse(
+ Tcl_Obj *objPtr,
+ int flags)
{
String *stringPtr;
- Tcl_UniChar ch;
+ Tcl_UniChar ch = 0;
+ int inPlace = flags & TCL_STRING_IN_PLACE;
if (TclIsPureByteArray(objPtr)) {
int numBytes;
unsigned char *from = Tcl_GetByteArrayFromObj(objPtr, &numBytes);
- if (Tcl_IsShared(objPtr)) {
+ if (!inPlace || Tcl_IsShared(objPtr)) {
objPtr = Tcl_NewByteArrayObj(NULL, numBytes);
}
ReverseBytes(Tcl_GetByteArrayFromObj(objPtr, NULL), from, numBytes);
@@ -3307,7 +3456,7 @@ TclStringObjReverse(
Tcl_UniChar *from = Tcl_GetUnicode(objPtr);
Tcl_UniChar *src = from + stringPtr->numChars;
- if (Tcl_IsShared(objPtr)) {
+ if (!inPlace || Tcl_IsShared(objPtr)) {
Tcl_UniChar *to;
/*
@@ -3315,7 +3464,6 @@ TclStringObjReverse(
* Tcl_SetObjLength into growing the unicode rep buffer.
*/
- ch = 0;
objPtr = Tcl_NewUnicodeObj(&ch, 1);
Tcl_SetObjLength(objPtr, stringPtr->numChars);
to = Tcl_GetUnicode(objPtr);
@@ -3337,7 +3485,7 @@ TclStringObjReverse(
int numBytes = objPtr->length;
char *to, *from = objPtr->bytes;
- if (Tcl_IsShared(objPtr)) {
+ if (!inPlace || Tcl_IsShared(objPtr)) {
objPtr = Tcl_NewObj();
Tcl_SetObjLength(objPtr, numBytes);
}
@@ -3361,7 +3509,7 @@ TclStringObjReverse(
* It's part of the contract for objPtr->bytes values.
* Thus, we can skip calling Tcl_UtfCharComplete() here.
*/
- int bytesInChar = Tcl_UtfToUniChar(from, &ch);
+ int bytesInChar = TclUtfToUniChar(from, &ch);
ReverseBytes((unsigned char *)to, (unsigned char *)from,
bytesInChar);
@@ -3384,6 +3532,150 @@ TclStringObjReverse(
/*
*---------------------------------------------------------------------------
*
+ * TclStringReplace --
+ *
+ * Implements the inner engine of the [string replace] command.
+ *
+ * The result is a concatenation of a prefix from objPtr, characters
+ * 0 through first-1, the insertPtr string value, and a suffix from
+ * objPtr, characters from first + count to the end. The effect is
+ * as if the inner substring of characters first through first+count-1
+ * are removed and replaced with insertPtr.
+ * If insertPtr is NULL, it is treated as an empty string.
+ * When passed the flag TCL_STRING_IN_PLACE, this routine will try
+ * to do the work within objPtr, so long as no sharing forbids it.
+ * Without that request, or as needed, a new Tcl value will be allocated
+ * to be the result.
+ *
+ * Results:
+ * A Tcl value that is the result of the substring replacement.
+ * May return NULL in case of an error. When NULL is returned and
+ * interp is non-NULL, error information is left in interp
+ *
+ *---------------------------------------------------------------------------
+ */
+
+Tcl_Obj *
+TclStringReplace(
+ Tcl_Interp *interp, /* For error reporting, may be NULL */
+ Tcl_Obj *objPtr, /* String to act upon */
+ int first, /* First index to replace */
+ int count, /* How many chars to replace */
+ Tcl_Obj *insertPtr, /* Replacement string, may be NULL */
+ int flags) /* TCL_STRING_IN_PLACE => attempt in-place */
+{
+ int inPlace = flags & TCL_STRING_IN_PLACE;
+ Tcl_Obj *result;
+
+ /* Caller is expected to pass sensible arguments */
+ assert ( count >= 0 ) ;
+ assert ( first >= 0 ) ;
+
+ /* Replace nothing with nothing */
+ if ((insertPtr == NULL) && (count == 0)) {
+ if (inPlace) {
+ return objPtr;
+ } else {
+ return Tcl_DuplicateObj(objPtr);
+ }
+ }
+
+ /*
+ * The caller very likely had to call Tcl_GetCharLength() or similar
+ * to be able to process index values. This means it is like that
+ * objPtr is either a proper "bytearray" or a "string" or else it has
+ * a known and short string rep.
+ */
+
+ if (TclIsPureByteArray(objPtr)) {
+ int numBytes;
+ unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &numBytes);
+
+ if (insertPtr == NULL) {
+ /* Replace something with nothing. */
+
+ assert ( first <= numBytes ) ;
+ assert ( count <= numBytes ) ;
+ assert ( first + count <= numBytes ) ;
+
+ result = Tcl_NewByteArrayObj(NULL, numBytes - count);/* PANIC? */
+ TclAppendBytesToByteArray(result, bytes, first);
+ TclAppendBytesToByteArray(result, bytes + first + count,
+ numBytes - count - first);
+ return result;
+ }
+
+ /* Replace everything */
+ if ((first == 0) && (count == numBytes)) {
+ return insertPtr;
+ }
+
+ if (TclIsPureByteArray(insertPtr)) {
+ int newBytes;
+ unsigned char *iBytes
+ = Tcl_GetByteArrayFromObj(insertPtr, &newBytes);
+
+ if (count == newBytes && inPlace && !Tcl_IsShared(objPtr)) {
+ /*
+ * Removal count and replacement count are equal.
+ * Other conditions permit. Do in-place splice.
+ */
+
+ memcpy(bytes + first, iBytes, count);
+ Tcl_InvalidateStringRep(objPtr);
+ return objPtr;
+ }
+
+ if (newBytes > INT_MAX - (numBytes - count)) {
+ if (interp) {
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf(
+ "max size for a Tcl value (%d bytes) exceeded",
+ INT_MAX));
+ Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
+ }
+ return NULL;
+ }
+ result = Tcl_NewByteArrayObj(NULL, numBytes - count + newBytes);
+ /* PANIC? */
+ TclAppendBytesToByteArray(result, bytes, first);
+ TclAppendBytesToByteArray(result, iBytes, newBytes);
+ TclAppendBytesToByteArray(result, bytes + first + count,
+ numBytes - count - first);
+ return result;
+ }
+
+ /* Flow through to try other approaches below */
+ }
+
+ /*
+ * TODO: Figure out how not to generate a Tcl_UniChar array rep
+ * when it can be determined objPtr->bytes points to a string of
+ * all single-byte characters so we can index it directly.
+ */
+
+ /* The traditional implementation... */
+ {
+ int numChars;
+ Tcl_UniChar *ustring = Tcl_GetUnicodeFromObj(objPtr, &numChars);
+
+ /* TODO: Is there an in-place option worth pursuing here? */
+
+ result = Tcl_NewUnicodeObj(ustring, first);
+ if (insertPtr) {
+ Tcl_AppendObjToObj(result, insertPtr);
+ }
+ if (first + count < numChars) {
+ Tcl_AppendUnicodeToObj(result, ustring + first + count,
+ numChars - first - count);
+ }
+
+ return result;
+ }
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
* FillUnicodeRep --
*
* Populate the Unicode internal rep with the Unicode form of its string
@@ -3418,7 +3710,7 @@ ExtendUnicodeRepWithString(
{
String *stringPtr = GET_STRING(objPtr);
int needed, numOrigChars = 0;
- Tcl_UniChar *dst;
+ Tcl_UniChar *dst, unichar = 0;
if (stringPtr->hasUnicode) {
numOrigChars = stringPtr->numChars;
@@ -3441,7 +3733,8 @@ ExtendUnicodeRepWithString(
numAppendChars = 0;
}
for (dst=stringPtr->unicode + numOrigChars; numAppendChars-- > 0; dst++) {
- bytes += TclUtfToUniChar(bytes, dst);
+ bytes += TclUtfToUniChar(bytes, &unichar);
+ *dst = unichar;
}
*dst = 0;
}