summaryrefslogtreecommitdiffstats
path: root/generic/tclStringObj.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r--generic/tclStringObj.c223
1 files changed, 158 insertions, 65 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 7c898b7..59758bb 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -1650,6 +1650,7 @@ Tcl_AppendFormatToObj(
const char *span = format, *msg, *errCode;
int numBytes = 0, objIndex = 0, gotXpg = 0, gotSequential = 0;
int originalLength, limit;
+ Tcl_UniChar ch = 0;
static const char *mixedXPG =
"cannot mix \"%\" and \"%n$\" conversion specifiers";
static const char *const badIndex[2] = {
@@ -1677,8 +1678,7 @@ Tcl_AppendFormatToObj(
#endif
int newXpg, numChars, allocSegment = 0, segmentLimit, segmentNumBytes;
Tcl_Obj *segment;
- Tcl_UniChar ch;
- int step = Tcl_UtfToUniChar(format, &ch);
+ int step = TclUtfToUniChar(format, &ch);
format += step;
if (ch != '%') {
@@ -1702,7 +1702,7 @@ Tcl_AppendFormatToObj(
* Step 0. Handle special case of escaped format marker (i.e., %%).
*/
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
if (ch == '%') {
span = format;
numBytes = step;
@@ -1722,7 +1722,7 @@ Tcl_AppendFormatToObj(
newXpg = 1;
objIndex = position - 1;
format = end + 1;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
}
}
if (newXpg) {
@@ -1773,7 +1773,7 @@ Tcl_AppendFormatToObj(
}
if (sawFlag) {
format += step;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
}
} while (sawFlag);
@@ -1785,7 +1785,7 @@ Tcl_AppendFormatToObj(
if (isdigit(UCHAR(ch))) {
width = strtoul(format, &end, 10);
format = end;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
} else if (ch == '*') {
if (objIndex >= objc - 1) {
msg = badIndex[gotXpg];
@@ -1801,7 +1801,7 @@ Tcl_AppendFormatToObj(
}
objIndex++;
format += step;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
}
if (width > limit) {
msg = overflow;
@@ -1817,12 +1817,12 @@ Tcl_AppendFormatToObj(
if (ch == '.') {
gotPrecision = 1;
format += step;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
}
if (isdigit(UCHAR(ch))) {
precision = strtoul(format, &end, 10);
format = end;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
} else if (ch == '*') {
if (objIndex >= objc - 1) {
msg = badIndex[gotXpg];
@@ -1843,7 +1843,7 @@ Tcl_AppendFormatToObj(
}
objIndex++;
format += step;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
}
/*
@@ -1853,14 +1853,14 @@ Tcl_AppendFormatToObj(
if (ch == 'h') {
useShort = 1;
format += step;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
} else if (ch == 'l') {
format += step;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
if (ch == 'l') {
useBig = 1;
format += step;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
#ifndef TCL_WIDE_INT_IS_LONG
} else {
useWide = 1;
@@ -2029,9 +2029,12 @@ Tcl_AppendFormatToObj(
segmentLimit -= 1;
precision--;
break;
+ case 'X':
+ Tcl_AppendToObj(segment, "0X", 2);
+ segmentLimit -= 2;
+ break;
case 'p':
case 'x':
- case 'X':
Tcl_AppendToObj(segment, "0x", 2);
segmentLimit -= 2;
break;
@@ -2039,6 +2042,10 @@ Tcl_AppendFormatToObj(
Tcl_AppendToObj(segment, "0b", 2);
segmentLimit -= 2;
break;
+ case 'd':
+ Tcl_AppendToObj(segment, "0d", 2);
+ segmentLimit -= 2;
+ break;
}
}
@@ -2196,7 +2203,11 @@ Tcl_AppendFormatToObj(
}
digitOffset = (int) (bits % base);
if (digitOffset > 9) {
- bytes[numDigits] = 'a' + digitOffset - 10;
+ if (ch == 'X') {
+ bytes[numDigits] = 'A' + digitOffset - 10;
+ } else {
+ bytes[numDigits] = 'a' + digitOffset - 10;
+ }
} else {
bytes[numDigits] = '0' + digitOffset;
}
@@ -2318,14 +2329,6 @@ Tcl_AppendFormatToObj(
goto error;
}
- switch (ch) {
- case 'E':
- case 'G':
- case 'X': {
- Tcl_SetObjLength(segment, Tcl_UtfToUpper(TclGetString(segment)));
- }
- }
-
if (width>0 && numChars<0) {
numChars = Tcl_GetCharLength(segment);
}
@@ -2847,11 +2850,21 @@ TclStringCatObjv(
Tcl_Obj * const objv[],
Tcl_Obj **objPtrPtr)
{
- Tcl_Obj *objPtr, *objResultPtr, * const *ov;
- int oc, length = 0, binary = 1, first = 0;
+ Tcl_Obj *objResultPtr, * const *ov;
+ int oc, length = 0, binary = 1;
int allowUniChar = 1, requestUniChar = 0;
+ int first = objc - 1; /* Index of first value possibly not empty */
+ int last = 0; /* Index of last value possibly not empty */
+
+ /* assert ( objc >= 0 ) */
+
+ if (objc <= 1) {
+ /* Only one or no objects; return first or empty */
+ *objPtrPtr = objc ? objv[0] : Tcl_NewObj();
+ return TCL_OK;
+ }
- /* assert (objc >= 2) */
+ /* assert ( objc >= 2 ) */
/*
* Analyze to determine what representation result should be.
@@ -2861,8 +2874,8 @@ TclStringCatObjv(
*/
ov = objv, oc = objc;
- while (oc-- && (binary || allowUniChar)) {
- objPtr = *ov++;
+ do {
+ Tcl_Obj *objPtr = *ov++;
if (objPtr->bytes) {
/* Value has a string rep. */
@@ -2892,72 +2905,144 @@ TclStringCatObjv(
}
}
}
- }
+ } while (--oc && (binary || allowUniChar));
if (binary) {
/* Result will be pure byte array. Pre-size it */
ov = objv; oc = objc;
- while (oc-- && (length >= 0)) {
- objPtr = *ov++;
+ do {
+ Tcl_Obj *objPtr = *ov++;
if (objPtr->bytes == NULL) {
int numBytes;
Tcl_GetByteArrayFromObj(objPtr, &numBytes); /* PANIC? */
- if (length == 0) {
- first = objc - oc - 1;
+ if (numBytes) {
+ last = objc - oc;
+ if (length == 0) {
+ first = last;
+ } else if (numBytes > INT_MAX - length) {
+ goto overflow;
+ }
+ length += numBytes;
}
- length += numBytes;
}
- }
+ } while (--oc);
} else if (allowUniChar && requestUniChar) {
/* Result will be pure Tcl_UniChar array. Pre-size it. */
ov = objv; oc = objc;
- while (oc-- && (length >= 0)) {
- objPtr = *ov++;
+ do {
+ Tcl_Obj *objPtr = *ov++;
if ((objPtr->bytes == NULL) || (objPtr->length)) {
int numChars;
Tcl_GetUnicodeFromObj(objPtr, &numChars); /* PANIC? */
- if (length == 0) {
- first = objc - oc - 1;
+ if (numChars) {
+ last = objc - oc;
+ if (length == 0) {
+ first = last;
+ } else if (numChars > INT_MAX - length) {
+ goto overflow;
+ }
+ length += numChars;
}
- length += numChars;
}
- }
+ } while (--oc);
} else {
/* Result will be concat of string reps. Pre-size it. */
ov = objv; oc = objc;
- while (oc-- && (length >= 0)) {
- int numBytes;
+ do {
+ Tcl_Obj *pendingPtr = NULL;
+
+ /*
+ * Loop until a possibly non-empty value is reached.
+ * Keep string rep generation pending when possible.
+ */
- objPtr = *ov++;
+ do {
+ /* assert ( pendingPtr == NULL ) */
+ /* assert ( length == 0 ) */
- Tcl_GetStringFromObj(objPtr, &numBytes); /* PANIC? */
- if ((length == 0) && numBytes) {
- first = objc - oc - 1;
+ Tcl_Obj *objPtr = *ov++;
+
+ if (objPtr->bytes == NULL) {
+ /* No string rep; Take the chance we can avoid making it */
+ pendingPtr = objPtr;
+ } else {
+ Tcl_GetStringFromObj(objPtr, &length); /* PANIC? */
+ }
+ } while (--oc && (length == 0) && (pendingPtr == NULL));
+
+ /*
+ * Either we found a possibly non-empty value, and we
+ * remember this index as the first and last such value so
+ * far seen, or (oc == 0) and all values are known empty,
+ * so first = last = objc - 1 signals the right quick return.
+ */
+
+ first = last = objc - oc - 1;
+
+ if (oc && (length == 0)) {
+ int numBytes;
+
+ /* assert ( pendingPtr != NULL ) */
+
+ /*
+ * There's a pending value followed by more values.
+ * Loop over remaining values generating strings until
+ * a non-empty value is found, or the pending value gets
+ * its string generated.
+ */
+
+ do {
+ Tcl_Obj *objPtr = *ov++;
+ Tcl_GetStringFromObj(objPtr, &numBytes); /* PANIC? */
+ } while (--oc && numBytes == 0 && pendingPtr->bytes == NULL);
+
+ if (numBytes) {
+ last = objc -oc -1;
+ }
+ if (oc || numBytes) {
+ Tcl_GetStringFromObj(pendingPtr, &length);
+ }
+ if (length == 0) {
+ if (numBytes) {
+ first = last;
+ }
+ } else if (numBytes > INT_MAX - length) {
+ goto overflow;
+ }
+ length += numBytes;
}
- length += numBytes;
- }
- }
+ } while (oc && (length == 0));
- if (length < 0) {
- if (interp) {
- Tcl_SetObjResult(interp, Tcl_ObjPrintf(
- "max size for a Tcl value (%d bytes) exceeded", INT_MAX));
- Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
+ while (oc) {
+ int numBytes;
+ Tcl_Obj *objPtr = *ov++;
+
+ /* assert ( length > 0 && pendingPtr == NULL ) */
+
+ Tcl_GetStringFromObj(objPtr, &numBytes); /* PANIC? */
+ if (numBytes) {
+ last = objc - oc;
+ if (numBytes > INT_MAX - length) {
+ goto overflow;
+ }
+ length += numBytes;
+ }
+ --oc;
}
- return TCL_ERROR;
}
- if (length == 0) {
- /* Total length of zero means every value has length zero */
- *objPtrPtr = objv[0];
+ if (last <= first /*|| length == 0 */) {
+ /* Only one non-empty value or zero length; return first */
+ /* NOTE: (length == 0) implies (last <= first) */
+ *objPtrPtr = objv[first];
return TCL_OK;
}
- objv += first; objc -= first;
+ objv += first; objc = (last - first + 1);
if (binary) {
/* Efficiently produce a pure byte array result */
@@ -3058,9 +3143,9 @@ TclStringCatObjv(
return TCL_ERROR;
}
dst = Tcl_GetString(objResultPtr) + start;
- if (length > start) {
- TclFreeIntRep(objResultPtr);
- }
+
+ /* assert ( length > start ) */
+ TclFreeIntRep(objResultPtr);
} else {
objResultPtr = Tcl_NewObj(); /* PANIC? */
if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) {
@@ -3087,6 +3172,14 @@ TclStringCatObjv(
}
*objPtrPtr = objResultPtr;
return TCL_OK;
+
+ overflow:
+ if (interp) {
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf(
+ "max size for a Tcl value (%d bytes) exceeded", INT_MAX));
+ Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
+ }
+ return TCL_ERROR;
}
/*
@@ -3335,7 +3428,7 @@ TclStringObjReverse(
Tcl_Obj *objPtr)
{
String *stringPtr;
- Tcl_UniChar ch;
+ Tcl_UniChar ch = 0;
if (TclIsPureByteArray(objPtr)) {
int numBytes;
@@ -3409,7 +3502,7 @@ TclStringObjReverse(
* It's part of the contract for objPtr->bytes values.
* Thus, we can skip calling Tcl_UtfCharComplete() here.
*/
- int bytesInChar = Tcl_UtfToUniChar(from, &ch);
+ int bytesInChar = TclUtfToUniChar(from, &ch);
ReverseBytes((unsigned char *)to, (unsigned char *)from,
bytesInChar);